PostgreSQL Source Code  git master
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/hash.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_collation.h"
23 #include "catalog/pg_type.h"
24 #include "common/int.h"
25 #include "common/md5.h"
26 #include "lib/hyperloglog.h"
27 #include "libpq/pqformat.h"
28 #include "miscadmin.h"
29 #include "parser/scansup.h"
30 #include "port/pg_bswap.h"
31 #include "regex/regex.h"
32 #include "utils/builtins.h"
33 #include "utils/bytea.h"
34 #include "utils/lsyscache.h"
35 #include "utils/memutils.h"
36 #include "utils/pg_locale.h"
37 #include "utils/sortsupport.h"
38 #include "utils/varlena.h"
39 
40 
41 /* GUC variable */
43 
44 typedef struct varlena unknown;
45 typedef struct varlena VarString;
46 
47 typedef struct
48 {
49  bool use_wchar; /* T if multibyte encoding */
50  char *str1; /* use these if not use_wchar */
51  char *str2; /* note: these point to original texts */
52  pg_wchar *wstr1; /* use these if use_wchar */
53  pg_wchar *wstr2; /* note: these are palloc'd */
54  int len1; /* string lengths in logical characters */
55  int len2;
56  /* Skip table for Boyer-Moore-Horspool search algorithm: */
57  int skiptablemask; /* mask for ANDing with skiptable subscripts */
58  int skiptable[256]; /* skip distance for given mismatched char */
60 
61 typedef struct
62 {
63  char *buf1; /* 1st string, or abbreviation original string
64  * buf */
65  char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
66  int buflen1;
67  int buflen2;
68  int last_len1; /* Length of last buf1 string/strxfrm() input */
69  int last_len2; /* Length of last buf2 string/strxfrm() blob */
70  int last_returned; /* Last comparison result (cache) */
71  bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
72  bool collate_c;
73  bool bpchar; /* Sorting bpchar, not varchar/text/bytea? */
74  hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
75  hyperLogLogState full_card; /* Full key cardinality state */
76  double prop_card; /* Required cardinality proportion */
79 
80 /*
81  * This should be large enough that most strings will fit, but small enough
82  * that we feel comfortable putting it on the stack
83  */
84 #define TEXTBUFLEN 1024
85 
/*
 * Datum access helpers for values handled as "unknown" (a typedef of
 * struct varlena in this file).  DatumGetUnknownP casts the result of
 * PG_DETOAST_DATUM, DatumGetUnknownPCopy that of PG_DETOAST_DATUM_COPY;
 * the PG_GETARG_* forms apply them to function argument n, and
 * PG_RETURN_UNKNOWN_P simply forwards to PG_RETURN_POINTER.
 */
86 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
87 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
88 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
89 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
90 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
91 
/*
 * Datum access helpers for VarString (also a typedef of struct varlena in
 * this file).  The P form casts the result of PG_DETOAST_DATUM, the PP form
 * that of PG_DETOAST_DATUM_PACKED.
 */
92 #define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X))
93 #define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X))
94 
95 static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
96 static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
97 static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
98 static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
99 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
100 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
101 static int32 text_length(Datum str);
102 static text *text_catenate(text *t1, text *t2);
103 static text *text_substring(Datum str,
104  int32 start,
105  int32 length,
106  bool length_not_specified);
107 static text *text_overlay(text *t1, text *t2, int sp, int sl);
108 static int text_position(text *t1, text *t2);
109 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
110 static int text_position_next(int start_pos, TextPositionState *state);
112 static int text_cmp(text *arg1, text *arg2, Oid collid);
113 static bytea *bytea_catenate(bytea *t1, bytea *t2);
115  int S,
116  int L,
117  bool length_not_specified);
118 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
119 static void appendStringInfoText(StringInfo str, const text *t);
122  const char *fldsep, const char *null_string);
124 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
125  int *value);
126 static const char *text_format_parse_format(const char *start_ptr,
127  const char *end_ptr,
128  int *argpos, int *widthpos,
129  int *flags, int *width);
130 static void text_format_string_conversion(StringInfo buf, char conversion,
131  FmgrInfo *typOutputInfo,
132  Datum value, bool isNull,
133  int flags, int width);
134 static void text_format_append_string(StringInfo buf, const char *str,
135  int flags, int width);
136 
137 
138 /*****************************************************************************
139  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
140  *****************************************************************************/
141 
142 /*
143  * cstring_to_text
144  *
145  * Create a text value from a null-terminated C string.
146  *
147  * The new text value is freshly palloc'd with a full-size VARHDR.
148  */
149 text *
150 cstring_to_text(const char *s)
151 {
152  return cstring_to_text_with_len(s, strlen(s));
153 }
154 
155 /*
156  * cstring_to_text_with_len
157  *
158  * Same as cstring_to_text except the caller specifies the string length;
159  * the string need not be null_terminated.
160  */
161 text *
162 cstring_to_text_with_len(const char *s, int len)
163 {
164  text *result = (text *) palloc(len + VARHDRSZ);
165 
166  SET_VARSIZE(result, len + VARHDRSZ);
167  memcpy(VARDATA(result), s, len);
168 
169  return result;
170 }
171 
172 /*
173  * text_to_cstring
174  *
175  * Create a palloc'd, null-terminated C string from a text value.
176  *
177  * We support being passed a compressed or toasted text value.
178  * This is a bit bogus since such values shouldn't really be referred to as
179  * "text *", but it seems useful for robustness. If we didn't handle that
180  * case here, we'd need another routine that did, anyway.
181  */
182 char *
184 {
185  /* must cast away the const, unfortunately */
186  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
187  int len = VARSIZE_ANY_EXHDR(tunpacked);
188  char *result;
189 
190  result = (char *) palloc(len + 1);
191  memcpy(result, VARDATA_ANY(tunpacked), len);
192  result[len] = '\0';
193 
194  if (tunpacked != t)
195  pfree(tunpacked);
196 
197  return result;
198 }
199 
200 /*
201  * text_to_cstring_buffer
202  *
203  * Copy a text value into a caller-supplied buffer of size dst_len.
204  *
205  * The text string is truncated if necessary to fit. The result is
206  * guaranteed null-terminated (unless dst_len == 0).
207  *
208  * We support being passed a compressed or toasted text value.
209  * This is a bit bogus since such values shouldn't really be referred to as
210  * "text *", but it seems useful for robustness. If we didn't handle that
211  * case here, we'd need another routine that did, anyway.
212  */
213 void
214 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
215 {
216  /* must cast away the const, unfortunately */
217  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
218  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
219 
220  if (dst_len > 0)
221  {
222  dst_len--;
223  if (dst_len >= src_len)
224  dst_len = src_len;
225  else /* ensure truncation is encoding-safe */
226  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
227  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
228  dst[dst_len] = '\0';
229  }
230 
231  if (srcunpacked != src)
232  pfree(srcunpacked);
233 }
234 
235 
236 /*****************************************************************************
237  * USER I/O ROUTINES *
238  *****************************************************************************/
239 
240 
/* VAL: numeric value of a digit character; DIG: digit character for a value */
#define VAL(c) ((c) - '0')
#define DIG(v) ((v) + '0')
243 
244 /*
245  * byteain - converts from printable representation of byte array
246  *
247  * Non-printable characters must be passed as '\nnn' (octal) and are
248  * converted to internal form. '\' must be passed as '\\'.
249  * ereport(ERROR, ...) if bad form.
250  *
251  * BUGS:
252  * The input is scanned twice.
253  * The error checking of input is minimal.
254  */
255 Datum
257 {
258  char *inputText = PG_GETARG_CSTRING(0);
259  char *tp;
260  char *rp;
261  int bc;
262  bytea *result;
263 
264  /* Recognize hex input */
265  if (inputText[0] == '\\' && inputText[1] == 'x')
266  {
267  size_t len = strlen(inputText);
268 
269  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
270  result = palloc(bc);
271  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
272  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
273 
274  PG_RETURN_BYTEA_P(result);
275  }
276 
277  /* Else, it's the traditional escaped style */
278  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
279  {
280  if (tp[0] != '\\')
281  tp++;
282  else if ((tp[0] == '\\') &&
283  (tp[1] >= '0' && tp[1] <= '3') &&
284  (tp[2] >= '0' && tp[2] <= '7') &&
285  (tp[3] >= '0' && tp[3] <= '7'))
286  tp += 4;
287  else if ((tp[0] == '\\') &&
288  (tp[1] == '\\'))
289  tp += 2;
290  else
291  {
292  /*
293  * one backslash, not followed by another or ### valid octal
294  */
295  ereport(ERROR,
296  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
297  errmsg("invalid input syntax for type %s", "bytea")));
298  }
299  }
300 
301  bc += VARHDRSZ;
302 
303  result = (bytea *) palloc(bc);
304  SET_VARSIZE(result, bc);
305 
306  tp = inputText;
307  rp = VARDATA(result);
308  while (*tp != '\0')
309  {
310  if (tp[0] != '\\')
311  *rp++ = *tp++;
312  else if ((tp[0] == '\\') &&
313  (tp[1] >= '0' && tp[1] <= '3') &&
314  (tp[2] >= '0' && tp[2] <= '7') &&
315  (tp[3] >= '0' && tp[3] <= '7'))
316  {
317  bc = VAL(tp[1]);
318  bc <<= 3;
319  bc += VAL(tp[2]);
320  bc <<= 3;
321  *rp++ = bc + VAL(tp[3]);
322 
323  tp += 4;
324  }
325  else if ((tp[0] == '\\') &&
326  (tp[1] == '\\'))
327  {
328  *rp++ = '\\';
329  tp += 2;
330  }
331  else
332  {
333  /*
334  * We should never get here. The first pass should not allow it.
335  */
336  ereport(ERROR,
337  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
338  errmsg("invalid input syntax for type %s", "bytea")));
339  }
340  }
341 
342  PG_RETURN_BYTEA_P(result);
343 }
344 
345 /*
346  * byteaout - converts to printable representation of byte array
347  *
348  * In the traditional escaped format, non-printable characters are
349  * printed as '\nnn' (octal) and '\' as '\\'.
350  */
351 Datum
353 {
354  bytea *vlena = PG_GETARG_BYTEA_PP(0);
355  char *result;
356  char *rp;
357 
359  {
360  /* Print hex format */
361  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
362  *rp++ = '\\';
363  *rp++ = 'x';
364  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
365  }
366  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
367  {
368  /* Print traditional escaped format */
369  char *vp;
370  int len;
371  int i;
372 
373  len = 1; /* empty string has 1 char */
374  vp = VARDATA_ANY(vlena);
375  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
376  {
377  if (*vp == '\\')
378  len += 2;
379  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
380  len += 4;
381  else
382  len++;
383  }
384  rp = result = (char *) palloc(len);
385  vp = VARDATA_ANY(vlena);
386  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
387  {
388  if (*vp == '\\')
389  {
390  *rp++ = '\\';
391  *rp++ = '\\';
392  }
393  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
394  {
395  int val; /* holds unprintable chars */
396 
397  val = *vp;
398  rp[0] = '\\';
399  rp[3] = DIG(val & 07);
400  val >>= 3;
401  rp[2] = DIG(val & 07);
402  val >>= 3;
403  rp[1] = DIG(val & 03);
404  rp += 4;
405  }
406  else
407  *rp++ = *vp;
408  }
409  }
410  else
411  {
412  elog(ERROR, "unrecognized bytea_output setting: %d",
413  bytea_output);
414  rp = result = NULL; /* keep compiler quiet */
415  }
416  *rp = '\0';
417  PG_RETURN_CSTRING(result);
418 }
419 
420 /*
421  * bytearecv - converts external binary format to bytea
422  */
423 Datum
425 {
427  bytea *result;
428  int nbytes;
429 
430  nbytes = buf->len - buf->cursor;
431  result = (bytea *) palloc(nbytes + VARHDRSZ);
432  SET_VARSIZE(result, nbytes + VARHDRSZ);
433  pq_copymsgbytes(buf, VARDATA(result), nbytes);
434  PG_RETURN_BYTEA_P(result);
435 }
436 
437 /*
438  * byteasend - converts bytea to binary format
439  *
440  * This is a special case: just copy the input...
441  */
442 Datum
444 {
445  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
446 
447  PG_RETURN_BYTEA_P(vlena);
448 }
449 
450 Datum
452 {
454 
455  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
456 
457  /* Append the value unless null. */
458  if (!PG_ARGISNULL(1))
459  {
461 
462  /* On the first time through, we ignore the delimiter. */
463  if (state == NULL)
464  state = makeStringAggState(fcinfo);
465  else if (!PG_ARGISNULL(2))
466  {
467  bytea *delim = PG_GETARG_BYTEA_PP(2);
468 
470  }
471 
473  }
474 
475  /*
476  * The transition type for string_agg() is declared to be "internal",
477  * which is a pass-by-value type the same size as a pointer.
478  */
479  PG_RETURN_POINTER(state);
480 }
481 
482 Datum
484 {
486 
487  /* cannot be called directly because of internal-type argument */
488  Assert(AggCheckCallContext(fcinfo, NULL));
489 
490  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
491 
492  if (state != NULL)
493  {
494  bytea *result;
495 
496  result = (bytea *) palloc(state->len + VARHDRSZ);
497  SET_VARSIZE(result, state->len + VARHDRSZ);
498  memcpy(VARDATA(result), state->data, state->len);
499  PG_RETURN_BYTEA_P(result);
500  }
501  else
502  PG_RETURN_NULL();
503 }
504 
505 /*
506  * textin - converts "..." to internal representation
507  */
508 Datum
510 {
511  char *inputText = PG_GETARG_CSTRING(0);
512 
513  PG_RETURN_TEXT_P(cstring_to_text(inputText));
514 }
515 
516 /*
517  * textout - converts internal representation to "..."
518  */
519 Datum
521 {
522  Datum txt = PG_GETARG_DATUM(0);
523 
525 }
526 
527 /*
528  * textrecv - converts external binary format to text
529  */
530 Datum
532 {
534  text *result;
535  char *str;
536  int nbytes;
537 
538  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
539 
540  result = cstring_to_text_with_len(str, nbytes);
541  pfree(str);
542  PG_RETURN_TEXT_P(result);
543 }
544 
545 /*
546  * textsend - converts text to binary format
547  */
548 Datum
550 {
551  text *t = PG_GETARG_TEXT_PP(0);
553 
554  pq_begintypsend(&buf);
557 }
558 
559 
560 /*
561  * unknownin - converts "..." to internal representation
562  */
563 Datum
565 {
566  char *str = PG_GETARG_CSTRING(0);
567 
568  /* representation is same as cstring */
570 }
571 
572 /*
573  * unknownout - converts internal representation to "..."
574  */
575 Datum
577 {
578  /* representation is same as cstring */
579  char *str = PG_GETARG_CSTRING(0);
580 
582 }
583 
584 /*
585  * unknownrecv - converts external binary format to unknown
586  */
587 Datum
589 {
591  char *str;
592  int nbytes;
593 
594  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
595  /* representation is same as cstring */
596  PG_RETURN_CSTRING(str);
597 }
598 
599 /*
600  * unknownsend - converts unknown to binary format
601  */
602 Datum
604 {
605  /* representation is same as cstring */
606  char *str = PG_GETARG_CSTRING(0);
608 
609  pq_begintypsend(&buf);
610  pq_sendtext(&buf, str, strlen(str));
612 }
613 
614 
615 /* ========== PUBLIC ROUTINES ========== */
616 
617 /*
618  * textlen -
619  * returns the logical length of a text*
620  * (which is less than the VARSIZE of the text*)
621  */
622 Datum
624 {
626 
627  /* try to avoid decompressing argument */
629 }
630 
631 /*
632  * text_length -
633  * Does the real work for textlen()
634  *
635  * This is broken out so it can be called directly by other string processing
636  * functions. Note that the argument is passed as a Datum, to indicate that
637  * it may still be in compressed form. We can avoid decompressing it at all
638  * in some cases.
639  */
640 static int32
642 {
643  /* fastpath when max encoding length is one */
646  else
647  {
648  text *t = DatumGetTextPP(str);
649 
651  VARSIZE_ANY_EXHDR(t)));
652  }
653 }
654 
655 /*
656  * textoctetlen -
657  * returns the physical length of a text*
658  * (which is less than the VARSIZE of the text*)
659  */
660 Datum
662 {
664 
665  /* We need not detoast the input at all */
667 }
668 
669 /*
670  * textcat -
671  * takes two text* and returns a text* that is the concatenation of
672  * the two.
673  *
674  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
675  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
676  * Allocate space for output in all cases.
677  * XXX - thomas 1997-07-10
678  */
679 Datum
681 {
682  text *t1 = PG_GETARG_TEXT_PP(0);
683  text *t2 = PG_GETARG_TEXT_PP(1);
684 
686 }
687 
688 /*
689  * text_catenate
690  * Guts of textcat(), broken out so it can be used by other functions
691  *
692  * Arguments can be in short-header form, but not compressed or out-of-line
693  */
694 static text *
696 {
697  text *result;
698  int len1,
699  len2,
700  len;
701  char *ptr;
702 
703  len1 = VARSIZE_ANY_EXHDR(t1);
704  len2 = VARSIZE_ANY_EXHDR(t2);
705 
706  /* paranoia ... probably should throw error instead? */
707  if (len1 < 0)
708  len1 = 0;
709  if (len2 < 0)
710  len2 = 0;
711 
712  len = len1 + len2 + VARHDRSZ;
713  result = (text *) palloc(len);
714 
715  /* Set size of result string... */
716  SET_VARSIZE(result, len);
717 
718  /* Fill data field of result string... */
719  ptr = VARDATA(result);
720  if (len1 > 0)
721  memcpy(ptr, VARDATA_ANY(t1), len1);
722  if (len2 > 0)
723  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
724 
725  return result;
726 }
727 
728 /*
729  * charlen_to_bytelen()
730  * Compute the number of bytes occupied by n characters starting at *p
731  *
732  * It is caller's responsibility that there actually are n characters;
733  * the string need not be null-terminated.
734  */
735 static int
736 charlen_to_bytelen(const char *p, int n)
737 {
739  {
740  /* Optimization for single-byte encodings */
741  return n;
742  }
743  else
744  {
745  const char *s;
746 
747  for (s = p; n > 0; n--)
748  s += pg_mblen(s);
749 
750  return s - p;
751  }
752 }
753 
754 /*
755  * text_substr()
756  * Return a substring starting at the specified position.
757  * - thomas 1997-12-31
758  *
759  * Input:
760  * - string
761  * - starting position (is one-based)
762  * - string length
763  *
764  * If the starting position is zero or less, then return from the start of the string
765  * adjusting the length to be consistent with the "negative start" per SQL.
766  * A negative length is rejected with an error; only when no length is given (text_substr_no_len) is the remaining string returned.
767  *
768  * Added multibyte support.
769  * - Tatsuo Ishii 1998-4-21
770  * Changed behavior if starting position is less than one to conform to SQL behavior.
771  * Formerly returned the entire string; now returns a portion.
772  * - Thomas Lockhart 1998-12-10
773  * Now uses faster TOAST-slicing interface
774  * - John Gray 2002-02-22
775  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
776  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
777  * error; if E < 1, return '', not entire string). Fixed MB related bug when
778  * S > LC and < LC + 4 sometimes garbage characters are returned.
779  * - Joe Conway 2002-08-10
780  */
781 Datum
783 {
785  PG_GETARG_INT32(1),
786  PG_GETARG_INT32(2),
787  false));
788 }
789 
790 /*
791  * text_substr_no_len -
792  * Wrapper to avoid opr_sanity failure due to
793  * one function accepting a different number of args.
794  */
795 Datum
797 {
799  PG_GETARG_INT32(1),
800  -1, true));
801 }
802 
803 /*
804  * text_substring -
805  * Does the real work for text_substr() and text_substr_no_len()
806  *
807  * This is broken out so it can be called directly by other string processing
808  * functions. Note that the argument is passed as a Datum, to indicate that
809  * it may still be in compressed/toasted form. We can avoid detoasting all
810  * of it in some cases.
811  *
812  * The result is always a freshly palloc'd datum.
813  */
814 static text *
815 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
816 {
818  int32 S = start; /* start position */
819  int32 S1; /* adjusted start position */
820  int32 L1; /* adjusted substring length */
821 
822  /* life is easy if the encoding max length is 1 */
823  if (eml == 1)
824  {
825  S1 = Max(S, 1);
826 
827  if (length_not_specified) /* special case - get length to end of
828  * string */
829  L1 = -1;
830  else
831  {
832  /* end position */
833  int E = S + length;
834 
835  /*
836  * A negative value for L is the only way for the end position to
837  * be before the start. SQL99 says to throw an error.
838  */
839  if (E < S)
840  ereport(ERROR,
841  (errcode(ERRCODE_SUBSTRING_ERROR),
842  errmsg("negative substring length not allowed")));
843 
844  /*
845  * A zero or negative value for the end position can happen if the
846  * start was negative or one. SQL99 says to return a zero-length
847  * string.
848  */
849  if (E < 1)
850  return cstring_to_text("");
851 
852  L1 = E - S1;
853  }
854 
855  /*
856  * If the start position is past the end of the string, SQL99 says to
857  * return a zero-length string -- DatumGetTextPSlice() will do
858  * that for us. Convert to zero-based starting position
859  */
860  return DatumGetTextPSlice(str, S1 - 1, L1);
861  }
862  else if (eml > 1)
863  {
864  /*
865  * When encoding max length is > 1, we can't get LC without
866  * detoasting, so we'll grab a conservatively large slice now and go
867  * back later to do the right thing
868  */
869  int32 slice_start;
870  int32 slice_size;
871  int32 slice_strlen;
872  text *slice;
873  int32 E1;
874  int32 i;
875  char *p;
876  char *s;
877  text *ret;
878 
879  /*
880  * if S is past the end of the string, the tuple toaster will return a
881  * zero-length string to us
882  */
883  S1 = Max(S, 1);
884 
885  /*
886  * We need to start at position zero because there is no way to know
887  * in advance which byte offset corresponds to the supplied start
888  * position.
889  */
890  slice_start = 0;
891 
892  if (length_not_specified) /* special case - get length to end of
893  * string */
894  slice_size = L1 = -1;
895  else
896  {
897  int E = S + length;
898 
899  /*
900  * A negative value for L is the only way for the end position to
901  * be before the start. SQL99 says to throw an error.
902  */
903  if (E < S)
904  ereport(ERROR,
905  (errcode(ERRCODE_SUBSTRING_ERROR),
906  errmsg("negative substring length not allowed")));
907 
908  /*
909  * A zero or negative value for the end position can happen if the
910  * start was negative or one. SQL99 says to return a zero-length
911  * string.
912  */
913  if (E < 1)
914  return cstring_to_text("");
915 
916  /*
917  * if E is past the end of the string, the tuple toaster will
918  * truncate the length for us
919  */
920  L1 = E - S1;
921 
922  /*
923  * Total slice size in bytes can't be any longer than the start
924  * position plus substring length times the encoding max length.
925  */
926  slice_size = (S1 + L1) * eml;
927  }
928 
929  /*
930  * If we're working with an untoasted source, no need to do an extra
931  * copying step.
932  */
935  slice = DatumGetTextPSlice(str, slice_start, slice_size);
936  else
937  slice = (text *) DatumGetPointer(str);
938 
939  /* see if we got back an empty string */
940  if (VARSIZE_ANY_EXHDR(slice) == 0)
941  {
942  if (slice != (text *) DatumGetPointer(str))
943  pfree(slice);
944  return cstring_to_text("");
945  }
946 
947  /* Now we can get the actual length of the slice in MB characters */
948  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
949  VARSIZE_ANY_EXHDR(slice));
950 
951  /*
952  * Check that the start position wasn't > slice_strlen. If so, SQL99
953  * says to return a zero-length string.
954  */
955  if (S1 > slice_strlen)
956  {
957  if (slice != (text *) DatumGetPointer(str))
958  pfree(slice);
959  return cstring_to_text("");
960  }
961 
962  /*
963  * Adjust L1 and E1 now that we know the slice string length. Again
964  * remember that S1 is one based, and slice_start is zero based.
965  */
966  if (L1 > -1)
967  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
968  else
969  E1 = slice_start + 1 + slice_strlen;
970 
971  /*
972  * Find the start position in the slice; remember S1 is not zero based
973  */
974  p = VARDATA_ANY(slice);
975  for (i = 0; i < S1 - 1; i++)
976  p += pg_mblen(p);
977 
978  /* hang onto a pointer to our start position */
979  s = p;
980 
981  /*
982  * Count the actual bytes used by the substring of the requested
983  * length.
984  */
985  for (i = S1; i < E1; i++)
986  p += pg_mblen(p);
987 
988  ret = (text *) palloc(VARHDRSZ + (p - s));
989  SET_VARSIZE(ret, VARHDRSZ + (p - s));
990  memcpy(VARDATA(ret), s, (p - s));
991 
992  if (slice != (text *) DatumGetPointer(str))
993  pfree(slice);
994 
995  return ret;
996  }
997  else
998  elog(ERROR, "invalid backend encoding: encoding max length < 1");
999 
1000  /* not reached: suppress compiler warning */
1001  return NULL;
1002 }
1003 
1004 /*
1005  * textoverlay
1006  * Replace specified substring of first string with second
1007  *
1008  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1009  * This code is a direct implementation of what the standard says.
1010  */
1011 Datum
1013 {
1014  text *t1 = PG_GETARG_TEXT_PP(0);
1015  text *t2 = PG_GETARG_TEXT_PP(1);
1016  int sp = PG_GETARG_INT32(2); /* substring start position */
1017  int sl = PG_GETARG_INT32(3); /* substring length */
1018 
1019  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1020 }
1021 
1022 Datum
1024 {
1025  text *t1 = PG_GETARG_TEXT_PP(0);
1026  text *t2 = PG_GETARG_TEXT_PP(1);
1027  int sp = PG_GETARG_INT32(2); /* substring start position */
1028  int sl;
1029 
1030  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1031  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1032 }
1033 
1034 static text *
1035 text_overlay(text *t1, text *t2, int sp, int sl)
1036 {
1037  text *result;
1038  text *s1;
1039  text *s2;
1040  int sp_pl_sl;
1041 
1042  /*
1043  * Check for possible integer-overflow cases. For negative sp, throw a
1044  * "substring length" error because that's what should be expected
1045  * according to the spec's definition of OVERLAY().
1046  */
1047  if (sp <= 0)
1048  ereport(ERROR,
1049  (errcode(ERRCODE_SUBSTRING_ERROR),
1050  errmsg("negative substring length not allowed")));
1051  if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1052  ereport(ERROR,
1053  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1054  errmsg("integer out of range")));
1055 
1056  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1057  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1058  result = text_catenate(s1, t2);
1059  result = text_catenate(result, s2);
1060 
1061  return result;
1062 }
1063 
1064 /*
1065  * textpos -
1066  * Return the position of the specified substring.
1067  * Implements the SQL POSITION() function.
1068  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1069  * - thomas 1997-07-27
1070  */
1071 Datum
1073 {
1074  text *str = PG_GETARG_TEXT_PP(0);
1075  text *search_str = PG_GETARG_TEXT_PP(1);
1076 
1077  PG_RETURN_INT32((int32) text_position(str, search_str));
1078 }
1079 
1080 /*
1081  * text_position -
1082  * Does the real work for textpos()
1083  *
1084  * Inputs:
1085  * t1 - string to be searched
1086  * t2 - pattern to match within t1
1087  * Result:
1088  * Character index of the first matched char, starting from 1,
1089  * or 0 if no match.
1090  *
1091  * This is broken out so it can be called directly by other string processing
1092  * functions.
1093  */
1094 static int
1096 {
1098  int result;
1099 
1100  text_position_setup(t1, t2, &state);
1101  result = text_position_next(1, &state);
1102  text_position_cleanup(&state);
1103  return result;
1104 }
1105 
1106 
1107 /*
1108  * text_position_setup, text_position_next, text_position_cleanup -
1109  * Component steps of text_position()
1110  *
1111  * These are broken out so that a string can be efficiently searched for
1112  * multiple occurrences of the same pattern. text_position_next may be
1113  * called multiple times with increasing values of start_pos, which is
1114  * the 1-based character position to start the search from. The "state"
1115  * variable is normally just a local variable in the caller.
1116  */
1117 
1118 static void
1120 {
1121  int len1 = VARSIZE_ANY_EXHDR(t1);
1122  int len2 = VARSIZE_ANY_EXHDR(t2);
1123 
1125  {
1126  /* simple case - single byte encoding */
1127  state->use_wchar = false;
1128  state->str1 = VARDATA_ANY(t1);
1129  state->str2 = VARDATA_ANY(t2);
1130  state->len1 = len1;
1131  state->len2 = len2;
1132  }
1133  else
1134  {
1135  /* not as simple - multibyte encoding */
1136  pg_wchar *p1,
1137  *p2;
1138 
1139  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1140  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1141  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1142  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1143 
1144  state->use_wchar = true;
1145  state->wstr1 = p1;
1146  state->wstr2 = p2;
1147  state->len1 = len1;
1148  state->len2 = len2;
1149  }
1150 
1151  /*
1152  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1153  * notes we use the terminology that the "haystack" is the string to be
1154  * searched (t1) and the "needle" is the pattern being sought (t2).
1155  *
1156  * If the needle is empty or bigger than the haystack then there is no
1157  * point in wasting cycles initializing the table. We also choose not to
1158  * use B-M-H for needles of length 1, since the skip table can't possibly
1159  * save anything in that case.
1160  */
1161  if (len1 >= len2 && len2 > 1)
1162  {
1163  int searchlength = len1 - len2;
1164  int skiptablemask;
1165  int last;
1166  int i;
1167 
1168  /*
1169  * First we must determine how much of the skip table to use. The
1170  * declaration of TextPositionState allows up to 256 elements, but for
1171  * short search problems we don't really want to have to initialize so
1172  * many elements --- it would take too long in comparison to the
1173  * actual search time. So we choose a useful skip table size based on
1174  * the haystack length minus the needle length. The closer the needle
1175  * length is to the haystack length the less useful skipping becomes.
1176  *
1177  * Note: since we use bit-masking to select table elements, the skip
1178  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1179  */
1180  if (searchlength < 16)
1181  skiptablemask = 3;
1182  else if (searchlength < 64)
1183  skiptablemask = 7;
1184  else if (searchlength < 128)
1185  skiptablemask = 15;
1186  else if (searchlength < 512)
1187  skiptablemask = 31;
1188  else if (searchlength < 2048)
1189  skiptablemask = 63;
1190  else if (searchlength < 4096)
1191  skiptablemask = 127;
1192  else
1193  skiptablemask = 255;
1194  state->skiptablemask = skiptablemask;
1195 
1196  /*
1197  * Initialize the skip table. We set all elements to the needle
1198  * length, since this is the correct skip distance for any character
1199  * not found in the needle.
1200  */
1201  for (i = 0; i <= skiptablemask; i++)
1202  state->skiptable[i] = len2;
1203 
1204  /*
1205  * Now examine the needle. For each character except the last one,
1206  * set the corresponding table element to the appropriate skip
1207  * distance. Note that when two characters share the same skip table
1208  * entry, the one later in the needle must determine the skip
1209  * distance.
1210  */
1211  last = len2 - 1;
1212 
1213  if (!state->use_wchar)
1214  {
1215  const char *str2 = state->str2;
1216 
1217  for (i = 0; i < last; i++)
1218  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1219  }
1220  else
1221  {
1222  const pg_wchar *wstr2 = state->wstr2;
1223 
1224  for (i = 0; i < last; i++)
1225  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1226  }
1227  }
1228 }
1229 
1230 static int
1232 {
1233  int haystack_len = state->len1;
1234  int needle_len = state->len2;
1235  int skiptablemask = state->skiptablemask;
1236 
1237  Assert(start_pos > 0); /* else caller error */
1238 
1239  if (needle_len <= 0)
1240  return start_pos; /* result for empty pattern */
1241 
1242  start_pos--; /* adjust for zero based arrays */
1243 
1244  /* Done if the needle can't possibly fit */
1245  if (haystack_len < start_pos + needle_len)
1246  return 0;
1247 
1248  if (!state->use_wchar)
1249  {
1250  /* simple case - single byte encoding */
1251  const char *haystack = state->str1;
1252  const char *needle = state->str2;
1253  const char *haystack_end = &haystack[haystack_len];
1254  const char *hptr;
1255 
1256  if (needle_len == 1)
1257  {
1258  /* No point in using B-M-H for a one-character needle */
1259  char nchar = *needle;
1260 
1261  hptr = &haystack[start_pos];
1262  while (hptr < haystack_end)
1263  {
1264  if (*hptr == nchar)
1265  return hptr - haystack + 1;
1266  hptr++;
1267  }
1268  }
1269  else
1270  {
1271  const char *needle_last = &needle[needle_len - 1];
1272 
1273  /* Start at startpos plus the length of the needle */
1274  hptr = &haystack[start_pos + needle_len - 1];
1275  while (hptr < haystack_end)
1276  {
1277  /* Match the needle scanning *backward* */
1278  const char *nptr;
1279  const char *p;
1280 
1281  nptr = needle_last;
1282  p = hptr;
1283  while (*nptr == *p)
1284  {
1285  /* Matched it all? If so, return 1-based position */
1286  if (nptr == needle)
1287  return p - haystack + 1;
1288  nptr--, p--;
1289  }
1290 
1291  /*
1292  * No match, so use the haystack char at hptr to decide how
1293  * far to advance. If the needle had any occurrence of that
1294  * character (or more precisely, one sharing the same
1295  * skiptable entry) before its last character, then we advance
1296  * far enough to align the last such needle character with
1297  * that haystack position. Otherwise we can advance by the
1298  * whole needle length.
1299  */
1300  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1301  }
1302  }
1303  }
1304  else
1305  {
1306  /* The multibyte char version. This works exactly the same way. */
1307  const pg_wchar *haystack = state->wstr1;
1308  const pg_wchar *needle = state->wstr2;
1309  const pg_wchar *haystack_end = &haystack[haystack_len];
1310  const pg_wchar *hptr;
1311 
1312  if (needle_len == 1)
1313  {
1314  /* No point in using B-M-H for a one-character needle */
1315  pg_wchar nchar = *needle;
1316 
1317  hptr = &haystack[start_pos];
1318  while (hptr < haystack_end)
1319  {
1320  if (*hptr == nchar)
1321  return hptr - haystack + 1;
1322  hptr++;
1323  }
1324  }
1325  else
1326  {
1327  const pg_wchar *needle_last = &needle[needle_len - 1];
1328 
1329  /* Start at startpos plus the length of the needle */
1330  hptr = &haystack[start_pos + needle_len - 1];
1331  while (hptr < haystack_end)
1332  {
1333  /* Match the needle scanning *backward* */
1334  const pg_wchar *nptr;
1335  const pg_wchar *p;
1336 
1337  nptr = needle_last;
1338  p = hptr;
1339  while (*nptr == *p)
1340  {
1341  /* Matched it all? If so, return 1-based position */
1342  if (nptr == needle)
1343  return p - haystack + 1;
1344  nptr--, p--;
1345  }
1346 
1347  /*
1348  * No match, so use the haystack char at hptr to decide how
1349  * far to advance. If the needle had any occurrence of that
1350  * character (or more precisely, one sharing the same
1351  * skiptable entry) before its last character, then we advance
1352  * far enough to align the last such needle character with
1353  * that haystack position. Otherwise we can advance by the
1354  * whole needle length.
1355  */
1356  hptr += state->skiptable[*hptr & skiptablemask];
1357  }
1358  }
1359  }
1360 
1361  return 0; /* not found */
1362 }
1363 
1364 static void
1366 {
1367  if (state->use_wchar)
1368  {
1369  pfree(state->wstr1);
1370  pfree(state->wstr2);
1371  }
1372 }
1373 
1374 /* varstr_cmp()
1375  * Comparison function for text strings with given lengths.
1376  * Includes locale support, but must copy strings to temporary memory
1377  * to allow null-termination for inputs to strcoll().
1378  * Returns an integer less than, equal to, or greater than zero, indicating
1379  * whether arg1 is less than, equal to, or greater than arg2.
1380  */
1381 int
1382 varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
1383 {
1384  int result;
1385 
1386  /*
1387  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1388  * have to do some memory copying. This turns out to be significantly
1389  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1390  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1391  */
1392  if (lc_collate_is_c(collid))
1393  {
1394  result = memcmp(arg1, arg2, Min(len1, len2));
1395  if ((result == 0) && (len1 != len2))
1396  result = (len1 < len2) ? -1 : 1;
1397  }
1398  else
1399  {
1400  char a1buf[TEXTBUFLEN];
1401  char a2buf[TEXTBUFLEN];
1402  char *a1p,
1403  *a2p;
1404  pg_locale_t mylocale = 0;
1405 
1406  if (collid != DEFAULT_COLLATION_OID)
1407  {
1408  if (!OidIsValid(collid))
1409  {
1410  /*
1411  * This typically means that the parser could not resolve a
1412  * conflict of implicit collations, so report it that way.
1413  */
1414  ereport(ERROR,
1415  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1416  errmsg("could not determine which collation to use for string comparison"),
1417  errhint("Use the COLLATE clause to set the collation explicitly.")));
1418  }
1419  mylocale = pg_newlocale_from_collation(collid);
1420  }
1421 
1422  /*
1423  * memcmp() can't tell us which of two unequal strings sorts first,
1424  * but it's a cheap way to tell if they're equal. Testing shows that
1425  * memcmp() followed by strcoll() is only trivially slower than
1426  * strcoll() by itself, so we don't lose much if this doesn't work out
1427  * very often, and if it does - for example, because there are many
1428  * equal strings in the input - then we win big by avoiding expensive
1429  * collation-aware comparisons.
1430  */
1431  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1432  return 0;
1433 
1434 #ifdef WIN32
1435  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1436  if (GetDatabaseEncoding() == PG_UTF8
1437  && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
1438  {
1439  int a1len;
1440  int a2len;
1441  int r;
1442 
1443  if (len1 >= TEXTBUFLEN / 2)
1444  {
1445  a1len = len1 * 2 + 2;
1446  a1p = palloc(a1len);
1447  }
1448  else
1449  {
1450  a1len = TEXTBUFLEN;
1451  a1p = a1buf;
1452  }
1453  if (len2 >= TEXTBUFLEN / 2)
1454  {
1455  a2len = len2 * 2 + 2;
1456  a2p = palloc(a2len);
1457  }
1458  else
1459  {
1460  a2len = TEXTBUFLEN;
1461  a2p = a2buf;
1462  }
1463 
1464  /* stupid Microsloth API does not work for zero-length input */
1465  if (len1 == 0)
1466  r = 0;
1467  else
1468  {
1469  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1470  (LPWSTR) a1p, a1len / 2);
1471  if (!r)
1472  ereport(ERROR,
1473  (errmsg("could not convert string to UTF-16: error code %lu",
1474  GetLastError())));
1475  }
1476  ((LPWSTR) a1p)[r] = 0;
1477 
1478  if (len2 == 0)
1479  r = 0;
1480  else
1481  {
1482  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1483  (LPWSTR) a2p, a2len / 2);
1484  if (!r)
1485  ereport(ERROR,
1486  (errmsg("could not convert string to UTF-16: error code %lu",
1487  GetLastError())));
1488  }
1489  ((LPWSTR) a2p)[r] = 0;
1490 
1491  errno = 0;
1492 #ifdef HAVE_LOCALE_T
1493  if (mylocale)
1494  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
1495  else
1496 #endif
1497  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1498  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1499  * headers */
1500  ereport(ERROR,
1501  (errmsg("could not compare Unicode strings: %m")));
1502 
1503  /*
1504  * In some locales wcscoll() can claim that nonidentical strings
1505  * are equal. Believing that would be bad news for a number of
1506  * reasons, so we follow Perl's lead and sort "equal" strings
1507  * according to strcmp (on the UTF-8 representation).
1508  */
1509  if (result == 0)
1510  {
1511  result = memcmp(arg1, arg2, Min(len1, len2));
1512  if ((result == 0) && (len1 != len2))
1513  result = (len1 < len2) ? -1 : 1;
1514  }
1515 
1516  if (a1p != a1buf)
1517  pfree(a1p);
1518  if (a2p != a2buf)
1519  pfree(a2p);
1520 
1521  return result;
1522  }
1523 #endif /* WIN32 */
1524 
1525  if (len1 >= TEXTBUFLEN)
1526  a1p = (char *) palloc(len1 + 1);
1527  else
1528  a1p = a1buf;
1529  if (len2 >= TEXTBUFLEN)
1530  a2p = (char *) palloc(len2 + 1);
1531  else
1532  a2p = a2buf;
1533 
1534  memcpy(a1p, arg1, len1);
1535  a1p[len1] = '\0';
1536  memcpy(a2p, arg2, len2);
1537  a2p[len2] = '\0';
1538 
1539  if (mylocale)
1540  {
1541  if (mylocale->provider == COLLPROVIDER_ICU)
1542  {
1543 #ifdef USE_ICU
1544 #ifdef HAVE_UCOL_STRCOLLUTF8
1545  if (GetDatabaseEncoding() == PG_UTF8)
1546  {
1547  UErrorCode status;
1548 
1549  status = U_ZERO_ERROR;
1550  result = ucol_strcollUTF8(mylocale->info.icu.ucol,
1551  arg1, len1,
1552  arg2, len2,
1553  &status);
1554  if (U_FAILURE(status))
1555  ereport(ERROR,
1556  (errmsg("collation failed: %s", u_errorName(status))));
1557  }
1558  else
1559 #endif
1560  {
1561  int32_t ulen1,
1562  ulen2;
1563  UChar *uchar1,
1564  *uchar2;
1565 
1566  ulen1 = icu_to_uchar(&uchar1, arg1, len1);
1567  ulen2 = icu_to_uchar(&uchar2, arg2, len2);
1568 
1569  result = ucol_strcoll(mylocale->info.icu.ucol,
1570  uchar1, ulen1,
1571  uchar2, ulen2);
1572 
1573  pfree(uchar1);
1574  pfree(uchar2);
1575  }
1576 #else /* not USE_ICU */
1577  /* shouldn't happen */
1578  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1579 #endif /* not USE_ICU */
1580  }
1581  else
1582  {
1583 #ifdef HAVE_LOCALE_T
1584  result = strcoll_l(a1p, a2p, mylocale->info.lt);
1585 #else
1586  /* shouldn't happen */
1587  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1588 #endif
1589  }
1590  }
1591  else
1592  result = strcoll(a1p, a2p);
1593 
1594  /*
1595  * In some locales strcoll() can claim that nonidentical strings are
1596  * equal. Believing that would be bad news for a number of reasons,
1597  * so we follow Perl's lead and sort "equal" strings according to
1598  * strcmp().
1599  */
1600  if (result == 0)
1601  result = strcmp(a1p, a2p);
1602 
1603  if (a1p != a1buf)
1604  pfree(a1p);
1605  if (a2p != a2buf)
1606  pfree(a2p);
1607  }
1608 
1609  return result;
1610 }
1611 
1612 /* text_cmp()
1613  * Internal comparison function for text strings.
1614  * Returns -1, 0 or 1
1615  */
1616 static int
1617 text_cmp(text *arg1, text *arg2, Oid collid)
1618 {
1619  char *a1p,
1620  *a2p;
1621  int len1,
1622  len2;
1623 
1624  a1p = VARDATA_ANY(arg1);
1625  a2p = VARDATA_ANY(arg2);
1626 
1627  len1 = VARSIZE_ANY_EXHDR(arg1);
1628  len2 = VARSIZE_ANY_EXHDR(arg2);
1629 
1630  return varstr_cmp(a1p, len1, a2p, len2, collid);
1631 }
1632 
1633 /*
1634  * Comparison functions for text strings.
1635  *
1636  * Note: btree indexes need these routines not to leak memory; therefore,
1637  * be careful to free working copies of toasted datums. Most places don't
1638  * need to be so careful.
1639  */
1640 
1641 Datum
1643 {
1644  Datum arg1 = PG_GETARG_DATUM(0);
1645  Datum arg2 = PG_GETARG_DATUM(1);
1646  bool result;
1647  Size len1,
1648  len2;
1649 
1650  /*
1651  * Since we only care about equality or not-equality, we can avoid all the
1652  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1653  * don't even have to do a bitwise comparison if we can show the lengths
1654  * of the strings are unequal; which might save us from having to detoast
1655  * one or both values.
1656  */
1657  len1 = toast_raw_datum_size(arg1);
1658  len2 = toast_raw_datum_size(arg2);
1659  if (len1 != len2)
1660  result = false;
1661  else
1662  {
1663  text *targ1 = DatumGetTextPP(arg1);
1664  text *targ2 = DatumGetTextPP(arg2);
1665 
1666  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1667  len1 - VARHDRSZ) == 0);
1668 
1669  PG_FREE_IF_COPY(targ1, 0);
1670  PG_FREE_IF_COPY(targ2, 1);
1671  }
1672 
1673  PG_RETURN_BOOL(result);
1674 }
1675 
1676 Datum
1678 {
1679  Datum arg1 = PG_GETARG_DATUM(0);
1680  Datum arg2 = PG_GETARG_DATUM(1);
1681  bool result;
1682  Size len1,
1683  len2;
1684 
1685  /* See comment in texteq() */
1686  len1 = toast_raw_datum_size(arg1);
1687  len2 = toast_raw_datum_size(arg2);
1688  if (len1 != len2)
1689  result = true;
1690  else
1691  {
1692  text *targ1 = DatumGetTextPP(arg1);
1693  text *targ2 = DatumGetTextPP(arg2);
1694 
1695  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1696  len1 - VARHDRSZ) != 0);
1697 
1698  PG_FREE_IF_COPY(targ1, 0);
1699  PG_FREE_IF_COPY(targ2, 1);
1700  }
1701 
1702  PG_RETURN_BOOL(result);
1703 }
1704 
1705 Datum
1707 {
1708  text *arg1 = PG_GETARG_TEXT_PP(0);
1709  text *arg2 = PG_GETARG_TEXT_PP(1);
1710  bool result;
1711 
1712  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1713 
1714  PG_FREE_IF_COPY(arg1, 0);
1715  PG_FREE_IF_COPY(arg2, 1);
1716 
1717  PG_RETURN_BOOL(result);
1718 }
1719 
1720 Datum
1722 {
1723  text *arg1 = PG_GETARG_TEXT_PP(0);
1724  text *arg2 = PG_GETARG_TEXT_PP(1);
1725  bool result;
1726 
1727  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1728 
1729  PG_FREE_IF_COPY(arg1, 0);
1730  PG_FREE_IF_COPY(arg2, 1);
1731 
1732  PG_RETURN_BOOL(result);
1733 }
1734 
1735 Datum
1737 {
1738  text *arg1 = PG_GETARG_TEXT_PP(0);
1739  text *arg2 = PG_GETARG_TEXT_PP(1);
1740  bool result;
1741 
1742  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1743 
1744  PG_FREE_IF_COPY(arg1, 0);
1745  PG_FREE_IF_COPY(arg2, 1);
1746 
1747  PG_RETURN_BOOL(result);
1748 }
1749 
1750 Datum
1752 {
1753  text *arg1 = PG_GETARG_TEXT_PP(0);
1754  text *arg2 = PG_GETARG_TEXT_PP(1);
1755  bool result;
1756 
1757  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1758 
1759  PG_FREE_IF_COPY(arg1, 0);
1760  PG_FREE_IF_COPY(arg2, 1);
1761 
1762  PG_RETURN_BOOL(result);
1763 }
1764 
1765 Datum
1767 {
1768  text *arg1 = PG_GETARG_TEXT_PP(0);
1769  text *arg2 = PG_GETARG_TEXT_PP(1);
1770  int32 result;
1771 
1772  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1773 
1774  PG_FREE_IF_COPY(arg1, 0);
1775  PG_FREE_IF_COPY(arg2, 1);
1776 
1777  PG_RETURN_INT32(result);
1778 }
1779 
1780 Datum
1782 {
1784  Oid collid = ssup->ssup_collation;
1785  MemoryContext oldcontext;
1786 
1787  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1788 
1789  /* Use generic string SortSupport */
1790  varstr_sortsupport(ssup, collid, false);
1791 
1792  MemoryContextSwitchTo(oldcontext);
1793 
1794  PG_RETURN_VOID();
1795 }
1796 
1797 /*
1798  * Generic sortsupport interface for character type's operator classes.
1799  * Includes locale support, and support for BpChar semantics (i.e. removing
1800  * trailing spaces before comparison).
1801  *
1802  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1803  * same representation. Callers that always use the C collation (e.g.
1804  * non-collatable type callers like bytea) may have NUL bytes in their strings;
1805  * this will not work with any other collation, though.
1806  */
1807 void
1809 {
1810  bool abbreviate = ssup->abbreviate;
1811  bool collate_c = false;
1812  VarStringSortSupport *sss;
1813  pg_locale_t locale = 0;
1814 
1815  /*
1816  * If possible, set ssup->comparator to a function which can be used to
1817  * directly compare two datums. If we can do this, we'll avoid the
1818  * overhead of a trip through the fmgr layer for every comparison, which
1819  * can be substantial.
1820  *
1821  * Most typically, we'll set the comparator to varstrfastcmp_locale, which
1822  * uses strcoll() to perform comparisons and knows about the special
1823  * requirements of BpChar callers. However, if LC_COLLATE = C, we can
1824  * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
1825  * both of which use memcmp() rather than strcoll().
1826  */
1827  if (lc_collate_is_c(collid))
1828  {
1829  if (!bpchar)
1830  ssup->comparator = varstrfastcmp_c;
1831  else
1832  ssup->comparator = bpcharfastcmp_c;
1833 
1834  collate_c = true;
1835  }
1836  else
1837  {
1838  /*
1839  * We need a collation-sensitive comparison. To make things faster,
1840  * we'll figure out the collation based on the locale id and cache the
1841  * result.
1842  */
1843  if (collid != DEFAULT_COLLATION_OID)
1844  {
1845  if (!OidIsValid(collid))
1846  {
1847  /*
1848  * This typically means that the parser could not resolve a
1849  * conflict of implicit collations, so report it that way.
1850  */
1851  ereport(ERROR,
1852  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1853  errmsg("could not determine which collation to use for string comparison"),
1854  errhint("Use the COLLATE clause to set the collation explicitly.")));
1855  }
1856  locale = pg_newlocale_from_collation(collid);
1857  }
1858 
1859  /*
1860  * There is a further exception on Windows. When the database
1861  * encoding is UTF-8 and we are not using the C collation, complex
1862  * hacks are required. We don't currently have a comparator that
1863  * handles that case, so we fall back on the slow method of having the
1864  * sort code invoke bttextcmp() (in the case of text) via the fmgr
1865  * trampoline. ICU locales work just the same on Windows, however.
1866  */
1867 #ifdef WIN32
1868  if (GetDatabaseEncoding() == PG_UTF8 &&
1869  !(locale && locale->provider == COLLPROVIDER_ICU))
1870  return;
1871 #endif
1872 
1874  }
1875 
1876  /*
1877  * Unfortunately, it seems that abbreviation for non-C collations is
1878  * broken on many common platforms; testing of multiple versions of glibc
1879  * reveals that, for many locales, strcoll() and strxfrm() do not return
1880  * consistent results, which is fatal to this optimization. While no
1881  * other libc other than Cygwin has so far been shown to have a problem,
1882  * we take the conservative course of action for right now and disable
1883  * this categorically. (Users who are certain this isn't a problem on
1884  * their system can define TRUST_STRXFRM.)
1885  *
1886  * Even apart from the risk of broken locales, it's possible that there
1887  * are platforms where the use of abbreviated keys should be disabled at
1888  * compile time. Having only 4 byte datums could make worst-case
1889  * performance drastically more likely, for example. Moreover, macOS's
1890  * strxfrm() implementation is known to not effectively concentrate a
1891  * significant amount of entropy from the original string in earlier
1892  * transformed blobs. It's possible that other supported platforms are
1893  * similarly encumbered. So, if we ever get past disabling this
1894  * categorically, we may still want or need to disable it for particular
1895  * platforms.
1896  */
1897 #ifndef TRUST_STRXFRM
1898  if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
1899  abbreviate = false;
1900 #endif
1901 
1902  /*
1903  * If we're using abbreviated keys, or if we're using a locale-aware
1904  * comparison, we need to initialize a StringSortSupport object. Both
1905  * cases will make use of the temporary buffers we initialize here for
1906  * scratch space (and to detect requirement for BpChar semantics from
1907  * caller), and the abbreviation case requires additional state.
1908  */
1909  if (abbreviate || !collate_c)
1910  {
1911  sss = palloc(sizeof(VarStringSortSupport));
1912  sss->buf1 = palloc(TEXTBUFLEN);
1913  sss->buflen1 = TEXTBUFLEN;
1914  sss->buf2 = palloc(TEXTBUFLEN);
1915  sss->buflen2 = TEXTBUFLEN;
1916  /* Start with invalid values */
1917  sss->last_len1 = -1;
1918  sss->last_len2 = -1;
1919  /* Initialize */
1920  sss->last_returned = 0;
1921  sss->locale = locale;
1922 
1923  /*
1924  * To avoid somehow confusing a strxfrm() blob and an original string,
1925  * constantly keep track of the variety of data that buf1 and buf2
1926  * currently contain.
1927  *
1928  * Comparisons may be interleaved with conversion calls. Frequently,
1929  * conversions and comparisons are batched into two distinct phases,
1930  * but the correctness of caching cannot hinge upon this. For
1931  * comparison caching, buffer state is only trusted if cache_blob is
1932  * found set to false, whereas strxfrm() caching only trusts the state
1933  * when cache_blob is found set to true.
1934  *
1935  * Arbitrarily initialize cache_blob to true.
1936  */
1937  sss->cache_blob = true;
1938  sss->collate_c = collate_c;
1939  sss->bpchar = bpchar;
1940  ssup->ssup_extra = sss;
1941 
1942  /*
1943  * If possible, plan to use the abbreviated keys optimization. The
1944  * core code may switch back to authoritative comparator should
1945  * abbreviation be aborted.
1946  */
1947  if (abbreviate)
1948  {
1949  sss->prop_card = 0.20;
1950  initHyperLogLog(&sss->abbr_card, 10);
1951  initHyperLogLog(&sss->full_card, 10);
1952  ssup->abbrev_full_comparator = ssup->comparator;
1953  ssup->comparator = varstrcmp_abbrev;
1956  }
1957  }
1958 }
1959 
1960 /*
1961  * sortsupport comparison func (for C locale case)
1962  */
1963 static int
1965 {
1966  VarString *arg1 = DatumGetVarStringPP(x);
1967  VarString *arg2 = DatumGetVarStringPP(y);
1968  char *a1p,
1969  *a2p;
1970  int len1,
1971  len2,
1972  result;
1973 
1974  a1p = VARDATA_ANY(arg1);
1975  a2p = VARDATA_ANY(arg2);
1976 
1977  len1 = VARSIZE_ANY_EXHDR(arg1);
1978  len2 = VARSIZE_ANY_EXHDR(arg2);
1979 
1980  result = memcmp(a1p, a2p, Min(len1, len2));
1981  if ((result == 0) && (len1 != len2))
1982  result = (len1 < len2) ? -1 : 1;
1983 
1984  /* We can't afford to leak memory here. */
1985  if (PointerGetDatum(arg1) != x)
1986  pfree(arg1);
1987  if (PointerGetDatum(arg2) != y)
1988  pfree(arg2);
1989 
1990  return result;
1991 }
1992 
1993 /*
1994  * sortsupport comparison func (for BpChar C locale case)
1995  *
1996  * BpChar outsources its sortsupport to this module. Specialization for the
1997  * varstr_sortsupport BpChar case, modeled on
1998  * internal_bpchar_pattern_compare().
1999  */
2000 static int
2002 {
2003  BpChar *arg1 = DatumGetBpCharPP(x);
2004  BpChar *arg2 = DatumGetBpCharPP(y);
2005  char *a1p,
2006  *a2p;
2007  int len1,
2008  len2,
2009  result;
2010 
2011  a1p = VARDATA_ANY(arg1);
2012  a2p = VARDATA_ANY(arg2);
2013 
2014  len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2015  len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2016 
2017  result = memcmp(a1p, a2p, Min(len1, len2));
2018  if ((result == 0) && (len1 != len2))
2019  result = (len1 < len2) ? -1 : 1;
2020 
2021  /* We can't afford to leak memory here. */
2022  if (PointerGetDatum(arg1) != x)
2023  pfree(arg1);
2024  if (PointerGetDatum(arg2) != y)
2025  pfree(arg2);
2026 
2027  return result;
2028 }
2029 
2030 /*
2031  * sortsupport comparison func (for locale case)
2032  */
2033 static int
2035 {
2036  VarString *arg1 = DatumGetVarStringPP(x);
2037  VarString *arg2 = DatumGetVarStringPP(y);
2038  bool arg1_match;
2040 
2041  /* working state */
2042  char *a1p,
2043  *a2p;
2044  int len1,
2045  len2,
2046  result;
2047 
2048  a1p = VARDATA_ANY(arg1);
2049  a2p = VARDATA_ANY(arg2);
2050 
2051  len1 = VARSIZE_ANY_EXHDR(arg1);
2052  len2 = VARSIZE_ANY_EXHDR(arg2);
2053 
2054  /* Fast pre-check for equality, as discussed in varstr_cmp() */
2055  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2056  {
2057  /*
2058  * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2059  * last_len2. Existing contents of buffers might still be used by
2060  * next call.
2061  *
2062  * It's fine to allow the comparison of BpChar padding bytes here,
2063  * even though that implies that the memcmp() will usually be
2064  * performed for BpChar callers (though multibyte characters could
2065  * still prevent that from occurring). The memcmp() is still very
2066  * cheap, and BpChar's funny semantics have us remove trailing spaces
2067  * (not limited to padding), so we need make no distinction between
2068  * padding space characters and "real" space characters.
2069  */
2070  result = 0;
2071  goto done;
2072  }
2073 
2074  if (sss->bpchar)
2075  {
2076  /* Get true number of bytes, ignoring trailing spaces */
2077  len1 = bpchartruelen(a1p, len1);
2078  len2 = bpchartruelen(a2p, len2);
2079  }
2080 
2081  if (len1 >= sss->buflen1)
2082  {
2083  pfree(sss->buf1);
2084  sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2085  sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2086  }
2087  if (len2 >= sss->buflen2)
2088  {
2089  pfree(sss->buf2);
2090  sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2091  sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2092  }
2093 
2094  /*
2095  * We're likely to be asked to compare the same strings repeatedly, and
2096  * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2097  * comparisons, even though in general there is no reason to think that
2098  * that will work out (every string datum may be unique). Caching does
2099  * not slow things down measurably when it doesn't work out, and can speed
2100  * things up by rather a lot when it does. In part, this is because the
2101  * memcmp() compares data from cachelines that are needed in L1 cache even
2102  * when the last comparison's result cannot be reused.
2103  */
2104  arg1_match = true;
2105  if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2106  {
2107  arg1_match = false;
2108  memcpy(sss->buf1, a1p, len1);
2109  sss->buf1[len1] = '\0';
2110  sss->last_len1 = len1;
2111  }
2112 
2113  /*
2114  * If we're comparing the same two strings as last time, we can return the
2115  * same answer without calling strcoll() again. This is more likely than
2116  * it seems (at least with moderate to low cardinality sets), because
2117  * quicksort compares the same pivot against many values.
2118  */
2119  if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2120  {
2121  memcpy(sss->buf2, a2p, len2);
2122  sss->buf2[len2] = '\0';
2123  sss->last_len2 = len2;
2124  }
2125  else if (arg1_match && !sss->cache_blob)
2126  {
2127  /* Use result cached following last actual strcoll() call */
2128  result = sss->last_returned;
2129  goto done;
2130  }
2131 
2132  if (sss->locale)
2133  {
2134  if (sss->locale->provider == COLLPROVIDER_ICU)
2135  {
2136 #ifdef USE_ICU
2137 #ifdef HAVE_UCOL_STRCOLLUTF8
2138  if (GetDatabaseEncoding() == PG_UTF8)
2139  {
2140  UErrorCode status;
2141 
2142  status = U_ZERO_ERROR;
2143  result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
2144  a1p, len1,
2145  a2p, len2,
2146  &status);
2147  if (U_FAILURE(status))
2148  ereport(ERROR,
2149  (errmsg("collation failed: %s", u_errorName(status))));
2150  }
2151  else
2152 #endif
2153  {
2154  int32_t ulen1,
2155  ulen2;
2156  UChar *uchar1,
2157  *uchar2;
2158 
2159  ulen1 = icu_to_uchar(&uchar1, a1p, len1);
2160  ulen2 = icu_to_uchar(&uchar2, a2p, len2);
2161 
2162  result = ucol_strcoll(sss->locale->info.icu.ucol,
2163  uchar1, ulen1,
2164  uchar2, ulen2);
2165 
2166  pfree(uchar1);
2167  pfree(uchar2);
2168  }
2169 #else /* not USE_ICU */
2170  /* shouldn't happen */
2171  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2172 #endif /* not USE_ICU */
2173  }
2174  else
2175  {
2176 #ifdef HAVE_LOCALE_T
2177  result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
2178 #else
2179  /* shouldn't happen */
2180  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2181 #endif
2182  }
2183  }
2184  else
2185  result = strcoll(sss->buf1, sss->buf2);
2186 
2187  /*
2188  * In some locales strcoll() can claim that nonidentical strings are
2189  * equal. Believing that would be bad news for a number of reasons, so we
2190  * follow Perl's lead and sort "equal" strings according to strcmp().
2191  */
2192  if (result == 0)
2193  result = strcmp(sss->buf1, sss->buf2);
2194 
2195  /* Cache result, perhaps saving an expensive strcoll() call next time */
2196  sss->cache_blob = false;
2197  sss->last_returned = result;
2198 done:
2199  /* We can't afford to leak memory here. */
2200  if (PointerGetDatum(arg1) != x)
2201  pfree(arg1);
2202  if (PointerGetDatum(arg2) != y)
2203  pfree(arg2);
2204 
2205  return result;
2206 }
2207 
2208 /*
2209  * Abbreviated key comparison func
2210  */
2211 static int
2213 {
2214  /*
2215  * When 0 is returned, the core system will call varstrfastcmp_c()
2216  * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale(). Even a
2217  * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2218  * authoritatively, for the same reason that there is a strcoll()
2219  * tie-breaker call to strcmp() in varstr_cmp().
2220  */
2221  if (x > y)
2222  return 1;
2223  else if (x == y)
2224  return 0;
2225  else
2226  return -1;
2227 }
2228 
2229 /*
2230  * Conversion routine for sortsupport. Converts original to abbreviated key
2231  * representation. Our encoding strategy is simple -- pack the first 8 bytes
2232  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2233  * stored in reverse order), and treat it as an unsigned integer. When the "C"
2234  * locale is used, or in case of bytea, just memcpy() from original instead.
2235  */
2236 static Datum
2238 {
2240  VarString *authoritative = DatumGetVarStringPP(original);
2241  char *authoritative_data = VARDATA_ANY(authoritative);
2242 
2243  /* working state */
2244  Datum res;
2245  char *pres;
2246  int len;
2247  uint32 hash;
2248 
2249  pres = (char *) &res;
2250  /* memset(), so any non-overwritten bytes are NUL */
2251  memset(pres, 0, sizeof(Datum));
2252  len = VARSIZE_ANY_EXHDR(authoritative);
2253 
2254  /* Get number of bytes, ignoring trailing spaces */
2255  if (sss->bpchar)
2256  len = bpchartruelen(authoritative_data, len);
2257 
2258  /*
2259  * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2260  * abbreviate keys. The full comparator for the C locale is always
2261  * memcmp(). It would be incorrect to allow bytea callers (callers that
2262  * always force the C collation -- bytea isn't a collatable type, but this
2263  * approach is convenient) to use strxfrm(). This is because bytea
2264  * strings may contain NUL bytes. Besides, this should be faster, too.
2265  *
2266  * More generally, it's okay that bytea callers can have NUL bytes in
2267  * strings because varstrcmp_abbrev() need not make a distinction between
2268  * terminating NUL bytes, and NUL bytes representing actual NULs in the
2269  * authoritative representation. Hopefully a comparison at or past one
2270  * abbreviated key's terminating NUL byte will resolve the comparison
2271  * without consulting the authoritative representation; specifically, some
2272  * later non-NUL byte in the longer string can resolve the comparison
2273  * against a subsequent terminating NUL in the shorter string. There will
2274  * usually be what is effectively a "length-wise" resolution there and
2275  * then.
2276  *
2277  * If that doesn't work out -- if all bytes in the longer string
2278  * positioned at or past the offset of the smaller string's (first)
2279  * terminating NUL are actually representative of NUL bytes in the
2280  * authoritative binary string (perhaps with some *terminating* NUL bytes
2281  * towards the end of the longer string iff it happens to still be small)
2282  * -- then an authoritative tie-breaker will happen, and do the right
2283  * thing: explicitly consider string length.
2284  */
2285  if (sss->collate_c)
2286  memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2287  else
2288  {
2289  Size bsize;
2290 #ifdef USE_ICU
2291  int32_t ulen = -1;
2292  UChar *uchar = NULL;
2293 #endif
2294 
2295  /*
2296  * We're not using the C collation, so fall back on strxfrm or ICU
2297  * analogs.
2298  */
2299 
2300  /* By convention, we use buffer 1 to store and NUL-terminate */
2301  if (len >= sss->buflen1)
2302  {
2303  pfree(sss->buf1);
2304  sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2305  sss->buf1 = palloc(sss->buflen1);
2306  }
2307 
2308  /* Might be able to reuse strxfrm() blob from last call */
2309  if (sss->last_len1 == len && sss->cache_blob &&
2310  memcmp(sss->buf1, authoritative_data, len) == 0)
2311  {
2312  memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2313  /* No change affecting cardinality, so no hashing required */
2314  goto done;
2315  }
2316 
2317  memcpy(sss->buf1, authoritative_data, len);
2318 
2319  /*
2320  * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
2321  * necessary for ICU, but doesn't hurt.
2322  */
2323  sss->buf1[len] = '\0';
2324  sss->last_len1 = len;
2325 
2326 #ifdef USE_ICU
2327  /* When using ICU and not UTF8, convert string to UChar. */
2328  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
2330  ulen = icu_to_uchar(&uchar, sss->buf1, len);
2331 #endif
2332 
2333  /*
2334  * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
2335  * and try again. Both of these functions have the result buffer
2336  * content undefined if the result did not fit, so we need to retry
2337  * until everything fits, even though we only need the first few bytes
2338  * in the end. When using ucol_nextSortKeyPart(), however, we only
2339  * ask for as many bytes as we actually need.
2340  */
2341  for (;;)
2342  {
2343 #ifdef USE_ICU
2344  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
2345  {
2346  /*
2347  * When using UTF8, use the iteration interface so we only
2348  * need to produce as many bytes as we actually need.
2349  */
2350  if (GetDatabaseEncoding() == PG_UTF8)
2351  {
2352  UCharIterator iter;
2353  uint32_t state[2];
2354  UErrorCode status;
2355 
2356  uiter_setUTF8(&iter, sss->buf1, len);
2357  state[0] = state[1] = 0; /* won't need that again */
2358  status = U_ZERO_ERROR;
2359  bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
2360  &iter,
2361  state,
2362  (uint8_t *) sss->buf2,
2363  Min(sizeof(Datum), sss->buflen2),
2364  &status);
2365  if (U_FAILURE(status))
2366  ereport(ERROR,
2367  (errmsg("sort key generation failed: %s",
2368  u_errorName(status))));
2369  }
2370  else
2371  bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
2372  uchar, ulen,
2373  (uint8_t *) sss->buf2, sss->buflen2);
2374  }
2375  else
2376 #endif
2377 #ifdef HAVE_LOCALE_T
2378  if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
2379  bsize = strxfrm_l(sss->buf2, sss->buf1,
2380  sss->buflen2, sss->locale->info.lt);
2381  else
2382 #endif
2383  bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2384 
2385  sss->last_len2 = bsize;
2386  if (bsize < sss->buflen2)
2387  break;
2388 
2389  /*
2390  * Grow buffer and retry.
2391  */
2392  pfree(sss->buf2);
2393  sss->buflen2 = Max(bsize + 1,
2394  Min(sss->buflen2 * 2, MaxAllocSize));
2395  sss->buf2 = palloc(sss->buflen2);
2396  }
2397 
2398  /*
2399  * Every Datum byte is always compared. This is safe because the
2400  * strxfrm() blob is itself NUL terminated, leaving no danger of
2401  * misinterpreting any NUL bytes not intended to be interpreted as
2402  * logically representing termination.
2403  *
2404  * (Actually, even if there were NUL bytes in the blob it would be
2405  * okay. See remarks on bytea case above.)
2406  */
2407  memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2408 
2409 #ifdef USE_ICU
2410  if (uchar)
2411  pfree(uchar);
2412 #endif
2413  }
2414 
2415  /*
2416  * Maintain approximate cardinality of both abbreviated keys and original,
2417  * authoritative keys using HyperLogLog. Used as cheap insurance against
2418  * the worst case, where we do many string transformations for no saving
2419  * in full strcoll()-based comparisons. These statistics are used by
2420  * varstr_abbrev_abort().
2421  *
2422  * First, Hash key proper, or a significant fraction of it. Mix in length
2423  * in order to compensate for cases where differences are past
2424  * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2425  */
2426  hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2427  Min(len, PG_CACHE_LINE_SIZE)));
2428 
2429  if (len > PG_CACHE_LINE_SIZE)
2430  hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2431 
2432  addHyperLogLog(&sss->full_card, hash);
2433 
2434  /* Hash abbreviated key */
2435 #if SIZEOF_DATUM == 8
2436  {
2437  uint32 lohalf,
2438  hihalf;
2439 
2440  lohalf = (uint32) res;
2441  hihalf = (uint32) (res >> 32);
2442  hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2443  }
2444 #else /* SIZEOF_DATUM != 8 */
2445  hash = DatumGetUInt32(hash_uint32((uint32) res));
2446 #endif
2447 
2448  addHyperLogLog(&sss->abbr_card, hash);
2449 
2450  /* Cache result, perhaps saving an expensive strxfrm() call next time */
2451  sss->cache_blob = true;
2452 done:
2453 
2454  /*
2455  * Byteswap on little-endian machines.
2456  *
2457  * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2458  * comparator) works correctly on all platforms. If we didn't do this,
2459  * the comparator would have to call memcmp() with a pair of pointers to
2460  * the first byte of each abbreviated key, which is slower.
2461  */
2462  res = DatumBigEndianToNative(res);
2463 
2464  /* Don't leak memory here */
2465  if (PointerGetDatum(authoritative) != original)
2466  pfree(authoritative);
2467 
2468  return res;
2469 }
2470 
2471 /*
2472  * Callback for estimating effectiveness of abbreviated key optimization, using
2473  * heuristic rules. Returns value indicating if the abbreviation optimization
2474  * should be aborted, based on its projected effectiveness.
2475  */
2476 static bool
2477 varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2478 {
2480  double abbrev_distinct,
2481  key_distinct;
2482 
2483  Assert(ssup->abbreviate);
2484 
2485  /* Have a little patience */
2486  if (memtupcount < 100)
2487  return false;
2488 
2489  abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2490  key_distinct = estimateHyperLogLog(&sss->full_card);
2491 
2492  /*
2493  * Clamp cardinality estimates to at least one distinct value. While
2494  * NULLs are generally disregarded, if only NULL values were seen so far,
2495  * that might misrepresent costs if we failed to clamp.
2496  */
2497  if (abbrev_distinct <= 1.0)
2498  abbrev_distinct = 1.0;
2499 
2500  if (key_distinct <= 1.0)
2501  key_distinct = 1.0;
2502 
2503  /*
2504  * In the worst case all abbreviated keys are identical, while at the same
2505  * time there are differences within full key strings not captured in
2506  * abbreviations.
2507  */
2508 #ifdef TRACE_SORT
2509  if (trace_sort)
2510  {
2511  double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2512 
2513  elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2514  "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2515  memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2516  sss->prop_card);
2517  }
2518 #endif
2519 
2520  /*
2521  * If the number of distinct abbreviated keys approximately matches the
2522  * number of distinct authoritative original keys, that's reason enough to
2523  * proceed. We can win even with a very low cardinality set if most
2524  * tie-breakers only memcmp(). This is by far the most important
2525  * consideration.
2526  *
2527  * While comparisons that are resolved at the abbreviated key level are
2528  * considerably cheaper than tie-breakers resolved with memcmp(), both of
2529  * those two outcomes are so much cheaper than a full strcoll() once
2530  * sorting is underway that it doesn't seem worth it to weigh abbreviated
2531  * cardinality against the overall size of the set in order to more
2532  * accurately model costs. Assume that an abbreviated comparison, and an
2533  * abbreviated comparison with a cheap memcmp()-based authoritative
2534  * resolution are equivalent.
2535  */
2536  if (abbrev_distinct > key_distinct * sss->prop_card)
2537  {
2538  /*
2539  * When we have exceeded 10,000 tuples, decay required cardinality
2540  * aggressively for next call.
2541  *
2542  * This is useful because the number of comparisons required on
2543  * average increases at a linearithmic rate, and at roughly 10,000
2544  * tuples that factor will start to dominate over the linear costs of
2545  * string transformation (this is a conservative estimate). The decay
2546  * rate is chosen to be a little less aggressive than halving -- which
2547  * (since we're called at points at which memtupcount has doubled)
2548  * would never see the cost model actually abort past the first call
2549  * following a decay. This decay rate is mostly a precaution against
2550  * a sudden, violent swing in how well abbreviated cardinality tracks
2551  * full key cardinality. The decay also serves to prevent a marginal
2552  * case from being aborted too late, when too much has already been
2553  * invested in string transformation.
2554  *
2555  * It's possible for sets of several million distinct strings with
2556  * mere tens of thousands of distinct abbreviated keys to still
2557  * benefit very significantly. This will generally occur provided
2558  * each abbreviated key is a proxy for a roughly uniform number of the
2559  * set's full keys. If it isn't so, we hope to catch that early and
2560  * abort. If it isn't caught early, by the time the problem is
2561  * apparent it's probably not worth aborting.
2562  */
2563  if (memtupcount > 10000)
2564  sss->prop_card *= 0.65;
2565 
2566  return false;
2567  }
2568 
2569  /*
2570  * Abort abbreviation strategy.
2571  *
2572  * The worst case, where all abbreviated keys are identical while all
2573  * original strings differ will typically only see a regression of about
2574  * 10% in execution time for small to medium sized lists of strings.
2575  * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2576  * often expect very large improvements, particularly with sets of strings
2577  * of moderately high to high abbreviated cardinality. There is little to
2578  * lose but much to gain, which our strategy reflects.
2579  */
2580 #ifdef TRACE_SORT
2581  if (trace_sort)
2582  elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2583  "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2584  memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2585 #endif
2586 
2587  return true;
2588 }
2589 
2590 Datum
2592 {
2593  text *arg1 = PG_GETARG_TEXT_PP(0);
2594  text *arg2 = PG_GETARG_TEXT_PP(1);
2595  text *result;
2596 
2597  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2598 
2599  PG_RETURN_TEXT_P(result);
2600 }
2601 
2602 Datum
2604 {
2605  text *arg1 = PG_GETARG_TEXT_PP(0);
2606  text *arg2 = PG_GETARG_TEXT_PP(1);
2607  text *result;
2608 
2609  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2610 
2611  PG_RETURN_TEXT_P(result);
2612 }
2613 
2614 
2615 /*
2616  * The following operators support character-by-character comparison
2617  * of text datums, to allow building indexes suitable for LIKE clauses.
2618  * Note that the regular texteq/textne comparison operators, and regular
2619  * support functions 1 and 2 with "C" collation are assumed to be
2620  * compatible with these!
2621  */
2622 
2623 static int
2625 {
2626  int result;
2627  int len1,
2628  len2;
2629 
2630  len1 = VARSIZE_ANY_EXHDR(arg1);
2631  len2 = VARSIZE_ANY_EXHDR(arg2);
2632 
2633  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2634  if (result != 0)
2635  return result;
2636  else if (len1 < len2)
2637  return -1;
2638  else if (len1 > len2)
2639  return 1;
2640  else
2641  return 0;
2642 }
2643 
2644 
2645 Datum
2647 {
2648  text *arg1 = PG_GETARG_TEXT_PP(0);
2649  text *arg2 = PG_GETARG_TEXT_PP(1);
2650  int result;
2651 
2652  result = internal_text_pattern_compare(arg1, arg2);
2653 
2654  PG_FREE_IF_COPY(arg1, 0);
2655  PG_FREE_IF_COPY(arg2, 1);
2656 
2657  PG_RETURN_BOOL(result < 0);
2658 }
2659 
2660 
2661 Datum
2663 {
2664  text *arg1 = PG_GETARG_TEXT_PP(0);
2665  text *arg2 = PG_GETARG_TEXT_PP(1);
2666  int result;
2667 
2668  result = internal_text_pattern_compare(arg1, arg2);
2669 
2670  PG_FREE_IF_COPY(arg1, 0);
2671  PG_FREE_IF_COPY(arg2, 1);
2672 
2673  PG_RETURN_BOOL(result <= 0);
2674 }
2675 
2676 
2677 Datum
2679 {
2680  text *arg1 = PG_GETARG_TEXT_PP(0);
2681  text *arg2 = PG_GETARG_TEXT_PP(1);
2682  int result;
2683 
2684  result = internal_text_pattern_compare(arg1, arg2);
2685 
2686  PG_FREE_IF_COPY(arg1, 0);
2687  PG_FREE_IF_COPY(arg2, 1);
2688 
2689  PG_RETURN_BOOL(result >= 0);
2690 }
2691 
2692 
2693 Datum
2695 {
2696  text *arg1 = PG_GETARG_TEXT_PP(0);
2697  text *arg2 = PG_GETARG_TEXT_PP(1);
2698  int result;
2699 
2700  result = internal_text_pattern_compare(arg1, arg2);
2701 
2702  PG_FREE_IF_COPY(arg1, 0);
2703  PG_FREE_IF_COPY(arg2, 1);
2704 
2705  PG_RETURN_BOOL(result > 0);
2706 }
2707 
2708 
2709 Datum
2711 {
2712  text *arg1 = PG_GETARG_TEXT_PP(0);
2713  text *arg2 = PG_GETARG_TEXT_PP(1);
2714  int result;
2715 
2716  result = internal_text_pattern_compare(arg1, arg2);
2717 
2718  PG_FREE_IF_COPY(arg1, 0);
2719  PG_FREE_IF_COPY(arg2, 1);
2720 
2721  PG_RETURN_INT32(result);
2722 }
2723 
2724 
2725 Datum
2727 {
2729  MemoryContext oldcontext;
2730 
2731  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2732 
2733  /* Use generic string SortSupport, forcing "C" collation */
2734  varstr_sortsupport(ssup, C_COLLATION_OID, false);
2735 
2736  MemoryContextSwitchTo(oldcontext);
2737 
2738  PG_RETURN_VOID();
2739 }
2740 
2741 
2742 /*-------------------------------------------------------------
2743  * byteaoctetlen
2744  *
2745  * get the number of bytes contained in an instance of type 'bytea'
2746  *-------------------------------------------------------------
2747  */
2748 Datum
2750 {
2751  Datum str = PG_GETARG_DATUM(0);
2752 
2753  /* We need not detoast the input at all */
2755 }
2756 
2757 /*
2758  * byteacat -
2759  * takes two bytea* and returns a bytea* that is the concatenation of
2760  * the two.
2761  *
2762  * Cloned from textcat and modified as required.
2763  */
2764 Datum
2766 {
2767  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2768  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2769 
2771 }
2772 
2773 /*
2774  * bytea_catenate
2775  * Guts of byteacat(), broken out so it can be used by other functions
2776  *
2777  * Arguments can be in short-header form, but not compressed or out-of-line
2778  */
2779 static bytea *
2781 {
2782  bytea *result;
2783  int len1,
2784  len2,
2785  len;
2786  char *ptr;
2787 
2788  len1 = VARSIZE_ANY_EXHDR(t1);
2789  len2 = VARSIZE_ANY_EXHDR(t2);
2790 
2791  /* paranoia ... probably should throw error instead? */
2792  if (len1 < 0)
2793  len1 = 0;
2794  if (len2 < 0)
2795  len2 = 0;
2796 
2797  len = len1 + len2 + VARHDRSZ;
2798  result = (bytea *) palloc(len);
2799 
2800  /* Set size of result string... */
2801  SET_VARSIZE(result, len);
2802 
2803  /* Fill data field of result string... */
2804  ptr = VARDATA(result);
2805  if (len1 > 0)
2806  memcpy(ptr, VARDATA_ANY(t1), len1);
2807  if (len2 > 0)
2808  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2809 
2810  return result;
2811 }
2812 
2813 #define PG_STR_GET_BYTEA(str_) \
2814  DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2815 
2816 /*
2817  * bytea_substr()
2818  * Return a substring starting at the specified position.
2819  * Cloned from text_substr and modified as required.
2820  *
2821  * Input:
2822  * - string
2823  * - starting position (is one-based)
2824  * - string length (optional)
2825  *
2826  * If the starting position is zero or less, then return from the start of the string
2827  * adjusting the length to be consistent with the "negative start" per SQL.
2828  * If the length is less than zero, an ERROR is thrown. If no third argument
2829  * (length) is provided, the length to the end of the string is assumed.
2830  */
2831 Datum
2833 {
2835  PG_GETARG_INT32(1),
2836  PG_GETARG_INT32(2),
2837  false));
2838 }
2839 
2840 /*
2841  * bytea_substr_no_len -
2842  * Wrapper to avoid opr_sanity failure due to
2843  * one function accepting a different number of args.
2844  */
2845 Datum
2847 {
2849  PG_GETARG_INT32(1),
2850  -1,
2851  true));
2852 }
2853 
2854 static bytea *
2856  int S,
2857  int L,
2858  bool length_not_specified)
2859 {
2860  int S1; /* adjusted start position */
2861  int L1; /* adjusted substring length */
2862 
2863  S1 = Max(S, 1);
2864 
2865  if (length_not_specified)
2866  {
2867  /*
2868  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2869  * end of the string if we pass it a negative value for length.
2870  */
2871  L1 = -1;
2872  }
2873  else
2874  {
2875  /* end position */
2876  int E = S + L;
2877 
2878  /*
2879  * A negative value for L is the only way for the end position to be
2880  * before the start. SQL99 says to throw an error.
2881  */
2882  if (E < S)
2883  ereport(ERROR,
2884  (errcode(ERRCODE_SUBSTRING_ERROR),
2885  errmsg("negative substring length not allowed")));
2886 
2887  /*
2888  * A zero or negative value for the end position can happen if the
2889  * start was negative or one. SQL99 says to return a zero-length
2890  * string.
2891  */
2892  if (E < 1)
2893  return PG_STR_GET_BYTEA("");
2894 
2895  L1 = E - S1;
2896  }
2897 
2898  /*
2899  * If the start position is past the end of the string, SQL99 says to
2900  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2901  * us. Convert to zero-based starting position
2902  */
2903  return DatumGetByteaPSlice(str, S1 - 1, L1);
2904 }
2905 
2906 /*
2907  * byteaoverlay
2908  * Replace specified substring of first string with second
2909  *
2910  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2911  * This code is a direct implementation of what the standard says.
2912  */
2913 Datum
2915 {
2916  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2917  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2918  int sp = PG_GETARG_INT32(2); /* substring start position */
2919  int sl = PG_GETARG_INT32(3); /* substring length */
2920 
2921  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2922 }
2923 
2924 Datum
2926 {
2927  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2928  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2929  int sp = PG_GETARG_INT32(2); /* substring start position */
2930  int sl;
2931 
2932  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2933  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2934 }
2935 
2936 static bytea *
2937 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2938 {
2939  bytea *result;
2940  bytea *s1;
2941  bytea *s2;
2942  int sp_pl_sl;
2943 
2944  /*
2945  * Check for possible integer-overflow cases. For negative sp, throw a
2946  * "substring length" error because that's what should be expected
2947  * according to the spec's definition of OVERLAY().
2948  */
2949  if (sp <= 0)
2950  ereport(ERROR,
2951  (errcode(ERRCODE_SUBSTRING_ERROR),
2952  errmsg("negative substring length not allowed")));
2953  if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
2954  ereport(ERROR,
2955  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2956  errmsg("integer out of range")));
2957 
2958  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2959  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2960  result = bytea_catenate(s1, t2);
2961  result = bytea_catenate(result, s2);
2962 
2963  return result;
2964 }
2965 
2966 /*
2967  * byteapos -
2968  * Return the position of the specified substring.
2969  * Implements the SQL POSITION() function.
2970  * Cloned from textpos and modified as required.
2971  */
2972 Datum
2974 {
2975  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2976  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2977  int pos;
2978  int px,
2979  p;
2980  int len1,
2981  len2;
2982  char *p1,
2983  *p2;
2984 
2985  len1 = VARSIZE_ANY_EXHDR(t1);
2986  len2 = VARSIZE_ANY_EXHDR(t2);
2987 
2988  if (len2 <= 0)
2989  PG_RETURN_INT32(1); /* result for empty pattern */
2990 
2991  p1 = VARDATA_ANY(t1);
2992  p2 = VARDATA_ANY(t2);
2993 
2994  pos = 0;
2995  px = (len1 - len2);
2996  for (p = 0; p <= px; p++)
2997  {
2998  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2999  {
3000  pos = p + 1;
3001  break;
3002  };
3003  p1++;
3004  };
3005 
3006  PG_RETURN_INT32(pos);
3007 }
3008 
3009 /*-------------------------------------------------------------
3010  * byteaGetByte
3011  *
3012  * this routine treats "bytea" as an array of bytes.
3013  * It returns the Nth byte (a number between 0 and 255).
3014  *-------------------------------------------------------------
3015  */
3016 Datum
3018 {
3019  bytea *v = PG_GETARG_BYTEA_PP(0);
3020  int32 n = PG_GETARG_INT32(1);
3021  int len;
3022  int byte;
3023 
3024  len = VARSIZE_ANY_EXHDR(v);
3025 
3026  if (n < 0 || n >= len)
3027  ereport(ERROR,
3028  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3029  errmsg("index %d out of valid range, 0..%d",
3030  n, len - 1)));
3031 
3032  byte = ((unsigned char *) VARDATA_ANY(v))[n];
3033 
3034  PG_RETURN_INT32(byte);
3035 }
3036 
3037 /*-------------------------------------------------------------
3038  * byteaGetBit
3039  *
3040  * This routine treats a "bytea" type like an array of bits.
3041  * It returns the value of the Nth bit (0 or 1).
3042  *
3043  *-------------------------------------------------------------
3044  */
3045 Datum
3047 {
3048  bytea *v = PG_GETARG_BYTEA_PP(0);
3049  int32 n = PG_GETARG_INT32(1);
3050  int byteNo,
3051  bitNo;
3052  int len;
3053  int byte;
3054 
3055  len = VARSIZE_ANY_EXHDR(v);
3056 
3057  if (n < 0 || n >= len * 8)
3058  ereport(ERROR,
3059  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3060  errmsg("index %d out of valid range, 0..%d",
3061  n, len * 8 - 1)));
3062 
3063  byteNo = n / 8;
3064  bitNo = n % 8;
3065 
3066  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3067 
3068  if (byte & (1 << bitNo))
3069  PG_RETURN_INT32(1);
3070  else
3071  PG_RETURN_INT32(0);
3072 }
3073 
3074 /*-------------------------------------------------------------
3075  * byteaSetByte
3076  *
3077  * Given an instance of type 'bytea' creates a new one with
3078  * the Nth byte set to the given value.
3079  *
3080  *-------------------------------------------------------------
3081  */
3082 Datum
3084 {
3085  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3086  int32 n = PG_GETARG_INT32(1);
3087  int32 newByte = PG_GETARG_INT32(2);
3088  int len;
3089 
3090  len = VARSIZE(res) - VARHDRSZ;
3091 
3092  if (n < 0 || n >= len)
3093  ereport(ERROR,
3094  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3095  errmsg("index %d out of valid range, 0..%d",
3096  n, len - 1)));
3097 
3098  /*
3099  * Now set the byte.
3100  */
3101  ((unsigned char *) VARDATA(res))[n] = newByte;
3102 
3103  PG_RETURN_BYTEA_P(res);
3104 }
3105 
3106 /*-------------------------------------------------------------
3107  * byteaSetBit
3108  *
3109  * Given an instance of type 'bytea' creates a new one with
3110  * the Nth bit set to the given value.
3111  *
3112  *-------------------------------------------------------------
3113  */
3114 Datum
3116 {
3117  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3118  int32 n = PG_GETARG_INT32(1);
3119  int32 newBit = PG_GETARG_INT32(2);
3120  int len;
3121  int oldByte,
3122  newByte;
3123  int byteNo,
3124  bitNo;
3125 
3126  len = VARSIZE(res) - VARHDRSZ;
3127 
3128  if (n < 0 || n >= len * 8)
3129  ereport(ERROR,
3130  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3131  errmsg("index %d out of valid range, 0..%d",
3132  n, len * 8 - 1)));
3133 
3134  byteNo = n / 8;
3135  bitNo = n % 8;
3136 
3137  /*
3138  * sanity check!
3139  */
3140  if (newBit != 0 && newBit != 1)
3141  ereport(ERROR,
3142  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3143  errmsg("new bit must be 0 or 1")));
3144 
3145  /*
3146  * Update the byte.
3147  */
3148  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3149 
3150  if (newBit == 0)
3151  newByte = oldByte & (~(1 << bitNo));
3152  else
3153  newByte = oldByte | (1 << bitNo);
3154 
3155  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3156 
3157  PG_RETURN_BYTEA_P(res);
3158 }
3159 
3160 
3161 /* text_name()
3162  * Converts a text type to a Name type.
3163  */
3164 Datum
3166 {
3167  text *s = PG_GETARG_TEXT_PP(0);
3168  Name result;
3169  int len;
3170 
3171  len = VARSIZE_ANY_EXHDR(s);
3172 
3173  /* Truncate oversize input */
3174  if (len >= NAMEDATALEN)
3175  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3176 
3177  /* We use palloc0 here to ensure result is zero-padded */
3178  result = (Name) palloc0(NAMEDATALEN);
3179  memcpy(NameStr(*result), VARDATA_ANY(s), len);
3180 
3181  PG_RETURN_NAME(result);
3182 }
3183 
3184 /* name_text()
3185  * Converts a Name type to a text type.
3186  */
3187 Datum
3189 {
3190  Name s = PG_GETARG_NAME(0);
3191 
3193 }
3194 
3195 
3196 /*
3197  * textToQualifiedNameList - convert a text object to list of names
3198  *
3199  * This implements the input parsing needed by nextval() and other
3200  * functions that take a text parameter representing a qualified name.
3201  * We split the name at dots, downcase if not double-quoted, and
3202  * truncate names if they're too long.
3203  */
3204 List *
3206 {
3207  char *rawname;
3208  List *result = NIL;
3209  List *namelist;
3210  ListCell *l;
3211 
3212  /* Convert to C string (handles possible detoasting). */
3213  /* Note we rely on being able to modify rawname below. */
3214  rawname = text_to_cstring(textval);
3215 
3216  if (!SplitIdentifierString(rawname, '.', &namelist))
3217  ereport(ERROR,
3218  (errcode(ERRCODE_INVALID_NAME),
3219  errmsg("invalid name syntax")));
3220 
3221  if (namelist == NIL)
3222  ereport(ERROR,
3223  (errcode(ERRCODE_INVALID_NAME),
3224  errmsg("invalid name syntax")));
3225 
3226  foreach(l, namelist)
3227  {
3228  char *curname = (char *) lfirst(l);
3229 
3230  result = lappend(result, makeString(pstrdup(curname)));
3231  }
3232 
3233  pfree(rawname);
3234  list_free(namelist);
3235 
3236  return result;
3237 }
3238 
3239 /*
3240  * SplitIdentifierString --- parse a string containing identifiers
3241  *
3242  * This is the guts of textToQualifiedNameList, and is exported for use in
3243  * other situations such as parsing GUC variables. In the GUC case, it's
3244  * important to avoid memory leaks, so the API is designed to minimize the
3245  * amount of stuff that needs to be allocated and freed.
3246  *
3247  * Inputs:
3248  * rawstring: the input string; must be overwritable! On return, it's
3249  * been modified to contain the separated identifiers.
3250  * separator: the separator punctuation expected between identifiers
3251  * (typically '.' or ','). Whitespace may also appear around
3252  * identifiers.
3253  * Outputs:
3254  * namelist: filled with a palloc'd list of pointers to identifiers within
3255  * rawstring. Caller should list_free() this even on error return.
3256  *
3257  * Returns true if okay, false if there is a syntax error in the string.
3258  *
3259  * Note that an empty string is considered okay here, though not in
3260  * textToQualifiedNameList.
3261  */
3262 bool
3263 SplitIdentifierString(char *rawstring, char separator,
3264  List **namelist)
3265 {
3266  char *nextp = rawstring;
3267  bool done = false;
3268 
3269  *namelist = NIL;
3270 
3271  while (scanner_isspace(*nextp))
3272  nextp++; /* skip leading whitespace */
3273 
3274  if (*nextp == '\0')
3275  return true; /* allow empty string */
3276 
3277  /* At the top of the loop, we are at start of a new identifier. */
3278  do
3279  {
3280  char *curname;
3281  char *endp;
3282 
3283  if (*nextp == '"')
3284  {
3285  /* Quoted name --- collapse quote-quote pairs, no downcasing */
3286  curname = nextp + 1;
3287  for (;;)
3288  {
3289  endp = strchr(nextp + 1, '"');
3290  if (endp == NULL)
3291  return false; /* mismatched quotes */
3292  if (endp[1] != '"')
3293  break; /* found end of quoted name */
3294  /* Collapse adjacent quotes into one quote, and look again */
3295  memmove(endp, endp + 1, strlen(endp));
3296  nextp = endp;
3297  }
3298  /* endp now points at the terminating quote */
3299  nextp = endp + 1;
3300  }
3301  else
3302  {
3303  /* Unquoted name --- extends to separator or whitespace */
3304  char *downname;
3305  int len;
3306 
3307  curname = nextp;
3308  while (*nextp && *nextp != separator &&
3309  !scanner_isspace(*nextp))
3310  nextp++;
3311  endp = nextp;
3312  if (curname == nextp)
3313  return false; /* empty unquoted name not allowed */
3314 
3315  /*
3316  * Downcase the identifier, using same code as main lexer does.
3317  *
3318  * XXX because we want to overwrite the input in-place, we cannot
3319  * support a downcasing transformation that increases the string
3320  * length. This is not a problem given the current implementation
3321  * of downcase_truncate_identifier, but we'll probably have to do
3322  * something about this someday.
3323  */
3324  len = endp - curname;
3325  downname = downcase_truncate_identifier(curname, len, false);
3326  Assert(strlen(downname) <= len);
3327  strncpy(curname, downname, len); /* strncpy is required here */
3328  pfree(downname);
3329  }
3330 
3331  while (scanner_isspace(*nextp))
3332  nextp++; /* skip trailing whitespace */
3333 
3334  if (*nextp == separator)
3335  {
3336  nextp++;
3337  while (scanner_isspace(*nextp))
3338  nextp++; /* skip leading whitespace for next */
3339  /* we expect another name, so done remains false */
3340  }
3341  else if (*nextp == '\0')
3342  done = true;
3343  else
3344  return false; /* invalid syntax */
3345 
3346  /* Now safe to overwrite separator with a null */
3347  *endp = '\0';
3348 
3349  /* Truncate name if it's overlength */
3350  truncate_identifier(curname, strlen(curname), false);
3351 
3352  /*
3353  * Finished isolating current name --- add it to list
3354  */
3355  *namelist = lappend(*namelist, curname);
3356 
3357  /* Loop back if we didn't reach end of string */
3358  } while (!done);
3359 
3360  return true;
3361 }
3362 
3363 
3364 /*
3365  * SplitDirectoriesString --- parse a string containing file/directory names
3366  *
3367  * This works fine on file names too; the function name is historical.
3368  *
3369  * This is similar to SplitIdentifierString, except that the parsing
3370  * rules are meant to handle pathnames instead of identifiers: there is
3371  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3372  * and we apply canonicalize_path() to each extracted string. Because of the
3373  * last, the returned strings are separately palloc'd rather than being
3374  * pointers into rawstring --- but we still scribble on rawstring.
3375  *
3376  * Inputs:
3377  * rawstring: the input string; must be modifiable!
3378  * separator: the separator punctuation expected between directories
3379  * (typically ',' or ';'). Whitespace may also appear around
3380  * directories.
3381  * Outputs:
3382  * namelist: filled with a palloc'd list of directory names.
3383  * Caller should list_free_deep() this even on error return.
3384  *
3385  * Returns true if okay, false if there is a syntax error in the string.
3386  *
3387  * Note that an empty string is considered okay here.
      *
      * A name may be enclosed in double quotes; inside the quotes the separator
      * is ordinary data and a doubled quote stands for one literal quote.
      * The syntax errors detected are: a quoted name with no closing quote, an
      * empty unquoted name, and anything other than whitespace, the separator
      * or end of string following a name. On a false return *namelist still
      * holds the names that were appended before the error was found.
3388  */
3389 bool
3390 SplitDirectoriesString(char *rawstring, char separator,
3391  List **namelist)
3392 {
3393  char *nextp = rawstring;
3394  bool done = false;
3395 
3396  *namelist = NIL;
3397 
3398  while (scanner_isspace(*nextp))
3399  nextp++; /* skip leading whitespace */
3400 
3401  if (*nextp == '\0')
3402  return true; /* allow empty string */
3403 
3404  /* At the top of the loop, we are at start of a new directory. */
3405  do
3406  {
3407  char *curname;
3408  char *endp;
3409 
3410  if (*nextp == '"')
3411  {
3412  /* Quoted name --- collapse quote-quote pairs */
3413  curname = nextp + 1;
3414  for (;;)
3415  {
3416  endp = strchr(nextp + 1, '"');
3417  if (endp == NULL)
3418  return false; /* mismatched quotes */
3419  if (endp[1] != '"')
3420  break; /* found end of quoted name */
3421  /* Collapse adjacent quotes into one quote, and look again */
      /*
       * strlen(endp) counts the quote at endp plus everything after it, so
       * copying that many bytes from endp + 1 moves the rest of the string
       * together with its terminating NUL down by one byte.
       */
3422  memmove(endp, endp + 1, strlen(endp));
      /* resume the search just past the quote that was kept */
3423  nextp = endp;
3424  }
3425  /* endp now points at the terminating quote */
3426  nextp = endp + 1;
3427  }
3428  else
3429  {
3430  /* Unquoted name --- extends to separator or end of string */
3431  curname = endp = nextp;
3432  while (*nextp && *nextp != separator)
3433  {
3434  /* trailing whitespace should not be included in name */
      /* endp trails one past the last non-whitespace character seen */
3435  if (!scanner_isspace(*nextp))
3436  endp = nextp + 1;
3437  nextp++;
3438  }
3439  if (curname == endp)
3440  return false; /* empty unquoted name not allowed */
3441  }
3442 
3443  while (scanner_isspace(*nextp))
3444  nextp++; /* skip trailing whitespace */
3445 
3446  if (*nextp == separator)
3447  {
3448  nextp++;
3449  while (scanner_isspace(*nextp))
3450  nextp++; /* skip leading whitespace for next */
3451  /* we expect another name, so done remains false */
3452  }
3453  else if (*nextp == '\0')
3454  done = true;
3455  else
3456  return false; /* invalid syntax */
3457 
3458  /* Now safe to overwrite separator with a null */
      /*
       * For an unquoted name endp can be the position of the separator
       * itself, which is why the write is deferred until after *nextp has
       * been examined above.
       */
3459  *endp = '\0';
3460 
3461  /* Truncate path if it's overlength */
      /*
       * The cut is made at a fixed byte offset inside rawstring, before the
       * copy is taken. NOTE(review): nothing here looks at character
       * boundaries --- confirm whether multibyte paths need care.
       */
3462  if (strlen(curname) >= MAXPGPATH)
3463  curname[MAXPGPATH - 1] = '\0';
3464 
3465  /*
3466  * Finished isolating current name --- add it to list
3467  */
      /* canonicalize_path() is applied to the copy, not to rawstring */
3468  curname = pstrdup(curname);
3469  canonicalize_path(curname);
3470  *namelist = lappend(*namelist, curname);
3471 
3472  /* Loop back if we didn't reach end of string */
3473  } while (!done);
3474 
3475  return true;
3476 }
3477 
3478 
3479 /*****************************************************************************
3480  * Comparison Functions used for bytea
3481  *
3482  * Note: btree indexes need these routines not to leak memory; therefore,
3483  * be careful to free working copies of toasted datums. Most places don't
3484  * need to be so careful.
3485  *****************************************************************************/
3486 
/*
 * Equality test for two bytea arguments (fmgr arguments 0 and 1).
 *
 * Returns true only when both values have the same size and memcmp() finds
 * their data bytes identical. When the sizes differ the answer is false and
 * neither value is converted with DatumGetByteaPP().
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (the numbering jumps from 3487 to 3489).
 */
3487 Datum
3489 {
3490  Datum arg1 = PG_GETARG_DATUM(0);
3491  Datum arg2 = PG_GETARG_DATUM(1);
3492  bool result;
3493  Size len1,
3494  len2;
3495 
3496  /*
3497  * We can use a fast path for unequal lengths, which might save us from
3498  * having to detoast one or both values.
3499  */
3500  len1 = toast_raw_datum_size(arg1);
3501  len2 = toast_raw_datum_size(arg2);
3502  if (len1 != len2)
3503  result = false;
3504  else
3505  {
3506  bytea *barg1 = DatumGetByteaPP(arg1);
3507  bytea *barg2 = DatumGetByteaPP(arg2);
3508 
      /*
       * NOTE(review): subtracting VARHDRSZ implies toast_raw_datum_size()
       * reports a size that includes a header of that length --- confirm.
       */
3509  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3510  len1 - VARHDRSZ) == 0);
3511 
3512  PG_FREE_IF_COPY(barg1, 0);
3513  PG_FREE_IF_COPY(barg2, 1);
3514  }
3515 
3516  PG_RETURN_BOOL(result);
3517 }
3518 
/*
 * Inequality test for two bytea arguments (fmgr arguments 0 and 1).
 *
 * Returns true when the sizes differ (without converting either value with
 * DatumGetByteaPP()), or when the sizes match but memcmp() finds a
 * differing byte.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (the numbering jumps from 3519 to 3521).
 */
3519 Datum
3521 {
3522  Datum arg1 = PG_GETARG_DATUM(0);
3523  Datum arg2 = PG_GETARG_DATUM(1);
3524  bool result;
3525  Size len1,
3526  len2;
3527 
3528  /*
3529  * We can use a fast path for unequal lengths, which might save us from
3530  * having to detoast one or both values.
3531  */
3532  len1 = toast_raw_datum_size(arg1);
3533  len2 = toast_raw_datum_size(arg2);
3534  if (len1 != len2)
3535  result = true;
3536  else
3537  {
3538  bytea *barg1 = DatumGetByteaPP(arg1);
3539  bytea *barg2 = DatumGetByteaPP(arg2);
3540 
      /*
       * NOTE(review): subtracting VARHDRSZ implies toast_raw_datum_size()
       * reports a size that includes a header of that length --- confirm.
       */
3541  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3542  len1 - VARHDRSZ) != 0);
3543 
3544  PG_FREE_IF_COPY(barg1, 0);
3545  PG_FREE_IF_COPY(barg2, 1);
3546  }
3547 
3548  PG_RETURN_BOOL(result);
3549 }
3550 
/*
 * "Less than" test for two bytea arguments.
 *
 * The values are compared bytewise with memcmp() over the length of the
 * shorter one. If that common prefix is equal, the shorter value is the
 * smaller one; equal values therefore yield false.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (the numbering jumps from 3551 to 3553).
 */
3551 Datum
3553 {
3554  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3555  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3556  int len1,
3557  len2;
3558  int cmp;
3559 
3560  len1 = VARSIZE_ANY_EXHDR(arg1);
3561  len2 = VARSIZE_ANY_EXHDR(arg2);
3562 
3563  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3564 
      /* only cmp and the two lengths are used after the inputs are released */
3565  PG_FREE_IF_COPY(arg1, 0);
3566  PG_FREE_IF_COPY(arg2, 1);
3567 
3568  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3569 }
3570 
/*
 * "Less than or equal" test for two bytea arguments.
 *
 * The values are compared bytewise with memcmp() over the length of the
 * shorter one. If that common prefix is equal, the result is true when the
 * first value is not longer than the second.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (the numbering jumps from 3571 to 3573).
 */
3571 Datum
3573 {
3574  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3575  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3576  int len1,
3577  len2;
3578  int cmp;
3579 
3580  len1 = VARSIZE_ANY_EXHDR(arg1);
3581  len2 = VARSIZE_ANY_EXHDR(arg2);
3582 
3583  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3584 
      /* only cmp and the two lengths are used after the inputs are released */
3585  PG_FREE_IF_COPY(arg1, 0);
3586  PG_FREE_IF_COPY(arg2, 1);
3587 
3588  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3589 }
3590 
/*
 * "Greater than" test for two bytea arguments.
 *
 * The values are compared bytewise with memcmp() over the length of the
 * shorter one. If that common prefix is equal, the longer value is the
 * greater one; equal values therefore yield false.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (the numbering jumps from 3591 to 3593).
 */
3591 Datum
3593 {
3594  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3595  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3596  int len1,
3597  len2;
3598  int cmp;
3599 
3600  len1 = VARSIZE_ANY_EXHDR(arg1);
3601  len2 = VARSIZE_ANY_EXHDR(arg2);
3602 
3603  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3604 
      /* only cmp and the two lengths are used after the inputs are released */
3605  PG_FREE_IF_COPY(arg1, 0);
3606  PG_FREE_IF_COPY(arg2, 1);
3607 
3608  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3609 }
3610 
/*
 * "Greater than or equal" test for two bytea arguments.
 *
 * The values are compared bytewise with memcmp() over the length of the
 * shorter one. If that common prefix is equal, the result is true when the
 * first value is not shorter than the second.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (the numbering jumps from 3611 to 3613).
 */
3611 Datum
3613 {
3614  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3615  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3616  int len1,
3617  len2;
3618  int cmp;
3619 
3620  len1 = VARSIZE_ANY_EXHDR(arg1);
3621  len2 = VARSIZE_ANY_EXHDR(arg2);
3622 
3623  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3624 
      /* only cmp and the two lengths are used after the inputs are released */
3625  PG_FREE_IF_COPY(arg1, 0);
3626  PG_FREE_IF_COPY(arg2, 1);
3627 
3628  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
3629 }
3630 
/*
 * Three-way comparison of two bytea arguments, returned as an int32.
 *
 * The result is whatever memcmp() reports for the common prefix (so only
 * its sign is meaningful); when the prefix is equal but the lengths differ
 * the result is -1 if the first value is shorter and 1 if it is longer.
 * Equal values give 0.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (the numbering jumps from 3631 to 3633).
 */
3631 Datum
3633 {
3634  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3635  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3636  int len1,
3637  len2;
3638  int cmp;
3639 
3640  len1 = VARSIZE_ANY_EXHDR(arg1);
3641  len2 = VARSIZE_ANY_EXHDR(arg2);
3642 
3643  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
      /* equal prefix: break the tie on length */
3644  if ((cmp == 0) && (len1 != len2))
3645  cmp = (len1 < len2) ? -1 : 1;
3646 
3647  PG_FREE_IF_COPY(arg1, 0);
3648  PG_FREE_IF_COPY(arg2, 1);
3649 
3650  PG_RETURN_INT32(cmp);
3651 }
3652 
/*
 * Sort-support setup: delegates to varstr_sortsupport() with
 * C_COLLATION_OID, while ssup->ssup_cxt is the current memory context.
 * The previous context is restored before returning void.
 *
 * NOTE(review): two lines are absent from this listing --- 3654 (function
 * name and parameter list) and 3656 (presumably the declaration of 'ssup').
 */
3653 Datum
3655 {
3657  MemoryContext oldcontext;
3658 
3659  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3660 
3661  /* Use generic string SortSupport, forcing "C" collation */
3662  varstr_sortsupport(ssup, C_COLLATION_OID, false);
3663 
3664  MemoryContextSwitchTo(oldcontext);
3665 
3666  PG_RETURN_VOID();
3667 }
3668 
3669 /*
3670  * appendStringInfoText
3671  *
3672  * Append a text to str.
3673  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
      *
      * NOTE(review): listing lines 3676 (name and parameter list) and 3678
      * (the only statement of the body) are absent, so the implementation
      * cannot be read from this listing.
3674  */
3675 static void
3677 {
3679 }
3680 
3681 /*
3682  * replace_text
3683  * replace all occurrences of 'old_sub_str' in 'orig_str'
3684  * with 'new_sub_str' to form 'new_str'
3685  *
3686  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
3687  * otherwise returns 'new_str'
      *
      * fmgr arguments: 0 = source text, 1 = substring to look for,
      * 2 = replacement text. The source text is also returned as-is when the
      * substring does not occur in it at all.
      *
      * Positions handled here are 1-based character positions; a result of 0
      * from text_position_next() means "no further match".
      *
      * NOTE(review): several lines are absent from this listing: 3690 (name
      * and parameter list), 3697 and 3703 (local declarations; 'state' and
      * 'str' are used below but their declarations are not visible) and 3738
      * (first line of the loop body).
3688  */
3689 Datum
3691 {
3692  text *src_text = PG_GETARG_TEXT_PP(0);
3693  text *from_sub_text = PG_GETARG_TEXT_PP(1);
3694  text *to_sub_text = PG_GETARG_TEXT_PP(2);
3695  int src_text_len;
3696  int from_sub_text_len;
3698  text *ret_text;
3699  int start_posn;
3700  int curr_posn;
3701  int chunk_len;
3702  char *start_ptr;
3704 
3705  text_position_setup(src_text, from_sub_text, &state);
3706 
3707  /*
3708  * Note: we check the converted string length, not the original, because
3709  * they could be different if the input contained invalid encoding.
3710  */
3711  src_text_len = state.len1;
3712  from_sub_text_len = state.len2;
3713 
3714  /* Return unmodified source string if empty source or pattern */
3715  if (src_text_len < 1 || from_sub_text_len < 1)
3716  {
3717  text_position_cleanup(&state);
3718  PG_RETURN_TEXT_P(src_text);
3719  }
3720 
3721  start_posn = 1;
3722  curr_posn = text_position_next(1, &state);
3723 
3724  /* When the from_sub_text is not found, there is nothing to do. */
3725  if (curr_posn == 0)
3726  {
3727  text_position_cleanup(&state);
3728  PG_RETURN_TEXT_P(src_text);
3729  }
3730 
3731  /* start_ptr points to the start_posn'th character of src_text */
3732  start_ptr = VARDATA_ANY(src_text);
3733 
3734  initStringInfo(&str);
3735 
      /*
       * Each iteration emits the text between the previous match and this one,
       * then the replacement, and advances start_posn (characters) and
       * start_ptr (bytes) in step past the matched substring.
       */
3736  do
3737  {
3739 
3740  /* copy the data skipped over by last text_position_next() */
3741  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3742  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3743 
3744  appendStringInfoText(&str, to_sub_text);
3745 
3746  start_posn = curr_posn;
3747  start_ptr += chunk_len;
3748  start_posn += from_sub_text_len;
3749  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3750 
3751  curr_posn = text_position_next(start_posn, &state);
3752  }
3753  while (curr_posn > 0);
3754 
3755  /* copy trailing data */
      /* everything from start_ptr up to the end of the varlena value */
3756  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3757  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3758 
3759  text_position_cleanup(&state);
3760 
3761  ret_text = cstring_to_text_with_len(str.data, str.len);
3762  pfree(str.data);
3763 
3764  PG_RETURN_TEXT_P(ret_text);
3765 }
3766 
3767 /*
3768  * check_replace_text_has_escape_char
3769  *
3770  * check whether replace_text contains escape char.
      *
      * The escape char is a backslash. Returns true as soon as one is found
      * among the data bytes of replace_text, false if the scan reaches the end.
      *
      * NOTE(review): listing lines 3773 (name and parameter list) and 3778 (the
      * condition that selects between the two loops below) are absent.
3771  */
3772 static bool
3774 {
3775  const char *p = VARDATA_ANY(replace_text);
3776  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3777 
3779  {
      /* this branch steps one byte at a time */
3780  for (; p < p_end; p++)
3781  {
3782  if (*p == '\\')
3783  return true;
3784  }
3785  }
3786  else
3787  {
      /* this branch steps by pg_mblen(p), and so tests only the byte at each step */
3788  for (; p < p_end; p += pg_mblen(p))
3789  {
3790  if (*p == '\\')
3791  return true;
3792  }
3793  }
3794 
3795  return false;
3796 }
3797 
3798 /*
3799  * appendStringInfoRegexpSubstr
3800  *
3801  * Append replace_text to str, substituting regexp back references for
3802  * \n escapes. start_ptr is the start of the match in the source string,
3803  * at logical character position data_pos.
      *
      * Escapes handled: \1 .. \9 (the corresponding pmatch[] entry), \& (the
      * whole match, pmatch[0]) and \\ (one literal backslash). A backslash
      * followed by anything else, or at the very end of replace_text, is copied
      * through as a literal backslash. A reference whose rm_so or rm_eo is -1
      * contributes nothing to the output.
      *
      * NOTE(review): listing line 3806 (function name and the first
      * parameters, which by their use below include 'str' and 'replace_text')
      * is absent.
3804  */
3805 static void
3807  regmatch_t *pmatch,
3808  char *start_ptr, int data_pos)
3809 {
3810  const char *p = VARDATA_ANY(replace_text);
3811  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3812  int eml = pg_database_encoding_max_length();
3813 
3814  for (;;)
3815  {
3816  const char *chunk_start = p;
3817  int so;
3818  int eo;
3819 
3820  /* Find next escape char. */
3821  if (eml == 1)
3822  {
3823  for (; p < p_end && *p != '\\'; p++)
3824  /* nothing */ ;
3825  }
3826  else
3827  {
3828  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3829  /* nothing */ ;
3830  }
3831 
3832  /* Copy the text we just scanned over, if any. */
3833  if (p > chunk_start)
3834  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3835 
3836  /* Done if at end of string, else advance over escape char. */
3837  if (p >= p_end)
3838  break;
3839  p++;
3840 
3841  if (p >= p_end)
3842  {
3843  /* Escape at very end of input. Treat same as unexpected char */
3844  appendStringInfoChar(str, '\\');
3845  break;
3846  }
3847 
3848  if (*p >= '1' && *p <= '9')
3849  {
3850  /* Use the back reference of regexp. */
3851  int idx = *p - '0';
3852 
3853  so = pmatch[idx].rm_so;
3854  eo = pmatch[idx].rm_eo;
3855  p++;
3856  }
3857  else if (*p == '&')
3858  {
3859  /* Use the entire matched string. */
3860  so = pmatch[0].rm_so;
3861  eo = pmatch[0].rm_eo;
3862  p++;
3863  }
3864  else if (*p == '\\')
3865  {
3866  /* \\ means transfer one \ to output. */
3867  appendStringInfoChar(str, '\\');
3868  p++;
3869  continue;
3870  }
3871  else
3872  {
3873  /*
3874  * If escape char is not followed by any expected char, just treat
3875  * it as ordinary data to copy. (XXX would it be better to throw
3876  * an error?)
3877  */
      /*
       * p is deliberately not advanced here: the character after the
       * backslash is picked up as ordinary text by the next iteration.
       */
3878  appendStringInfoChar(str, '\\');
3879  continue;
3880  }
3881 
3882  if (so != -1 && eo != -1)
3883  {
3884  /*
3885  * Copy the text that is back reference of regexp. Note so and eo
3886  * are counted in characters not bytes.
3887  */
      /*
       * NOTE(review): this chunk_start is a separate variable that shadows
       * the 'const char *chunk_start' declared at the top of the loop body;
       * it points into the source string, not into replace_text.
       */
3888  char *chunk_start;
3889  int chunk_len;
3890 
3891  Assert(so >= data_pos);
3892  chunk_start = start_ptr;
3893  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3894  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3895  appendBinaryStringInfo(str, chunk_start, chunk_len);
3896  }
3897  }
3898 }
3899 
3900 #define REGEXP_REPLACE_BACKREF_CNT 10
3901 
3902 /*
3903  * replace_text_regexp
3904  *
3905  * replace text that matches to regexp in src_text to replace_text.
3906  *
3907  * Note: to avoid having to include regex.h in builtins.h, we declare
3908  * the regexp argument as void *, but really it's regex_t *.
      *
      * When glob is false only the first match is replaced; otherwise the
      * search is repeated from the end of each match. Returns a newly built
      * text value. Match offsets (rm_so / rm_eo) are character indexes into
      * the wide-character copy 'data'; start_ptr / data_pos track the
      * corresponding byte pointer and character index in src_text.
      *
      * NOTE(review): several lines are absent from this listing: 3917-3918
      * (local declarations; 'pmatch' and 'buf' are used below but not visibly
      * declared), 3944 and 3962 (one statement each) and 3951 (one argument of
      * the pg_regexec() call, between the NULL and pmatch).
3909  */
3910 text *
3911 replace_text_regexp(text *src_text, void *regexp,
3912  text *replace_text, bool glob)
3913 {
3914  text *ret_text;
3915  regex_t *re = (regex_t *) regexp;
3916  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3919  pg_wchar *data;
3920  size_t data_len;
3921  int search_start;
3922  int data_pos;
3923  char *start_ptr;
3924  bool have_escape;
3925 
3926  initStringInfo(&buf);
3927 
3928  /* Convert data string to wide characters. */
      /* sized by the byte length plus one, i.e. one pg_wchar slot per input byte */
3929  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3930  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3931 
3932  /* Check whether replace_text has escape char. */
3933  have_escape = check_replace_text_has_escape_char(replace_text);
3934 
3935  /* start_ptr points to the data_pos'th character of src_text */
3936  start_ptr = (char *) VARDATA_ANY(src_text);
3937  data_pos = 0;
3938 
3939  search_start = 0;
      /* "<=" lets one more search start at data_len, i.e. at the very end of the data */
3940  while (search_start <= data_len)
3941  {
3942  int regexec_result;
3943 
3945 
3946  regexec_result = pg_regexec(re,
3947  data,
3948  data_len,
3949  search_start,
3950  NULL, /* no details */
3952  pmatch,
3953  0);
3954 
3955  if (regexec_result == REG_NOMATCH)
3956  break;
3957 
3958  if (regexec_result != REG_OKAY)
3959  {
3960  char errMsg[100];
3961 
3963  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3964  ereport(ERROR,
3965  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3966  errmsg("regular expression failed: %s", errMsg)));
3967  }
3968 
3969  /*
3970  * Copy the text to the left of the match position. Note we are given
3971  * character not byte indexes.
3972  */
3973  if (pmatch[0].rm_so - data_pos > 0)
3974  {
3975  int chunk_len;
3976 
3977  chunk_len = charlen_to_bytelen(start_ptr,
3978  pmatch[0].rm_so - data_pos);
3979  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3980 
3981  /*
3982  * Advance start_ptr over that text, to avoid multiple rescans of
3983  * it if the replace_text contains multiple back-references.
3984  */
3985  start_ptr += chunk_len;
3986  data_pos = pmatch[0].rm_so;
3987  }
3988 
3989  /*
3990  * Copy the replace_text. Process back references when the
3991  * replace_text has escape characters.
3992  */
3993  if (have_escape)
3994  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3995  start_ptr, data_pos);
3996  else
3997  appendStringInfoText(&buf, replace_text);
3998 
3999  /* Advance start_ptr and data_pos over the matched text. */
4000  start_ptr += charlen_to_bytelen(start_ptr,
4001  pmatch[0].rm_eo - data_pos);
4002  data_pos = pmatch[0].rm_eo;
4003 
4004  /*
4005  * When global option is off, replace the first instance only.
4006  */
4007  if (!glob)
4008  break;
4009 
4010  /*
4011  * Advance search position. Normally we start the next search at the
4012  * end of the previous match; but if the match was of zero length, we
4013  * have to advance by one character, or we'd just find the same match
4014  * again.
4015  */
      /*
       * Only search_start is bumped for an empty match; data_pos and start_ptr
       * stay put, so the skipped character is still copied to the output later.
       */
4016  search_start = data_pos;
4017  if (pmatch[0].rm_so == pmatch[0].rm_eo)
4018  search_start++;
4019  }
4020 
4021  /*
4022  * Copy the text to the right of the last match.
4023  */
4024  if (data_pos < data_len)
4025  {
4026  int chunk_len;
4027 
4028  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4029  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4030  }
4031 
4032  ret_text = cstring_to_text_with_len(buf.data, buf.len);
4033  pfree(buf.data);
4034  pfree(data);
4035 
4036  return ret_text;
4037 }
4038 
4039 /*
4040  * split_text
4041  * parse input string
4042  * return ord item (1 based)
4043  * based on provided field separator
      *
      * fmgr arguments: 0 = input string, 1 = field separator, 2 = field number.
      * A field number below 1 raises ERRCODE_INVALID_PARAMETER_VALUE. With an
      * empty separator, or a separator that never occurs, field 1 is the whole
      * input string; the comments below say the other fields, and any field of
      * an empty input string, yield an empty string.
      *
      * NOTE(review): several lines are absent from this listing: 4046 (name and
      * parameter list), 4053 (presumably the declaration of 'state') and
      * 4077, 4088 and 4103 (the statements that return from the three
      * early-exit branches).
4044  */
4045 Datum
4047 {
4048  text *inputstring = PG_GETARG_TEXT_PP(0);
4049  text *fldsep = PG_GETARG_TEXT_PP(1);
4050  int fldnum = PG_GETARG_INT32(2);
4051  int inputstring_len;
4052  int fldsep_len;
4054  int start_posn;
4055  int end_posn;
4056  text *result_text;
4057 
4058  /* field number is 1 based */
4059  if (fldnum < 1)
4060  ereport(ERROR,
4061  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4062  errmsg("field position must be greater than zero")));
4063 
4064  text_position_setup(inputstring, fldsep, &state);
4065 
4066  /*
4067  * Note: we check the converted string length, not the original, because
4068  * they could be different if the input contained invalid encoding.
4069  */
4070  inputstring_len = state.len1;
4071  fldsep_len = state.len2;
4072 
4073  /* return empty string for empty input string */
4074  if (inputstring_len < 1)
4075  {
4076  text_position_cleanup(&state);
4078  }
4079 
4080  /* empty field separator */
4081  if (fldsep_len < 1)
4082  {
4083  text_position_cleanup(&state);
4084  /* if first field, return input string, else empty string */
4085  if (fldnum == 1)
4086  PG_RETURN_TEXT_P(inputstring);
4087  else
4089  }
4090 
4091  /* identify bounds of first field */
4092  start_posn = 1;
4093  end_posn = text_position_next(1, &state);
4094 
4095  /* special case if fldsep not found at all */
4096  if (end_posn == 0)
4097  {
4098  text_position_cleanup(&state);
4099  /* if field 1 requested, return input string, else empty string */
4100  if (fldnum == 1)
4101  PG_RETURN_TEXT_P(inputstring);
4102  else
4104  }
4105 
      /*
       * Step from separator to separator. fldnum is decremented once per
       * step, so on exit it is 0 if the requested field is terminated by a
       * separator at end_posn, and positive if the separators ran out first.
       */
4106  while (end_posn > 0 && --fldnum > 0)
4107  {
4108  /* identify bounds of next field */
4109  start_posn = end_posn + fldsep_len;
4110  end_posn = text_position_next(start_posn, &state);
4111  }
4112 
4113  text_position_cleanup(&state);
4114 
4115  if (fldnum > 0)
4116  {
4117  /* N'th field separator not found */
4118  /* if last field requested, return it, else empty string */
4119  if (fldnum == 1)
4120  result_text = text_substring(PointerGetDatum(inputstring),
4121  start_posn,
4122  -1,
4123  true);
4124  else
4125  result_text = cstring_to_text("");
4126  }
4127  else
4128  {
4129  /* non-last field requested */
4130  result_text = text_substring(PointerGetDatum(inputstring),
4131  start_posn,
4132  end_posn - start_posn,
4133  false);
4134  }
4135 
4136  PG_RETURN_TEXT_P(result_text);
4137 }
4138 
4139 /*
4140  * Convenience function to return true when two text params are equal.
      *
      * NOTE(review): listing line 4145 (the start of the return expression,
      * including the name of the comparison routine being called) is absent;
      * only its two PointerGetDatum() arguments are visible.
4141  */
4142 static bool
4143 text_isequal(text *txt1, text *txt2)
4144 {
4146  PointerGetDatum(txt1),
4147  PointerGetDatum(txt2)));
4148 }
4149 
4150 /*
4151  * text_to_array
4152  * parse input string and return text array of elements,
4153  * based on provided field separator
      *
      * Thin entry point: all work is done by text_to_array_internal().
      *
      * NOTE(review): listing line 4156 (name and parameter list) is absent.
4154  */
4155 Datum
4157 {
4158  return text_to_array_internal(fcinfo);
4159 }
4160 
4161 /*
4162  * text_to_array_null
4163  * parse input string and return text array of elements,
4164  * based on provided field separator and null string
4165  *
4166  * This is a separate entry point only to prevent the regression tests from
4167  * complaining about different argument sets for the same internal function.
      *
      * Thin entry point: all work is done by text_to_array_internal().
      *
      * NOTE(review): listing line 4170 (name and parameter list) is absent.
4168  */
4169 Datum
4171 {
4172  return text_to_array_internal(fcinfo);
4173 }
4174 
4175 /*
4176  * common code for text_to_array and text_to_array_null functions
4177  *
4178  * These are not strict so we have to test for null inputs explicitly.
      *
      * fmgr arguments: 0 = input string (NULL gives a NULL result),
      * 1 = field separator (may be NULL), 2 = optional null string (may be
      * NULL or not passed at all). A field that compares equal to the null
      * string is stored as a NULL array element.
      *
      * With a NULL separator every character of the input becomes one element.
      * With an empty separator the result is a one-element array holding the
      * whole input.
      *
      * NOTE(review): many lines are absent from this listing: 4181 (name and
      * parameter list), 4217 (presumably the declaration of 'state'), 4237 and
      * 4321 (the returns for an empty input string), 4260 (start of the
      * one-element-array return), 4271 and 4329 (one statement at the top of
      * each loop body), 4295 and 4340 (last argument of accumArrayResult())
      * and 4349-4350 (the final return built from 'astate').
4179  */
4180 static Datum
4182 {
4183  text *inputstring;
4184  text *fldsep;
4185  text *null_string;
4186  int inputstring_len;
4187  int fldsep_len;
4188  char *start_ptr;
4189  text *result_text;
4190  bool is_null;
4191  ArrayBuildState *astate = NULL;
4192 
4193  /* when input string is NULL, then result is NULL too */
4194  if (PG_ARGISNULL(0))
4195  PG_RETURN_NULL();
4196 
4197  inputstring = PG_GETARG_TEXT_PP(0);
4198 
4199  /* fldsep can be NULL */
4200  if (!PG_ARGISNULL(1))
4201  fldsep = PG_GETARG_TEXT_PP(1);
4202  else
4203  fldsep = NULL;
4204 
4205  /* null_string can be NULL or omitted */
      /* PG_NARGS() is checked first because one of the two entry points has no third argument */
4206  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4207  null_string = PG_GETARG_TEXT_PP(2);
4208  else
4209  null_string = NULL;
4210 
4211  if (fldsep != NULL)
4212  {
4213  /*
4214  * Normal case with non-null fldsep. Use the text_position machinery
4215  * to search for occurrences of fldsep.
4216  */
4218  int fldnum;
4219  int start_posn;
4220  int end_posn;
4221  int chunk_len;
4222 
4223  text_position_setup(inputstring, fldsep, &state);
4224 
4225  /*
4226  * Note: we check the converted string length, not the original,
4227  * because they could be different if the input contained invalid
4228  * encoding.
4229  */
4230  inputstring_len = state.len1;
4231  fldsep_len = state.len2;
4232 
4233  /* return empty array for empty input string */
4234  if (inputstring_len < 1)
4235  {
4236  text_position_cleanup(&state);
4238  }
4239 
4240  /*
4241  * empty field separator: return the input string as a one-element
4242  * array
4243  */
4244  if (fldsep_len < 1)
4245  {
4246  Datum elems[1];
4247  bool nulls[1];
4248  int dims[1];
4249  int lbs[1];
4250 
4251  text_position_cleanup(&state);
4252  /* single element can be a NULL too */
4253  is_null = null_string ? text_isequal(inputstring, null_string) : false;
4254 
4255  elems[0] = PointerGetDatum(inputstring);
4256  nulls[0] = is_null;
4257  dims[0] = 1;
4258  lbs[0] = 1;
4259  /* XXX: this hardcodes assumptions about the text type */
4261  1, dims, lbs,
4262  TEXTOID, -1, false, 'i'));
4263  }
4264 
4265  start_posn = 1;
4266  /* start_ptr points to the start_posn'th character of inputstring */
4267  start_ptr = VARDATA_ANY(inputstring);
4268 
      /*
       * start_posn (characters, 1-based) and start_ptr (bytes) advance
       * together; the loop ends after the field that has no following
       * separator (end_posn == 0).
       */
4269  for (fldnum = 1;; fldnum++) /* field number is 1 based */
4270  {
4272 
4273  end_posn = text_position_next(start_posn, &state);
4274 
4275  if (end_posn == 0)
4276  {
4277  /* fetch last field */
4278  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4279  }
4280  else
4281  {
4282  /* fetch non-last field */
4283  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
4284  }
4285 
4286  /* must build a temp text datum to pass to accumArrayResult */
4287  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4288  is_null = null_string ? text_isequal(result_text, null_string) : false;
4289 
4290  /* stash away this field */
4291  astate = accumArrayResult(astate,
4292  PointerGetDatum(result_text),
4293  is_null,
4294  TEXTOID,
4296 
4297  pfree(result_text);
4298 
4299  if (end_posn == 0)
4300  break;
4301 
4302  start_posn = end_posn;
4303  start_ptr += chunk_len;
4304  start_posn += fldsep_len;
4305  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
4306  }
4307 
4308  text_position_cleanup(&state);
4309  }
4310  else
4311  {
4312  /*
4313  * When fldsep is NULL, each character in the inputstring becomes an
4314  * element in the result array. The separator is effectively the
4315  * space between characters.
4316  */
      /* here inputstring_len is a byte count, consumed pg_mblen() bytes at a time */
4317  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4318 
4319  /* return empty array for empty input string */
4320  if (inputstring_len < 1)
4322 
4323  start_ptr = VARDATA_ANY(inputstring);
4324 
4325  while (inputstring_len > 0)
4326  {
4327  int chunk_len = pg_mblen(start_ptr);
4328 
4330 
4331  /* must build a temp text datum to pass to accumArrayResult */
4332  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4333  is_null = null_string ? text_isequal(result_text, null_string) : false;
4334 
4335  /* stash away this field */
4336  astate = accumArrayResult(astate,
4337  PointerGetDatum(result_text),
4338  is_null,
4339  TEXTOID,
4341 
4342  pfree(result_text);
4343 
4344  start_ptr += chunk_len;
4345  inputstring_len -= chunk_len;
4346  }
4347  }
4348 
4351 }
4352 
4353 /*
4354  * array_to_text
4355  * concatenate Cstring representation of input array elements
4356  * using provided field separator
      *
      * The separator is fmgr argument 1, converted to a C string. No null
      * string is supplied (NULL is passed to array_to_text_internal()).
      *
      * NOTE(review): listing lines 4359 (name and parameter list) and 4361
      * (presumably the declaration of 'v', the array argument) are absent.
4357  */
4358 Datum
4360 {
4362  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4363 
4364  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4365 }
4366 
4367 /*
4368  * array_to_text_null
4369  * concatenate Cstring representation of input array elements
4370  * using provided field separator and null string
4371  *
4372  * This version is not strict so we have to test for null inputs explicitly.
      *
      * fmgr arguments: 0 = array, 1 = field separator, 2 = null string.
      * The result is NULL if the array or the separator is NULL; a NULL null
      * string is allowed and is handed on as a null pointer.
      *
      * NOTE(review): listing line 4375 (name and parameter list) is absent.
4373  */
4374 Datum
4376 {
4377  ArrayType *v;
4378  char *fldsep;
4379  char *null_string;
4380 
4381  /* returns NULL when first or second parameter is NULL */
4382  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4383  PG_RETURN_NULL();
4384 
4385  v = PG_GETARG_ARRAYTYPE_P(0);
4386  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4387 
4388  /* NULL null string is passed through as a null pointer */
4389  if (!PG_ARGISNULL(2))
4390  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4391  else
4392  null_string = NULL;
4393 
4394  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4395 }
4396 
4397 /*
4398  * common code for array_to_text and array_to_text_null functions
      *
      * Walks the array's elements in storage order, converts each non-null
      * element to a C string with the element type's output function, and
      * joins the pieces with fldsep. Null elements are rendered as null_string,
      * or skipped entirely (no separator either) when null_string is NULL.
      * An array with no elements gives an empty text value.
      *
      * Element-type lookup results are cached in fcinfo->flinfo->fn_extra.
      *
      * NOTE(review): listing lines 4401 (function name and first parameters,
      * which by their use below include 'fcinfo' and 'v') and 4412 (presumably
      * the declaration of 'buf') are absent.
4399  */
4400 static text *
4402  const char *fldsep, const char *null_string)
4403 {
4404  text *result;
4405  int nitems,
4406  *dims,
4407  ndims;
4408  Oid element_type;
4409  int typlen;
4410  bool typbyval;
4411  char typalign;
4413  bool printed = false;
4414  char *p;
4415  bits8 *bitmap;
4416  int bitmask;
4417  int i;
4418  ArrayMetaState *my_extra;
4419 
4420  ndims = ARR_NDIM(v);
4421  dims = ARR_DIMS(v);
4422  nitems = ArrayGetNItems(ndims, dims);
4423 
4424  /* if there are no elements, return an empty string */
4425  if (nitems == 0)
4426  return cstring_to_text_with_len("", 0);
4427 
4428  element_type = ARR_ELEMTYPE(v);
4429  initStringInfo(&buf);
4430 
4431  /*
4432  * We arrange to look up info about element type, including its output
4433  * conversion proc, only once per series of calls, assuming the element
4434  * type doesn't change underneath us.
4435  */
4436  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4437  if (my_extra == NULL)
4438  {
4439  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4440  sizeof(ArrayMetaState));
4441  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
      /*
       * Seed the cache with a value that cannot equal element_type, so the
       * test just below always performs the lookup on the first call.
       */
4442  my_extra->element_type = ~element_type;
4443  }
4444 
4445  if (my_extra->element_type != element_type)
4446  {
4447  /*
4448  * Get info about element type, including its output conversion proc
4449  */
4450  get_type_io_data(element_type, IOFunc_output,
4451  &my_extra->typlen, &my_extra->typbyval,
4452  &my_extra->typalign, &my_extra->typdelim,
4453  &my_extra->typioparam, &my_extra->typiofunc);
4454  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4455  fcinfo->flinfo->fn_mcxt);
4456  my_extra->element_type = element_type;
4457  }
4458  typlen = my_extra->typlen;
4459  typbyval = my_extra->typbyval;
4460  typalign = my_extra->typalign;
4461 
4462  p = ARR_DATA_PTR(v);
4463  bitmap = ARR_NULLBITMAP(v);
4464  bitmask = 1;
4465 
4466  for (i = 0; i < nitems; i++)
4467  {
4468  Datum itemvalue;
4469  char *value;
4470 
4471  /* Get source element, checking for NULL */
      /* a clear bit in the bitmap marks a null element; no bitmap means no nulls */
4472  if (bitmap && (*bitmap & bitmask) == 0)
4473  {
4474  /* if null_string is NULL, we just ignore null elements */
4475  if (null_string != NULL)
4476  {
4477  if (printed)
4478  appendStringInfo(&buf, "%s%s", fldsep, null_string);
4479  else
4480  appendStringInfoString(&buf, null_string);
4481  printed = true;
4482  }
4483  }
4484  else
4485  {
4486  itemvalue = fetch_att(p, typbyval, typlen);
4487 
4488  value = OutputFunctionCall(&my_extra->proc, itemvalue);
4489 
4490  if (printed)
4491  appendStringInfo(&buf, "%s%s", fldsep, value);
4492  else
4493  appendStringInfoString(&buf, value);
4494  printed = true;
4495 
      /* the data pointer moves only for non-null elements */
4496  p = att_addlength_pointer(p, typlen, p);
4497  p = (char *) att_align_nominal(p, typalign);
4498  }
4499 
4500  /* advance bitmap pointer if any */
4501  if (bitmap)
4502  {
      /* one bit per element, eight elements per bitmap byte */
4503  bitmask <<= 1;
4504  if (bitmask == 0x100)
4505  {
4506  bitmap++;
4507  bitmask = 1;
4508  }
4509  }
4510  }
4511 
4512  result = cstring_to_text_with_len(buf.data, buf.len);
4513  pfree(buf.data);
4514 
4515  return result;
4516 }
4517 
4518 #define HEXBASE 16
4519 /*
4520  * Convert an int32 to a string containing a base 16 (hex) representation of
4521  * the number.
      *
      * Digits are lower case and are produced from least to most significant,
      * filling buf backwards from its end. At least one digit is always
      * written, so a value of zero gives "0".
      *
      * NOTE(review): listing lines 4524 (name and parameter list), 4526 (the
      * declaration of 'value') and 4540 (the return statement) are absent.
4522  */
4523 Datum
4525 {
4527  char *ptr;
4528  const char *digits = "0123456789abcdef";
4529  char buf[32]; /* bigger than needed, but reasonable */
4530 
      /* start at the last byte of buf, which holds the terminator */
4531  ptr = buf + sizeof(buf) - 1;
4532  *ptr = '\0';
4533 
4534  do
4535  {
4536  *--ptr = digits[value % HEXBASE];
4537  value /= HEXBASE;
4538  } while (ptr > buf && value);
4539 
4541 }
4542 
4543 /*
4544  * Convert an int64 to a string containing a base 16 (hex) representation of
4545  * the number.
      *
      * The argument is reinterpreted as uint64 before conversion. Digits are
      * lower case and are produced from least to most significant, filling buf
      * backwards from its end. At least one digit is always written, so a
      * value of zero gives "0".
      *
      * NOTE(review): listing lines 4548 (name and parameter list) and 4564 (the
      * return statement) are absent.
4546  */
4547 Datum
4549 {
4550  uint64 value = (uint64) PG_GETARG_INT64(0);
4551  char *ptr;
4552  const char *digits = "0123456789abcdef";
4553  char buf[32]; /* bigger than needed, but reasonable */
4554 
      /* start at the last byte of buf, which holds the terminator */
4555  ptr = buf + sizeof(buf) - 1;
4556  *ptr = '\0';
4557 
4558  do
4559  {
4560  *--ptr = digits[value % HEXBASE];
4561  value /= HEXBASE;
4562  } while (ptr > buf && value);
4563 
4565 }
4566 
4567 /*
4568  * Create an md5 hash of a text string and return it as hex
4569  *
4570  * md5 produces a 16 byte (128 bit) hash; double it for hex
      *
      * The hash is computed over the data bytes of the text argument only
      * (header excluded). A false result from pg_md5_hash() is reported as
      * ERRCODE_OUT_OF_MEMORY.
      *
      * NOTE(review): listing lines 4575 (name and parameter list) and 4591 (the
      * return statement) are absent.
4571  */
4572 #define MD5_HASH_LEN 32
4573 
4574 Datum
4576 {
4577  text *in_text = PG_GETARG_TEXT_PP(0);
4578  size_t len;
4579  char hexsum[MD5_HASH_LEN + 1]; /* hex digits plus one extra byte */
4580 
4581  /* Calculate the length of the buffer using varlena metadata */
4582  len = VARSIZE_ANY_EXHDR(in_text);
4583 
4584  /* get the hash result */
4585  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
4586  ereport(ERROR,
4587  (errcode(ERRCODE_OUT_OF_MEMORY),
4588  errmsg("out of memory")));
4589 
4590  /* convert to text and return it */
4592 }
4593 
4594 /*
4595  * Create an md5 hash of a bytea field and return it as a hex string:
4596  * 16-byte md5 digest is represented in 32 hex characters.
      *
      * The hash is computed over the data bytes of the bytea argument only
      * (header excluded). A false result from pg_md5_hash() is reported as
      * ERRCODE_OUT_OF_MEMORY.
      *
      * NOTE(review): listing lines 4599 (name and parameter list) and 4611 (the
      * return statement) are absent.
4597  */
4598 Datum
4600 {
4601  bytea *in = PG_GETARG_BYTEA_PP(0);
4602  size_t len;
4603  char hexsum[MD5_HASH_LEN + 1]; /* hex digits plus one extra byte */
4604 
4605  len = VARSIZE_ANY_EXHDR(in);
4606  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
4607  ereport(ERROR,
4608  (errcode(ERRCODE_OUT_OF_MEMORY),
4609  errmsg("out of memory")));
4610 
4612 }
4613 
4614 /*
4615  * Return the size of a datum, possibly compressed
4616  *
4617  * Works on any data type
      *
      * The argument's typlen is looked up on the first call and cached in
      * fcinfo->flinfo->fn_extra. The int32 result is then:
      * typlen == -1: toast_datum_size() of the value;
      * typlen == -2: strlen() of the C string plus one for its terminator;
      * otherwise: typlen itself.
      *
      * NOTE(review): listing lines 4620 (name and parameter list) and 4622
      * (the declaration of 'value') are absent.
4618  */
4619 Datum
4621 {
4623  int32 result;
4624  int typlen;
4625 
4626  /* On first call, get the input type's typlen, and save at *fn_extra */
4627  if (fcinfo->flinfo->fn_extra == NULL)
4628  {
4629  /* Lookup the datatype of the supplied argument */
4630  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
4631 
4632  typlen = get_typlen(argtypeid);
4633  if (typlen == 0) /* should not happen */
4634  elog(ERROR, "cache lookup failed for type %u", argtypeid);
4635 
      /* the cache is a single int allocated in fn_mcxt */
4636  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4637  sizeof(int));
4638  *((int *) fcinfo->flinfo->fn_extra) = typlen;
4639  }
4640  else
4641  typlen = *((int *) fcinfo->flinfo->fn_extra);
4642 
4643  if (typlen == -1)
4644  {
4645  /* varlena type, possibly toasted */
4646  result = toast_datum_size(value);
4647  }
4648  else if (typlen == -2)
4649  {
4650  /* cstring */
4651  result = strlen(DatumGetCString(value)) + 1;
4652  }
4653  else
4654  {
4655  /* ordinary fixed-width type */
4656  result = typlen;
4657  }
4658 
4659  PG_RETURN_INT32(result);
4660 }
4661 
4662 /*
4663  * string_agg - Concatenates values and returns string.
4664  *
4665  * Syntax: string_agg(value text, delimiter text) RETURNS text
4666  *
4667  * Note: Any NULL values are ignored. The first-call delimiter isn't
4668  * actually used at all, and on subsequent calls the delimiter precedes
4669  * the associated value.
4670  */
4671 
4672 /* subroutine to initialize state */
4673 static StringInfo
4675 {
4676  StringInfo state;
4677  MemoryContext aggcontext;
4678  MemoryContext oldcontext;
4679 
4680  if (!AggCheckCallContext(fcinfo, &aggcontext))
4681  {
4682  /* cannot be called directly because of internal-type argument */
4683  elog(ERROR, "string_agg_transfn called in non-aggregate context");
4684  }
4685 
4686  /*
4687  * Create state in aggregate context. It'll stay there across subsequent
4688  * calls.
4689  */
4690  oldcontext = MemoryContextSwitchTo(aggcontext);
4691  state = makeStringInfo();
4692  MemoryContextSwitchTo(oldcontext);
4693 
4694  return state;
4695 }
4696 
4697 Datum
4699 {
4700  StringInfo state;
4701 
4702  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4703 
4704  /* Append the value unless null. */
4705  if (!PG_ARGISNULL(1))
4706  {
4707  /* On the first time through, we ignore the delimiter. */
4708  if (state == NULL)
4709  state = makeStringAggState(fcinfo);
4710  else if (!PG_ARGISNULL(2))
4711  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
4712 
4713  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
4714  }
4715 
4716  /*
4717  * The transition type for string_agg() is declared to be "internal",
4718  * which is a pass-by-value type the same size as a pointer.
4719  */
4720  PG_RETURN_POINTER(state);
4721 }
4722 
4723 Datum
4725 {
4726  StringInfo state;
4727 
4728  /* cannot be called directly because of internal-type argument */
4729  Assert(AggCheckCallContext(fcinfo, NULL));
4730 
4731  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4732 
4733  if (state != NULL)
4735  else
4736  PG_RETURN_NULL();
4737 }
4738 
4739 /*
4740  * Prepare cache with fmgr info for the output functions of the datatypes of
4741  * the arguments of a concat-like function, beginning with argument "argidx".
4742  * (Arguments before that will have corresponding slots in the resulting
4743  * FmgrInfo array, but we don't fill those slots.)
4744  */
4745 static FmgrInfo *
4747 {
4748  FmgrInfo *foutcache;
4749  int i;
4750 
4751  /* We keep the info in fn_mcxt so it survives across calls */
4752  foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4753  PG_NARGS() * sizeof(FmgrInfo));
4754 
4755  for (i = argidx; i < PG_NARGS(); i++)
4756  {
4757  Oid valtype;
4758  Oid typOutput;
4759  bool typIsVarlena;
4760 
4761  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4762  if (!OidIsValid(valtype))
4763  elog(ERROR, "could not determine data type of concat() input");
4764 
4765  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4766  fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
4767  }
4768 
4769  fcinfo->flinfo->fn_extra = foutcache;
4770 
4771  return foutcache;
4772 }
4773 
4774 /*
4775  * Implementation of both concat() and concat_ws().
4776  *
4777  * sepstr is the separator string to place between values.
4778  * argidx identifies the first argument to concatenate (counting from zero);
4779  * note that this must be constant across any one series of calls.
4780  *
4781  * Returns NULL if result should be NULL, else text value.
4782  */
4783 static text *
4784 concat_internal(const char *sepstr, int argidx,
4785  FunctionCallInfo fcinfo)
4786 {
4787  text *result;
4789  FmgrInfo *foutcache;
4790  bool first_arg = true;
4791  int i;
4792 
4793  /*
4794  * concat(VARIADIC some-array) is essentially equivalent to
4795  * array_to_text(), ie concat the array elements with the given separator.
4796  * So we just pass the case off to that code.
4797  */
4798  if (get_fn_expr_variadic(fcinfo->flinfo))
4799  {
4800  ArrayType *arr;
4801 
4802  /* Should have just the one argument */
4803  Assert(argidx == PG_NARGS() - 1);
4804 
4805  /* concat(VARIADIC NULL) is defined as NULL */
4806  if (PG_ARGISNULL(argidx))
4807  return NULL;
4808 
4809  /*
4810  * Non-null argument had better be an array. We assume that any call
4811  * context that could let get_fn_expr_variadic return true will have
4812  * checked that a VARIADIC-labeled parameter actually is an array. So
4813  * it should be okay to just Assert that it's an array rather than
4814  * doing a full-fledged error check.
4815  */
4817 
4818  /* OK, safe to fetch the array value */
4819  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4820 
4821  /*
4822  * And serialize the array. We tell array_to_text to ignore null
4823  * elements, which matches the behavior of the loop below.
4824  */
4825  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4826  }
4827 
4828  /* Normal case without explicit VARIADIC marker */
4829  initStringInfo(&str);
4830 
4831  /* Get output function info, building it if first time through */
4832  foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
4833  if (foutcache == NULL)
4834  foutcache = build_concat_foutcache(fcinfo, argidx);
4835 
4836  for (i = argidx; i < PG_NARGS(); i++)
4837  {
4838  if (!PG_ARGISNULL(i))
4839  {
4841 
4842  /* add separator if appropriate */
4843  if (first_arg)
4844  first_arg = false;
4845  else
4846  appendStringInfoString(&str, sepstr);
4847 
4848  /* call the appropriate type output function, append the result */
4850  OutputFunctionCall(&foutcache[i], value));
4851  }
4852  }
4853 
4854  result = cstring_to_text_with_len(str.data, str.len);
4855  pfree(str.data);
4856 
4857  return result;
4858 }
4859 
4860 /*
4861  * Concatenate all arguments. NULL arguments are ignored.
4862  */
4863 Datum
4865 {
4866  text *result;
4867 
4868  result = concat_internal("", 0, fcinfo);
4869  if (result == NULL)
4870  PG_RETURN_NULL();
4871  PG_RETURN_TEXT_P(result);
4872 }
4873 
4874 /*
4875  * Concatenate all but first argument value with separators. The first
4876  * parameter is used as the separator. NULL arguments are ignored.
4877  */
4878 Datum
4880 {
4881  char *sep;
4882  text *result;
4883 
4884  /* return NULL when separator is NULL */
4885  if (PG_ARGISNULL(0))
4886  PG_RETURN_NULL();
4888 
4889  result = concat_internal(sep, 1, fcinfo);
4890  if (result == NULL)
4891  PG_RETURN_NULL();
4892  PG_RETURN_TEXT_P(result);
4893 }
4894 
4895 /*
4896  * Return first n characters in the string. When n is negative,
4897  * return all but last |n| characters.
4898  */
4899 Datum
4901 {
4902  text *str = PG_GETARG_TEXT_PP(0);
4903  const char *p = VARDATA_ANY(str);
4904  int len = VARSIZE_ANY_EXHDR(str);
4905  int n = PG_GETARG_INT32(1);
4906  int rlen;
4907 
4908  if (n < 0)
4909  n = pg_mbstrlen_with_len(p, len) + n;
4910  rlen = pg_mbcharcliplen(p, len, n);
4911 
4913 }
4914 
4915 /*
4916  * Return last n characters in the string. When n is negative,
4917  * return all but first |n| characters.
4918  */
4919 Datum
4921 {
4922  text *str = PG_GETARG_TEXT_PP(0);
4923  const char *p = VARDATA_ANY(str);
4924  int len = VARSIZE_ANY_EXHDR(str);
4925  int n = PG_GETARG_INT32(1);
4926  int off;
4927 
4928  if (n < 0)
4929  n = -n;
4930  else
4931  n = pg_mbstrlen_with_len(p, len) - n;
4932  off = pg_mbcharcliplen(p, len, n);
4933 
4934  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4935 }
4936 
4937 /*
4938  * Return reversed string
4939  */
4940 Datum
4942 {
4943  text *str = PG_GETARG_TEXT_PP(0);
4944  const char *p = VARDATA_ANY(str);
4945  int len = VARSIZE_ANY_EXHDR(str);
4946  const char *endp = p + len;
4947  text *result;
4948  char *dst;
4949 
4950  result = palloc(len + VARHDRSZ);
4951  dst = (char *) VARDATA(result) + len;
4952  SET_VARSIZE(result, len + VARHDRSZ);
4953 
4955  {
4956  /* multibyte version */
4957  while (p < endp)
4958  {
4959  int sz;
4960 
4961  sz = pg_mblen(p);
4962  dst -= sz;
4963  memcpy(dst, p, sz);
4964  p += sz;
4965  }
4966  }
4967  else
4968  {
4969  /* single byte version */
4970  while (p < endp)
4971  *(--dst) = *p++;
4972  }
4973 
4974  PG_RETURN_TEXT_P(result);
4975 }
4976 
4977 
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS	0x0001	/* is minus flag present? */

/*
 * Step ptr forward by one character; raise an error if that reaches
 * end_ptr, i.e. the format specifier was cut off by the end of the string.
 * Note that ptr is modified in place.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (++(ptr) >= (end_ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format() type specifier"), \
					 errhint("For a single \"%%\" use \"%%%%\"."))); \
	} while (0)
4991 
4992 /*
4993  * Returns a formatted string
4994  */
4995 Datum
4997 {
4998  text *fmt;
5000  const char *cp;
5001  const char *start_ptr;
5002  const char *end_ptr;
5003  text *result;
5004  int arg;
5005  bool funcvariadic;
5006  int nargs;
5007  Datum *elements = NULL;
5008  bool *nulls = NULL;
5009  Oid element_type = InvalidOid;
5010  Oid prev_type = InvalidOid;
5011  Oid prev_width_type = InvalidOid;
5012  FmgrInfo typoutputfinfo;
5013  FmgrInfo typoutputinfo_width;
5014 
5015  /* When format string is null, immediately return null */
5016  if (PG_ARGISNULL(0))
5017  PG_RETURN_NULL();
5018 
5019  /* If argument is marked VARIADIC, expand array into elements */
5020  if (get_fn_expr_variadic(fcinfo->flinfo))
5021  {
5022  ArrayType *arr;
5023  int16 elmlen;
5024  bool elmbyval;
5025  char elmalign;
5026  int nitems;
5027 
5028  /* Should have just the one argument */
5029  Assert(PG_NARGS() == 2);
5030 
5031  /* If argument is NULL, we treat it as zero-length array */
5032  if (PG_ARGISNULL(1))
5033  nitems = 0;
5034  else
5035  {
5036  /*
5037  * Non-null argument had better be an array. We assume that any
5038  * call context that could let get_fn_expr_variadic return true
5039  * will have checked that a VARIADIC-labeled parameter actually is
5040  * an array. So it should be okay to just Assert that it's an
5041  * array rather than doing a full-fledged error check.
5042  */
5044 
5045  /* OK, safe to fetch the array value */
5046  arr = PG_GETARG_ARRAYTYPE_P(1);
5047 
5048  /* Get info about array element type */
5049  element_type = ARR_ELEMTYPE(arr);
5050  get_typlenbyvalalign(element_type,
5051  &elmlen, &elmbyval, &elmalign);
5052 
5053  /* Extract all array elements */
5054  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5055  &elements, &nulls, &nitems);
5056  }
5057 
5058  nargs = nitems + 1;
5059  funcvariadic = true;
5060  }
5061  else
5062  {
5063  /* Non-variadic case, we'll process the arguments individually */
5064  nargs = PG_NARGS();
5065  funcvariadic = false;
5066  }
5067 
5068  /* Setup for main loop. */
5069  fmt = PG_GETARG_TEXT_PP(0);
5070  start_ptr = VARDATA_ANY(fmt);
5071  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5072  initStringInfo(&str);
5073  arg = 1; /* next argument position to print */
5074 
5075  /* Scan format string, looking for conversion specifiers. */
5076  for (cp = start_ptr; cp < end_ptr; cp++)
5077  {
5078  int argpos;
5079  int widthpos;
5080  int flags;
5081  int width;
5082  Datum value;
5083  bool isNull;
5084  Oid typid;
5085 
5086  /*
5087  * If it's not the start of a conversion specifier, just copy it to
5088  * the output buffer.
5089  */
5090  if (*cp != '%')
5091  {
5092  appendStringInfoCharMacro(&str, *cp);
5093  continue;
5094  }
5095 
5096  ADVANCE_PARSE_POINTER(cp, end_ptr);
5097 
5098  /* Easy case: %% outputs a single % */
5099  if (*cp == '%')
5100  {
5101  appendStringInfoCharMacro(&str, *cp);
5102  continue;
5103  }
5104 
5105  /* Parse the optional portions of the format specifier */
5106  cp = text_format_parse_format(cp, end_ptr,
5107  &argpos, &widthpos,
5108  &flags, &width);
5109 
5110  /*
5111  * Next we should see the main conversion specifier. Whether or not
5112  * an argument position was present, it's known that at least one
5113  * character remains in the string at this point. Experience suggests
5114  * that it's worth checking that that character is one of the expected
5115  * ones before we try to fetch arguments, so as to produce the least
5116  * confusing response to a mis-formatted specifier.
5117  */
5118  if (strchr("sIL", *cp) == NULL)
5119  ereport(ERROR,
5120  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5121  errmsg("unrecognized format() type specifier \"%c\"",
5122  *cp),
5123  errhint("For a single \"%%\" use \"%%%%\".")));
5124 
5125  /* If indirect width was specified, get its value */
5126  if (widthpos >= 0)
5127  {
5128  /* Collect the specified or next argument position */
5129  if (widthpos > 0)
5130  arg = widthpos;
5131  if (arg >= nargs)
5132  ereport(ERROR,
5133  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5134  errmsg("too few arguments for format()")));
5135 
5136  /* Get the value and type of the selected argument */
5137  if (!funcvariadic)
5138  {
5139  value = PG_GETARG_DATUM(arg);
5140  isNull = PG_ARGISNULL(arg);
5141  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5142  }
5143  else
5144  {
5145  value = elements[arg - 1];
5146  isNull = nulls[arg - 1];
5147  typid = element_type;
5148  }
5149  if (!OidIsValid(typid))
5150  elog(ERROR, "could not determine data type of format() input");
5151 
5152  arg++;
5153 
5154  /* We can treat NULL width the same as zero */
5155  if (isNull)
5156  width = 0;
5157  else if (typid == INT4OID)
5158  width = DatumGetInt32(value);
5159  else if (typid == INT2OID)
5160  width = DatumGetInt16(value);
5161  else
5162  {
5163  /* For less-usual datatypes, convert to text then to int */
5164  char *str;
5165 
5166  if (typid != prev_width_type)
5167  {
5168  Oid typoutputfunc;
5169  bool typIsVarlena;
5170 
5171  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5172  fmgr_info(typoutputfunc, &typoutputinfo_width);
5173  prev_width_type = typid;
5174  }
5175 
5176  str = OutputFunctionCall(&typoutputinfo_width, value);
5177 
5178  /* pg_atoi will complain about bad data or overflow */
5179  width = pg_atoi(str, sizeof(int), '\0');
5180 
5181  pfree(str);
5182  }
5183  }
5184 
5185  /* Collect the specified or next argument position */
5186  if (argpos > 0)
5187  arg = argpos;
5188  if (arg >= nargs)
5189  ereport(ERROR,
5190  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5191  errmsg("too few arguments for format()")));
5192 
5193  /* Get the value and type of the selected argument */
5194  if (!funcvariadic)
5195  {
5196  value = PG_GETARG_DATUM(arg);
5197  isNull = PG_ARGISNULL(arg);
5198  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5199  }
5200  else
5201  {
5202  value = elements[arg - 1];
5203  isNull = nulls[arg - 1];
5204  typid = element_type;
5205  }
5206  if (!OidIsValid(typid))
5207  elog(ERROR, "could not determine data type of format() input");
5208 
5209  arg++;
5210 
5211  /*
5212  * Get the appropriate typOutput function, reusing previous one if
5213  * same type as previous argument. That's particularly useful in the
5214  * variadic-array case, but often saves work even for ordinary calls.
5215  */
5216  if (typid != prev_type)
5217  {
5218  Oid typoutputfunc;
5219  bool typIsVarlena;
5220 
5221  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5222  fmgr_info(typoutputfunc, &typoutputfinfo);
5223  prev_type = typid;
5224  }
5225 
5226  /*
5227  * And now we can format the value.
5228  */
5229  switch (*cp)
5230  {
5231  case 's':
5232  case 'I':
5233  case 'L':
5234  text_format_string_conversion(&str, *cp, &typoutputfinfo,
5235  value, isNull,
5236  flags, width);
5237  break;
5238  default:
5239  /* should not get here, because of previous check */
5240  ereport(ERROR,
5241  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5242  errmsg("unrecognized format() type specifier \"%c\"",
5243  *cp),
5244  errhint("For a single \"%%\" use \"%%%%\".")));
5245  break;
5246  }
5247  }
5248 
5249  /* Don't need deconstruct_array results anymore. */
5250  if (elements != NULL)
5251  pfree(elements);
5252  if (nulls != NULL)
5253  pfree(nulls);
5254 
5255  /* Generate results. */
5256  result = cstring_to_text_with_len(str.data, str.len);
5257  pfree(str.data);
5258 
5259  PG_RETURN_TEXT_P(result);
5260 }
5261 
5262 /*
5263  * Parse contiguous digits as a decimal number.
5264  *
5265  * Returns true if some digits could be parsed.
5266  * The value is returned into *value, and *ptr is advanced to the next
5267  * character to be parsed.
5268  *
5269  * Note parsing invariant: at least one character is known available before
5270  * string end (end_ptr) at entry, and this is still true at exit.
5271  */
5272 static bool
5273 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5274 {
5275  bool found = false;
5276  const char *cp = *ptr;
5277  int val = 0;
5278 
5279  while (*cp >= '0' && *cp <= '9')
5280  {
5281  int8 digit = (*cp - '0');
5282 
5283  if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
5284  unlikely(pg_add_s32_overflow(val, digit, &val)))
5285  ereport(ERROR,
5286  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5287  errmsg("number is out of range")));
5288  ADVANCE_PARSE_POINTER(cp, end_ptr);
5289  found = true;
5290  }
5291 
5292  *ptr = cp;
5293  *value = val;
5294 
5295  return found;
5296 }
5297 
5298 /*
5299  * Parse a format specifier (generally following the SUS printf spec).
5300  *
5301  * We have already advanced over the initial '%', and we are looking for
5302  * [argpos][flags][width]type (but the type character is not consumed here).
5303  *
5304  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5305  * Output parameters:
5306  * argpos: argument position for value to be printed. -1 means unspecified.
5307  * widthpos: argument position for width. Zero means the argument position
5308  * was unspecified (ie, take the next arg) and -1 means no width
5309  * argument (width was omitted or specified as a constant).
5310  * flags: bitmask of flags.
5311  * width: directly-specified width value. Zero means the width was omitted
5312  * (note it's not necessary to distinguish this case from an explicit
5313  * zero width value).
5314  *
5315  * The function result is the next character position to be parsed, ie, the
5316  * location where the type character is/should be.
5317  *
5318  * Note parsing invariant: at least one character is known available before
5319  * string end (end_ptr) at entry, and this is still true at exit.
5320  */
5321 static const char *
5322 text_format_parse_format(const char *start_ptr, const char *end_ptr,
5323  int *argpos, int *widthpos,
5324  int *flags, int *width)
5325 {
5326  const char *cp = start_ptr;
5327  int n;
5328 
5329  /* set defaults for output parameters */
5330  *argpos = -1;
5331  *widthpos = -1;
5332  *flags = 0;
5333  *width = 0;
5334 
5335  /* try to identify first number */
5336  if (text_format_parse_digits(&cp, end_ptr, &n))
5337  {
5338  if (*cp != '$')
5339  {
5340  /* Must be just a width and a type, so we're done */
5341  *width = n;
5342  return cp;
5343  }
5344  /* The number was argument position */
5345  *argpos = n;
5346  /* Explicit 0 for argument index is immediately refused */
5347  if (n == 0)
5348  ereport(ERROR,
5349  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5350  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5351  ADVANCE_PARSE_POINTER(cp, end_ptr);
5352  }
5353 
5354  /* Handle flags (only minus is supported now) */
5355  while (*cp == '-')
5356  {
5357  *flags |= TEXT_FORMAT_FLAG_MINUS;
5358  ADVANCE_PARSE_POINTER(cp, end_ptr);
5359  }
5360 
5361  if (*cp == '*')
5362  {
5363  /* Handle indirect width */
5364  ADVANCE_PARSE_POINTER(cp, end_ptr);
5365  if (text_format_parse_digits(&cp, end_ptr, &n))
5366  {
5367  /* number in this position must be closed by $ */
5368  if (*cp != '$')
5369  ereport(ERROR,
5370  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5371  errmsg("width argument position must be ended by \"$\"")));
5372  /* The number was width argument position */
5373  *widthpos = n;
5374  /* Explicit 0 for argument index is immediately refused */
5375  if (n == 0)
5376  ereport(ERROR,
5377  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5378  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5379  ADVANCE_PARSE_POINTER(cp, end_ptr);
5380  }
5381  else
5382  *widthpos = 0; /* width's argument position is unspecified */
5383  }
5384  else
5385  {
5386  /* Check for direct width specification */
5387  if (text_format_parse_digits(&cp, end_ptr, &n))
5388  *width = n;
5389  }
5390 
5391  /* cp should now be pointing at type character */
5392  return cp;
5393 }
5394 
5395 /*
5396  * Format a %s, %I, or %L conversion
5397  */
5398 static void
5400  FmgrInfo *typOutputInfo,
5401  Datum value, bool isNull,
5402  int flags, int width)
5403 {
5404  char *str;
5405 
5406  /* Handle NULL arguments before trying to stringify the value. */
5407  if (isNull)
5408  {
5409  if (conversion == 's')
5410  text_format_append_string(buf, "", flags, width);
5411  else if (conversion == 'L')
5412  text_format_append_string(buf, "NULL", flags, width);
5413  else if (conversion == 'I')
5414  ereport(ERROR,
5415  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5416  errmsg("null values cannot be formatted as an SQL identifier")));
5417  return;
5418  }
5419 
5420  /* Stringify. */
5421  str = OutputFunctionCall(typOutputInfo, value);
5422 
5423  /* Escape. */
5424  if (conversion == 'I')
5425  {
5426  /* quote_identifier may or may not allocate a new string. */
5427  text_format_append_string(buf, quote_identifier(str), flags, width);
5428  }
5429  else if (conversion == 'L')
5430  {
5431  char *qstr = quote_literal_cstr(str);
5432 
5433  text_format_append_string(buf, qstr, flags, width);
5434  /* quote_literal_cstr() always allocates a new string */
5435  pfree(qstr);
5436  }
5437  else
5438  text_format_append_string(buf, str, flags, width);
5439 
5440  /* Cleanup. */
5441  pfree(str);
5442 }
5443 
5444 /*
5445  * Append str to buf, padding as directed by flags/width
5446  */
5447 static void
5449  int flags, int width)
5450 {
5451  bool align_to_left = false;
5452  int len;
5453 
5454  /* fast path for typical easy case */
5455  if (width == 0)
5456  {
5457  appendStringInfoString(buf, str);
5458  return;
5459  }
5460 
5461  if (width < 0)
5462  {
5463  /* Negative width: implicit '-' flag, then take absolute value */
5464  align_to_left = true;
5465  /* -INT_MIN is undefined */
5466  if (width <= INT_MIN)
5467  ereport(ERROR,
5468  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5469  errmsg("number is out of range")));
5470  width = -width;
5471  }
5472  else if (flags & TEXT_FORMAT_FLAG_MINUS)
5473  align_to_left = true;
5474 
5475  len = pg_mbstrlen(str);
5476  if (align_to_left)
5477  {
5478  /* left justify */
5479  appendStringInfoString(buf, str);
5480  if (len < width)
5481  appendStringInfoSpaces(buf, width - len);
5482  }
5483  else
5484  {
5485  /* right justify */
5486  if (len < width)
5487  appendStringInfoSpaces(buf, width - len);
5488  appendStringInfoString(buf, str);
5489  }
5490 }
5491 
5492 /*
5493  * text_format_nv - nonvariadic wrapper for text_format function.
5494  *
5495  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5496  * which checks that all built-in functions that share the implementing C
5497  * function take the same number of arguments.
5498  */
5499 Datum
5501 {
5502  return text_format(fcinfo);
5503 }
5504 
/*
 * Helper function for Levenshtein distance functions.  Faster than memcmp(),
 * for this use case.
 *
 * Returns true if the first len bytes of s1 and s2 are identical.  The
 * bytes are compared from the last one back to the first.  A len of zero
 * or less compares nothing and returns true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	while (len > 0)
	{
		len--;
		if (s1[len] != s2[len])
			return false;
	}
	return true;
}
5520 
5521 /* Expand each Levenshtein distance variant */
5522 #include "levenshtein.c"
5523 #define LEVENSHTEIN_LESS_EQUAL
5524 #include "levenshtein.c"
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2710
#define PG_CACHE_LINE_SIZE
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4156
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2846
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2034
Value * makeString(char *str)
Definition: value.c:53
#define COLLPROVIDER_ICU
Definition: pg_collation.h:85
signed short int16
Definition: c.h:293
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:352
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:321
#define DatumGetUInt32(X)
Definition: postgres.h:492
#define NIL
Definition: pg_list.h:69
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:4996
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2001
int length(const List *list)
Definition: list.c:1333
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:4983
Definition: fmgr.h:56
text * replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob)
Definition: varlena.c:3911
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:313
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3115
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:862
Datum split_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4046
int errhint(const char *fmt,...)
Definition: elog.c:987
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1023
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2665
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define VARDATA(PTR)
Definition: postgres.h:303
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
static struct @130 value
MemoryContext fn_mcxt
Definition: fmgr.h:65
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:144
#define MD5_HASH_LEN
Definition: varlena.c:4572
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:10412
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1706
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:2662
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:268
#define DatumGetInt32(X)
Definition: postgres.h:478
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:129
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:2694
#define HEXBASE
Definition: varlena.c:4518
#define TEXTOID
Definition: pg_type.h:324
#define VARSIZE(PTR)
Definition: postgres.h:304
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3690
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:3592
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:5399
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2040
#define PointerGetDatum(X)
Definition: postgres.h:562
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:131
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:531
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
static void text_position_setup(text *t1, text *t2, TextPositionState *state)
Definition: varlena.c:1119
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:328
#define VARHDRSZ
Definition: c.h:503
Datum md5_bytea(PG_FUNCTION_ARGS)
Definition: varlena.c:4599
char * pstrdup(const char *in)
Definition: mcxt.c:1063
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:520
regoff_t rm_so
Definition: regex.h:85
#define DatumGetTextPP(X)
Definition: fmgr.h:256
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
StringInfoData * StringInfo
Definition: stringinfo.h:43
#define Min(x, y)
Definition: c.h:826
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:278
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2237
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2212
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:267
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:2780
#define INT4OID
Definition: pg_type.h:316
void canonicalize_path(char *path)
Definition: path.c:254
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2038
int errcode(int sqlerrcode)
Definition: elog.c:575
union pg_locale_struct::@129 info
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:174
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
#define DatumGetByteaPP(X)
Definition: fmgr.h:255
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3083
pg_wchar * wstr2
Definition: varlena.c:53
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:483
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4698
Datum md5_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4575
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:330
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3398
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:2855
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3488
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:623
#define OidIsValid(objectId)
Definition: c.h:586
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1781
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:348
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:214
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1035
bool trace_sort
Definition: tuplesort.c:118
#define PG_GET_COLLATION()
Definition: fmgr.h:163
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2925
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:4864
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:661
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:5448
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4375
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:4879
regoff_t rm_eo
Definition: regex.h:86
signed int int32
Definition: c.h:294
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:2813
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1964
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1662
static int32 text_length(Datum str)
Definition: varlena.c:641
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:794
bool typbyval
Definition: array.h:225
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:187
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:4548
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:314
static Datum text_to_array_internal(PG_FUNCTION_ARGS)
Definition: varlena.c:4181
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:3552
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3390
FmgrInfo * flinfo
Definition: fmgr.h:79
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:248
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:127
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:65
unsigned hex_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:126
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4359
void pfree(void *pointer)
Definition: mcxt.c:936
Size toast_raw_datum_size(Datum value)
Definition: tuptoaster.c:353
#define REG_OKAY
Definition: regex.h:137
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:78
Datum string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4724
Datum textoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:1012
#define ERROR
Definition: elog.h:43
char * s1
static bool check_replace_text_has_escape_char(const text *replace_text)
Definition: varlena.c:3773
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1128
#define DatumGetCString(X)
Definition: postgres.h:572
Size toast_datum_size(Datum value)
Definition: tuptoaster.c:409
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1382
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:1904
Datum byteage(PG_FUNCTION_ARGS)
Definition: varlena.c:3612
#define ARR_DIMS(a)
Definition: array.h:279
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:122
MemoryContext ssup_cxt
Definition: sortsupport.h:66
struct varlena * pg_detoast_datum_packed(struct varlena *datum)
Definition: fmgr.c:1858
static int text_position_next(int start_pos, TextPositionState *state)
Definition: varlena.c:1231
Datum text_to_array_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4170
#define MAXPGPATH
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:820
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:256
static int charlen_to_bytelen(const char *p, int n)
Definition: varlena.c:736
static text * text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
Definition: varlena.c:815
Datum unknownrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:588
static text * array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string)
Definition: varlena.c:4401
Definition: c.h:551
static void appendStringInfoText(StringInfo str, const text *t)
Definition: varlena.c:3676
Datum text_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:2591
#define INT2OID
Definition: pg_type.h:308
Datum texteq(PG_FUNCTION_ARGS)
Definition: varlena.c:1642
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:107
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:157
#define ARR_DATA_PTR(a)
Definition: array.h:307
hyperLogLogState abbr_card
Definition: varlena.c:74
Datum text_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:2603
Datum textne(PG_FUNCTION_ARGS)
Definition: varlena.c:1677
int16 typlen
Definition: array.h:224
pg_locale_t locale
Definition: varlena.c:77
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:192
static char * buf
Definition: pg_test_fsync.c:67
#define DatumBigEndianToNative(x)
Definition: pg_bswap.h:149
#define memmove(d, s, c)
Definition: c.h:1069
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:162
#define strcoll_l
Definition: win32_port.h:397
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3263
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:75
char typdelim
Definition: array.h:227
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition: varlena.c:2937
Datum text_name(PG_FUNCTION_ARGS)
Definition: varlena.c:3165
static text * text_catenate(text *t1, text *t2)
Definition: varlena.c:695
#define DatumGetInt16(X)
Definition: postgres.h:450
#define DatumGetBool(X)
Definition: postgres.h:399
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:173
unsigned int uint32
Definition: c.h:306
void * ssup_extra
Definition: sortsupport.h:87
Datum textpos(PG_FUNCTION_ARGS)
Definition: varlena.c:1072
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
Datum text_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:796
int bytea_output
Definition: varlena.c:42
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:132
static int text_cmp(text *arg1, text *arg2, Oid collid)
Definition: varlena.c:1617
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3017
#define S(n, x)
Definition: sha1.c:55
#define PG_RETURN_ARRAYTYPE_P(x)
Definition: array.h:250
Datum pg_column_size(PG_FUNCTION_ARGS)
Definition: varlena.c:4620
Datum text_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:1736
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:172
#define ereport(elevel, rest)
Definition: elog.h:122
static int internal_text_pattern_compare(text *arg1, text *arg2)
Definition: varlena.c:2624
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5106
static bool text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
Definition: varlena.c:5273
unsigned int pg_wchar
Definition: mbprint.c:31
#define DatumGetVarStringPP(X)
Definition: varlena.c:93
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3205
#define byte(x, n)
Definition: rijndael.c:68
Datum textcat(PG_FUNCTION_ARGS)
Definition: varlena.c:680
List * lappend(List *list, void *datum)
Definition: list.c:128
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3188
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:548
#define MaxAllocSize
Definition: memutils.h:40
int skiptable[256]
Definition: varlena.c:58
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:443
signed char int8
Definition: c.h:292
void varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
Definition: varlena.c:1808
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1275
Datum text_le(PG_FUNCTION_ARGS)
Definition: varlena.c:1721
Datum hash_uint32(uint32 k)
Definition: hashfunc.c:853
uint8 bits8
Definition: c.h:313
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:782
#define TextDatumGetCString(d)
Definition: builtins.h:92
void * palloc0(Size size)
Definition: mcxt.c:864
Datum text_format_nv(PG_FUNCTION_ARGS)
Definition: varlena.c:5500
char * s2
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:319
uintptr_t Datum
Definition: postgres.h:372
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
Datum text_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:4941
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:2832
int bpchartruelen(char *s, int len)
Definition: varchar.c:660
bool scanner_isspace(char ch)
Definition: scansup.c:221
#define wcscoll_l
Definition: win32_port.h:399
#define REGEXP_REPLACE_BACKREF_CNT
Definition: varlena.c:3900
void appendStringInfoSpaces(StringInfo str, int count)
Definition: stringinfo.c:187
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:774
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:185
Datum text_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:1751
#define VARSIZE_ANY(PTR)
Definition: postgres.h:334
static void text_position_cleanup(TextPositionState *state)
Definition: varlena.c:1365
Datum byteacmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3632
#define InvalidOid
Definition: postgres_ext.h:36
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:723
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
Datum to_hex32(PG_FUNCTION_ARGS)
Definition: varlena.c:4524
hyperLogLogState full_card
Definition: varlena.c:75
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:331
#define Max(x, y)
Definition: c.h:820
text * cstring_to_text(const char *s)
Definition: varlena.c:150
Datum unknownsend(PG_FUNCTION_ARGS)
Definition: varlena.c:603
#define PG_ARGISNULL(n)
Definition: fmgr.h:174
bool pg_md5_hash(const void *buff, size_t len, char *hexsum)
Definition: md5.c:293
#define Assert(condition)
Definition: c.h:680
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
Datum hash_any(register const unsigned char *k, register int keylen)
Definition: hashfunc.c:428
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:530
Datum text_right(PG_FUNCTION_ARGS)
Definition: varlena.c:4920
static text * concat_internal(const char *sepstr, int argidx, FunctionCallInfo fcinfo)
Definition: varlena.c:4784
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:183
Oid typioparam
Definition: array.h:228
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:89
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:322
Datum unknownin(PG_FUNCTION_ARGS)
Definition: varlena.c:564
size_t Size
Definition: c.h:414
static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: varlena.c:2477
static bool rest_of_char_same(const char *s1, const char *s2, int len)
Definition: varlena.c:5510
Datum text_pattern_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:2646
struct FmgrInfo FmgrInfo
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:549
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:272
Datum byteane(PG_FUNCTION_ARGS)
Definition: varlena.c:3520
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:166
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:3471
Datum textin(PG_FUNCTION_ARGS)
Definition: varlena.c:509
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:225
#define PG_NARGS()
Definition: fmgr.h:168
#define C_COLLATION_OID
Definition: pg_collation.h:78
void * fn_extra
Definition: fmgr.h:64
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)
Definition: varlena.c:3806
#define ARR_NDIM(a)
Definition: array.h:275
Datum byteapos(PG_FUNCTION_ARGS)
Definition: varlena.c:2973
#define TEXTBUFLEN
Definition: varlena.c:84
Oid typiofunc
Definition: array.h:229
#define DatumGetPointer(X)
Definition: postgres.h:555
char typalign
Definition: array.h:226
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3449
char * text_to_cstring(const text *t)
Definition: varlena.c:183
pg_wchar * wstr1
Definition: varlena.c:52
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5042
#define DatumGetBpCharPP(X)
Definition: fmgr.h:257
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2590
Datum bttextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:1766
Datum unknownout(PG_FUNCTION_ARGS)
Definition: varlena.c:576
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:172
int16 get_typlen(Oid typid)
Definition: lsyscache.c:1966
Datum bytearecv(PG_FUNCTION_ARGS)
Definition: varlena.c:424
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
void * palloc(Size size)
Definition: mcxt.c:835
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:71
static StringInfo makeStringAggState(FunctionCallInfo fcinfo)
Definition: varlena.c:4674
FmgrInfo proc
Definition: array.h:230
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:451
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:693
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:2914
Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:2726
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:2749
void list_free(List *list)
Definition: list.c:1133
int i
static FmgrInfo * build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
Definition: varlena.c:4746
Oid element_type
Definition: array.h:223
#define REG_NOMATCH
Definition: regex.h:138
#define NameStr(name)
Definition: c.h:557
static char * locale
Definition: initdb.c:124
void