PostgreSQL Source Code  git master
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/tuptoaster.h"
21 #include "catalog/pg_collation.h"
22 #include "catalog/pg_type.h"
23 #include "common/int.h"
24 #include "lib/hyperloglog.h"
25 #include "libpq/pqformat.h"
26 #include "miscadmin.h"
27 #include "parser/scansup.h"
28 #include "port/pg_bswap.h"
29 #include "regex/regex.h"
30 #include "utils/builtins.h"
31 #include "utils/bytea.h"
32 #include "utils/hashutils.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/pg_locale.h"
36 #include "utils/sortsupport.h"
37 #include "utils/varlena.h"
38 
39 
40 /* GUC variable */
42 
43 typedef struct varlena unknown;
44 typedef struct varlena VarString;
45 
/*
 * State for text_position_* functions.
 */
typedef struct
{
	bool		is_multibyte;	/* T if multibyte encoding */
	bool		is_multibyte_char_in_char;	/* T if a byte-level match must be
											 * re-checked against character
											 * boundaries (multibyte encodings
											 * other than UTF-8) */

	char	   *str1;			/* haystack string */
	char	   *str2;			/* needle string */
	int			len1;			/* string lengths in bytes */
	int			len2;

	/* Skip table for Boyer-Moore-Horspool search algorithm: */
	int			skiptablemask;	/* mask for ANDing with skiptable subscripts */
	int			skiptable[256]; /* skip distance for given mismatched char */

	char	   *last_match;		/* pointer to last match in 'str1' */

	/*
	 * Sometimes we need to convert the byte position of a match to a
	 * character position.  These store the last position that was converted,
	 * so that on the next call, we can continue from that point, rather than
	 * count characters from the very beginning.
	 */
	char	   *refpoint;		/* pointer within original haystack string */
	int			refpos;			/* 0-based character offset of the same point */
} TextPositionState;
74 
75 typedef struct
76 {
77  char *buf1; /* 1st string, or abbreviation original string
78  * buf */
79  char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
80  int buflen1;
81  int buflen2;
82  int last_len1; /* Length of last buf1 string/strxfrm() input */
83  int last_len2; /* Length of last buf2 string/strxfrm() blob */
84  int last_returned; /* Last comparison result (cache) */
85  bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
86  bool collate_c;
87  Oid typid; /* Actual datatype (text/bpchar/bytea/name) */
88  hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
89  hyperLogLogState full_card; /* Full key cardinality state */
90  double prop_card; /* Required cardinality proportion */
93 
94 /*
95  * This should be large enough that most strings will fit, but small enough
96  * that we feel comfortable putting it on the stack
97  */
98 #define TEXTBUFLEN 1024
99 
100 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
101 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
102 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
103 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
104 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
105 
106 #define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X))
107 #define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X))
108 
109 static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
110 static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
111 static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
112 static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
113 static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
114 static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
115 static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
116 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
117 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
118 static int32 text_length(Datum str);
119 static text *text_catenate(text *t1, text *t2);
120 static text *text_substring(Datum str,
121  int32 start,
122  int32 length,
123  bool length_not_specified);
124 static text *text_overlay(text *t1, text *t2, int sp, int sl);
125 static int text_position(text *t1, text *t2, Oid collid);
126 static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
128 static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
132 static void check_collation_set(Oid collid);
133 static int text_cmp(text *arg1, text *arg2, Oid collid);
134 static bytea *bytea_catenate(bytea *t1, bytea *t2);
136  int S,
137  int L,
138  bool length_not_specified);
139 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
140 static void appendStringInfoText(StringInfo str, const text *t);
143  const char *fldsep, const char *null_string);
145 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
146  int *value);
147 static const char *text_format_parse_format(const char *start_ptr,
148  const char *end_ptr,
149  int *argpos, int *widthpos,
150  int *flags, int *width);
151 static void text_format_string_conversion(StringInfo buf, char conversion,
152  FmgrInfo *typOutputInfo,
153  Datum value, bool isNull,
154  int flags, int width);
155 static void text_format_append_string(StringInfo buf, const char *str,
156  int flags, int width);
157 
158 
159 /*****************************************************************************
160  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
161  *****************************************************************************/
162 
163 /*
164  * cstring_to_text
165  *
166  * Create a text value from a null-terminated C string.
167  *
168  * The new text value is freshly palloc'd with a full-size VARHDR.
169  */
170 text *
171 cstring_to_text(const char *s)
172 {
173  return cstring_to_text_with_len(s, strlen(s));
174 }
175 
176 /*
177  * cstring_to_text_with_len
178  *
179  * Same as cstring_to_text except the caller specifies the string length;
180  * the string need not be null_terminated.
181  */
182 text *
183 cstring_to_text_with_len(const char *s, int len)
184 {
185  text *result = (text *) palloc(len + VARHDRSZ);
186 
187  SET_VARSIZE(result, len + VARHDRSZ);
188  memcpy(VARDATA(result), s, len);
189 
190  return result;
191 }
192 
193 /*
194  * text_to_cstring
195  *
196  * Create a palloc'd, null-terminated C string from a text value.
197  *
198  * We support being passed a compressed or toasted text value.
199  * This is a bit bogus since such values shouldn't really be referred to as
200  * "text *", but it seems useful for robustness. If we didn't handle that
201  * case here, we'd need another routine that did, anyway.
202  */
203 char *
205 {
206  /* must cast away the const, unfortunately */
207  text *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
208  int len = VARSIZE_ANY_EXHDR(tunpacked);
209  char *result;
210 
211  result = (char *) palloc(len + 1);
212  memcpy(result, VARDATA_ANY(tunpacked), len);
213  result[len] = '\0';
214 
215  if (tunpacked != t)
216  pfree(tunpacked);
217 
218  return result;
219 }
220 
221 /*
222  * text_to_cstring_buffer
223  *
224  * Copy a text value into a caller-supplied buffer of size dst_len.
225  *
226  * The text string is truncated if necessary to fit. The result is
227  * guaranteed null-terminated (unless dst_len == 0).
228  *
229  * We support being passed a compressed or toasted text value.
230  * This is a bit bogus since such values shouldn't really be referred to as
231  * "text *", but it seems useful for robustness. If we didn't handle that
232  * case here, we'd need another routine that did, anyway.
233  */
234 void
235 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
236 {
237  /* must cast away the const, unfortunately */
238  text *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
239  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
240 
241  if (dst_len > 0)
242  {
243  dst_len--;
244  if (dst_len >= src_len)
245  dst_len = src_len;
246  else /* ensure truncation is encoding-safe */
247  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
248  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
249  dst[dst_len] = '\0';
250  }
251 
252  if (srcunpacked != src)
253  pfree(srcunpacked);
254 }
255 
256 
257 /*****************************************************************************
258  * USER I/O ROUTINES *
259  *****************************************************************************/
260 
261 
/* Map an ASCII digit character to its value, and a small value to its digit */
#define VAL(ch)		((ch) - '0')
#define DIG(v)		((v) + '0')
264 
265 /*
266  * byteain - converts from printable representation of byte array
267  *
268  * Non-printable characters must be passed as '\nnn' (octal) and are
269  * converted to internal form. '\' must be passed as '\\'.
270  * ereport(ERROR, ...) if bad form.
271  *
272  * BUGS:
273  * The input is scanned twice.
274  * The error checking of input is minimal.
275  */
276 Datum
278 {
279  char *inputText = PG_GETARG_CSTRING(0);
280  char *tp;
281  char *rp;
282  int bc;
283  bytea *result;
284 
285  /* Recognize hex input */
286  if (inputText[0] == '\\' && inputText[1] == 'x')
287  {
288  size_t len = strlen(inputText);
289 
290  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
291  result = palloc(bc);
292  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
293  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
294 
295  PG_RETURN_BYTEA_P(result);
296  }
297 
298  /* Else, it's the traditional escaped style */
299  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
300  {
301  if (tp[0] != '\\')
302  tp++;
303  else if ((tp[0] == '\\') &&
304  (tp[1] >= '0' && tp[1] <= '3') &&
305  (tp[2] >= '0' && tp[2] <= '7') &&
306  (tp[3] >= '0' && tp[3] <= '7'))
307  tp += 4;
308  else if ((tp[0] == '\\') &&
309  (tp[1] == '\\'))
310  tp += 2;
311  else
312  {
313  /*
314  * one backslash, not followed by another or ### valid octal
315  */
316  ereport(ERROR,
317  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
318  errmsg("invalid input syntax for type %s", "bytea")));
319  }
320  }
321 
322  bc += VARHDRSZ;
323 
324  result = (bytea *) palloc(bc);
325  SET_VARSIZE(result, bc);
326 
327  tp = inputText;
328  rp = VARDATA(result);
329  while (*tp != '\0')
330  {
331  if (tp[0] != '\\')
332  *rp++ = *tp++;
333  else if ((tp[0] == '\\') &&
334  (tp[1] >= '0' && tp[1] <= '3') &&
335  (tp[2] >= '0' && tp[2] <= '7') &&
336  (tp[3] >= '0' && tp[3] <= '7'))
337  {
338  bc = VAL(tp[1]);
339  bc <<= 3;
340  bc += VAL(tp[2]);
341  bc <<= 3;
342  *rp++ = bc + VAL(tp[3]);
343 
344  tp += 4;
345  }
346  else if ((tp[0] == '\\') &&
347  (tp[1] == '\\'))
348  {
349  *rp++ = '\\';
350  tp += 2;
351  }
352  else
353  {
354  /*
355  * We should never get here. The first pass should not allow it.
356  */
357  ereport(ERROR,
358  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
359  errmsg("invalid input syntax for type %s", "bytea")));
360  }
361  }
362 
363  PG_RETURN_BYTEA_P(result);
364 }
365 
366 /*
367  * byteaout - converts to printable representation of byte array
368  *
369  * In the traditional escaped format, non-printable characters are
370  * printed as '\nnn' (octal) and '\' as '\\'.
371  */
372 Datum
374 {
375  bytea *vlena = PG_GETARG_BYTEA_PP(0);
376  char *result;
377  char *rp;
378 
380  {
381  /* Print hex format */
382  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
383  *rp++ = '\\';
384  *rp++ = 'x';
385  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
386  }
387  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
388  {
389  /* Print traditional escaped format */
390  char *vp;
391  int len;
392  int i;
393 
394  len = 1; /* empty string has 1 char */
395  vp = VARDATA_ANY(vlena);
396  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
397  {
398  if (*vp == '\\')
399  len += 2;
400  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
401  len += 4;
402  else
403  len++;
404  }
405  rp = result = (char *) palloc(len);
406  vp = VARDATA_ANY(vlena);
407  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
408  {
409  if (*vp == '\\')
410  {
411  *rp++ = '\\';
412  *rp++ = '\\';
413  }
414  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
415  {
416  int val; /* holds unprintable chars */
417 
418  val = *vp;
419  rp[0] = '\\';
420  rp[3] = DIG(val & 07);
421  val >>= 3;
422  rp[2] = DIG(val & 07);
423  val >>= 3;
424  rp[1] = DIG(val & 03);
425  rp += 4;
426  }
427  else
428  *rp++ = *vp;
429  }
430  }
431  else
432  {
433  elog(ERROR, "unrecognized bytea_output setting: %d",
434  bytea_output);
435  rp = result = NULL; /* keep compiler quiet */
436  }
437  *rp = '\0';
438  PG_RETURN_CSTRING(result);
439 }
440 
441 /*
442  * bytearecv - converts external binary format to bytea
443  */
444 Datum
446 {
448  bytea *result;
449  int nbytes;
450 
451  nbytes = buf->len - buf->cursor;
452  result = (bytea *) palloc(nbytes + VARHDRSZ);
453  SET_VARSIZE(result, nbytes + VARHDRSZ);
454  pq_copymsgbytes(buf, VARDATA(result), nbytes);
455  PG_RETURN_BYTEA_P(result);
456 }
457 
458 /*
459  * byteasend - converts bytea to binary format
460  *
461  * This is a special case: just copy the input...
462  */
463 Datum
465 {
466  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
467 
468  PG_RETURN_BYTEA_P(vlena);
469 }
470 
471 Datum
473 {
475 
476  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
477 
478  /* Append the value unless null. */
479  if (!PG_ARGISNULL(1))
480  {
482 
483  /* On the first time through, we ignore the delimiter. */
484  if (state == NULL)
485  state = makeStringAggState(fcinfo);
486  else if (!PG_ARGISNULL(2))
487  {
488  bytea *delim = PG_GETARG_BYTEA_PP(2);
489 
491  }
492 
494  }
495 
496  /*
497  * The transition type for string_agg() is declared to be "internal",
498  * which is a pass-by-value type the same size as a pointer.
499  */
500  PG_RETURN_POINTER(state);
501 }
502 
503 Datum
505 {
507 
508  /* cannot be called directly because of internal-type argument */
509  Assert(AggCheckCallContext(fcinfo, NULL));
510 
511  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
512 
513  if (state != NULL)
514  {
515  bytea *result;
516 
517  result = (bytea *) palloc(state->len + VARHDRSZ);
518  SET_VARSIZE(result, state->len + VARHDRSZ);
519  memcpy(VARDATA(result), state->data, state->len);
520  PG_RETURN_BYTEA_P(result);
521  }
522  else
523  PG_RETURN_NULL();
524 }
525 
526 /*
527  * textin - converts "..." to internal representation
528  */
529 Datum
531 {
532  char *inputText = PG_GETARG_CSTRING(0);
533 
534  PG_RETURN_TEXT_P(cstring_to_text(inputText));
535 }
536 
537 /*
538  * textout - converts internal representation to "..."
539  */
540 Datum
542 {
543  Datum txt = PG_GETARG_DATUM(0);
544 
546 }
547 
548 /*
549  * textrecv - converts external binary format to text
550  */
551 Datum
553 {
555  text *result;
556  char *str;
557  int nbytes;
558 
559  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
560 
561  result = cstring_to_text_with_len(str, nbytes);
562  pfree(str);
563  PG_RETURN_TEXT_P(result);
564 }
565 
566 /*
567  * textsend - converts text to binary format
568  */
569 Datum
571 {
572  text *t = PG_GETARG_TEXT_PP(0);
574 
575  pq_begintypsend(&buf);
578 }
579 
580 
581 /*
582  * unknownin - converts "..." to internal representation
583  */
584 Datum
586 {
587  char *str = PG_GETARG_CSTRING(0);
588 
589  /* representation is same as cstring */
591 }
592 
593 /*
594  * unknownout - converts internal representation to "..."
595  */
596 Datum
598 {
599  /* representation is same as cstring */
600  char *str = PG_GETARG_CSTRING(0);
601 
603 }
604 
605 /*
606  * unknownrecv - converts external binary format to unknown
607  */
608 Datum
610 {
612  char *str;
613  int nbytes;
614 
615  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
616  /* representation is same as cstring */
617  PG_RETURN_CSTRING(str);
618 }
619 
620 /*
621  * unknownsend - converts unknown to binary format
622  */
623 Datum
625 {
626  /* representation is same as cstring */
627  char *str = PG_GETARG_CSTRING(0);
629 
630  pq_begintypsend(&buf);
631  pq_sendtext(&buf, str, strlen(str));
633 }
634 
635 
636 /* ========== PUBLIC ROUTINES ========== */
637 
638 /*
639  * textlen -
640  * returns the logical length of a text*
641  * (which is less than the VARSIZE of the text*)
642  */
643 Datum
645 {
647 
648  /* try to avoid decompressing argument */
650 }
651 
652 /*
653  * text_length -
654  * Does the real work for textlen()
655  *
656  * This is broken out so it can be called directly by other string processing
657  * functions. Note that the argument is passed as a Datum, to indicate that
658  * it may still be in compressed form. We can avoid decompressing it at all
659  * in some cases.
660  */
661 static int32
663 {
664  /* fastpath when max encoding length is one */
667  else
668  {
669  text *t = DatumGetTextPP(str);
670 
672  VARSIZE_ANY_EXHDR(t)));
673  }
674 }
675 
676 /*
677  * textoctetlen -
678  * returns the physical length of a text*
679  * (which is less than the VARSIZE of the text*)
680  */
681 Datum
683 {
685 
686  /* We need not detoast the input at all */
688 }
689 
690 /*
691  * textcat -
692  * takes two text* and returns a text* that is the concatenation of
693  * the two.
694  *
695  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
696  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
697  * Allocate space for output in all cases.
698  * XXX - thomas 1997-07-10
699  */
700 Datum
702 {
703  text *t1 = PG_GETARG_TEXT_PP(0);
704  text *t2 = PG_GETARG_TEXT_PP(1);
705 
707 }
708 
709 /*
710  * text_catenate
711  * Guts of textcat(), broken out so it can be used by other functions
712  *
713  * Arguments can be in short-header form, but not compressed or out-of-line
714  */
715 static text *
717 {
718  text *result;
719  int len1,
720  len2,
721  len;
722  char *ptr;
723 
724  len1 = VARSIZE_ANY_EXHDR(t1);
725  len2 = VARSIZE_ANY_EXHDR(t2);
726 
727  /* paranoia ... probably should throw error instead? */
728  if (len1 < 0)
729  len1 = 0;
730  if (len2 < 0)
731  len2 = 0;
732 
733  len = len1 + len2 + VARHDRSZ;
734  result = (text *) palloc(len);
735 
736  /* Set size of result string... */
737  SET_VARSIZE(result, len);
738 
739  /* Fill data field of result string... */
740  ptr = VARDATA(result);
741  if (len1 > 0)
742  memcpy(ptr, VARDATA_ANY(t1), len1);
743  if (len2 > 0)
744  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
745 
746  return result;
747 }
748 
749 /*
750  * charlen_to_bytelen()
751  * Compute the number of bytes occupied by n characters starting at *p
752  *
753  * It is caller's responsibility that there actually are n characters;
754  * the string need not be null-terminated.
755  */
static int
charlen_to_bytelen(const char *p, int n)
{
	if (pg_database_encoding_max_length() == 1)
	{
		/* Optimization for single-byte encodings */
		return n;
	}
	else
	{
		const char *s;

		/* step over n characters, however many bytes each one takes */
		for (s = p; n > 0; n--)
			s += pg_mblen(s);

		return s - p;
	}
}
774 
775 /*
776  * text_substr()
777  * Return a substring starting at the specified position.
778  * - thomas 1997-12-31
779  *
780  * Input:
781  * - string
782  * - starting position (is one-based)
783  * - string length
784  *
785  * If the starting position is zero or less, then return from the start of the string
786  * adjusting the length to be consistent with the "negative start" per SQL.
787  * If the length is less than zero, return the remaining string.
788  *
789  * Added multibyte support.
790  * - Tatsuo Ishii 1998-4-21
791  * Changed behavior if starting position is less than one to conform to SQL behavior.
792  * Formerly returned the entire string; now returns a portion.
793  * - Thomas Lockhart 1998-12-10
794  * Now uses faster TOAST-slicing interface
795  * - John Gray 2002-02-22
796  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
797  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
798  * error; if E < 1, return '', not entire string). Fixed MB related bug when
799  * S > LC and < LC + 4 sometimes garbage characters are returned.
800  * - Joe Conway 2002-08-10
801  */
802 Datum
804 {
806  PG_GETARG_INT32(1),
807  PG_GETARG_INT32(2),
808  false));
809 }
810 
811 /*
812  * text_substr_no_len -
813  * Wrapper to avoid opr_sanity failure due to
814  * one function accepting a different number of args.
815  */
816 Datum
818 {
820  PG_GETARG_INT32(1),
821  -1, true));
822 }
823 
824 /*
825  * text_substring -
826  * Does the real work for text_substr() and text_substr_no_len()
827  *
828  * This is broken out so it can be called directly by other string processing
829  * functions. Note that the argument is passed as a Datum, to indicate that
830  * it may still be in compressed/toasted form. We can avoid detoasting all
831  * of it in some cases.
832  *
833  * The result is always a freshly palloc'd datum.
834  */
835 static text *
836 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
837 {
839  int32 S = start; /* start position */
840  int32 S1; /* adjusted start position */
841  int32 L1; /* adjusted substring length */
842 
843  /* life is easy if the encoding max length is 1 */
844  if (eml == 1)
845  {
846  S1 = Max(S, 1);
847 
848  if (length_not_specified) /* special case - get length to end of
849  * string */
850  L1 = -1;
851  else
852  {
853  /* end position */
854  int E = S + length;
855 
856  /*
857  * A negative value for L is the only way for the end position to
858  * be before the start. SQL99 says to throw an error.
859  */
860  if (E < S)
861  ereport(ERROR,
862  (errcode(ERRCODE_SUBSTRING_ERROR),
863  errmsg("negative substring length not allowed")));
864 
865  /*
866  * A zero or negative value for the end position can happen if the
867  * start was negative or one. SQL99 says to return a zero-length
868  * string.
869  */
870  if (E < 1)
871  return cstring_to_text("");
872 
873  L1 = E - S1;
874  }
875 
876  /*
877  * If the start position is past the end of the string, SQL99 says to
878  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
879  * that for us. Convert to zero-based starting position
880  */
881  return DatumGetTextPSlice(str, S1 - 1, L1);
882  }
883  else if (eml > 1)
884  {
885  /*
886  * When encoding max length is > 1, we can't get LC without
887  * detoasting, so we'll grab a conservatively large slice now and go
888  * back later to do the right thing
889  */
890  int32 slice_start;
891  int32 slice_size;
892  int32 slice_strlen;
893  text *slice;
894  int32 E1;
895  int32 i;
896  char *p;
897  char *s;
898  text *ret;
899 
900  /*
901  * if S is past the end of the string, the tuple toaster will return a
902  * zero-length string to us
903  */
904  S1 = Max(S, 1);
905 
906  /*
907  * We need to start at position zero because there is no way to know
908  * in advance which byte offset corresponds to the supplied start
909  * position.
910  */
911  slice_start = 0;
912 
913  if (length_not_specified) /* special case - get length to end of
914  * string */
915  slice_size = L1 = -1;
916  else
917  {
918  int E = S + length;
919 
920  /*
921  * A negative value for L is the only way for the end position to
922  * be before the start. SQL99 says to throw an error.
923  */
924  if (E < S)
925  ereport(ERROR,
926  (errcode(ERRCODE_SUBSTRING_ERROR),
927  errmsg("negative substring length not allowed")));
928 
929  /*
930  * A zero or negative value for the end position can happen if the
931  * start was negative or one. SQL99 says to return a zero-length
932  * string.
933  */
934  if (E < 1)
935  return cstring_to_text("");
936 
937  /*
938  * if E is past the end of the string, the tuple toaster will
939  * truncate the length for us
940  */
941  L1 = E - S1;
942 
943  /*
944  * Total slice size in bytes can't be any longer than the start
945  * position plus substring length times the encoding max length.
946  */
947  slice_size = (S1 + L1) * eml;
948  }
949 
950  /*
951  * If we're working with an untoasted source, no need to do an extra
952  * copying step.
953  */
956  slice = DatumGetTextPSlice(str, slice_start, slice_size);
957  else
958  slice = (text *) DatumGetPointer(str);
959 
960  /* see if we got back an empty string */
961  if (VARSIZE_ANY_EXHDR(slice) == 0)
962  {
963  if (slice != (text *) DatumGetPointer(str))
964  pfree(slice);
965  return cstring_to_text("");
966  }
967 
968  /* Now we can get the actual length of the slice in MB characters */
969  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
970  VARSIZE_ANY_EXHDR(slice));
971 
972  /*
973  * Check that the start position wasn't > slice_strlen. If so, SQL99
974  * says to return a zero-length string.
975  */
976  if (S1 > slice_strlen)
977  {
978  if (slice != (text *) DatumGetPointer(str))
979  pfree(slice);
980  return cstring_to_text("");
981  }
982 
983  /*
984  * Adjust L1 and E1 now that we know the slice string length. Again
985  * remember that S1 is one based, and slice_start is zero based.
986  */
987  if (L1 > -1)
988  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
989  else
990  E1 = slice_start + 1 + slice_strlen;
991 
992  /*
993  * Find the start position in the slice; remember S1 is not zero based
994  */
995  p = VARDATA_ANY(slice);
996  for (i = 0; i < S1 - 1; i++)
997  p += pg_mblen(p);
998 
999  /* hang onto a pointer to our start position */
1000  s = p;
1001 
1002  /*
1003  * Count the actual bytes used by the substring of the requested
1004  * length.
1005  */
1006  for (i = S1; i < E1; i++)
1007  p += pg_mblen(p);
1008 
1009  ret = (text *) palloc(VARHDRSZ + (p - s));
1010  SET_VARSIZE(ret, VARHDRSZ + (p - s));
1011  memcpy(VARDATA(ret), s, (p - s));
1012 
1013  if (slice != (text *) DatumGetPointer(str))
1014  pfree(slice);
1015 
1016  return ret;
1017  }
1018  else
1019  elog(ERROR, "invalid backend encoding: encoding max length < 1");
1020 
1021  /* not reached: suppress compiler warning */
1022  return NULL;
1023 }
1024 
1025 /*
1026  * textoverlay
1027  * Replace specified substring of first string with second
1028  *
1029  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1030  * This code is a direct implementation of what the standard says.
1031  */
1032 Datum
1034 {
1035  text *t1 = PG_GETARG_TEXT_PP(0);
1036  text *t2 = PG_GETARG_TEXT_PP(1);
1037  int sp = PG_GETARG_INT32(2); /* substring start position */
1038  int sl = PG_GETARG_INT32(3); /* substring length */
1039 
1040  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1041 }
1042 
1043 Datum
1045 {
1046  text *t1 = PG_GETARG_TEXT_PP(0);
1047  text *t2 = PG_GETARG_TEXT_PP(1);
1048  int sp = PG_GETARG_INT32(2); /* substring start position */
1049  int sl;
1050 
1051  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1052  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1053 }
1054 
1055 static text *
1056 text_overlay(text *t1, text *t2, int sp, int sl)
1057 {
1058  text *result;
1059  text *s1;
1060  text *s2;
1061  int sp_pl_sl;
1062 
1063  /*
1064  * Check for possible integer-overflow cases. For negative sp, throw a
1065  * "substring length" error because that's what should be expected
1066  * according to the spec's definition of OVERLAY().
1067  */
1068  if (sp <= 0)
1069  ereport(ERROR,
1070  (errcode(ERRCODE_SUBSTRING_ERROR),
1071  errmsg("negative substring length not allowed")));
1072  if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1073  ereport(ERROR,
1074  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1075  errmsg("integer out of range")));
1076 
1077  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1078  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1079  result = text_catenate(s1, t2);
1080  result = text_catenate(result, s2);
1081 
1082  return result;
1083 }
1084 
1085 /*
1086  * textpos -
1087  * Return the position of the specified substring.
1088  * Implements the SQL POSITION() function.
1089  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1090  * - thomas 1997-07-27
1091  */
1092 Datum
1094 {
1095  text *str = PG_GETARG_TEXT_PP(0);
1096  text *search_str = PG_GETARG_TEXT_PP(1);
1097 
1098  PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
1099 }
1100 
1101 /*
1102  * text_position -
1103  * Does the real work for textpos()
1104  *
1105  * Inputs:
1106  * t1 - string to be searched
1107  * t2 - pattern to match within t1
1108  * Result:
1109  * Character index of the first matched char, starting from 1,
1110  * or 0 if no match.
1111  *
1112  * This is broken out so it can be called directly by other string processing
1113  * functions.
1114  */
1115 static int
1116 text_position(text *t1, text *t2, Oid collid)
1117 {
1119  int result;
1120 
1121  if (VARSIZE_ANY_EXHDR(t1) < 1 || VARSIZE_ANY_EXHDR(t2) < 1)
1122  return 0;
1123 
1124  text_position_setup(t1, t2, collid, &state);
1125  if (!text_position_next(&state))
1126  result = 0;
1127  else
1128  result = text_position_get_match_pos(&state);
1129  text_position_cleanup(&state);
1130  return result;
1131 }
1132 
1133 
1134 /*
1135  * text_position_setup, text_position_next, text_position_cleanup -
1136  * Component steps of text_position()
1137  *
1138  * These are broken out so that a string can be efficiently searched for
1139  * multiple occurrences of the same pattern. text_position_next may be
1140  * called multiple times, and it advances to the next match on each call.
1141  * text_position_get_match_ptr() and text_position_get_match_pos() return
1142  * a pointer or 1-based character position of the last match, respectively.
1143  *
1144  * The "state" variable is normally just a local variable in the caller.
1145  *
1146  * NOTE: text_position_next skips over the matched portion. For example,
1147  * searching for "xx" in "xxx" returns only one match, not two.
1148  */
1149 
1150 static void
1152 {
1153  int len1 = VARSIZE_ANY_EXHDR(t1);
1154  int len2 = VARSIZE_ANY_EXHDR(t2);
1155  pg_locale_t mylocale = 0;
1156 
1157  check_collation_set(collid);
1158 
1159  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1160  mylocale = pg_newlocale_from_collation(collid);
1161 
1162  if (mylocale && !mylocale->deterministic)
1163  ereport(ERROR,
1164  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1165  errmsg("nondeterministic collations are not supported for substring searches")));
1166 
1167  Assert(len1 > 0);
1168  Assert(len2 > 0);
1169 
1170  /*
1171  * Even with a multi-byte encoding, we perform the search using the raw
1172  * byte sequence, ignoring multibyte issues. For UTF-8, that works fine,
1173  * because in UTF-8 the byte sequence of one character cannot contain
1174  * another character. For other multi-byte encodings, we do the search
1175  * initially as a simple byte search, ignoring multibyte issues, but
1176  * verify afterwards that the match we found is at a character boundary,
1177  * and continue the search if it was a false match.
1178  */
1180  {
1181  state->is_multibyte = false;
1182  state->is_multibyte_char_in_char = false;
1183  }
1184  else if (GetDatabaseEncoding() == PG_UTF8)
1185  {
1186  state->is_multibyte = true;
1187  state->is_multibyte_char_in_char = false;
1188  }
1189  else
1190  {
1191  state->is_multibyte = true;
1192  state->is_multibyte_char_in_char = true;
1193  }
1194 
1195  state->str1 = VARDATA_ANY(t1);
1196  state->str2 = VARDATA_ANY(t2);
1197  state->len1 = len1;
1198  state->len2 = len2;
1199  state->last_match = NULL;
1200  state->refpoint = state->str1;
1201  state->refpos = 0;
1202 
1203  /*
1204  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1205  * notes we use the terminology that the "haystack" is the string to be
1206  * searched (t1) and the "needle" is the pattern being sought (t2).
1207  *
1208  * If the needle is empty or bigger than the haystack then there is no
1209  * point in wasting cycles initializing the table. We also choose not to
1210  * use B-M-H for needles of length 1, since the skip table can't possibly
1211  * save anything in that case.
1212  */
1213  if (len1 >= len2 && len2 > 1)
1214  {
1215  int searchlength = len1 - len2;
1216  int skiptablemask;
1217  int last;
1218  int i;
1219  const char *str2 = state->str2;
1220 
1221  /*
1222  * First we must determine how much of the skip table to use. The
1223  * declaration of TextPositionState allows up to 256 elements, but for
1224  * short search problems we don't really want to have to initialize so
1225  * many elements --- it would take too long in comparison to the
1226  * actual search time. So we choose a useful skip table size based on
1227  * the haystack length minus the needle length. The closer the needle
1228  * length is to the haystack length the less useful skipping becomes.
1229  *
1230  * Note: since we use bit-masking to select table elements, the skip
1231  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1232  */
1233  if (searchlength < 16)
1234  skiptablemask = 3;
1235  else if (searchlength < 64)
1236  skiptablemask = 7;
1237  else if (searchlength < 128)
1238  skiptablemask = 15;
1239  else if (searchlength < 512)
1240  skiptablemask = 31;
1241  else if (searchlength < 2048)
1242  skiptablemask = 63;
1243  else if (searchlength < 4096)
1244  skiptablemask = 127;
1245  else
1246  skiptablemask = 255;
1247  state->skiptablemask = skiptablemask;
1248 
1249  /*
1250  * Initialize the skip table. We set all elements to the needle
1251  * length, since this is the correct skip distance for any character
1252  * not found in the needle.
1253  */
1254  for (i = 0; i <= skiptablemask; i++)
1255  state->skiptable[i] = len2;
1256 
1257  /*
1258  * Now examine the needle. For each character except the last one,
1259  * set the corresponding table element to the appropriate skip
1260  * distance. Note that when two characters share the same skip table
1261  * entry, the one later in the needle must determine the skip
1262  * distance.
1263  */
1264  last = len2 - 1;
1265 
1266  for (i = 0; i < last; i++)
1267  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1268  }
1269 }
1270 
1271 /*
1272  * Advance to the next match, starting from the end of the previous match
1273  * (or the beginning of the string, on first call). Returns true if a match
1274  * is found.
1275  */
1276 static bool
1278 {
1279  int needle_len = state->len2;
1280  char *start_ptr;
1281  char *matchptr;
1282 
1283  if (needle_len <= 0)
1284  return false; /* result for empty pattern */
1285 
1286  /* Start from the point right after the previous match. */
1287  if (state->last_match)
1288  start_ptr = state->last_match + needle_len;
1289  else
1290  start_ptr = state->str1;
1291 
1292 retry:
1293  matchptr = text_position_next_internal(start_ptr, state);
1294 
1295  if (!matchptr)
1296  return false;
1297 
1298  /*
1299  * Found a match for the byte sequence. If this is a multibyte encoding,
1300  * where one character's byte sequence can appear inside a longer
1301  * multi-byte character, we need to verify that the match was at a
1302  * character boundary, not in the middle of a multi-byte character.
1303  */
1304  if (state->is_multibyte_char_in_char)
1305  {
1306  /* Walk one character at a time, until we reach the match. */
1307 
1308  /* the search should never move backwards. */
1309  Assert(state->refpoint <= matchptr);
1310 
1311  while (state->refpoint < matchptr)
1312  {
1313  /* step to next character. */
1314  state->refpoint += pg_mblen(state->refpoint);
1315  state->refpos++;
1316 
1317  /*
1318  * If we stepped over the match's start position, then it was a
1319  * false positive, where the byte sequence appeared in the middle
1320  * of a multi-byte character. Skip it, and continue the search at
1321  * the next character boundary.
1322  */
1323  if (state->refpoint > matchptr)
1324  {
1325  start_ptr = state->refpoint;
1326  goto retry;
1327  }
1328  }
1329  }
1330 
1331  state->last_match = matchptr;
1332  return true;
1333 }
1334 
1335 /*
1336  * Subroutine of text_position_next(). This searches for the raw byte
1337  * sequence, ignoring any multi-byte encoding issues. Returns the first
1338  * match starting at 'start_ptr', or NULL if no match is found.
1339  */
1340 static char *
1342 {
1343  int haystack_len = state->len1;
1344  int needle_len = state->len2;
1345  int skiptablemask = state->skiptablemask;
1346  const char *haystack = state->str1;
1347  const char *needle = state->str2;
1348  const char *haystack_end = &haystack[haystack_len];
1349  const char *hptr;
1350 
1351  Assert(start_ptr >= haystack && start_ptr <= haystack_end);
1352 
1353  if (needle_len == 1)
1354  {
1355  /* No point in using B-M-H for a one-character needle */
1356  char nchar = *needle;
1357 
1358  hptr = start_ptr;
1359  while (hptr < haystack_end)
1360  {
1361  if (*hptr == nchar)
1362  return (char *) hptr;
1363  hptr++;
1364  }
1365  }
1366  else
1367  {
1368  const char *needle_last = &needle[needle_len - 1];
1369 
1370  /* Start at startpos plus the length of the needle */
1371  hptr = start_ptr + needle_len - 1;
1372  while (hptr < haystack_end)
1373  {
1374  /* Match the needle scanning *backward* */
1375  const char *nptr;
1376  const char *p;
1377 
1378  nptr = needle_last;
1379  p = hptr;
1380  while (*nptr == *p)
1381  {
1382  /* Matched it all? If so, return 1-based position */
1383  if (nptr == needle)
1384  return (char *) p;
1385  nptr--, p--;
1386  }
1387 
1388  /*
1389  * No match, so use the haystack char at hptr to decide how far to
1390  * advance. If the needle had any occurrence of that character
1391  * (or more precisely, one sharing the same skiptable entry)
1392  * before its last character, then we advance far enough to align
1393  * the last such needle character with that haystack position.
1394  * Otherwise we can advance by the whole needle length.
1395  */
1396  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1397  }
1398  }
1399 
1400  return 0; /* not found */
1401 }
1402 
1403 /*
1404  * Return a pointer to the current match.
1405  *
1406  * The returned pointer points into correct position in the original
1407  * the haystack string.
1408  */
1409 static char *
1411 {
1412  return state->last_match;
1413 }
1414 
1415 /*
1416  * Return the offset of the current match.
1417  *
1418  * The offset is in characters, 1-based.
1419  */
1420 static int
1422 {
1423  if (!state->is_multibyte)
1424  return state->last_match - state->str1 + 1;
1425  else
1426  {
1427  /* Convert the byte position to char position. */
1428  while (state->refpoint < state->last_match)
1429  {
1430  state->refpoint += pg_mblen(state->refpoint);
1431  state->refpos++;
1432  }
1433  Assert(state->refpoint == state->last_match);
1434  return state->refpos + 1;
1435  }
1436 }
1437 
1438 static void
1440 {
1441  /* no cleanup needed */
1442 }
1443 
1444 static void
1446 {
1447  if (!OidIsValid(collid))
1448  {
1449  /*
1450  * This typically means that the parser could not resolve a conflict
1451  * of implicit collations, so report it that way.
1452  */
1453  ereport(ERROR,
1454  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1455  errmsg("could not determine which collation to use for string comparison"),
1456  errhint("Use the COLLATE clause to set the collation explicitly.")));
1457  }
1458 }
1459 
1460 /* varstr_cmp()
1461  * Comparison function for text strings with given lengths.
1462  * Includes locale support, but must copy strings to temporary memory
1463  * to allow null-termination for inputs to strcoll().
1464  * Returns an integer less than, equal to, or greater than zero, indicating
1465  * whether arg1 is less than, equal to, or greater than arg2.
1466  */
1467 int
1468 varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
1469 {
1470  int result;
1471 
1472  check_collation_set(collid);
1473 
1474  /*
1475  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1476  * have to do some memory copying. This turns out to be significantly
1477  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1478  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1479  */
1480  if (lc_collate_is_c(collid))
1481  {
1482  result = memcmp(arg1, arg2, Min(len1, len2));
1483  if ((result == 0) && (len1 != len2))
1484  result = (len1 < len2) ? -1 : 1;
1485  }
1486  else
1487  {
1488  char a1buf[TEXTBUFLEN];
1489  char a2buf[TEXTBUFLEN];
1490  char *a1p,
1491  *a2p;
1492  pg_locale_t mylocale = 0;
1493 
1494  if (collid != DEFAULT_COLLATION_OID)
1495  mylocale = pg_newlocale_from_collation(collid);
1496 
1497  /*
1498  * memcmp() can't tell us which of two unequal strings sorts first,
1499  * but it's a cheap way to tell if they're equal. Testing shows that
1500  * memcmp() followed by strcoll() is only trivially slower than
1501  * strcoll() by itself, so we don't lose much if this doesn't work out
1502  * very often, and if it does - for example, because there are many
1503  * equal strings in the input - then we win big by avoiding expensive
1504  * collation-aware comparisons.
1505  */
1506  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1507  return 0;
1508 
1509 #ifdef WIN32
1510  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1511  if (GetDatabaseEncoding() == PG_UTF8
1512  && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
1513  {
1514  int a1len;
1515  int a2len;
1516  int r;
1517 
1518  if (len1 >= TEXTBUFLEN / 2)
1519  {
1520  a1len = len1 * 2 + 2;
1521  a1p = palloc(a1len);
1522  }
1523  else
1524  {
1525  a1len = TEXTBUFLEN;
1526  a1p = a1buf;
1527  }
1528  if (len2 >= TEXTBUFLEN / 2)
1529  {
1530  a2len = len2 * 2 + 2;
1531  a2p = palloc(a2len);
1532  }
1533  else
1534  {
1535  a2len = TEXTBUFLEN;
1536  a2p = a2buf;
1537  }
1538 
1539  /* stupid Microsloth API does not work for zero-length input */
1540  if (len1 == 0)
1541  r = 0;
1542  else
1543  {
1544  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1545  (LPWSTR) a1p, a1len / 2);
1546  if (!r)
1547  ereport(ERROR,
1548  (errmsg("could not convert string to UTF-16: error code %lu",
1549  GetLastError())));
1550  }
1551  ((LPWSTR) a1p)[r] = 0;
1552 
1553  if (len2 == 0)
1554  r = 0;
1555  else
1556  {
1557  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1558  (LPWSTR) a2p, a2len / 2);
1559  if (!r)
1560  ereport(ERROR,
1561  (errmsg("could not convert string to UTF-16: error code %lu",
1562  GetLastError())));
1563  }
1564  ((LPWSTR) a2p)[r] = 0;
1565 
1566  errno = 0;
1567 #ifdef HAVE_LOCALE_T
1568  if (mylocale)
1569  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
1570  else
1571 #endif
1572  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1573  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1574  * headers */
1575  ereport(ERROR,
1576  (errmsg("could not compare Unicode strings: %m")));
1577 
1578  /* Break tie if necessary. */
1579  if (result == 0 &&
1580  (!mylocale || mylocale->deterministic))
1581  {
1582  result = memcmp(arg1, arg2, Min(len1, len2));
1583  if ((result == 0) && (len1 != len2))
1584  result = (len1 < len2) ? -1 : 1;
1585  }
1586 
1587  if (a1p != a1buf)
1588  pfree(a1p);
1589  if (a2p != a2buf)
1590  pfree(a2p);
1591 
1592  return result;
1593  }
1594 #endif /* WIN32 */
1595 
1596  if (len1 >= TEXTBUFLEN)
1597  a1p = (char *) palloc(len1 + 1);
1598  else
1599  a1p = a1buf;
1600  if (len2 >= TEXTBUFLEN)
1601  a2p = (char *) palloc(len2 + 1);
1602  else
1603  a2p = a2buf;
1604 
1605  memcpy(a1p, arg1, len1);
1606  a1p[len1] = '\0';
1607  memcpy(a2p, arg2, len2);
1608  a2p[len2] = '\0';
1609 
1610  if (mylocale)
1611  {
1612  if (mylocale->provider == COLLPROVIDER_ICU)
1613  {
1614 #ifdef USE_ICU
1615 #ifdef HAVE_UCOL_STRCOLLUTF8
1616  if (GetDatabaseEncoding() == PG_UTF8)
1617  {
1618  UErrorCode status;
1619 
1620  status = U_ZERO_ERROR;
1621  result = ucol_strcollUTF8(mylocale->info.icu.ucol,
1622  arg1, len1,
1623  arg2, len2,
1624  &status);
1625  if (U_FAILURE(status))
1626  ereport(ERROR,
1627  (errmsg("collation failed: %s", u_errorName(status))));
1628  }
1629  else
1630 #endif
1631  {
1632  int32_t ulen1,
1633  ulen2;
1634  UChar *uchar1,
1635  *uchar2;
1636 
1637  ulen1 = icu_to_uchar(&uchar1, arg1, len1);
1638  ulen2 = icu_to_uchar(&uchar2, arg2, len2);
1639 
1640  result = ucol_strcoll(mylocale->info.icu.ucol,
1641  uchar1, ulen1,
1642  uchar2, ulen2);
1643 
1644  pfree(uchar1);
1645  pfree(uchar2);
1646  }
1647 #else /* not USE_ICU */
1648  /* shouldn't happen */
1649  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1650 #endif /* not USE_ICU */
1651  }
1652  else
1653  {
1654 #ifdef HAVE_LOCALE_T
1655  result = strcoll_l(a1p, a2p, mylocale->info.lt);
1656 #else
1657  /* shouldn't happen */
1658  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1659 #endif
1660  }
1661  }
1662  else
1663  result = strcoll(a1p, a2p);
1664 
1665  /* Break tie if necessary. */
1666  if (result == 0 &&
1667  (!mylocale || mylocale->deterministic))
1668  result = strcmp(a1p, a2p);
1669 
1670  if (a1p != a1buf)
1671  pfree(a1p);
1672  if (a2p != a2buf)
1673  pfree(a2p);
1674  }
1675 
1676  return result;
1677 }
1678 
1679 /* text_cmp()
1680  * Internal comparison function for text strings.
1681  * Returns -1, 0 or 1
1682  */
1683 static int
1684 text_cmp(text *arg1, text *arg2, Oid collid)
1685 {
1686  char *a1p,
1687  *a2p;
1688  int len1,
1689  len2;
1690 
1691  a1p = VARDATA_ANY(arg1);
1692  a2p = VARDATA_ANY(arg2);
1693 
1694  len1 = VARSIZE_ANY_EXHDR(arg1);
1695  len2 = VARSIZE_ANY_EXHDR(arg2);
1696 
1697  return varstr_cmp(a1p, len1, a2p, len2, collid);
1698 }
1699 
1700 /*
1701  * Comparison functions for text strings.
1702  *
1703  * Note: btree indexes need these routines not to leak memory; therefore,
1704  * be careful to free working copies of toasted datums. Most places don't
1705  * need to be so careful.
1706  */
1707 
1708 Datum
1710 {
1711  Oid collid = PG_GET_COLLATION();
1712  bool result;
1713 
1714  check_collation_set(collid);
1715 
1716  if (lc_collate_is_c(collid) ||
1717  collid == DEFAULT_COLLATION_OID ||
1718  pg_newlocale_from_collation(collid)->deterministic)
1719  {
1720  Datum arg1 = PG_GETARG_DATUM(0);
1721  Datum arg2 = PG_GETARG_DATUM(1);
1722  Size len1,
1723  len2;
1724 
1725  /*
1726  * Since we only care about equality or not-equality, we can avoid all
1727  * the expense of strcoll() here, and just do bitwise comparison. In
1728  * fact, we don't even have to do a bitwise comparison if we can show
1729  * the lengths of the strings are unequal; which might save us from
1730  * having to detoast one or both values.
1731  */
1732  len1 = toast_raw_datum_size(arg1);
1733  len2 = toast_raw_datum_size(arg2);
1734  if (len1 != len2)
1735  result = false;
1736  else
1737  {
1738  text *targ1 = DatumGetTextPP(arg1);
1739  text *targ2 = DatumGetTextPP(arg2);
1740 
1741  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1742  len1 - VARHDRSZ) == 0);
1743 
1744  PG_FREE_IF_COPY(targ1, 0);
1745  PG_FREE_IF_COPY(targ2, 1);
1746  }
1747  }
1748  else
1749  {
1750  text *arg1 = PG_GETARG_TEXT_PP(0);
1751  text *arg2 = PG_GETARG_TEXT_PP(1);
1752 
1753  result = (text_cmp(arg1, arg2, collid) == 0);
1754 
1755  PG_FREE_IF_COPY(arg1, 0);
1756  PG_FREE_IF_COPY(arg2, 1);
1757  }
1758 
1759  PG_RETURN_BOOL(result);
1760 }
1761 
1762 Datum
1764 {
1765  Oid collid = PG_GET_COLLATION();
1766  bool result;
1767 
1768  check_collation_set(collid);
1769 
1770  if (lc_collate_is_c(collid) ||
1771  collid == DEFAULT_COLLATION_OID ||
1772  pg_newlocale_from_collation(collid)->deterministic)
1773  {
1774  Datum arg1 = PG_GETARG_DATUM(0);
1775  Datum arg2 = PG_GETARG_DATUM(1);
1776  Size len1,
1777  len2;
1778 
1779  /* See comment in texteq() */
1780  len1 = toast_raw_datum_size(arg1);
1781  len2 = toast_raw_datum_size(arg2);
1782  if (len1 != len2)
1783  result = true;
1784  else
1785  {
1786  text *targ1 = DatumGetTextPP(arg1);
1787  text *targ2 = DatumGetTextPP(arg2);
1788 
1789  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1790  len1 - VARHDRSZ) != 0);
1791 
1792  PG_FREE_IF_COPY(targ1, 0);
1793  PG_FREE_IF_COPY(targ2, 1);
1794  }
1795  }
1796  else
1797  {
1798  text *arg1 = PG_GETARG_TEXT_PP(0);
1799  text *arg2 = PG_GETARG_TEXT_PP(1);
1800 
1801  result = (text_cmp(arg1, arg2, collid) != 0);
1802 
1803  PG_FREE_IF_COPY(arg1, 0);
1804  PG_FREE_IF_COPY(arg2, 1);
1805  }
1806 
1807  PG_RETURN_BOOL(result);
1808 }
1809 
1810 Datum
1812 {
1813  text *arg1 = PG_GETARG_TEXT_PP(0);
1814  text *arg2 = PG_GETARG_TEXT_PP(1);
1815  bool result;
1816 
1817  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1818 
1819  PG_FREE_IF_COPY(arg1, 0);
1820  PG_FREE_IF_COPY(arg2, 1);
1821 
1822  PG_RETURN_BOOL(result);
1823 }
1824 
1825 Datum
1827 {
1828  text *arg1 = PG_GETARG_TEXT_PP(0);
1829  text *arg2 = PG_GETARG_TEXT_PP(1);
1830  bool result;
1831 
1832  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1833 
1834  PG_FREE_IF_COPY(arg1, 0);
1835  PG_FREE_IF_COPY(arg2, 1);
1836 
1837  PG_RETURN_BOOL(result);
1838 }
1839 
1840 Datum
1842 {
1843  text *arg1 = PG_GETARG_TEXT_PP(0);
1844  text *arg2 = PG_GETARG_TEXT_PP(1);
1845  bool result;
1846 
1847  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1848 
1849  PG_FREE_IF_COPY(arg1, 0);
1850  PG_FREE_IF_COPY(arg2, 1);
1851 
1852  PG_RETURN_BOOL(result);
1853 }
1854 
1855 Datum
1857 {
1858  text *arg1 = PG_GETARG_TEXT_PP(0);
1859  text *arg2 = PG_GETARG_TEXT_PP(1);
1860  bool result;
1861 
1862  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1863 
1864  PG_FREE_IF_COPY(arg1, 0);
1865  PG_FREE_IF_COPY(arg2, 1);
1866 
1867  PG_RETURN_BOOL(result);
1868 }
1869 
1870 Datum
1872 {
1873  Datum arg1 = PG_GETARG_DATUM(0);
1874  Datum arg2 = PG_GETARG_DATUM(1);
1875  Oid collid = PG_GET_COLLATION();
1876  pg_locale_t mylocale = 0;
1877  bool result;
1878  Size len1,
1879  len2;
1880 
1881  check_collation_set(collid);
1882 
1883  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1884  mylocale = pg_newlocale_from_collation(collid);
1885 
1886  if (mylocale && !mylocale->deterministic)
1887  ereport(ERROR,
1888  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1889  errmsg("nondeterministic collations are not supported for substring searches")));
1890 
1891  len1 = toast_raw_datum_size(arg1);
1892  len2 = toast_raw_datum_size(arg2);
1893  if (len2 > len1)
1894  result = false;
1895  else
1896  {
1897  text *targ1 = text_substring(arg1, 1, len2, false);
1898  text *targ2 = DatumGetTextPP(arg2);
1899 
1900  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1901  VARSIZE_ANY_EXHDR(targ2)) == 0);
1902 
1903  PG_FREE_IF_COPY(targ1, 0);
1904  PG_FREE_IF_COPY(targ2, 1);
1905  }
1906 
1907  PG_RETURN_BOOL(result);
1908 }
1909 
1910 Datum
1912 {
1913  text *arg1 = PG_GETARG_TEXT_PP(0);
1914  text *arg2 = PG_GETARG_TEXT_PP(1);
1915  int32 result;
1916 
1917  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1918 
1919  PG_FREE_IF_COPY(arg1, 0);
1920  PG_FREE_IF_COPY(arg2, 1);
1921 
1922  PG_RETURN_INT32(result);
1923 }
1924 
1925 Datum
1927 {
1929  Oid collid = ssup->ssup_collation;
1930  MemoryContext oldcontext;
1931 
1932  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1933 
1934  /* Use generic string SortSupport */
1935  varstr_sortsupport(ssup, TEXTOID, collid);
1936 
1937  MemoryContextSwitchTo(oldcontext);
1938 
1939  PG_RETURN_VOID();
1940 }
1941 
1942 /*
1943  * Generic sortsupport interface for character type's operator classes.
1944  * Includes locale support, and support for BpChar semantics (i.e. removing
1945  * trailing spaces before comparison).
1946  *
1947  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1948  * same representation. Callers that always use the C collation (e.g.
1949  * non-collatable type callers like bytea) may have NUL bytes in their strings;
1950  * this will not work with any other collation, though.
1951  */
1952 void
1954 {
1955  bool abbreviate = ssup->abbreviate;
1956  bool collate_c = false;
1957  VarStringSortSupport *sss;
1958  pg_locale_t locale = 0;
1959 
1960  check_collation_set(collid);
1961 
1962  /*
1963  * If possible, set ssup->comparator to a function which can be used to
1964  * directly compare two datums. If we can do this, we'll avoid the
1965  * overhead of a trip through the fmgr layer for every comparison, which
1966  * can be substantial.
1967  *
1968  * Most typically, we'll set the comparator to varlenafastcmp_locale,
1969  * which uses strcoll() to perform comparisons. We use that for the
1970  * BpChar case too, but type NAME uses namefastcmp_locale. However, if
1971  * LC_COLLATE = C, we can make things quite a bit faster with
1972  * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
1973  * memcmp() rather than strcoll().
1974  */
1975  if (lc_collate_is_c(collid))
1976  {
1977  if (typid == BPCHAROID)
1978  ssup->comparator = bpcharfastcmp_c;
1979  else if (typid == NAMEOID)
1980  {
1981  ssup->comparator = namefastcmp_c;
1982  /* Not supporting abbreviation with type NAME, for now */
1983  abbreviate = false;
1984  }
1985  else
1986  ssup->comparator = varstrfastcmp_c;
1987 
1988  collate_c = true;
1989  }
1990  else
1991  {
1992  /*
1993  * We need a collation-sensitive comparison. To make things faster,
1994  * we'll figure out the collation based on the locale id and cache the
1995  * result.
1996  */
1997  if (collid != DEFAULT_COLLATION_OID)
1998  locale = pg_newlocale_from_collation(collid);
1999 
2000  /*
2001  * There is a further exception on Windows. When the database
2002  * encoding is UTF-8 and we are not using the C collation, complex
2003  * hacks are required. We don't currently have a comparator that
2004  * handles that case, so we fall back on the slow method of having the
2005  * sort code invoke bttextcmp() (in the case of text) via the fmgr
2006  * trampoline. ICU locales work just the same on Windows, however.
2007  */
2008 #ifdef WIN32
2009  if (GetDatabaseEncoding() == PG_UTF8 &&
2010  !(locale && locale->provider == COLLPROVIDER_ICU))
2011  return;
2012 #endif
2013 
2014  /*
2015  * We use varlenafastcmp_locale except for type NAME.
2016  */
2017  if (typid == NAMEOID)
2018  {
2020  /* Not supporting abbreviation with type NAME, for now */
2021  abbreviate = false;
2022  }
2023  else
2025  }
2026 
2027  /*
2028  * Unfortunately, it seems that abbreviation for non-C collations is
2029  * broken on many common platforms; testing of multiple versions of glibc
2030  * reveals that, for many locales, strcoll() and strxfrm() do not return
2031  * consistent results, which is fatal to this optimization. While no
2032  * other libc other than Cygwin has so far been shown to have a problem,
2033  * we take the conservative course of action for right now and disable
2034  * this categorically. (Users who are certain this isn't a problem on
2035  * their system can define TRUST_STRXFRM.)
2036  *
2037  * Even apart from the risk of broken locales, it's possible that there
2038  * are platforms where the use of abbreviated keys should be disabled at
2039  * compile time. Having only 4 byte datums could make worst-case
2040  * performance drastically more likely, for example. Moreover, macOS's
2041  * strxfrm() implementation is known to not effectively concentrate a
2042  * significant amount of entropy from the original string in earlier
2043  * transformed blobs. It's possible that other supported platforms are
2044  * similarly encumbered. So, if we ever get past disabling this
2045  * categorically, we may still want or need to disable it for particular
2046  * platforms.
2047  */
2048 #ifndef TRUST_STRXFRM
2049  if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
2050  abbreviate = false;
2051 #endif
2052 
2053  /*
2054  * If we're using abbreviated keys, or if we're using a locale-aware
2055  * comparison, we need to initialize a StringSortSupport object. Both
2056  * cases will make use of the temporary buffers we initialize here for
2057  * scratch space (and to detect requirement for BpChar semantics from
2058  * caller), and the abbreviation case requires additional state.
2059  */
2060  if (abbreviate || !collate_c)
2061  {
2062  sss = palloc(sizeof(VarStringSortSupport));
2063  sss->buf1 = palloc(TEXTBUFLEN);
2064  sss->buflen1 = TEXTBUFLEN;
2065  sss->buf2 = palloc(TEXTBUFLEN);
2066  sss->buflen2 = TEXTBUFLEN;
2067  /* Start with invalid values */
2068  sss->last_len1 = -1;
2069  sss->last_len2 = -1;
2070  /* Initialize */
2071  sss->last_returned = 0;
2072  sss->locale = locale;
2073 
2074  /*
2075  * To avoid somehow confusing a strxfrm() blob and an original string,
2076  * constantly keep track of the variety of data that buf1 and buf2
2077  * currently contain.
2078  *
2079  * Comparisons may be interleaved with conversion calls. Frequently,
2080  * conversions and comparisons are batched into two distinct phases,
2081  * but the correctness of caching cannot hinge upon this. For
2082  * comparison caching, buffer state is only trusted if cache_blob is
2083  * found set to false, whereas strxfrm() caching only trusts the state
2084  * when cache_blob is found set to true.
2085  *
2086  * Arbitrarily initialize cache_blob to true.
2087  */
2088  sss->cache_blob = true;
2089  sss->collate_c = collate_c;
2090  sss->typid = typid;
2091  ssup->ssup_extra = sss;
2092 
2093  /*
2094  * If possible, plan to use the abbreviated keys optimization. The
2095  * core code may switch back to authoritative comparator should
2096  * abbreviation be aborted.
2097  */
2098  if (abbreviate)
2099  {
2100  sss->prop_card = 0.20;
2101  initHyperLogLog(&sss->abbr_card, 10);
2102  initHyperLogLog(&sss->full_card, 10);
2103  ssup->abbrev_full_comparator = ssup->comparator;
2104  ssup->comparator = varstrcmp_abbrev;
2107  }
2108  }
2109 }
2110 
2111 /*
2112  * sortsupport comparison func (for C locale case)
2113  */
2114 static int
2116 {
2117  VarString *arg1 = DatumGetVarStringPP(x);
2118  VarString *arg2 = DatumGetVarStringPP(y);
2119  char *a1p,
2120  *a2p;
2121  int len1,
2122  len2,
2123  result;
2124 
2125  a1p = VARDATA_ANY(arg1);
2126  a2p = VARDATA_ANY(arg2);
2127 
2128  len1 = VARSIZE_ANY_EXHDR(arg1);
2129  len2 = VARSIZE_ANY_EXHDR(arg2);
2130 
2131  result = memcmp(a1p, a2p, Min(len1, len2));
2132  if ((result == 0) && (len1 != len2))
2133  result = (len1 < len2) ? -1 : 1;
2134 
2135  /* We can't afford to leak memory here. */
2136  if (PointerGetDatum(arg1) != x)
2137  pfree(arg1);
2138  if (PointerGetDatum(arg2) != y)
2139  pfree(arg2);
2140 
2141  return result;
2142 }
2143 
2144 /*
2145  * sortsupport comparison func (for BpChar C locale case)
2146  *
2147  * BpChar outsources its sortsupport to this module. Specialization for the
2148  * varstr_sortsupport BpChar case, modeled on
2149  * internal_bpchar_pattern_compare().
2150  */
2151 static int
2153 {
2154  BpChar *arg1 = DatumGetBpCharPP(x);
2155  BpChar *arg2 = DatumGetBpCharPP(y);
2156  char *a1p,
2157  *a2p;
2158  int len1,
2159  len2,
2160  result;
2161 
2162  a1p = VARDATA_ANY(arg1);
2163  a2p = VARDATA_ANY(arg2);
2164 
2165  len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2166  len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2167 
2168  result = memcmp(a1p, a2p, Min(len1, len2));
2169  if ((result == 0) && (len1 != len2))
2170  result = (len1 < len2) ? -1 : 1;
2171 
2172  /* We can't afford to leak memory here. */
2173  if (PointerGetDatum(arg1) != x)
2174  pfree(arg1);
2175  if (PointerGetDatum(arg2) != y)
2176  pfree(arg2);
2177 
2178  return result;
2179 }
2180 
2181 /*
2182  * sortsupport comparison func (for NAME C locale case)
2183  */
2184 static int
2186 {
2187  Name arg1 = DatumGetName(x);
2188  Name arg2 = DatumGetName(y);
2189 
2190  return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
2191 }
2192 
2193 /*
2194  * sortsupport comparison func (for locale case with all varlena types)
2195  */
2196 static int
2198 {
2199  VarString *arg1 = DatumGetVarStringPP(x);
2200  VarString *arg2 = DatumGetVarStringPP(y);
2201  char *a1p,
2202  *a2p;
2203  int len1,
2204  len2,
2205  result;
2206 
2207  a1p = VARDATA_ANY(arg1);
2208  a2p = VARDATA_ANY(arg2);
2209 
2210  len1 = VARSIZE_ANY_EXHDR(arg1);
2211  len2 = VARSIZE_ANY_EXHDR(arg2);
2212 
2213  result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
2214 
2215  /* We can't afford to leak memory here. */
2216  if (PointerGetDatum(arg1) != x)
2217  pfree(arg1);
2218  if (PointerGetDatum(arg2) != y)
2219  pfree(arg2);
2220 
2221  return result;
2222 }
2223 
2224 /*
2225  * sortsupport comparison func (for locale case with NAME type)
2226  */
2227 static int
2229 {
2230  Name arg1 = DatumGetName(x);
2231  Name arg2 = DatumGetName(y);
2232 
2233  return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
2234  NameStr(*arg2), strlen(NameStr(*arg2)),
2235  ssup);
2236 }
2237 
2238 /*
2239  * sortsupport comparison func for locale cases
2240  */
2241 static int
2242 varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
2243 {
2245  int result;
2246  bool arg1_match;
2247 
2248  /* Fast pre-check for equality, as discussed in varstr_cmp() */
2249  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2250  {
2251  /*
2252  * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2253  * last_len2. Existing contents of buffers might still be used by
2254  * next call.
2255  *
2256  * It's fine to allow the comparison of BpChar padding bytes here,
2257  * even though that implies that the memcmp() will usually be
2258  * performed for BpChar callers (though multibyte characters could
2259  * still prevent that from occurring). The memcmp() is still very
2260  * cheap, and BpChar's funny semantics have us remove trailing spaces
2261  * (not limited to padding), so we need make no distinction between
2262  * padding space characters and "real" space characters.
2263  */
2264  return 0;
2265  }
2266 
2267  if (sss->typid == BPCHAROID)
2268  {
2269  /* Get true number of bytes, ignoring trailing spaces */
2270  len1 = bpchartruelen(a1p, len1);
2271  len2 = bpchartruelen(a2p, len2);
2272  }
2273 
2274  if (len1 >= sss->buflen1)
2275  {
2276  pfree(sss->buf1);
2277  sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2278  sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2279  }
2280  if (len2 >= sss->buflen2)
2281  {
2282  pfree(sss->buf2);
2283  sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2284  sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2285  }
2286 
2287  /*
2288  * We're likely to be asked to compare the same strings repeatedly, and
2289  * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2290  * comparisons, even though in general there is no reason to think that
2291  * that will work out (every string datum may be unique). Caching does
2292  * not slow things down measurably when it doesn't work out, and can speed
2293  * things up by rather a lot when it does. In part, this is because the
2294  * memcmp() compares data from cachelines that are needed in L1 cache even
2295  * when the last comparison's result cannot be reused.
2296  */
2297  arg1_match = true;
2298  if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2299  {
2300  arg1_match = false;
2301  memcpy(sss->buf1, a1p, len1);
2302  sss->buf1[len1] = '\0';
2303  sss->last_len1 = len1;
2304  }
2305 
2306  /*
2307  * If we're comparing the same two strings as last time, we can return the
2308  * same answer without calling strcoll() again. This is more likely than
2309  * it seems (at least with moderate to low cardinality sets), because
2310  * quicksort compares the same pivot against many values.
2311  */
2312  if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2313  {
2314  memcpy(sss->buf2, a2p, len2);
2315  sss->buf2[len2] = '\0';
2316  sss->last_len2 = len2;
2317  }
2318  else if (arg1_match && !sss->cache_blob)
2319  {
2320  /* Use result cached following last actual strcoll() call */
2321  return sss->last_returned;
2322  }
2323 
2324  if (sss->locale)
2325  {
2326  if (sss->locale->provider == COLLPROVIDER_ICU)
2327  {
2328 #ifdef USE_ICU
2329 #ifdef HAVE_UCOL_STRCOLLUTF8
2330  if (GetDatabaseEncoding() == PG_UTF8)
2331  {
2332  UErrorCode status;
2333 
2334  status = U_ZERO_ERROR;
2335  result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
2336  a1p, len1,
2337  a2p, len2,
2338  &status);
2339  if (U_FAILURE(status))
2340  ereport(ERROR,
2341  (errmsg("collation failed: %s", u_errorName(status))));
2342  }
2343  else
2344 #endif
2345  {
2346  int32_t ulen1,
2347  ulen2;
2348  UChar *uchar1,
2349  *uchar2;
2350 
2351  ulen1 = icu_to_uchar(&uchar1, a1p, len1);
2352  ulen2 = icu_to_uchar(&uchar2, a2p, len2);
2353 
2354  result = ucol_strcoll(sss->locale->info.icu.ucol,
2355  uchar1, ulen1,
2356  uchar2, ulen2);
2357 
2358  pfree(uchar1);
2359  pfree(uchar2);
2360  }
2361 #else /* not USE_ICU */
2362  /* shouldn't happen */
2363  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2364 #endif /* not USE_ICU */
2365  }
2366  else
2367  {
2368 #ifdef HAVE_LOCALE_T
2369  result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
2370 #else
2371  /* shouldn't happen */
2372  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2373 #endif
2374  }
2375  }
2376  else
2377  result = strcoll(sss->buf1, sss->buf2);
2378 
2379  /* Break tie if necessary. */
2380  if (result == 0 &&
2381  (!sss->locale || sss->locale->deterministic))
2382  result = strcmp(sss->buf1, sss->buf2);
2383 
2384  /* Cache result, perhaps saving an expensive strcoll() call next time */
2385  sss->cache_blob = false;
2386  sss->last_returned = result;
2387  return result;
2388 }
2389 
2390 /*
2391  * Abbreviated key comparison func
2392  */
2393 static int
2395 {
2396  /*
2397  * When 0 is returned, the core system will call varstrfastcmp_c()
2398  * (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale(). Even a
2399  * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2400  * authoritatively, for the same reason that there is a strcoll()
2401  * tie-breaker call to strcmp() in varstr_cmp().
2402  */
2403  if (x > y)
2404  return 1;
2405  else if (x == y)
2406  return 0;
2407  else
2408  return -1;
2409 }
2410 
2411 /*
2412  * Conversion routine for sortsupport. Converts original to abbreviated key
2413  * representation. Our encoding strategy is simple -- pack the first 8 bytes
2414  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2415  * stored in reverse order), and treat it as an unsigned integer. When the "C"
2416  * locale is used, or in case of bytea, just memcpy() from original instead.
2417  */
2418 static Datum
2420 {
2422  VarString *authoritative = DatumGetVarStringPP(original);
2423  char *authoritative_data = VARDATA_ANY(authoritative);
2424 
2425  /* working state */
2426  Datum res;
2427  char *pres;
2428  int len;
2429  uint32 hash;
2430 
2431  pres = (char *) &res;
2432  /* memset(), so any non-overwritten bytes are NUL */
2433  memset(pres, 0, sizeof(Datum));
2434  len = VARSIZE_ANY_EXHDR(authoritative);
2435 
2436  /* Get number of bytes, ignoring trailing spaces */
2437  if (sss->typid == BPCHAROID)
2438  len = bpchartruelen(authoritative_data, len);
2439 
2440  /*
2441  * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2442  * abbreviate keys. The full comparator for the C locale is always
2443  * memcmp(). It would be incorrect to allow bytea callers (callers that
2444  * always force the C collation -- bytea isn't a collatable type, but this
2445  * approach is convenient) to use strxfrm(). This is because bytea
2446  * strings may contain NUL bytes. Besides, this should be faster, too.
2447  *
2448  * More generally, it's okay that bytea callers can have NUL bytes in
2449  * strings because varstrcmp_abbrev() need not make a distinction between
2450  * terminating NUL bytes, and NUL bytes representing actual NULs in the
2451  * authoritative representation. Hopefully a comparison at or past one
2452  * abbreviated key's terminating NUL byte will resolve the comparison
2453  * without consulting the authoritative representation; specifically, some
2454  * later non-NUL byte in the longer string can resolve the comparison
2455  * against a subsequent terminating NUL in the shorter string. There will
2456  * usually be what is effectively a "length-wise" resolution there and
2457  * then.
2458  *
2459  * If that doesn't work out -- if all bytes in the longer string
2460  * positioned at or past the offset of the smaller string's (first)
2461  * terminating NUL are actually representative of NUL bytes in the
2462  * authoritative binary string (perhaps with some *terminating* NUL bytes
2463  * towards the end of the longer string iff it happens to still be small)
2464  * -- then an authoritative tie-breaker will happen, and do the right
2465  * thing: explicitly consider string length.
2466  */
2467  if (sss->collate_c)
2468  memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2469  else
2470  {
2471  Size bsize;
2472 #ifdef USE_ICU
2473  int32_t ulen = -1;
2474  UChar *uchar = NULL;
2475 #endif
2476 
2477  /*
2478  * We're not using the C collation, so fall back on strxfrm or ICU
2479  * analogs.
2480  */
2481 
2482  /* By convention, we use buffer 1 to store and NUL-terminate */
2483  if (len >= sss->buflen1)
2484  {
2485  pfree(sss->buf1);
2486  sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2487  sss->buf1 = palloc(sss->buflen1);
2488  }
2489 
2490  /* Might be able to reuse strxfrm() blob from last call */
2491  if (sss->last_len1 == len && sss->cache_blob &&
2492  memcmp(sss->buf1, authoritative_data, len) == 0)
2493  {
2494  memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2495  /* No change affecting cardinality, so no hashing required */
2496  goto done;
2497  }
2498 
2499  memcpy(sss->buf1, authoritative_data, len);
2500 
2501  /*
2502  * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
2503  * necessary for ICU, but doesn't hurt.
2504  */
2505  sss->buf1[len] = '\0';
2506  sss->last_len1 = len;
2507 
2508 #ifdef USE_ICU
2509  /* When using ICU and not UTF8, convert string to UChar. */
2510  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
2512  ulen = icu_to_uchar(&uchar, sss->buf1, len);
2513 #endif
2514 
2515  /*
2516  * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
2517  * and try again. Both of these functions have the result buffer
2518  * content undefined if the result did not fit, so we need to retry
2519  * until everything fits, even though we only need the first few bytes
2520  * in the end. When using ucol_nextSortKeyPart(), however, we only
2521  * ask for as many bytes as we actually need.
2522  */
2523  for (;;)
2524  {
2525 #ifdef USE_ICU
2526  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
2527  {
2528  /*
2529  * When using UTF8, use the iteration interface so we only
2530  * need to produce as many bytes as we actually need.
2531  */
2532  if (GetDatabaseEncoding() == PG_UTF8)
2533  {
2534  UCharIterator iter;
2535  uint32_t state[2];
2536  UErrorCode status;
2537 
2538  uiter_setUTF8(&iter, sss->buf1, len);
2539  state[0] = state[1] = 0; /* won't need that again */
2540  status = U_ZERO_ERROR;
2541  bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
2542  &iter,
2543  state,
2544  (uint8_t *) sss->buf2,
2545  Min(sizeof(Datum), sss->buflen2),
2546  &status);
2547  if (U_FAILURE(status))
2548  ereport(ERROR,
2549  (errmsg("sort key generation failed: %s",
2550  u_errorName(status))));
2551  }
2552  else
2553  bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
2554  uchar, ulen,
2555  (uint8_t *) sss->buf2, sss->buflen2);
2556  }
2557  else
2558 #endif
2559 #ifdef HAVE_LOCALE_T
2560  if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
2561  bsize = strxfrm_l(sss->buf2, sss->buf1,
2562  sss->buflen2, sss->locale->info.lt);
2563  else
2564 #endif
2565  bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2566 
2567  sss->last_len2 = bsize;
2568  if (bsize < sss->buflen2)
2569  break;
2570 
2571  /*
2572  * Grow buffer and retry.
2573  */
2574  pfree(sss->buf2);
2575  sss->buflen2 = Max(bsize + 1,
2576  Min(sss->buflen2 * 2, MaxAllocSize));
2577  sss->buf2 = palloc(sss->buflen2);
2578  }
2579 
2580  /*
2581  * Every Datum byte is always compared. This is safe because the
2582  * strxfrm() blob is itself NUL terminated, leaving no danger of
2583  * misinterpreting any NUL bytes not intended to be interpreted as
2584  * logically representing termination.
2585  *
2586  * (Actually, even if there were NUL bytes in the blob it would be
2587  * okay. See remarks on bytea case above.)
2588  */
2589  memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2590 
2591 #ifdef USE_ICU
2592  if (uchar)
2593  pfree(uchar);
2594 #endif
2595  }
2596 
2597  /*
2598  * Maintain approximate cardinality of both abbreviated keys and original,
2599  * authoritative keys using HyperLogLog. Used as cheap insurance against
2600  * the worst case, where we do many string transformations for no saving
2601  * in full strcoll()-based comparisons. These statistics are used by
2602  * varstr_abbrev_abort().
2603  *
2604  * First, Hash key proper, or a significant fraction of it. Mix in length
2605  * in order to compensate for cases where differences are past
2606  * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2607  */
2608  hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2609  Min(len, PG_CACHE_LINE_SIZE)));
2610 
2611  if (len > PG_CACHE_LINE_SIZE)
2612  hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2613 
2614  addHyperLogLog(&sss->full_card, hash);
2615 
2616  /* Hash abbreviated key */
2617 #if SIZEOF_DATUM == 8
2618  {
2619  uint32 lohalf,
2620  hihalf;
2621 
2622  lohalf = (uint32) res;
2623  hihalf = (uint32) (res >> 32);
2624  hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2625  }
2626 #else /* SIZEOF_DATUM != 8 */
2627  hash = DatumGetUInt32(hash_uint32((uint32) res));
2628 #endif
2629 
2630  addHyperLogLog(&sss->abbr_card, hash);
2631 
2632  /* Cache result, perhaps saving an expensive strxfrm() call next time */
2633  sss->cache_blob = true;
2634 done:
2635 
2636  /*
2637  * Byteswap on little-endian machines.
2638  *
2639  * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2640  * comparator) works correctly on all platforms. If we didn't do this,
2641  * the comparator would have to call memcmp() with a pair of pointers to
2642  * the first byte of each abbreviated key, which is slower.
2643  */
2644  res = DatumBigEndianToNative(res);
2645 
2646  /* Don't leak memory here */
2647  if (PointerGetDatum(authoritative) != original)
2648  pfree(authoritative);
2649 
2650  return res;
2651 }
2652 
2653 /*
2654  * Callback for estimating effectiveness of abbreviated key optimization, using
2655  * heuristic rules. Returns value indicating if the abbreviation optimization
2656  * should be aborted, based on its projected effectiveness.
2657  */
2658 static bool
2659 varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2660 {
2662  double abbrev_distinct,
2663  key_distinct;
2664 
2665  Assert(ssup->abbreviate);
2666 
2667  /* Have a little patience */
2668  if (memtupcount < 100)
2669  return false;
2670 
2671  abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2672  key_distinct = estimateHyperLogLog(&sss->full_card);
2673 
2674  /*
2675  * Clamp cardinality estimates to at least one distinct value. While
2676  * NULLs are generally disregarded, if only NULL values were seen so far,
2677  * that might misrepresent costs if we failed to clamp.
2678  */
2679  if (abbrev_distinct <= 1.0)
2680  abbrev_distinct = 1.0;
2681 
2682  if (key_distinct <= 1.0)
2683  key_distinct = 1.0;
2684 
2685  /*
2686  * In the worst case all abbreviated keys are identical, while at the same
2687  * time there are differences within full key strings not captured in
2688  * abbreviations.
2689  */
2690 #ifdef TRACE_SORT
2691  if (trace_sort)
2692  {
2693  double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2694 
2695  elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2696  "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2697  memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2698  sss->prop_card);
2699  }
2700 #endif
2701 
2702  /*
2703  * If the number of distinct abbreviated keys approximately matches the
2704  * number of distinct authoritative original keys, that's reason enough to
2705  * proceed. We can win even with a very low cardinality set if most
2706  * tie-breakers only memcmp(). This is by far the most important
2707  * consideration.
2708  *
2709  * While comparisons that are resolved at the abbreviated key level are
2710  * considerably cheaper than tie-breakers resolved with memcmp(), both of
2711  * those two outcomes are so much cheaper than a full strcoll() once
2712  * sorting is underway that it doesn't seem worth it to weigh abbreviated
2713  * cardinality against the overall size of the set in order to more
2714  * accurately model costs. Assume that an abbreviated comparison, and an
2715  * abbreviated comparison with a cheap memcmp()-based authoritative
2716  * resolution are equivalent.
2717  */
2718  if (abbrev_distinct > key_distinct * sss->prop_card)
2719  {
2720  /*
2721  * When we have exceeded 10,000 tuples, decay required cardinality
2722  * aggressively for next call.
2723  *
2724  * This is useful because the number of comparisons required on
2725  * average increases at a linearithmic rate, and at roughly 10,000
2726  * tuples that factor will start to dominate over the linear costs of
2727  * string transformation (this is a conservative estimate). The decay
2728  * rate is chosen to be a little less aggressive than halving -- which
2729  * (since we're called at points at which memtupcount has doubled)
2730  * would never see the cost model actually abort past the first call
2731  * following a decay. This decay rate is mostly a precaution against
2732  * a sudden, violent swing in how well abbreviated cardinality tracks
2733  * full key cardinality. The decay also serves to prevent a marginal
2734  * case from being aborted too late, when too much has already been
2735  * invested in string transformation.
2736  *
2737  * It's possible for sets of several million distinct strings with
2738  * mere tens of thousands of distinct abbreviated keys to still
2739  * benefit very significantly. This will generally occur provided
2740  * each abbreviated key is a proxy for a roughly uniform number of the
2741  * set's full keys. If it isn't so, we hope to catch that early and
2742  * abort. If it isn't caught early, by the time the problem is
2743  * apparent it's probably not worth aborting.
2744  */
2745  if (memtupcount > 10000)
2746  sss->prop_card *= 0.65;
2747 
2748  return false;
2749  }
2750 
2751  /*
2752  * Abort abbreviation strategy.
2753  *
2754  * The worst case, where all abbreviated keys are identical while all
2755  * original strings differ will typically only see a regression of about
2756  * 10% in execution time for small to medium sized lists of strings.
2757  * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2758  * often expect very large improvements, particularly with sets of strings
2759  * of moderately high to high abbreviated cardinality. There is little to
2760  * lose but much to gain, which our strategy reflects.
2761  */
2762 #ifdef TRACE_SORT
2763  if (trace_sort)
2764  elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2765  "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2766  memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2767 #endif
2768 
2769  return true;
2770 }
2771 
2772 Datum
2774 {
2775  text *arg1 = PG_GETARG_TEXT_PP(0);
2776  text *arg2 = PG_GETARG_TEXT_PP(1);
2777  text *result;
2778 
2779  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2780 
2781  PG_RETURN_TEXT_P(result);
2782 }
2783 
2784 Datum
2786 {
2787  text *arg1 = PG_GETARG_TEXT_PP(0);
2788  text *arg2 = PG_GETARG_TEXT_PP(1);
2789  text *result;
2790 
2791  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2792 
2793  PG_RETURN_TEXT_P(result);
2794 }
2795 
2796 
2797 /*
2798  * Cross-type comparison functions for types text and name.
2799  */
2800 
2801 Datum
2803 {
2804  Name arg1 = PG_GETARG_NAME(0);
2805  text *arg2 = PG_GETARG_TEXT_PP(1);
2806  size_t len1 = strlen(NameStr(*arg1));
2807  size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2808  Oid collid = PG_GET_COLLATION();
2809  bool result;
2810 
2811  check_collation_set(collid);
2812 
2813  if (collid == C_COLLATION_OID)
2814  result = (len1 == len2 &&
2815  memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2816  else
2817  result = (varstr_cmp(NameStr(*arg1), len1,
2818  VARDATA_ANY(arg2), len2,
2819  collid) == 0);
2820 
2821  PG_FREE_IF_COPY(arg2, 1);
2822 
2823  PG_RETURN_BOOL(result);
2824 }
2825 
2826 Datum
2828 {
2829  text *arg1 = PG_GETARG_TEXT_PP(0);
2830  Name arg2 = PG_GETARG_NAME(1);
2831  size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2832  size_t len2 = strlen(NameStr(*arg2));
2833  Oid collid = PG_GET_COLLATION();
2834  bool result;
2835 
2836  check_collation_set(collid);
2837 
2838  if (collid == C_COLLATION_OID)
2839  result = (len1 == len2 &&
2840  memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2841  else
2842  result = (varstr_cmp(VARDATA_ANY(arg1), len1,
2843  NameStr(*arg2), len2,
2844  collid) == 0);
2845 
2846  PG_FREE_IF_COPY(arg1, 0);
2847 
2848  PG_RETURN_BOOL(result);
2849 }
2850 
2851 Datum
2853 {
2854  Name arg1 = PG_GETARG_NAME(0);
2855  text *arg2 = PG_GETARG_TEXT_PP(1);
2856  size_t len1 = strlen(NameStr(*arg1));
2857  size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2858  Oid collid = PG_GET_COLLATION();
2859  bool result;
2860 
2861  check_collation_set(collid);
2862 
2863  if (collid == C_COLLATION_OID)
2864  result = !(len1 == len2 &&
2865  memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2866  else
2867  result = !(varstr_cmp(NameStr(*arg1), len1,
2868  VARDATA_ANY(arg2), len2,
2869  collid) == 0);
2870 
2871  PG_FREE_IF_COPY(arg2, 1);
2872 
2873  PG_RETURN_BOOL(result);
2874 }
2875 
2876 Datum
2878 {
2879  text *arg1 = PG_GETARG_TEXT_PP(0);
2880  Name arg2 = PG_GETARG_NAME(1);
2881  size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2882  size_t len2 = strlen(NameStr(*arg2));
2883  Oid collid = PG_GET_COLLATION();
2884  bool result;
2885 
2886  check_collation_set(collid);
2887 
2888  if (collid == C_COLLATION_OID)
2889  result = !(len1 == len2 &&
2890  memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2891  else
2892  result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
2893  NameStr(*arg2), len2,
2894  collid) == 0);
2895 
2896  PG_FREE_IF_COPY(arg1, 0);
2897 
2898  PG_RETURN_BOOL(result);
2899 }
2900 
2901 Datum
2903 {
2904  Name arg1 = PG_GETARG_NAME(0);
2905  text *arg2 = PG_GETARG_TEXT_PP(1);
2906  int32 result;
2907 
2908  result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
2909  VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
2910  PG_GET_COLLATION());
2911 
2912  PG_FREE_IF_COPY(arg2, 1);
2913 
2914  PG_RETURN_INT32(result);
2915 }
2916 
2917 Datum
2919 {
2920  text *arg1 = PG_GETARG_TEXT_PP(0);
2921  Name arg2 = PG_GETARG_NAME(1);
2922  int32 result;
2923 
2924  result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
2925  NameStr(*arg2), strlen(NameStr(*arg2)),
2926  PG_GET_COLLATION());
2927 
2928  PG_FREE_IF_COPY(arg1, 0);
2929 
2930  PG_RETURN_INT32(result);
2931 }
2932 
2933 #define CmpCall(cmpfunc) \
2934  DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
2935  PG_GET_COLLATION(), \
2936  PG_GETARG_DATUM(0), \
2937  PG_GETARG_DATUM(1)))
2938 
2939 Datum
2941 {
2943 }
2944 
2945 Datum
2947 {
2949 }
2950 
2951 Datum
2953 {
2955 }
2956 
2957 Datum
2959 {
2961 }
2962 
2963 Datum
2965 {
2967 }
2968 
2969 Datum
2971 {
2973 }
2974 
2975 Datum
2977 {
2979 }
2980 
2981 Datum
2983 {
2985 }
2986 
2987 #undef CmpCall
2988 
2989 
2990 /*
2991  * The following operators support character-by-character comparison
2992  * of text datums, to allow building indexes suitable for LIKE clauses.
2993  * Note that the regular texteq/textne comparison operators, and regular
2994  * support functions 1 and 2 with "C" collation are assumed to be
2995  * compatible with these!
2996  */
2997 
2998 static int
3000 {
3001  int result;
3002  int len1,
3003  len2;
3004 
3005  check_collation_set(collid);
3006 
3007  /*
3008  * XXX We cannot use a text_pattern_ops index for nondeterministic
3009  * collations, because these operators intentionally ignore the collation.
3010  * However, the planner has no way to know that, so it might choose such
3011  * an index for an "=" clause, which would lead to wrong results. This
3012  * check here doesn't prevent choosing the index, but it will at least
3013  * error out if the index is chosen. A text_pattern_ops index on a column
3014  * with nondeterministic collation is pretty useless anyway, since LIKE
3015  * etc. won't work there either. A future possibility would be to
3016  * annotate the operator class or its members in the catalog to avoid the
3017  * index. Another alternative is to stay away from the *_pattern_ops
3018  * operator classes and prefer creating LIKE-supporting indexes with
3019  * COLLATE "C".
3020  */
3021  if (!get_collation_isdeterministic(collid))
3022  ereport(ERROR,
3023  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3024  errmsg("nondeterministic collations are not supported for operator class \"%s\"",
3025  "text_pattern_ops")));
3026 
3027  len1 = VARSIZE_ANY_EXHDR(arg1);
3028  len2 = VARSIZE_ANY_EXHDR(arg2);
3029 
3030  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3031  if (result != 0)
3032  return result;
3033  else if (len1 < len2)
3034  return -1;
3035  else if (len1 > len2)
3036  return 1;
3037  else
3038  return 0;
3039 }
3040 
3041 
3042 Datum
3044 {
3045  text *arg1 = PG_GETARG_TEXT_PP(0);
3046  text *arg2 = PG_GETARG_TEXT_PP(1);
3047  int result;
3048 
3049  result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
3050 
3051  PG_FREE_IF_COPY(arg1, 0);
3052  PG_FREE_IF_COPY(arg2, 1);
3053 
3054  PG_RETURN_BOOL(result < 0);
3055 }
3056 
3057 
3058 Datum
3060 {
3061  text *arg1 = PG_GETARG_TEXT_PP(0);
3062  text *arg2 = PG_GETARG_TEXT_PP(1);
3063  int result;
3064 
3065  result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
3066 
3067  PG_FREE_IF_COPY(arg1, 0);
3068  PG_FREE_IF_COPY(arg2, 1);
3069 
3070  PG_RETURN_BOOL(result <= 0);
3071 }
3072 
3073 
3074 Datum
3076 {
3077  text *arg1 = PG_GETARG_TEXT_PP(0);
3078  text *arg2 = PG_GETARG_TEXT_PP(1);
3079  int result;
3080 
3081  result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
3082 
3083  PG_FREE_IF_COPY(arg1, 0);
3084  PG_FREE_IF_COPY(arg2, 1);
3085 
3086  PG_RETURN_BOOL(result >= 0);
3087 }
3088 
3089 
3090 Datum
3092 {
3093  text *arg1 = PG_GETARG_TEXT_PP(0);
3094  text *arg2 = PG_GETARG_TEXT_PP(1);
3095  int result;
3096 
3097  result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
3098 
3099  PG_FREE_IF_COPY(arg1, 0);
3100  PG_FREE_IF_COPY(arg2, 1);
3101 
3102  PG_RETURN_BOOL(result > 0);
3103 }
3104 
3105 
3106 Datum
3108 {
3109  text *arg1 = PG_GETARG_TEXT_PP(0);
3110  text *arg2 = PG_GETARG_TEXT_PP(1);
3111  int result;
3112 
3113  result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
3114 
3115  PG_FREE_IF_COPY(arg1, 0);
3116  PG_FREE_IF_COPY(arg2, 1);
3117 
3118  PG_RETURN_INT32(result);
3119 }
3120 
3121 
3122 Datum
3124 {
3126  Oid collid = ssup->ssup_collation;
3127  MemoryContext oldcontext;
3128 
3129  check_collation_set(collid);
3130 
3131  if (!get_collation_isdeterministic(collid))
3132  ereport(ERROR,
3133  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3134  errmsg("nondeterministic collations are not supported for operator class \"%s\"",
3135  "text_pattern_ops")));
3136 
3137  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3138 
3139  /* Use generic string SortSupport, forcing "C" collation */
3140  varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
3141 
3142  MemoryContextSwitchTo(oldcontext);
3143 
3144  PG_RETURN_VOID();
3145 }
3146 
3147 
3148 /*-------------------------------------------------------------
3149  * byteaoctetlen
3150  *
3151  * get the number of bytes contained in an instance of type 'bytea'
3152  *-------------------------------------------------------------
3153  */
3154 Datum
3156 {
3157  Datum str = PG_GETARG_DATUM(0);
3158 
3159  /* We need not detoast the input at all */
3161 }
3162 
3163 /*
3164  * byteacat -
3165  * takes two bytea* and returns a bytea* that is the concatenation of
3166  * the two.
3167  *
3168  * Cloned from textcat and modified as required.
3169  */
3170 Datum
3172 {
3173  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3174  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3175 
3177 }
3178 
3179 /*
3180  * bytea_catenate
3181  * Guts of byteacat(), broken out so it can be used by other functions
3182  *
3183  * Arguments can be in short-header form, but not compressed or out-of-line
3184  */
3185 static bytea *
3187 {
3188  bytea *result;
3189  int len1,
3190  len2,
3191  len;
3192  char *ptr;
3193 
3194  len1 = VARSIZE_ANY_EXHDR(t1);
3195  len2 = VARSIZE_ANY_EXHDR(t2);
3196 
3197  /* paranoia ... probably should throw error instead? */
3198  if (len1 < 0)
3199  len1 = 0;
3200  if (len2 < 0)
3201  len2 = 0;
3202 
3203  len = len1 + len2 + VARHDRSZ;
3204  result = (bytea *) palloc(len);
3205 
3206  /* Set size of result string... */
3207  SET_VARSIZE(result, len);
3208 
3209  /* Fill data field of result string... */
3210  ptr = VARDATA(result);
3211  if (len1 > 0)
3212  memcpy(ptr, VARDATA_ANY(t1), len1);
3213  if (len2 > 0)
3214  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
3215 
3216  return result;
3217 }
3218 
/* Build a bytea value from a C string by running it through byteain() */
#define PG_STR_GET_BYTEA(str_) \
	DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
3221 
3222 /*
3223  * bytea_substr()
3224  * Return a substring starting at the specified position.
3225  * Cloned from text_substr and modified as required.
3226  *
3227  * Input:
3228  * - string
3229  * - starting position (is one-based)
3230  * - string length (optional)
3231  *
3232  * If the starting position is zero or less, then return from the start of the string
3233  * adjusting the length to be consistent with the "negative start" per SQL.
3234  * If the length is less than zero, an ERROR is thrown. If no third argument
3235  * (length) is provided, the length to the end of the string is assumed.
3236  */
3237 Datum
3239 {
3241  PG_GETARG_INT32(1),
3242  PG_GETARG_INT32(2),
3243  false));
3244 }
3245 
3246 /*
3247  * bytea_substr_no_len -
3248  * Wrapper to avoid opr_sanity failure due to
3249  * one function accepting a different number of args.
3250  */
3251 Datum
3253 {
3255  PG_GETARG_INT32(1),
3256  -1,
3257  true));
3258 }
3259 
3260 static bytea *
3262  int S,
3263  int L,
3264  bool length_not_specified)
3265 {
3266  int S1; /* adjusted start position */
3267  int L1; /* adjusted substring length */
3268 
3269  S1 = Max(S, 1);
3270 
3271  if (length_not_specified)
3272  {
3273  /*
3274  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
3275  * end of the string if we pass it a negative value for length.
3276  */
3277  L1 = -1;
3278  }
3279  else
3280  {
3281  /* end position */
3282  int E = S + L;
3283 
3284  /*
3285  * A negative value for L is the only way for the end position to be
3286  * before the start. SQL99 says to throw an error.
3287  */
3288  if (E < S)
3289  ereport(ERROR,
3290  (errcode(ERRCODE_SUBSTRING_ERROR),
3291  errmsg("negative substring length not allowed")));
3292 
3293  /*
3294  * A zero or negative value for the end position can happen if the
3295  * start was negative or one. SQL99 says to return a zero-length
3296  * string.
3297  */
3298  if (E < 1)
3299  return PG_STR_GET_BYTEA("");
3300 
3301  L1 = E - S1;
3302  }
3303 
3304  /*
3305  * If the start position is past the end of the string, SQL99 says to
3306  * return a zero-length string -- DatumGetByteaPSlice() will do that for
3307  * us. Convert to zero-based starting position
3308  */
3309  return DatumGetByteaPSlice(str, S1 - 1, L1);
3310 }
3311 
3312 /*
3313  * byteaoverlay
3314  * Replace specified substring of first string with second
3315  *
3316  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
3317  * This code is a direct implementation of what the standard says.
3318  */
3319 Datum
3321 {
3322  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3323  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3324  int sp = PG_GETARG_INT32(2); /* substring start position */
3325  int sl = PG_GETARG_INT32(3); /* substring length */
3326 
3327  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3328 }
3329 
3330 Datum
3332 {
3333  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3334  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3335  int sp = PG_GETARG_INT32(2); /* substring start position */
3336  int sl;
3337 
3338  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
3339  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3340 }
3341 
3342 static bytea *
3343 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
3344 {
3345  bytea *result;
3346  bytea *s1;
3347  bytea *s2;
3348  int sp_pl_sl;
3349 
3350  /*
3351  * Check for possible integer-overflow cases. For negative sp, throw a
3352  * "substring length" error because that's what should be expected
3353  * according to the spec's definition of OVERLAY().
3354  */
3355  if (sp <= 0)
3356  ereport(ERROR,
3357  (errcode(ERRCODE_SUBSTRING_ERROR),
3358  errmsg("negative substring length not allowed")));
3359  if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
3360  ereport(ERROR,
3361  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
3362  errmsg("integer out of range")));
3363 
3364  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
3365  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
3366  result = bytea_catenate(s1, t2);
3367  result = bytea_catenate(result, s2);
3368 
3369  return result;
3370 }
3371 
3372 /*
3373  * byteapos -
3374  * Return the position of the specified substring.
3375  * Implements the SQL POSITION() function.
3376  * Cloned from textpos and modified as required.
3377  */
3378 Datum
3380 {
3381  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3382  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3383  int pos;
3384  int px,
3385  p;
3386  int len1,
3387  len2;
3388  char *p1,
3389  *p2;
3390 
3391  len1 = VARSIZE_ANY_EXHDR(t1);
3392  len2 = VARSIZE_ANY_EXHDR(t2);
3393 
3394  if (len2 <= 0)
3395  PG_RETURN_INT32(1); /* result for empty pattern */
3396 
3397  p1 = VARDATA_ANY(t1);
3398  p2 = VARDATA_ANY(t2);
3399 
3400  pos = 0;
3401  px = (len1 - len2);
3402  for (p = 0; p <= px; p++)
3403  {
3404  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
3405  {
3406  pos = p + 1;
3407  break;
3408  };
3409  p1++;
3410  };
3411 
3412  PG_RETURN_INT32(pos);
3413 }
3414 
3415 /*-------------------------------------------------------------
3416  * byteaGetByte
3417  *
3418  * this routine treats "bytea" as an array of bytes.
3419  * It returns the Nth byte (a number between 0 and 255).
3420  *-------------------------------------------------------------
3421  */
3422 Datum
3424 {
3425  bytea *v = PG_GETARG_BYTEA_PP(0);
3426  int32 n = PG_GETARG_INT32(1);
3427  int len;
3428  int byte;
3429 
3430  len = VARSIZE_ANY_EXHDR(v);
3431 
3432  if (n < 0 || n >= len)
3433  ereport(ERROR,
3434  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3435  errmsg("index %d out of valid range, 0..%d",
3436  n, len - 1)));
3437 
3438  byte = ((unsigned char *) VARDATA_ANY(v))[n];
3439 
3440  PG_RETURN_INT32(byte);
3441 }
3442 
3443 /*-------------------------------------------------------------
3444  * byteaGetBit
3445  *
3446  * This routine treats a "bytea" type like an array of bits.
3447  * It returns the value of the Nth bit (0 or 1).
3448  *
3449  *-------------------------------------------------------------
3450  */
3451 Datum
3453 {
3454  bytea *v = PG_GETARG_BYTEA_PP(0);
3455  int32 n = PG_GETARG_INT32(1);
3456  int byteNo,
3457  bitNo;
3458  int len;
3459  int byte;
3460 
3461  len = VARSIZE_ANY_EXHDR(v);
3462 
3463  if (n < 0 || n >= len * 8)
3464  ereport(ERROR,
3465  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3466  errmsg("index %d out of valid range, 0..%d",
3467  n, len * 8 - 1)));
3468 
3469  byteNo = n / 8;
3470  bitNo = n % 8;
3471 
3472  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3473 
3474  if (byte & (1 << bitNo))
3475  PG_RETURN_INT32(1);
3476  else
3477  PG_RETURN_INT32(0);
3478 }
3479 
3480 /*-------------------------------------------------------------
3481  * byteaSetByte
3482  *
3483  * Given an instance of type 'bytea' creates a new one with
3484  * the Nth byte set to the given value.
3485  *
3486  *-------------------------------------------------------------
3487  */
3488 Datum
3490 {
3491  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3492  int32 n = PG_GETARG_INT32(1);
3493  int32 newByte = PG_GETARG_INT32(2);
3494  int len;
3495 
3496  len = VARSIZE(res) - VARHDRSZ;
3497 
3498  if (n < 0 || n >= len)
3499  ereport(ERROR,
3500  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3501  errmsg("index %d out of valid range, 0..%d",
3502  n, len - 1)));
3503 
3504  /*
3505  * Now set the byte.
3506  */
3507  ((unsigned char *) VARDATA(res))[n] = newByte;
3508 
3509  PG_RETURN_BYTEA_P(res);
3510 }
3511 
3512 /*-------------------------------------------------------------
3513  * byteaSetBit
3514  *
3515  * Given an instance of type 'bytea' creates a new one with
3516  * the Nth bit set to the given value.
3517  *
3518  *-------------------------------------------------------------
3519  */
3520 Datum
3522 {
3523  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3524  int32 n = PG_GETARG_INT32(1);
3525  int32 newBit = PG_GETARG_INT32(2);
3526  int len;
3527  int oldByte,
3528  newByte;
3529  int byteNo,
3530  bitNo;
3531 
3532  len = VARSIZE(res) - VARHDRSZ;
3533 
3534  if (n < 0 || n >= len * 8)
3535  ereport(ERROR,
3536  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3537  errmsg("index %d out of valid range, 0..%d",
3538  n, len * 8 - 1)));
3539 
3540  byteNo = n / 8;
3541  bitNo = n % 8;
3542 
3543  /*
3544  * sanity check!
3545  */
3546  if (newBit != 0 && newBit != 1)
3547  ereport(ERROR,
3548  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3549  errmsg("new bit must be 0 or 1")));
3550 
3551  /*
3552  * Update the byte.
3553  */
3554  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3555 
3556  if (newBit == 0)
3557  newByte = oldByte & (~(1 << bitNo));
3558  else
3559  newByte = oldByte | (1 << bitNo);
3560 
3561  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3562 
3563  PG_RETURN_BYTEA_P(res);
3564 }
3565 
3566 
3567 /* text_name()
3568  * Converts a text type to a Name type.
3569  */
3570 Datum
3572 {
3573  text *s = PG_GETARG_TEXT_PP(0);
3574  Name result;
3575  int len;
3576 
3577  len = VARSIZE_ANY_EXHDR(s);
3578 
3579  /* Truncate oversize input */
3580  if (len >= NAMEDATALEN)
3581  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3582 
3583  /* We use palloc0 here to ensure result is zero-padded */
3584  result = (Name) palloc0(NAMEDATALEN);
3585  memcpy(NameStr(*result), VARDATA_ANY(s), len);
3586 
3587  PG_RETURN_NAME(result);
3588 }
3589 
3590 /* name_text()
3591  * Converts a Name type to a text type.
3592  */
3593 Datum
3595 {
3596  Name s = PG_GETARG_NAME(0);
3597 
3599 }
3600 
3601 
3602 /*
3603  * textToQualifiedNameList - convert a text object to list of names
3604  *
3605  * This implements the input parsing needed by nextval() and other
3606  * functions that take a text parameter representing a qualified name.
3607  * We split the name at dots, downcase if not double-quoted, and
3608  * truncate names if they're too long.
3609  */
3610 List *
3612 {
3613  char *rawname;
3614  List *result = NIL;
3615  List *namelist;
3616  ListCell *l;
3617 
3618  /* Convert to C string (handles possible detoasting). */
3619  /* Note we rely on being able to modify rawname below. */
3620  rawname = text_to_cstring(textval);
3621 
3622  if (!SplitIdentifierString(rawname, '.', &namelist))
3623  ereport(ERROR,
3624  (errcode(ERRCODE_INVALID_NAME),
3625  errmsg("invalid name syntax")));
3626 
3627  if (namelist == NIL)
3628  ereport(ERROR,
3629  (errcode(ERRCODE_INVALID_NAME),
3630  errmsg("invalid name syntax")));
3631 
3632  foreach(l, namelist)
3633  {
3634  char *curname = (char *) lfirst(l);
3635 
3636  result = lappend(result, makeString(pstrdup(curname)));
3637  }
3638 
3639  pfree(rawname);
3640  list_free(namelist);
3641 
3642  return result;
3643 }
3644 
3645 /*
3646  * SplitIdentifierString --- parse a string containing identifiers
3647  *
3648  * This is the guts of textToQualifiedNameList, and is exported for use in
3649  * other situations such as parsing GUC variables. In the GUC case, it's
3650  * important to avoid memory leaks, so the API is designed to minimize the
3651  * amount of stuff that needs to be allocated and freed.
3652  *
3653  * Inputs:
3654  * rawstring: the input string; must be overwritable! On return, it's
3655  * been modified to contain the separated identifiers.
3656  * separator: the separator punctuation expected between identifiers
3657  * (typically '.' or ','). Whitespace may also appear around
3658  * identifiers.
3659  * Outputs:
3660  * namelist: filled with a palloc'd list of pointers to identifiers within
3661  * rawstring. Caller should list_free() this even on error return.
3662  *
3663  * Returns true if okay, false if there is a syntax error in the string.
3664  *
3665  * Note that an empty string is considered okay here, though not in
3666  * textToQualifiedNameList.
3667  */
3668 bool
3669 SplitIdentifierString(char *rawstring, char separator,
3670  List **namelist)
3671 {
3672  char *nextp = rawstring;
3673  bool done = false;
3674 
3675  *namelist = NIL;
3676 
3677  while (scanner_isspace(*nextp))
3678  nextp++; /* skip leading whitespace */
3679 
3680  if (*nextp == '\0')
3681  return true; /* allow empty string */
3682 
3683  /* At the top of the loop, we are at start of a new identifier. */
3684  do
3685  {
3686  char *curname;
3687  char *endp;
3688 
3689  if (*nextp == '"')
3690  {
3691  /* Quoted name --- collapse quote-quote pairs, no downcasing */
3692  curname = nextp + 1;
3693  for (;;)
3694  {
3695  endp = strchr(nextp + 1, '"');
3696  if (endp == NULL)
3697  return false; /* mismatched quotes */
3698  if (endp[1] != '"')
3699  break; /* found end of quoted name */
3700  /* Collapse adjacent quotes into one quote, and look again */
3701  memmove(endp, endp + 1, strlen(endp));
3702  nextp = endp;
3703  }
3704  /* endp now points at the terminating quote */
3705  nextp = endp + 1;
3706  }
3707  else
3708  {
3709  /* Unquoted name --- extends to separator or whitespace */
3710  char *downname;
3711  int len;
3712 
3713  curname = nextp;
3714  while (*nextp && *nextp != separator &&
3715  !scanner_isspace(*nextp))
3716  nextp++;
3717  endp = nextp;
3718  if (curname == nextp)
3719  return false; /* empty unquoted name not allowed */
3720 
3721  /*
3722  * Downcase the identifier, using same code as main lexer does.
3723  *
3724  * XXX because we want to overwrite the input in-place, we cannot
3725  * support a downcasing transformation that increases the string
3726  * length. This is not a problem given the current implementation
3727  * of downcase_truncate_identifier, but we'll probably have to do
3728  * something about this someday.
3729  */
3730  len = endp - curname;
3731  downname = downcase_truncate_identifier(curname, len, false);
3732  Assert(strlen(downname) <= len);
3733  strncpy(curname, downname, len); /* strncpy is required here */
3734  pfree(downname);
3735  }
3736 
3737  while (scanner_isspace(*nextp))
3738  nextp++; /* skip trailing whitespace */
3739 
3740  if (*nextp == separator)
3741  {
3742  nextp++;
3743  while (scanner_isspace(*nextp))
3744  nextp++; /* skip leading whitespace for next */
3745  /* we expect another name, so done remains false */
3746  }
3747  else if (*nextp == '\0')
3748  done = true;
3749  else
3750  return false; /* invalid syntax */
3751 
3752  /* Now safe to overwrite separator with a null */
3753  *endp = '\0';
3754 
3755  /* Truncate name if it's overlength */
3756  truncate_identifier(curname, strlen(curname), false);
3757 
3758  /*
3759  * Finished isolating current name --- add it to list
3760  */
3761  *namelist = lappend(*namelist, curname);
3762 
3763  /* Loop back if we didn't reach end of string */
3764  } while (!done);
3765 
3766  return true;
3767 }
3768 
3769 
3770 /*
3771  * SplitDirectoriesString --- parse a string containing file/directory names
3772  *
3773  * This works fine on file names too; the function name is historical.
3774  *
3775  * This is similar to SplitIdentifierString, except that the parsing
3776  * rules are meant to handle pathnames instead of identifiers: there is
3777  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3778  * and we apply canonicalize_path() to each extracted string. Because of the
3779  * last, the returned strings are separately palloc'd rather than being
3780  * pointers into rawstring --- but we still scribble on rawstring.
3781  *
3782  * Inputs:
3783  * rawstring: the input string; must be modifiable!
3784  * separator: the separator punctuation expected between directories
3785  * (typically ',' or ';'). Whitespace may also appear around
3786  * directories.
3787  * Outputs:
3788  * namelist: filled with a palloc'd list of directory names.
3789  * Caller should list_free_deep() this even on error return.
3790  *
3791  * Returns true if okay, false if there is a syntax error in the string.
3792  *
3793  * Note that an empty string is considered okay here.
3794  */
3795 bool
3796 SplitDirectoriesString(char *rawstring, char separator,
3797  List **namelist)
3798 {
3799  char *nextp = rawstring;
3800  bool done = false;
3801 
3802  *namelist = NIL;
3803 
3804  while (scanner_isspace(*nextp))
3805  nextp++; /* skip leading whitespace */
3806 
3807  if (*nextp == '\0')
3808  return true; /* allow empty string */
3809 
3810  /* At the top of the loop, we are at start of a new directory. */
3811  do
3812  {
3813  char *curname;
3814  char *endp;
3815 
3816  if (*nextp == '"')
3817  {
3818  /* Quoted name --- collapse quote-quote pairs */
3819  curname = nextp + 1;
3820  for (;;)
3821  {
3822  endp = strchr(nextp + 1, '"');
3823  if (endp == NULL)
3824  return false; /* mismatched quotes */
3825  if (endp[1] != '"')
3826  break; /* found end of quoted name */
3827  /* Collapse adjacent quotes into one quote, and look again */
3828  memmove(endp, endp + 1, strlen(endp));
3829  nextp = endp;
3830  }
3831  /* endp now points at the terminating quote */
3832  nextp = endp + 1;
3833  }
3834  else
3835  {
3836  /* Unquoted name --- extends to separator or end of string */
3837  curname = endp = nextp;
3838  while (*nextp && *nextp != separator)
3839  {
3840  /* trailing whitespace should not be included in name */
3841  if (!scanner_isspace(*nextp))
3842  endp = nextp + 1;
3843  nextp++;
3844  }
3845  if (curname == endp)
3846  return false; /* empty unquoted name not allowed */
3847  }
3848 
3849  while (scanner_isspace(*nextp))
3850  nextp++; /* skip trailing whitespace */
3851 
3852  if (*nextp == separator)
3853  {
3854  nextp++;
3855  while (scanner_isspace(*nextp))
3856  nextp++; /* skip leading whitespace for next */
3857  /* we expect another name, so done remains false */
3858  }
3859  else if (*nextp == '\0')
3860  done = true;
3861  else
3862  return false; /* invalid syntax */
3863 
3864  /* Now safe to overwrite separator with a null */
3865  *endp = '\0';
3866 
3867  /* Truncate path if it's overlength */
3868  if (strlen(curname) >= MAXPGPATH)
3869  curname[MAXPGPATH - 1] = '\0';
3870 
3871  /*
3872  * Finished isolating current name --- add it to list
3873  */
3874  curname = pstrdup(curname);
3875  canonicalize_path(curname);
3876  *namelist = lappend(*namelist, curname);
3877 
3878  /* Loop back if we didn't reach end of string */
3879  } while (!done);
3880 
3881  return true;
3882 }
3883 
3884 
3885 /*
3886  * SplitGUCList --- parse a string containing identifiers or file names
3887  *
3888  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
3889  * presuming whether the elements will be taken as identifiers or file names.
3890  * We assume the input has already been through flatten_set_variable_args(),
3891  * so that we need never downcase (if appropriate, that was done already).
3892  * Nor do we ever truncate, since we don't know the correct max length.
3893  * We disallow embedded whitespace for simplicity (it shouldn't matter,
3894  * because any embedded whitespace should have led to double-quoting).
3895  * Otherwise the API is identical to SplitIdentifierString.
3896  *
3897  * XXX it's annoying to have so many copies of this string-splitting logic.
3898  * However, it's not clear that having one function with a bunch of option
3899  * flags would be much better.
3900  *
3901  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
3902  * Be sure to update that if you have to change this.
3903  *
3904  * Inputs:
3905  * rawstring: the input string; must be overwritable! On return, it's
3906  * been modified to contain the separated identifiers.
3907  * separator: the separator punctuation expected between identifiers
3908  * (typically '.' or ','). Whitespace may also appear around
3909  * identifiers.
3910  * Outputs:
3911  * namelist: filled with a palloc'd list of pointers to identifiers within
3912  * rawstring. Caller should list_free() this even on error return.
3913  *
3914  * Returns true if okay, false if there is a syntax error in the string.
3915  */
3916 bool
3917 SplitGUCList(char *rawstring, char separator,
3918  List **namelist)
3919 {
3920  char *nextp = rawstring;
3921  bool done = false;
3922 
3923  *namelist = NIL;
3924 
3925  while (scanner_isspace(*nextp))
3926  nextp++; /* skip leading whitespace */
3927 
3928  if (*nextp == '\0')
3929  return true; /* allow empty string */
3930 
3931  /* At the top of the loop, we are at start of a new identifier. */
3932  do
3933  {
3934  char *curname;
3935  char *endp;
3936 
3937  if (*nextp == '"')
3938  {
3939  /* Quoted name --- collapse quote-quote pairs */
3940  curname = nextp + 1;
3941  for (;;)
3942  {
3943  endp = strchr(nextp + 1, '"');
3944  if (endp == NULL)
3945  return false; /* mismatched quotes */
3946  if (endp[1] != '"')
3947  break; /* found end of quoted name */
3948  /* Collapse adjacent quotes into one quote, and look again */
3949  memmove(endp, endp + 1, strlen(endp));
3950  nextp = endp;
3951  }
3952  /* endp now points at the terminating quote */
3953  nextp = endp + 1;
3954  }
3955  else
3956  {
3957  /* Unquoted name --- extends to separator or whitespace */
3958  curname = nextp;
3959  while (*nextp && *nextp != separator &&
3960  !scanner_isspace(*nextp))
3961  nextp++;
3962  endp = nextp;
3963  if (curname == nextp)
3964  return false; /* empty unquoted name not allowed */
3965  }
3966 
3967  while (scanner_isspace(*nextp))
3968  nextp++; /* skip trailing whitespace */
3969 
3970  if (*nextp == separator)
3971  {
3972  nextp++;
3973  while (scanner_isspace(*nextp))
3974  nextp++; /* skip leading whitespace for next */
3975  /* we expect another name, so done remains false */
3976  }
3977  else if (*nextp == '\0')
3978  done = true;
3979  else
3980  return false; /* invalid syntax */
3981 
3982  /* Now safe to overwrite separator with a null */
3983  *endp = '\0';
3984 
3985  /*
3986  * Finished isolating current name --- add it to list
3987  */
3988  *namelist = lappend(*namelist, curname);
3989 
3990  /* Loop back if we didn't reach end of string */
3991  } while (!done);
3992 
3993  return true;
3994 }
3995 
3996 
3997 /*****************************************************************************
3998  * Comparison Functions used for bytea
3999  *
4000  * Note: btree indexes need these routines not to leak memory; therefore,
4001  * be careful to free working copies of toasted datums. Most places don't
4002  * need to be so careful.
4003  *****************************************************************************/
4004 
4005 Datum
4007 {
4008  Datum arg1 = PG_GETARG_DATUM(0);
4009  Datum arg2 = PG_GETARG_DATUM(1);
4010  bool result;
4011  Size len1,
4012  len2;
4013 
4014  /*
4015  * We can use a fast path for unequal lengths, which might save us from
4016  * having to detoast one or both values.
4017  */
4018  len1 = toast_raw_datum_size(arg1);
4019  len2 = toast_raw_datum_size(arg2);
4020  if (len1 != len2)
4021  result = false;
4022  else
4023  {
4024  bytea *barg1 = DatumGetByteaPP(arg1);
4025  bytea *barg2 = DatumGetByteaPP(arg2);
4026 
4027  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4028  len1 - VARHDRSZ) == 0);
4029 
4030  PG_FREE_IF_COPY(barg1, 0);
4031  PG_FREE_IF_COPY(barg2, 1);
4032  }
4033 
4034  PG_RETURN_BOOL(result);
4035 }
4036 
4037 Datum
4039 {
4040  Datum arg1 = PG_GETARG_DATUM(0);
4041  Datum arg2 = PG_GETARG_DATUM(1);
4042  bool result;
4043  Size len1,
4044  len2;
4045 
4046  /*
4047  * We can use a fast path for unequal lengths, which might save us from
4048  * having to detoast one or both values.
4049  */
4050  len1 = toast_raw_datum_size(arg1);
4051  len2 = toast_raw_datum_size(arg2);
4052  if (len1 != len2)
4053  result = true;
4054  else
4055  {
4056  bytea *barg1 = DatumGetByteaPP(arg1);
4057  bytea *barg2 = DatumGetByteaPP(arg2);
4058 
4059  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4060  len1 - VARHDRSZ) != 0);
4061 
4062  PG_FREE_IF_COPY(barg1, 0);
4063  PG_FREE_IF_COPY(barg2, 1);
4064  }
4065 
4066  PG_RETURN_BOOL(result);
4067 }
4068 
4069 Datum
4071 {
4072  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4073  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4074  int len1,
4075  len2;
4076  int cmp;
4077 
4078  len1 = VARSIZE_ANY_EXHDR(arg1);
4079  len2 = VARSIZE_ANY_EXHDR(arg2);
4080 
4081  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4082 
4083  PG_FREE_IF_COPY(arg1, 0);
4084  PG_FREE_IF_COPY(arg2, 1);
4085 
4086  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
4087 }
4088 
4089 Datum
4091 {
4092  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4093  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4094  int len1,
4095  len2;
4096  int cmp;
4097 
4098  len1 = VARSIZE_ANY_EXHDR(arg1);
4099  len2 = VARSIZE_ANY_EXHDR(arg2);
4100 
4101  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4102 
4103  PG_FREE_IF_COPY(arg1, 0);
4104  PG_FREE_IF_COPY(arg2, 1);
4105 
4106  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
4107 }
4108 
4109 Datum
4111 {
4112  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4113  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4114  int len1,
4115  len2;
4116  int cmp;
4117 
4118  len1 = VARSIZE_ANY_EXHDR(arg1);
4119  len2 = VARSIZE_ANY_EXHDR(arg2);
4120 
4121  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4122 
4123  PG_FREE_IF_COPY(arg1, 0);
4124  PG_FREE_IF_COPY(arg2, 1);
4125 
4126  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
4127 }
4128 
4129 Datum
4131 {
4132  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4133  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4134  int len1,
4135  len2;
4136  int cmp;
4137 
4138  len1 = VARSIZE_ANY_EXHDR(arg1);
4139  len2 = VARSIZE_ANY_EXHDR(arg2);
4140 
4141  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4142 
4143  PG_FREE_IF_COPY(arg1, 0);
4144  PG_FREE_IF_COPY(arg2, 1);
4145 
4146  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
4147 }
4148 
4149 Datum
4151 {
4152  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4153  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4154  int len1,
4155  len2;
4156  int cmp;
4157 
4158  len1 = VARSIZE_ANY_EXHDR(arg1);
4159  len2 = VARSIZE_ANY_EXHDR(arg2);
4160 
4161  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4162  if ((cmp == 0) && (len1 != len2))
4163  cmp = (len1 < len2) ? -1 : 1;
4164 
4165  PG_FREE_IF_COPY(arg1, 0);
4166  PG_FREE_IF_COPY(arg2, 1);
4167 
4168  PG_RETURN_INT32(cmp);
4169 }
4170 
4171 Datum
4173 {
4175  MemoryContext oldcontext;
4176 
4177  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
4178 
4179  /* Use generic string SortSupport, forcing "C" collation */
4180  varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
4181 
4182  MemoryContextSwitchTo(oldcontext);
4183 
4184  PG_RETURN_VOID();
4185 }
4186 
4187 /*
4188  * appendStringInfoText
4189  *
4190  * Append a text to str.
4191  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
4192  */
4193 static void
4195 {
4197 }
4198 
4199 /*
4200  * replace_text
4201  * replace all occurrences of 'old_sub_str' in 'orig_str'
4202  * with 'new_sub_str' to form 'new_str'
4203  *
4204  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
4205  * otherwise returns 'new_str'
4206  */
4207 Datum
4209 {
4210  text *src_text = PG_GETARG_TEXT_PP(0);
4211  text *from_sub_text = PG_GETARG_TEXT_PP(1);
4212  text *to_sub_text = PG_GETARG_TEXT_PP(2);
4213  int src_text_len;
4214  int from_sub_text_len;
4216  text *ret_text;
4217  int chunk_len;
4218  char *curr_ptr;
4219  char *start_ptr;
4221  bool found;
4222 
4223  src_text_len = VARSIZE_ANY_EXHDR(src_text);
4224  from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
4225 
4226  /* Return unmodified source string if empty source or pattern */
4227  if (src_text_len < 1 || from_sub_text_len < 1)
4228  {
4229  PG_RETURN_TEXT_P(src_text);
4230  }
4231 
4232  text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
4233 
4234  found = text_position_next(&state);
4235 
4236  /* When the from_sub_text is not found, there is nothing to do. */
4237  if (!found)
4238  {
4239  text_position_cleanup(&state);
4240  PG_RETURN_TEXT_P(src_text);
4241  }
4242  curr_ptr = text_position_get_match_ptr(&state);
4243  start_ptr = VARDATA_ANY(src_text);
4244 
4245  initStringInfo(&str);
4246 
4247  do
4248  {
4250 
4251  /* copy the data skipped over by last text_position_next() */
4252  chunk_len = curr_ptr - start_ptr;
4253  appendBinaryStringInfo(&str, start_ptr, chunk_len);
4254 
4255  appendStringInfoText(&str, to_sub_text);
4256 
4257  start_ptr = curr_ptr + from_sub_text_len;
4258 
4259  found = text_position_next(&state);
4260  if (found)
4261  curr_ptr = text_position_get_match_ptr(&state);
4262  }
4263  while (found);
4264 
4265  /* copy trailing data */
4266  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4267  appendBinaryStringInfo(&str, start_ptr, chunk_len);
4268 
4269  text_position_cleanup(&state);
4270 
4271  ret_text = cstring_to_text_with_len(str.data, str.len);
4272  pfree(str.data);
4273 
4274  PG_RETURN_TEXT_P(ret_text);
4275 }
4276 
4277 /*
4278  * check_replace_text_has_escape_char
4279  *
4280  * check whether replace_text contains escape char.
4281  */
4282 static bool
4284 {
4285  const char *p = VARDATA_ANY(replace_text);
4286  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4287 
4289  {
4290  for (; p < p_end; p++)
4291  {
4292  if (*p == '\\')
4293  return true;
4294  }
4295  }
4296  else
4297  {
4298  for (; p < p_end; p += pg_mblen(p))
4299  {
4300  if (*p == '\\')
4301  return true;
4302  }
4303  }
4304 
4305  return false;
4306 }
4307 
4308 /*
4309  * appendStringInfoRegexpSubstr
4310  *
4311  * Append replace_text to str, substituting regexp back references for
4312  * \n escapes. start_ptr is the start of the match in the source string,
4313  * at logical character position data_pos.
4314  */
4315 static void
4317  regmatch_t *pmatch,
4318  char *start_ptr, int data_pos)
4319 {
4320  const char *p = VARDATA_ANY(replace_text);
4321  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4322  int eml = pg_database_encoding_max_length();
4323 
4324  for (;;)
4325  {
4326  const char *chunk_start = p;
4327  int so;
4328  int eo;
4329 
4330  /* Find next escape char. */
4331  if (eml == 1)
4332  {
4333  for (; p < p_end && *p != '\\'; p++)
4334  /* nothing */ ;
4335  }
4336  else
4337  {
4338  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
4339  /* nothing */ ;
4340  }
4341 
4342  /* Copy the text we just scanned over, if any. */
4343  if (p > chunk_start)
4344  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
4345 
4346  /* Done if at end of string, else advance over escape char. */
4347  if (p >= p_end)
4348  break;
4349  p++;
4350 
4351  if (p >= p_end)
4352  {
4353  /* Escape at very end of input. Treat same as unexpected char */
4354  appendStringInfoChar(str, '\\');
4355  break;
4356  }
4357 
4358  if (*p >= '1' && *p <= '9')
4359  {
4360  /* Use the back reference of regexp. */
4361  int idx = *p - '0';
4362 
4363  so = pmatch[idx].rm_so;
4364  eo = pmatch[idx].rm_eo;
4365  p++;
4366  }
4367  else if (*p == '&')
4368  {
4369  /* Use the entire matched string. */
4370  so = pmatch[0].rm_so;
4371  eo = pmatch[0].rm_eo;
4372  p++;
4373  }
4374  else if (*p == '\\')
4375  {
4376  /* \\ means transfer one \ to output. */
4377  appendStringInfoChar(str, '\\');
4378  p++;
4379  continue;
4380  }
4381  else
4382  {
4383  /*
4384  * If escape char is not followed by any expected char, just treat
4385  * it as ordinary data to copy. (XXX would it be better to throw
4386  * an error?)
4387  */
4388  appendStringInfoChar(str, '\\');
4389  continue;
4390  }
4391 
4392  if (so != -1 && eo != -1)
4393  {
4394  /*
4395  * Copy the text that is back reference of regexp. Note so and eo
4396  * are counted in characters not bytes.
4397  */
4398  char *chunk_start;
4399  int chunk_len;
4400 
4401  Assert(so >= data_pos);
4402  chunk_start = start_ptr;
4403  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
4404  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
4405  appendBinaryStringInfo(str, chunk_start, chunk_len);
4406  }
4407  }
4408 }
4409 
4410 #define REGEXP_REPLACE_BACKREF_CNT 10
4411 
4412 /*
4413  * replace_text_regexp
4414  *
4415  * replace text that matches to regexp in src_text to replace_text.
4416  *
4417  * Note: to avoid having to include regex.h in builtins.h, we declare
4418  * the regexp argument as void *, but really it's regex_t *.
4419  */
4420 text *
4421 replace_text_regexp(text *src_text, void *regexp,
4422  text *replace_text, bool glob)
4423 {
4424  text *ret_text;
4425  regex_t *re = (regex_t *) regexp;
4426  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4429  pg_wchar *data;
4430  size_t data_len;
4431  int search_start;
4432  int data_pos;
4433  char *start_ptr;
4434  bool have_escape;
4435 
4436  initStringInfo(&buf);
4437 
4438  /* Convert data string to wide characters. */
4439  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
4440  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
4441 
4442  /* Check whether replace_text has escape char. */
4443  have_escape = check_replace_text_has_escape_char(replace_text);
4444 
4445  /* start_ptr points to the data_pos'th character of src_text */
4446  start_ptr = (char *) VARDATA_ANY(src_text);
4447  data_pos = 0;
4448 
4449  search_start = 0;
4450  while (search_start <= data_len)
4451  {
4452  int regexec_result;
4453 
4455 
4456  regexec_result = pg_regexec(re,
4457  data,
4458  data_len,
4459  search_start,
4460  NULL, /* no details */
4462  pmatch,
4463  0);
4464 
4465  if (regexec_result == REG_NOMATCH)
4466  break;
4467 
4468  if (regexec_result != REG_OKAY)
4469  {
4470  char errMsg[100];
4471 
4473  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
4474  ereport(ERROR,
4475  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
4476  errmsg("regular expression failed: %s", errMsg)));
4477  }
4478 
4479  /*
4480  * Copy the text to the left of the match position. Note we are given
4481  * character not byte indexes.
4482  */
4483  if (pmatch[0].rm_so - data_pos > 0)
4484  {
4485  int chunk_len;
4486 
4487  chunk_len = charlen_to_bytelen(start_ptr,
4488  pmatch[0].rm_so - data_pos);
4489  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4490 
4491  /*
4492  * Advance start_ptr over that text, to avoid multiple rescans of
4493  * it if the replace_text contains multiple back-references.
4494  */
4495  start_ptr += chunk_len;
4496  data_pos = pmatch[0].rm_so;
4497  }
4498 
4499  /*
4500  * Copy the replace_text. Process back references when the
4501  * replace_text has escape characters.
4502  */
4503  if (have_escape)
4504  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
4505  start_ptr, data_pos);
4506  else
4507  appendStringInfoText(&buf, replace_text);
4508 
4509  /* Advance start_ptr and data_pos over the matched text. */
4510  start_ptr += charlen_to_bytelen(start_ptr,
4511  pmatch[0].rm_eo - data_pos);
4512  data_pos = pmatch[0].rm_eo;
4513 
4514  /*
4515  * When global option is off, replace the first instance only.
4516  */
4517  if (!glob)
4518  break;
4519 
4520  /*
4521  * Advance search position. Normally we start the next search at the
4522  * end of the previous match; but if the match was of zero length, we
4523  * have to advance by one character, or we'd just find the same match
4524  * again.
4525  */
4526  search_start = data_pos;
4527  if (pmatch[0].rm_so == pmatch[0].rm_eo)
4528  search_start++;
4529  }
4530 
4531  /*
4532  * Copy the text to the right of the last match.
4533  */
4534  if (data_pos < data_len)
4535  {
4536  int chunk_len;
4537 
4538  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4539  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4540  }
4541 
4542  ret_text = cstring_to_text_with_len(buf.data, buf.len);
4543  pfree(buf.data);
4544  pfree(data);
4545 
4546  return ret_text;
4547 }
4548 
4549 /*
4550  * split_text
4551  * parse input string
4552  * return ord item (1 based)
4553  * based on provided field separator
4554  */
4555 Datum
4557 {
4558  text *inputstring = PG_GETARG_TEXT_PP(0);
4559  text *fldsep = PG_GETARG_TEXT_PP(1);
4560  int fldnum = PG_GETARG_INT32(2);
4561  int inputstring_len;
4562  int fldsep_len;
4564  char *start_ptr;
4565  char *end_ptr;
4566  text *result_text;
4567  bool found;
4568 
4569  /* field number is 1 based */
4570  if (fldnum < 1)
4571  ereport(ERROR,
4572  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4573  errmsg("field position must be greater than zero")));
4574 
4575  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4576  fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4577 
4578  /* return empty string for empty input string */
4579  if (inputstring_len < 1)
4581 
4582  /* empty field separator */
4583  if (fldsep_len < 1)
4584  {
4585  text_position_cleanup(&state);
4586  /* if first field, return input string, else empty string */
4587  if (fldnum == 1)
4588  PG_RETURN_TEXT_P(inputstring);
4589  else
4591  }
4592 
4593  text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4594 
4595  /* identify bounds of first field */
4596  start_ptr = VARDATA_ANY(inputstring);
4597  found = text_position_next(&state);
4598 
4599  /* special case if fldsep not found at all */
4600  if (!found)
4601  {
4602  text_position_cleanup(&state);
4603  /* if field 1 requested, return input string, else empty string */
4604  if (fldnum == 1)
4605  PG_RETURN_TEXT_P(inputstring);
4606  else
4608  }
4609  end_ptr = text_position_get_match_ptr(&state);
4610 
4611  while (found && --fldnum > 0)
4612  {
4613  /* identify bounds of next field */
4614  start_ptr = end_ptr + fldsep_len;
4615  found = text_position_next(&state);
4616  if (found)
4617  end_ptr = text_position_get_match_ptr(&state);
4618  }
4619 
4620  text_position_cleanup(&state);
4621 
4622  if (fldnum > 0)
4623  {
4624  /* N'th field separator not found */
4625  /* if last field requested, return it, else empty string */
4626  if (fldnum == 1)
4627  {
4628  int last_len = start_ptr - VARDATA_ANY(inputstring);
4629 
4630  result_text = cstring_to_text_with_len(start_ptr,
4631  inputstring_len - last_len);
4632  }
4633  else
4634  result_text = cstring_to_text("");
4635  }
4636  else
4637  {
4638  /* non-last field requested */
4639  result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
4640  }
4641 
4642  PG_RETURN_TEXT_P(result_text);
4643 }
4644 
4645 /*
4646  * Convenience function to return true when two text params are equal.
 *
 * txt1, txt2: the two text values to compare; both are passed on as
 *		pointer Datums.
 * collid: collation OID, passed ahead of the two values.
 *
 * NOTE(review): the opening of the return expression (listing line 4651)
 * is absent from this listing; only its trailing arguments are visible.
 * Presumably it invokes the text equality function under "collid" and
 * converts the result to bool -- confirm against the upstream file.
4647  */
4648 static bool
4649 text_isequal(text *txt1, text *txt2, Oid collid)
4650 {
4652  collid,
4653  PointerGetDatum(txt1),
4654  PointerGetDatum(txt2)));
4655 }
4656 
4657 /*
4658  * text_to_array
4659  * parse input string and return text array of elements,
4660  * based on provided field separator
 *
 * Thin entry point: forwards fcinfo unchanged to text_to_array_internal()
 * and returns its result.
 *
 * NOTE(review): the line carrying this function's name and parameter list
 * (listing line 4663) is absent from this listing; the header comment
 * suggests the name is text_to_array -- confirm against the upstream file.
4661  */
4662 Datum
4664 {
4665  return text_to_array_internal(fcinfo);
4666 }
4667 
4668 /*
4669  * text_to_array_null
4670  * parse input string and return text array of elements,
4671  * based on provided field separator and null string
4672  *
4673  * This is a separate entry point only to prevent the regression tests from
4674  * complaining about different argument sets for the same internal function.
 *
 * Like the sibling entry point, it simply forwards fcinfo to
 * text_to_array_internal(), which itself checks how many arguments were
 * supplied.
 *
 * NOTE(review): the line carrying this function's name and parameter list
 * (listing line 4677) is absent from this listing; the header comment
 * suggests the name is text_to_array_null -- confirm against upstream.
4675  */
4676 Datum
4678 {
4679  return text_to_array_internal(fcinfo);
4680 }
4681 
4682 /*
4683  * common code for text_to_array and text_to_array_null functions
4684  *
4685  * These are not strict so we have to test for null inputs explicitly.
 *
 * fmgr arguments:
 *		0: input string; if NULL the result is NULL.
 *		1: field separator; may be NULL (see below).
 *		2: optional null string; treated as absent when NULL or when fewer
 *		   than three arguments were passed.
 *
 * With a non-NULL separator, the text_position_* routines locate each
 * occurrence of the separator and the chunks between occurrences become the
 * array elements.  With a NULL separator, the input is cut into chunks of
 * pg_mblen() bytes, one element per chunk.  In both modes an element that
 * text_isequal() reports equal to the null string is accumulated as a NULL.
 *
 * NOTE(review): several lines of this function are absent from this
 * listing (listing lines 4688, 4724, 4731, 4752, 4767, 4792, 4815, 4823,
 * 4834 and 4843-4844).  Among them are the function's name/parameter line,
 * the declaration of "state", the statements guarded by the "empty input"
 * tests, the tails of the accumArrayResult() calls and the final return.
 * Consult the upstream file before relying on this text.
4686  */
4687 static Datum
4689 {
4690  text *inputstring;
4691  text *fldsep;
4692  text *null_string;
4693  int inputstring_len;
4694  int fldsep_len;
4695  char *start_ptr;
4696  text *result_text;
4697  bool is_null;
4698  ArrayBuildState *astate = NULL;
4699 
4700  /* when input string is NULL, then result is NULL too */
4701  if (PG_ARGISNULL(0))
4702  PG_RETURN_NULL();
4703 
4704  inputstring = PG_GETARG_TEXT_PP(0);
4705 
4706  /* fldsep can be NULL */
4707  if (!PG_ARGISNULL(1))
4708  fldsep = PG_GETARG_TEXT_PP(1);
4709  else
4710  fldsep = NULL;
4711 
4712  /* null_string can be NULL or omitted */
4713  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4714  null_string = PG_GETARG_TEXT_PP(2);
4715  else
4716  null_string = NULL;
4717 
4718  if (fldsep != NULL)
4719  {
4720  /*
4721  * Normal case with non-null fldsep. Use the text_position machinery
4722  * to search for occurrences of fldsep.
4723  */
 /* NOTE(review): listing line 4724 (presumably the "state" declaration) is absent */
4725 
4726  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4727  fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4728 
4729  /* return empty array for empty input string */
4730  if (inputstring_len < 1)
 /* NOTE(review): the statement guarded by this test (listing line 4731) is absent */
4732 
4733  /*
4734  * empty field separator: return the input string as a one-element
4735  * array
4736  */
4737  if (fldsep_len < 1)
4738  {
4739  Datum elems[1];
4740  bool nulls[1];
4741  int dims[1];
4742  int lbs[1];
4743 
4744  /* single element can be a NULL too */
4745  is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false;
4746 
 /* one dimension, one element, lower bound 1 */
4747  elems[0] = PointerGetDatum(inputstring);
4748  nulls[0] = is_null;
4749  dims[0] = 1;
4750  lbs[0] = 1;
4751  /* XXX: this hardcodes assumptions about the text type */
 /* NOTE(review): the head of this call/return (listing line 4752) is absent */
4753  1, dims, lbs,
4754  TEXTOID, -1, false, 'i'));
4755  }
4756 
4757  text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4758 
4759  start_ptr = VARDATA_ANY(inputstring);
4760 
 /* one iteration per field; exits after the field that follows the last match */
4761  for (;;)
4762  {
4763  bool found;
4764  char *end_ptr;
4765  int chunk_len;
4766 
 /* NOTE(review): listing line 4767 is absent here */
4768 
4769  found = text_position_next(&state);
4770  if (!found)
4771  {
4772  /* fetch last field */
4773  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4774  end_ptr = NULL; /* not used, but some compilers complain */
4775  }
4776  else
4777  {
4778  /* fetch non-last field */
4779  end_ptr = text_position_get_match_ptr(&state);
4780  chunk_len = end_ptr - start_ptr;
4781  }
4782 
4783  /* must build a temp text datum to pass to accumArrayResult */
4784  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4785  is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
4786 
4787  /* stash away this field */
4788  astate = accumArrayResult(astate,
4789  PointerGetDatum(result_text),
4790  is_null,
4791  TEXTOID,
 /* NOTE(review): the final argument of this call (listing line 4792) is absent */
4793 
4794  pfree(result_text);
4795 
4796  if (!found)
4797  break;
4798 
 /* next field begins right after the separator just matched */
4799  start_ptr = end_ptr + fldsep_len;
4800  }
4801 
4802  text_position_cleanup(&state);
4803  }
4804  else
4805  {
4806  /*
4807  * When fldsep is NULL, each character in the inputstring becomes an
4808  * element in the result array. The separator is effectively the
4809  * space between characters.
4810  */
4811  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4812 
4813  /* return empty array for empty input string */
4814  if (inputstring_len < 1)
 /* NOTE(review): the statement guarded by this test (listing line 4815) is absent */
4816 
4817  start_ptr = VARDATA_ANY(inputstring);
4818 
 /* inputstring_len is reused as the count of bytes still to be consumed */
4819  while (inputstring_len > 0)
4820  {
4821  int chunk_len = pg_mblen(start_ptr);
4822 
 /* NOTE(review): listing line 4823 is absent here */
4824 
4825  /* must build a temp text datum to pass to accumArrayResult */
4826  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4827  is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
4828 
4829  /* stash away this field */
4830  astate = accumArrayResult(astate,
4831  PointerGetDatum(result_text),
4832  is_null,
4833  TEXTOID,
 /* NOTE(review): the final argument of this call (listing line 4834) is absent */
4835 
4836  pfree(result_text);
4837 
4838  start_ptr += chunk_len;
4839  inputstring_len -= chunk_len;
4840  }
4841  }
4842 
 /* NOTE(review): the function's return statement (listing lines 4843-4844) is absent */
4845 }
4846 
4847 /*
4848  * array_to_text
4849  * concatenate Cstring representation of input array elements
4850  * using provided field separator
 *
 * The separator (fmgr argument 1) is converted to a C string and handed to
 * array_to_text_internal() together with a NULL null-string, which makes
 * that routine skip null elements.
 *
 * NOTE(review): the name/parameter line (listing line 4853) and the
 * declaration of "v" (listing line 4855) are absent from this listing;
 * "v" is presumably the array taken from argument 0 -- confirm upstream.
4851  */
4852 Datum
4854 {
4856  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4857 
4858  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4859 }
4860 
4861 /*
4862  * array_to_text_null
4863  * concatenate Cstring representation of input array elements
4864  * using provided field separator and null string
4865  *
4866  * This version is not strict so we have to test for null inputs explicitly.
 *
 * fmgr arguments:
 *		0: the array; a NULL here makes the result NULL.
 *		1: field separator; a NULL here makes the result NULL.
 *		2: null string; when NULL, a null pointer is passed down, which
 *		   makes array_to_text_internal() skip null elements.
 *
 * NOTE(review): the name/parameter line (listing line 4869) is absent from
 * this listing; the header comment suggests the name is array_to_text_null.
4867  */
4868 Datum
4870 {
4871  ArrayType *v;
4872  char *fldsep;
4873  char *null_string;
4874 
4875  /* returns NULL when first or second parameter is NULL */
4876  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4877  PG_RETURN_NULL();
4878 
4879  v = PG_GETARG_ARRAYTYPE_P(0);
4880  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4881 
4882  /* NULL null string is passed through as a null pointer */
4883  if (!PG_ARGISNULL(2))
4884  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4885  else
4886  null_string = NULL;
4887 
4888  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4889 }
4890 
4891 /*
4892  * common code for array_to_text and array_to_text_null functions
 *
 * fldsep: separator written between consecutive printed items.
 * null_string: text printed for a null element, or NULL to skip null
 *		elements entirely.
 *
 * Returns a new text value holding the concatenation; an array with no
 * elements yields an empty string.  Element type I/O information is cached
 * in fcinfo->flinfo->fn_extra and refreshed when the element type differs
 * from the cached one.
 *
 * NOTE(review): the first line of the parameter list (listing line 4895,
 * which would carry the function name and the "fcinfo" and "v" parameters
 * used below) and the declaration of "buf" (listing line 4906) are absent
 * from this listing.
4893  */
4894 static text *
4896  const char *fldsep, const char *null_string)
4897 {
4898  text *result;
4899  int nitems,
4900  *dims,
4901  ndims;
4902  Oid element_type;
4903  int typlen;
4904  bool typbyval;
4905  char typalign;
4907  bool printed = false;
4908  char *p;
4909  bits8 *bitmap;
4910  int bitmask;
4911  int i;
4912  ArrayMetaState *my_extra;
4913 
4914  ndims = ARR_NDIM(v);
4915  dims = ARR_DIMS(v);
4916  nitems = ArrayGetNItems(ndims, dims);
4917 
4918  /* if there are no elements, return an empty string */
4919  if (nitems == 0)
4920  return cstring_to_text_with_len("", 0);
4921 
4922  element_type = ARR_ELEMTYPE(v);
4923  initStringInfo(&buf);
4924 
4925  /*
4926  * We arrange to look up info about element type, including its output
4927  * conversion proc, only once per series of calls, assuming the element
4928  * type doesn't change underneath us.
4929  */
4930  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4931  if (my_extra == NULL)
4932  {
4933  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4934  sizeof(ArrayMetaState));
4935  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
 /* store a value that cannot equal element_type, so the lookup below runs */
4936  my_extra->element_type = ~element_type;
4937  }
4938 
4939  if (my_extra->element_type != element_type)
4940  {
4941  /*
4942  * Get info about element type, including its output conversion proc
4943  */
4944  get_type_io_data(element_type, IOFunc_output,
4945  &my_extra->typlen, &my_extra->typbyval,
4946  &my_extra->typalign, &my_extra->typdelim,
4947  &my_extra->typioparam, &my_extra->typiofunc);
4948  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4949  fcinfo->flinfo->fn_mcxt);
4950  my_extra->element_type = element_type;
4951  }
4952  typlen = my_extra->typlen;
4953  typbyval = my_extra->typbyval;
4954  typalign = my_extra->typalign;
4955 
 /* p walks the element data; bitmap/bitmask walk the null bitmap, if any */
4956  p = ARR_DATA_PTR(v);
4957  bitmap = ARR_NULLBITMAP(v);
4958  bitmask = 1;
4959 
4960  for (i = 0; i < nitems; i++)
4961  {
4962  Datum itemvalue;
4963  char *value;
4964 
4965  /* Get source element, checking for NULL */
 /* a clear bit marks a null element; p is not advanced for those */
4966  if (bitmap && (*bitmap & bitmask) == 0)
4967  {
4968  /* if null_string is NULL, we just ignore null elements */
4969  if (null_string != NULL)
4970  {
 /* the separator is emitted only once something was already printed */
4971  if (printed)
4972  appendStringInfo(&buf, "%s%s", fldsep, null_string);
4973  else
4974  appendStringInfoString(&buf, null_string);
4975  printed = true;
4976  }
4977  }
4978  else
4979  {
4980  itemvalue = fetch_att(p, typbyval, typlen);
4981 
4982  value = OutputFunctionCall(&my_extra->proc, itemvalue);
4983 
4984  if (printed)
4985  appendStringInfo(&buf, "%s%s", fldsep, value);
4986  else
4987  appendStringInfoString(&buf, value);
4988  printed = true;
4989 
 /* step past this element, then realign for the next one */
4990  p = att_addlength_pointer(p, typlen, p);
4991  p = (char *) att_align_nominal(p, typalign);
4992  }
4993 
4994  /* advance bitmap pointer if any */
4995  if (bitmap)
4996  {
4997  bitmask <<= 1;
 /* after eight bits, move on to the next bitmap byte */
4998  if (bitmask == 0x100)
4999  {
5000  bitmap++;
5001  bitmask = 1;
5002  }
5003  }
5004  }
5005 
5006  result = cstring_to_text_with_len(buf.data, buf.len);
5007  pfree(buf.data);
5008 
5009  return result;
5010 }
5011 
5012 #define HEXBASE 16
5013 /*
5014  * Convert an int32 to a string containing a base 16 (hex) representation of
5015  * the number.
 *
 * Digits are produced least-significant first and stored backwards from the
 * end of a local buffer, so "ptr" ends up at the first digit.  The do/while
 * form guarantees at least one digit, so zero is rendered as "0".
 *
 * NOTE(review): the name/parameter line (listing line 5018), the
 * declaration of "value" (listing line 5020) and the return statement
 * (listing line 5034) are absent from this listing.
5016  */
5017 Datum
5019 {
5021  char *ptr;
5022  const char *digits = "0123456789abcdef";
5023  char buf[32]; /* bigger than needed, but reasonable */
5024 
 /* start at the last byte, which holds the terminating NUL */
5025  ptr = buf + sizeof(buf) - 1;
5026  *ptr = '\0';
5027 
5028  do
5029  {
5030  *--ptr = digits[value % HEXBASE];
5031  value /= HEXBASE;
5032  } while (ptr > buf && value);
5033 
5035 }
5036 
5037 /*
5038  * Convert an int64 to a string containing a base 16 (hex) representation of
5039  * the number.
 *
 * The argument is reinterpreted as uint64 before conversion, so the digit
 * loop only ever sees a non-negative quantity.  Digits are stored backwards
 * from the end of a local buffer; at least one digit is always produced.
 *
 * NOTE(review): the name/parameter line (listing line 5042) and the return
 * statement (listing line 5058) are absent from this listing.
5040  */
5041 Datum
5043 {
5044  uint64 value = (uint64) PG_GETARG_INT64(0);
5045  char *ptr;
5046  const char *digits = "0123456789abcdef";
5047  char buf[32]; /* bigger than needed, but reasonable */
5048 
 /* start at the last byte, which holds the terminating NUL */
5049  ptr = buf + sizeof(buf) - 1;
5050  *ptr = '\0';
5051 
5052  do
5053  {
5054  *--ptr = digits[value % HEXBASE];
5055  value /= HEXBASE;
5056  } while (ptr > buf && value);
5057 
5059 }
5060 
5061 /*
5062  * Return the size of a datum, possibly compressed
5063  *
5064  * Works on any data type
 *
 * The argument type's typlen is looked up on the first call and cached in
 * fcinfo->flinfo->fn_extra for later calls.  The int32 result is:
 *		typlen == -1: whatever toast_datum_size() reports for the value;
 *		typlen == -2: strlen() of the C string plus one;
 *		otherwise:    typlen itself.
 *
 * NOTE(review): the name/parameter line (listing line 5067) and the
 * declaration of "value" (listing line 5069) are absent from this listing.
5065  */
5066 Datum
5068 {
5070  int32 result;
5071  int typlen;
5072 
5073  /* On first call, get the input type's typlen, and save at *fn_extra */
5074  if (fcinfo->flinfo->fn_extra == NULL)
5075  {
5076  /* Lookup the datatype of the supplied argument */
5077  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5078 
5079  typlen = get_typlen(argtypeid);
5080  if (typlen == 0) /* should not happen */
5081  elog(ERROR, "cache lookup failed for type %u", argtypeid);
5082 
 /* allocated in fn_mcxt rather than the current context */
5083  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5084  sizeof(int));
5085  *((int *) fcinfo->flinfo->fn_extra) = typlen;
5086  }
5087  else
5088  typlen = *((int *) fcinfo->flinfo->fn_extra);
5089 
5090  if (typlen == -1)
5091  {
5092  /* varlena type, possibly toasted */
5093  result = toast_datum_size(value);
5094  }
5095  else if (typlen == -2)
5096  {
5097  /* cstring */
5098  result = strlen(DatumGetCString(value)) + 1;
5099  }
5100  else
5101  {
5102  /* ordinary fixed-width type */
5103  result = typlen;
5104  }
5105 
5106  PG_RETURN_INT32(result);
5107 }
5108 
5109 /*
5110  * string_agg - Concatenates values and returns string.
5111  *
5112  * Syntax: string_agg(value text, delimiter text) RETURNS text
5113  *
5114  * Note: Any NULL values are ignored. The first-call delimiter isn't
5115  * actually used at all, and on subsequent calls the delimiter precedes
5116  * the associated value.
5117  */
5118 
5119 /* subroutine to initialize state */
/*
 * Returns a fresh StringInfo created while the aggregate's memory context
 * (as reported by AggCheckCallContext) is current; the caller's context is
 * restored before returning.  Raises an error when not called in an
 * aggregate context.
 *
 * NOTE(review): the name/parameter line (listing line 5121) is absent from
 * this listing; the caller below invokes it as makeStringAggState(fcinfo).
 */
5120 static StringInfo
5122 {
5123  StringInfo state;
5124  MemoryContext aggcontext;
5125  MemoryContext oldcontext;
5126 
5127  if (!AggCheckCallContext(fcinfo, &aggcontext))
5128  {
5129  /* cannot be called directly because of internal-type argument */
5130  elog(ERROR, "string_agg_transfn called in non-aggregate context");
5131  }
5132 
5133  /*
5134  * Create state in aggregate context. It'll stay there across subsequent
5135  * calls.
5136  */
5137  oldcontext = MemoryContextSwitchTo(aggcontext);
5138  state = makeStringInfo();
5139  MemoryContextSwitchTo(oldcontext);
5140 
5141  return state;
5142 }
5143 
/*
 * Transition step for string_agg.
 *
 * fmgr arguments:
 *		0: running state (a StringInfo pointer), or NULL if none exists yet.
 *		1: value to append; NULL values leave the state untouched.
 *		2: delimiter; appended before the value except when the state is
 *		   being created, and skipped when NULL.
 *
 * Returns the state pointer, which is still NULL if no non-NULL value has
 * been seen so far.
 *
 * NOTE(review): the name/parameter line (listing line 5145) is absent from
 * this listing; the error message in the state-initialization subroutine
 * suggests the name is string_agg_transfn -- confirm upstream.
 */
5144 Datum
5146 {
5147  StringInfo state;
5148 
5149  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5150 
5151  /* Append the value unless null. */
5152  if (!PG_ARGISNULL(1))
5153  {
5154  /* On the first time through, we ignore the delimiter. */
5155  if (state == NULL)
5156  state = makeStringAggState(fcinfo);
5157  else if (!PG_ARGISNULL(2))
5158  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
5159 
5160  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
5161  }
5162 
5163  /*
5164  * The transition type for string_agg() is declared to be "internal",
5165  * which is a pass-by-value type the same size as a pointer.
5166  */
5167  PG_RETURN_POINTER(state);
5168 }
5169 
/*
 * Final step for string_agg: returns NULL when the state (argument 0) is
 * NULL, i.e. when no state was ever created.
 *
 * NOTE(review): the name/parameter line (listing line 5171) and the
 * statement executed for a non-NULL state (listing line 5181) are absent
 * from this listing; presumably the latter returns the accumulated buffer
 * as text -- confirm against the upstream file.
 */
5170 Datum
5172 {
5173  StringInfo state;
5174 
5175  /* cannot be called directly because of internal-type argument */
5176  Assert(AggCheckCallContext(fcinfo, NULL));
5177 
5178  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5179 
5180  if (state != NULL)
5182  else
5183  PG_RETURN_NULL();
5184 }
5185 
5186 /*
5187  * Prepare cache with fmgr info for the output functions of the datatypes of
5188  * the arguments of a concat-like function, beginning with argument "argidx".
5189  * (Arguments before that will have corresponding slots in the resulting
5190  * FmgrInfo array, but we don't fill those slots.)
 *
 * The array has PG_NARGS() entries, is allocated in fn_mcxt, is stored in
 * fcinfo->flinfo->fn_extra, and is also returned.  An error is raised if
 * the type of any argument from "argidx" onward cannot be determined.
 *
 * NOTE(review): the name/parameter line (listing line 5193) is absent from
 * this listing; the caller invokes it as
 * build_concat_foutcache(fcinfo, argidx).
5191  */
5192 static FmgrInfo *
5194 {
5195  FmgrInfo *foutcache;
5196  int i;
5197 
5198  /* We keep the info in fn_mcxt so it survives across calls */
5199  foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5200  PG_NARGS() * sizeof(FmgrInfo));
5201 
5202  for (i = argidx; i < PG_NARGS(); i++)
5203  {
5204  Oid valtype;
5205  Oid typOutput;
5206  bool typIsVarlena;
5207 
5208  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
5209  if (!OidIsValid(valtype))
5210  elog(ERROR, "could not determine data type of concat() input");
5211 
 /* typIsVarlena is fetched but not used further here */
5212  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
5213  fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
5214  }
5215 
5216  fcinfo->flinfo->fn_extra = foutcache;
5217 
5218  return foutcache;
5219 }
5220 
5221 /*
5222  * Implementation of both concat() and concat_ws().
5223  *
5224  * sepstr is the separator string to place between values.
5225  * argidx identifies the first argument to concatenate (counting from zero);
5226  * note that this must be constant across any one series of calls.
5227  *
5228  * Returns NULL if result should be NULL, else text value.
 *
 * In the VARIADIC-array case the work is delegated to
 * array_to_text_internal(); otherwise each non-NULL argument from "argidx"
 * onward is converted with its cached output function and appended, with
 * "sepstr" between consecutive appended values.
 *
 * NOTE(review): listing lines 5235 (presumably the declaration of "str"),
 * 5263 (the assertion announced by the comment before it), 5287
 * (presumably the declaration of "value") and 5296 (the head of the call
 * that appends the converted value) are absent from this listing.
5229  */
5230 static text *
5231 concat_internal(const char *sepstr, int argidx,
5232  FunctionCallInfo fcinfo)
5233 {
5234  text *result;
5236  FmgrInfo *foutcache;
5237  bool first_arg = true;
5238  int i;
5239 
5240  /*
5241  * concat(VARIADIC some-array) is essentially equivalent to
5242  * array_to_text(), ie concat the array elements with the given separator.
5243  * So we just pass the case off to that code.
5244  */
5245  if (get_fn_expr_variadic(fcinfo->flinfo))
5246  {
5247  ArrayType *arr;
5248 
5249  /* Should have just the one argument */
5250  Assert(argidx == PG_NARGS() - 1);
5251 
5252  /* concat(VARIADIC NULL) is defined as NULL */
5253  if (PG_ARGISNULL(argidx))
5254  return NULL;
5255 
5256  /*
5257  * Non-null argument had better be an array. We assume that any call
5258  * context that could let get_fn_expr_variadic return true will have
5259  * checked that a VARIADIC-labeled parameter actually is an array. So
5260  * it should be okay to just Assert that it's an array rather than
5261  * doing a full-fledged error check.
5262  */
5264 
5265  /* OK, safe to fetch the array value */
5266  arr = PG_GETARG_ARRAYTYPE_P(argidx);
5267 
5268  /*
5269  * And serialize the array. We tell array_to_text to ignore null
5270  * elements, which matches the behavior of the loop below.
5271  */
5272  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
5273  }
5274 
5275  /* Normal case without explicit VARIADIC marker */
5276  initStringInfo(&str);
5277 
5278  /* Get output function info, building it if first time through */
5279  foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
5280  if (foutcache == NULL)
5281  foutcache = build_concat_foutcache(fcinfo, argidx);
5282 
5283  for (i = argidx; i < PG_NARGS(); i++)
5284  {
 /* NULL arguments contribute neither a value nor a separator */
5285  if (!PG_ARGISNULL(i))
5286  {
5288 
5289  /* add separator if appropriate */
5290  if (first_arg)
5291  first_arg = false;
5292  else
5293  appendStringInfoString(&str, sepstr);
5294 
5295  /* call the appropriate type output function, append the result */
5297  OutputFunctionCall(&foutcache[i], value));
5298  }
5299  }
5300 
5301  result = cstring_to_text_with_len(str.data, str.len);
5302  pfree(str.data);
5303 
5304  return result;
5305 }
5306 
5307 /*
5308  * Concatenate all arguments. NULL arguments are ignored.
 *
 * Calls concat_internal() with an empty separator, starting at argument 0,
 * and returns SQL NULL when that routine returns a null pointer.
 *
 * NOTE(review): the name/parameter line (listing line 5311) is absent from
 * this listing.
5309  */
5310 Datum
5312 {
5313  text *result;
5314 
5315  result = concat_internal("", 0, fcinfo);
5316  if (result == NULL)
5317  PG_RETURN_NULL();
5318  PG_RETURN_TEXT_P(result);
5319 }
5320 
5321 /*
5322  * Concatenate all but first argument value with separators. The first
5323  * parameter is used as the separator. NULL arguments are ignored.
 *
 * A NULL separator makes the result NULL.  Otherwise concat_internal() is
 * called with "sep", starting at argument 1, and SQL NULL is returned when
 * that routine returns a null pointer.
 *
 * NOTE(review): the name/parameter line (listing line 5326) and the
 * assignment of "sep" (listing line 5334) are absent from this listing;
 * "sep" is presumably the C-string form of argument 0 -- confirm upstream.
5324  */
5325 Datum
5327 {
5328  char *sep;
5329  text *result;
5330 
5331  /* return NULL when separator is NULL */
5332  if (PG_ARGISNULL(0))
5333  PG_RETURN_NULL();
5335 
5336  result = concat_internal(sep, 1, fcinfo);
5337  if (result == NULL)
5338  PG_RETURN_NULL();
5339  PG_RETURN_TEXT_P(result);
5340 }
5341 
5342 /*
5343  * Return first n characters in the string. When n is negative,
5344  * return all but last |n| characters.
 *
 * For negative n, the character count of the string is added to n to get
 * the number of characters to keep, and pg_mbcharcliplen() turns that into
 * "rlen" (presumably a byte length -- confirm).
 *
 * NOTE(review): the name/parameter line (listing line 5347) and both
 * return statements (listing lines 5360 and 5363) are absent from this
 * listing, so how "rlen" and the non-negative case are turned into the
 * result cannot be seen here.
5345  */
5346 Datum
5348 {
5349  int n = PG_GETARG_INT32(1);
5350 
5351  if (n < 0)
5352  {
5353  text *str = PG_GETARG_TEXT_PP(0);
5354  const char *p = VARDATA_ANY(str);
5355  int len = VARSIZE_ANY_EXHDR(str);
5356  int rlen;
5357 
5358  n = pg_mbstrlen_with_len(p, len) + n;
5359  rlen = pg_mbcharcliplen(p, len, n);
5361  }
5362  else
5364 }
5365 
5366 /*
5367  * Return last n characters in the string. When n is negative,
5368  * return all but first |n| characters.
 *
 * In both cases n is first converted into the number of leading characters
 * to drop: |n| for negative n, otherwise the string's character count
 * minus n.  pg_mbcharcliplen() then yields "off" (presumably the byte
 * length of that many leading characters -- confirm), and the result is
 * the "len - off" bytes starting at "p + off".
 *
 * NOTE(review): the name/parameter line (listing line 5371) is absent from
 * this listing.
5369  */
5370 Datum
5372 {
5373  text *str = PG_GETARG_TEXT_PP(0);
5374  const char *p = VARDATA_ANY(str);
5375  int len = VARSIZE_ANY_EXHDR(str);
5376  int n = PG_GETARG_INT32(1);
5377  int off;
5378 
5379  if (n < 0)
5380  n = -n;
5381  else
5382  n = pg_mbstrlen_with_len(p, len) - n;
5383  off = pg_mbcharcliplen(p, len, n);
5384 
5385  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
5386 }
5387 
5388 /*
5389  * Return reversed string
 *
 * The result has the same byte length as the input.  The input is read
 * front to back while "dst" moves from the end of the result towards its
 * start.  In the multibyte branch each pg_mblen()-sized chunk is copied as
 * a unit, so the bytes within one chunk keep their order; in the
 * single-byte branch bytes are copied one at a time.
 *
 * NOTE(review): the name/parameter line (listing line 5392) and the
 * condition selecting between the two branches (listing line 5405) are
 * absent from this listing.
5390  */
5391 Datum
5393 {
5394  text *str = PG_GETARG_TEXT_PP(0);
5395  const char *p = VARDATA_ANY(str);
5396  int len = VARSIZE_ANY_EXHDR(str);
5397  const char *endp = p + len;
5398  text *result;
5399  char *dst;
5400 
5401  result = palloc(len + VARHDRSZ);
 /* dst starts one past the last payload byte of the result */
5402  dst = (char *) VARDATA(result) + len;
5403  SET_VARSIZE(result, len + VARHDRSZ);
5404 
5406  {
5407  /* multibyte version */
5408  while (p < endp)
5409  {
5410  int sz;
5411 
5412  sz = pg_mblen(p);
5413  dst -= sz;
5414  memcpy(dst, p, sz);
5415  p += sz;
5416  }
5417  }
5418  else
5419  {
5420  /* single byte version */
5421  while (p < endp)
5422  *(--dst) = *p++;
5423  }
5424 
5425  PG_RETURN_TEXT_P(result);
5426 }
5427 
5428 
5429 /*
5430  * Support macros for text_format()
5431  */
5432 #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus flag present? */
5433 
/*
 * ADVANCE_PARSE_POINTER(ptr, end_ptr)
 *		Pre-increment "ptr", then report an "unterminated format() type
 *		specifier" error if it has reached or passed "end_ptr".  "ptr" is
 *		modified in place and so must be an lvalue.  The do { } while (0)
 *		wrapper lets the macro be used as a single statement.
 */
5434 #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
5435  do { \
5436  if (++(ptr) >= (end_ptr)) \
5437  ereport(ERROR, \
5438  (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
5439  errmsg("unterminated format() type specifier"), \
5440  errhint("For a single \"%%\" use \"%%%%\"."))); \
5441  } while (0)
5442 
5443 /*
5444  * Returns a formatted string
 *
 * fmgr argument 0 is the format string; a NULL format yields NULL.  The
 * remaining arguments supply the values, either individually or, when the
 * call is marked VARIADIC, as the elements of a single array argument.
 *
 * The format string is scanned byte by byte.  Everything except '%' is
 * copied through; "%%" emits one '%'; any other specifier is parsed by
 * text_format_parse_format() and must end in one of the type characters
 * 's', 'I' or 'L', which are all handled by
 * text_format_string_conversion().  "arg" tracks the next argument
 * position to consume and is reset whenever a specifier names an explicit
 * position.
 *
 * NOTE(review): the name/parameter line (listing line 5447), what is
 * presumably the declaration of the output buffer "str" (listing line
 * 5450) and the assertion announced in the VARIADIC branch (listing line
 * 5494) are absent from this listing.
5445  */
5446 Datum
5448 {
5449  text *fmt;
5451  const char *cp;
5452  const char *start_ptr;
5453  const char *end_ptr;
5454  text *result;
5455  int arg;
5456  bool funcvariadic;
5457  int nargs;
5458  Datum *elements = NULL;
5459  bool *nulls = NULL;
5460  Oid element_type = InvalidOid;
5461  Oid prev_type = InvalidOid;
5462  Oid prev_width_type = InvalidOid;
5463  FmgrInfo typoutputfinfo;
5464  FmgrInfo typoutputinfo_width;
5465 
5466  /* When format string is null, immediately return null */
5467  if (PG_ARGISNULL(0))
5468  PG_RETURN_NULL();
5469 
5470  /* If argument is marked VARIADIC, expand array into elements */
5471  if (get_fn_expr_variadic(fcinfo->flinfo))
5472  {
5473  ArrayType *arr;
5474  int16 elmlen;
5475  bool elmbyval;
5476  char elmalign;
5477  int nitems;
5478 
5479  /* Should have just the one argument */
5480  Assert(PG_NARGS() == 2);
5481 
5482  /* If argument is NULL, we treat it as zero-length array */
5483  if (PG_ARGISNULL(1))
5484  nitems = 0;
5485  else
5486  {
5487  /*
5488  * Non-null argument had better be an array. We assume that any
5489  * call context that could let get_fn_expr_variadic return true
5490  * will have checked that a VARIADIC-labeled parameter actually is
5491  * an array. So it should be okay to just Assert that it's an
5492  * array rather than doing a full-fledged error check.
5493  */
5495 
5496  /* OK, safe to fetch the array value */
5497  arr = PG_GETARG_ARRAYTYPE_P(1);
5498 
5499  /* Get info about array element type */
5500  element_type = ARR_ELEMTYPE(arr);
5501  get_typlenbyvalalign(element_type,
5502  &elmlen, &elmbyval, &elmalign);
5503 
5504  /* Extract all array elements */
5505  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5506  &elements, &nulls, &nitems);
5507  }
5508 
 /* +1 so that nargs counts the format string too, as PG_NARGS() does */
5509  nargs = nitems + 1;
5510  funcvariadic = true;
5511  }
5512  else
5513  {
5514  /* Non-variadic case, we'll process the arguments individually */
5515  nargs = PG_NARGS();
5516  funcvariadic = false;
5517  }
5518 
5519  /* Setup for main loop. */
5520  fmt = PG_GETARG_TEXT_PP(0);
5521  start_ptr = VARDATA_ANY(fmt);
5522  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5523  initStringInfo(&str);
5524  arg = 1; /* next argument position to print */
5525 
5526  /* Scan format string, looking for conversion specifiers. */
5527  for (cp = start_ptr; cp < end_ptr; cp++)
5528  {
5529  int argpos;
5530  int widthpos;
5531  int flags;
5532  int width;
5533  Datum value;
5534  bool isNull;
5535  Oid typid;
5536 
5537  /*
5538  * If it's not the start of a conversion specifier, just copy it to
5539  * the output buffer.
5540  */
5541  if (*cp != '%')
5542  {
5543  appendStringInfoCharMacro(&str, *cp);
5544  continue;
5545  }
5546 
5547  ADVANCE_PARSE_POINTER(cp, end_ptr);
5548 
5549  /* Easy case: %% outputs a single % */
5550  if (*cp == '%')
5551  {
5552  appendStringInfoCharMacro(&str, *cp);
5553  continue;
5554  }
5555 
5556  /* Parse the optional portions of the format specifier */
5557  cp = text_format_parse_format(cp, end_ptr,
5558  &argpos, &widthpos,
5559  &flags, &width);
5560 
5561  /*
5562  * Next we should see the main conversion specifier. Whether or not
5563  * an argument position was present, it's known that at least one
5564  * character remains in the string at this point. Experience suggests
5565  * that it's worth checking that that character is one of the expected
5566  * ones before we try to fetch arguments, so as to produce the least
5567  * confusing response to a mis-formatted specifier.
5568  */
5569  if (strchr("sIL", *cp) == NULL)
5570  ereport(ERROR,
5571  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5572  errmsg("unrecognized format() type specifier \"%c\"",
5573  *cp),
5574  errhint("For a single \"%%\" use \"%%%%\".")));
5575 
5576  /* If indirect width was specified, get its value */
5577  if (widthpos >= 0)
5578  {
5579  /* Collect the specified or next argument position */
5580  if (widthpos > 0)
5581  arg = widthpos;
5582  if (arg >= nargs)
5583  ereport(ERROR,
5584  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5585  errmsg("too few arguments for format()")));
5586 
5587  /* Get the value and type of the selected argument */
5588  if (!funcvariadic)
5589  {
5590  value = PG_GETARG_DATUM(arg);
5591  isNull = PG_ARGISNULL(arg);
5592  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5593  }
5594  else
5595  {
 /* array elements are 0-based while arg positions start at 1 */
5596  value = elements[arg - 1];
5597  isNull = nulls[arg - 1];
5598  typid = element_type;
5599  }
5600  if (!OidIsValid(typid))
5601  elog(ERROR, "could not determine data type of format() input");
5602 
5603  arg++;
5604 
5605  /* We can treat NULL width the same as zero */
5606  if (isNull)
5607  width = 0;
5608  else if (typid == INT4OID)
5609  width = DatumGetInt32(value);
5610  else if (typid == INT2OID)
5611  width = DatumGetInt16(value);
5612  else
5613  {
5614  /* For less-usual datatypes, convert to text then to int */
 /* note: this local "str" hides the function-level output buffer */
5615  char *str;
5616 
5617  if (typid != prev_width_type)
5618  {
5619  Oid typoutputfunc;
5620  bool typIsVarlena;
5621 
5622  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5623  fmgr_info(typoutputfunc, &typoutputinfo_width);
5624  prev_width_type = typid;
5625  }
5626 
5627  str = OutputFunctionCall(&typoutputinfo_width, value);
5628 
5629  /* pg_strtoint32 will complain about bad data or overflow */
5630  width = pg_strtoint32(str);
5631 
5632  pfree(str);
5633  }
5634  }
5635 
5636  /* Collect the specified or next argument position */
5637  if (argpos > 0)
5638  arg = argpos;
5639  if (arg >= nargs)
5640  ereport(ERROR,
5641  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5642  errmsg("too few arguments for format()")));
5643 
5644  /* Get the value and type of the selected argument */
5645  if (!funcvariadic)
5646  {
5647  value = PG_GETARG_DATUM(arg);
5648  isNull = PG_ARGISNULL(arg);
5649  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5650  }
5651  else
5652  {
5653  value = elements[arg - 1];
5654  isNull = nulls[arg - 1];
5655  typid = element_type;
5656  }
5657  if (!OidIsValid(typid))
5658  elog(ERROR, "could not determine data type of format() input");
5659 
5660  arg++;
5661 
5662  /*
5663  * Get the appropriate typOutput function, reusing previous one if
5664  * same type as previous argument. That's particularly useful in the
5665  * variadic-array case, but often saves work even for ordinary calls.
5666  */
5667  if (typid != prev_type)
5668  {
5669  Oid typoutputfunc;
5670  bool typIsVarlena;
5671 
5672  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5673  fmgr_info(typoutputfunc, &typoutputfinfo);
5674  prev_type = typid;
5675  }
5676 
5677  /*
5678  * And now we can format the value.
5679  */
5680  switch (*cp)
5681  {
5682  case 's':
5683  case 'I':
5684  case 'L':
5685  text_format_string_conversion(&str, *cp, &typoutputfinfo,
5686  value, isNull,
5687  flags, width);
5688  break;
5689  default:
5690  /* should not get here, because of previous check */
5691  ereport(ERROR,
5692  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5693  errmsg("unrecognized format() type specifier \"%c\"",
5694  *cp),
5695  errhint("For a single \"%%\" use \"%%%%\".")));
5696  break;
5697  }
5698  }
5699 
5700  /* Don't need deconstruct_array results anymore. */
5701  if (elements != NULL)
5702  pfree(elements);
5703  if (nulls != NULL)
5704  pfree(nulls);
5705 
5706  /* Generate results. */
5707  result = cstring_to_text_with_len(str.data, str.len);
5708  pfree(str.data);
5709 
5710  PG_RETURN_TEXT_P(result);
5711 }
5712 
5713 /*
5714  * Parse contiguous digits as a decimal number.
5715  *
5716  * Returns true if some digits could be parsed.
5717  * The value is returned into *value, and *ptr is advanced to the next
5718  * character to be parsed.
5719  *
5720  * Note parsing invariant: at least one character is known available before
5721  * string end (end_ptr) at entry, and this is still true at exit.
5722  */
5723 static bool
5724 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5725 {
5726  bool found = false;
5727  const char *cp = *ptr;
5728  int val = 0;
5729 
5730  while (*cp >= '0' && *cp <= '9')
5731  {
5732  int8 digit = (*cp - '0');
5733 
5734  if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
5735  unlikely(pg_add_s32_overflow(val, digit, &val)))
5736  ereport(ERROR,
5737  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5738  errmsg("number is out of range")));
5739  ADVANCE_PARSE_POINTER(cp, end_ptr);
5740  found = true;
5741  }
5742 
5743  *ptr = cp;
5744  *value = val;
5745 
5746  return found;
5747 }
5748 
5749 /*
5750  * Parse a format specifier (generally following the SUS printf spec).
5751  *
5752  * We have already advanced over the initial '%', and we are looking for
5753  * [argpos][flags][width]type (but the type character is not consumed here).
5754  *
5755  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5756  * Output parameters:
5757  * argpos: argument position for value to be printed. -1 means unspecified.
5758  * widthpos: argument position for width. Zero means the argument position
5759  * was unspecified (ie, take the next arg) and -1 means no width
5760  * argument (width was omitted or specified as a constant).
5761  * flags: bitmask of flags.
5762  * width: directly-specified width value. Zero means the width was omitted
5763  * (note it's not necessary to distinguish this case from an explicit
5764  * zero width value).
5765  *
5766  * The function result is the next character position to be parsed, ie, the
5767  * location where the type character is/should be.
5768  *
5769  * Note parsing invariant: at least one character is known available before
5770  * string end (end_ptr) at entry, and this is still true at exit.
5771  */
5772 static const char *
5773 text_format_parse_format(const char *start_ptr, const char *end_ptr,
5774  int *argpos, int *widthpos,
5775  int *flags, int *width)
5776 {
5777  const char *cp = start_ptr;
5778  int n;
5779 
5780  /* set defaults for output parameters */
5781  *argpos = -1;
5782  *widthpos = -1;
5783  *flags = 0;
5784  *width = 0;
5785 
5786  /* try to identify first number */
5787  if (text_format_parse_digits(&cp, end_ptr, &n))
5788  {
5789  if (*cp != '$')
5790  {
5791  /* Must be just a width and a type, so we're done */
5792  *width = n;
5793  return cp;
5794  }
5795  /* The number was argument position */
5796  *argpos = n;
5797  /* Explicit 0 for argument index is immediately refused */
5798  if (n == 0)
5799  ereport(ERROR,
5800  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5801  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5802  ADVANCE_PARSE_POINTER(cp, end_ptr);
5803  }
5804 
5805  /* Handle flags (only minus is supported now) */
5806  while (*cp == '-')
5807  {
5808  *flags |= TEXT_FORMAT_FLAG_MINUS;
5809  ADVANCE_PARSE_POINTER(cp, end_ptr);
5810  }
5811 
5812  if (*cp == '*')
5813  {
5814  /* Handle indirect width */
5815  ADVANCE_PARSE_POINTER(cp, end_ptr);
5816  if (text_format_parse_digits(&cp, end_ptr, &n))
5817  {
5818  /* number in this position must be closed by $ */
5819  if (*cp != '$')
5820  ereport(ERROR,
5821  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5822  errmsg("width argument position must be ended by \"$\"")));
5823  /* The number was width argument position */
5824  *widthpos = n;
5825  /* Explicit 0 for argument index is immediately refused */
5826  if (n == 0)
5827  ereport(ERROR,
5828  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5829  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5830  ADVANCE_PARSE_POINTER(cp, end_ptr);
5831  }
5832  else
5833  *widthpos = 0; /* width's argument position is unspecified */
5834  }
5835  else
5836  {
5837  /* Check for direct width specification */
5838  if (text_format_parse_digits(&cp, end_ptr, &n))
5839  *width = n;
5840  }
5841 
5842  /* cp should now be pointing at type character */
5843  return cp;
5844 }
5845 
5846 /*
5847  * Format a %s, %I, or %L conversion
5848  */
5849 static void
5851  FmgrInfo *typOutputInfo,
5852  Datum value, bool isNull,
5853  int flags, int width)
5854 {
5855  char *str;
5856 
5857  /* Handle NULL arguments before trying to stringify the value. */
5858  if (isNull)
5859  {
5860  if (conversion == 's')
5861  text_format_append_string(buf, "", flags, width);
5862  else if (conversion == 'L')
5863  text_format_append_string(buf, "NULL", flags, width);
5864  else if (conversion == 'I')
5865  ereport(ERROR,
5866  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5867  errmsg("null values cannot be formatted as an SQL identifier")));
5868  return;
5869  }
5870 
5871  /* Stringify. */
5872  str = OutputFunctionCall(typOutputInfo, value);
5873 
5874  /* Escape. */
5875  if (conversion == 'I')
5876  {
5877  /* quote_identifier may or may not allocate a new string. */
5878  text_format_append_string(buf, quote_identifier(str), flags, width);
5879  }
5880  else if (conversion == 'L')
5881  {
5882  char *qstr = quote_literal_cstr(str);
5883 
5884  text_format_append_string(buf, qstr, flags, width);
5885  /* quote_literal_cstr() always allocates a new string */
5886  pfree(qstr);
5887  }
5888  else
5889  text_format_append_string(buf, str, flags, width);
5890 
5891  /* Cleanup. */
5892  pfree(str);
5893 }
5894 
5895 /*
5896  * Append str to buf, padding as directed by flags/width
5897  */
5898 static void
5900  int flags, int width)
5901 {
5902  bool align_to_left = false;
5903  int len;
5904 
5905  /* fast path for typical easy case */
5906  if (width == 0)
5907  {
5908  appendStringInfoString(buf, str);
5909  return;
5910  }
5911 
5912  if (width < 0)
5913  {
5914  /* Negative width: implicit '-' flag, then take absolute value */
5915  align_to_left = true;
5916  /* -INT_MIN is undefined */
5917  if (width <= INT_MIN)
5918  ereport(ERROR,
5919  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5920  errmsg("number is out of range")));
5921  width = -width;
5922  }
5923  else if (flags & TEXT_FORMAT_FLAG_MINUS)
5924  align_to_left = true;
5925 
5926  len = pg_mbstrlen(str);
5927  if (align_to_left)
5928  {
5929  /* left justify */
5930  appendStringInfoString(buf, str);
5931  if (len < width)
5932  appendStringInfoSpaces(buf, width - len);
5933  }
5934  else
5935  {
5936  /* right justify */
5937  if (len < width)
5938  appendStringInfoSpaces(buf, width - len);
5939  appendStringInfoString(buf, str);
5940  }
5941 }
5942 
5943 /*
5944  * text_format_nv - nonvariadic wrapper for text_format function.
5945  *
5946  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5947  * which checks that all built-in functions that share the implementing C
5948  * function take the same number of arguments.
5949  */
5950 Datum
5952 {
5953  return text_format(fcinfo);
5954 }
5955 
/*
 * Helper function for Levenshtein distance functions: report whether the
 * first "len" bytes of s1 and s2 are identical.  Faster than memcmp(), for
 * this use case.
 *
 * Bytes are compared from the last one back towards the first.  A "len" of
 * zero or less compares nothing and yields true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
5971 
5972 /* Expand each Levenshtein distance variant */
5973 #include "levenshtein.c"
5974 #define LEVENSHTEIN_LESS_EQUAL
5975 #include "levenshtein.c"
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3107
#define PG_CACHE_LINE_SIZE
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4663
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:3252
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
Value * makeString(char *str)
Definition: value.c:53
signed short int16
Definition: c.h:345
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:373
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
#define DatumGetUInt32(X)
Definition: postgres.h:486
#define NIL
Definition: pg_list.h:65
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:5447
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2152
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:5434
Definition: fmgr.h:56
text * replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob)
Definition: varlena.c:4421
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:312
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3521
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:854
Datum split_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4556
int errhint(const char *fmt,...)
Definition: elog.c:974
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1044
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2674
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
Datum namegetext(PG_FUNCTION_ARGS)
Definition: varlena.c:2958
MemoryContext fn_mcxt
Definition: fmgr.h:65
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:146
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:10620
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1811
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:3059
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:298
#define DatumGetInt32(X)
Definition: postgres.h:472
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:144
Datum namelttext(PG_FUNCTION_ARGS)
Definition: varlena.c:2940
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:3091
#define HEXBASE
Definition: varlena.c:5012
char * refpoint
Definition: varlena.c:71
#define VARSIZE(PTR)
Definition: postgres.h:303
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4208
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:4110
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:5850
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2049
#define PointerGetDatum(X)
Definition: postgres.h:556
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:131
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:552
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:328
#define VARHDRSZ
Definition: c.h:555
char * pstrdup(const char *in)
Definition: mcxt.c:1161
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:541
regoff_t rm_so
Definition: regex.h:85
#define DatumGetTextPP(X)
Definition: fmgr.h:286
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
StringInfoData * StringInfo
Definition: stringinfo.h:43
#define Min(x, y)
Definition: c.h:890
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
static struct @144 value
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:308
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2419
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2394
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:297
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:3186
void canonicalize_path(char *path)
Definition: path.c:254
bool get_collation_isdeterministic(Oid colloid)
Definition: lsyscache.c:942
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:1951
char * last_match
Definition: varlena.c:63
int errcode(int sqlerrcode)
Definition: elog.c:570
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:174
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
#define DatumGetByteaPP(X)
Definition: fmgr.h:285
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3489
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:504
static bool text_position_next(TextPositionState *state)
Definition: varlena.c:1277
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:5145
Datum nameeqtext(PG_FUNCTION_ARGS)
Definition: varlena.c:2802
static char * text_position_get_match_ptr(TextPositionState *state)
Definition: varlena.c:1410
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:360
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3410
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:3261
#define LOG
Definition: elog.h:26
Datum bttextnamecmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2918
unsigned int Oid
Definition: postgres_ext.h:31
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:4006
static bool text_isequal(text *txt1, text *txt2, Oid collid)
Definition: varlena.c:4649
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:644
#define OidIsValid(objectId)
Definition: c.h:638
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1926
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:348
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:235
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1056
bool trace_sort
Definition: tuplesort.c:130
#define PG_GET_COLLATION()
Definition: fmgr.h:193
static char * text_position_next_internal(char *start_ptr, TextPositionState *state)
Definition: varlena.c:1341
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:3331
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:5311
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:682
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:5899
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4869
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:5326
regoff_t rm_eo
Definition: regex.h:86
signed int int32
Definition: c.h:346
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:3219
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2115
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:303
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1575
static int32 text_length(Datum str)
Definition: varlena.c:662
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:786
bool typbyval
Definition: array.h:228
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:187
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:5042
#define VARATT_IS_EXTER