PostgreSQL Source Code  git master
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/detoast.h"
21 #include "catalog/pg_collation.h"
22 #include "catalog/pg_type.h"
23 #include "common/int.h"
24 #include "lib/hyperloglog.h"
25 #include "libpq/pqformat.h"
26 #include "miscadmin.h"
27 #include "parser/scansup.h"
28 #include "port/pg_bswap.h"
29 #include "regex/regex.h"
30 #include "utils/builtins.h"
31 #include "utils/bytea.h"
32 #include "utils/hashutils.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/pg_locale.h"
36 #include "utils/sortsupport.h"
37 #include "utils/varlena.h"
38 
39 
40 /* GUC variable */
42 
/*
 * File-local aliases for the generic varlena struct.  They exist so that the
 * DatumGetUnknownP* and DatumGetVarStringP* macro families can cast to a
 * self-describing pointer type.
 */
typedef struct varlena unknown;
typedef struct varlena VarString;
45 
46 /*
47  * State for text_position_* functions.
48  */
49 typedef struct
50 {
51  bool is_multibyte; /* T if multibyte encoding */
53 
54  char *str1; /* haystack string */
55  char *str2; /* needle string */
56  int len1; /* string lengths in bytes */
57  int len2;
58 
59  /* Skip table for Boyer-Moore-Horspool search algorithm: */
60  int skiptablemask; /* mask for ANDing with skiptable subscripts */
61  int skiptable[256]; /* skip distance for given mismatched char */
62 
63  char *last_match; /* pointer to last match in 'str1' */
64 
65  /*
66  * Sometimes we need to convert the byte position of a match to a
67  * character position. These store the last position that was converted,
68  * so that on the next call, we can continue from that point, rather than
69  * count characters from the very beginning.
70  */
71  char *refpoint; /* pointer within original haystack string */
72  int refpos; /* 0-based character offset of the same point */
74 
75 typedef struct
76 {
77  char *buf1; /* 1st string, or abbreviation original string
78  * buf */
79  char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
80  int buflen1;
81  int buflen2;
82  int last_len1; /* Length of last buf1 string/strxfrm() input */
83  int last_len2; /* Length of last buf2 string/strxfrm() blob */
84  int last_returned; /* Last comparison result (cache) */
85  bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
86  bool collate_c;
87  Oid typid; /* Actual datatype (text/bpchar/bytea/name) */
88  hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
89  hyperLogLogState full_card; /* Full key cardinality state */
90  double prop_card; /* Required cardinality proportion */
93 
/*
 * Size of on-stack scratch buffers: big enough that most strings fit, yet
 * small enough that we are comfortable keeping it on the stack.
 */
#define TEXTBUFLEN		1024

/* Detoasting accessors for "unknown" datums and function arguments */
#define DatumGetUnknownP(X)			((unknown *) PG_DETOAST_DATUM(X))
#define DatumGetUnknownPCopy(X)		((unknown *) PG_DETOAST_DATUM_COPY(X))
#define PG_GETARG_UNKNOWN_P(n)		DatumGetUnknownP(PG_GETARG_DATUM(n))
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x)		PG_RETURN_POINTER(x)

/* Detoasting accessors for generic variable-length strings */
#define DatumGetVarStringP(X)		((VarString *) PG_DETOAST_DATUM(X))
#define DatumGetVarStringPP(X)		((VarString *) PG_DETOAST_DATUM_PACKED(X))
108 
109 static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
110 static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
111 static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
112 static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
113 static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
114 static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
115 static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
116 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
117 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
118 static int32 text_length(Datum str);
119 static text *text_catenate(text *t1, text *t2);
120 static text *text_substring(Datum str,
121  int32 start,
122  int32 length,
123  bool length_not_specified);
124 static text *text_overlay(text *t1, text *t2, int sp, int sl);
125 static int text_position(text *t1, text *t2, Oid collid);
126 static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
128 static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
132 static void check_collation_set(Oid collid);
133 static int text_cmp(text *arg1, text *arg2, Oid collid);
134 static bytea *bytea_catenate(bytea *t1, bytea *t2);
136  int S,
137  int L,
138  bool length_not_specified);
139 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
140 static void appendStringInfoText(StringInfo str, const text *t);
143  const char *fldsep, const char *null_string);
145 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
146  int *value);
147 static const char *text_format_parse_format(const char *start_ptr,
148  const char *end_ptr,
149  int *argpos, int *widthpos,
150  int *flags, int *width);
151 static void text_format_string_conversion(StringInfo buf, char conversion,
152  FmgrInfo *typOutputInfo,
153  Datum value, bool isNull,
154  int flags, int width);
155 static void text_format_append_string(StringInfo buf, const char *str,
156  int flags, int width);
157 
158 
159 /*****************************************************************************
160  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
161  *****************************************************************************/
162 
163 /*
164  * cstring_to_text
165  *
166  * Create a text value from a null-terminated C string.
167  *
168  * The new text value is freshly palloc'd with a full-size VARHDR.
169  */
170 text *
171 cstring_to_text(const char *s)
172 {
173  return cstring_to_text_with_len(s, strlen(s));
174 }
175 
176 /*
177  * cstring_to_text_with_len
178  *
179  * Same as cstring_to_text except the caller specifies the string length;
180  * the string need not be null_terminated.
181  */
182 text *
183 cstring_to_text_with_len(const char *s, int len)
184 {
185  text *result = (text *) palloc(len + VARHDRSZ);
186 
187  SET_VARSIZE(result, len + VARHDRSZ);
188  memcpy(VARDATA(result), s, len);
189 
190  return result;
191 }
192 
193 /*
194  * text_to_cstring
195  *
196  * Create a palloc'd, null-terminated C string from a text value.
197  *
198  * We support being passed a compressed or toasted text value.
199  * This is a bit bogus since such values shouldn't really be referred to as
200  * "text *", but it seems useful for robustness. If we didn't handle that
201  * case here, we'd need another routine that did, anyway.
202  */
203 char *
205 {
206  /* must cast away the const, unfortunately */
207  text *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
208  int len = VARSIZE_ANY_EXHDR(tunpacked);
209  char *result;
210 
211  result = (char *) palloc(len + 1);
212  memcpy(result, VARDATA_ANY(tunpacked), len);
213  result[len] = '\0';
214 
215  if (tunpacked != t)
216  pfree(tunpacked);
217 
218  return result;
219 }
220 
221 /*
222  * text_to_cstring_buffer
223  *
224  * Copy a text value into a caller-supplied buffer of size dst_len.
225  *
226  * The text string is truncated if necessary to fit. The result is
227  * guaranteed null-terminated (unless dst_len == 0).
228  *
229  * We support being passed a compressed or toasted text value.
230  * This is a bit bogus since such values shouldn't really be referred to as
231  * "text *", but it seems useful for robustness. If we didn't handle that
232  * case here, we'd need another routine that did, anyway.
233  */
234 void
235 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
236 {
237  /* must cast away the const, unfortunately */
238  text *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
239  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
240 
241  if (dst_len > 0)
242  {
243  dst_len--;
244  if (dst_len >= src_len)
245  dst_len = src_len;
246  else /* ensure truncation is encoding-safe */
247  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
248  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
249  dst[dst_len] = '\0';
250  }
251 
252  if (srcunpacked != src)
253  pfree(srcunpacked);
254 }
255 
256 
257 /*****************************************************************************
258  * USER I/O ROUTINES *
259  *****************************************************************************/
260 
261 
/* Convert between an ASCII digit character and its numeric value */
#define VAL(CH)			((CH) - '0')
#define DIG(V)			((V) + '0')
264 
265 /*
266  * byteain - converts from printable representation of byte array
267  *
268  * Non-printable characters must be passed as '\nnn' (octal) and are
269  * converted to internal form. '\' must be passed as '\\'.
270  * ereport(ERROR, ...) if bad form.
271  *
272  * BUGS:
273  * The input is scanned twice.
274  * The error checking of input is minimal.
275  */
276 Datum
278 {
279  char *inputText = PG_GETARG_CSTRING(0);
280  char *tp;
281  char *rp;
282  int bc;
283  bytea *result;
284 
285  /* Recognize hex input */
286  if (inputText[0] == '\\' && inputText[1] == 'x')
287  {
288  size_t len = strlen(inputText);
289 
290  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
291  result = palloc(bc);
292  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
293  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
294 
295  PG_RETURN_BYTEA_P(result);
296  }
297 
298  /* Else, it's the traditional escaped style */
299  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
300  {
301  if (tp[0] != '\\')
302  tp++;
303  else if ((tp[0] == '\\') &&
304  (tp[1] >= '0' && tp[1] <= '3') &&
305  (tp[2] >= '0' && tp[2] <= '7') &&
306  (tp[3] >= '0' && tp[3] <= '7'))
307  tp += 4;
308  else if ((tp[0] == '\\') &&
309  (tp[1] == '\\'))
310  tp += 2;
311  else
312  {
313  /*
314  * one backslash, not followed by another or ### valid octal
315  */
316  ereport(ERROR,
317  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
318  errmsg("invalid input syntax for type %s", "bytea")));
319  }
320  }
321 
322  bc += VARHDRSZ;
323 
324  result = (bytea *) palloc(bc);
325  SET_VARSIZE(result, bc);
326 
327  tp = inputText;
328  rp = VARDATA(result);
329  while (*tp != '\0')
330  {
331  if (tp[0] != '\\')
332  *rp++ = *tp++;
333  else if ((tp[0] == '\\') &&
334  (tp[1] >= '0' && tp[1] <= '3') &&
335  (tp[2] >= '0' && tp[2] <= '7') &&
336  (tp[3] >= '0' && tp[3] <= '7'))
337  {
338  bc = VAL(tp[1]);
339  bc <<= 3;
340  bc += VAL(tp[2]);
341  bc <<= 3;
342  *rp++ = bc + VAL(tp[3]);
343 
344  tp += 4;
345  }
346  else if ((tp[0] == '\\') &&
347  (tp[1] == '\\'))
348  {
349  *rp++ = '\\';
350  tp += 2;
351  }
352  else
353  {
354  /*
355  * We should never get here. The first pass should not allow it.
356  */
357  ereport(ERROR,
358  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
359  errmsg("invalid input syntax for type %s", "bytea")));
360  }
361  }
362 
363  PG_RETURN_BYTEA_P(result);
364 }
365 
366 /*
367  * byteaout - converts to printable representation of byte array
368  *
369  * In the traditional escaped format, non-printable characters are
370  * printed as '\nnn' (octal) and '\' as '\\'.
371  */
372 Datum
374 {
375  bytea *vlena = PG_GETARG_BYTEA_PP(0);
376  char *result;
377  char *rp;
378 
380  {
381  /* Print hex format */
382  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
383  *rp++ = '\\';
384  *rp++ = 'x';
385  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
386  }
387  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
388  {
389  /* Print traditional escaped format */
390  char *vp;
391  int len;
392  int i;
393 
394  len = 1; /* empty string has 1 char */
395  vp = VARDATA_ANY(vlena);
396  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
397  {
398  if (*vp == '\\')
399  len += 2;
400  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
401  len += 4;
402  else
403  len++;
404  }
405  rp = result = (char *) palloc(len);
406  vp = VARDATA_ANY(vlena);
407  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
408  {
409  if (*vp == '\\')
410  {
411  *rp++ = '\\';
412  *rp++ = '\\';
413  }
414  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
415  {
416  int val; /* holds unprintable chars */
417 
418  val = *vp;
419  rp[0] = '\\';
420  rp[3] = DIG(val & 07);
421  val >>= 3;
422  rp[2] = DIG(val & 07);
423  val >>= 3;
424  rp[1] = DIG(val & 03);
425  rp += 4;
426  }
427  else
428  *rp++ = *vp;
429  }
430  }
431  else
432  {
433  elog(ERROR, "unrecognized bytea_output setting: %d",
434  bytea_output);
435  rp = result = NULL; /* keep compiler quiet */
436  }
437  *rp = '\0';
438  PG_RETURN_CSTRING(result);
439 }
440 
441 /*
442  * bytearecv - converts external binary format to bytea
443  */
444 Datum
446 {
448  bytea *result;
449  int nbytes;
450 
451  nbytes = buf->len - buf->cursor;
452  result = (bytea *) palloc(nbytes + VARHDRSZ);
453  SET_VARSIZE(result, nbytes + VARHDRSZ);
454  pq_copymsgbytes(buf, VARDATA(result), nbytes);
455  PG_RETURN_BYTEA_P(result);
456 }
457 
458 /*
459  * byteasend - converts bytea to binary format
460  *
461  * This is a special case: just copy the input...
462  */
463 Datum
465 {
466  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
467 
468  PG_RETURN_BYTEA_P(vlena);
469 }
470 
471 Datum
473 {
475 
476  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
477 
478  /* Append the value unless null. */
479  if (!PG_ARGISNULL(1))
480  {
482 
483  /* On the first time through, we ignore the delimiter. */
484  if (state == NULL)
485  state = makeStringAggState(fcinfo);
486  else if (!PG_ARGISNULL(2))
487  {
488  bytea *delim = PG_GETARG_BYTEA_PP(2);
489 
491  }
492 
494  }
495 
496  /*
497  * The transition type for string_agg() is declared to be "internal",
498  * which is a pass-by-value type the same size as a pointer.
499  */
500  PG_RETURN_POINTER(state);
501 }
502 
503 Datum
505 {
507 
508  /* cannot be called directly because of internal-type argument */
509  Assert(AggCheckCallContext(fcinfo, NULL));
510 
511  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
512 
513  if (state != NULL)
514  {
515  bytea *result;
516 
517  result = (bytea *) palloc(state->len + VARHDRSZ);
518  SET_VARSIZE(result, state->len + VARHDRSZ);
519  memcpy(VARDATA(result), state->data, state->len);
520  PG_RETURN_BYTEA_P(result);
521  }
522  else
523  PG_RETURN_NULL();
524 }
525 
526 /*
527  * textin - converts "..." to internal representation
528  */
529 Datum
531 {
532  char *inputText = PG_GETARG_CSTRING(0);
533 
534  PG_RETURN_TEXT_P(cstring_to_text(inputText));
535 }
536 
537 /*
538  * textout - converts internal representation to "..."
539  */
540 Datum
542 {
543  Datum txt = PG_GETARG_DATUM(0);
544 
546 }
547 
548 /*
549  * textrecv - converts external binary format to text
550  */
551 Datum
553 {
555  text *result;
556  char *str;
557  int nbytes;
558 
559  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
560 
561  result = cstring_to_text_with_len(str, nbytes);
562  pfree(str);
563  PG_RETURN_TEXT_P(result);
564 }
565 
566 /*
567  * textsend - converts text to binary format
568  */
569 Datum
571 {
572  text *t = PG_GETARG_TEXT_PP(0);
574 
575  pq_begintypsend(&buf);
578 }
579 
580 
581 /*
582  * unknownin - converts "..." to internal representation
583  */
584 Datum
586 {
587  char *str = PG_GETARG_CSTRING(0);
588 
589  /* representation is same as cstring */
591 }
592 
593 /*
594  * unknownout - converts internal representation to "..."
595  */
596 Datum
598 {
599  /* representation is same as cstring */
600  char *str = PG_GETARG_CSTRING(0);
601 
603 }
604 
605 /*
606  * unknownrecv - converts external binary format to unknown
607  */
608 Datum
610 {
612  char *str;
613  int nbytes;
614 
615  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
616  /* representation is same as cstring */
617  PG_RETURN_CSTRING(str);
618 }
619 
620 /*
621  * unknownsend - converts unknown to binary format
622  */
623 Datum
625 {
626  /* representation is same as cstring */
627  char *str = PG_GETARG_CSTRING(0);
629 
630  pq_begintypsend(&buf);
631  pq_sendtext(&buf, str, strlen(str));
633 }
634 
635 
636 /* ========== PUBLIC ROUTINES ========== */
637 
638 /*
639  * textlen -
640  * returns the logical length of a text*
641  * (which is less than the VARSIZE of the text*)
642  */
643 Datum
645 {
647 
648  /* try to avoid decompressing argument */
650 }
651 
652 /*
653  * text_length -
654  * Does the real work for textlen()
655  *
656  * This is broken out so it can be called directly by other string processing
657  * functions. Note that the argument is passed as a Datum, to indicate that
658  * it may still be in compressed form. We can avoid decompressing it at all
659  * in some cases.
660  */
661 static int32
663 {
664  /* fastpath when max encoding length is one */
667  else
668  {
669  text *t = DatumGetTextPP(str);
670 
672  VARSIZE_ANY_EXHDR(t)));
673  }
674 }
675 
676 /*
677  * textoctetlen -
678  * returns the physical length of a text*
679  * (which is less than the VARSIZE of the text*)
680  */
681 Datum
683 {
685 
686  /* We need not detoast the input at all */
688 }
689 
690 /*
691  * textcat -
692  * takes two text* and returns a text* that is the concatenation of
693  * the two.
694  *
695  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
696  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
697  * Allocate space for output in all cases.
698  * XXX - thomas 1997-07-10
699  */
700 Datum
702 {
703  text *t1 = PG_GETARG_TEXT_PP(0);
704  text *t2 = PG_GETARG_TEXT_PP(1);
705 
707 }
708 
709 /*
710  * text_catenate
711  * Guts of textcat(), broken out so it can be used by other functions
712  *
713  * Arguments can be in short-header form, but not compressed or out-of-line
714  */
715 static text *
717 {
718  text *result;
719  int len1,
720  len2,
721  len;
722  char *ptr;
723 
724  len1 = VARSIZE_ANY_EXHDR(t1);
725  len2 = VARSIZE_ANY_EXHDR(t2);
726 
727  /* paranoia ... probably should throw error instead? */
728  if (len1 < 0)
729  len1 = 0;
730  if (len2 < 0)
731  len2 = 0;
732 
733  len = len1 + len2 + VARHDRSZ;
734  result = (text *) palloc(len);
735 
736  /* Set size of result string... */
737  SET_VARSIZE(result, len);
738 
739  /* Fill data field of result string... */
740  ptr = VARDATA(result);
741  if (len1 > 0)
742  memcpy(ptr, VARDATA_ANY(t1), len1);
743  if (len2 > 0)
744  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
745 
746  return result;
747 }
748 
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * It is caller's responsibility that there actually are n characters;
 * the string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	if (pg_database_encoding_max_length() == 1)
	{
		/* Optimization for single-byte encodings */
		return n;
	}
	else
	{
		const char *s;

		/* step over n characters, whatever their individual byte widths */
		for (s = p; n > 0; n--)
			s += pg_mblen(s);

		return s - p;
	}
}
774 
775 /*
776  * text_substr()
777  * Return a substring starting at the specified position.
778  * - thomas 1997-12-31
779  *
780  * Input:
781  * - string
782  * - starting position (is one-based)
783  * - string length
784  *
785  * If the starting position is zero or less, then return from the start of the string
786  * adjusting the length to be consistent with the "negative start" per SQL.
787  * If the length is less than zero, return the remaining string.
788  *
789  * Added multibyte support.
790  * - Tatsuo Ishii 1998-4-21
791  * Changed behavior if starting position is less than one to conform to SQL behavior.
792  * Formerly returned the entire string; now returns a portion.
793  * - Thomas Lockhart 1998-12-10
794  * Now uses faster TOAST-slicing interface
795  * - John Gray 2002-02-22
796  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
797  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
798  * error; if E < 1, return '', not entire string). Fixed MB related bug when
799  * S > LC and < LC + 4 sometimes garbage characters are returned.
800  * - Joe Conway 2002-08-10
801  */
802 Datum
804 {
806  PG_GETARG_INT32(1),
807  PG_GETARG_INT32(2),
808  false));
809 }
810 
811 /*
812  * text_substr_no_len -
813  * Wrapper to avoid opr_sanity failure due to
814  * one function accepting a different number of args.
815  */
816 Datum
818 {
820  PG_GETARG_INT32(1),
821  -1, true));
822 }
823 
824 /*
825  * text_substring -
826  * Does the real work for text_substr() and text_substr_no_len()
827  *
828  * This is broken out so it can be called directly by other string processing
829  * functions. Note that the argument is passed as a Datum, to indicate that
830  * it may still be in compressed/toasted form. We can avoid detoasting all
831  * of it in some cases.
832  *
833  * The result is always a freshly palloc'd datum.
834  */
835 static text *
836 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
837 {
839  int32 S = start; /* start position */
840  int32 S1; /* adjusted start position */
841  int32 L1; /* adjusted substring length */
842 
843  /* life is easy if the encoding max length is 1 */
844  if (eml == 1)
845  {
846  S1 = Max(S, 1);
847 
848  if (length_not_specified) /* special case - get length to end of
849  * string */
850  L1 = -1;
851  else
852  {
853  /* end position */
854  int E = S + length;
855 
856  /*
857  * A negative value for L is the only way for the end position to
858  * be before the start. SQL99 says to throw an error.
859  */
860  if (E < S)
861  ereport(ERROR,
862  (errcode(ERRCODE_SUBSTRING_ERROR),
863  errmsg("negative substring length not allowed")));
864 
865  /*
866  * A zero or negative value for the end position can happen if the
867  * start was negative or one. SQL99 says to return a zero-length
868  * string.
869  */
870  if (E < 1)
871  return cstring_to_text("");
872 
873  L1 = E - S1;
874  }
875 
876  /*
877  * If the start position is past the end of the string, SQL99 says to
878  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
879  * that for us. Convert to zero-based starting position
880  */
881  return DatumGetTextPSlice(str, S1 - 1, L1);
882  }
883  else if (eml > 1)
884  {
885  /*
886  * When encoding max length is > 1, we can't get LC without
887  * detoasting, so we'll grab a conservatively large slice now and go
888  * back later to do the right thing
889  */
890  int32 slice_start;
891  int32 slice_size;
892  int32 slice_strlen;
893  text *slice;
894  int32 E1;
895  int32 i;
896  char *p;
897  char *s;
898  text *ret;
899 
900  /*
901  * if S is past the end of the string, the tuple toaster will return a
902  * zero-length string to us
903  */
904  S1 = Max(S, 1);
905 
906  /*
907  * We need to start at position zero because there is no way to know
908  * in advance which byte offset corresponds to the supplied start
909  * position.
910  */
911  slice_start = 0;
912 
913  if (length_not_specified) /* special case - get length to end of
914  * string */
915  slice_size = L1 = -1;
916  else
917  {
918  int E = S + length;
919 
920  /*
921  * A negative value for L is the only way for the end position to
922  * be before the start. SQL99 says to throw an error.
923  */
924  if (E < S)
925  ereport(ERROR,
926  (errcode(ERRCODE_SUBSTRING_ERROR),
927  errmsg("negative substring length not allowed")));
928 
929  /*
930  * A zero or negative value for the end position can happen if the
931  * start was negative or one. SQL99 says to return a zero-length
932  * string.
933  */
934  if (E < 1)
935  return cstring_to_text("");
936 
937  /*
938  * if E is past the end of the string, the tuple toaster will
939  * truncate the length for us
940  */
941  L1 = E - S1;
942 
943  /*
944  * Total slice size in bytes can't be any longer than the start
945  * position plus substring length times the encoding max length.
946  */
947  slice_size = (S1 + L1) * eml;
948  }
949 
950  /*
951  * If we're working with an untoasted source, no need to do an extra
952  * copying step.
953  */
956  slice = DatumGetTextPSlice(str, slice_start, slice_size);
957  else
958  slice = (text *) DatumGetPointer(str);
959 
960  /* see if we got back an empty string */
961  if (VARSIZE_ANY_EXHDR(slice) == 0)
962  {
963  if (slice != (text *) DatumGetPointer(str))
964  pfree(slice);
965  return cstring_to_text("");
966  }
967 
968  /* Now we can get the actual length of the slice in MB characters */
969  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
970  VARSIZE_ANY_EXHDR(slice));
971 
972  /*
973  * Check that the start position wasn't > slice_strlen. If so, SQL99
974  * says to return a zero-length string.
975  */
976  if (S1 > slice_strlen)
977  {
978  if (slice != (text *) DatumGetPointer(str))
979  pfree(slice);
980  return cstring_to_text("");
981  }
982 
983  /*
984  * Adjust L1 and E1 now that we know the slice string length. Again
985  * remember that S1 is one based, and slice_start is zero based.
986  */
987  if (L1 > -1)
988  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
989  else
990  E1 = slice_start + 1 + slice_strlen;
991 
992  /*
993  * Find the start position in the slice; remember S1 is not zero based
994  */
995  p = VARDATA_ANY(slice);
996  for (i = 0; i < S1 - 1; i++)
997  p += pg_mblen(p);
998 
999  /* hang onto a pointer to our start position */
1000  s = p;
1001 
1002  /*
1003  * Count the actual bytes used by the substring of the requested
1004  * length.
1005  */
1006  for (i = S1; i < E1; i++)
1007  p += pg_mblen(p);
1008 
1009  ret = (text *) palloc(VARHDRSZ + (p - s));
1010  SET_VARSIZE(ret, VARHDRSZ + (p - s));
1011  memcpy(VARDATA(ret), s, (p - s));
1012 
1013  if (slice != (text *) DatumGetPointer(str))
1014  pfree(slice);
1015 
1016  return ret;
1017  }
1018  else
1019  elog(ERROR, "invalid backend encoding: encoding max length < 1");
1020 
1021  /* not reached: suppress compiler warning */
1022  return NULL;
1023 }
1024 
1025 /*
1026  * textoverlay
1027  * Replace specified substring of first string with second
1028  *
1029  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1030  * This code is a direct implementation of what the standard says.
1031  */
1032 Datum
1034 {
1035  text *t1 = PG_GETARG_TEXT_PP(0);
1036  text *t2 = PG_GETARG_TEXT_PP(1);
1037  int sp = PG_GETARG_INT32(2); /* substring start position */
1038  int sl = PG_GETARG_INT32(3); /* substring length */
1039 
1040  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1041 }
1042 
1043 Datum
1045 {
1046  text *t1 = PG_GETARG_TEXT_PP(0);
1047  text *t2 = PG_GETARG_TEXT_PP(1);
1048  int sp = PG_GETARG_INT32(2); /* substring start position */
1049  int sl;
1050 
1051  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1052  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1053 }
1054 
1055 static text *
1056 text_overlay(text *t1, text *t2, int sp, int sl)
1057 {
1058  text *result;
1059  text *s1;
1060  text *s2;
1061  int sp_pl_sl;
1062 
1063  /*
1064  * Check for possible integer-overflow cases. For negative sp, throw a
1065  * "substring length" error because that's what should be expected
1066  * according to the spec's definition of OVERLAY().
1067  */
1068  if (sp <= 0)
1069  ereport(ERROR,
1070  (errcode(ERRCODE_SUBSTRING_ERROR),
1071  errmsg("negative substring length not allowed")));
1072  if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1073  ereport(ERROR,
1074  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1075  errmsg("integer out of range")));
1076 
1077  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1078  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1079  result = text_catenate(s1, t2);
1080  result = text_catenate(result, s2);
1081 
1082  return result;
1083 }
1084 
1085 /*
1086  * textpos -
1087  * Return the position of the specified substring.
1088  * Implements the SQL POSITION() function.
1089  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1090  * - thomas 1997-07-27
1091  */
1092 Datum
1094 {
1095  text *str = PG_GETARG_TEXT_PP(0);
1096  text *search_str = PG_GETARG_TEXT_PP(1);
1097 
1098  PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
1099 }
1100 
1101 /*
1102  * text_position -
1103  * Does the real work for textpos()
1104  *
1105  * Inputs:
1106  * t1 - string to be searched
1107  * t2 - pattern to match within t1
1108  * Result:
1109  * Character index of the first matched char, starting from 1,
1110  * or 0 if no match.
1111  *
1112  * This is broken out so it can be called directly by other string processing
1113  * functions.
1114  */
1115 static int
1116 text_position(text *t1, text *t2, Oid collid)
1117 {
1119  int result;
1120 
1121  /* Empty needle always matches at position 1 */
1122  if (VARSIZE_ANY_EXHDR(t2) < 1)
1123  return 1;
1124 
1125  /* Otherwise, can't match if haystack is shorter than needle */
1126  if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2))
1127  return 0;
1128 
1129  text_position_setup(t1, t2, collid, &state);
1130  if (!text_position_next(&state))
1131  result = 0;
1132  else
1133  result = text_position_get_match_pos(&state);
1134  text_position_cleanup(&state);
1135  return result;
1136 }
1137 
1138 
1139 /*
1140  * text_position_setup, text_position_next, text_position_cleanup -
1141  * Component steps of text_position()
1142  *
1143  * These are broken out so that a string can be efficiently searched for
1144  * multiple occurrences of the same pattern. text_position_next may be
1145  * called multiple times, and it advances to the next match on each call.
1146  * text_position_get_match_ptr() and text_position_get_match_pos() return
1147  * a pointer or 1-based character position of the last match, respectively.
1148  *
1149  * The "state" variable is normally just a local variable in the caller.
1150  *
1151  * NOTE: text_position_next skips over the matched portion. For example,
1152  * searching for "xx" in "xxx" returns only one match, not two.
1153  */
1154 
1155 static void
1157 {
1158  int len1 = VARSIZE_ANY_EXHDR(t1);
1159  int len2 = VARSIZE_ANY_EXHDR(t2);
1160  pg_locale_t mylocale = 0; /* stays 0 for the C and default collations */
1161
1162  check_collation_set(collid);
1163
1164  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1165  mylocale = pg_newlocale_from_collation(collid);
1166
 /* The byte-wise search below cannot honor a nondeterministic collation. */
1167  if (mylocale && !mylocale->deterministic)
1168  ereport(ERROR,
1169  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1170  errmsg("nondeterministic collations are not supported for substring searches")));
1171
 /* Both the haystack and the needle are expected to be non-empty here. */
1172  Assert(len1 > 0);
1173  Assert(len2 > 0);
1174
1175  /*
1176  * Even with a multi-byte encoding, we perform the search using the raw
1177  * byte sequence, ignoring multibyte issues. For UTF-8, that works fine,
1178  * because in UTF-8 the byte sequence of one character cannot contain
1179  * another character. For other multi-byte encodings, we do the search
1180  * initially as a simple byte search, ignoring multibyte issues, but
1181  * verify afterwards that the match we found is at a character boundary,
1182  * and continue the search if it was a false match.
1183  */
1185  {
1186  state->is_multibyte = false;
1187  state->is_multibyte_char_in_char = false;
1188  }
1189  else if (GetDatabaseEncoding() == PG_UTF8)
1190  {
1191  state->is_multibyte = true;
1192  state->is_multibyte_char_in_char = false;
1193  }
1194  else
1195  {
1196  state->is_multibyte = true;
1197  state->is_multibyte_char_in_char = true;
1198  }
1199
 /* The state only points into the caller's strings; nothing is copied. */
1200  state->str1 = VARDATA_ANY(t1);
1201  state->str2 = VARDATA_ANY(t2);
1202  state->len1 = len1;
1203  state->len2 = len2;
1204  state->last_match = NULL; /* no match found yet */
1205  state->refpoint = state->str1; /* byte-to-char conversion starts at the beginning */
1206  state->refpos = 0;
1207
1208  /*
1209  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1210  * notes we use the terminology that the "haystack" is the string to be
1211  * searched (t1) and the "needle" is the pattern being sought (t2).
1212  *
1213  * If the needle is empty or bigger than the haystack then there is no
1214  * point in wasting cycles initializing the table. We also choose not to
1215  * use B-M-H for needles of length 1, since the skip table can't possibly
1216  * save anything in that case.
1217  */
1218  if (len1 >= len2 && len2 > 1)
1219  {
1220  int searchlength = len1 - len2;
1221  int skiptablemask;
1222  int last;
1223  int i;
1224  const char *str2 = state->str2;
1225
1226  /*
1227  * First we must determine how much of the skip table to use. The
1228  * declaration of TextPositionState allows up to 256 elements, but for
1229  * short search problems we don't really want to have to initialize so
1230  * many elements --- it would take too long in comparison to the
1231  * actual search time. So we choose a useful skip table size based on
1232  * the haystack length minus the needle length. The closer the needle
1233  * length is to the haystack length the less useful skipping becomes.
1234  *
1235  * Note: since we use bit-masking to select table elements, the skip
1236  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1237  */
1238  if (searchlength < 16)
1239  skiptablemask = 3;
1240  else if (searchlength < 64)
1241  skiptablemask = 7;
1242  else if (searchlength < 128)
1243  skiptablemask = 15;
1244  else if (searchlength < 512)
1245  skiptablemask = 31;
1246  else if (searchlength < 2048)
1247  skiptablemask = 63;
1248  else if (searchlength < 4096)
1249  skiptablemask = 127;
1250  else
1251  skiptablemask = 255;
1252  state->skiptablemask = skiptablemask;
1253
1254  /*
1255  * Initialize the skip table. We set all elements to the needle
1256  * length, since this is the correct skip distance for any character
1257  * not found in the needle.
1258  */
1259  for (i = 0; i <= skiptablemask; i++)
1260  state->skiptable[i] = len2;
1261
1262  /*
1263  * Now examine the needle. For each character except the last one,
1264  * set the corresponding table element to the appropriate skip
1265  * distance. Note that when two characters share the same skip table
1266  * entry, the one later in the needle must determine the skip
1267  * distance. Scanning in ascending order achieves that, because each
1268  * later assignment overwrites the entry with a smaller distance.
1268  */
1269  last = len2 - 1;
1270
1271  for (i = 0; i < last; i++)
1272  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1273  }
1274 }
1275 
1276 /*
1277  * Advance to the next match, starting from the end of the previous match
1278  * (or the beginning of the string, on first call). Returns true if a match
1279  * is found.
1280  *
1281  * Note that this refuses to match an empty-string needle. Most callers
1282  * will have handled that case specially and we'll never see it here.
1283  */
1284 static bool
1286 {
1287  int needle_len = state->len2;
1288  char *start_ptr;
1289  char *matchptr;
1290
1291  if (needle_len <= 0)
1292  return false; /* result for empty pattern */
1293
1294  /* Start from the point right after the previous match. */
1295  if (state->last_match)
1296  start_ptr = state->last_match + needle_len;
1297  else
1298  start_ptr = state->str1; /* first call: start of the haystack */
1299
1300 retry:
1301  matchptr = text_position_next_internal(start_ptr, state);
1302
1303  if (!matchptr)
1304  return false; /* byte sequence does not occur again */
1305
1306  /*
1307  * Found a match for the byte sequence. If this is a multibyte encoding,
1308  * where one character's byte sequence can appear inside a longer
1309  * multi-byte character, we need to verify that the match was at a
1310  * character boundary, not in the middle of a multi-byte character.
1311  */
1312  if (state->is_multibyte_char_in_char)
1313  {
1314  /* Walk one character at a time, until we reach the match. */
1315
1316  /* the search should never move backwards. */
1317  Assert(state->refpoint <= matchptr);
1318
1319  while (state->refpoint < matchptr)
1320  {
1321  /* step to next character. */
1322  state->refpoint += pg_mblen(state->refpoint);
1323  state->refpos++;
1324
1325  /*
1326  * If we stepped over the match's start position, then it was a
1327  * false positive, where the byte sequence appeared in the middle
1328  * of a multi-byte character. Skip it, and continue the search at
1329  * the next character boundary.
1330  */
1331  if (state->refpoint > matchptr)
1332  {
1333  start_ptr = state->refpoint;
1334  goto retry;
1335  }
1336  }
1337  }
1338
 /* Remember the match; the next call resumes right after it. */
1339  state->last_match = matchptr;
1340  return true;
1341 }
1342 
1343 /*
1344  * Subroutine of text_position_next(). This searches for the raw byte
1345  * sequence, ignoring any multi-byte encoding issues. Returns the first
1346  * match starting at 'start_ptr', or NULL if no match is found.
1347  */
1348 static char *
1350 {
1351  int haystack_len = state->len1;
1352  int needle_len = state->len2;
1353  int skiptablemask = state->skiptablemask;
1354  const char *haystack = state->str1;
1355  const char *needle = state->str2;
1356  const char *haystack_end = &haystack[haystack_len];
1357  const char *hptr;
1358
1359  Assert(start_ptr >= haystack && start_ptr <= haystack_end);
1360
1361  if (needle_len == 1)
1362  {
1363  /* No point in using B-M-H for a one-character needle */
1364  char nchar = *needle;
1365
1366  hptr = start_ptr;
1367  while (hptr < haystack_end)
1368  {
1369  if (*hptr == nchar)
1370  return (char *) hptr;
1371  hptr++;
1372  }
1373  }
1374  else
1375  {
1376  const char *needle_last = &needle[needle_len - 1];
1377
1378  /* Point hptr at the haystack byte aligned with the needle's last byte */
1379  hptr = start_ptr + needle_len - 1;
1380  while (hptr < haystack_end)
1381  {
1382  /* Match the needle scanning *backward* */
1383  const char *nptr;
1384  const char *p;
1385
1386  nptr = needle_last;
1387  p = hptr;
1388  while (*nptr == *p)
1389  {
1390  /* Matched it all? If so, return a pointer to the match start */
1391  if (nptr == needle)
1392  return (char *) p;
1393  nptr--, p--;
1394  }
1395
1396  /*
1397  * No match, so use the haystack char at hptr to decide how far to
1398  * advance. If the needle had any occurrence of that character
1399  * (or more precisely, one sharing the same skiptable entry)
1400  * before its last character, then we advance far enough to align
1401  * the last such needle character with that haystack position.
1402  * Otherwise we can advance by the whole needle length.
1403  */
1404  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1405  }
1406  }
1407
1408  return 0; /* not found; the 0 is a null pointer (callers test with !matchptr) */
1409 }
1410 
1411 /*
1412  * Return a pointer to the current match.
1413  *
1414  * The returned pointer points to the position of the match within the
1415  * original haystack string (it is state->last_match, not a copy).
1416  */
1417 static char *
1419 {
1420  return state->last_match;
1421 }
1422 
1423 /*
1424  * Return the offset of the current match.
1425  *
1426  * The offset is in characters, 1-based.
 *
 * NOTE(review): this reads state->last_match, which is NULL until a match
 * has been found; presumably callers only call it after a successful
 * search -- confirm against callers.
1427  */
1428 static int
1430 {
1431  if (!state->is_multibyte)
1432  return state->last_match - state->str1 + 1; /* byte offset is the char offset */
1433  else
1434  {
1435  /*
 * Convert the byte position to char position, resuming from the last
 * converted position (refpoint/refpos) rather than from the start.
 */
1436  while (state->refpoint < state->last_match)
1437  {
1438  state->refpoint += pg_mblen(state->refpoint);
1439  state->refpos++;
1440  }
 /* A match is expected to begin on a character boundary. */
1441  Assert(state->refpoint == state->last_match);
1442  return state->refpos + 1; /* refpos is 0-based */
1443  }
1444 }
1445 
1446 static void
1448 {
1449  /* no cleanup needed; this function intentionally does nothing */
1450 }
1451 
/*
 * Raise an ERROR (ERRCODE_INDETERMINATE_COLLATION) if collid is not a valid
 * OID; otherwise do nothing.
 */
1452 static void
1454 {
1455  if (!OidIsValid(collid))
1456  {
1457  /*
1458  * This typically means that the parser could not resolve a conflict
1459  * of implicit collations, so report it that way.
1460  */
1461  ereport(ERROR,
1462  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1463  errmsg("could not determine which collation to use for string comparison"),
1464  errhint("Use the COLLATE clause to set the collation explicitly.")));
1465  }
1466 }
1467 
1468 /* varstr_cmp()
1469  * Comparison function for text strings with given lengths.
1470  * Includes locale support, but must copy strings to temporary memory
1471  * to allow null-termination for inputs to strcoll().
1472  * Returns an integer less than, equal to, or greater than zero, indicating
1473  * whether arg1 is less than, equal to, or greater than arg2.
1474  *
1475  * Note: many functions that depend on this are marked leakproof; therefore,
1476  * avoid reporting the actual contents of the input when throwing errors.
1477  * All errors herein should be things that can't happen except on corrupt
1478  * data, anyway; otherwise we will have trouble with indexing strings that
1479  * would cause them.
1480  */
1481 int
1482 varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
1483 {
1484  int result;
1485
1486  check_collation_set(collid);
1487
1488  /*
1489  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1490  * have to do some memory copying. This turns out to be significantly
1491  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1492  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1493  */
1494  if (lc_collate_is_c(collid))
1495  {
 /* Bytewise comparison; on a common prefix the shorter string sorts first. */
1496  result = memcmp(arg1, arg2, Min(len1, len2));
1497  if ((result == 0) && (len1 != len2))
1498  result = (len1 < len2) ? -1 : 1;
1499  }
1500  else
1501  {
1502  char a1buf[TEXTBUFLEN];
1503  char a2buf[TEXTBUFLEN];
1504  char *a1p,
1505  *a2p;
1506  pg_locale_t mylocale = 0; /* stays 0 for the default collation */
1507
1508  if (collid != DEFAULT_COLLATION_OID)
1509  mylocale = pg_newlocale_from_collation(collid);
1510
1511  /*
1512  * memcmp() can't tell us which of two unequal strings sorts first,
1513  * but it's a cheap way to tell if they're equal. Testing shows that
1514  * memcmp() followed by strcoll() is only trivially slower than
1515  * strcoll() by itself, so we don't lose much if this doesn't work out
1516  * very often, and if it does - for example, because there are many
1517  * equal strings in the input - then we win big by avoiding expensive
1518  * collation-aware comparisons.
1519  */
1520  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1521  return 0;
1522
1523 #ifdef WIN32
1524  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1525  if (GetDatabaseEncoding() == PG_UTF8
1526  && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
1527  {
1528  int a1len;
1529  int a2len;
1530  int r;
1531
 /*
 * Buffer sizes are in bytes: two bytes per input byte plus two for the
 * terminator. Short inputs use the stack buffers instead of palloc.
 */
1532  if (len1 >= TEXTBUFLEN / 2)
1533  {
1534  a1len = len1 * 2 + 2;
1535  a1p = palloc(a1len);
1536  }
1537  else
1538  {
1539  a1len = TEXTBUFLEN;
1540  a1p = a1buf;
1541  }
1542  if (len2 >= TEXTBUFLEN / 2)
1543  {
1544  a2len = len2 * 2 + 2;
1545  a2p = palloc(a2len);
1546  }
1547  else
1548  {
1549  a2len = TEXTBUFLEN;
1550  a2p = a2buf;
1551  }
1552
1553  /* MultiByteToWideChar() does not work for zero-length input */
1554  if (len1 == 0)
1555  r = 0;
1556  else
1557  {
1558  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1559  (LPWSTR) a1p, a1len / 2);
1560  if (!r)
1561  ereport(ERROR,
1562  (errmsg("could not convert string to UTF-16: error code %lu",
1563  GetLastError())));
1564  }
1565  ((LPWSTR) a1p)[r] = 0; /* terminate the wide string */
1566
1567  if (len2 == 0)
1568  r = 0;
1569  else
1570  {
1571  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1572  (LPWSTR) a2p, a2len / 2);
1573  if (!r)
1574  ereport(ERROR,
1575  (errmsg("could not convert string to UTF-16: error code %lu",
1576  GetLastError())));
1577  }
1578  ((LPWSTR) a2p)[r] = 0; /* terminate the wide string */
1579
1580  errno = 0;
1581 #ifdef HAVE_LOCALE_T
1582  if (mylocale)
1583  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
1584  else
1585 #endif
1586  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1587  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1588  * headers */
1589  ereport(ERROR,
1590  (errmsg("could not compare Unicode strings: %m")));
1591
1592  /*
 * Break tie if necessary: unless the collation is nondeterministic,
 * fall back to a bytewise comparison of the original inputs.
 */
1593  if (result == 0 &&
1594  (!mylocale || mylocale->deterministic))
1595  {
1596  result = memcmp(arg1, arg2, Min(len1, len2));
1597  if ((result == 0) && (len1 != len2))
1598  result = (len1 < len2) ? -1 : 1;
1599  }
1600
1601  if (a1p != a1buf)
1602  pfree(a1p);
1603  if (a2p != a2buf)
1604  pfree(a2p);
1605
1606  return result;
1607  }
1608 #endif /* WIN32 */
1609
 /*
 * Make NUL-terminated copies of both inputs; strings shorter than
 * TEXTBUFLEN use the stack buffers, longer ones are palloc'd.
 */
1610  if (len1 >= TEXTBUFLEN)
1611  a1p = (char *) palloc(len1 + 1);
1612  else
1613  a1p = a1buf;
1614  if (len2 >= TEXTBUFLEN)
1615  a2p = (char *) palloc(len2 + 1);
1616  else
1617  a2p = a2buf;
1618
1619  memcpy(a1p, arg1, len1);
1620  a1p[len1] = '\0';
1621  memcpy(a2p, arg2, len2);
1622  a2p[len2] = '\0';
1623
1624  if (mylocale)
1625  {
1626  if (mylocale->provider == COLLPROVIDER_ICU)
1627  {
1628 #ifdef USE_ICU
1629 #ifdef HAVE_UCOL_STRCOLLUTF8
1630  if (GetDatabaseEncoding() == PG_UTF8)
1631  {
1632  UErrorCode status;
1633
 /* This call takes explicit lengths, so the original inputs are used. */
1634  status = U_ZERO_ERROR;
1635  result = ucol_strcollUTF8(mylocale->info.icu.ucol,
1636  arg1, len1,
1637  arg2, len2,
1638  &status);
1639  if (U_FAILURE(status))
1640  ereport(ERROR,
1641  (errmsg("collation failed: %s", u_errorName(status))));
1642  }
1643  else
1644 #endif
1645  {
1646  int32_t ulen1,
1647  ulen2;
1648  UChar *uchar1,
1649  *uchar2;
1650
 /* Convert both inputs to UChar strings, compare, then free them. */
1651  ulen1 = icu_to_uchar(&uchar1, arg1, len1);
1652  ulen2 = icu_to_uchar(&uchar2, arg2, len2);
1653
1654  result = ucol_strcoll(mylocale->info.icu.ucol,
1655  uchar1, ulen1,
1656  uchar2, ulen2);
1657
1658  pfree(uchar1);
1659  pfree(uchar2);
1660  }
1661 #else /* not USE_ICU */
1662  /* shouldn't happen */
1663  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1664 #endif /* not USE_ICU */
1665  }
1666  else
1667  {
1668 #ifdef HAVE_LOCALE_T
1669  result = strcoll_l(a1p, a2p, mylocale->info.lt);
1670 #else
1671  /* shouldn't happen */
1672  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1673 #endif
1674  }
1675  }
1676  else
1677  result = strcoll(a1p, a2p);
1678
1679  /*
 * Break tie if necessary. Bytewise-identical inputs already returned 0
 * above, so unless the collation is nondeterministic we fall back to
 * strcmp() on the NUL-terminated copies.
 */
1680  if (result == 0 &&
1681  (!mylocale || mylocale->deterministic))
1682  result = strcmp(a1p, a2p);
1683
1684  if (a1p != a1buf)
1685  pfree(a1p);
1686  if (a2p != a2buf)
1687  pfree(a2p);
1688  }
1689
1690  return result;
1691 }
1692 
1693 /* text_cmp()
1694  * Internal comparison function for text strings.
1695  * Returns the result of varstr_cmp() unchanged: an integer less than,
 * equal to, or greater than zero (not necessarily -1, 0 or 1).
1696  */
1697 static int
1698 text_cmp(text *arg1, text *arg2, Oid collid)
1699 {
1700  char *a1p,
1701  *a2p;
1702  int len1,
1703  len2;
1704
 /* Extract data pointers and payload lengths (header excluded). */
1705  a1p = VARDATA_ANY(arg1);
1706  a2p = VARDATA_ANY(arg2);
1707
1708  len1 = VARSIZE_ANY_EXHDR(arg1);
1709  len2 = VARSIZE_ANY_EXHDR(arg2);
1710
1711  return varstr_cmp(a1p, len1, a2p, len2, collid);
1712 }
1713 
1714 /*
1715  * Comparison functions for text strings.
1716  *
1717  * Note: btree indexes need these routines not to leak memory; therefore,
1718  * be careful to free working copies of toasted datums. Most places don't
1719  * need to be so careful.
1720  */
1721 
1722 Datum
1724 {
1725  Oid collid = PG_GET_COLLATION();
1726  bool result;
1727
1728  check_collation_set(collid);
1729
 /* C, default, or any deterministic collation: decide equality bytewise. */
1730  if (lc_collate_is_c(collid) ||
1731  collid == DEFAULT_COLLATION_OID ||
1732  pg_newlocale_from_collation(collid)->deterministic)
1733  {
1734  Datum arg1 = PG_GETARG_DATUM(0);
1735  Datum arg2 = PG_GETARG_DATUM(1);
1736  Size len1,
1737  len2;
1738
1739  /*
1740  * Since we only care about equality or not-equality, we can avoid all
1741  * the expense of strcoll() here, and just do bitwise comparison. In
1742  * fact, we don't even have to do a bitwise comparison if we can show
1743  * the lengths of the strings are unequal; which might save us from
1744  * having to detoast one or both values.
1745  */
1746  len1 = toast_raw_datum_size(arg1);
1747  len2 = toast_raw_datum_size(arg2);
1748  if (len1 != len2)
1749  result = false;
1750  else
1751  {
1752  text *targ1 = DatumGetTextPP(arg1);
1753  text *targ2 = DatumGetTextPP(arg2);
1754
 /* len1 is a raw datum size, so subtract the header to get the payload length. */
1755  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1756  len1 - VARHDRSZ) == 0);
1757
1758  PG_FREE_IF_COPY(targ1, 0);
1759  PG_FREE_IF_COPY(targ2, 1);
1760  }
1761  }
1762  else
1763  {
 /* Nondeterministic collation: a collation-aware comparison is required. */
1764  text *arg1 = PG_GETARG_TEXT_PP(0);
1765  text *arg2 = PG_GETARG_TEXT_PP(1);
1766
1767  result = (text_cmp(arg1, arg2, collid) == 0);
1768
1769  PG_FREE_IF_COPY(arg1, 0);
1770  PG_FREE_IF_COPY(arg2, 1);
1771  }
1772
1773  PG_RETURN_BOOL(result);
1774 }
1775 
1776 Datum
1778 {
1779  Oid collid = PG_GET_COLLATION();
1780  bool result;
1781
1782  check_collation_set(collid);
1783
 /* C, default, or any deterministic collation: decide inequality bytewise. */
1784  if (lc_collate_is_c(collid) ||
1785  collid == DEFAULT_COLLATION_OID ||
1786  pg_newlocale_from_collation(collid)->deterministic)
1787  {
1788  Datum arg1 = PG_GETARG_DATUM(0);
1789  Datum arg2 = PG_GETARG_DATUM(1);
1790  Size len1,
1791  len2;
1792
1793  /* See comment in texteq() */
1794  len1 = toast_raw_datum_size(arg1);
1795  len2 = toast_raw_datum_size(arg2);
1796  if (len1 != len2)
1797  result = true; /* different sizes: unequal without detoasting */
1798  else
1799  {
1800  text *targ1 = DatumGetTextPP(arg1);
1801  text *targ2 = DatumGetTextPP(arg2);
1802
 /* len1 is a raw datum size, so subtract the header to get the payload length. */
1803  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1804  len1 - VARHDRSZ) != 0);
1805
1806  PG_FREE_IF_COPY(targ1, 0);
1807  PG_FREE_IF_COPY(targ2, 1);
1808  }
1809  }
1810  else
1811  {
 /* Nondeterministic collation: a collation-aware comparison is required. */
1812  text *arg1 = PG_GETARG_TEXT_PP(0);
1813  text *arg2 = PG_GETARG_TEXT_PP(1);
1814
1815  result = (text_cmp(arg1, arg2, collid) != 0);
1816
1817  PG_FREE_IF_COPY(arg1, 0);
1818  PG_FREE_IF_COPY(arg2, 1);
1819  }
1820
1821  PG_RETURN_BOOL(result);
1822 }
1823 
1824 Datum
1826 {
1827  text *arg1 = PG_GETARG_TEXT_PP(0);
1828  text *arg2 = PG_GETARG_TEXT_PP(1);
1829  bool result;
1830
 /* True when argument 0 compares less than argument 1 under the call's collation. */
1831  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1832
 /* Free any working copies made when fetching the arguments. */
1833  PG_FREE_IF_COPY(arg1, 0);
1834  PG_FREE_IF_COPY(arg2, 1);
1835
1836  PG_RETURN_BOOL(result);
1837 }
1838 
1839 Datum
1841 {
1842  text *arg1 = PG_GETARG_TEXT_PP(0);
1843  text *arg2 = PG_GETARG_TEXT_PP(1);
1844  bool result;
1845
 /* True when argument 0 compares less than or equal to argument 1. */
1846  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1847
 /* Free any working copies made when fetching the arguments. */
1848  PG_FREE_IF_COPY(arg1, 0);
1849  PG_FREE_IF_COPY(arg2, 1);
1850
1851  PG_RETURN_BOOL(result);
1852 }
1853 
1854 Datum
1856 {
1857  text *arg1 = PG_GETARG_TEXT_PP(0);
1858  text *arg2 = PG_GETARG_TEXT_PP(1);
1859  bool result;
1860
 /* True when argument 0 compares greater than argument 1 under the call's collation. */
1861  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1862
 /* Free any working copies made when fetching the arguments. */
1863  PG_FREE_IF_COPY(arg1, 0);
1864  PG_FREE_IF_COPY(arg2, 1);
1865
1866  PG_RETURN_BOOL(result);
1867 }
1868 
1869 Datum
1871 {
1872  text *arg1 = PG_GETARG_TEXT_PP(0);
1873  text *arg2 = PG_GETARG_TEXT_PP(1);
1874  bool result;
1875
 /* True when argument 0 compares greater than or equal to argument 1. */
1876  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1877
 /* Free any working copies made when fetching the arguments. */
1878  PG_FREE_IF_COPY(arg1, 0);
1879  PG_FREE_IF_COPY(arg2, 1);
1880
1881  PG_RETURN_BOOL(result);
1882 }
1883 
1884 Datum
1886 {
1887  Datum arg1 = PG_GETARG_DATUM(0);
1888  Datum arg2 = PG_GETARG_DATUM(1);
1889  Oid collid = PG_GET_COLLATION();
1890  pg_locale_t mylocale = 0; /* stays 0 for the C and default collations */
1891  bool result;
1892  Size len1,
1893  len2;
1894
1895  check_collation_set(collid);
1896
1897  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1898  mylocale = pg_newlocale_from_collation(collid);
1899
 /* The bytewise prefix test below cannot honor a nondeterministic collation. */
1900  if (mylocale && !mylocale->deterministic)
1901  ereport(ERROR,
1902  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1903  errmsg("nondeterministic collations are not supported for substring searches")));
1904
1905  len1 = toast_raw_datum_size(arg1);
1906  len2 = toast_raw_datum_size(arg2);
1907  if (len2 > len1)
1908  result = false; /* argument 1 is larger than argument 0, so it cannot be a prefix */
1909  else
1910  {
 /*
 * NOTE(review): len2 is a raw datum size (texteq() subtracts VARHDRSZ
 * from the same quantity to get a payload length), so the length passed
 * to text_substring() may exceed argument 1's payload length. The
 * memcmp() only examines VARSIZE_ANY_EXHDR(targ2) bytes -- confirm that
 * the extra requested length is intentional and harmless.
 */
1911  text *targ1 = text_substring(arg1, 1, len2, false);
1912  text *targ2 = DatumGetTextPP(arg2);
1913
1914  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1915  VARSIZE_ANY_EXHDR(targ2)) == 0);
1916
1917  PG_FREE_IF_COPY(targ1, 0);
1918  PG_FREE_IF_COPY(targ2, 1);
1919  }
1920
1921  PG_RETURN_BOOL(result);
1922 }
1923 
1924 Datum
1926 {
1927  text *arg1 = PG_GETARG_TEXT_PP(0);
1928  text *arg2 = PG_GETARG_TEXT_PP(1);
1929  int32 result;
1930
 /* Three-way comparison: the sign of the result gives the ordering. */
1931  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1932
 /* Free any working copies made when fetching the arguments. */
1933  PG_FREE_IF_COPY(arg1, 0);
1934  PG_FREE_IF_COPY(arg2, 1);
1935
1936  PG_RETURN_INT32(result);
1937 }
1938 
1939 Datum
1941 {
1943  Oid collid = ssup->ssup_collation;
1944  MemoryContext oldcontext;
1945
 /*
 * Run the setup with ssup->ssup_cxt as the current memory context;
 * presumably so that state allocated by varstr_sortsupport() lives as
 * long as the sort support object -- confirm.
 */
1946  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1947
1948  /* Use generic string SortSupport */
1949  varstr_sortsupport(ssup, TEXTOID, collid);
1950
1951  MemoryContextSwitchTo(oldcontext); /* restore the caller's context */
1952
1953  PG_RETURN_VOID();
1954 }
1955 
1956 /*
1957  * Generic sortsupport interface for character type's operator classes.
1958  * Includes locale support, and support for BpChar semantics (i.e. removing
1959  * trailing spaces before comparison).
1960  *
1961  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1962  * same representation. Callers that always use the C collation (e.g.
1963  * non-collatable type callers like bytea) may have NUL bytes in their strings;
1964  * this will not work with any other collation, though.
1965  */
1966 void
1968 {
1969  bool abbreviate = ssup->abbreviate;
1970  bool collate_c = false;
1971  VarStringSortSupport *sss;
1972  pg_locale_t locale = 0; /* stays 0 for the C and default collations */
1973
1974  check_collation_set(collid);
1975
1976  /*
1977  * If possible, set ssup->comparator to a function which can be used to
1978  * directly compare two datums. If we can do this, we'll avoid the
1979  * overhead of a trip through the fmgr layer for every comparison, which
1980  * can be substantial.
1981  *
1982  * Most typically, we'll set the comparator to varlenafastcmp_locale,
1983  * which uses strcoll() to perform comparisons. We use that for the
1984  * BpChar case too, but type NAME uses namefastcmp_locale. However, if
1985  * LC_COLLATE = C, we can make things quite a bit faster with
1986  * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
1987  * memcmp() rather than strcoll().
1988  */
1989  if (lc_collate_is_c(collid))
1990  {
1991  if (typid == BPCHAROID)
1992  ssup->comparator = bpcharfastcmp_c;
1993  else if (typid == NAMEOID)
1994  {
1995  ssup->comparator = namefastcmp_c;
1996  /* Not supporting abbreviation with type NAME, for now */
1997  abbreviate = false;
1998  }
1999  else
2000  ssup->comparator = varstrfastcmp_c;
2001
2002  collate_c = true;
2003  }
2004  else
2005  {
2006  /*
2007  * We need a collation-sensitive comparison. To make things faster,
2008  * we'll figure out the collation based on the locale id and cache the
2009  * result.
2010  */
2011  if (collid != DEFAULT_COLLATION_OID)
2012  locale = pg_newlocale_from_collation(collid);
2013
2014  /*
2015  * There is a further exception on Windows. When the database
2016  * encoding is UTF-8 and we are not using the C collation, complex
2017  * hacks are required. We don't currently have a comparator that
2018  * handles that case, so we fall back on the slow method of having the
2019  * sort code invoke bttextcmp() (in the case of text) via the fmgr
2020  * trampoline. ICU locales work just the same on Windows, however.
2021  */
2022 #ifdef WIN32
2023  if (GetDatabaseEncoding() == PG_UTF8 &&
2024  !(locale && locale->provider == COLLPROVIDER_ICU))
2025  return; /* returns without assigning ssup->comparator */
2026 #endif
2027
2028  /*
2029  * We use varlenafastcmp_locale except for type NAME.
2030  */
2031  if (typid == NAMEOID)
2032  {
2034  /* Not supporting abbreviation with type NAME, for now */
2035  abbreviate = false;
2036  }
2037  else
2039  }
2040
2041  /*
2042  * Unfortunately, it seems that abbreviation for non-C collations is
2043  * broken on many common platforms; testing of multiple versions of glibc
2044  * reveals that, for many locales, strcoll() and strxfrm() do not return
2045  * consistent results, which is fatal to this optimization. While no
2046  * libc other than glibc and Cygwin has so far been shown to have a problem,
2047  * we take the conservative course of action for right now and disable
2048  * this categorically. (Users who are certain this isn't a problem on
2049  * their system can define TRUST_STRXFRM.)
2050  *
2051  * Even apart from the risk of broken locales, it's possible that there
2052  * are platforms where the use of abbreviated keys should be disabled at
2053  * compile time. Having only 4 byte datums could make worst-case
2054  * performance drastically more likely, for example. Moreover, macOS's
2055  * strxfrm() implementation is known to not effectively concentrate a
2056  * significant amount of entropy from the original string in earlier
2057  * transformed blobs. It's possible that other supported platforms are
2058  * similarly encumbered. So, if we ever get past disabling this
2059  * categorically, we may still want or need to disable it for particular
2060  * platforms.
2061  */
2062 #ifndef TRUST_STRXFRM
 /* Abbreviation survives only for the C collation and for ICU locales. */
2063  if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
2064  abbreviate = false;
2065 #endif
2066
2067  /*
2068  * If we're using abbreviated keys, or if we're using a locale-aware
2069  * comparison, we need to initialize a VarStringSortSupport object. Both
2070  * cases will make use of the temporary buffers we initialize here for
2071  * scratch space (and to detect requirement for BpChar semantics from
2072  * caller), and the abbreviation case requires additional state.
2073  */
2074  if (abbreviate || !collate_c)
2075  {
2076  sss = palloc(sizeof(VarStringSortSupport));
2077  sss->buf1 = palloc(TEXTBUFLEN);
2078  sss->buflen1 = TEXTBUFLEN;
2079  sss->buf2 = palloc(TEXTBUFLEN);
2080  sss->buflen2 = TEXTBUFLEN;
2081  /* Start with invalid values */
2082  sss->last_len1 = -1;
2083  sss->last_len2 = -1;
2084  /* Initialize */
2085  sss->last_returned = 0;
2086  sss->locale = locale;
2087
2088  /*
2089  * To avoid somehow confusing a strxfrm() blob and an original string,
2090  * constantly keep track of the variety of data that buf1 and buf2
2091  * currently contain.
2092  *
2093  * Comparisons may be interleaved with conversion calls. Frequently,
2094  * conversions and comparisons are batched into two distinct phases,
2095  * but the correctness of caching cannot hinge upon this. For
2096  * comparison caching, buffer state is only trusted if cache_blob is
2097  * found set to false, whereas strxfrm() caching only trusts the state
2098  * when cache_blob is found set to true.
2099  *
2100  * Arbitrarily initialize cache_blob to true.
2101  */
2102  sss->cache_blob = true;
2103  sss->collate_c = collate_c;
2104  sss->typid = typid;
2105  ssup->ssup_extra = sss;
2106
2107  /*
2108  * If possible, plan to use the abbreviated keys optimization. The
2109  * core code may switch back to authoritative comparator should
2110  * abbreviation be aborted.
2111  */
2112  if (abbreviate)
2113  {
2114  sss->prop_card = 0.20;
2115  initHyperLogLog(&sss->abbr_card, 10);
2116  initHyperLogLog(&sss->full_card, 10);
 /* Keep the full comparator, then install the abbreviated-key one. */
2117  ssup->abbrev_full_comparator = ssup->comparator;
2118  ssup->comparator = varstrcmp_abbrev;
2121  }
2122  }
2123 }
2124 
2125 /*
2126  * sortsupport comparison func (for C locale case)
 *
 * Compares the two values bytewise with memcmp(); when one is a prefix of
 * the other, the shorter one sorts first.
2127  */
2128 static int
2130 {
2131  VarString *arg1 = DatumGetVarStringPP(x);
2132  VarString *arg2 = DatumGetVarStringPP(y);
2133  char *a1p,
2134  *a2p;
2135  int len1,
2136  len2,
2137  result;
2138
2139  a1p = VARDATA_ANY(arg1);
2140  a2p = VARDATA_ANY(arg2);
2141
2142  len1 = VARSIZE_ANY_EXHDR(arg1);
2143  len2 = VARSIZE_ANY_EXHDR(arg2);
2144
2145  result = memcmp(a1p, a2p, Min(len1, len2));
2146  if ((result == 0) && (len1 != len2))
2147  result = (len1 < len2) ? -1 : 1;
2148
2149  /*
 * We can't afford to leak memory here. A pointer that differs from the
 * input datum is treated as a separately allocated copy and freed.
 */
2150  if (PointerGetDatum(arg1) != x)
2151  pfree(arg1);
2152  if (PointerGetDatum(arg2) != y)
2153  pfree(arg2);
2154
2155  return result;
2156 }
2157 
2158 /*
2159  * sortsupport comparison func (for BpChar C locale case)
2160  *
2161  * BpChar outsources its sortsupport to this module. Specialization for the
2162  * varstr_sortsupport BpChar case, modeled on
2163  * internal_bpchar_pattern_compare().
2164  */
2165 static int
2167 {
2168  BpChar *arg1 = DatumGetBpCharPP(x);
2169  BpChar *arg2 = DatumGetBpCharPP(y);
2170  char *a1p,
2171  *a2p;
2172  int len1,
2173  len2,
2174  result;
2175
2176  a1p = VARDATA_ANY(arg1);
2177  a2p = VARDATA_ANY(arg2);
2178
 /* Compare using the lengths returned by bpchartruelen(), not the stored lengths. */
2179  len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2180  len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2181
2182  result = memcmp(a1p, a2p, Min(len1, len2));
2183  if ((result == 0) && (len1 != len2))
2184  result = (len1 < len2) ? -1 : 1;
2185
2186  /*
 * We can't afford to leak memory here. A pointer that differs from the
 * input datum is treated as a separately allocated copy and freed.
 */
2187  if (PointerGetDatum(arg1) != x)
2188  pfree(arg1);
2189  if (PointerGetDatum(arg2) != y)
2190  pfree(arg2);
2191
2192  return result;
2193 }
2194 
2195 /*
2196  * sortsupport comparison func (for NAME C locale case)
 *
 * Uses strncmp(), so at most NAMEDATALEN bytes are compared and the
 * comparison stops at the first NUL byte.
2197  */
2198 static int
2200 {
2201  Name arg1 = DatumGetName(x);
2202  Name arg2 = DatumGetName(y);
2203
2204  return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
2205 }
2206 
2207 /*
2208  * sortsupport comparison func (for locale case with all varlena types)
 *
 * Thin wrapper: extracts data pointers and lengths from the two values and
 * delegates the comparison to varstrfastcmp_locale().
2209  */
2210 static int
2212 {
2213  VarString *arg1 = DatumGetVarStringPP(x);
2214  VarString *arg2 = DatumGetVarStringPP(y);
2215  char *a1p,
2216  *a2p;
2217  int len1,
2218  len2,
2219  result;
2220
2221  a1p = VARDATA_ANY(arg1);
2222  a2p = VARDATA_ANY(arg2);
2223
2224  len1 = VARSIZE_ANY_EXHDR(arg1);
2225  len2 = VARSIZE_ANY_EXHDR(arg2);
2226
2227  result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
2228
2229  /*
 * We can't afford to leak memory here. A pointer that differs from the
 * input datum is treated as a separately allocated copy and freed.
 */
2230  if (PointerGetDatum(arg1) != x)
2231  pfree(arg1);
2232  if (PointerGetDatum(arg2) != y)
2233  pfree(arg2);
2234
2235  return result;
2236 }
2237 
2238 /*
2239  * sortsupport comparison func (for locale case with NAME type)
 *
 * Lengths are obtained with strlen(), so this assumes the name data is
 * NUL-terminated -- TODO confirm against the Name type's definition.
2240  */
2241 static int
2243 {
2244  Name arg1 = DatumGetName(x);
2245  Name arg2 = DatumGetName(y);
2246
2247  return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
2248  NameStr(*arg2), strlen(NameStr(*arg2)),
2249  ssup);
2250 }
2251 
2252 /*
2253  * sortsupport comparison func for locale cases
 *
 * a1p/len1 and a2p/len2 are the two strings with their lengths in bytes.
 * Byte-identical inputs return 0 immediately.  Otherwise the strings are
 * copied into sss->buf1/sss->buf2 and NUL-terminated there; those copies
 * feed strcoll()/strcoll_l() and the strcmp() tie-break, while the ICU
 * branch compares a1p/a2p directly using explicit lengths.  The result of
 * the last full comparison is kept in sss->last_returned and reused when
 * both inputs match the buffered strings.
 *
 * NOTE(review): no declaration of 'sss' appears in this copy of the
 * function; presumably it is the per-sort state reached through ssup --
 * confirm.
2254  */
2255 static int
2256 varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
2257 {
2259  int result;
2260  bool arg1_match;
2261 
2262  /* Fast pre-check for equality, as discussed in varstr_cmp() */
2263  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2264  {
2265  /*
2266  * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2267  * last_len2. Existing contents of buffers might still be used by
2268  * next call.
2269  *
2270  * It's fine to allow the comparison of BpChar padding bytes here,
2271  * even though that implies that the memcmp() will usually be
2272  * performed for BpChar callers (though multibyte characters could
2273  * still prevent that from occurring). The memcmp() is still very
2274  * cheap, and BpChar's funny semantics have us remove trailing spaces
2275  * (not limited to padding), so we need make no distinction between
2276  * padding space characters and "real" space characters.
2277  */
2278  return 0;
2279  }
2280 
2281  if (sss->typid == BPCHAROID)
2282  {
2283  /* Get true number of bytes, ignoring trailing spaces */
2284  len1 = bpchartruelen(a1p, len1);
2285  len2 = bpchartruelen(a2p, len2);
2286  }
2287 
 /*
  * Make sure each buffer can hold its string plus the terminating NUL.
  * The old buffer is freed rather than resized, so its contents are not
  * carried over; the replacement is allocated in ssup->ssup_cxt.
  */
2288  if (len1 >= sss->buflen1)
2289  {
2290  pfree(sss->buf1);
2291  sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2292  sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2293  }
2294  if (len2 >= sss->buflen2)
2295  {
2296  pfree(sss->buf2);
2297  sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2298  sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2299  }
2300 
2301  /*
2302  * We're likely to be asked to compare the same strings repeatedly, and
2303  * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2304  * comparisons, even though in general there is no reason to think that
2305  * that will work out (every string datum may be unique). Caching does
2306  * not slow things down measurably when it doesn't work out, and can speed
2307  * things up by rather a lot when it does. In part, this is because the
2308  * memcmp() compares data from cachelines that are needed in L1 cache even
2309  * when the last comparison's result cannot be reused.
2310  */
2311  arg1_match = true;
2312  if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2313  {
2314  arg1_match = false;
2315  memcpy(sss->buf1, a1p, len1);
2316  sss->buf1[len1] = '\0';
2317  sss->last_len1 = len1;
2318  }
2319 
2320  /*
2321  * If we're comparing the same two strings as last time, we can return the
2322  * same answer without calling strcoll() again. This is more likely than
2323  * it seems (at least with moderate to low cardinality sets), because
2324  * quicksort compares the same pivot against many values.
2325  */
2326  if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2327  {
2328  memcpy(sss->buf2, a2p, len2);
2329  sss->buf2[len2] = '\0';
2330  sss->last_len2 = len2;
2331  }
 /*
  * The cached result is only trusted when cache_blob is false; this
  * function clears that flag each time it stores a result below.
  */
2332  else if (arg1_match && !sss->cache_blob)
2333  {
2334  /* Use result cached following last actual strcoll() call */
2335  return sss->last_returned;
2336  }
2337 
2338  if (sss->locale)
2339  {
2340  if (sss->locale->provider == COLLPROVIDER_ICU)
2341  {
2342 #ifdef USE_ICU
2343 #ifdef HAVE_UCOL_STRCOLLUTF8
2344  if (GetDatabaseEncoding() == PG_UTF8)
2345  {
2346  UErrorCode status;
2347 
2348  status = U_ZERO_ERROR;
2349  result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
2350  a1p, len1,
2351  a2p, len2,
2352  &status);
2353  if (U_FAILURE(status))
2354  ereport(ERROR,
2355  (errmsg("collation failed: %s", u_errorName(status))));
2356  }
2357  else
2358 #endif
2359  {
 /* Convert both inputs to UChar, compare, then free the conversions. */
2360  int32_t ulen1,
2361  ulen2;
2362  UChar *uchar1,
2363  *uchar2;
2364 
2365  ulen1 = icu_to_uchar(&uchar1, a1p, len1);
2366  ulen2 = icu_to_uchar(&uchar2, a2p, len2);
2367 
2368  result = ucol_strcoll(sss->locale->info.icu.ucol,
2369  uchar1, ulen1,
2370  uchar2, ulen2);
2371 
2372  pfree(uchar1);
2373  pfree(uchar2);
2374  }
2375 #else /* not USE_ICU */
2376  /* shouldn't happen */
2377  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2378 #endif /* not USE_ICU */
2379  }
2380  else
2381  {
2382 #ifdef HAVE_LOCALE_T
2383  result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
2384 #else
2385  /* shouldn't happen */
2386  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2387 #endif
2388  }
2389  }
2390  else
2391  result = strcoll(sss->buf1, sss->buf2);
2392 
 /*
  * A tie is only broken when there is no locale object or the locale is
  * marked deterministic; otherwise the 0 result stands.
  */
2393  /* Break tie if necessary. */
2394  if (result == 0 &&
2395  (!sss->locale || sss->locale->deterministic))
2396  result = strcmp(sss->buf1, sss->buf2);
2397 
2398  /* Cache result, perhaps saving an expensive strcoll() call next time */
2399  sss->cache_blob = false;
2400  sss->last_returned = result;
2401  return result;
2402 }
2403 
2404 /*
2405  * Abbreviated key comparison func
 *
 * Three-way comparison of x and y using the plain > and == operators:
 * returns 1, 0 or -1.
2406  */
2407 static int
2409 {
2410  /*
2411  * When 0 is returned, the core system will call varstrfastcmp_c()
2412  * (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale(). Even a
2413  * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2414  * authoritatively, for the same reason that there is a strcoll()
2415  * tie-breaker call to strcmp() in varstr_cmp().
2416  */
2417  if (x > y)
2418  return 1;
2419  else if (x == y)
2420  return 0;
2421  else
2422  return -1;
2423 }
2424 
2425 /*
2426  * Conversion routine for sortsupport. Converts original to abbreviated key
2427  * representation. Our encoding strategy is simple -- pack the first 8 bytes
2428  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2429  * stored in reverse order), and treat it as an unsigned integer. When the "C"
2430  * locale is used, or in case of bytea, just memcpy() from original instead.
 *
 * Returns the abbreviated key as a Datum.  As a side effect, hashes of the
 * full key and of the abbreviated key are added to sss->full_card and
 * sss->abbr_card, except when a cached blob from the previous call is
 * reused.  If reading 'original' produced a separate copy, that copy is
 * freed before returning.
 *
 * NOTE(review): no declaration of 'sss' appears in this copy of the
 * function; presumably it is the per-sort state reached through ssup --
 * confirm.
2431  */
2432 static Datum
2434 {
2436  VarString *authoritative = DatumGetVarStringPP(original);
2437  char *authoritative_data = VARDATA_ANY(authoritative);
2438 
2439  /* working state */
2440  Datum res;
2441  char *pres;
2442  int len;
2443  uint32 hash;
2444 
 /* pres addresses the bytes of res, so the key can be filled bytewise */
2445  pres = (char *) &res;
2446  /* memset(), so any non-overwritten bytes are NUL */
2447  memset(pres, 0, sizeof(Datum));
2448  len = VARSIZE_ANY_EXHDR(authoritative);
2449 
2450  /* Get number of bytes, ignoring trailing spaces */
2451  if (sss->typid == BPCHAROID)
2452  len = bpchartruelen(authoritative_data, len);
2453 
2454  /*
2455  * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2456  * abbreviate keys. The full comparator for the C locale is always
2457  * memcmp(). It would be incorrect to allow bytea callers (callers that
2458  * always force the C collation -- bytea isn't a collatable type, but this
2459  * approach is convenient) to use strxfrm(). This is because bytea
2460  * strings may contain NUL bytes. Besides, this should be faster, too.
2461  *
2462  * More generally, it's okay that bytea callers can have NUL bytes in
2463  * strings because varstrcmp_abbrev() need not make a distinction between
2464  * terminating NUL bytes, and NUL bytes representing actual NULs in the
2465  * authoritative representation. Hopefully a comparison at or past one
2466  * abbreviated key's terminating NUL byte will resolve the comparison
2467  * without consulting the authoritative representation; specifically, some
2468  * later non-NUL byte in the longer string can resolve the comparison
2469  * against a subsequent terminating NUL in the shorter string. There will
2470  * usually be what is effectively a "length-wise" resolution there and
2471  * then.
2472  *
2473  * If that doesn't work out -- if all bytes in the longer string
2474  * positioned at or past the offset of the smaller string's (first)
2475  * terminating NUL are actually representative of NUL bytes in the
2476  * authoritative binary string (perhaps with some *terminating* NUL bytes
2477  * towards the end of the longer string iff it happens to still be small)
2478  * -- then an authoritative tie-breaker will happen, and do the right
2479  * thing: explicitly consider string length.
2480  */
2481  if (sss->collate_c)
2482  memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2483  else
2484  {
2485  Size bsize;
2486 #ifdef USE_ICU
2487  int32_t ulen = -1;
2488  UChar *uchar = NULL;
2489 #endif
2490 
2491  /*
2492  * We're not using the C collation, so fall back on strxfrm or ICU
2493  * analogs.
2494  */
2495 
2496  /* By convention, we use buffer 1 to store and NUL-terminate */
2497  if (len >= sss->buflen1)
2498  {
2499  pfree(sss->buf1);
2500  sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2501  sss->buf1 = palloc(sss->buflen1);
2502  }
2503 
 /*
  * Reuse requires the same length, a blob stored by a previous call of
  * this function (cache_blob), and identical bytes in buf1.  In that
  * case buf2 still holds the blob and last_len2 its size.
  */
2504  /* Might be able to reuse strxfrm() blob from last call */
2505  if (sss->last_len1 == len && sss->cache_blob &&
2506  memcmp(sss->buf1, authoritative_data, len) == 0)
2507  {
2508  memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2509  /* No change affecting cardinality, so no hashing required */
2510  goto done;
2511  }
2512 
2513  memcpy(sss->buf1, authoritative_data, len);
2514 
2515  /*
2516  * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
2517  * necessary for ICU, but doesn't hurt.
2518  */
2519  sss->buf1[len] = '\0';
2520  sss->last_len1 = len;
2521 
2522 #ifdef USE_ICU
2523  /* When using ICU and not UTF8, convert string to UChar. */
 /*
  * NOTE(review): the final operand of the condition below is absent from
  * this copy; going by the comment above it presumably tests that the
  * database encoding is not UTF8 -- confirm.
  */
2524  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
2526  ulen = icu_to_uchar(&uchar, sss->buf1, len);
2527 #endif
2528 
2529  /*
2530  * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
2531  * and try again. Both of these functions have the result buffer
2532  * content undefined if the result did not fit, so we need to retry
2533  * until everything fits, even though we only need the first few bytes
2534  * in the end. When using ucol_nextSortKeyPart(), however, we only
2535  * ask for as many bytes as we actually need.
2536  */
2537  for (;;)
2538  {
2539 #ifdef USE_ICU
2540  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
2541  {
2542  /*
2543  * When using UTF8, use the iteration interface so we only
2544  * need to produce as many bytes as we actually need.
2545  */
2546  if (GetDatabaseEncoding() == PG_UTF8)
2547  {
2548  UCharIterator iter;
2549  uint32_t state[2];
2550  UErrorCode status;
2551 
2552  uiter_setUTF8(&iter, sss->buf1, len);
2553  state[0] = state[1] = 0; /* won't need that again */
2554  status = U_ZERO_ERROR;
2555  bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
2556  &iter,
2557  state,
2558  (uint8_t *) sss->buf2,
2559  Min(sizeof(Datum), sss->buflen2),
2560  &status);
2561  if (U_FAILURE(status))
2562  ereport(ERROR,
2563  (errmsg("sort key generation failed: %s",
2564  u_errorName(status))));
2565  }
2566  else
2567  bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
2568  uchar, ulen,
2569  (uint8_t *) sss->buf2, sss->buflen2);
2570  }
2571  else
2572 #endif
2573 #ifdef HAVE_LOCALE_T
2574  if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
2575  bsize = strxfrm_l(sss->buf2, sss->buf1,
2576  sss->buflen2, sss->locale->info.lt);
2577  else
2578 #endif
2579  bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2580 
 /* Remember the blob size; the loop ends once it fits within buf2. */
2581  sss->last_len2 = bsize;
2582  if (bsize < sss->buflen2)
2583  break;
2584 
2585  /*
2586  * Grow buffer and retry.
2587  */
2588  pfree(sss->buf2);
2589  sss->buflen2 = Max(bsize + 1,
2590  Min(sss->buflen2 * 2, MaxAllocSize));
2591  sss->buf2 = palloc(sss->buflen2);
2592  }
2593 
2594  /*
2595  * Every Datum byte is always compared. This is safe because the
2596  * strxfrm() blob is itself NUL terminated, leaving no danger of
2597  * misinterpreting any NUL bytes not intended to be interpreted as
2598  * logically representing termination.
2599  *
2600  * (Actually, even if there were NUL bytes in the blob it would be
2601  * okay. See remarks on bytea case above.)
2602  */
2603  memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2604 
2605 #ifdef USE_ICU
2606  if (uchar)
2607  pfree(uchar);
2608 #endif
2609  }
2610 
2611  /*
2612  * Maintain approximate cardinality of both abbreviated keys and original,
2613  * authoritative keys using HyperLogLog. Used as cheap insurance against
2614  * the worst case, where we do many string transformations for no saving
2615  * in full strcoll()-based comparisons. These statistics are used by
2616  * varstr_abbrev_abort().
2617  *
2618  * First, Hash key proper, or a significant fraction of it. Mix in length
2619  * in order to compensate for cases where differences are past
2620  * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2621  */
2622  hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2623  Min(len, PG_CACHE_LINE_SIZE)));
2624 
2625  if (len > PG_CACHE_LINE_SIZE)
2626  hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2627 
2628  addHyperLogLog(&sss->full_card, hash);
2629 
2630  /* Hash abbreviated key */
2631 #if SIZEOF_DATUM == 8
2632  {
 /* XOR the two 32-bit halves of the key together before hashing */
2633  uint32 lohalf,
2634  hihalf;
2635 
2636  lohalf = (uint32) res;
2637  hihalf = (uint32) (res >> 32);
2638  hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2639  }
2640 #else /* SIZEOF_DATUM != 8 */
2641  hash = DatumGetUInt32(hash_uint32((uint32) res));
2642 #endif
2643 
2644  addHyperLogLog(&sss->abbr_card, hash);
2645 
2646  /* Cache result, perhaps saving an expensive strxfrm() call next time */
2647  sss->cache_blob = true;
2648 done:
2649 
2650  /*
2651  * Byteswap on little-endian machines.
2652  *
2653  * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2654  * comparator) works correctly on all platforms. If we didn't do this,
2655  * the comparator would have to call memcmp() with a pair of pointers to
2656  * the first byte of each abbreviated key, which is slower.
2657  */
2658  res = DatumBigEndianToNative(res);
2659 
2660  /* Don't leak memory here */
2661  if (PointerGetDatum(authoritative) != original)
2662  pfree(authoritative);
2663 
2664  return res;
2665 }
2666 
2667 /*
2668  * Callback for estimating effectiveness of abbreviated key optimization, using
2669  * heuristic rules. Returns value indicating if the abbreviation optimization
2670  * should be aborted, based on its projected effectiveness.
 *
 * memtupcount is the tuple count the decision is based on; below 100 the
 * answer is always "keep going".  Returns true to abort abbreviation and
 * false to continue.  sss->prop_card may be lowered as a side effect.
 *
 * NOTE(review): no declaration of 'sss' appears in this copy of the
 * function; presumably it is the per-sort state reached through ssup --
 * confirm.
2671  */
2672 static bool
2673 varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2674 {
2676  double abbrev_distinct,
2677  key_distinct;
2678 
2679  Assert(ssup->abbreviate);
2680 
2681  /* Have a little patience */
2682  if (memtupcount < 100)
2683  return false;
2684 
 /* Cardinality estimates for abbreviated keys and for full keys */
2685  abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2686  key_distinct = estimateHyperLogLog(&sss->full_card);
2687 
2688  /*
2689  * Clamp cardinality estimates to at least one distinct value. While
2690  * NULLs are generally disregarded, if only NULL values were seen so far,
2691  * that might misrepresent costs if we failed to clamp.
2692  */
2693  if (abbrev_distinct <= 1.0)
2694  abbrev_distinct = 1.0;
2695 
2696  if (key_distinct <= 1.0)
2697  key_distinct = 1.0;
2698 
2699  /*
2700  * In the worst case all abbreviated keys are identical, while at the same
2701  * time there are differences within full key strings not captured in
2702  * abbreviations.
2703  */
2704 #ifdef TRACE_SORT
2705  if (trace_sort)
2706  {
2707  double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2708 
2709  elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2710  "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2711  memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2712  sss->prop_card);
2713  }
2714 #endif
2715 
2716  /*
2717  * If the number of distinct abbreviated keys approximately matches the
2718  * number of distinct authoritative original keys, that's reason enough to
2719  * proceed. We can win even with a very low cardinality set if most
2720  * tie-breakers only memcmp(). This is by far the most important
2721  * consideration.
2722  *
2723  * While comparisons that are resolved at the abbreviated key level are
2724  * considerably cheaper than tie-breakers resolved with memcmp(), both of
2725  * those two outcomes are so much cheaper than a full strcoll() once
2726  * sorting is underway that it doesn't seem worth it to weigh abbreviated
2727  * cardinality against the overall size of the set in order to more
2728  * accurately model costs. Assume that an abbreviated comparison, and an
2729  * abbreviated comparison with a cheap memcmp()-based authoritative
2730  * resolution are equivalent.
2731  */
 /* Continue while abbreviated cardinality exceeds prop_card of the full one */
2732  if (abbrev_distinct > key_distinct * sss->prop_card)
2733  {
2734  /*
2735  * When we have exceeded 10,000 tuples, decay required cardinality
2736  * aggressively for next call.
2737  *
2738  * This is useful because the number of comparisons required on
2739  * average increases at a linearithmic rate, and at roughly 10,000
2740  * tuples that factor will start to dominate over the linear costs of
2741  * string transformation (this is a conservative estimate). The decay
2742  * rate is chosen to be a little less aggressive than halving -- which
2743  * (since we're called at points at which memtupcount has doubled)
2744  * would never see the cost model actually abort past the first call
2745  * following a decay. This decay rate is mostly a precaution against
2746  * a sudden, violent swing in how well abbreviated cardinality tracks
2747  * full key cardinality. The decay also serves to prevent a marginal
2748  * case from being aborted too late, when too much has already been
2749  * invested in string transformation.
2750  *
2751  * It's possible for sets of several million distinct strings with
2752  * mere tens of thousands of distinct abbreviated keys to still
2753  * benefit very significantly. This will generally occur provided
2754  * each abbreviated key is a proxy for a roughly uniform number of the
2755  * set's full keys. If it isn't so, we hope to catch that early and
2756  * abort. If it isn't caught early, by the time the problem is
2757  * apparent it's probably not worth aborting.
2758  */
2759  if (memtupcount > 10000)
2760  sss->prop_card *= 0.65;
2761 
2762  return false;
2763  }
2764 
2765  /*
2766  * Abort abbreviation strategy.
2767  *
2768  * The worst case, where all abbreviated keys are identical while all
2769  * original strings differ will typically only see a regression of about
2770  * 10% in execution time for small to medium sized lists of strings.
2771  * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2772  * often expect very large improvements, particularly with sets of strings
2773  * of moderately high to high abbreviated cardinality. There is little to
2774  * lose but much to gain, which our strategy reflects.
2775  */
2776 #ifdef TRACE_SORT
2777  if (trace_sort)
2778  elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2779  "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2780  memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2781 #endif
2782 
2783  return true;
2784 }
2785 
/*
 * Returns whichever of the two text arguments compares greater under the
 * call's collation; when text_cmp() does not report arg1 as greater
 * (including a tie), arg2 is returned.
 */
2786 Datum
2788 {
2789  text *arg1 = PG_GETARG_TEXT_PP(0);
2790  text *arg2 = PG_GETARG_TEXT_PP(1);
2791  text *result;
2792 
2793  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2794 
2795  PG_RETURN_TEXT_P(result);
2796 }
2797 
/*
 * Returns whichever of the two text arguments compares smaller under the
 * call's collation; when text_cmp() does not report arg1 as smaller
 * (including a tie), arg2 is returned.
 */
2798 Datum
2800 {
2801  text *arg1 = PG_GETARG_TEXT_PP(0);
2802  text *arg2 = PG_GETARG_TEXT_PP(1);
2803  text *result;
2804 
2805  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2806 
2807  PG_RETURN_TEXT_P(result);
2808 }
2809 
2810 
2811 /*
2812  * Cross-type comparison functions for types text and name.
2813  */
2814 
/*
 * Equality test between a NAME (argument 0) and a text (argument 1).
 *
 * With C_COLLATION_OID the values are equal iff their byte lengths match
 * and memcmp() finds no difference; with any other collation equality
 * means varstr_cmp() returned 0.  Returns a boolean.
 */
2815 Datum
2817 {
2818  Name arg1 = PG_GETARG_NAME(0);
2819  text *arg2 = PG_GETARG_TEXT_PP(1);
 /* NAME length comes from strlen(); text length excludes the header */
2820  size_t len1 = strlen(NameStr(*arg1));
2821  size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2822  Oid collid = PG_GET_COLLATION();
2823  bool result;
2824 
2825  check_collation_set(collid);
2826 
2827  if (collid == C_COLLATION_OID)
2828  result = (len1 == len2 &&
2829  memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2830  else
2831  result = (varstr_cmp(NameStr(*arg1), len1,
2832  VARDATA_ANY(arg2), len2,
2833  collid) == 0);
2834 
2835  PG_FREE_IF_COPY(arg2, 1);
2836 
2837  PG_RETURN_BOOL(result);
2838 }
2839 
/*
 * Equality test between a text (argument 0) and a NAME (argument 1).
 *
 * With C_COLLATION_OID the values are equal iff their byte lengths match
 * and memcmp() finds no difference; with any other collation equality
 * means varstr_cmp() returned 0.  Returns a boolean.
 */
2840 Datum
2842 {
2843  text *arg1 = PG_GETARG_TEXT_PP(0);
2844  Name arg2 = PG_GETARG_NAME(1);
 /* text length excludes the header; NAME length comes from strlen() */
2845  size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2846  size_t len2 = strlen(NameStr(*arg2));
2847  Oid collid = PG_GET_COLLATION();
2848  bool result;
2849 
2850  check_collation_set(collid);
2851 
2852  if (collid == C_COLLATION_OID)
2853  result = (len1 == len2 &&
2854  memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2855  else
2856  result = (varstr_cmp(VARDATA_ANY(arg1), len1,
2857  NameStr(*arg2), len2,
2858  collid) == 0);
2859 
2860  PG_FREE_IF_COPY(arg1, 0);
2861 
2862  PG_RETURN_BOOL(result);
2863 }
2864 
/*
 * Inequality test between a NAME (argument 0) and a text (argument 1).
 *
 * The result is the negation of the equality test: under C_COLLATION_OID
 * that is "lengths match and memcmp() is 0", otherwise "varstr_cmp()
 * returned 0".  Returns a boolean.
 */
2865 Datum
2867 {
2868  Name arg1 = PG_GETARG_NAME(0);
2869  text *arg2 = PG_GETARG_TEXT_PP(1);
2870  size_t len1 = strlen(NameStr(*arg1));
2871  size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2872  Oid collid = PG_GET_COLLATION();
2873  bool result;
2874 
2875  check_collation_set(collid);
2876 
2877  if (collid == C_COLLATION_OID)
2878  result = !(len1 == len2 &&
2879  memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2880  else
2881  result = !(varstr_cmp(NameStr(*arg1), len1,
2882  VARDATA_ANY(arg2), len2,
2883  collid) == 0);
2884 
2885  PG_FREE_IF_COPY(arg2, 1);
2886 
2887  PG_RETURN_BOOL(result);
2888 }
2889 
/*
 * Inequality test between a text (argument 0) and a NAME (argument 1).
 *
 * The result is the negation of the equality test: under C_COLLATION_OID
 * that is "lengths match and memcmp() is 0", otherwise "varstr_cmp()
 * returned 0".  Returns a boolean.
 */
2890 Datum
2892 {
2893  text *arg1 = PG_GETARG_TEXT_PP(0);
2894  Name arg2 = PG_GETARG_NAME(1);
2895  size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2896  size_t len2 = strlen(NameStr(*arg2));
2897  Oid collid = PG_GET_COLLATION();
2898  bool result;
2899 
2900  check_collation_set(collid);
2901 
2902  if (collid == C_COLLATION_OID)
2903  result = !(len1 == len2 &&
2904  memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2905  else
2906  result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
2907  NameStr(*arg2), len2,
2908  collid) == 0);
2909 
2910  PG_FREE_IF_COPY(arg1, 0);
2911 
2912  PG_RETURN_BOOL(result);
2913 }
2914 
/*
 * Three-way comparison of a NAME (argument 0) against a text (argument 1).
 * The value computed by varstr_cmp() under the call's collation is
 * returned unchanged as an int32.
 */
2915 Datum
2917 {
2918  Name arg1 = PG_GETARG_NAME(0);
2919  text *arg2 = PG_GETARG_TEXT_PP(1);
2920  int32 result;
2921 
2922  result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
2923  VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
2924  PG_GET_COLLATION());
2925 
2926  PG_FREE_IF_COPY(arg2, 1);
2927 
2928  PG_RETURN_INT32(result);
2929 }
2930 
/*
 * Three-way comparison of a text (argument 0) against a NAME (argument 1).
 * The value computed by varstr_cmp() under the call's collation is
 * returned unchanged as an int32.
 */
2931 Datum
2933 {
2934  text *arg1 = PG_GETARG_TEXT_PP(0);
2935  Name arg2 = PG_GETARG_NAME(1);
2936  int32 result;
2937 
2938  result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
2939  NameStr(*arg2), strlen(NameStr(*arg2)),
2940  PG_GET_COLLATION());
2941 
2942  PG_FREE_IF_COPY(arg1, 0);
2943 
2944  PG_RETURN_INT32(result);
2945 }
2946 
/*
 * CmpCall(cmpfunc) invokes cmpfunc through DirectFunctionCall2Coll(),
 * forwarding the current call's collation and its first two argument
 * datums, and converts the result with DatumGetInt32().  The macro is
 * undefined again after the wrappers below.
 */
2947 #define CmpCall(cmpfunc) \
2948  DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
2949  PG_GET_COLLATION(), \
2950  PG_GETARG_DATUM(0), \
2951  PG_GETARG_DATUM(1)))
2952 
/*
 * NOTE(review): the declarator and body lines of the eight functions below
 * are absent from this copy of the file.  Presumably each one compares a
 * CmpCall() result against zero -- confirm against the complete source.
 */
2953 Datum
2955 {
2957 }
2958 
2959 Datum
2961 {
2963 }
2964 
2965 Datum
2967 {
2969 }
2970 
2971 Datum
2973 {
2975 }
2976 
2977 Datum
2979 {
2981 }
2982 
2983 Datum
2985 {
2987 }
2988 
2989 Datum
2991 {
2993 }
2994 
2995 Datum
2997 {
2999 }
3000 
3001 #undef CmpCall
3002 
3003 
3004 /*
3005  * The following operators support character-by-character comparison
3006  * of text datums, to allow building indexes suitable for LIKE clauses.
3007  * Note that the regular texteq/textne comparison operators, and regular
3008  * support functions 1 and 2 with "C" collation are assumed to be
3009  * compatible with these!
3010  */
3011 
3012 static int
3014 {
3015  int result;
3016  int len1,
3017  len2;
3018 
3019  len1 = VARSIZE_ANY_EXHDR(arg1);
3020  len2 = VARSIZE_ANY_EXHDR(arg2);
3021 
3022  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3023  if (result != 0)
3024  return result;
3025  else if (len1 < len2)
3026  return -1;
3027  else if (len1 > len2)
3028  return 1;
3029  else
3030  return 0;
3031 }
3032 
3033 
/*
 * Pattern-ops "less than": true when internal_text_pattern_compare()
 * reports argument 0 as sorting before argument 1.
 */
3034 Datum
3036 {
3037  text *arg1 = PG_GETARG_TEXT_PP(0);
3038  text *arg2 = PG_GETARG_TEXT_PP(1);
3039  int result;
3040 
3041  result = internal_text_pattern_compare(arg1, arg2);
3042 
3043  PG_FREE_IF_COPY(arg1, 0);
3044  PG_FREE_IF_COPY(arg2, 1);
3045 
3046  PG_RETURN_BOOL(result < 0);
3047 }
3048 
3049 
/*
 * Pattern-ops "less than or equal": true when
 * internal_text_pattern_compare() returns a value <= 0 for
 * (argument 0, argument 1).
 */
3050 Datum
3052 {
3053  text *arg1 = PG_GETARG_TEXT_PP(0);
3054  text *arg2 = PG_GETARG_TEXT_PP(1);
3055  int result;
3056 
3057  result = internal_text_pattern_compare(arg1, arg2);
3058 
3059  PG_FREE_IF_COPY(arg1, 0);
3060  PG_FREE_IF_COPY(arg2, 1);
3061 
3062  PG_RETURN_BOOL(result <= 0);
3063 }
3064 
3065 
/*
 * Pattern-ops "greater than or equal": true when
 * internal_text_pattern_compare() returns a value >= 0 for
 * (argument 0, argument 1).
 */
3066 Datum
3068 {
3069  text *arg1 = PG_GETARG_TEXT_PP(0);
3070  text *arg2 = PG_GETARG_TEXT_PP(1);
3071  int result;
3072 
3073  result = internal_text_pattern_compare(arg1, arg2);
3074 
3075  PG_FREE_IF_COPY(arg1, 0);
3076  PG_FREE_IF_COPY(arg2, 1);
3077 
3078  PG_RETURN_BOOL(result >= 0);
3079 }
3080 
3081 
/*
 * Pattern-ops "greater than": true when internal_text_pattern_compare()
 * reports argument 0 as sorting after argument 1.
 */
3082 Datum
3084 {
3085  text *arg1 = PG_GETARG_TEXT_PP(0);
3086  text *arg2 = PG_GETARG_TEXT_PP(1);
3087  int result;
3088 
3089  result = internal_text_pattern_compare(arg1, arg2);
3090 
3091  PG_FREE_IF_COPY(arg1, 0);
3092  PG_FREE_IF_COPY(arg2, 1);
3093 
3094  PG_RETURN_BOOL(result > 0);
3095 }
3096 
3097 
/*
 * Pattern-ops three-way comparison: returns the value of
 * internal_text_pattern_compare() for (argument 0, argument 1) as an
 * int32.
 */
3098 Datum
3100 {
3101  text *arg1 = PG_GETARG_TEXT_PP(0);
3102  text *arg2 = PG_GETARG_TEXT_PP(1);
3103  int result;
3104 
3105  result = internal_text_pattern_compare(arg1, arg2);
3106 
3107  PG_FREE_IF_COPY(arg1, 0);
3108  PG_FREE_IF_COPY(arg2, 1);
3109 
3110  PG_RETURN_INT32(result);
3111 }
3112 
3113 
/*
 * Sort-support setup for the pattern operators.  varstr_sortsupport() is
 * called with TEXTOID and C_COLLATION_OID while ssup->ssup_cxt is the
 * current memory context; the previous context is restored afterwards.
 *
 * NOTE(review): the declaration of 'ssup' is absent from this copy of the
 * function.
 */
3114 Datum
3116 {
3118  MemoryContext oldcontext;
3119 
3120  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3121 
3122  /* Use generic string SortSupport, forcing "C" collation */
3123  varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
3124 
3125  MemoryContextSwitchTo(oldcontext);
3126 
3127  PG_RETURN_VOID();
3128 }
3129 
3130 
3131 /*-------------------------------------------------------------
3132  * byteaoctetlen
3133  *
3134  * get the number of bytes contained in an instance of type 'bytea'
3135  *-------------------------------------------------------------
3136  */
3137 Datum
3139 {
 /* Argument 0 is fetched as a raw Datum, not as a bytea pointer. */
3140  Datum str = PG_GETARG_DATUM(0);
3141 
3142  /* We need not detoast the input at all */
 /* NOTE(review): the return statement is absent from this copy. */
3144 }
3145 
3146 /*
3147  * byteacat -
3148  * takes two bytea* and returns a bytea* that is the concatenation of
3149  * the two.
3150  *
3151  * Cloned from textcat and modified as required.
3152  */
3153 Datum
3155 {
 /* Both operands are fetched as bytea pointers. */
3156  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3157  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3158 
 /* NOTE(review): the return statement is absent from this copy. */
3160 }
3161 
3162 /*
3163  * bytea_catenate
3164  * Guts of byteacat(), broken out so it can be used by other functions
3165  *
3166  * Arguments can be in short-header form, but not compressed or out-of-line
3167  */
3168 static bytea *
3170 {
3171  bytea *result;
3172  int len1,
3173  len2,
3174  len;
3175  char *ptr;
3176 
3177  len1 = VARSIZE_ANY_EXHDR(t1);
3178  len2 = VARSIZE_ANY_EXHDR(t2);
3179 
3180  /* paranoia ... probably should throw error instead? */
3181  if (len1 < 0)
3182  len1 = 0;
3183  if (len2 < 0)
3184  len2 = 0;
3185 
3186  len = len1 + len2 + VARHDRSZ;
3187  result = (bytea *) palloc(len);
3188 
3189  /* Set size of result string... */
3190  SET_VARSIZE(result, len);
3191 
3192  /* Fill data field of result string... */
3193  ptr = VARDATA(result);
3194  if (len1 > 0)
3195  memcpy(ptr, VARDATA_ANY(t1), len1);
3196  if (len2 > 0)
3197  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
3198 
3199  return result;
3200 }
3201 
/*
 * PG_STR_GET_BYTEA(str_) passes the C string str_ to byteain via
 * DirectFunctionCall1() and converts the resulting Datum with
 * DatumGetByteaPP().
 */
3202 #define PG_STR_GET_BYTEA(str_) \
3203  DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
3204 
3205 /*
3206  * bytea_substr()
3207  * Return a substring starting at the specified position.
3208  * Cloned from text_substr and modified as required.
3209  *
3210  * Input:
3211  * - string
3212  * - starting position (is one-based)
3213  * - string length (optional)
3214  *
3215  * If the starting position is zero or less, then return from the start of the string
3216  * adjusting the length to be consistent with the "negative start" per SQL.
3217  * If the length is less than zero, an ERROR is thrown. If no third argument
3218  * (length) is provided, the length to the end of the string is assumed.
3219  */
3220 Datum
3222 {
 /*
  * NOTE(review): the first line of the statement below is absent from this
  * copy.  The visible arguments are the start position (argument 1), the
  * length (argument 2), and 'false' in the last position.
  */
3224  PG_GETARG_INT32(1),
3225  PG_GETARG_INT32(2),
3226  false));
3227 }
3228 
3229 /*
3230  * bytea_substr_no_len -
3231  * Wrapper to avoid opr_sanity failure due to
3232  * one function accepting a different number of args.
3233  */
3234 Datum
3236 {
 /*
  * NOTE(review): the first line of the statement below is absent from this
  * copy.  The visible arguments are the start position (argument 1), a
  * length of -1, and 'true' in the last position.
  */
3238  PG_GETARG_INT32(1),
3239  -1,
3240  true));
3241 }
3242 
3243 static bytea *
3245  int S,
3246  int L,
3247  bool length_not_specified)
3248 {
3249  int S1; /* adjusted start position */
3250  int L1; /* adjusted substring length */
3251 
3252  S1 = Max(S, 1);
3253 
3254  if (length_not_specified)
3255  {
3256  /*
3257  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
3258  * end of the string if we pass it a negative value for length.
3259  */
3260  L1 = -1;
3261  }
3262  else
3263  {
3264  /* end position */
3265  int E = S + L;
3266 
3267  /*
3268  * A negative value for L is the only way for the end position to be
3269  * before the start. SQL99 says to throw an error.
3270  */
3271  if (E < S)
3272  ereport(ERROR,
3273  (errcode(ERRCODE_SUBSTRING_ERROR),
3274  errmsg("negative substring length not allowed")));
3275 
3276  /*
3277  * A zero or negative value for the end position can happen if the
3278  * start was negative or one. SQL99 says to return a zero-length
3279  * string.
3280  */
3281  if (E < 1)
3282  return PG_STR_GET_BYTEA("");
3283 
3284  L1 = E - S1;
3285  }
3286 
3287  /*
3288  * If the start position is past the end of the string, SQL99 says to
3289  * return a zero-length string -- DatumGetByteaPSlice() will do that for
3290  * us. Convert to zero-based starting position
3291  */
3292  return DatumGetByteaPSlice(str, S1 - 1, L1);
3293 }
3294 
3295 /*
3296  * byteaoverlay
3297  * Replace specified substring of first string with second
3298  *
3299  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
3300  * This code is a direct implementation of what the standard says.
3301  */
3302 Datum
3304 {
3305  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3306  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3307  int sp = PG_GETARG_INT32(2); /* substring start position */
3308  int sl = PG_GETARG_INT32(3); /* substring length */
3309 
3310  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3311 }
3312 
/*
 * Overlay variant taking no explicit length: arguments are the string to
 * modify (0), the replacement (1) and the start position (2).  The length
 * of the replaced substring is taken to be the replacement's own length.
 */
3313 Datum
3315 {
3316  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3317  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3318  int sp = PG_GETARG_INT32(2); /* substring start position */
3319  int sl;
3320 
3321  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
3322  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3323 }
3324 
3325 static bytea *
3326 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
3327 {
3328  bytea *result;
3329  bytea *s1;
3330  bytea *s2;
3331  int sp_pl_sl;
3332 
3333  /*
3334  * Check for possible integer-overflow cases. For negative sp, throw a
3335  * "substring length" error because that's what should be expected
3336  * according to the spec's definition of OVERLAY().
3337  */
3338  if (sp <= 0)
3339  ereport(ERROR,
3340  (errcode(ERRCODE_SUBSTRING_ERROR),
3341  errmsg("negative substring length not allowed")));
3342  if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
3343  ereport(ERROR,
3344  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
3345  errmsg("integer out of range")));
3346 
3347  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
3348  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
3349  result = bytea_catenate(s1, t2);
3350  result = bytea_catenate(result, s2);
3351 
3352  return result;
3353 }
3354 
3355 /*
3356  * byteapos -
3357  * Return the position of the specified substring.
3358  * Implements the SQL POSITION() function.
3359  * Cloned from textpos and modified as required.
3360  */
3361 Datum
3363 {
3364  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3365  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3366  int pos;
3367  int px,
3368  p;
3369  int len1,
3370  len2;
3371  char *p1,
3372  *p2;
3373 
3374  len1 = VARSIZE_ANY_EXHDR(t1);
3375  len2 = VARSIZE_ANY_EXHDR(t2);
3376 
3377  if (len2 <= 0)
3378  PG_RETURN_INT32(1); /* result for empty pattern */
3379 
3380  p1 = VARDATA_ANY(t1);
3381  p2 = VARDATA_ANY(t2);
3382 
3383  pos = 0;
3384  px = (len1 - len2);
3385  for (p = 0; p <= px; p++)
3386  {
3387  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
3388  {
3389  pos = p + 1;
3390  break;
3391  };
3392  p1++;
3393  };
3394 
3395  PG_RETURN_INT32(pos);
3396 }
3397 
3398 /*-------------------------------------------------------------
3399  * byteaGetByte
3400  *
3401  * this routine treats "bytea" as an array of bytes.
3402  * It returns the Nth byte (a number between 0 and 255).
3403  *-------------------------------------------------------------
3404  */
3405 Datum
3407 {
3408  bytea *v = PG_GETARG_BYTEA_PP(0);
3409  int32 n = PG_GETARG_INT32(1);
3410  int len;
3411  int byte;
3412 
3413  len = VARSIZE_ANY_EXHDR(v);
3414 
3415  if (n < 0 || n >= len)
3416  ereport(ERROR,
3417  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3418  errmsg("index %d out of valid range, 0..%d",
3419  n, len - 1)));
3420 
3421  byte = ((unsigned char *) VARDATA_ANY(v))[n];
3422 
3423  PG_RETURN_INT32(byte);
3424 }
3425 
3426 /*-------------------------------------------------------------
3427  * byteaGetBit
3428  *
3429  * This routine treats a "bytea" type like an array of bits.
3430  * It returns the value of the Nth bit (0 or 1).
3431  *
3432  *-------------------------------------------------------------
3433  */
3434 Datum
3436 {
3437  bytea *v = PG_GETARG_BYTEA_PP(0);
3438  int32 n = PG_GETARG_INT32(1);
3439  int byteNo,
3440  bitNo;
3441  int len;
3442  int byte;
3443 
3444  len = VARSIZE_ANY_EXHDR(v);
3445 
3446  if (n < 0 || n >= len * 8)
3447  ereport(ERROR,
3448  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3449  errmsg("index %d out of valid range, 0..%d",
3450  n, len * 8 - 1)));
3451 
3452  byteNo = n / 8;
3453  bitNo = n % 8;
3454 
3455  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3456 
3457  if (byte & (1 << bitNo))
3458  PG_RETURN_INT32(1);
3459  else
3460  PG_RETURN_INT32(0);
3461 }
3462 
3463 /*-------------------------------------------------------------
3464  * byteaSetByte
3465  *
3466  * Given an instance of type 'bytea' creates a new one with
3467  * the Nth byte set to the given value.
3468  *
3469  *-------------------------------------------------------------
3470  */
3471 Datum
3473 {
 /*
  * NOTE(review): the _COPY fetch presumably hands back a private, writable
  * copy of the argument, which is what makes the in-place store below
  * legitimate -- confirm against the macro's definition.
  */
3474  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3475  int32 n = PG_GETARG_INT32(1);
3476  int32 newByte = PG_GETARG_INT32(2);
3477  int len;
3478 
3479  len = VARSIZE(res) - VARHDRSZ;
3480 
 /* Byte indexes are 0-based; anything outside 0..len-1 is an error. */
3481  if (n < 0 || n >= len)
3482  ereport(ERROR,
3483  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3484  errmsg("index %d out of valid range, 0..%d",
3485  n, len - 1)));
3486 
3487  /*
3488  * Now set the byte.
3489  */
 /* newByte is not range-checked; the store keeps only its low 8 bits. */
3490  ((unsigned char *) VARDATA(res))[n] = newByte;
3491 
3492  PG_RETURN_BYTEA_P(res);
3493 }
3494 
3495 /*-------------------------------------------------------------
3496  * byteaSetBit
3497  *
3498  * Given an instance of type 'bytea' creates a new one with
3499  * the Nth bit set to the given value.
3500  *
3501  *-------------------------------------------------------------
3502  */
3503 Datum
3505 {
3506  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3507  int32 n = PG_GETARG_INT32(1);
3508  int32 newBit = PG_GETARG_INT32(2);
3509  int len;
3510  int oldByte,
3511  newByte;
3512  int byteNo,
3513  bitNo;
3514 
3515  len = VARSIZE(res) - VARHDRSZ;
3516 
3517  if (n < 0 || n >= len * 8)
3518  ereport(ERROR,
3519  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3520  errmsg("index %d out of valid range, 0..%d",
3521  n, len * 8 - 1)));
3522 
3523  byteNo = n / 8;
3524  bitNo = n % 8;
3525 
3526  /*
3527  * sanity check!
3528  */
3529  if (newBit != 0 && newBit != 1)
3530  ereport(ERROR,
3531  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3532  errmsg("new bit must be 0 or 1")));
3533 
3534  /*
3535  * Update the byte.
3536  */
3537  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3538 
3539  if (newBit == 0)
3540  newByte = oldByte & (~(1 << bitNo));
3541  else
3542  newByte = oldByte | (1 << bitNo);
3543 
3544  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3545 
3546  PG_RETURN_BYTEA_P(res);
3547 }
3548 
3549 
3550 /* text_name()
3551  * Converts a text type to a Name type.
3552  */
3553 Datum
3555 {
3556  text *s = PG_GETARG_TEXT_PP(0);
3557  Name result;
3558  int len;
3559 
3560  len = VARSIZE_ANY_EXHDR(s);
3561 
3562  /* Truncate oversize input */
 /*
  * At most NAMEDATALEN - 1 bytes are kept, so at least one trailing zero
  * byte always remains in the result buffer.
  * NOTE(review): pg_mbcliplen presumably clips on a character boundary so
  * a multibyte character is not cut in half -- confirm.
  */
3563  if (len >= NAMEDATALEN)
3564  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3565 
3566  /* We use palloc0 here to ensure result is zero-padded */
3567  result = (Name) palloc0(NAMEDATALEN);
3568  memcpy(NameStr(*result), VARDATA_ANY(s), len);
3569 
3570  PG_RETURN_NAME(result);
3571 }
3572 
3573 /* name_text()
3574  * Converts a Name type to a text type.
3575  */
3576 Datum
3578 {
3579  Name s = PG_GETARG_NAME(0);
3580 
 /*
  * NOTE(review): the embedded listing numbers skip 3577 and 3581, so the
  * line carrying the function name and the statement that builds and
  * returns the text result are not visible in this listing.
  */
3582 }
3583 
3584 
3585 /*
3586  * textToQualifiedNameList - convert a text object to list of names
3587  *
3588  * This implements the input parsing needed by nextval() and other
3589  * functions that take a text parameter representing a qualified name.
3590  * We split the name at dots, downcase if not double-quoted, and
3591  * truncate names if they're too long.
3592  */
3593 List *
3595 {
3596  char *rawname;
3597  List *result = NIL;
3598  List *namelist;
3599  ListCell *l;
3600 
3601  /* Convert to C string (handles possible detoasting). */
3602  /* Note we rely on being able to modify rawname below. */
3603  rawname = text_to_cstring(textval);
3604 
 /* Names are separated by dots; the splitter scribbles on rawname. */
3605  if (!SplitIdentifierString(rawname, '.', &namelist))
3606  ereport(ERROR,
3607  (errcode(ERRCODE_INVALID_NAME),
3608  errmsg("invalid name syntax")));
3609 
 /* The splitter accepts an empty string, but a qualified name may not be. */
3610  if (namelist == NIL)
3611  ereport(ERROR,
3612  (errcode(ERRCODE_INVALID_NAME),
3613  errmsg("invalid name syntax")));
3614 
 /*
  * namelist entries point into rawname, which is freed below, so each
  * name is duplicated before being wrapped for the result list.
  */
3615  foreach(l, namelist)
3616  {
3617  char *curname = (char *) lfirst(l);
3618 
3619  result = lappend(result, makeString(pstrdup(curname)));
3620  }
3621 
3622  pfree(rawname);
3623  list_free(namelist);
3624 
3625  return result;
3626 }
3627 
3628 /*
3629  * SplitIdentifierString --- parse a string containing identifiers
3630  *
3631  * This is the guts of textToQualifiedNameList, and is exported for use in
3632  * other situations such as parsing GUC variables. In the GUC case, it's
3633  * important to avoid memory leaks, so the API is designed to minimize the
3634  * amount of stuff that needs to be allocated and freed.
3635  *
3636  * Inputs:
3637  * rawstring: the input string; must be overwritable! On return, it's
3638  * been modified to contain the separated identifiers.
3639  * separator: the separator punctuation expected between identifiers
3640  * (typically '.' or ','). Whitespace may also appear around
3641  * identifiers.
3642  * Outputs:
3643  * namelist: filled with a palloc'd list of pointers to identifiers within
3644  * rawstring. Caller should list_free() this even on error return.
3645  *
3646  * Returns true if okay, false if there is a syntax error in the string.
3647  *
3648  * Note that an empty string is considered okay here, though not in
3649  * textToQualifiedNameList.
3650  */
3651 bool
3652 SplitIdentifierString(char *rawstring, char separator,
3653  List **namelist)
3654 {
3655  char *nextp = rawstring;
3656  bool done = false;
3657 
3658  *namelist = NIL;
3659 
3660  while (scanner_isspace(*nextp))
3661  nextp++; /* skip leading whitespace */
3662 
3663  if (*nextp == '\0')
3664  return true; /* allow empty string */
3665 
3666  /* At the top of the loop, we are at start of a new identifier. */
3667  do
3668  {
3669  char *curname;
3670  char *endp;
3671 
3672  if (*nextp == '"')
3673  {
3674  /* Quoted name --- collapse quote-quote pairs, no downcasing */
3675  curname = nextp + 1;
3676  for (;;)
3677  {
3678  endp = strchr(nextp + 1, '"');
3679  if (endp == NULL)
3680  return false; /* mismatched quotes */
3681  if (endp[1] != '"')
3682  break; /* found end of quoted name */
3683  /* Collapse adjacent quotes into one quote, and look again */
3684  memmove(endp, endp + 1, strlen(endp));
3685  nextp = endp;
3686  }
3687  /* endp now points at the terminating quote */
3688  nextp = endp + 1;
3689  }
3690  else
3691  {
3692  /* Unquoted name --- extends to separator or whitespace */
3693  char *downname;
3694  int len;
3695 
3696  curname = nextp;
3697  while (*nextp && *nextp != separator &&
3698  !scanner_isspace(*nextp))
3699  nextp++;
3700  endp = nextp;
3701  if (curname == nextp)
3702  return false; /* empty unquoted name not allowed */
3703 
3704  /*
3705  * Downcase the identifier, using same code as main lexer does.
3706  *
3707  * XXX because we want to overwrite the input in-place, we cannot
3708  * support a downcasing transformation that increases the string
3709  * length. This is not a problem given the current implementation
3710  * of downcase_truncate_identifier, but we'll probably have to do
3711  * something about this someday.
3712  */
3713  len = endp - curname;
3714  downname = downcase_truncate_identifier(curname, len, false);
3715  Assert(strlen(downname) <= len);
3716  strncpy(curname, downname, len); /* strncpy is required here */
3717  pfree(downname);
3718  }
3719 
3720  while (scanner_isspace(*nextp))
3721  nextp++; /* skip trailing whitespace */
3722 
3723  if (*nextp == separator)
3724  {
3725  nextp++;
3726  while (scanner_isspace(*nextp))
3727  nextp++; /* skip leading whitespace for next */
3728  /* we expect another name, so done remains false */
3729  }
3730  else if (*nextp == '\0')
3731  done = true;
3732  else
3733  return false; /* invalid syntax */
3734 
3735  /* Now safe to overwrite separator with a null */
3736  *endp = '\0';
3737 
3738  /* Truncate name if it's overlength */
3739  truncate_identifier(curname, strlen(curname), false);
3740 
3741  /*
3742  * Finished isolating current name --- add it to list
3743  */
3744  *namelist = lappend(*namelist, curname);
3745 
3746  /* Loop back if we didn't reach end of string */
3747  } while (!done);
3748 
3749  return true;
3750 }
3751 
3752 
3753 /*
3754  * SplitDirectoriesString --- parse a string containing file/directory names
3755  *
3756  * This works fine on file names too; the function name is historical.
3757  *
3758  * This is similar to SplitIdentifierString, except that the parsing
3759  * rules are meant to handle pathnames instead of identifiers: there is
3760  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3761  * and we apply canonicalize_path() to each extracted string. Because of the
3762  * last, the returned strings are separately palloc'd rather than being
3763  * pointers into rawstring --- but we still scribble on rawstring.
3764  *
3765  * Inputs:
3766  * rawstring: the input string; must be modifiable!
3767  * separator: the separator punctuation expected between directories
3768  * (typically ',' or ';'). Whitespace may also appear around
3769  * directories.
3770  * Outputs:
3771  * namelist: filled with a palloc'd list of directory names.
3772  * Caller should list_free_deep() this even on error return.
3773  *
3774  * Returns true if okay, false if there is a syntax error in the string.
3775  *
3776  * Note that an empty string is considered okay here.
3777  */
3778 bool
3779 SplitDirectoriesString(char *rawstring, char separator,
3780  List **namelist)
3781 {
3782  char *nextp = rawstring;
3783  bool done = false;
3784 
3785  *namelist = NIL;
3786 
3787  while (scanner_isspace(*nextp))
3788  nextp++; /* skip leading whitespace */
3789 
3790  if (*nextp == '\0')
3791  return true; /* allow empty string */
3792 
3793  /* At the top of the loop, we are at start of a new directory. */
3794  do
3795  {
3796  char *curname;
3797  char *endp;
3798 
3799  if (*nextp == '"')
3800  {
3801  /* Quoted name --- collapse quote-quote pairs */
3802  curname = nextp + 1;
3803  for (;;)
3804  {
3805  endp = strchr(nextp + 1, '"');
3806  if (endp == NULL)
3807  return false; /* mismatched quotes */
3808  if (endp[1] != '"')
3809  break; /* found end of quoted name */
3810  /* Collapse adjacent quotes into one quote, and look again */
3811  memmove(endp, endp + 1, strlen(endp));
3812  nextp = endp;
3813  }
3814  /* endp now points at the terminating quote */
3815  nextp = endp + 1;
3816  }
3817  else
3818  {
3819  /* Unquoted name --- extends to separator or end of string */
3820  curname = endp = nextp;
3821  while (*nextp && *nextp != separator)
3822  {
3823  /* trailing whitespace should not be included in name */
3824  if (!scanner_isspace(*nextp))
3825  endp = nextp + 1;
3826  nextp++;
3827  }
3828  if (curname == endp)
3829  return false; /* empty unquoted name not allowed */
3830  }
3831 
3832  while (scanner_isspace(*nextp))
3833  nextp++; /* skip trailing whitespace */
3834 
3835  if (*nextp == separator)
3836  {
3837  nextp++;
3838  while (scanner_isspace(*nextp))
3839  nextp++; /* skip leading whitespace for next */
3840  /* we expect another name, so done remains false */
3841  }
3842  else if (*nextp == '\0')
3843  done = true;
3844  else
3845  return false; /* invalid syntax */
3846 
3847  /* Now safe to overwrite separator with a null */
3848  *endp = '\0';
3849 
3850  /* Truncate path if it's overlength */
3851  if (strlen(curname) >= MAXPGPATH)
3852  curname[MAXPGPATH - 1] = '\0';
3853 
3854  /*
3855  * Finished isolating current name --- add it to list
3856  */
3857  curname = pstrdup(curname);
3858  canonicalize_path(curname);
3859  *namelist = lappend(*namelist, curname);
3860 
3861  /* Loop back if we didn't reach end of string */
3862  } while (!done);
3863 
3864  return true;
3865 }
3866 
3867 
3868 /*
3869  * SplitGUCList --- parse a string containing identifiers or file names
3870  *
3871  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
3872  * presuming whether the elements will be taken as identifiers or file names.
3873  * We assume the input has already been through flatten_set_variable_args(),
3874  * so that we need never downcase (if appropriate, that was done already).
3875  * Nor do we ever truncate, since we don't know the correct max length.
3876  * We disallow embedded whitespace for simplicity (it shouldn't matter,
3877  * because any embedded whitespace should have led to double-quoting).
3878  * Otherwise the API is identical to SplitIdentifierString.
3879  *
3880  * XXX it's annoying to have so many copies of this string-splitting logic.
3881  * However, it's not clear that having one function with a bunch of option
3882  * flags would be much better.
3883  *
3884  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
3885  * Be sure to update that if you have to change this.
3886  *
3887  * Inputs:
3888  * rawstring: the input string; must be overwritable! On return, it's
3889  * been modified to contain the separated identifiers.
3890  * separator: the separator punctuation expected between identifiers
3891  * (typically '.' or ','). Whitespace may also appear around
3892  * identifiers.
3893  * Outputs:
3894  * namelist: filled with a palloc'd list of pointers to identifiers within
3895  * rawstring. Caller should list_free() this even on error return.
3896  *
3897  * Returns true if okay, false if there is a syntax error in the string.
3898  */
3899 bool
3900 SplitGUCList(char *rawstring, char separator,
3901  List **namelist)
3902 {
3903  char *nextp = rawstring;
3904  bool done = false;
3905 
3906  *namelist = NIL;
3907 
3908  while (scanner_isspace(*nextp))
3909  nextp++; /* skip leading whitespace */
3910 
3911  if (*nextp == '\0')
3912  return true; /* allow empty string */
3913 
3914  /* At the top of the loop, we are at start of a new identifier. */
3915  do
3916  {
3917  char *curname;
3918  char *endp;
3919 
3920  if (*nextp == '"')
3921  {
3922  /* Quoted name --- collapse quote-quote pairs */
3923  curname = nextp + 1;
3924  for (;;)
3925  {
3926  endp = strchr(nextp + 1, '"');
3927  if (endp == NULL)
3928  return false; /* mismatched quotes */
3929  if (endp[1] != '"')
3930  break; /* found end of quoted name */
3931  /* Collapse adjacent quotes into one quote, and look again */
3932  memmove(endp, endp + 1, strlen(endp));
3933  nextp = endp;
3934  }
3935  /* endp now points at the terminating quote */
3936  nextp = endp + 1;
3937  }
3938  else
3939  {
3940  /* Unquoted name --- extends to separator or whitespace */
3941  curname = nextp;
3942  while (*nextp && *nextp != separator &&
3943  !scanner_isspace(*nextp))
3944  nextp++;
3945  endp = nextp;
3946  if (curname == nextp)
3947  return false; /* empty unquoted name not allowed */
3948  }
3949 
3950  while (scanner_isspace(*nextp))
3951  nextp++; /* skip trailing whitespace */
3952 
3953  if (*nextp == separator)
3954  {
3955  nextp++;
3956  while (scanner_isspace(*nextp))
3957  nextp++; /* skip leading whitespace for next */
3958  /* we expect another name, so done remains false */
3959  }
3960  else if (*nextp == '\0')
3961  done = true;
3962  else
3963  return false; /* invalid syntax */
3964 
3965  /* Now safe to overwrite separator with a null */
3966  *endp = '\0';
3967 
3968  /*
3969  * Finished isolating current name --- add it to list
3970  */
3971  *namelist = lappend(*namelist, curname);
3972 
3973  /* Loop back if we didn't reach end of string */
3974  } while (!done);
3975 
3976  return true;
3977 }
3978 
3979 
3980 /*****************************************************************************
3981  * Comparison Functions used for bytea
3982  *
3983  * Note: btree indexes need these routines not to leak memory; therefore,
3984  * be careful to free working copies of toasted datums. Most places don't
3985  * need to be so careful.
3986  *****************************************************************************/
3987 
/*
 * Equality test for two bytea arguments: true when both have the same
 * length and identical bytes.
 */
3988 Datum
3990 {
3991  Datum arg1 = PG_GETARG_DATUM(0);
3992  Datum arg2 = PG_GETARG_DATUM(1);
3993  bool result;
3994  Size len1,
3995  len2;
3996 
3997  /*
3998  * We can use a fast path for unequal lengths, which might save us from
3999  * having to detoast one or both values.
4000  */
4001  len1 = toast_raw_datum_size(arg1);
4002  len2 = toast_raw_datum_size(arg2);
4003  if (len1 != len2)
4004  result = false;
4005  else
4006  {
4007  bytea *barg1 = DatumGetByteaPP(arg1);
4008  bytea *barg2 = DatumGetByteaPP(arg2);
4009 
 /*
  * len1 is a raw datum size, so VARHDRSZ is subtracted to get the
  * number of data bytes to compare.
  */
4010  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4011  len1 - VARHDRSZ) == 0);
4012 
 /* Don't leak working copies; see the note heading this section. */
4013  PG_FREE_IF_COPY(barg1, 0);
4014  PG_FREE_IF_COPY(barg2, 1);
4015  }
4016 
4017  PG_RETURN_BOOL(result);
4018 }
4019 
/*
 * Inequality test for two bytea arguments: true when the lengths differ
 * or any byte differs.
 */
4020 Datum
4022 {
4023  Datum arg1 = PG_GETARG_DATUM(0);
4024  Datum arg2 = PG_GETARG_DATUM(1);
4025  bool result;
4026  Size len1,
4027  len2;
4028 
4029  /*
4030  * We can use a fast path for unequal lengths, which might save us from
4031  * having to detoast one or both values.
4032  */
4033  len1 = toast_raw_datum_size(arg1);
4034  len2 = toast_raw_datum_size(arg2);
4035  if (len1 != len2)
4036  result = true;
4037  else
4038  {
4039  bytea *barg1 = DatumGetByteaPP(arg1);
4040  bytea *barg2 = DatumGetByteaPP(arg2);
4041 
 /*
  * len1 is a raw datum size, so VARHDRSZ is subtracted to get the
  * number of data bytes to compare.
  */
4042  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4043  len1 - VARHDRSZ) != 0);
4044 
 /* Don't leak working copies; see the note heading this section. */
4045  PG_FREE_IF_COPY(barg1, 0);
4046  PG_FREE_IF_COPY(barg2, 1);
4047  }
4048 
4049  PG_RETURN_BOOL(result);
4050 }
4051 
/*
 * "Less than" for two bytea arguments.  The common-length prefixes are
 * compared bytewise; if they are equal, the shorter value sorts first.
 */
4052 Datum
4054 {
4055  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4056  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4057  int len1,
4058  len2;
4059  int cmp;
4060 
4061  len1 = VARSIZE_ANY_EXHDR(arg1);
4062  len2 = VARSIZE_ANY_EXHDR(arg2);
4063 
4064  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4065 
 /* Only cmp and the lengths are needed from here on. */
4066  PG_FREE_IF_COPY(arg1, 0);
4067  PG_FREE_IF_COPY(arg2, 1);
4068 
4069  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
4070 }
4071 
/*
 * "Less than or equal" for two bytea arguments.  The common-length
 * prefixes are compared bytewise; if they are equal, the first value must
 * not be the longer one.
 */
4072 Datum
4074 {
4075  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4076  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4077  int len1,
4078  len2;
4079  int cmp;
4080 
4081  len1 = VARSIZE_ANY_EXHDR(arg1);
4082  len2 = VARSIZE_ANY_EXHDR(arg2);
4083 
4084  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4085 
 /* Only cmp and the lengths are needed from here on. */
4086  PG_FREE_IF_COPY(arg1, 0);
4087  PG_FREE_IF_COPY(arg2, 1);
4088 
4089  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
4090 }
4091 
/*
 * "Greater than" for two bytea arguments.  The common-length prefixes are
 * compared bytewise; if they are equal, the longer value sorts last.
 */
4092 Datum
4094 {
4095  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4096  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4097  int len1,
4098  len2;
4099  int cmp;
4100 
4101  len1 = VARSIZE_ANY_EXHDR(arg1);
4102  len2 = VARSIZE_ANY_EXHDR(arg2);
4103 
4104  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4105 
 /* Only cmp and the lengths are needed from here on. */
4106  PG_FREE_IF_COPY(arg1, 0);
4107  PG_FREE_IF_COPY(arg2, 1);
4108 
4109  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
4110 }
4111 
/*
 * "Greater than or equal" for two bytea arguments.  The common-length
 * prefixes are compared bytewise; if they are equal, the first value must
 * not be the shorter one.
 */
4112 Datum
4114 {
4115  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4116  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4117  int len1,
4118  len2;
4119  int cmp;
4120 
4121  len1 = VARSIZE_ANY_EXHDR(arg1);
4122  len2 = VARSIZE_ANY_EXHDR(arg2);
4123 
4124  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4125 
 /* Only cmp and the lengths are needed from here on. */
4126  PG_FREE_IF_COPY(arg1, 0);
4127  PG_FREE_IF_COPY(arg2, 1);
4128 
4129  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
4130 }
4131 
/*
 * Three-way comparison of two bytea arguments.  Returns a negative value,
 * zero, or a positive value as the first argument sorts before, equal to,
 * or after the second.
 */
4132 Datum
4134 {
4135  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4136  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4137  int len1,
4138  len2;
4139  int cmp;
4140 
4141  len1 = VARSIZE_ANY_EXHDR(arg1);
4142  len2 = VARSIZE_ANY_EXHDR(arg2);
4143 
4144  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 /* Equal common prefix: the shorter value sorts first. */
4145  if ((cmp == 0) && (len1 != len2))
4146  cmp = (len1 < len2) ? -1 : 1;
4147 
4148  PG_FREE_IF_COPY(arg1, 0);
4149  PG_FREE_IF_COPY(arg2, 1);
4150 
4151  PG_RETURN_INT32(cmp);
4152 }
4153 
/*
 * Sort-support setup for bytea: defers to the generic string sort support,
 * requesting "C" collation.
 *
 * NOTE(review): the embedded listing numbers skip 4155 and 4157, so the
 * function-name line and the declaration of ssup are not visible here.
 */
4154 Datum
4156 {
4158  MemoryContext oldcontext;
4159 
 /*
  * Run the setup with the context recorded in ssup as current, and put
  * the previous context back afterwards.
  */
4160  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
4161 
4162  /* Use generic string SortSupport, forcing "C" collation */
4163  varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
4164 
4165  MemoryContextSwitchTo(oldcontext);
4166 
4167  PG_RETURN_VOID();
4168 }
4169 
4170 /*
4171  * appendStringInfoText
4172  *
4173  * Append a text to str.
4174  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
4175  */
4176 static void
4178 {
 /*
  * NOTE(review): the embedded listing numbers skip 4177 and 4179, so the
  * signature line and the single body statement are not visible in this
  * listing.
  */
4180 }
4181 
4182 /*
4183  * replace_text
4184  * replace all occurrences of 'old_sub_str' in 'orig_str'
4185  * with 'new_sub_str' to form 'new_str'
4186  *
4187  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
4188  * otherwise returns 'new_str'
4189  */
4190 Datum
4192 {
4193  text *src_text = PG_GETARG_TEXT_PP(0);
4194  text *from_sub_text = PG_GETARG_TEXT_PP(1);
4195  text *to_sub_text = PG_GETARG_TEXT_PP(2);
4196  int src_text_len;
4197  int from_sub_text_len;
4199  text *ret_text;
4200  int chunk_len;
4201  char *curr_ptr;
4202  char *start_ptr;
4204  bool found;
4205 
 /*
  * NOTE(review): state and str are used below, but their declarations
  * are not visible in this listing (the embedded numbers skip 4198 and
  * 4203).
  */
4206  src_text_len = VARSIZE_ANY_EXHDR(src_text);
4207  from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
4208 
4209  /* Return unmodified source string if empty source or pattern */
4210  if (src_text_len < 1 || from_sub_text_len < 1)
4211  {
4212  PG_RETURN_TEXT_P(src_text);
4213  }
4214 
4215  text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
4216 
4217  found = text_position_next(&state);
4218 
4219  /* When the from_sub_text is not found, there is nothing to do. */
4220  if (!found)
4221  {
4222  text_position_cleanup(&state);
4223  PG_RETURN_TEXT_P(src_text);
4224  }
 /*
  * curr_ptr: start of the current match inside src_text.
  * start_ptr: first source byte not yet copied to the output.
  */
4225  curr_ptr = text_position_get_match_ptr(&state);
4226  start_ptr = VARDATA_ANY(src_text);
4227 
4228  initStringInfo(&str);
4229 
 /* One iteration per match: emit the gap before it, then the replacement. */
4230  do
4231  {
4233 
4234  /* copy the data skipped over by last text_position_next() */
4235  chunk_len = curr_ptr - start_ptr;
4236  appendBinaryStringInfo(&str, start_ptr, chunk_len);
4237 
4238  appendStringInfoText(&str, to_sub_text);
4239 
 /* Resume copying just past the matched text. */
4240  start_ptr = curr_ptr + from_sub_text_len;
4241 
4242  found = text_position_next(&state);
4243  if (found)
4244  curr_ptr = text_position_get_match_ptr(&state);
4245  }
4246  while (found);
4247 
4248  /* copy trailing data */
 /* i.e. everything from start_ptr up to the end of the source datum */
4249  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4250  appendBinaryStringInfo(&str, start_ptr, chunk_len);
4251 
4252  text_position_cleanup(&state);
4253 
4254  ret_text = cstring_to_text_with_len(str.data, str.len);
4255  pfree(str.data);
4256 
4257  PG_RETURN_TEXT_P(ret_text);
4258 }
4259 
4260 /*
4261  * check_replace_text_has_escape_char
4262  *
4263  * check whether replace_text contains escape char.
4264  */
4265 static bool
4267 {
4268  const char *p = VARDATA_ANY(replace_text);
4269  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4270 
 /*
  * NOTE(review): the condition selecting between the two scans is not
  * visible in this listing (the embedded numbers skip 4271). The first
  * scan advances one byte at a time; the second advances by pg_mblen(),
  * i.e. one character at a time.
  */
4272  {
4273  for (; p < p_end; p++)
4274  {
4275  if (*p == '\\')
4276  return true;
4277  }
4278  }
4279  else
4280  {
4281  for (; p < p_end; p += pg_mblen(p))
4282  {
4283  if (*p == '\\')
4284  return true;
4285  }
4286  }
4287 
 /* Reached the end without seeing a backslash. */
4288  return false;
4289 }
4290 
4291 /*
4292  * appendStringInfoRegexpSubstr
4293  *
4294  * Append replace_text to str, substituting regexp back references for
4295  * \n escapes. start_ptr is the start of the match in the source string,
4296  * at logical character position data_pos.
4297  */
4298 static void
4300  regmatch_t *pmatch,
4301  char *start_ptr, int data_pos)
4302 {
4303  const char *p = VARDATA_ANY(replace_text);
4304  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4305  int eml = pg_database_encoding_max_length();
4306 
 /* Each iteration copies one literal run and handles one escape. */
4307  for (;;)
4308  {
4309  const char *chunk_start = p;
4310  int so;
4311  int eo;
4312 
4313  /* Find next escape char. */
 /* Bytewise when eml is 1, else character by character via pg_mblen(). */
4314  if (eml == 1)
4315  {
4316  for (; p < p_end && *p != '\\'; p++)
4317  /* nothing */ ;
4318  }
4319  else
4320  {
4321  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
4322  /* nothing */ ;
4323  }
4324 
4325  /* Copy the text we just scanned over, if any. */
4326  if (p > chunk_start)
4327  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
4328 
4329  /* Done if at end of string, else advance over escape char. */
4330  if (p >= p_end)
4331  break;
4332  p++;
4333 
4334  if (p >= p_end)
4335  {
4336  /* Escape at very end of input. Treat same as unexpected char */
4337  appendStringInfoChar(str, '\\');
4338  break;
4339  }
4340 
4341  if (*p >= '1' && *p <= '9')
4342  {
4343  /* Use the back reference of regexp. */
 /* A single digit only, so idx is always in 1..9. */
4344  int idx = *p - '0';
4345 
4346  so = pmatch[idx].rm_so;
4347  eo = pmatch[idx].rm_eo;
4348  p++;
4349  }
4350  else if (*p == '&')
4351  {
4352  /* Use the entire matched string. */
4353  so = pmatch[0].rm_so;
4354  eo = pmatch[0].rm_eo;
4355  p++;
4356  }
4357  else if (*p == '\\')
4358  {
4359  /* \\ means transfer one \ to output. */
4360  appendStringInfoChar(str, '\\');
4361  p++;
4362  continue;
4363  }
4364  else
4365  {
4366  /*
4367  * If escape char is not followed by any expected char, just treat
4368  * it as ordinary data to copy. (XXX would it be better to throw
4369  * an error?)
4370  */
 /* p is not advanced: the following char is copied by the next pass. */
4371  appendStringInfoChar(str, '\\');
4372  continue;
4373  }
4374 
 /* Offsets of -1 mean there is nothing to copy for this reference. */
4375  if (so != -1 && eo != -1)
4376  {
4377  /*
4378  * Copy the text that is back reference of regexp. Note so and eo
4379  * are counted in characters not bytes.
4380  */
 /* These two locals shadow the outer chunk_start on purpose. */
4381  char *chunk_start;
4382  int chunk_len;
4383 
 /*
  * start_ptr corresponds to character position data_pos, so walk
  * forward so - data_pos characters to reach the substring, then
  * measure eo - so characters in bytes.
  */
4384  Assert(so >= data_pos);
4385  chunk_start = start_ptr;
4386  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
4387  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
4388  appendBinaryStringInfo(str, chunk_start, chunk_len);
4389  }
4390  }
4391 }
4392 
4393 #define REGEXP_REPLACE_BACKREF_CNT 10
4394 
4395 /*
4396  * replace_text_regexp
4397  *
4398  * Replace the text in src_text that matches regexp with replace_text.
4399  *
4400  * Note: to avoid having to include regex.h in builtins.h, we declare
4401  * the regexp argument as void *, but really it's regex_t *.
4402  */
4403 text *
4404 replace_text_regexp(text *src_text, void *regexp,
4405  text *replace_text, bool glob)
4406 {
4407  text *ret_text;
4408  regex_t *re = (regex_t *) regexp;
4409  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4412  pg_wchar *data;
4413  size_t data_len;
4414  int search_start;
4415  int data_pos;
4416  char *start_ptr;
4417  bool have_escape;
4418 
 /*
  * NOTE(review): buf and pmatch are used below, but their declarations
  * are not visible in this listing (the embedded numbers skip 4410 and
  * 4411).
  */
4419  initStringInfo(&buf);
4420 
4421  /* Convert data string to wide characters. */
 /* One slot per source byte plus one: enough for any encoding. */
4422  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
4423  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
4424 
4425  /* Check whether replace_text has escape char. */
4426  have_escape = check_replace_text_has_escape_char(replace_text);
4427 
4428  /* start_ptr points to the data_pos'th character of src_text */
4429  start_ptr = (char *) VARDATA_ANY(src_text);
4430  data_pos = 0;
4431 
 /*
  * search_start may equal data_len, so a pattern that matches the empty
  * string is still tried at the very end of the input.
  */
4432  search_start = 0;
4433  while (search_start <= data_len)
4434  {
4435  int regexec_result;
4436 
4438 
4439  regexec_result = pg_regexec(re,
4440  data,
4441  data_len,
4442  search_start,
4443  NULL, /* no details */
4445  pmatch,
4446  0);
4447 
4448  if (regexec_result == REG_NOMATCH)
4449  break;
4450 
 /* Any result other than match / no-match is reported as an error. */
4451  if (regexec_result != REG_OKAY)
4452  {
4453  char errMsg[100];
4454 
4456  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
4457  ereport(ERROR,
4458  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
4459  errmsg("regular expression failed: %s", errMsg)));
4460  }
4461 
4462  /*
4463  * Copy the text to the left of the match position. Note we are given
4464  * character not byte indexes.
4465  */
4466  if (pmatch[0].rm_so - data_pos > 0)
4467  {
4468  int chunk_len;
4469 
4470  chunk_len = charlen_to_bytelen(start_ptr,
4471  pmatch[0].rm_so - data_pos);
4472  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4473 
4474  /*
4475  * Advance start_ptr over that text, to avoid multiple rescans of
4476  * it if the replace_text contains multiple back-references.
4477  */
4478  start_ptr += chunk_len;
4479  data_pos = pmatch[0].rm_so;
4480  }
4481 
4482  /*
4483  * Copy the replace_text. Process back references when the
4484  * replace_text has escape characters.
4485  */
4486  if (have_escape)
4487  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
4488  start_ptr, data_pos);
4489  else
4490  appendStringInfoText(&buf, replace_text);
4491 
4492  /* Advance start_ptr and data_pos over the matched text. */
4493  start_ptr += charlen_to_bytelen(start_ptr,
4494  pmatch[0].rm_eo - data_pos);
4495  data_pos = pmatch[0].rm_eo;
4496 
4497  /*
4498  * When global option is off, replace the first instance only.
4499  */
4500  if (!glob)
4501  break;
4502 
4503  /*
4504  * Advance search position. Normally we start the next search at the
4505  * end of the previous match; but if the match was of zero length, we
4506  * have to advance by one character, or we'd just find the same match
4507  * again.
4508  */
4509  search_start = data_pos;
4510  if (pmatch[0].rm_so == pmatch[0].rm_eo)
4511  search_start++;
4512  }
4513 
4514  /*
4515  * Copy the text to the right of the last match.
4516  */
4517  if (data_pos < data_len)
4518  {
4519  int chunk_len;
4520 
 /* Everything from start_ptr up to the end of the source datum. */
4521  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4522  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4523  }
4524 
 /* Build the result, then release the scratch buffers. */
4525  ret_text = cstring_to_text_with_len(buf.data, buf.len);
4526  pfree(buf.data);
4527  pfree(data);
4528 
4529  return ret_text;
4530 }
4531 
4532 /*
4533  * split_text
4534  * parse input string
4535  * return ord item (1 based)
4536  * based on provided field separator
4537  */
4538 Datum
4540 {
4541  text *inputstring = PG_GETARG_TEXT_PP(0);
4542  text *fldsep = PG_GETARG_TEXT_PP(1);
4543  int fldnum = PG_GETARG_INT32(2);
4544  int inputstring_len;
4545  int fldsep_len;
4547  char *start_ptr;
4548  char *end_ptr;
4549  text *result_text;
4550  bool found;
4551 
4552  /* field number is 1 based */
4553  if (fldnum < 1)
4554  ereport(ERROR,
4555  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4556  errmsg("field position must be greater than zero")));
4557 
4558  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4559  fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4560 
4561  /* return empty string for empty input string */
4562  if (inputstring_len < 1)
4564 
4565  /* empty field separator */
4566  if (fldsep_len < 1)
4567  {
4568  text_position_cleanup(&state);
4569  /* if first field, return input string, else empty string */
4570  if (fldnum == 1)
4571  PG_RETURN_TEXT_P(inputstring);
4572  else
4574  }
4575 
4576  text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4577 
4578  /* identify bounds of first field */
4579  start_ptr = VARDATA_ANY(inputstring);
4580  found = text_position_next(&state);
4581 
4582  /* special case if fldsep not found at all */
4583  if (!found)
4584  {
4585  text_position_cleanup(&state);
4586  /* if field 1 requested, return input string, else empty string */
4587  if (fldnum == 1)
4588  PG_RETURN_TEXT_P(inputstring);
4589  else
4591  }
4592  end_ptr = text_position_get_match_ptr(&state);
4593 
4594  while (found && --fldnum > 0)
4595  {
4596  /* identify bounds of next field */
4597  start_ptr = end_ptr + fldsep_len;
4598  found = text_position_next(&state);
4599  if (found)
4600  end_ptr = text_position_get_match_ptr(&state);
4601  }
4602 
4603  text_position_cleanup(&state);
4604 
4605  if (fldnum > 0)
4606  {
4607  /* N'th field separator not found */
4608  /* if last field requested, return it, else empty string */
4609  if (fldnum == 1)
4610  {
4611  int last_len = start_ptr - VARDATA_ANY(inputstring);
4612 
4613  result_text = cstring_to_text_with_len(start_ptr,
4614  inputstring_len - last_len);
4615  }
4616  else
4617  result_text = cstring_to_text("");
4618  }
4619  else
4620  {
4621  /* non-last field requested */
4622  result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
4623  }
4624 
4625  PG_RETURN_TEXT_P(result_text);
4626 }
4627 
4628 /*
4629  * Convenience function to return true when two text params are equal.
4630  */
4631 static bool
4632 text_isequal(text *txt1, text *txt2, Oid collid)
4633 {
4635  collid,
4636  PointerGetDatum(txt1),
4637  PointerGetDatum(txt2)));
4638 }
4639 
4640 /*
4641  * text_to_array
4642  * parse input string and return text array of elements,
4643  * based on provided field separator
4644  */
4645 Datum
4647 {
4648  return text_to_array_internal(fcinfo);
4649 }
4650 
4651 /*
4652  * text_to_array_null
4653  * parse input string and return text array of elements,
4654  * based on provided field separator and null string
4655  *
4656  * This is a separate entry point only to prevent the regression tests from
4657  * complaining about different argument sets for the same internal function.
4658  */
4659 Datum
4661 {
4662  return text_to_array_internal(fcinfo);
4663 }
4664 
4665 /*
4666  * common code for text_to_array and text_to_array_null functions
4667  *
4668  * These are not strict so we have to test for null inputs explicitly.
4669  */
4670 static Datum
4672 {
4673  text *inputstring;
4674  text *fldsep;
4675  text *null_string;
4676  int inputstring_len;
4677  int fldsep_len;
4678  char *start_ptr;
4679  text *result_text;
4680  bool is_null;
4681  ArrayBuildState *astate = NULL;
4682 
4683  /* when input string is NULL, then result is NULL too */
4684  if (PG_ARGISNULL(0))
4685  PG_RETURN_NULL();
4686 
4687  inputstring = PG_GETARG_TEXT_PP(0);
4688 
4689  /* fldsep can be NULL */
4690  if (!PG_ARGISNULL(1))
4691  fldsep = PG_GETARG_TEXT_PP(1);
4692  else
4693  fldsep = NULL;
4694 
4695  /* null_string can be NULL or omitted */
4696  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4697  null_string = PG_GETARG_TEXT_PP(2);
4698  else
4699  null_string = NULL;
4700 
4701  if (fldsep != NULL)
4702  {
4703  /*
4704  * Normal case with non-null fldsep. Use the text_position machinery
4705  * to search for occurrences of fldsep.
4706  */
4708 
4709  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4710  fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4711 
4712  /* return empty array for empty input string */
4713  if (inputstring_len < 1)
4715 
4716  /*
4717  * empty field separator: return the input string as a one-element
4718  * array
4719  */
4720  if (fldsep_len < 1)
4721  {
4722  Datum elems[1];
4723  bool nulls[1];
4724  int dims[1];
4725  int lbs[1];
4726 
4727  /* single element can be a NULL too */
4728  is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false;
4729 
4730  elems[0] = PointerGetDatum(inputstring);
4731  nulls[0] = is_null;
4732  dims[0] = 1;
4733  lbs[0] = 1;
4734  /* XXX: this hardcodes assumptions about the text type */
4736  1, dims, lbs,
4737  TEXTOID, -1, false, 'i'));
4738  }
4739 
4740  text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4741 
4742  start_ptr = VARDATA_ANY(inputstring);
4743 
4744  for (;;)
4745  {
4746  bool found;
4747  char *end_ptr;
4748  int chunk_len;
4749 
4751 
4752  found = text_position_next(&state);
4753  if (!found)
4754  {
4755  /* fetch last field */
4756  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4757  end_ptr = NULL; /* not used, but some compilers complain */
4758  }
4759  else
4760  {
4761  /* fetch non-last field */
4762  end_ptr = text_position_get_match_ptr(&state);
4763  chunk_len = end_ptr - start_ptr;
4764  }
4765 
4766  /* must build a temp text datum to pass to accumArrayResult */
4767  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4768  is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
4769 
4770  /* stash away this field */
4771  astate = accumArrayResult(astate,
4772  PointerGetDatum(result_text),
4773  is_null,
4774  TEXTOID,
4776 
4777  pfree(result_text);
4778 
4779  if (!found)
4780  break;
4781 
4782  start_ptr = end_ptr + fldsep_len;
4783  }
4784 
4785  text_position_cleanup(&state);
4786  }
4787  else
4788  {
4789  /*
4790  * When fldsep is NULL, each character in the inputstring becomes an
4791  * element in the result array. The separator is effectively the
4792  * space between characters.
4793  */
4794  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4795 
4796  /* return empty array for empty input string */
4797  if (inputstring_len < 1)
4799 
4800  start_ptr = VARDATA_ANY(inputstring);
4801 
4802  while (inputstring_len > 0)
4803  {
4804  int chunk_len = pg_mblen(start_ptr);
4805 
4807 
4808  /* must build a temp text datum to pass to accumArrayResult */
4809  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4810  is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
4811 
4812  /* stash away this field */
4813  astate = accumArrayResult(astate,
4814  PointerGetDatum(result_text),
4815  is_null,
4816  TEXTOID,
4818 
4819  pfree(result_text);
4820 
4821  start_ptr += chunk_len;
4822  inputstring_len -= chunk_len;
4823  }
4824  }
4825 
4828 }
4829 
4830 /*
4831  * array_to_text
4832  * concatenate Cstring representation of input array elements
4833  * using provided field separator
4834  */
4835 Datum
4837 {
4839  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4840 
4841  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4842 }
4843 
4844 /*
4845  * array_to_text_null
4846  * concatenate Cstring representation of input array elements
4847  * using provided field separator and null string
4848  *
4849  * This version is not strict so we have to test for null inputs explicitly.
4850  */
4851 Datum
4853 {
4854  ArrayType *v;
4855  char *fldsep;
4856  char *null_string;
4857 
4858  /* returns NULL when first or second parameter is NULL */
4859  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4860  PG_RETURN_NULL();
4861 
4862  v = PG_GETARG_ARRAYTYPE_P(0);
4863  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4864 
4865  /* NULL null string is passed through as a null pointer */
4866  if (!PG_ARGISNULL(2))
4867  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4868  else
4869  null_string = NULL;
4870 
4871  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4872 }
4873 
4874 /*
4875  * common code for array_to_text and array_to_text_null functions
4876  */
4877 static text *
4879  const char *fldsep, const char *null_string)
4880 {
4881  text *result;
4882  int nitems,
4883  *dims,
4884  ndims;
4885  Oid element_type;
4886  int typlen;
4887  bool typbyval;
4888  char typalign;
4890  bool printed = false;
4891  char *p;
4892  bits8 *bitmap;
4893  int bitmask;
4894  int i;
4895  ArrayMetaState *my_extra;
4896 
4897  ndims = ARR_NDIM(v);
4898  dims = ARR_DIMS(v);
4899  nitems = ArrayGetNItems(ndims, dims);
4900 
4901  /* if there are no elements, return an empty string */
4902  if (nitems == 0)
4903  return cstring_to_text_with_len("", 0);
4904 
4905  element_type = ARR_ELEMTYPE(v);
4906  initStringInfo(&buf);
4907 
4908  /*
4909  * We arrange to look up info about element type, including its output
4910  * conversion proc, only once per series of calls, assuming the element
4911  * type doesn't change underneath us.
4912  */
4913  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4914  if (my_extra == NULL)
4915  {
4916  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4917  sizeof(ArrayMetaState));
4918  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4919  my_extra->element_type = ~element_type;
4920  }
4921 
4922  if (my_extra->element_type != element_type)
4923  {
4924  /*
4925  * Get info about element type, including its output conversion proc
4926  */
4927  get_type_io_data(element_type, IOFunc_output,
4928  &my_extra->typlen, &my_extra->typbyval,
4929  &my_extra->typalign, &my_extra->typdelim,
4930  &my_extra->typioparam, &my_extra->typiofunc);
4931  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4932  fcinfo->flinfo->fn_mcxt);
4933  my_extra->element_type = element_type;
4934  }
4935  typlen = my_extra->typlen;
4936  typbyval = my_extra->typbyval;
4937  typalign = my_extra->typalign;
4938 
4939  p = ARR_DATA_PTR(v);
4940  bitmap = ARR_NULLBITMAP(v);
4941  bitmask = 1;
4942 
4943  for (i = 0; i < nitems; i++)
4944  {
4945  Datum itemvalue;
4946  char *value;
4947 
4948  /* Get source element, checking for NULL */
4949  if (bitmap && (*bitmap & bitmask) == 0)
4950  {
4951  /* if null_string is NULL, we just ignore null elements */
4952  if (null_string != NULL)
4953  {
4954  if (printed)
4955  appendStringInfo(&buf, "%s%s", fldsep, null_string);
4956  else
4957  appendStringInfoString(&buf, null_string);
4958  printed = true;
4959  }
4960  }
4961  else
4962  {
4963  itemvalue = fetch_att(p, typbyval, typlen);
4964 
4965  value = OutputFunctionCall(&my_extra->proc, itemvalue);
4966 
4967  if (printed)
4968  appendStringInfo(&buf, "%s%s", fldsep, value);
4969  else
4970  appendStringInfoString(&buf, value);
4971  printed = true;
4972 
4973  p = att_addlength_pointer(p, typlen, p);
4974  p = (char *) att_align_nominal(p, typalign);
4975  }
4976 
4977  /* advance bitmap pointer if any */
4978  if (bitmap)
4979  {
4980  bitmask <<= 1;
4981  if (bitmask == 0x100)
4982  {
4983  bitmap++;
4984  bitmask = 1;
4985  }
4986  }
4987  }
4988 
4989  result = cstring_to_text_with_len(buf.data, buf.len);
4990  pfree(buf.data);
4991 
4992  return result;
4993 }
4994 
4995 #define HEXBASE 16
4996 /*
4997  * Convert an int32 to a string containing a base 16 (hex) representation of
4998  * the number.
4999  */
5000 Datum
5002 {
5004  char *ptr;
5005  const char *digits = "0123456789abcdef";
5006  char buf[32]; /* bigger than needed, but reasonable */
5007 
5008  ptr = buf + sizeof(buf) - 1;
5009  *ptr = '\0';
5010 
5011  do
5012  {
5013  *--ptr = digits[value % HEXBASE];
5014  value /= HEXBASE;
5015  } while (ptr > buf && value);
5016 
5018 }
5019 
5020 /*
5021  * Convert an int64 to a string containing a base 16 (hex) representation of
5022  * the number.
5023  */
5024 Datum
5026 {
5027  uint64 value = (uint64) PG_GETARG_INT64(0);
5028  char *ptr;
5029  const char *digits = "0123456789abcdef";
5030  char buf[32]; /* bigger than needed, but reasonable */
5031 
5032  ptr = buf + sizeof(buf) - 1;
5033  *ptr = '\0';
5034 
5035  do
5036  {
5037  *--ptr = digits[value % HEXBASE];
5038  value /= HEXBASE;
5039  } while (ptr > buf && value);
5040 
5042 }
5043 
5044 /*
5045  * Return the size of a datum, possibly compressed
5046  *
5047  * Works on any data type
5048  */
5049 Datum
5051 {
5053  int32 result;
5054  int typlen;
5055 
5056  /* On first call, get the input type's typlen, and save at *fn_extra */
5057  if (fcinfo->flinfo->fn_extra == NULL)
5058  {
5059  /* Lookup the datatype of the supplied argument */
5060  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5061 
5062  typlen = get_typlen(argtypeid);
5063  if (typlen == 0) /* should not happen */
5064  elog(ERROR, "cache lookup failed for type %u", argtypeid);
5065 
5066  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5067  sizeof(int));
5068  *((int *) fcinfo->flinfo->fn_extra) = typlen;
5069  }
5070  else
5071  typlen = *((int *) fcinfo->flinfo->fn_extra);
5072 
5073  if (typlen == -1)
5074  {
5075  /* varlena type, possibly toasted */
5076  result = toast_datum_size(value);
5077  }
5078  else if (typlen == -2)
5079  {
5080  /* cstring */
5081  result = strlen(DatumGetCString(value)) + 1;
5082  }
5083  else
5084  {
5085  /* ordinary fixed-width type */
5086  result = typlen;
5087  }
5088 
5089  PG_RETURN_INT32(result);
5090 }
5091 
5092 /*
5093  * string_agg - Concatenates values and returns string.
5094  *
5095  * Syntax: string_agg(value text, delimiter text) RETURNS text
5096  *
5097  * Note: Any NULL values are ignored. The first-call delimiter isn't
5098  * actually used at all, and on subsequent calls the delimiter precedes
5099  * the associated value.
5100  */
5101 
5102 /* subroutine to initialize state */
5103 static StringInfo
5105 {
5106  StringInfo state;
5107  MemoryContext aggcontext;
5108  MemoryContext oldcontext;
5109 
5110  if (!AggCheckCallContext(fcinfo, &aggcontext))
5111  {
5112  /* cannot be called directly because of internal-type argument */
5113  elog(ERROR, "string_agg_transfn called in non-aggregate context");
5114  }
5115 
5116  /*
5117  * Create state in aggregate context. It'll stay there across subsequent
5118  * calls.
5119  */
5120  oldcontext = MemoryContextSwitchTo(aggcontext);
5121  state = makeStringInfo();
5122  MemoryContextSwitchTo(oldcontext);
5123 
5124  return state;
5125 }
5126 
5127 Datum
5129 {
5130  StringInfo state;
5131 
5132  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5133 
5134  /* Append the value unless null. */
5135  if (!PG_ARGISNULL(1))
5136  {
5137  /* On the first time through, we ignore the delimiter. */
5138  if (state == NULL)
5139  state = makeStringAggState(fcinfo);
5140  else if (!PG_ARGISNULL(2))
5141  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
5142 
5143  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
5144  }
5145 
5146  /*
5147  * The transition type for string_agg() is declared to be "internal",
5148  * which is a pass-by-value type the same size as a pointer.
5149  */
5150  PG_RETURN_POINTER(state);
5151 }
5152 
5153 Datum
5155 {
5156  StringInfo state;
5157 
5158  /* cannot be called directly because of internal-type argument */
5159  Assert(AggCheckCallContext(fcinfo, NULL));
5160 
5161  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5162 
5163  if (state != NULL)
5165  else
5166  PG_RETURN_NULL();
5167 }
5168 
5169 /*
5170  * Prepare cache with fmgr info for the output functions of the datatypes of
5171  * the arguments of a concat-like function, beginning with argument "argidx".
5172  * (Arguments before that will have corresponding slots in the resulting
5173  * FmgrInfo array, but we don't fill those slots.)
5174  */
5175 static FmgrInfo *
5177 {
5178  FmgrInfo *foutcache;
5179  int i;
5180 
5181  /* We keep the info in fn_mcxt so it survives across calls */
5182  foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5183  PG_NARGS() * sizeof(FmgrInfo));
5184 
5185  for (i = argidx; i < PG_NARGS(); i++)
5186  {
5187  Oid valtype;
5188  Oid typOutput;
5189  bool typIsVarlena;
5190 
5191  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
5192  if (!OidIsValid(valtype))
5193  elog(ERROR, "could not determine data type of concat() input");
5194 
5195  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
5196  fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
5197  }
5198 
5199  fcinfo->flinfo->fn_extra = foutcache;
5200 
5201  return foutcache;
5202 }
5203 
5204 /*
5205  * Implementation of both concat() and concat_ws().
5206  *
5207  * sepstr is the separator string to place between values.
5208  * argidx identifies the first argument to concatenate (counting from zero);
5209  * note that this must be constant across any one series of calls.
5210  *
5211  * Returns NULL if result should be NULL, else text value.
5212  */
5213 static text *
5214 concat_internal(const char *sepstr, int argidx,
5215  FunctionCallInfo fcinfo)
5216 {
5217  text *result;
5219  FmgrInfo *foutcache;
5220  bool first_arg = true;
5221  int i;
5222 
5223  /*
5224  * concat(VARIADIC some-array) is essentially equivalent to
5225  * array_to_text(), ie concat the array elements with the given separator.
5226  * So we just pass the case off to that code.
5227  */
5228  if (get_fn_expr_variadic(fcinfo->flinfo))
5229  {
5230  ArrayType *arr;
5231 
5232  /* Should have just the one argument */
5233  Assert(argidx == PG_NARGS() - 1);
5234 
5235  /* concat(VARIADIC NULL) is defined as NULL */
5236  if (PG_ARGISNULL(argidx))
5237  return NULL;
5238 
5239  /*
5240  * Non-null argument had better be an array. We assume that any call
5241  * context that could let get_fn_expr_variadic return true will have
5242  * checked that a VARIADIC-labeled parameter actually is an array. So
5243  * it should be okay to just Assert that it's an array rather than
5244  * doing a full-fledged error check.
5245  */
5247 
5248  /* OK, safe to fetch the array value */
5249  arr = PG_GETARG_ARRAYTYPE_P(argidx);
5250 
5251  /*
5252  * And serialize the array. We tell array_to_text to ignore null
5253  * elements, which matches the behavior of the loop below.
5254  */
5255  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
5256  }
5257 
5258  /* Normal case without explicit VARIADIC marker */
5259  initStringInfo(&str);
5260 
5261  /* Get output function info, building it if first time through */
5262  foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
5263  if (foutcache == NULL)
5264  foutcache = build_concat_foutcache(fcinfo, argidx);
5265 
5266  for (i = argidx; i < PG_NARGS(); i++)
5267  {
5268  if (!PG_ARGISNULL(i))
5269  {
5271 
5272  /* add separator if appropriate */
5273  if (first_arg)
5274  first_arg = false;
5275  else
5276  appendStringInfoString(&str, sepstr);
5277 
5278  /* call the appropriate type output function, append the result */
5280  OutputFunctionCall(&foutcache[i], value));
5281  }
5282  }
5283 
5284  result = cstring_to_text_with_len(str.data, str.len);
5285  pfree(str.data);
5286 
5287  return result;
5288 }
5289 
5290 /*
5291  * Concatenate all arguments. NULL arguments are ignored.
5292  */
5293 Datum
5295 {
5296  text *result;
5297 
5298  result = concat_internal("", 0, fcinfo);
5299  if (result == NULL)
5300  PG_RETURN_NULL();
5301  PG_RETURN_TEXT_P(result);
5302 }
5303 
5304 /*
5305  * Concatenate all but first argument value with separators. The first
5306  * parameter is used as the separator. NULL arguments are ignored.
5307  */
5308 Datum
5310 {
5311  char *sep;
5312  text *result;
5313 
5314  /* return NULL when separator is NULL */
5315  if (PG_ARGISNULL(0))
5316  PG_RETURN_NULL();
5318 
5319  result = concat_internal(sep, 1, fcinfo);
5320  if (result == NULL)
5321  PG_RETURN_NULL();
5322  PG_RETURN_TEXT_P(result);
5323 }
5324 
5325 /*
5326  * Return first n characters in the string. When n is negative,
5327  * return all but last |n| characters.
5328  */
5329 Datum
5331 {
5332  int n = PG_GETARG_INT32(1);
5333 
5334  if (n < 0)
5335  {
5336  text *str = PG_GETARG_TEXT_PP(0);
5337  const char *p = VARDATA_ANY(str);
5338  int len = VARSIZE_ANY_EXHDR(str);
5339  int rlen;
5340 
5341  n = pg_mbstrlen_with_len(p, len) + n;
5342  rlen = pg_mbcharcliplen(p, len, n);
5344  }
5345  else
5347 }
5348 
5349 /*
5350  * Return last n characters in the string. When n is negative,
5351  * return all but first |n| characters.
5352  */
5353 Datum
5355 {
5356  text *str = PG_GETARG_TEXT_PP(0);
5357  const char *p = VARDATA_ANY(str);
5358  int len = VARSIZE_ANY_EXHDR(str);
5359  int n = PG_GETARG_INT32(1);
5360  int off;
5361 
5362  if (n < 0)
5363  n = -n;
5364  else
5365  n = pg_mbstrlen_with_len(p, len) - n;
5366  off = pg_mbcharcliplen(p, len, n);
5367 
5368  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
5369 }
5370 
5371 /*
5372  * Return reversed string
5373  */
5374 Datum
5376 {
5377  text *str = PG_GETARG_TEXT_PP(0);
5378  const char *p = VARDATA_ANY(str);
5379  int len = VARSIZE_ANY_EXHDR(str);
5380  const char *endp = p + len;
5381  text *result;
5382  char *dst;
5383 
5384  result = palloc(len + VARHDRSZ);
5385  dst = (char *) VARDATA(result) + len;
5386  SET_VARSIZE(result, len + VARHDRSZ);
5387 
5389  {
5390  /* multibyte version */
5391  while (p < endp)
5392  {
5393  int sz;
5394 
5395  sz = pg_mblen(p);
5396  dst -= sz;
5397  memcpy(dst, p, sz);
5398  p += sz;
5399  }
5400  }
5401  else
5402  {
5403  /* single byte version */
5404  while (p < endp)
5405  *(--dst) = *p++;
5406  }
5407 
5408  PG_RETURN_TEXT_P(result);
5409 }
5410 
5411 
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS	0x0001	/* is minus flag present? */

/*
 * Step "ptr" to the next character of the format string, raising an error
 * if that reaches "end_ptr".  "ptr" is modified in place, so it must be an
 * lvalue.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (++(ptr) >= (end_ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format() type specifier"), \
					 errhint("For a single \"%%\" use \"%%%%\"."))); \
	} while (0)
5425 
5426 /*
5427  * Returns a formatted string
5428  */
5429 Datum
5431 {
5432  text *fmt;
5434  const char *cp;
5435  const char *start_ptr;
5436  const char *end_ptr;
5437  text *result;
5438  int arg;
5439  bool funcvariadic;
5440  int nargs;
5441  Datum *elements = NULL;
5442  bool *nulls = NULL;
5443  Oid element_type = InvalidOid;
5444  Oid prev_type = InvalidOid;
5445  Oid prev_width_type = InvalidOid;
5446  FmgrInfo typoutputfinfo;
5447  FmgrInfo typoutputinfo_width;
5448 
5449  /* When format string is null, immediately return null */
5450  if (PG_ARGISNULL(0))
5451  PG_RETURN_NULL();
5452 
5453  /* If argument is marked VARIADIC, expand array into elements */
5454  if (get_fn_expr_variadic(fcinfo->flinfo))
5455  {
5456  ArrayType *arr;
5457  int16 elmlen;
5458  bool elmbyval;
5459  char elmalign;
5460  int nitems;
5461 
5462  /* Should have just the one argument */
5463  Assert(PG_NARGS() == 2);
5464 
5465  /* If argument is NULL, we treat it as zero-length array */
5466  if (PG_ARGISNULL(1))
5467  nitems = 0;
5468  else
5469  {
5470  /*
5471  * Non-null argument had better be an array. We assume that any
5472  * call context that could let get_fn_expr_variadic return true
5473  * will have checked that a VARIADIC-labeled parameter actually is
5474  * an array. So it should be okay to just Assert that it's an
5475  * array rather than doing a full-fledged error check.
5476  */
5478 
5479  /* OK, safe to fetch the array value */
5480  arr = PG_GETARG_ARRAYTYPE_P(1);
5481 
5482  /* Get info about array element type */
5483  element_type = ARR_ELEMTYPE(arr);
5484  get_typlenbyvalalign(element_type,
5485  &elmlen, &elmbyval, &elmalign);
5486 
5487  /* Extract all array elements */
5488  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5489  &elements, &nulls, &nitems);
5490  }
5491 
5492  nargs = nitems + 1;
5493  funcvariadic = true;
5494  }
5495  else
5496  {
5497  /* Non-variadic case, we'll process the arguments individually */
5498  nargs = PG_NARGS();
5499  funcvariadic = false;
5500  }
5501 
5502  /* Setup for main loop. */
5503  fmt = PG_GETARG_TEXT_PP(0);
5504  start_ptr = VARDATA_ANY(fmt);
5505  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5506  initStringInfo(&str);
5507  arg = 1; /* next argument position to print */
5508 
5509  /* Scan format string, looking for conversion specifiers. */
5510  for (cp = start_ptr; cp < end_ptr; cp++)
5511  {
5512  int argpos;
5513  int widthpos;
5514  int flags;
5515  int width;
5516  Datum value;
5517  bool isNull;
5518  Oid typid;
5519 
5520  /*
5521  * If it's not the start of a conversion specifier, just copy it to
5522  * the output buffer.
5523  */
5524  if (*cp != '%')
5525  {
5526  appendStringInfoCharMacro(&str, *cp);
5527  continue;
5528  }
5529 
5530  ADVANCE_PARSE_POINTER(cp, end_ptr);
5531 
5532  /* Easy case: %% outputs a single % */
5533  if (*cp == '%')
5534  {
5535  appendStringInfoCharMacro(&str, *cp);
5536  continue;
5537  }
5538 
5539  /* Parse the optional portions of the format specifier */
5540  cp = text_format_parse_format(cp, end_ptr,
5541  &argpos, &widthpos,
5542  &flags, &width);
5543 
5544  /*
5545  * Next we should see the main conversion specifier. Whether or not
5546  * an argument position was present, it's known that at least one
5547  * character remains in the string at this point. Experience suggests
5548  * that it's worth checking that that character is one of the expected
5549  * ones before we try to fetch arguments, so as to produce the least
5550  * confusing response to a mis-formatted specifier.
5551  */
5552  if (strchr("sIL", *cp) == NULL)
5553  ereport(ERROR,
5554  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5555  errmsg("unrecognized format() type specifier \"%c\"",
5556  *cp),
5557  errhint("For a single \"%%\" use \"%%%%\".")));
5558 
5559  /* If indirect width was specified, get its value */
5560  if (widthpos >= 0)
5561  {
5562  /* Collect the specified or next argument position */
5563  if (widthpos > 0)
5564  arg = widthpos;
5565  if (arg >= nargs)
5566  ereport(ERROR,
5567  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5568  errmsg("too few arguments for format()")));
5569 
5570  /* Get the value and type of the selected argument */
5571  if (!funcvariadic)
5572  {
5573  value = PG_GETARG_DATUM(arg);
5574  isNull = PG_ARGISNULL(arg);
5575  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5576  }
5577  else
5578  {
5579  value = elements[arg - 1];
5580  isNull = nulls[arg - 1];
5581  typid = element_type;
5582  }
5583  if (!OidIsValid(typid))
5584  elog(ERROR, "could not determine data type of format() input");
5585 
5586  arg++;
5587 
5588  /* We can treat NULL width the same as zero */
5589  if (isNull)
5590  width = 0;
5591  else if (typid == INT4OID)
5592  width = DatumGetInt32(value);
5593  else if (typid == INT2OID)
5594  width = DatumGetInt16(value);
5595  else
5596  {
5597  /* For less-usual datatypes, convert to text then to int */
5598  char *str;
5599 
5600  if (typid != prev_width_type)
5601  {
5602  Oid typoutputfunc;
5603  bool typIsVarlena;
5604 
5605  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5606  fmgr_info(typoutputfunc, &typoutputinfo_width);
5607  prev_width_type = typid;
5608  }
5609 
5610  str = OutputFunctionCall(&typoutputinfo_width, value);
5611 
5612  /* pg_strtoint32 will complain about bad data or overflow */
5613  width = pg_strtoint32(str);
5614 
5615  pfree(str);
5616  }
5617  }
5618 
5619  /* Collect the specified or next argument position */
5620  if (argpos > 0)
5621  arg = argpos;
5622  if (arg >= nargs)
5623  ereport(ERROR,
5624  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5625  errmsg("too few arguments for format()")));
5626 
5627  /* Get the value and type of the selected argument */
5628  if (!funcvariadic)
5629  {
5630  value = PG_GETARG_DATUM(arg);
5631  isNull = PG_ARGISNULL(arg);
5632  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5633  }
5634  else
5635  {
5636  value = elements[arg - 1];
5637  isNull = nulls[arg - 1];
5638  typid = element_type;
5639  }
5640  if (!OidIsValid(typid))
5641  elog(ERROR, "could not determine data type of format() input");
5642 
5643  arg++;
5644 
5645  /*
5646  * Get the appropriate typOutput function, reusing previous one if
5647  * same type as previous argument. That's particularly useful in the
5648  * variadic-array case, but often saves work even for ordinary calls.
5649  */
5650  if (typid != prev_type)
5651  {
5652  Oid typoutputfunc;
5653  bool typIsVarlena;
5654 
5655  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5656  fmgr_info(typoutputfunc, &typoutputfinfo);
5657  prev_type = typid;
5658  }
5659 
5660  /*
5661  * And now we can format the value.
5662  */
5663  switch (*cp)
5664  {
5665  case 's':
5666  case 'I':
5667  case 'L':
5668  text_format_string_conversion(&str, *cp, &typoutputfinfo,
5669  value, isNull,
5670  flags, width);
5671  break;
5672  default:
5673  /* should not get here, because of previous check */
5674  ereport(ERROR,
5675  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5676  errmsg("unrecognized format() type specifier \"%c\"",
5677  *cp),
5678  errhint("For a single \"%%\" use \"%%%%\".")));
5679  break;
5680  }
5681  }
5682 
5683  /* Don't need deconstruct_array results anymore. */
5684  if (elements != NULL)
5685  pfree(elements);
5686  if (nulls != NULL)
5687  pfree(nulls);
5688 
5689  /* Generate results. */
5690  result = cstring_to_text_with_len(str.data, str.len);
5691  pfree(str.data);
5692 
5693  PG_RETURN_TEXT_P(result);
5694 }
5695 
5696 /*
5697  * Parse contiguous digits as a decimal number.
5698  *
5699  * Returns true if some digits could be parsed.
5700  * The value is returned into *value, and *ptr is advanced to the next
5701  * character to be parsed.
5702  *
5703  * Note parsing invariant: at least one character is known available before
5704  * string end (end_ptr) at entry, and this is still true at exit.
5705  */
5706 static bool
5707 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5708 {
5709  bool found = false;
5710  const char *cp = *ptr;
5711  int val = 0;
5712 
5713  while (*cp >= '0' && *cp <= '9')
5714  {
5715  int8 digit = (*cp - '0');
5716 
5717  if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
5718  unlikely(pg_add_s32_overflow(val, digit, &val)))
5719  ereport(ERROR,
5720  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5721  errmsg("number is out of range")));
5722  ADVANCE_PARSE_POINTER(cp, end_ptr);
5723  found = true;
5724  }
5725 
5726  *ptr = cp;
5727  *value = val;
5728 
5729  return found;
5730 }
5731 
5732 /*
5733  * Parse a format specifier (generally following the SUS printf spec).
5734  *
5735  * We have already advanced over the initial '%', and we are looking for
5736  * [argpos][flags][width]type (but the type character is not consumed here).
5737  *
5738  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5739  * Output parameters:
5740  * argpos: argument position for value to be printed. -1 means unspecified.
5741  * widthpos: argument position for width. Zero means the argument position
5742  * was unspecified (ie, take the next arg) and -1 means no width
5743  * argument (width was omitted or specified as a constant).
5744  * flags: bitmask of flags.
5745  * width: directly-specified width value. Zero means the width was omitted
5746  * (note it's not necessary to distinguish this case from an explicit
5747  * zero width value).
5748  *
5749  * The function result is the next character position to be parsed, ie, the
5750  * location where the type character is/should be.
5751  *
5752  * Note parsing invariant: at least one character is known available before
5753  * string end (end_ptr) at entry, and this is still true at exit.
5754  */
5755 static const char *
5756 text_format_parse_format(const char *start_ptr, const char *end_ptr,
5757  int *argpos, int *widthpos,
5758  int *flags, int *width)
5759 {
5760  const char *cp = start_ptr;
5761  int n;
5762 
5763  /* set defaults for output parameters */
5764  *argpos = -1;
5765  *widthpos = -1;
5766  *flags = 0;
5767  *width = 0;
5768 
5769  /* try to identify first number */
5770  if (text_format_parse_digits(&cp, end_ptr, &n))
5771  {
5772  if (*cp != '$')
5773  {
5774  /* Must be just a width and a type, so we're done */
5775  *width = n;
5776  return cp;
5777  }
5778  /* The number was argument position */
5779  *argpos = n;
5780  /* Explicit 0 for argument index is immediately refused */
5781  if (n == 0)
5782  ereport(ERROR,
5783  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5784  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5785  ADVANCE_PARSE_POINTER(cp, end_ptr);
5786  }
5787 
5788  /* Handle flags (only minus is supported now) */
5789  while (*cp == '-')
5790  {
5791  *flags |= TEXT_FORMAT_FLAG_MINUS;
5792  ADVANCE_PARSE_POINTER(cp, end_ptr);
5793  }
5794 
5795  if (*cp == '*')
5796  {
5797  /* Handle indirect width */
5798  ADVANCE_PARSE_POINTER(cp, end_ptr);
5799  if (text_format_parse_digits(&cp, end_ptr, &n))
5800  {
5801  /* number in this position must be closed by $ */
5802  if (*cp != '$')
5803  ereport(ERROR,
5804  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5805  errmsg("width argument position must be ended by \"$\"")));
5806  /* The number was width argument position */
5807  *widthpos = n;
5808  /* Explicit 0 for argument index is immediately refused */
5809  if (n == 0)
5810  ereport(ERROR,
5811  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5812  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5813  ADVANCE_PARSE_POINTER(cp, end_ptr);
5814  }
5815  else
5816  *widthpos = 0; /* width's argument position is unspecified */
5817  }
5818  else
5819  {
5820  /* Check for direct width specification */
5821  if (text_format_parse_digits(&cp, end_ptr, &n))
5822  *width = n;
5823  }
5824 
5825  /* cp should now be pointing at type character */
5826  return cp;
5827 }
5828 
5829 /*
5830  * Format a %s, %I, or %L conversion
5831  */
5832 static void
5834  FmgrInfo *typOutputInfo,
5835  Datum value, bool isNull,
5836  int flags, int width)
5837 {
5838  char *str;
5839 
5840  /* Handle NULL arguments before trying to stringify the value. */
5841  if (isNull)
5842  {
5843  if (conversion == 's')
5844  text_format_append_string(buf, "", flags, width);
5845  else if (conversion == 'L')
5846  text_format_append_string(buf, "NULL", flags, width);
5847  else if (conversion == 'I')
5848  ereport(ERROR,
5849  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5850  errmsg("null values cannot be formatted as an SQL identifier")));
5851  return;
5852  }
5853 
5854  /* Stringify. */
5855  str = OutputFunctionCall(typOutputInfo, value);
5856 
5857  /* Escape. */
5858  if (conversion == 'I')
5859  {
5860  /* quote_identifier may or may not allocate a new string. */
5861  text_format_append_string(buf, quote_identifier(str), flags, width);
5862  }
5863  else if (conversion == 'L')
5864  {
5865  char *qstr = quote_literal_cstr(str);
5866 
5867  text_format_append_string(buf, qstr, flags, width);
5868  /* quote_literal_cstr() always allocates a new string */
5869  pfree(qstr);
5870  }
5871  else
5872  text_format_append_string(buf, str, flags, width);
5873 
5874  /* Cleanup. */
5875  pfree(str);
5876 }
5877 
5878 /*
5879  * Append str to buf, padding as directed by flags/width
5880  */
5881 static void
5883  int flags, int width)
5884 {
5885  bool align_to_left = false;
5886  int len;
5887 
5888  /* fast path for typical easy case */
5889  if (width == 0)
5890  {
5891  appendStringInfoString(buf, str);
5892  return;
5893  }
5894 
5895  if (width < 0)
5896  {
5897  /* Negative width: implicit '-' flag, then take absolute value */
5898  align_to_left = true;
5899  /* -INT_MIN is undefined */
5900  if (width <= INT_MIN)
5901  ereport(ERROR,
5902  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5903  errmsg("number is out of range")));
5904  width = -width;
5905  }
5906  else if (flags & TEXT_FORMAT_FLAG_MINUS)
5907  align_to_left = true;
5908 
5909  len = pg_mbstrlen(str);
5910  if (align_to_left)
5911  {
5912  /* left justify */
5913  appendStringInfoString(buf, str);
5914  if (len < width)
5915  appendStringInfoSpaces(buf, width - len);
5916  }
5917  else
5918  {
5919  /* right justify */
5920  if (len < width)
5921  appendStringInfoSpaces(buf, width - len);
5922  appendStringInfoString(buf, str);
5923  }
5924 }
5925 
5926 /*
5927  * text_format_nv - nonvariadic wrapper for text_format function.
5928  *
5929  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5930  * which checks that all built-in functions that share the implementing C
5931  * function take the same number of arguments.
5932  */
5933 Datum
5935 {
5936  return text_format(fcinfo);
5937 }
5938 
/*
 * Helper function for Levenshtein distance functions.  Faster than memcmp(),
 * for this use case.
 *
 * Returns true when the first "len" bytes of s1 and s2 are identical.  The
 * bytes are compared from the last one back to the first.  A "len" of zero
 * or less compares nothing and yields true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
5954 
5955 /* Expand each Levenshtein distance variant */
5956 #include "levenshtein.c"
5957 #define LEVENSHTEIN_LESS_EQUAL
5958 #include "levenshtein.c"
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3099
#define PG_CACHE_LINE_SIZE
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4646
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:3235
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
Value * makeString(char *str)
Definition: value.c:53
signed short int16
Definition: c.h:346
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:373
union pg_locale_struct::@144 info
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
#define DatumGetUInt32(X)
Definition: postgres.h:486
#define NIL
Definition: pg_list.h:65
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:5430
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2166
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:5417
Definition: fmgr.h:56
text * replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob)
Definition: varlena.c:4404
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:312
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3504
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:904
Datum split_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4539
int errhint(const char *fmt,...)
Definition: elog.c:1071
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1044
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2674
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
Datum namegetext(PG_FUNCTION_ARGS)
Definition: varlena.c:2972
MemoryContext fn_mcxt
Definition: fmgr.h:65
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:146
Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.c:148
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:10626
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1825
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:3051
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:298
#define DatumGetInt32(X)
Definition: postgres.h:472
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:140
Datum namelttext(PG_FUNCTION_ARGS)
Definition: varlena.c:2954
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:3083
#define HEXBASE
Definition: varlena.c:4995
char * refpoint
Definition: varlena.c:71
#define VARSIZE(PTR)
Definition: postgres.h:303
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4191
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:4093
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:5833
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2049
#define PointerGetDatum(X)
Definition: postgres.h:556
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:130
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:552
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:328
#define VARHDRSZ
Definition: c.h:556
char * pstrdup(const char *in)
Definition: mcxt.c:1186
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:541
regoff_t rm_so
Definition: regex.h:85
#define DatumGetTextPP(X)
Definition: fmgr.h:286
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
StringInfoData * StringInfo
Definition: stringinfo.h:44
#define Min(x, y)
Definition: c.h:905
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:308
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2433
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2408
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:297
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:3169
static struct @145 value
void canonicalize_path(char *path)
Definition: path.c:254
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:1951
char * last_match
Definition: varlena.c:63
int errcode(int sqlerrcode)
Definition: elog.c:608
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:174
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:263
#define DatumGetByteaPP(X)
Definition: fmgr.h:285
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3472
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:504
static bool text_position_next(TextPositionState *state)
Definition: varlena.c:1285
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:5128
Datum nameeqtext(PG_FUNCTION_ARGS)
Definition: varlena.c:2816
static char * text_position_get_match_ptr(TextPositionState *state)
Definition: varlena.c:1418
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:360
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3410
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:3244
#define LOG
Definition: elog.h:26
Datum bttextnamecmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2932
unsigned int Oid
Definition: postgres_ext.h:31
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3989
static bool text_isequal(text *txt1, text *txt2, Oid collid)
Definition: varlena.c:4632
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:644
#define OidIsValid(objectId)
Definition: c.h:639
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1940
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:348
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:235
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1056
bool trace_sort
Definition: tuplesort.c:130
#define PG_GET_COLLATION()
Definition: fmgr.h:193
static char * text_position_next_internal(char *start_ptr, TextPositionState *state)
Definition: varlena.c:1349
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:3314
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:5294
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:682
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:5882
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4852
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:5309
regoff_t rm_eo
Definition: regex.h:86
signed int int32
Definition: c.h:347
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:3202
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2129
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:303
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1575
static int32 text_length(Datum str)
Definition: varlena.c:662
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:836
Size toast_datum_size(Datum value)
Definition: detoast.c:862
bool typbyval
Definition: array.h:228
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:186
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:5025
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:313
#define DatumGetName(X)
Definition: postgres.h:585
static Datum text_to_array_internal(PG_FUNCTION_ARGS)
Definition: varlena.c:4671
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:4053
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3779
static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2211
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:251
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:128
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:65
unsigned hex_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:126
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4836