PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/hash.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_collation.h"
23 #include "catalog/pg_type.h"
24 #include "common/md5.h"
25 #include "lib/hyperloglog.h"
26 #include "libpq/pqformat.h"
27 #include "miscadmin.h"
28 #include "parser/scansup.h"
29 #include "port/pg_bswap.h"
30 #include "regex/regex.h"
31 #include "utils/builtins.h"
32 #include "utils/bytea.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/pg_locale.h"
36 #include "utils/sortsupport.h"
37 #include "utils/varlena.h"
38 
39 
40 /* GUC variable */
42 
43 typedef struct varlena unknown;
44 typedef struct varlena VarString;
45 
46 typedef struct
47 {
48  bool use_wchar; /* T if multibyte encoding */
49  char *str1; /* use these if not use_wchar */
50  char *str2; /* note: these point to original texts */
51  pg_wchar *wstr1; /* use these if use_wchar */
52  pg_wchar *wstr2; /* note: these are palloc'd */
53  int len1; /* string lengths in logical characters */
54  int len2;
55  /* Skip table for Boyer-Moore-Horspool search algorithm: */
56  int skiptablemask; /* mask for ANDing with skiptable subscripts */
57  int skiptable[256]; /* skip distance for given mismatched char */
59 
60 typedef struct
61 {
62  char *buf1; /* 1st string, or abbreviation original string
63  * buf */
64  char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
65  int buflen1;
66  int buflen2;
67  int last_len1; /* Length of last buf1 string/strxfrm() input */
68  int last_len2; /* Length of last buf2 string/strxfrm() blob */
69  int last_returned; /* Last comparison result (cache) */
70  bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
71  bool collate_c;
72  bool bpchar; /* Sorting bpchar, not varchar/text/bytea? */
73  hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
74  hyperLogLogState full_card; /* Full key cardinality state */
75  double prop_card; /* Required cardinality proportion */
78 
79 /*
80  * This should be large enough that most strings will fit, but small enough
81  * that we feel comfortable putting it on the stack
82  */
83 #define TEXTBUFLEN 1024
84 
85 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
86 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
87 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
88 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
89 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
90 
91 #define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X))
92 #define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X))
93 
94 static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
95 static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
96 static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
97 static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
98 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
99 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
100 static int32 text_length(Datum str);
101 static text *text_catenate(text *t1, text *t2);
102 static text *text_substring(Datum str,
103  int32 start,
104  int32 length,
105  bool length_not_specified);
106 static text *text_overlay(text *t1, text *t2, int sp, int sl);
107 static int text_position(text *t1, text *t2);
108 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
109 static int text_position_next(int start_pos, TextPositionState *state);
111 static int text_cmp(text *arg1, text *arg2, Oid collid);
112 static bytea *bytea_catenate(bytea *t1, bytea *t2);
113 static bytea *bytea_substring(Datum str,
114  int S,
115  int L,
116  bool length_not_specified);
117 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
118 static void appendStringInfoText(StringInfo str, const text *t);
121  const char *fldsep, const char *null_string);
123 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
124  int *value);
125 static const char *text_format_parse_format(const char *start_ptr,
126  const char *end_ptr,
127  int *argpos, int *widthpos,
128  int *flags, int *width);
129 static void text_format_string_conversion(StringInfo buf, char conversion,
130  FmgrInfo *typOutputInfo,
131  Datum value, bool isNull,
132  int flags, int width);
133 static void text_format_append_string(StringInfo buf, const char *str,
134  int flags, int width);
135 
136 
137 /*****************************************************************************
138  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
139  *****************************************************************************/
140 
141 /*
142  * cstring_to_text
143  *
144  * Create a text value from a null-terminated C string.
145  *
146  * The new text value is freshly palloc'd with a full-size VARHDR.
147  */
148 text *
149 cstring_to_text(const char *s)
150 {
151  return cstring_to_text_with_len(s, strlen(s));
152 }
153 
154 /*
155  * cstring_to_text_with_len
156  *
157  * Same as cstring_to_text except the caller specifies the string length;
158  * the string need not be null_terminated.
159  */
160 text *
161 cstring_to_text_with_len(const char *s, int len)
162 {
163  text *result = (text *) palloc(len + VARHDRSZ);
164 
165  SET_VARSIZE(result, len + VARHDRSZ);
166  memcpy(VARDATA(result), s, len);
167 
168  return result;
169 }
170 
171 /*
172  * text_to_cstring
173  *
174  * Create a palloc'd, null-terminated C string from a text value.
175  *
176  * We support being passed a compressed or toasted text value.
177  * This is a bit bogus since such values shouldn't really be referred to as
178  * "text *", but it seems useful for robustness. If we didn't handle that
179  * case here, we'd need another routine that did, anyway.
180  */
181 char *
183 {
184  /* must cast away the const, unfortunately */
185  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
186  int len = VARSIZE_ANY_EXHDR(tunpacked);
187  char *result;
188 
189  result = (char *) palloc(len + 1);
190  memcpy(result, VARDATA_ANY(tunpacked), len);
191  result[len] = '\0';
192 
193  if (tunpacked != t)
194  pfree(tunpacked);
195 
196  return result;
197 }
198 
199 /*
200  * text_to_cstring_buffer
201  *
202  * Copy a text value into a caller-supplied buffer of size dst_len.
203  *
204  * The text string is truncated if necessary to fit. The result is
205  * guaranteed null-terminated (unless dst_len == 0).
206  *
207  * We support being passed a compressed or toasted text value.
208  * This is a bit bogus since such values shouldn't really be referred to as
209  * "text *", but it seems useful for robustness. If we didn't handle that
210  * case here, we'd need another routine that did, anyway.
211  */
212 void
213 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
214 {
215  /* must cast away the const, unfortunately */
216  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
217  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
218 
219  if (dst_len > 0)
220  {
221  dst_len--;
222  if (dst_len >= src_len)
223  dst_len = src_len;
224  else /* ensure truncation is encoding-safe */
225  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
226  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
227  dst[dst_len] = '\0';
228  }
229 
230  if (srcunpacked != src)
231  pfree(srcunpacked);
232 }
233 
234 
235 /*****************************************************************************
236  * USER I/O ROUTINES *
237  *****************************************************************************/
238 
239 
240 #define VAL(CH) ((CH) - '0')
241 #define DIG(VAL) ((VAL) + '0')
242 
243 /*
244  * byteain - converts from printable representation of byte array
245  *
246  * Non-printable characters must be passed as '\nnn' (octal) and are
247  * converted to internal form. '\' must be passed as '\\'.
248  * ereport(ERROR, ...) if bad form.
249  *
250  * BUGS:
251  * The input is scanned twice.
252  * The error checking of input is minimal.
253  */
254 Datum
256 {
257  char *inputText = PG_GETARG_CSTRING(0);
258  char *tp;
259  char *rp;
260  int bc;
261  bytea *result;
262 
263  /* Recognize hex input */
264  if (inputText[0] == '\\' && inputText[1] == 'x')
265  {
266  size_t len = strlen(inputText);
267 
268  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
269  result = palloc(bc);
270  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
271  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
272 
273  PG_RETURN_BYTEA_P(result);
274  }
275 
276  /* Else, it's the traditional escaped style */
277  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
278  {
279  if (tp[0] != '\\')
280  tp++;
281  else if ((tp[0] == '\\') &&
282  (tp[1] >= '0' && tp[1] <= '3') &&
283  (tp[2] >= '0' && tp[2] <= '7') &&
284  (tp[3] >= '0' && tp[3] <= '7'))
285  tp += 4;
286  else if ((tp[0] == '\\') &&
287  (tp[1] == '\\'))
288  tp += 2;
289  else
290  {
291  /*
292  * one backslash, not followed by another or ### valid octal
293  */
294  ereport(ERROR,
295  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
296  errmsg("invalid input syntax for type %s", "bytea")));
297  }
298  }
299 
300  bc += VARHDRSZ;
301 
302  result = (bytea *) palloc(bc);
303  SET_VARSIZE(result, bc);
304 
305  tp = inputText;
306  rp = VARDATA(result);
307  while (*tp != '\0')
308  {
309  if (tp[0] != '\\')
310  *rp++ = *tp++;
311  else if ((tp[0] == '\\') &&
312  (tp[1] >= '0' && tp[1] <= '3') &&
313  (tp[2] >= '0' && tp[2] <= '7') &&
314  (tp[3] >= '0' && tp[3] <= '7'))
315  {
316  bc = VAL(tp[1]);
317  bc <<= 3;
318  bc += VAL(tp[2]);
319  bc <<= 3;
320  *rp++ = bc + VAL(tp[3]);
321 
322  tp += 4;
323  }
324  else if ((tp[0] == '\\') &&
325  (tp[1] == '\\'))
326  {
327  *rp++ = '\\';
328  tp += 2;
329  }
330  else
331  {
332  /*
333  * We should never get here. The first pass should not allow it.
334  */
335  ereport(ERROR,
336  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
337  errmsg("invalid input syntax for type %s", "bytea")));
338  }
339  }
340 
341  PG_RETURN_BYTEA_P(result);
342 }
343 
344 /*
345  * byteaout - converts to printable representation of byte array
346  *
347  * In the traditional escaped format, non-printable characters are
348  * printed as '\nnn' (octal) and '\' as '\\'.
349  */
350 Datum
352 {
353  bytea *vlena = PG_GETARG_BYTEA_PP(0);
354  char *result;
355  char *rp;
356 
358  {
359  /* Print hex format */
360  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
361  *rp++ = '\\';
362  *rp++ = 'x';
363  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
364  }
365  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
366  {
367  /* Print traditional escaped format */
368  char *vp;
369  int len;
370  int i;
371 
372  len = 1; /* empty string has 1 char */
373  vp = VARDATA_ANY(vlena);
374  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
375  {
376  if (*vp == '\\')
377  len += 2;
378  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
379  len += 4;
380  else
381  len++;
382  }
383  rp = result = (char *) palloc(len);
384  vp = VARDATA_ANY(vlena);
385  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
386  {
387  if (*vp == '\\')
388  {
389  *rp++ = '\\';
390  *rp++ = '\\';
391  }
392  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
393  {
394  int val; /* holds unprintable chars */
395 
396  val = *vp;
397  rp[0] = '\\';
398  rp[3] = DIG(val & 07);
399  val >>= 3;
400  rp[2] = DIG(val & 07);
401  val >>= 3;
402  rp[1] = DIG(val & 03);
403  rp += 4;
404  }
405  else
406  *rp++ = *vp;
407  }
408  }
409  else
410  {
411  elog(ERROR, "unrecognized bytea_output setting: %d",
412  bytea_output);
413  rp = result = NULL; /* keep compiler quiet */
414  }
415  *rp = '\0';
416  PG_RETURN_CSTRING(result);
417 }
418 
419 /*
420  * bytearecv - converts external binary format to bytea
421  */
422 Datum
424 {
426  bytea *result;
427  int nbytes;
428 
429  nbytes = buf->len - buf->cursor;
430  result = (bytea *) palloc(nbytes + VARHDRSZ);
431  SET_VARSIZE(result, nbytes + VARHDRSZ);
432  pq_copymsgbytes(buf, VARDATA(result), nbytes);
433  PG_RETURN_BYTEA_P(result);
434 }
435 
436 /*
437  * byteasend - converts bytea to binary format
438  *
439  * This is a special case: just copy the input...
440  */
441 Datum
443 {
444  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
445 
446  PG_RETURN_BYTEA_P(vlena);
447 }
448 
449 Datum
451 {
453 
454  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
455 
456  /* Append the value unless null. */
457  if (!PG_ARGISNULL(1))
458  {
460 
461  /* On the first time through, we ignore the delimiter. */
462  if (state == NULL)
463  state = makeStringAggState(fcinfo);
464  else if (!PG_ARGISNULL(2))
465  {
466  bytea *delim = PG_GETARG_BYTEA_PP(2);
467 
469  }
470 
472  }
473 
474  /*
475  * The transition type for string_agg() is declared to be "internal",
476  * which is a pass-by-value type the same size as a pointer.
477  */
478  PG_RETURN_POINTER(state);
479 }
480 
481 Datum
483 {
485 
486  /* cannot be called directly because of internal-type argument */
487  Assert(AggCheckCallContext(fcinfo, NULL));
488 
489  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
490 
491  if (state != NULL)
492  {
493  bytea *result;
494 
495  result = (bytea *) palloc(state->len + VARHDRSZ);
496  SET_VARSIZE(result, state->len + VARHDRSZ);
497  memcpy(VARDATA(result), state->data, state->len);
498  PG_RETURN_BYTEA_P(result);
499  }
500  else
501  PG_RETURN_NULL();
502 }
503 
504 /*
505  * textin - converts "..." to internal representation
506  */
507 Datum
509 {
510  char *inputText = PG_GETARG_CSTRING(0);
511 
512  PG_RETURN_TEXT_P(cstring_to_text(inputText));
513 }
514 
515 /*
516  * textout - converts internal representation to "..."
517  */
518 Datum
520 {
521  Datum txt = PG_GETARG_DATUM(0);
522 
524 }
525 
526 /*
527  * textrecv - converts external binary format to text
528  */
529 Datum
531 {
533  text *result;
534  char *str;
535  int nbytes;
536 
537  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
538 
539  result = cstring_to_text_with_len(str, nbytes);
540  pfree(str);
541  PG_RETURN_TEXT_P(result);
542 }
543 
544 /*
545  * textsend - converts text to binary format
546  */
547 Datum
549 {
550  text *t = PG_GETARG_TEXT_PP(0);
552 
553  pq_begintypsend(&buf);
556 }
557 
558 
559 /*
560  * unknownin - converts "..." to internal representation
561  */
562 Datum
564 {
565  char *str = PG_GETARG_CSTRING(0);
566 
567  /* representation is same as cstring */
569 }
570 
571 /*
572  * unknownout - converts internal representation to "..."
573  */
574 Datum
576 {
577  /* representation is same as cstring */
578  char *str = PG_GETARG_CSTRING(0);
579 
581 }
582 
583 /*
584  * unknownrecv - converts external binary format to unknown
585  */
586 Datum
588 {
590  char *str;
591  int nbytes;
592 
593  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
594  /* representation is same as cstring */
595  PG_RETURN_CSTRING(str);
596 }
597 
598 /*
599  * unknownsend - converts unknown to binary format
600  */
601 Datum
603 {
604  /* representation is same as cstring */
605  char *str = PG_GETARG_CSTRING(0);
607 
608  pq_begintypsend(&buf);
609  pq_sendtext(&buf, str, strlen(str));
611 }
612 
613 
614 /* ========== PUBLIC ROUTINES ========== */
615 
616 /*
617  * textlen -
618  * returns the logical length of a text*
619  * (which is less than the VARSIZE of the text*)
620  */
621 Datum
623 {
624  Datum str = PG_GETARG_DATUM(0);
625 
626  /* try to avoid decompressing argument */
628 }
629 
630 /*
631  * text_length -
632  * Does the real work for textlen()
633  *
634  * This is broken out so it can be called directly by other string processing
635  * functions. Note that the argument is passed as a Datum, to indicate that
636  * it may still be in compressed form. We can avoid decompressing it at all
637  * in some cases.
638  */
639 static int32
641 {
642  /* fastpath when max encoding length is one */
645  else
646  {
647  text *t = DatumGetTextPP(str);
648 
650  VARSIZE_ANY_EXHDR(t)));
651  }
652 }
653 
654 /*
655  * textoctetlen -
656  * returns the physical length of a text*
657  * (which is less than the VARSIZE of the text*)
658  */
659 Datum
661 {
662  Datum str = PG_GETARG_DATUM(0);
663 
664  /* We need not detoast the input at all */
666 }
667 
668 /*
669  * textcat -
670  * takes two text* and returns a text* that is the concatenation of
671  * the two.
672  *
673  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
674  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
675  * Allocate space for output in all cases.
676  * XXX - thomas 1997-07-10
677  */
678 Datum
680 {
681  text *t1 = PG_GETARG_TEXT_PP(0);
682  text *t2 = PG_GETARG_TEXT_PP(1);
683 
685 }
686 
687 /*
688  * text_catenate
689  * Guts of textcat(), broken out so it can be used by other functions
690  *
691  * Arguments can be in short-header form, but not compressed or out-of-line
692  */
693 static text *
695 {
696  text *result;
697  int len1,
698  len2,
699  len;
700  char *ptr;
701 
702  len1 = VARSIZE_ANY_EXHDR(t1);
703  len2 = VARSIZE_ANY_EXHDR(t2);
704 
705  /* paranoia ... probably should throw error instead? */
706  if (len1 < 0)
707  len1 = 0;
708  if (len2 < 0)
709  len2 = 0;
710 
711  len = len1 + len2 + VARHDRSZ;
712  result = (text *) palloc(len);
713 
714  /* Set size of result string... */
715  SET_VARSIZE(result, len);
716 
717  /* Fill data field of result string... */
718  ptr = VARDATA(result);
719  if (len1 > 0)
720  memcpy(ptr, VARDATA_ANY(t1), len1);
721  if (len2 > 0)
722  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
723 
724  return result;
725 }
726 
727 /*
728  * charlen_to_bytelen()
729  * Compute the number of bytes occupied by n characters starting at *p
730  *
731  * It is caller's responsibility that there actually are n characters;
732  * the string need not be null-terminated.
733  */
734 static int
735 charlen_to_bytelen(const char *p, int n)
736 {
738  {
739  /* Optimization for single-byte encodings */
740  return n;
741  }
742  else
743  {
744  const char *s;
745 
746  for (s = p; n > 0; n--)
747  s += pg_mblen(s);
748 
749  return s - p;
750  }
751 }
752 
753 /*
754  * text_substr()
755  * Return a substring starting at the specified position.
756  * - thomas 1997-12-31
757  *
758  * Input:
759  * - string
760  * - starting position (is one-based)
761  * - string length
762  *
763  * If the starting position is zero or less, then return from the start of the string
764  * adjusting the length to be consistent with the "negative start" per SQL.
765  * If the length is less than zero, an error is raised (omit the length to get the rest of the string).
766  *
767  * Added multibyte support.
768  * - Tatsuo Ishii 1998-4-21
769  * Changed behavior if starting position is less than one to conform to SQL behavior.
770  * Formerly returned the entire string; now returns a portion.
771  * - Thomas Lockhart 1998-12-10
772  * Now uses faster TOAST-slicing interface
773  * - John Gray 2002-02-22
774  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
775  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
776  * error; if E < 1, return '', not entire string). Fixed MB related bug when
777  * S > LC and < LC + 4 sometimes garbage characters are returned.
778  * - Joe Conway 2002-08-10
779  */
780 Datum
782 {
784  PG_GETARG_INT32(1),
785  PG_GETARG_INT32(2),
786  false));
787 }
788 
789 /*
790  * text_substr_no_len -
791  * Wrapper to avoid opr_sanity failure due to
792  * one function accepting a different number of args.
793  */
794 Datum
796 {
798  PG_GETARG_INT32(1),
799  -1, true));
800 }
801 
802 /*
803  * text_substring -
804  * Does the real work for text_substr() and text_substr_no_len()
805  *
806  * This is broken out so it can be called directly by other string processing
807  * functions. Note that the argument is passed as a Datum, to indicate that
808  * it may still be in compressed/toasted form. We can avoid detoasting all
809  * of it in some cases.
810  *
811  * The result is always a freshly palloc'd datum.
812  */
813 static text *
814 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
815 {
817  int32 S = start; /* start position */
818  int32 S1; /* adjusted start position */
819  int32 L1; /* adjusted substring length */
820 
821  /* life is easy if the encoding max length is 1 */
822  if (eml == 1)
823  {
824  S1 = Max(S, 1);
825 
826  if (length_not_specified) /* special case - get length to end of
827  * string */
828  L1 = -1;
829  else
830  {
831  /* end position */
832  int E = S + length;
833 
834  /*
835  * A negative value for L is the only way for the end position to
836  * be before the start. SQL99 says to throw an error.
837  */
838  if (E < S)
839  ereport(ERROR,
840  (errcode(ERRCODE_SUBSTRING_ERROR),
841  errmsg("negative substring length not allowed")));
842 
843  /*
844  * A zero or negative value for the end position can happen if the
845  * start was negative or one. SQL99 says to return a zero-length
846  * string.
847  */
848  if (E < 1)
849  return cstring_to_text("");
850 
851  L1 = E - S1;
852  }
853 
854  /*
855  * If the start position is past the end of the string, SQL99 says to
856  * return a zero-length string -- DatumGetTextPSlice() will do that
857  * for us. We need only convert S1 to a zero-based starting position.
858  */
859  return DatumGetTextPSlice(str, S1 - 1, L1);
860  }
861  else if (eml > 1)
862  {
863  /*
864  * When encoding max length is > 1, we can't get LC without
865  * detoasting, so we'll grab a conservatively large slice now and go
866  * back later to do the right thing
867  */
868  int32 slice_start;
869  int32 slice_size;
870  int32 slice_strlen;
871  text *slice;
872  int32 E1;
873  int32 i;
874  char *p;
875  char *s;
876  text *ret;
877 
878  /*
879  * if S is past the end of the string, the tuple toaster will return a
880  * zero-length string to us
881  */
882  S1 = Max(S, 1);
883 
884  /*
885  * We need to start at position zero because there is no way to know
886  * in advance which byte offset corresponds to the supplied start
887  * position.
888  */
889  slice_start = 0;
890 
891  if (length_not_specified) /* special case - get length to end of
892  * string */
893  slice_size = L1 = -1;
894  else
895  {
896  int E = S + length;
897 
898  /*
899  * A negative value for L is the only way for the end position to
900  * be before the start. SQL99 says to throw an error.
901  */
902  if (E < S)
903  ereport(ERROR,
904  (errcode(ERRCODE_SUBSTRING_ERROR),
905  errmsg("negative substring length not allowed")));
906 
907  /*
908  * A zero or negative value for the end position can happen if the
909  * start was negative or one. SQL99 says to return a zero-length
910  * string.
911  */
912  if (E < 1)
913  return cstring_to_text("");
914 
915  /*
916  * if E is past the end of the string, the tuple toaster will
917  * truncate the length for us
918  */
919  L1 = E - S1;
920 
921  /*
922  * Total slice size in bytes can't be any longer than the start
923  * position plus substring length times the encoding max length.
924  */
925  slice_size = (S1 + L1) * eml;
926  }
927 
928  /*
929  * If we're working with an untoasted source, no need to do an extra
930  * copying step.
931  */
934  slice = DatumGetTextPSlice(str, slice_start, slice_size);
935  else
936  slice = (text *) DatumGetPointer(str);
937 
938  /* see if we got back an empty string */
939  if (VARSIZE_ANY_EXHDR(slice) == 0)
940  {
941  if (slice != (text *) DatumGetPointer(str))
942  pfree(slice);
943  return cstring_to_text("");
944  }
945 
946  /* Now we can get the actual length of the slice in MB characters */
947  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
948  VARSIZE_ANY_EXHDR(slice));
949 
950  /*
951  * Check that the start position wasn't > slice_strlen. If so, SQL99
952  * says to return a zero-length string.
953  */
954  if (S1 > slice_strlen)
955  {
956  if (slice != (text *) DatumGetPointer(str))
957  pfree(slice);
958  return cstring_to_text("");
959  }
960 
961  /*
962  * Adjust L1 and E1 now that we know the slice string length. Again
963  * remember that S1 is one based, and slice_start is zero based.
964  */
965  if (L1 > -1)
966  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
967  else
968  E1 = slice_start + 1 + slice_strlen;
969 
970  /*
971  * Find the start position in the slice; remember S1 is not zero based
972  */
973  p = VARDATA_ANY(slice);
974  for (i = 0; i < S1 - 1; i++)
975  p += pg_mblen(p);
976 
977  /* hang onto a pointer to our start position */
978  s = p;
979 
980  /*
981  * Count the actual bytes used by the substring of the requested
982  * length.
983  */
984  for (i = S1; i < E1; i++)
985  p += pg_mblen(p);
986 
987  ret = (text *) palloc(VARHDRSZ + (p - s));
988  SET_VARSIZE(ret, VARHDRSZ + (p - s));
989  memcpy(VARDATA(ret), s, (p - s));
990 
991  if (slice != (text *) DatumGetPointer(str))
992  pfree(slice);
993 
994  return ret;
995  }
996  else
997  elog(ERROR, "invalid backend encoding: encoding max length < 1");
998 
999  /* not reached: suppress compiler warning */
1000  return NULL;
1001 }
1002 
1003 /*
1004  * textoverlay
1005  * Replace specified substring of first string with second
1006  *
1007  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1008  * This code is a direct implementation of what the standard says.
1009  */
1010 Datum
1012 {
1013  text *t1 = PG_GETARG_TEXT_PP(0);
1014  text *t2 = PG_GETARG_TEXT_PP(1);
1015  int sp = PG_GETARG_INT32(2); /* substring start position */
1016  int sl = PG_GETARG_INT32(3); /* substring length */
1017 
1018  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1019 }
1020 
1021 Datum
1023 {
1024  text *t1 = PG_GETARG_TEXT_PP(0);
1025  text *t2 = PG_GETARG_TEXT_PP(1);
1026  int sp = PG_GETARG_INT32(2); /* substring start position */
1027  int sl;
1028 
1029  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1030  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1031 }
1032 
1033 static text *
1034 text_overlay(text *t1, text *t2, int sp, int sl)
1035 {
1036  text *result;
1037  text *s1;
1038  text *s2;
1039  int sp_pl_sl;
1040 
1041  /*
1042  * Check for possible integer-overflow cases. For negative sp, throw a
1043  * "substring length" error because that's what should be expected
1044  * according to the spec's definition of OVERLAY().
1045  */
1046  if (sp <= 0)
1047  ereport(ERROR,
1048  (errcode(ERRCODE_SUBSTRING_ERROR),
1049  errmsg("negative substring length not allowed")));
1050  sp_pl_sl = sp + sl;
1051  if (sp_pl_sl <= sl)
1052  ereport(ERROR,
1053  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1054  errmsg("integer out of range")));
1055 
1056  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1057  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1058  result = text_catenate(s1, t2);
1059  result = text_catenate(result, s2);
1060 
1061  return result;
1062 }
1063 
1064 /*
1065  * textpos -
1066  * Return the position of the specified substring.
1067  * Implements the SQL POSITION() function.
1068  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1069  * - thomas 1997-07-27
1070  */
1071 Datum
1073 {
1074  text *str = PG_GETARG_TEXT_PP(0);
1075  text *search_str = PG_GETARG_TEXT_PP(1);
1076 
1077  PG_RETURN_INT32((int32) text_position(str, search_str));
1078 }
1079 
1080 /*
1081  * text_position -
1082  * Does the real work for textpos()
1083  *
1084  * Inputs:
1085  * t1 - string to be searched
1086  * t2 - pattern to match within t1
1087  * Result:
1088  * Character index of the first matched char, starting from 1,
1089  * or 0 if no match.
1090  *
1091  * This is broken out so it can be called directly by other string processing
1092  * functions.
1093  */
1094 static int
1096 {
1098  int result;
1099 
1100  text_position_setup(t1, t2, &state);
1101  result = text_position_next(1, &state);
1102  text_position_cleanup(&state);
1103  return result;
1104 }
1105 
1106 
1107 /*
1108  * text_position_setup, text_position_next, text_position_cleanup -
1109  * Component steps of text_position()
1110  *
1111  * These are broken out so that a string can be efficiently searched for
1112  * multiple occurrences of the same pattern. text_position_next may be
1113  * called multiple times with increasing values of start_pos, which is
1114  * the 1-based character position to start the search from. The "state"
1115  * variable is normally just a local variable in the caller.
1116  */
1117 
1118 static void
1120 {
1121  int len1 = VARSIZE_ANY_EXHDR(t1);
1122  int len2 = VARSIZE_ANY_EXHDR(t2);
1123 
1125  {
1126  /* simple case - single byte encoding */
1127  state->use_wchar = false;
1128  state->str1 = VARDATA_ANY(t1);
1129  state->str2 = VARDATA_ANY(t2);
1130  state->len1 = len1;
1131  state->len2 = len2;
1132  }
1133  else
1134  {
1135  /* not as simple - multibyte encoding */
1136  pg_wchar *p1,
1137  *p2;
1138 
1139  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1140  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1141  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1142  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1143 
1144  state->use_wchar = true;
1145  state->wstr1 = p1;
1146  state->wstr2 = p2;
1147  state->len1 = len1;
1148  state->len2 = len2;
1149  }
1150 
1151  /*
1152  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1153  * notes we use the terminology that the "haystack" is the string to be
1154  * searched (t1) and the "needle" is the pattern being sought (t2).
1155  *
1156  * If the needle is empty or bigger than the haystack then there is no
1157  * point in wasting cycles initializing the table. We also choose not to
1158  * use B-M-H for needles of length 1, since the skip table can't possibly
1159  * save anything in that case.
1160  */
1161  if (len1 >= len2 && len2 > 1)
1162  {
1163  int searchlength = len1 - len2;
1164  int skiptablemask;
1165  int last;
1166  int i;
1167 
1168  /*
1169  * First we must determine how much of the skip table to use. The
1170  * declaration of TextPositionState allows up to 256 elements, but for
1171  * short search problems we don't really want to have to initialize so
1172  * many elements --- it would take too long in comparison to the
1173  * actual search time. So we choose a useful skip table size based on
1174  * the haystack length minus the needle length. The closer the needle
1175  * length is to the haystack length the less useful skipping becomes.
1176  *
1177  * Note: since we use bit-masking to select table elements, the skip
1178  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1179  */
1180  if (searchlength < 16)
1181  skiptablemask = 3;
1182  else if (searchlength < 64)
1183  skiptablemask = 7;
1184  else if (searchlength < 128)
1185  skiptablemask = 15;
1186  else if (searchlength < 512)
1187  skiptablemask = 31;
1188  else if (searchlength < 2048)
1189  skiptablemask = 63;
1190  else if (searchlength < 4096)
1191  skiptablemask = 127;
1192  else
1193  skiptablemask = 255;
1194  state->skiptablemask = skiptablemask;
1195 
1196  /*
1197  * Initialize the skip table. We set all elements to the needle
1198  * length, since this is the correct skip distance for any character
1199  * not found in the needle.
1200  */
1201  for (i = 0; i <= skiptablemask; i++)
1202  state->skiptable[i] = len2;
1203 
1204  /*
1205  * Now examine the needle. For each character except the last one,
1206  * set the corresponding table element to the appropriate skip
1207  * distance. Note that when two characters share the same skip table
1208  * entry, the one later in the needle must determine the skip
1209  * distance.
1210  */
1211  last = len2 - 1;
1212 
1213  if (!state->use_wchar)
1214  {
1215  const char *str2 = state->str2;
1216 
1217  for (i = 0; i < last; i++)
1218  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1219  }
1220  else
1221  {
1222  const pg_wchar *wstr2 = state->wstr2;
1223 
1224  for (i = 0; i < last; i++)
1225  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1226  }
1227  }
1228 }
1229 
1230 static int
1232 {
1233  int haystack_len = state->len1;
1234  int needle_len = state->len2;
1235  int skiptablemask = state->skiptablemask;
1236 
1237  Assert(start_pos > 0); /* else caller error */
1238 
1239  if (needle_len <= 0)
1240  return start_pos; /* result for empty pattern */
1241 
1242  start_pos--; /* adjust for zero based arrays */
1243 
1244  /* Done if the needle can't possibly fit */
1245  if (haystack_len < start_pos + needle_len)
1246  return 0;
1247 
1248  if (!state->use_wchar)
1249  {
1250  /* simple case - single byte encoding */
1251  const char *haystack = state->str1;
1252  const char *needle = state->str2;
1253  const char *haystack_end = &haystack[haystack_len];
1254  const char *hptr;
1255 
1256  if (needle_len == 1)
1257  {
1258  /* No point in using B-M-H for a one-character needle */
1259  char nchar = *needle;
1260 
1261  hptr = &haystack[start_pos];
1262  while (hptr < haystack_end)
1263  {
1264  if (*hptr == nchar)
1265  return hptr - haystack + 1;
1266  hptr++;
1267  }
1268  }
1269  else
1270  {
1271  const char *needle_last = &needle[needle_len - 1];
1272 
1273  /* Start at startpos plus the length of the needle */
1274  hptr = &haystack[start_pos + needle_len - 1];
1275  while (hptr < haystack_end)
1276  {
1277  /* Match the needle scanning *backward* */
1278  const char *nptr;
1279  const char *p;
1280 
1281  nptr = needle_last;
1282  p = hptr;
1283  while (*nptr == *p)
1284  {
1285  /* Matched it all? If so, return 1-based position */
1286  if (nptr == needle)
1287  return p - haystack + 1;
1288  nptr--, p--;
1289  }
1290 
1291  /*
1292  * No match, so use the haystack char at hptr to decide how
1293  * far to advance. If the needle had any occurrence of that
1294  * character (or more precisely, one sharing the same
1295  * skiptable entry) before its last character, then we advance
1296  * far enough to align the last such needle character with
1297  * that haystack position. Otherwise we can advance by the
1298  * whole needle length.
1299  */
1300  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1301  }
1302  }
1303  }
1304  else
1305  {
1306  /* The multibyte char version. This works exactly the same way. */
1307  const pg_wchar *haystack = state->wstr1;
1308  const pg_wchar *needle = state->wstr2;
1309  const pg_wchar *haystack_end = &haystack[haystack_len];
1310  const pg_wchar *hptr;
1311 
1312  if (needle_len == 1)
1313  {
1314  /* No point in using B-M-H for a one-character needle */
1315  pg_wchar nchar = *needle;
1316 
1317  hptr = &haystack[start_pos];
1318  while (hptr < haystack_end)
1319  {
1320  if (*hptr == nchar)
1321  return hptr - haystack + 1;
1322  hptr++;
1323  }
1324  }
1325  else
1326  {
1327  const pg_wchar *needle_last = &needle[needle_len - 1];
1328 
1329  /* Start at startpos plus the length of the needle */
1330  hptr = &haystack[start_pos + needle_len - 1];
1331  while (hptr < haystack_end)
1332  {
1333  /* Match the needle scanning *backward* */
1334  const pg_wchar *nptr;
1335  const pg_wchar *p;
1336 
1337  nptr = needle_last;
1338  p = hptr;
1339  while (*nptr == *p)
1340  {
1341  /* Matched it all? If so, return 1-based position */
1342  if (nptr == needle)
1343  return p - haystack + 1;
1344  nptr--, p--;
1345  }
1346 
1347  /*
1348  * No match, so use the haystack char at hptr to decide how
1349  * far to advance. If the needle had any occurrence of that
1350  * character (or more precisely, one sharing the same
1351  * skiptable entry) before its last character, then we advance
1352  * far enough to align the last such needle character with
1353  * that haystack position. Otherwise we can advance by the
1354  * whole needle length.
1355  */
1356  hptr += state->skiptable[*hptr & skiptablemask];
1357  }
1358  }
1359  }
1360 
1361  return 0; /* not found */
1362 }
1363 
1364 static void
1366 {
1367  if (state->use_wchar)
1368  {
1369  pfree(state->wstr1);
1370  pfree(state->wstr2);
1371  }
1372 }
1373 
1374 /* varstr_cmp()
1375  * Comparison function for text strings with given lengths.
1376  * Includes locale support, but must copy strings to temporary memory
1377  * to allow null-termination for inputs to strcoll().
1378  * Returns an integer less than, equal to, or greater than zero, indicating
1379  * whether arg1 is less than, equal to, or greater than arg2.
1380  */
1381 int
1382 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1383 {
1384  int result;
1385 
1386  /*
1387  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1388  * have to do some memory copying. This turns out to be significantly
1389  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1390  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1391  */
1392  if (lc_collate_is_c(collid))
1393  {
1394  result = memcmp(arg1, arg2, Min(len1, len2));
1395  if ((result == 0) && (len1 != len2))
1396  result = (len1 < len2) ? -1 : 1;
1397  }
1398  else
1399  {
1400  char a1buf[TEXTBUFLEN];
1401  char a2buf[TEXTBUFLEN];
1402  char *a1p,
1403  *a2p;
1404  pg_locale_t mylocale = 0;
1405 
1406  if (collid != DEFAULT_COLLATION_OID)
1407  {
1408  if (!OidIsValid(collid))
1409  {
1410  /*
1411  * This typically means that the parser could not resolve a
1412  * conflict of implicit collations, so report it that way.
1413  */
1414  ereport(ERROR,
1415  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1416  errmsg("could not determine which collation to use for string comparison"),
1417  errhint("Use the COLLATE clause to set the collation explicitly.")));
1418  }
1419  mylocale = pg_newlocale_from_collation(collid);
1420  }
1421 
1422  /*
1423  * memcmp() can't tell us which of two unequal strings sorts first,
1424  * but it's a cheap way to tell if they're equal. Testing shows that
1425  * memcmp() followed by strcoll() is only trivially slower than
1426  * strcoll() by itself, so we don't lose much if this doesn't work out
1427  * very often, and if it does - for example, because there are many
1428  * equal strings in the input - then we win big by avoiding expensive
1429  * collation-aware comparisons.
1430  */
1431  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1432  return 0;
1433 
1434 #ifdef WIN32
1435  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1436  if (GetDatabaseEncoding() == PG_UTF8)
1437  {
1438  int a1len;
1439  int a2len;
1440  int r;
1441 
1442  if (len1 >= TEXTBUFLEN / 2)
1443  {
1444  a1len = len1 * 2 + 2;
1445  a1p = palloc(a1len);
1446  }
1447  else
1448  {
1449  a1len = TEXTBUFLEN;
1450  a1p = a1buf;
1451  }
1452  if (len2 >= TEXTBUFLEN / 2)
1453  {
1454  a2len = len2 * 2 + 2;
1455  a2p = palloc(a2len);
1456  }
1457  else
1458  {
1459  a2len = TEXTBUFLEN;
1460  a2p = a2buf;
1461  }
1462 
1463  /* stupid Microsloth API does not work for zero-length input */
1464  if (len1 == 0)
1465  r = 0;
1466  else
1467  {
1468  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1469  (LPWSTR) a1p, a1len / 2);
1470  if (!r)
1471  ereport(ERROR,
1472  (errmsg("could not convert string to UTF-16: error code %lu",
1473  GetLastError())));
1474  }
1475  ((LPWSTR) a1p)[r] = 0;
1476 
1477  if (len2 == 0)
1478  r = 0;
1479  else
1480  {
1481  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1482  (LPWSTR) a2p, a2len / 2);
1483  if (!r)
1484  ereport(ERROR,
1485  (errmsg("could not convert string to UTF-16: error code %lu",
1486  GetLastError())));
1487  }
1488  ((LPWSTR) a2p)[r] = 0;
1489 
1490  errno = 0;
1491 #ifdef HAVE_LOCALE_T
1492  if (mylocale)
1493  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
1494  else
1495 #endif
1496  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1497  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1498  * headers */
1499  ereport(ERROR,
1500  (errmsg("could not compare Unicode strings: %m")));
1501 
1502  /*
1503  * In some locales wcscoll() can claim that nonidentical strings
1504  * are equal. Believing that would be bad news for a number of
1505  * reasons, so we follow Perl's lead and sort "equal" strings
1506  * according to strcmp (on the UTF-8 representation).
1507  */
1508  if (result == 0)
1509  {
1510  result = memcmp(arg1, arg2, Min(len1, len2));
1511  if ((result == 0) && (len1 != len2))
1512  result = (len1 < len2) ? -1 : 1;
1513  }
1514 
1515  if (a1p != a1buf)
1516  pfree(a1p);
1517  if (a2p != a2buf)
1518  pfree(a2p);
1519 
1520  return result;
1521  }
1522 #endif /* WIN32 */
1523 
1524  if (len1 >= TEXTBUFLEN)
1525  a1p = (char *) palloc(len1 + 1);
1526  else
1527  a1p = a1buf;
1528  if (len2 >= TEXTBUFLEN)
1529  a2p = (char *) palloc(len2 + 1);
1530  else
1531  a2p = a2buf;
1532 
1533  memcpy(a1p, arg1, len1);
1534  a1p[len1] = '\0';
1535  memcpy(a2p, arg2, len2);
1536  a2p[len2] = '\0';
1537 
1538  if (mylocale)
1539  {
1540  if (mylocale->provider == COLLPROVIDER_ICU)
1541  {
1542 #ifdef USE_ICU
1543 #ifdef HAVE_UCOL_STRCOLLUTF8
1544  if (GetDatabaseEncoding() == PG_UTF8)
1545  {
1546  UErrorCode status;
1547 
1548  status = U_ZERO_ERROR;
1549  result = ucol_strcollUTF8(mylocale->info.icu.ucol,
1550  arg1, len1,
1551  arg2, len2,
1552  &status);
1553  if (U_FAILURE(status))
1554  ereport(ERROR,
1555  (errmsg("collation failed: %s", u_errorName(status))));
1556  }
1557  else
1558 #endif
1559  {
1560  int32_t ulen1,
1561  ulen2;
1562  UChar *uchar1,
1563  *uchar2;
1564 
1565  ulen1 = icu_to_uchar(&uchar1, arg1, len1);
1566  ulen2 = icu_to_uchar(&uchar2, arg2, len2);
1567 
1568  result = ucol_strcoll(mylocale->info.icu.ucol,
1569  uchar1, ulen1,
1570  uchar2, ulen2);
1571  }
1572 #else /* not USE_ICU */
1573  /* shouldn't happen */
1574  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1575 #endif /* not USE_ICU */
1576  }
1577  else
1578  {
1579 #ifdef HAVE_LOCALE_T
1580  result = strcoll_l(a1p, a2p, mylocale->info.lt);
1581 #else
1582  /* shouldn't happen */
1583  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1584 #endif
1585  }
1586  }
1587  else
1588  result = strcoll(a1p, a2p);
1589 
1590  /*
1591  * In some locales strcoll() can claim that nonidentical strings are
1592  * equal. Believing that would be bad news for a number of reasons,
1593  * so we follow Perl's lead and sort "equal" strings according to
1594  * strcmp().
1595  */
1596  if (result == 0)
1597  result = strcmp(a1p, a2p);
1598 
1599  if (a1p != a1buf)
1600  pfree(a1p);
1601  if (a2p != a2buf)
1602  pfree(a2p);
1603  }
1604 
1605  return result;
1606 }
1607 
1608 /* text_cmp()
1609  * Internal comparison function for text strings.
1610  * Returns -1, 0 or 1
1611  */
1612 static int
1613 text_cmp(text *arg1, text *arg2, Oid collid)
1614 {
1615  char *a1p,
1616  *a2p;
1617  int len1,
1618  len2;
1619 
1620  a1p = VARDATA_ANY(arg1);
1621  a2p = VARDATA_ANY(arg2);
1622 
1623  len1 = VARSIZE_ANY_EXHDR(arg1);
1624  len2 = VARSIZE_ANY_EXHDR(arg2);
1625 
1626  return varstr_cmp(a1p, len1, a2p, len2, collid);
1627 }
1628 
/*
 * Comparison functions for text strings.
 *
 * Note: btree indexes need these routines not to leak memory; therefore,
 * be careful to free working copies of toasted datums.  Most places don't
 * need to be so careful.
 */
1636 
1637 Datum
1639 {
1640  Datum arg1 = PG_GETARG_DATUM(0);
1641  Datum arg2 = PG_GETARG_DATUM(1);
1642  bool result;
1643  Size len1,
1644  len2;
1645 
1646  /*
1647  * Since we only care about equality or not-equality, we can avoid all the
1648  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1649  * don't even have to do a bitwise comparison if we can show the lengths
1650  * of the strings are unequal; which might save us from having to detoast
1651  * one or both values.
1652  */
1653  len1 = toast_raw_datum_size(arg1);
1654  len2 = toast_raw_datum_size(arg2);
1655  if (len1 != len2)
1656  result = false;
1657  else
1658  {
1659  text *targ1 = DatumGetTextPP(arg1);
1660  text *targ2 = DatumGetTextPP(arg2);
1661 
1662  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1663  len1 - VARHDRSZ) == 0);
1664 
1665  PG_FREE_IF_COPY(targ1, 0);
1666  PG_FREE_IF_COPY(targ2, 1);
1667  }
1668 
1669  PG_RETURN_BOOL(result);
1670 }
1671 
1672 Datum
1674 {
1675  Datum arg1 = PG_GETARG_DATUM(0);
1676  Datum arg2 = PG_GETARG_DATUM(1);
1677  bool result;
1678  Size len1,
1679  len2;
1680 
1681  /* See comment in texteq() */
1682  len1 = toast_raw_datum_size(arg1);
1683  len2 = toast_raw_datum_size(arg2);
1684  if (len1 != len2)
1685  result = true;
1686  else
1687  {
1688  text *targ1 = DatumGetTextPP(arg1);
1689  text *targ2 = DatumGetTextPP(arg2);
1690 
1691  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1692  len1 - VARHDRSZ) != 0);
1693 
1694  PG_FREE_IF_COPY(targ1, 0);
1695  PG_FREE_IF_COPY(targ2, 1);
1696  }
1697 
1698  PG_RETURN_BOOL(result);
1699 }
1700 
1701 Datum
1703 {
1704  text *arg1 = PG_GETARG_TEXT_PP(0);
1705  text *arg2 = PG_GETARG_TEXT_PP(1);
1706  bool result;
1707 
1708  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1709 
1710  PG_FREE_IF_COPY(arg1, 0);
1711  PG_FREE_IF_COPY(arg2, 1);
1712 
1713  PG_RETURN_BOOL(result);
1714 }
1715 
1716 Datum
1718 {
1719  text *arg1 = PG_GETARG_TEXT_PP(0);
1720  text *arg2 = PG_GETARG_TEXT_PP(1);
1721  bool result;
1722 
1723  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1724 
1725  PG_FREE_IF_COPY(arg1, 0);
1726  PG_FREE_IF_COPY(arg2, 1);
1727 
1728  PG_RETURN_BOOL(result);
1729 }
1730 
1731 Datum
1733 {
1734  text *arg1 = PG_GETARG_TEXT_PP(0);
1735  text *arg2 = PG_GETARG_TEXT_PP(1);
1736  bool result;
1737 
1738  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1739 
1740  PG_FREE_IF_COPY(arg1, 0);
1741  PG_FREE_IF_COPY(arg2, 1);
1742 
1743  PG_RETURN_BOOL(result);
1744 }
1745 
1746 Datum
1748 {
1749  text *arg1 = PG_GETARG_TEXT_PP(0);
1750  text *arg2 = PG_GETARG_TEXT_PP(1);
1751  bool result;
1752 
1753  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1754 
1755  PG_FREE_IF_COPY(arg1, 0);
1756  PG_FREE_IF_COPY(arg2, 1);
1757 
1758  PG_RETURN_BOOL(result);
1759 }
1760 
1761 Datum
1763 {
1764  text *arg1 = PG_GETARG_TEXT_PP(0);
1765  text *arg2 = PG_GETARG_TEXT_PP(1);
1766  int32 result;
1767 
1768  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1769 
1770  PG_FREE_IF_COPY(arg1, 0);
1771  PG_FREE_IF_COPY(arg2, 1);
1772 
1773  PG_RETURN_INT32(result);
1774 }
1775 
1776 Datum
1778 {
1780  Oid collid = ssup->ssup_collation;
1781  MemoryContext oldcontext;
1782 
1783  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1784 
1785  /* Use generic string SortSupport */
1786  varstr_sortsupport(ssup, collid, false);
1787 
1788  MemoryContextSwitchTo(oldcontext);
1789 
1790  PG_RETURN_VOID();
1791 }
1792 
1793 /*
1794  * Generic sortsupport interface for character type's operator classes.
1795  * Includes locale support, and support for BpChar semantics (i.e. removing
1796  * trailing spaces before comparison).
1797  *
1798  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1799  * same representation. Callers that always use the C collation (e.g.
1800  * non-collatable type callers like bytea) may have NUL bytes in their strings;
1801  * this will not work with any other collation, though.
1802  */
1803 void
1805 {
1806  bool abbreviate = ssup->abbreviate;
1807  bool collate_c = false;
1808  VarStringSortSupport *sss;
1809  pg_locale_t locale = 0;
1810 
1811  /*
1812  * If possible, set ssup->comparator to a function which can be used to
1813  * directly compare two datums. If we can do this, we'll avoid the
1814  * overhead of a trip through the fmgr layer for every comparison, which
1815  * can be substantial.
1816  *
1817  * Most typically, we'll set the comparator to varstrfastcmp_locale, which
1818  * uses strcoll() to perform comparisons and knows about the special
1819  * requirements of BpChar callers. However, if LC_COLLATE = C, we can
1820  * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
1821  * both of which use memcmp() rather than strcoll().
1822  *
1823  * There is a further exception on Windows. When the database encoding is
1824  * UTF-8 and we are not using the C collation, complex hacks are required.
1825  * We don't currently have a comparator that handles that case, so we fall
1826  * back on the slow method of having the sort code invoke bttextcmp() (in
1827  * the case of text) via the fmgr trampoline.
1828  */
1829  if (lc_collate_is_c(collid))
1830  {
1831  if (!bpchar)
1832  ssup->comparator = varstrfastcmp_c;
1833  else
1834  ssup->comparator = bpcharfastcmp_c;
1835 
1836  collate_c = true;
1837  }
1838 #ifdef WIN32
1839  else if (GetDatabaseEncoding() == PG_UTF8)
1840  return;
1841 #endif
1842  else
1843  {
1845 
1846  /*
1847  * We need a collation-sensitive comparison. To make things faster,
1848  * we'll figure out the collation based on the locale id and cache the
1849  * result.
1850  */
1851  if (collid != DEFAULT_COLLATION_OID)
1852  {
1853  if (!OidIsValid(collid))
1854  {
1855  /*
1856  * This typically means that the parser could not resolve a
1857  * conflict of implicit collations, so report it that way.
1858  */
1859  ereport(ERROR,
1860  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1861  errmsg("could not determine which collation to use for string comparison"),
1862  errhint("Use the COLLATE clause to set the collation explicitly.")));
1863  }
1864  locale = pg_newlocale_from_collation(collid);
1865  }
1866  }
1867 
1868  /*
1869  * Unfortunately, it seems that abbreviation for non-C collations is
1870  * broken on many common platforms; testing of multiple versions of glibc
1871  * reveals that, for many locales, strcoll() and strxfrm() do not return
1872  * consistent results, which is fatal to this optimization. While no
1873  * other libc other than Cygwin has so far been shown to have a problem,
1874  * we take the conservative course of action for right now and disable
1875  * this categorically. (Users who are certain this isn't a problem on
1876  * their system can define TRUST_STRXFRM.)
1877  *
1878  * Even apart from the risk of broken locales, it's possible that there
1879  * are platforms where the use of abbreviated keys should be disabled at
1880  * compile time. Having only 4 byte datums could make worst-case
1881  * performance drastically more likely, for example. Moreover, macOS's
1882  * strxfrm() implementation is known to not effectively concentrate a
1883  * significant amount of entropy from the original string in earlier
1884  * transformed blobs. It's possible that other supported platforms are
1885  * similarly encumbered. So, if we ever get past disabling this
1886  * categorically, we may still want or need to disable it for particular
1887  * platforms.
1888  */
1889 #ifndef TRUST_STRXFRM
1890  if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
1891  abbreviate = false;
1892 #endif
1893 
1894  /*
1895  * If we're using abbreviated keys, or if we're using a locale-aware
1896  * comparison, we need to initialize a StringSortSupport object. Both
1897  * cases will make use of the temporary buffers we initialize here for
1898  * scratch space (and to detect requirement for BpChar semantics from
1899  * caller), and the abbreviation case requires additional state.
1900  */
1901  if (abbreviate || !collate_c)
1902  {
1903  sss = palloc(sizeof(VarStringSortSupport));
1904  sss->buf1 = palloc(TEXTBUFLEN);
1905  sss->buflen1 = TEXTBUFLEN;
1906  sss->buf2 = palloc(TEXTBUFLEN);
1907  sss->buflen2 = TEXTBUFLEN;
1908  /* Start with invalid values */
1909  sss->last_len1 = -1;
1910  sss->last_len2 = -1;
1911  /* Initialize */
1912  sss->last_returned = 0;
1913  sss->locale = locale;
1914 
1915  /*
1916  * To avoid somehow confusing a strxfrm() blob and an original string,
1917  * constantly keep track of the variety of data that buf1 and buf2
1918  * currently contain.
1919  *
1920  * Comparisons may be interleaved with conversion calls. Frequently,
1921  * conversions and comparisons are batched into two distinct phases,
1922  * but the correctness of caching cannot hinge upon this. For
1923  * comparison caching, buffer state is only trusted if cache_blob is
1924  * found set to false, whereas strxfrm() caching only trusts the state
1925  * when cache_blob is found set to true.
1926  *
1927  * Arbitrarily initialize cache_blob to true.
1928  */
1929  sss->cache_blob = true;
1930  sss->collate_c = collate_c;
1931  sss->bpchar = bpchar;
1932  ssup->ssup_extra = sss;
1933 
1934  /*
1935  * If possible, plan to use the abbreviated keys optimization. The
1936  * core code may switch back to authoritative comparator should
1937  * abbreviation be aborted.
1938  */
1939  if (abbreviate)
1940  {
1941  sss->prop_card = 0.20;
1942  initHyperLogLog(&sss->abbr_card, 10);
1943  initHyperLogLog(&sss->full_card, 10);
1944  ssup->abbrev_full_comparator = ssup->comparator;
1945  ssup->comparator = varstrcmp_abbrev;
1948  }
1949  }
1950 }
1951 
1952 /*
1953  * sortsupport comparison func (for C locale case)
1954  */
1955 static int
1957 {
1958  VarString *arg1 = DatumGetVarStringPP(x);
1959  VarString *arg2 = DatumGetVarStringPP(y);
1960  char *a1p,
1961  *a2p;
1962  int len1,
1963  len2,
1964  result;
1965 
1966  a1p = VARDATA_ANY(arg1);
1967  a2p = VARDATA_ANY(arg2);
1968 
1969  len1 = VARSIZE_ANY_EXHDR(arg1);
1970  len2 = VARSIZE_ANY_EXHDR(arg2);
1971 
1972  result = memcmp(a1p, a2p, Min(len1, len2));
1973  if ((result == 0) && (len1 != len2))
1974  result = (len1 < len2) ? -1 : 1;
1975 
1976  /* We can't afford to leak memory here. */
1977  if (PointerGetDatum(arg1) != x)
1978  pfree(arg1);
1979  if (PointerGetDatum(arg2) != y)
1980  pfree(arg2);
1981 
1982  return result;
1983 }
1984 
1985 /*
1986  * sortsupport comparison func (for BpChar C locale case)
1987  *
1988  * BpChar outsources its sortsupport to this module. Specialization for the
1989  * varstr_sortsupport BpChar case, modeled on
1990  * internal_bpchar_pattern_compare().
1991  */
1992 static int
1994 {
1995  BpChar *arg1 = DatumGetBpCharPP(x);
1996  BpChar *arg2 = DatumGetBpCharPP(y);
1997  char *a1p,
1998  *a2p;
1999  int len1,
2000  len2,
2001  result;
2002 
2003  a1p = VARDATA_ANY(arg1);
2004  a2p = VARDATA_ANY(arg2);
2005 
2006  len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2007  len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2008 
2009  result = memcmp(a1p, a2p, Min(len1, len2));
2010  if ((result == 0) && (len1 != len2))
2011  result = (len1 < len2) ? -1 : 1;
2012 
2013  /* We can't afford to leak memory here. */
2014  if (PointerGetDatum(arg1) != x)
2015  pfree(arg1);
2016  if (PointerGetDatum(arg2) != y)
2017  pfree(arg2);
2018 
2019  return result;
2020 }
2021 
2022 /*
2023  * sortsupport comparison func (for locale case)
2024  */
2025 static int
2027 {
2028  VarString *arg1 = DatumGetVarStringPP(x);
2029  VarString *arg2 = DatumGetVarStringPP(y);
2030  bool arg1_match;
2032 
2033  /* working state */
2034  char *a1p,
2035  *a2p;
2036  int len1,
2037  len2,
2038  result;
2039 
2040  a1p = VARDATA_ANY(arg1);
2041  a2p = VARDATA_ANY(arg2);
2042 
2043  len1 = VARSIZE_ANY_EXHDR(arg1);
2044  len2 = VARSIZE_ANY_EXHDR(arg2);
2045 
2046  /* Fast pre-check for equality, as discussed in varstr_cmp() */
2047  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2048  {
2049  /*
2050  * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2051  * last_len2. Existing contents of buffers might still be used by
2052  * next call.
2053  *
2054  * It's fine to allow the comparison of BpChar padding bytes here,
2055  * even though that implies that the memcmp() will usually be
2056  * performed for BpChar callers (though multibyte characters could
2057  * still prevent that from occurring). The memcmp() is still very
2058  * cheap, and BpChar's funny semantics have us remove trailing spaces
2059  * (not limited to padding), so we need make no distinction between
2060  * padding space characters and "real" space characters.
2061  */
2062  result = 0;
2063  goto done;
2064  }
2065 
2066  if (sss->bpchar)
2067  {
2068  /* Get true number of bytes, ignoring trailing spaces */
2069  len1 = bpchartruelen(a1p, len1);
2070  len2 = bpchartruelen(a2p, len2);
2071  }
2072 
2073  if (len1 >= sss->buflen1)
2074  {
2075  pfree(sss->buf1);
2076  sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2077  sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2078  }
2079  if (len2 >= sss->buflen2)
2080  {
2081  pfree(sss->buf2);
2082  sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2083  sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2084  }
2085 
2086  /*
2087  * We're likely to be asked to compare the same strings repeatedly, and
2088  * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2089  * comparisons, even though in general there is no reason to think that
2090  * that will work out (every string datum may be unique). Caching does
2091  * not slow things down measurably when it doesn't work out, and can speed
2092  * things up by rather a lot when it does. In part, this is because the
2093  * memcmp() compares data from cachelines that are needed in L1 cache even
2094  * when the last comparison's result cannot be reused.
2095  */
2096  arg1_match = true;
2097  if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2098  {
2099  arg1_match = false;
2100  memcpy(sss->buf1, a1p, len1);
2101  sss->buf1[len1] = '\0';
2102  sss->last_len1 = len1;
2103  }
2104 
2105  /*
2106  * If we're comparing the same two strings as last time, we can return the
2107  * same answer without calling strcoll() again. This is more likely than
2108  * it seems (at least with moderate to low cardinality sets), because
2109  * quicksort compares the same pivot against many values.
2110  */
2111  if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2112  {
2113  memcpy(sss->buf2, a2p, len2);
2114  sss->buf2[len2] = '\0';
2115  sss->last_len2 = len2;
2116  }
2117  else if (arg1_match && !sss->cache_blob)
2118  {
2119  /* Use result cached following last actual strcoll() call */
2120  result = sss->last_returned;
2121  goto done;
2122  }
2123 
2124  if (sss->locale)
2125  {
2126  if (sss->locale->provider == COLLPROVIDER_ICU)
2127  {
2128 #ifdef USE_ICU
2129 #ifdef HAVE_UCOL_STRCOLLUTF8
2130  if (GetDatabaseEncoding() == PG_UTF8)
2131  {
2132  UErrorCode status;
2133 
2134  status = U_ZERO_ERROR;
2135  result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
2136  a1p, len1,
2137  a2p, len2,
2138  &status);
2139  if (U_FAILURE(status))
2140  ereport(ERROR,
2141  (errmsg("collation failed: %s", u_errorName(status))));
2142  }
2143  else
2144 #endif
2145  {
2146  int32_t ulen1,
2147  ulen2;
2148  UChar *uchar1,
2149  *uchar2;
2150 
2151  ulen1 = icu_to_uchar(&uchar1, a1p, len1);
2152  ulen2 = icu_to_uchar(&uchar2, a2p, len2);
2153 
2154  result = ucol_strcoll(sss->locale->info.icu.ucol,
2155  uchar1, ulen1,
2156  uchar2, ulen2);
2157  }
2158 #else /* not USE_ICU */
2159  /* shouldn't happen */
2160  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2161 #endif /* not USE_ICU */
2162  }
2163  else
2164  {
2165 #ifdef HAVE_LOCALE_T
2166  result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
2167 #else
2168  /* shouldn't happen */
2169  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2170 #endif
2171  }
2172  }
2173  else
2174  result = strcoll(sss->buf1, sss->buf2);
2175 
2176  /*
2177  * In some locales strcoll() can claim that nonidentical strings are
2178  * equal. Believing that would be bad news for a number of reasons, so we
2179  * follow Perl's lead and sort "equal" strings according to strcmp().
2180  */
2181  if (result == 0)
2182  result = strcmp(sss->buf1, sss->buf2);
2183 
2184  /* Cache result, perhaps saving an expensive strcoll() call next time */
2185  sss->cache_blob = false;
2186  sss->last_returned = result;
2187 done:
2188  /* We can't afford to leak memory here. */
2189  if (PointerGetDatum(arg1) != x)
2190  pfree(arg1);
2191  if (PointerGetDatum(arg2) != y)
2192  pfree(arg2);
2193 
2194  return result;
2195 }
2196 
2197 /*
2198  * Abbreviated key comparison func
2199  */
2200 static int
2202 {
2203  /*
2204  * When 0 is returned, the core system will call varstrfastcmp_c()
2205  * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale(). Even a
2206  * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2207  * authoritatively, for the same reason that there is a strcoll()
2208  * tie-breaker call to strcmp() in varstr_cmp().
2209  */
2210  if (x > y)
2211  return 1;
2212  else if (x == y)
2213  return 0;
2214  else
2215  return -1;
2216 }
2217 
2218 /*
2219  * Conversion routine for sortsupport. Converts original to abbreviated key
2220  * representation. Our encoding strategy is simple -- pack the first 8 bytes
2221  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2222  * stored in reverse order), and treat it as an unsigned integer. When the "C"
2223  * locale is used, or in case of bytea, just memcpy() from original instead.
2224  */
2225 static Datum
2227 {
2229  VarString *authoritative = DatumGetVarStringPP(original);
2230  char *authoritative_data = VARDATA_ANY(authoritative);
2231 
2232  /* working state */
2233  Datum res;
2234  char *pres;
2235  int len;
2236  uint32 hash;
2237 
2238  pres = (char *) &res;
2239  /* memset(), so any non-overwritten bytes are NUL */
2240  memset(pres, 0, sizeof(Datum));
2241  len = VARSIZE_ANY_EXHDR(authoritative);
2242 
2243  /* Get number of bytes, ignoring trailing spaces */
2244  if (sss->bpchar)
2245  len = bpchartruelen(authoritative_data, len);
2246 
2247  /*
2248  * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2249  * abbreviate keys. The full comparator for the C locale is always
2250  * memcmp(). It would be incorrect to allow bytea callers (callers that
2251  * always force the C collation -- bytea isn't a collatable type, but this
2252  * approach is convenient) to use strxfrm(). This is because bytea
2253  * strings may contain NUL bytes. Besides, this should be faster, too.
2254  *
2255  * More generally, it's okay that bytea callers can have NUL bytes in
2256  * strings because varstrcmp_abbrev() need not make a distinction between
2257  * terminating NUL bytes, and NUL bytes representing actual NULs in the
2258  * authoritative representation. Hopefully a comparison at or past one
2259  * abbreviated key's terminating NUL byte will resolve the comparison
2260  * without consulting the authoritative representation; specifically, some
2261  * later non-NUL byte in the longer string can resolve the comparison
2262  * against a subsequent terminating NUL in the shorter string. There will
2263  * usually be what is effectively a "length-wise" resolution there and
2264  * then.
2265  *
2266  * If that doesn't work out -- if all bytes in the longer string
2267  * positioned at or past the offset of the smaller string's (first)
2268  * terminating NUL are actually representative of NUL bytes in the
2269  * authoritative binary string (perhaps with some *terminating* NUL bytes
2270  * towards the end of the longer string iff it happens to still be small)
2271  * -- then an authoritative tie-breaker will happen, and do the right
2272  * thing: explicitly consider string length.
2273  */
2274  if (sss->collate_c)
2275  memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2276  else
2277  {
2278  Size bsize;
2279 #ifdef USE_ICU
2280  int32_t ulen = -1;
2281  UChar *uchar;
2282 #endif
2283 
2284  /*
2285  * We're not using the C collation, so fall back on strxfrm or ICU
2286  * analogs.
2287  */
2288 
2289  /* By convention, we use buffer 1 to store and NUL-terminate */
2290  if (len >= sss->buflen1)
2291  {
2292  pfree(sss->buf1);
2293  sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2294  sss->buf1 = palloc(sss->buflen1);
2295  }
2296 
2297  /* Might be able to reuse strxfrm() blob from last call */
2298  if (sss->last_len1 == len && sss->cache_blob &&
2299  memcmp(sss->buf1, authoritative_data, len) == 0)
2300  {
2301  memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2302  /* No change affecting cardinality, so no hashing required */
2303  goto done;
2304  }
2305 
2306  memcpy(sss->buf1, authoritative_data, len);
2307 
2308  /*
2309  * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
2310  * necessary for ICU, but doesn't hurt.
2311  */
2312  sss->buf1[len] = '\0';
2313  sss->last_len1 = len;
2314 
2315 #ifdef USE_ICU
2316  /* When using ICU and not UTF8, convert string to UChar. */
2317  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
2319  ulen = icu_to_uchar(&uchar, sss->buf1, len);
2320 #endif
2321 
2322  /*
2323  * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
2324  * and try again. Both of these functions have the result buffer
2325  * content undefined if the result did not fit, so we need to retry
2326  * until everything fits, even though we only need the first few bytes
2327  * in the end. When using ucol_nextSortKeyPart(), however, we only
2328  * ask for as many bytes as we actually need.
2329  */
2330  for (;;)
2331  {
2332 #ifdef USE_ICU
2333  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
2334  {
2335  /*
2336  * When using UTF8, use the iteration interface so we only
2337  * need to produce as many bytes as we actually need.
2338  */
2339  if (GetDatabaseEncoding() == PG_UTF8)
2340  {
2341  UCharIterator iter;
2342  uint32_t state[2];
2343  UErrorCode status;
2344 
2345  uiter_setUTF8(&iter, sss->buf1, len);
2346  state[0] = state[1] = 0; /* won't need that again */
2347  status = U_ZERO_ERROR;
2348  bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
2349  &iter,
2350  state,
2351  (uint8_t *) sss->buf2,
2352  Min(sizeof(Datum), sss->buflen2),
2353  &status);
2354  if (U_FAILURE(status))
2355  ereport(ERROR,
2356  (errmsg("sort key generation failed: %s", u_errorName(status))));
2357  }
2358  else
2359  bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
2360  uchar, ulen,
2361  (uint8_t *) sss->buf2, sss->buflen2);
2362  }
2363  else
2364 #endif
2365 #ifdef HAVE_LOCALE_T
2366  if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
2367  bsize = strxfrm_l(sss->buf2, sss->buf1,
2368  sss->buflen2, sss->locale->info.lt);
2369  else
2370 #endif
2371  bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2372 
2373  sss->last_len2 = bsize;
2374  if (bsize < sss->buflen2)
2375  break;
2376 
2377  /*
2378  * Grow buffer and retry.
2379  */
2380  pfree(sss->buf2);
2381  sss->buflen2 = Max(bsize + 1,
2382  Min(sss->buflen2 * 2, MaxAllocSize));
2383  sss->buf2 = palloc(sss->buflen2);
2384  }
2385 
2386  /*
2387  * Every Datum byte is always compared. This is safe because the
2388  * strxfrm() blob is itself NUL terminated, leaving no danger of
2389  * misinterpreting any NUL bytes not intended to be interpreted as
2390  * logically representing termination.
2391  *
2392  * (Actually, even if there were NUL bytes in the blob it would be
2393  * okay. See remarks on bytea case above.)
2394  */
2395  memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2396  }
2397 
2398  /*
2399  * Maintain approximate cardinality of both abbreviated keys and original,
2400  * authoritative keys using HyperLogLog. Used as cheap insurance against
2401  * the worst case, where we do many string transformations for no saving
2402  * in full strcoll()-based comparisons. These statistics are used by
2403  * varstr_abbrev_abort().
2404  *
2405  * First, Hash key proper, or a significant fraction of it. Mix in length
2406  * in order to compensate for cases where differences are past
2407  * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2408  */
2409  hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2410  Min(len, PG_CACHE_LINE_SIZE)));
2411 
2412  if (len > PG_CACHE_LINE_SIZE)
2413  hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2414 
2415  addHyperLogLog(&sss->full_card, hash);
2416 
2417  /* Hash abbreviated key */
2418 #if SIZEOF_DATUM == 8
2419  {
2420  uint32 lohalf,
2421  hihalf;
2422 
2423  lohalf = (uint32) res;
2424  hihalf = (uint32) (res >> 32);
2425  hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2426  }
2427 #else /* SIZEOF_DATUM != 8 */
2428  hash = DatumGetUInt32(hash_uint32((uint32) res));
2429 #endif
2430 
2431  addHyperLogLog(&sss->abbr_card, hash);
2432 
2433  /* Cache result, perhaps saving an expensive strxfrm() call next time */
2434  sss->cache_blob = true;
2435 done:
2436 
2437  /*
2438  * Byteswap on little-endian machines.
2439  *
2440  * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2441  * comparator) works correctly on all platforms. If we didn't do this,
2442  * the comparator would have to call memcmp() with a pair of pointers to
2443  * the first byte of each abbreviated key, which is slower.
2444  */
2445  res = DatumBigEndianToNative(res);
2446 
2447  /* Don't leak memory here */
2448  if (PointerGetDatum(authoritative) != original)
2449  pfree(authoritative);
2450 
2451  return res;
2452 }
2453 
2454 /*
2455  * Callback for estimating effectiveness of abbreviated key optimization, using
2456  * heuristic rules. Returns value indicating if the abbreviation optimization
2457  * should be aborted, based on its projected effectiveness.
2458  */
2459 static bool
2460 varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2461 {
2463  double abbrev_distinct,
2464  key_distinct;
2465 
2466  Assert(ssup->abbreviate);
2467 
2468  /* Have a little patience */
2469  if (memtupcount < 100)
2470  return false;
2471 
2472  abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2473  key_distinct = estimateHyperLogLog(&sss->full_card);
2474 
2475  /*
2476  * Clamp cardinality estimates to at least one distinct value. While
2477  * NULLs are generally disregarded, if only NULL values were seen so far,
2478  * that might misrepresent costs if we failed to clamp.
2479  */
2480  if (abbrev_distinct <= 1.0)
2481  abbrev_distinct = 1.0;
2482 
2483  if (key_distinct <= 1.0)
2484  key_distinct = 1.0;
2485 
2486  /*
2487  * In the worst case all abbreviated keys are identical, while at the same
2488  * time there are differences within full key strings not captured in
2489  * abbreviations.
2490  */
2491 #ifdef TRACE_SORT
2492  if (trace_sort)
2493  {
2494  double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2495 
2496  elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2497  "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2498  memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2499  sss->prop_card);
2500  }
2501 #endif
2502 
2503  /*
2504  * If the number of distinct abbreviated keys approximately matches the
2505  * number of distinct authoritative original keys, that's reason enough to
2506  * proceed. We can win even with a very low cardinality set if most
2507  * tie-breakers only memcmp(). This is by far the most important
2508  * consideration.
2509  *
2510  * While comparisons that are resolved at the abbreviated key level are
2511  * considerably cheaper than tie-breakers resolved with memcmp(), both of
2512  * those two outcomes are so much cheaper than a full strcoll() once
2513  * sorting is underway that it doesn't seem worth it to weigh abbreviated
2514  * cardinality against the overall size of the set in order to more
2515  * accurately model costs. Assume that an abbreviated comparison, and an
2516  * abbreviated comparison with a cheap memcmp()-based authoritative
2517  * resolution are equivalent.
2518  */
2519  if (abbrev_distinct > key_distinct * sss->prop_card)
2520  {
2521  /*
2522  * When we have exceeded 10,000 tuples, decay required cardinality
2523  * aggressively for next call.
2524  *
2525  * This is useful because the number of comparisons required on
2526  * average increases at a linearithmic rate, and at roughly 10,000
2527  * tuples that factor will start to dominate over the linear costs of
2528  * string transformation (this is a conservative estimate). The decay
2529  * rate is chosen to be a little less aggressive than halving -- which
2530  * (since we're called at points at which memtupcount has doubled)
2531  * would never see the cost model actually abort past the first call
2532  * following a decay. This decay rate is mostly a precaution against
2533  * a sudden, violent swing in how well abbreviated cardinality tracks
2534  * full key cardinality. The decay also serves to prevent a marginal
2535  * case from being aborted too late, when too much has already been
2536  * invested in string transformation.
2537  *
2538  * It's possible for sets of several million distinct strings with
2539  * mere tens of thousands of distinct abbreviated keys to still
2540  * benefit very significantly. This will generally occur provided
2541  * each abbreviated key is a proxy for a roughly uniform number of the
2542  * set's full keys. If it isn't so, we hope to catch that early and
2543  * abort. If it isn't caught early, by the time the problem is
2544  * apparent it's probably not worth aborting.
2545  */
2546  if (memtupcount > 10000)
2547  sss->prop_card *= 0.65;
2548 
2549  return false;
2550  }
2551 
2552  /*
2553  * Abort abbreviation strategy.
2554  *
2555  * The worst case, where all abbreviated keys are identical while all
2556  * original strings differ will typically only see a regression of about
2557  * 10% in execution time for small to medium sized lists of strings.
2558  * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2559  * often expect very large improvements, particularly with sets of strings
2560  * of moderately high to high abbreviated cardinality. There is little to
2561  * lose but much to gain, which our strategy reflects.
2562  */
2563 #ifdef TRACE_SORT
2564  if (trace_sort)
2565  elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2566  "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2567  memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2568 #endif
2569 
2570  return true;
2571 }
2572 
2573 Datum
2575 {
2576  text *arg1 = PG_GETARG_TEXT_PP(0);
2577  text *arg2 = PG_GETARG_TEXT_PP(1);
2578  text *result;
2579 
2580  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2581 
2582  PG_RETURN_TEXT_P(result);
2583 }
2584 
2585 Datum
2587 {
2588  text *arg1 = PG_GETARG_TEXT_PP(0);
2589  text *arg2 = PG_GETARG_TEXT_PP(1);
2590  text *result;
2591 
2592  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2593 
2594  PG_RETURN_TEXT_P(result);
2595 }
2596 
2597 
2598 /*
2599  * The following operators support character-by-character comparison
2600  * of text datums, to allow building indexes suitable for LIKE clauses.
2601  * Note that the regular texteq/textne comparison operators, and regular
2602  * support functions 1 and 2 with "C" collation are assumed to be
2603  * compatible with these!
2604  */
2605 
2606 static int
2608 {
2609  int result;
2610  int len1,
2611  len2;
2612 
2613  len1 = VARSIZE_ANY_EXHDR(arg1);
2614  len2 = VARSIZE_ANY_EXHDR(arg2);
2615 
2616  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2617  if (result != 0)
2618  return result;
2619  else if (len1 < len2)
2620  return -1;
2621  else if (len1 > len2)
2622  return 1;
2623  else
2624  return 0;
2625 }
2626 
2627 
2628 Datum
2630 {
2631  text *arg1 = PG_GETARG_TEXT_PP(0);
2632  text *arg2 = PG_GETARG_TEXT_PP(1);
2633  int result;
2634 
2635  result = internal_text_pattern_compare(arg1, arg2);
2636 
2637  PG_FREE_IF_COPY(arg1, 0);
2638  PG_FREE_IF_COPY(arg2, 1);
2639 
2640  PG_RETURN_BOOL(result < 0);
2641 }
2642 
2643 
2644 Datum
2646 {
2647  text *arg1 = PG_GETARG_TEXT_PP(0);
2648  text *arg2 = PG_GETARG_TEXT_PP(1);
2649  int result;
2650 
2651  result = internal_text_pattern_compare(arg1, arg2);
2652 
2653  PG_FREE_IF_COPY(arg1, 0);
2654  PG_FREE_IF_COPY(arg2, 1);
2655 
2656  PG_RETURN_BOOL(result <= 0);
2657 }
2658 
2659 
2660 Datum
2662 {
2663  text *arg1 = PG_GETARG_TEXT_PP(0);
2664  text *arg2 = PG_GETARG_TEXT_PP(1);
2665  int result;
2666 
2667  result = internal_text_pattern_compare(arg1, arg2);
2668 
2669  PG_FREE_IF_COPY(arg1, 0);
2670  PG_FREE_IF_COPY(arg2, 1);
2671 
2672  PG_RETURN_BOOL(result >= 0);
2673 }
2674 
2675 
2676 Datum
2678 {
2679  text *arg1 = PG_GETARG_TEXT_PP(0);
2680  text *arg2 = PG_GETARG_TEXT_PP(1);
2681  int result;
2682 
2683  result = internal_text_pattern_compare(arg1, arg2);
2684 
2685  PG_FREE_IF_COPY(arg1, 0);
2686  PG_FREE_IF_COPY(arg2, 1);
2687 
2688  PG_RETURN_BOOL(result > 0);
2689 }
2690 
2691 
2692 Datum
2694 {
2695  text *arg1 = PG_GETARG_TEXT_PP(0);
2696  text *arg2 = PG_GETARG_TEXT_PP(1);
2697  int result;
2698 
2699  result = internal_text_pattern_compare(arg1, arg2);
2700 
2701  PG_FREE_IF_COPY(arg1, 0);
2702  PG_FREE_IF_COPY(arg2, 1);
2703 
2704  PG_RETURN_INT32(result);
2705 }
2706 
2707 
2708 Datum
2710 {
2712  MemoryContext oldcontext;
2713 
2714  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2715 
2716  /* Use generic string SortSupport, forcing "C" collation */
2717  varstr_sortsupport(ssup, C_COLLATION_OID, false);
2718 
2719  MemoryContextSwitchTo(oldcontext);
2720 
2721  PG_RETURN_VOID();
2722 }
2723 
2724 
2725 /*-------------------------------------------------------------
2726  * byteaoctetlen
2727  *
2728  * get the number of bytes contained in an instance of type 'bytea'
2729  *-------------------------------------------------------------
2730  */
2731 Datum
2733 {
2734  Datum str = PG_GETARG_DATUM(0);
2735 
2736  /* We need not detoast the input at all */
2738 }
2739 
2740 /*
2741  * byteacat -
2742  * takes two bytea* and returns a bytea* that is the concatenation of
2743  * the two.
2744  *
2745  * Cloned from textcat and modified as required.
2746  */
2747 Datum
2749 {
2750  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2751  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2752 
2754 }
2755 
2756 /*
2757  * bytea_catenate
2758  * Guts of byteacat(), broken out so it can be used by other functions
2759  *
2760  * Arguments can be in short-header form, but not compressed or out-of-line
2761  */
2762 static bytea *
2764 {
2765  bytea *result;
2766  int len1,
2767  len2,
2768  len;
2769  char *ptr;
2770 
2771  len1 = VARSIZE_ANY_EXHDR(t1);
2772  len2 = VARSIZE_ANY_EXHDR(t2);
2773 
2774  /* paranoia ... probably should throw error instead? */
2775  if (len1 < 0)
2776  len1 = 0;
2777  if (len2 < 0)
2778  len2 = 0;
2779 
2780  len = len1 + len2 + VARHDRSZ;
2781  result = (bytea *) palloc(len);
2782 
2783  /* Set size of result string... */
2784  SET_VARSIZE(result, len);
2785 
2786  /* Fill data field of result string... */
2787  ptr = VARDATA(result);
2788  if (len1 > 0)
2789  memcpy(ptr, VARDATA_ANY(t1), len1);
2790  if (len2 > 0)
2791  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2792 
2793  return result;
2794 }
2795 
/* Build a bytea from a C string by running it through byteain(). */
#define PG_STR_GET_BYTEA(str_) \
	DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum((str_))))
2798 
2799 /*
2800  * bytea_substr()
2801  * Return a substring starting at the specified position.
2802  * Cloned from text_substr and modified as required.
2803  *
2804  * Input:
2805  * - string
2806  * - starting position (is one-based)
2807  * - string length (optional)
2808  *
2809  * If the starting position is zero or less, then return from the start of the string
2810  * adjusting the length to be consistent with the "negative start" per SQL.
2811  * If the length is less than zero, an ERROR is thrown. If no third argument
2812  * (length) is provided, the length to the end of the string is assumed.
2813  */
2814 Datum
2816 {
2818  PG_GETARG_INT32(1),
2819  PG_GETARG_INT32(2),
2820  false));
2821 }
2822 
2823 /*
2824  * bytea_substr_no_len -
2825  * Wrapper to avoid opr_sanity failure due to
2826  * one function accepting a different number of args.
2827  */
2828 Datum
2830 {
2832  PG_GETARG_INT32(1),
2833  -1,
2834  true));
2835 }
2836 
2837 static bytea *
2839  int S,
2840  int L,
2841  bool length_not_specified)
2842 {
2843  int S1; /* adjusted start position */
2844  int L1; /* adjusted substring length */
2845 
2846  S1 = Max(S, 1);
2847 
2848  if (length_not_specified)
2849  {
2850  /*
2851  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2852  * end of the string if we pass it a negative value for length.
2853  */
2854  L1 = -1;
2855  }
2856  else
2857  {
2858  /* end position */
2859  int E = S + L;
2860 
2861  /*
2862  * A negative value for L is the only way for the end position to be
2863  * before the start. SQL99 says to throw an error.
2864  */
2865  if (E < S)
2866  ereport(ERROR,
2867  (errcode(ERRCODE_SUBSTRING_ERROR),
2868  errmsg("negative substring length not allowed")));
2869 
2870  /*
2871  * A zero or negative value for the end position can happen if the
2872  * start was negative or one. SQL99 says to return a zero-length
2873  * string.
2874  */
2875  if (E < 1)
2876  return PG_STR_GET_BYTEA("");
2877 
2878  L1 = E - S1;
2879  }
2880 
2881  /*
2882  * If the start position is past the end of the string, SQL99 says to
2883  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2884  * us. Convert to zero-based starting position
2885  */
2886  return DatumGetByteaPSlice(str, S1 - 1, L1);
2887 }
2888 
2889 /*
2890  * byteaoverlay
2891  * Replace specified substring of first string with second
2892  *
2893  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2894  * This code is a direct implementation of what the standard says.
2895  */
2896 Datum
2898 {
2899  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2900  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2901  int sp = PG_GETARG_INT32(2); /* substring start position */
2902  int sl = PG_GETARG_INT32(3); /* substring length */
2903 
2904  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2905 }
2906 
2907 Datum
2909 {
2910  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2911  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2912  int sp = PG_GETARG_INT32(2); /* substring start position */
2913  int sl;
2914 
2915  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2916  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2917 }
2918 
2919 static bytea *
2920 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2921 {
2922  bytea *result;
2923  bytea *s1;
2924  bytea *s2;
2925  int sp_pl_sl;
2926 
2927  /*
2928  * Check for possible integer-overflow cases. For negative sp, throw a
2929  * "substring length" error because that's what should be expected
2930  * according to the spec's definition of OVERLAY().
2931  */
2932  if (sp <= 0)
2933  ereport(ERROR,
2934  (errcode(ERRCODE_SUBSTRING_ERROR),
2935  errmsg("negative substring length not allowed")));
2936  sp_pl_sl = sp + sl;
2937  if (sp_pl_sl <= sl)
2938  ereport(ERROR,
2939  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2940  errmsg("integer out of range")));
2941 
2942  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2943  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2944  result = bytea_catenate(s1, t2);
2945  result = bytea_catenate(result, s2);
2946 
2947  return result;
2948 }
2949 
2950 /*
2951  * byteapos -
2952  * Return the position of the specified substring.
2953  * Implements the SQL POSITION() function.
2954  * Cloned from textpos and modified as required.
2955  */
2956 Datum
2958 {
2959  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2960  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2961  int pos;
2962  int px,
2963  p;
2964  int len1,
2965  len2;
2966  char *p1,
2967  *p2;
2968 
2969  len1 = VARSIZE_ANY_EXHDR(t1);
2970  len2 = VARSIZE_ANY_EXHDR(t2);
2971 
2972  if (len2 <= 0)
2973  PG_RETURN_INT32(1); /* result for empty pattern */
2974 
2975  p1 = VARDATA_ANY(t1);
2976  p2 = VARDATA_ANY(t2);
2977 
2978  pos = 0;
2979  px = (len1 - len2);
2980  for (p = 0; p <= px; p++)
2981  {
2982  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2983  {
2984  pos = p + 1;
2985  break;
2986  };
2987  p1++;
2988  };
2989 
2990  PG_RETURN_INT32(pos);
2991 }
2992 
2993 /*-------------------------------------------------------------
2994  * byteaGetByte
2995  *
2996  * this routine treats "bytea" as an array of bytes.
2997  * It returns the Nth byte (a number between 0 and 255).
2998  *-------------------------------------------------------------
2999  */
3000 Datum
3002 {
3003  bytea *v = PG_GETARG_BYTEA_PP(0);
3004  int32 n = PG_GETARG_INT32(1);
3005  int len;
3006  int byte;
3007 
3008  len = VARSIZE_ANY_EXHDR(v);
3009 
3010  if (n < 0 || n >= len)
3011  ereport(ERROR,
3012  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3013  errmsg("index %d out of valid range, 0..%d",
3014  n, len - 1)));
3015 
3016  byte = ((unsigned char *) VARDATA_ANY(v))[n];
3017 
3018  PG_RETURN_INT32(byte);
3019 }
3020 
3021 /*-------------------------------------------------------------
3022  * byteaGetBit
3023  *
3024  * This routine treats a "bytea" type like an array of bits.
3025  * It returns the value of the Nth bit (0 or 1).
3026  *
3027  *-------------------------------------------------------------
3028  */
3029 Datum
3031 {
3032  bytea *v = PG_GETARG_BYTEA_PP(0);
3033  int32 n = PG_GETARG_INT32(1);
3034  int byteNo,
3035  bitNo;
3036  int len;
3037  int byte;
3038 
3039  len = VARSIZE_ANY_EXHDR(v);
3040 
3041  if (n < 0 || n >= len * 8)
3042  ereport(ERROR,
3043  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3044  errmsg("index %d out of valid range, 0..%d",
3045  n, len * 8 - 1)));
3046 
3047  byteNo = n / 8;
3048  bitNo = n % 8;
3049 
3050  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3051 
3052  if (byte & (1 << bitNo))
3053  PG_RETURN_INT32(1);
3054  else
3055  PG_RETURN_INT32(0);
3056 }
3057 
3058 /*-------------------------------------------------------------
3059  * byteaSetByte
3060  *
3061  * Given an instance of type 'bytea' creates a new one with
3062  * the Nth byte set to the given value.
3063  *
3064  *-------------------------------------------------------------
3065  */
3066 Datum
3068 {
3069  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3070  int32 n = PG_GETARG_INT32(1);
3071  int32 newByte = PG_GETARG_INT32(2);
3072  int len;
3073 
3074  len = VARSIZE(res) - VARHDRSZ;
3075 
3076  if (n < 0 || n >= len)
3077  ereport(ERROR,
3078  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3079  errmsg("index %d out of valid range, 0..%d",
3080  n, len - 1)));
3081 
3082  /*
3083  * Now set the byte.
3084  */
3085  ((unsigned char *) VARDATA(res))[n] = newByte;
3086 
3087  PG_RETURN_BYTEA_P(res);
3088 }
3089 
3090 /*-------------------------------------------------------------
3091  * byteaSetBit
3092  *
3093  * Given an instance of type 'bytea' creates a new one with
3094  * the Nth bit set to the given value.
3095  *
3096  *-------------------------------------------------------------
3097  */
3098 Datum
3100 {
3101  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3102  int32 n = PG_GETARG_INT32(1);
3103  int32 newBit = PG_GETARG_INT32(2);
3104  int len;
3105  int oldByte,
3106  newByte;
3107  int byteNo,
3108  bitNo;
3109 
3110  len = VARSIZE(res) - VARHDRSZ;
3111 
3112  if (n < 0 || n >= len * 8)
3113  ereport(ERROR,
3114  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3115  errmsg("index %d out of valid range, 0..%d",
3116  n, len * 8 - 1)));
3117 
3118  byteNo = n / 8;
3119  bitNo = n % 8;
3120 
3121  /*
3122  * sanity check!
3123  */
3124  if (newBit != 0 && newBit != 1)
3125  ereport(ERROR,
3126  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3127  errmsg("new bit must be 0 or 1")));
3128 
3129  /*
3130  * Update the byte.
3131  */
3132  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3133 
3134  if (newBit == 0)
3135  newByte = oldByte & (~(1 << bitNo));
3136  else
3137  newByte = oldByte | (1 << bitNo);
3138 
3139  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3140 
3141  PG_RETURN_BYTEA_P(res);
3142 }
3143 
3144 
3145 /* text_name()
3146  * Converts a text type to a Name type.
3147  */
3148 Datum
3150 {
3151  text *s = PG_GETARG_TEXT_PP(0);
3152  Name result;
3153  int len;
3154 
3155  len = VARSIZE_ANY_EXHDR(s);
3156 
3157  /* Truncate oversize input */
3158  if (len >= NAMEDATALEN)
3159  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3160 
3161  /* We use palloc0 here to ensure result is zero-padded */
3162  result = (Name) palloc0(NAMEDATALEN);
3163  memcpy(NameStr(*result), VARDATA_ANY(s), len);
3164 
3165  PG_RETURN_NAME(result);
3166 }
3167 
3168 /* name_text()
3169  * Converts a Name type to a text type.
3170  */
3171 Datum
3173 {
3174  Name s = PG_GETARG_NAME(0);
3175 
3177 }
3178 
3179 
3180 /*
3181  * textToQualifiedNameList - convert a text object to list of names
3182  *
3183  * This implements the input parsing needed by nextval() and other
3184  * functions that take a text parameter representing a qualified name.
3185  * We split the name at dots, downcase if not double-quoted, and
3186  * truncate names if they're too long.
3187  */
3188 List *
3190 {
3191  char *rawname;
3192  List *result = NIL;
3193  List *namelist;
3194  ListCell *l;
3195 
3196  /* Convert to C string (handles possible detoasting). */
3197  /* Note we rely on being able to modify rawname below. */
3198  rawname = text_to_cstring(textval);
3199 
3200  if (!SplitIdentifierString(rawname, '.', &namelist))
3201  ereport(ERROR,
3202  (errcode(ERRCODE_INVALID_NAME),
3203  errmsg("invalid name syntax")));
3204 
3205  if (namelist == NIL)
3206  ereport(ERROR,
3207  (errcode(ERRCODE_INVALID_NAME),
3208  errmsg("invalid name syntax")));
3209 
3210  foreach(l, namelist)
3211  {
3212  char *curname = (char *) lfirst(l);
3213 
3214  result = lappend(result, makeString(pstrdup(curname)));
3215  }
3216 
3217  pfree(rawname);
3218  list_free(namelist);
3219 
3220  return result;
3221 }
3222 
3223 /*
3224  * SplitIdentifierString --- parse a string containing identifiers
3225  *
3226  * This is the guts of textToQualifiedNameList, and is exported for use in
3227  * other situations such as parsing GUC variables. In the GUC case, it's
3228  * important to avoid memory leaks, so the API is designed to minimize the
3229  * amount of stuff that needs to be allocated and freed.
3230  *
3231  * Inputs:
3232  * rawstring: the input string; must be overwritable! On return, it's
3233  * been modified to contain the separated identifiers.
3234  * separator: the separator punctuation expected between identifiers
3235  * (typically '.' or ','). Whitespace may also appear around
3236  * identifiers.
3237  * Outputs:
3238  * namelist: filled with a palloc'd list of pointers to identifiers within
3239  * rawstring. Caller should list_free() this even on error return.
3240  *
3241  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3242  *
3243  * Note that an empty string is considered okay here, though not in
3244  * textToQualifiedNameList.
3245  */
3246 bool
3247 SplitIdentifierString(char *rawstring, char separator,
3248  List **namelist)
3249 {
3250  char *nextp = rawstring;
3251  bool done = false;
3252 
3253  *namelist = NIL;
3254 
3255  while (isspace((unsigned char) *nextp))
3256  nextp++; /* skip leading whitespace */
3257 
3258  if (*nextp == '\0')
3259  return true; /* allow empty string */
3260 
3261  /* At the top of the loop, we are at start of a new identifier. */
3262  do
3263  {
3264  char *curname;
3265  char *endp;
3266 
3267  if (*nextp == '"')
3268  {
3269  /* Quoted name --- collapse quote-quote pairs, no downcasing */
3270  curname = nextp + 1;
3271  for (;;)
3272  {
3273  endp = strchr(nextp + 1, '"');
3274  if (endp == NULL)
3275  return false; /* mismatched quotes */
3276  if (endp[1] != '"')
3277  break; /* found end of quoted name */
3278  /* Collapse adjacent quotes into one quote, and look again */
3279  memmove(endp, endp + 1, strlen(endp));
3280  nextp = endp;
3281  }
3282  /* endp now points at the terminating quote */
3283  nextp = endp + 1;
3284  }
3285  else
3286  {
3287  /* Unquoted name --- extends to separator or whitespace */
3288  char *downname;
3289  int len;
3290 
3291  curname = nextp;
3292  while (*nextp && *nextp != separator &&
3293  !isspace((unsigned char) *nextp))
3294  nextp++;
3295  endp = nextp;
3296  if (curname == nextp)
3297  return false; /* empty unquoted name not allowed */
3298 
3299  /*
3300  * Downcase the identifier, using same code as main lexer does.
3301  *
3302  * XXX because we want to overwrite the input in-place, we cannot
3303  * support a downcasing transformation that increases the string
3304  * length. This is not a problem given the current implementation
3305  * of downcase_truncate_identifier, but we'll probably have to do
3306  * something about this someday.
3307  */
3308  len = endp - curname;
3309  downname = downcase_truncate_identifier(curname, len, false);
3310  Assert(strlen(downname) <= len);
3311  strncpy(curname, downname, len); /* strncpy is required here */
3312  pfree(downname);
3313  }
3314 
3315  while (isspace((unsigned char) *nextp))
3316  nextp++; /* skip trailing whitespace */
3317 
3318  if (*nextp == separator)
3319  {
3320  nextp++;
3321  while (isspace((unsigned char) *nextp))
3322  nextp++; /* skip leading whitespace for next */
3323  /* we expect another name, so done remains false */
3324  }
3325  else if (*nextp == '\0')
3326  done = true;
3327  else
3328  return false; /* invalid syntax */
3329 
3330  /* Now safe to overwrite separator with a null */
3331  *endp = '\0';
3332 
3333  /* Truncate name if it's overlength */
3334  truncate_identifier(curname, strlen(curname), false);
3335 
3336  /*
3337  * Finished isolating current name --- add it to list
3338  */
3339  *namelist = lappend(*namelist, curname);
3340 
3341  /* Loop back if we didn't reach end of string */
3342  } while (!done);
3343 
3344  return true;
3345 }
3346 
3347 
3348 /*
3349  * SplitDirectoriesString --- parse a string containing directory names
3350  *
3351  * This is similar to SplitIdentifierString, except that the parsing
3352  * rules are meant to handle pathnames instead of identifiers: there is
3353  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3354  * and we apply canonicalize_path() to each extracted string. Because of the
3355  * last, the returned strings are separately palloc'd rather than being
3356  * pointers into rawstring --- but we still scribble on rawstring.
3357  *
3358  * Inputs:
3359  * rawstring: the input string; must be modifiable!
3360  * separator: the separator punctuation expected between directories
3361  * (typically ',' or ';'). Whitespace may also appear around
3362  * directories.
3363  * Outputs:
3364  * namelist: filled with a palloc'd list of directory names.
3365  * Caller should list_free_deep() this even on error return.
3366  *
3367  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3368  *
3369  * Note that an empty string is considered okay here.
3370  */
3371 bool
3372 SplitDirectoriesString(char *rawstring, char separator,
3373  List **namelist)
3374 {
3375  char *nextp = rawstring;
3376  bool done = false;
3377 
3378  *namelist = NIL;
3379 
3380  while (isspace((unsigned char) *nextp))
3381  nextp++; /* skip leading whitespace */
3382 
3383  if (*nextp == '\0')
3384  return true; /* allow empty string */
3385 
3386  /* At the top of the loop, we are at start of a new directory. */
3387  do
3388  {
3389  char *curname;
3390  char *endp;
3391 
3392  if (*nextp == '"')
3393  {
3394  /* Quoted name --- collapse quote-quote pairs */
3395  curname = nextp + 1;
3396  for (;;)
3397  {
3398  endp = strchr(nextp + 1, '"');
3399  if (endp == NULL)
3400  return false; /* mismatched quotes */
3401  if (endp[1] != '"')
3402  break; /* found end of quoted name */
3403  /* Collapse adjacent quotes into one quote, and look again */
3404  memmove(endp, endp + 1, strlen(endp));
3405  nextp = endp;
3406  }
3407  /* endp now points at the terminating quote */
3408  nextp = endp + 1;
3409  }
3410  else
3411  {
3412  /* Unquoted name --- extends to separator or end of string */
3413  curname = endp = nextp;
3414  while (*nextp && *nextp != separator)
3415  {
3416  /* trailing whitespace should not be included in name */
3417  if (!isspace((unsigned char) *nextp))
3418  endp = nextp + 1;
3419  nextp++;
3420  }
3421  if (curname == endp)
3422  return false; /* empty unquoted name not allowed */
3423  }
3424 
3425  while (isspace((unsigned char) *nextp))
3426  nextp++; /* skip trailing whitespace */
3427 
3428  if (*nextp == separator)
3429  {
3430  nextp++;
3431  while (isspace((unsigned char) *nextp))
3432  nextp++; /* skip leading whitespace for next */
3433  /* we expect another name, so done remains false */
3434  }
3435  else if (*nextp == '\0')
3436  done = true;
3437  else
3438  return false; /* invalid syntax */
3439 
3440  /* Now safe to overwrite separator with a null */
3441  *endp = '\0';
3442 
3443  /* Truncate path if it's overlength */
3444  if (strlen(curname) >= MAXPGPATH)
3445  curname[MAXPGPATH - 1] = '\0';
3446 
3447  /*
3448  * Finished isolating current name --- add it to list
3449  */
3450  curname = pstrdup(curname);
3451  canonicalize_path(curname);
3452  *namelist = lappend(*namelist, curname);
3453 
3454  /* Loop back if we didn't reach end of string */
3455  } while (!done);
3456 
3457  return true;
3458 }
3459 
3460 
3461 /*****************************************************************************
3462  * Comparison Functions used for bytea
3463  *
3464  * Note: btree indexes need these routines not to leak memory; therefore,
3465  * be careful to free working copies of toasted datums. Most places don't
3466  * need to be so careful.
3467  *****************************************************************************/
3468 
3469 Datum
3471 {
3472  Datum arg1 = PG_GETARG_DATUM(0);
3473  Datum arg2 = PG_GETARG_DATUM(1);
3474  bool result;
3475  Size len1,
3476  len2;
3477 
3478  /*
3479  * We can use a fast path for unequal lengths, which might save us from
3480  * having to detoast one or both values.
3481  */
3482  len1 = toast_raw_datum_size(arg1);
3483  len2 = toast_raw_datum_size(arg2);
3484  if (len1 != len2)
3485  result = false;
3486  else
3487  {
3488  bytea *barg1 = DatumGetByteaPP(arg1);
3489  bytea *barg2 = DatumGetByteaPP(arg2);
3490 
3491  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3492  len1 - VARHDRSZ) == 0);
3493 
3494  PG_FREE_IF_COPY(barg1, 0);
3495  PG_FREE_IF_COPY(barg2, 1);
3496  }
3497 
3498  PG_RETURN_BOOL(result);
3499 }
3500 
3501 Datum
3503 {
3504  Datum arg1 = PG_GETARG_DATUM(0);
3505  Datum arg2 = PG_GETARG_DATUM(1);
3506  bool result;
3507  Size len1,
3508  len2;
3509 
3510  /*
3511  * We can use a fast path for unequal lengths, which might save us from
3512  * having to detoast one or both values.
3513  */
3514  len1 = toast_raw_datum_size(arg1);
3515  len2 = toast_raw_datum_size(arg2);
3516  if (len1 != len2)
3517  result = true;
3518  else
3519  {
3520  bytea *barg1 = DatumGetByteaPP(arg1);
3521  bytea *barg2 = DatumGetByteaPP(arg2);
3522 
3523  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3524  len1 - VARHDRSZ) != 0);
3525 
3526  PG_FREE_IF_COPY(barg1, 0);
3527  PG_FREE_IF_COPY(barg2, 1);
3528  }
3529 
3530  PG_RETURN_BOOL(result);
3531 }
3532 
3533 Datum
3535 {
3536  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3537  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3538  int len1,
3539  len2;
3540  int cmp;
3541 
3542  len1 = VARSIZE_ANY_EXHDR(arg1);
3543  len2 = VARSIZE_ANY_EXHDR(arg2);
3544 
3545  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3546 
3547  PG_FREE_IF_COPY(arg1, 0);
3548  PG_FREE_IF_COPY(arg2, 1);
3549 
3550  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3551 }
3552 
3553 Datum
3555 {
3556  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3557  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3558  int len1,
3559  len2;
3560  int cmp;
3561 
3562  len1 = VARSIZE_ANY_EXHDR(arg1);
3563  len2 = VARSIZE_ANY_EXHDR(arg2);
3564 
3565  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3566 
3567  PG_FREE_IF_COPY(arg1, 0);
3568  PG_FREE_IF_COPY(arg2, 1);
3569 
3570  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3571 }
3572 
3573 Datum
3575 {
3576  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3577  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3578  int len1,
3579  len2;
3580  int cmp;
3581 
3582  len1 = VARSIZE_ANY_EXHDR(arg1);
3583  len2 = VARSIZE_ANY_EXHDR(arg2);
3584 
3585  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3586 
3587  PG_FREE_IF_COPY(arg1, 0);
3588  PG_FREE_IF_COPY(arg2, 1);
3589 
3590  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3591 }
3592 
3593 Datum
3595 {
3596  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3597  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3598  int len1,
3599  len2;
3600  int cmp;
3601 
3602  len1 = VARSIZE_ANY_EXHDR(arg1);
3603  len2 = VARSIZE_ANY_EXHDR(arg2);
3604 
3605  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3606 
3607  PG_FREE_IF_COPY(arg1, 0);
3608  PG_FREE_IF_COPY(arg2, 1);
3609 
3610  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
3611 }
3612 
3613 Datum
3615 {
3616  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3617  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3618  int len1,
3619  len2;
3620  int cmp;
3621 
3622  len1 = VARSIZE_ANY_EXHDR(arg1);
3623  len2 = VARSIZE_ANY_EXHDR(arg2);
3624 
3625  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3626  if ((cmp == 0) && (len1 != len2))
3627  cmp = (len1 < len2) ? -1 : 1;
3628 
3629  PG_FREE_IF_COPY(arg1, 0);
3630  PG_FREE_IF_COPY(arg2, 1);
3631 
3632  PG_RETURN_INT32(cmp);
3633 }
3634 
3635 Datum
3637 {
3639  MemoryContext oldcontext;
3640 
3641  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3642 
3643  /* Use generic string SortSupport, forcing "C" collation */
3644  varstr_sortsupport(ssup, C_COLLATION_OID, false);
3645 
3646  MemoryContextSwitchTo(oldcontext);
3647 
3648  PG_RETURN_VOID();
3649 }
3650 
3651 /*
3652  * appendStringInfoText
3653  *
3654  * Append a text to str.
3655  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
3656  */
3657 static void
3659 {
3661 }
3662 
3663 /*
3664  * replace_text
3665  * replace all occurrences of 'old_sub_str' in 'orig_str'
3666  * with 'new_sub_str' to form 'new_str'
3667  *
3668  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
3669  * otherwise returns 'new_str'
3670  */
3671 Datum
3673 {
3674  text *src_text = PG_GETARG_TEXT_PP(0);
3675  text *from_sub_text = PG_GETARG_TEXT_PP(1);
3676  text *to_sub_text = PG_GETARG_TEXT_PP(2);
3677  int src_text_len;
3678  int from_sub_text_len;
3680  text *ret_text;
3681  int start_posn;
3682  int curr_posn;
3683  int chunk_len;
3684  char *start_ptr;
3685  StringInfoData str;
3686 
3687  text_position_setup(src_text, from_sub_text, &state);
3688 
3689  /*
3690  * Note: we check the converted string length, not the original, because
3691  * they could be different if the input contained invalid encoding.
3692  */
3693  src_text_len = state.len1;
3694  from_sub_text_len = state.len2;
3695 
3696  /* Return unmodified source string if empty source or pattern */
3697  if (src_text_len < 1 || from_sub_text_len < 1)
3698  {
3699  text_position_cleanup(&state);
3700  PG_RETURN_TEXT_P(src_text);
3701  }
3702 
3703  start_posn = 1;
3704  curr_posn = text_position_next(1, &state);
3705 
3706  /* When the from_sub_text is not found, there is nothing to do. */
3707  if (curr_posn == 0)
3708  {
3709  text_position_cleanup(&state);
3710  PG_RETURN_TEXT_P(src_text);
3711  }
3712 
3713  /* start_ptr points to the start_posn'th character of src_text */
3714  start_ptr = VARDATA_ANY(src_text);
3715 
3716  initStringInfo(&str);
3717 
3718  do
3719  {
3721 
3722  /* copy the data skipped over by last text_position_next() */
3723  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3724  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3725 
3726  appendStringInfoText(&str, to_sub_text);
3727 
3728  start_posn = curr_posn;
3729  start_ptr += chunk_len;
3730  start_posn += from_sub_text_len;
3731  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3732 
3733  curr_posn = text_position_next(start_posn, &state);
3734  }
3735  while (curr_posn > 0);
3736 
3737  /* copy trailing data */
3738  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3739  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3740 
3741  text_position_cleanup(&state);
3742 
3743  ret_text = cstring_to_text_with_len(str.data, str.len);
3744  pfree(str.data);
3745 
3746  PG_RETURN_TEXT_P(ret_text);
3747 }
3748 
3749 /*
3750  * check_replace_text_has_escape_char
3751  *
3752  * check whether replace_text contains escape char.
3753  */
3754 static bool
3756 {
3757  const char *p = VARDATA_ANY(replace_text);
3758  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3759 
3761  {
3762  for (; p < p_end; p++)
3763  {
3764  if (*p == '\\')
3765  return true;
3766  }
3767  }
3768  else
3769  {
3770  for (; p < p_end; p += pg_mblen(p))
3771  {
3772  if (*p == '\\')
3773  return true;
3774  }
3775  }
3776 
3777  return false;
3778 }
3779 
3780 /*
3781  * appendStringInfoRegexpSubstr
3782  *
3783  * Append replace_text to str, substituting regexp back references for
3784  * \n escapes. start_ptr is the start of the match in the source string,
3785  * at logical character position data_pos.
3786  */
3787 static void
3789  regmatch_t *pmatch,
3790  char *start_ptr, int data_pos)
3791 {
3792  const char *p = VARDATA_ANY(replace_text);
3793  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3794  int eml = pg_database_encoding_max_length();
3795 
3796  for (;;)
3797  {
3798  const char *chunk_start = p;
3799  int so;
3800  int eo;
3801 
3802  /* Find next escape char. */
3803  if (eml == 1)
3804  {
3805  for (; p < p_end && *p != '\\'; p++)
3806  /* nothing */ ;
3807  }
3808  else
3809  {
3810  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3811  /* nothing */ ;
3812  }
3813 
3814  /* Copy the text we just scanned over, if any. */
3815  if (p > chunk_start)
3816  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3817 
3818  /* Done if at end of string, else advance over escape char. */
3819  if (p >= p_end)
3820  break;
3821  p++;
3822 
3823  if (p >= p_end)
3824  {
3825  /* Escape at very end of input. Treat same as unexpected char */
3826  appendStringInfoChar(str, '\\');
3827  break;
3828  }
3829 
3830  if (*p >= '1' && *p <= '9')
3831  {
3832  /* Use the back reference of regexp. */
3833  int idx = *p - '0';
3834 
3835  so = pmatch[idx].rm_so;
3836  eo = pmatch[idx].rm_eo;
3837  p++;
3838  }
3839  else if (*p == '&')
3840  {
3841  /* Use the entire matched string. */
3842  so = pmatch[0].rm_so;
3843  eo = pmatch[0].rm_eo;
3844  p++;
3845  }
3846  else if (*p == '\\')
3847  {
3848  /* \\ means transfer one \ to output. */
3849  appendStringInfoChar(str, '\\');
3850  p++;
3851  continue;
3852  }
3853  else
3854  {
3855  /*
3856  * If escape char is not followed by any expected char, just treat
3857  * it as ordinary data to copy. (XXX would it be better to throw
3858  * an error?)
3859  */
3860  appendStringInfoChar(str, '\\');
3861  continue;
3862  }
3863 
3864  if (so != -1 && eo != -1)
3865  {
3866  /*
3867  * Copy the text that is back reference of regexp. Note so and eo
3868  * are counted in characters not bytes.
3869  */
3870  char *chunk_start;
3871  int chunk_len;
3872 
3873  Assert(so >= data_pos);
3874  chunk_start = start_ptr;
3875  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3876  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3877  appendBinaryStringInfo(str, chunk_start, chunk_len);
3878  }
3879  }
3880 }
3881 
3882 #define REGEXP_REPLACE_BACKREF_CNT 10
3883 
3884 /*
3885  * replace_text_regexp
3886  *
3887  * replace the text in src_text that matches regexp with replace_text.
3888  *
3889  * Note: to avoid having to include regex.h in builtins.h, we declare
3890  * the regexp argument as void *, but really it's regex_t *.
3891  */
3892 text *
3893 replace_text_regexp(text *src_text, void *regexp,
3894  text *replace_text, bool glob)
3895 {
3896  text *ret_text;
3897  regex_t *re = (regex_t *) regexp;
3898  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3901  pg_wchar *data;
3902  size_t data_len;
3903  int search_start;
3904  int data_pos;
3905  char *start_ptr;
3906  bool have_escape;
3907 
3908  initStringInfo(&buf);
3909 
3910  /* Convert data string to wide characters. */
3911  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3912  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3913 
3914  /* Check whether replace_text has escape char. */
3915  have_escape = check_replace_text_has_escape_char(replace_text);
3916 
3917  /* start_ptr points to the data_pos'th character of src_text */
3918  start_ptr = (char *) VARDATA_ANY(src_text);
3919  data_pos = 0;
3920 
3921  search_start = 0;
3922  while (search_start <= data_len)
3923  {
3924  int regexec_result;
3925 
3927 
3928  regexec_result = pg_regexec(re,
3929  data,
3930  data_len,
3931  search_start,
3932  NULL, /* no details */
3934  pmatch,
3935  0);
3936 
3937  if (regexec_result == REG_NOMATCH)
3938  break;
3939 
3940  if (regexec_result != REG_OKAY)
3941  {
3942  char errMsg[100];
3943 
3945  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3946  ereport(ERROR,
3947  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3948  errmsg("regular expression failed: %s", errMsg)));
3949  }
3950 
3951  /*
3952  * Copy the text to the left of the match position. Note we are given
3953  * character not byte indexes.
3954  */
3955  if (pmatch[0].rm_so - data_pos > 0)
3956  {
3957  int chunk_len;
3958 
3959  chunk_len = charlen_to_bytelen(start_ptr,
3960  pmatch[0].rm_so - data_pos);
3961  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3962 
3963  /*
3964  * Advance start_ptr over that text, to avoid multiple rescans of
3965  * it if the replace_text contains multiple back-references.
3966  */
3967  start_ptr += chunk_len;
3968  data_pos = pmatch[0].rm_so;
3969  }
3970 
3971  /*
3972  * Copy the replace_text. Process back references when the
3973  * replace_text has escape characters.
3974  */
3975  if (have_escape)
3976  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3977  start_ptr, data_pos);
3978  else
3979  appendStringInfoText(&buf, replace_text);
3980 
3981  /* Advance start_ptr and data_pos over the matched text. */
3982  start_ptr += charlen_to_bytelen(start_ptr,
3983  pmatch[0].rm_eo - data_pos);
3984  data_pos = pmatch[0].rm_eo;
3985 
3986  /*
3987  * When global option is off, replace the first instance only.
3988  */
3989  if (!glob)
3990  break;
3991 
3992  /*
3993  * Advance search position. Normally we start the next search at the
3994  * end of the previous match; but if the match was of zero length, we
3995  * have to advance by one character, or we'd just find the same match
3996  * again.
3997  */
3998  search_start = data_pos;
3999  if (pmatch[0].rm_so == pmatch[0].rm_eo)
4000  search_start++;
4001  }
4002 
4003  /*
4004  * Copy the text to the right of the last match.
4005  */
4006  if (data_pos < data_len)
4007  {
4008  int chunk_len;
4009 
4010  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4011  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4012  }
4013 
4014  ret_text = cstring_to_text_with_len(buf.data, buf.len);
4015  pfree(buf.data);
4016  pfree(data);
4017 
4018  return ret_text;
4019 }
4020 
4021 /*
4022  * split_text
4023  * parse input string
4024  * return the fldnum'th field (1 based)
4025  * based on provided field separator
4026  */
4027 Datum
4029 {
4030  text *inputstring = PG_GETARG_TEXT_PP(0);
4031  text *fldsep = PG_GETARG_TEXT_PP(1);
4032  int fldnum = PG_GETARG_INT32(2);
4033  int inputstring_len;
4034  int fldsep_len;
4036  int start_posn;
4037  int end_posn;
4038  text *result_text;
4039 
4040  /* field number is 1 based */
4041  if (fldnum < 1)
4042  ereport(ERROR,
4043  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4044  errmsg("field position must be greater than zero")));
4045 
4046  text_position_setup(inputstring, fldsep, &state);
4047 
4048  /*
4049  * Note: we check the converted string length, not the original, because
4050  * they could be different if the input contained invalid encoding.
4051  */
4052  inputstring_len = state.len1;
4053  fldsep_len = state.len2;
4054 
4055  /* return empty string for empty input string */
4056  if (inputstring_len < 1)
4057  {
4058  text_position_cleanup(&state);
4060  }
4061 
4062  /* empty field separator */
4063  if (fldsep_len < 1)
4064  {
4065  text_position_cleanup(&state);
4066  /* if first field, return input string, else empty string */
4067  if (fldnum == 1)
4068  PG_RETURN_TEXT_P(inputstring);
4069  else
4071  }
4072 
4073  /* identify bounds of first field */
4074  start_posn = 1;
4075  end_posn = text_position_next(1, &state);
4076 
4077  /* special case if fldsep not found at all */
4078  if (end_posn == 0)
4079  {
4080  text_position_cleanup(&state);
4081  /* if field 1 requested, return input string, else empty string */
4082  if (fldnum == 1)
4083  PG_RETURN_TEXT_P(inputstring);
4084  else
4086  }
4087 
4088  while (end_posn > 0 && --fldnum > 0)
4089  {
4090  /* identify bounds of next field */
4091  start_posn = end_posn + fldsep_len;
4092  end_posn = text_position_next(start_posn, &state);
4093  }
4094 
4095  text_position_cleanup(&state);
4096 
4097  if (fldnum > 0)
4098  {
4099  /* N'th field separator not found */
4100  /* if last field requested, return it, else empty string */
4101  if (fldnum == 1)
4102  result_text = text_substring(PointerGetDatum(inputstring),
4103  start_posn,
4104  -1,
4105  true);
4106  else
4107  result_text = cstring_to_text("");
4108  }
4109  else
4110  {
4111  /* non-last field requested */
4112  result_text = text_substring(PointerGetDatum(inputstring),
4113  start_posn,
4114  end_posn - start_posn,
4115  false);
4116  }
4117 
4118  PG_RETURN_TEXT_P(result_text);
4119 }
4120 
4121 /*
4122  * Convenience function to return true when two text params are equal.
4123  */
4124 static bool
4125 text_isequal(text *txt1, text *txt2)
4126 {
4128  PointerGetDatum(txt1),
4129  PointerGetDatum(txt2)));
4130 }
4131 
4132 /*
4133  * text_to_array
4134  * parse input string and return text array of elements,
4135  * based on provided field separator
4136  */
4137 Datum
4139 {
4140  return text_to_array_internal(fcinfo);
4141 }
4142 
4143 /*
4144  * text_to_array_null
4145  * parse input string and return text array of elements,
4146  * based on provided field separator and null string
4147  *
4148  * This is a separate entry point only to prevent the regression tests from
4149  * complaining about different argument sets for the same internal function.
4150  */
4151 Datum
4153 {
4154  return text_to_array_internal(fcinfo);
4155 }
4156 
4157 /*
4158  * common code for text_to_array and text_to_array_null functions
4159  *
4160  * These are not strict so we have to test for null inputs explicitly.
4161  */
4162 static Datum
4164 {
4165  text *inputstring;
4166  text *fldsep;
4167  text *null_string;
4168  int inputstring_len;
4169  int fldsep_len;
4170  char *start_ptr;
4171  text *result_text;
4172  bool is_null;
4173  ArrayBuildState *astate = NULL;
4174 
4175  /* when input string is NULL, then result is NULL too */
4176  if (PG_ARGISNULL(0))
4177  PG_RETURN_NULL();
4178 
4179  inputstring = PG_GETARG_TEXT_PP(0);
4180 
4181  /* fldsep can be NULL */
4182  if (!PG_ARGISNULL(1))
4183  fldsep = PG_GETARG_TEXT_PP(1);
4184  else
4185  fldsep = NULL;
4186 
4187  /* null_string can be NULL or omitted */
4188  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4189  null_string = PG_GETARG_TEXT_PP(2);
4190  else
4191  null_string = NULL;
4192 
4193  if (fldsep != NULL)
4194  {
4195  /*
4196  * Normal case with non-null fldsep. Use the text_position machinery
4197  * to search for occurrences of fldsep.
4198  */
4200  int fldnum;
4201  int start_posn;
4202  int end_posn;
4203  int chunk_len;
4204 
4205  text_position_setup(inputstring, fldsep, &state);
4206 
4207  /*
4208  * Note: we check the converted string length, not the original,
4209  * because they could be different if the input contained invalid
4210  * encoding.
4211  */
4212  inputstring_len = state.len1;
4213  fldsep_len = state.len2;
4214 
4215  /* return empty array for empty input string */
4216  if (inputstring_len < 1)
4217  {
4218  text_position_cleanup(&state);
4220  }
4221 
4222  /*
4223  * empty field separator: return the input string as a one-element
4224  * array
4225  */
4226  if (fldsep_len < 1)
4227  {
4228  Datum elems[1];
4229  bool nulls[1];
4230  int dims[1];
4231  int lbs[1];
4232 
4233  text_position_cleanup(&state);
4234  /* single element can be a NULL too */
4235  is_null = null_string ? text_isequal(inputstring, null_string) : false;
4236 
4237  elems[0] = PointerGetDatum(inputstring);
4238  nulls[0] = is_null;
4239  dims[0] = 1;
4240  lbs[0] = 1;
4241  /* XXX: this hardcodes assumptions about the text type */
4243  1, dims, lbs,
4244  TEXTOID, -1, false, 'i'));
4245  }
4246 
4247  start_posn = 1;
4248  /* start_ptr points to the start_posn'th character of inputstring */
4249  start_ptr = VARDATA_ANY(inputstring);
4250 
4251  for (fldnum = 1;; fldnum++) /* field number is 1 based */
4252  {
4254 
4255  end_posn = text_position_next(start_posn, &state);
4256 
4257  if (end_posn == 0)
4258  {
4259  /* fetch last field */
4260  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4261  }
4262  else
4263  {
4264  /* fetch non-last field */
4265  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
4266  }
4267 
4268  /* must build a temp text datum to pass to accumArrayResult */
4269  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4270  is_null = null_string ? text_isequal(result_text, null_string) : false;
4271 
4272  /* stash away this field */
4273  astate = accumArrayResult(astate,
4274  PointerGetDatum(result_text),
4275  is_null,
4276  TEXTOID,
4278 
4279  pfree(result_text);
4280 
4281  if (end_posn == 0)
4282  break;
4283 
4284  start_posn = end_posn;
4285  start_ptr += chunk_len;
4286  start_posn += fldsep_len;
4287  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
4288  }
4289 
4290  text_position_cleanup(&state);
4291  }
4292  else
4293  {
4294  /*
4295  * When fldsep is NULL, each character in the inputstring becomes an
4296  * element in the result array. The separator is effectively the
4297  * space between characters.
4298  */
4299  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4300 
4301  /* return empty array for empty input string */
4302  if (inputstring_len < 1)
4304 
4305  start_ptr = VARDATA_ANY(inputstring);
4306 
4307  while (inputstring_len > 0)
4308  {
4309  int chunk_len = pg_mblen(start_ptr);
4310 
4312 
4313  /* must build a temp text datum to pass to accumArrayResult */
4314  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4315  is_null = null_string ? text_isequal(result_text, null_string) : false;
4316 
4317  /* stash away this field */
4318  astate = accumArrayResult(astate,
4319  PointerGetDatum(result_text),
4320  is_null,
4321  TEXTOID,
4323 
4324  pfree(result_text);
4325 
4326  start_ptr += chunk_len;
4327  inputstring_len -= chunk_len;
4328  }
4329  }
4330 
4333 }
4334 
4335 /*
4336  * array_to_text
4337  * concatenate Cstring representation of input array elements
4338  * using provided field separator
4339  */
4340 Datum
4342 {
4344  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4345 
4346  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4347 }
4348 
4349 /*
4350  * array_to_text_null
4351  * concatenate Cstring representation of input array elements
4352  * using provided field separator and null string
4353  *
4354  * This version is not strict so we have to test for null inputs explicitly.
4355  */
4356 Datum
4358 {
4359  ArrayType *v;
4360  char *fldsep;
4361  char *null_string;
4362 
4363  /* returns NULL when first or second parameter is NULL */
4364  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4365  PG_RETURN_NULL();
4366 
4367  v = PG_GETARG_ARRAYTYPE_P(0);
4368  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4369 
4370  /* NULL null string is passed through as a null pointer */
4371  if (!PG_ARGISNULL(2))
4372  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4373  else
4374  null_string = NULL;
4375 
4376  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4377 }
4378 
4379 /*
4380  * common code for array_to_text and array_to_text_null functions
4381  */
4382 static text *
4384  const char *fldsep, const char *null_string)
4385 {
4386  text *result;
4387  int nitems,
4388  *dims,
4389  ndims;
4390  Oid element_type;
4391  int typlen;
4392  bool typbyval;
4393  char typalign;
4395  bool printed = false;
4396  char *p;
4397  bits8 *bitmap;
4398  int bitmask;
4399  int i;
4400  ArrayMetaState *my_extra;
4401 
4402  ndims = ARR_NDIM(v);
4403  dims = ARR_DIMS(v);
4404  nitems = ArrayGetNItems(ndims, dims);
4405 
4406  /* if there are no elements, return an empty string */
4407  if (nitems == 0)
4408  return cstring_to_text_with_len("", 0);
4409 
4410  element_type = ARR_ELEMTYPE(v);
4411  initStringInfo(&buf);
4412 
4413  /*
4414  * We arrange to look up info about element type, including its output
4415  * conversion proc, only once per series of calls, assuming the element
4416  * type doesn't change underneath us.
4417  */
4418  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4419  if (my_extra == NULL)
4420  {
4421  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4422  sizeof(ArrayMetaState));
4423  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4424  my_extra->element_type = ~element_type;
4425  }
4426 
4427  if (my_extra->element_type != element_type)
4428  {
4429  /*
4430  * Get info about element type, including its output conversion proc
4431  */
4432  get_type_io_data(element_type, IOFunc_output,
4433  &my_extra->typlen, &my_extra->typbyval,
4434  &my_extra->typalign, &my_extra->typdelim,
4435  &my_extra->typioparam, &my_extra->typiofunc);
4436  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4437  fcinfo->flinfo->fn_mcxt);
4438  my_extra->element_type = element_type;
4439  }
4440  typlen = my_extra->typlen;
4441  typbyval = my_extra->typbyval;
4442  typalign = my_extra->typalign;
4443 
4444  p = ARR_DATA_PTR(v);
4445  bitmap = ARR_NULLBITMAP(v);
4446  bitmask = 1;
4447 
4448  for (i = 0; i < nitems; i++)
4449  {
4450  Datum itemvalue;
4451  char *value;
4452 
4453  /* Get source element, checking for NULL */
4454  if (bitmap && (*bitmap & bitmask) == 0)
4455  {
4456  /* if null_string is NULL, we just ignore null elements */
4457  if (null_string != NULL)
4458  {
4459  if (printed)
4460  appendStringInfo(&buf, "%s%s", fldsep, null_string);
4461  else
4462  appendStringInfoString(&buf, null_string);
4463  printed = true;
4464  }
4465  }
4466  else
4467  {
4468  itemvalue = fetch_att(p, typbyval, typlen);
4469 
4470  value = OutputFunctionCall(&my_extra->proc, itemvalue);
4471 
4472  if (printed)
4473  appendStringInfo(&buf, "%s%s", fldsep, value);
4474  else
4475  appendStringInfoString(&buf, value);
4476  printed = true;
4477 
4478  p = att_addlength_pointer(p, typlen, p);
4479  p = (char *) att_align_nominal(p, typalign);
4480  }
4481 
4482  /* advance bitmap pointer if any */
4483  if (bitmap)
4484  {
4485  bitmask <<= 1;
4486  if (bitmask == 0x100)
4487  {
4488  bitmap++;
4489  bitmask = 1;
4490  }
4491  }
4492  }
4493 
4494  result = cstring_to_text_with_len(buf.data, buf.len);
4495  pfree(buf.data);
4496 
4497  return result;
4498 }
4499 
4500 #define HEXBASE 16
4501 /*
4502  * Convert an int32 to a string containing a base 16 (hex) representation of
4503  * the number.
4504  */
4505 Datum
4507 {
4509  char *ptr;
4510  const char *digits = "0123456789abcdef";
4511  char buf[32]; /* bigger than needed, but reasonable */
4512 
4513  ptr = buf + sizeof(buf) - 1;
4514  *ptr = '\0';
4515 
4516  do
4517  {
4518  *--ptr = digits[value % HEXBASE];
4519  value /= HEXBASE;
4520  } while (ptr > buf && value);
4521 
4523 }
4524 
4525 /*
4526  * Convert an int64 to a string containing a base 16 (hex) representation of
4527  * the number.
4528  */
4529 Datum
4531 {
4532  uint64 value = (uint64) PG_GETARG_INT64(0);
4533  char *ptr;
4534  const char *digits = "0123456789abcdef";
4535  char buf[32]; /* bigger than needed, but reasonable */
4536 
4537  ptr = buf + sizeof(buf) - 1;
4538  *ptr = '\0';
4539 
4540  do
4541  {
4542  *--ptr = digits[value % HEXBASE];
4543  value /= HEXBASE;
4544  } while (ptr > buf && value);
4545 
4547 }
4548 
4549 /*
4550  * Create an md5 hash of a text string and return it as hex
4551  *
4552  * md5 produces a 16 byte (128 bit) hash; double it for hex
4553  */
4554 #define MD5_HASH_LEN 32
4555 
4556 Datum
4558 {
4559  text *in_text = PG_GETARG_TEXT_PP(0);
4560  size_t len;
4561  char hexsum[MD5_HASH_LEN + 1];
4562 
4563  /* Calculate the length of the buffer using varlena metadata */
4564  len = VARSIZE_ANY_EXHDR(in_text);
4565 
4566  /* get the hash result */
4567  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
4568  ereport(ERROR,
4569  (errcode(ERRCODE_OUT_OF_MEMORY),
4570  errmsg("out of memory")));
4571 
4572  /* convert to text and return it */
4574 }
4575 
4576 /*
4577  * Create an md5 hash of a bytea field and return it as a hex string:
4578  * 16-byte md5 digest is represented in 32 hex characters.
4579  */
4580 Datum
4582 {
4583  bytea *in = PG_GETARG_BYTEA_PP(0);
4584  size_t len;
4585  char hexsum[MD5_HASH_LEN + 1];
4586 
4587  len = VARSIZE_ANY_EXHDR(in);
4588  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
4589  ereport(ERROR,
4590  (errcode(ERRCODE_OUT_OF_MEMORY),
4591  errmsg("out of memory")));
4592 
4594 }
4595 
4596 /*
4597  * Return the size of a datum, possibly compressed
4598  *
4599  * Works on any data type
4600  */
4601 Datum
4603 {
4605  int32 result;
4606  int typlen;
4607 
4608  /* On first call, get the input type's typlen, and save at *fn_extra */
4609  if (fcinfo->flinfo->fn_extra == NULL)
4610  {
4611  /* Lookup the datatype of the supplied argument */
4612  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
4613 
4614  typlen = get_typlen(argtypeid);
4615  if (typlen == 0) /* should not happen */
4616  elog(ERROR, "cache lookup failed for type %u", argtypeid);
4617 
4618  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4619  sizeof(int));
4620  *((int *) fcinfo->flinfo->fn_extra) = typlen;
4621  }
4622  else
4623  typlen = *((int *) fcinfo->flinfo->fn_extra);
4624 
4625  if (typlen == -1)
4626  {
4627  /* varlena type, possibly toasted */
4628  result = toast_datum_size(value);
4629  }
4630  else if (typlen == -2)
4631  {
4632  /* cstring */
4633  result = strlen(DatumGetCString(value)) + 1;
4634  }
4635  else
4636  {
4637  /* ordinary fixed-width type */
4638  result = typlen;
4639  }
4640 
4641  PG_RETURN_INT32(result);
4642 }
4643 
4644 /*
4645  * string_agg - Concatenates values and returns string.
4646  *
4647  * Syntax: string_agg(value text, delimiter text) RETURNS text
4648  *
4649  * Note: Any NULL values are ignored. The first-call delimiter isn't
4650  * actually used at all, and on subsequent calls the delimiter precedes
4651  * the associated value.
4652  */
4653 
4654 /* subroutine to initialize state */
4655 static StringInfo
4657 {
4658  StringInfo state;
4659  MemoryContext aggcontext;
4660  MemoryContext oldcontext;
4661 
4662  if (!AggCheckCallContext(fcinfo, &aggcontext))
4663  {
4664  /* cannot be called directly because of internal-type argument */
4665  elog(ERROR, "string_agg_transfn called in non-aggregate context");
4666  }
4667 
4668  /*
4669  * Create state in aggregate context. It'll stay there across subsequent
4670  * calls.
4671  */
4672  oldcontext = MemoryContextSwitchTo(aggcontext);
4673  state = makeStringInfo();
4674  MemoryContextSwitchTo(oldcontext);
4675 
4676  return state;
4677 }
4678 
4679 Datum
4681 {
4682  StringInfo state;
4683 
4684  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4685 
4686  /* Append the value unless null. */
4687  if (!PG_ARGISNULL(1))
4688  {
4689  /* On the first time through, we ignore the delimiter. */
4690  if (state == NULL)
4691  state = makeStringAggState(fcinfo);
4692  else if (!PG_ARGISNULL(2))
4693  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
4694 
4695  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
4696  }
4697 
4698  /*
4699  * The transition type for string_agg() is declared to be "internal",
4700  * which is a pass-by-value type the same size as a pointer.
4701  */
4702  PG_RETURN_POINTER(state);
4703 }
4704 
4705 Datum
4707 {
4708  StringInfo state;
4709 
4710  /* cannot be called directly because of internal-type argument */
4711  Assert(AggCheckCallContext(fcinfo, NULL));
4712 
4713  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4714 
4715  if (state != NULL)
4717  else
4718  PG_RETURN_NULL();
4719 }
4720 
4721 /*
4722  * Implementation of both concat() and concat_ws().
4723  *
4724  * sepstr is the separator string to place between values.
4725  * argidx identifies the first argument to concatenate (counting from zero).
4726  * Returns NULL if result should be NULL, else text value.
4727  */
4728 static text *
4729 concat_internal(const char *sepstr, int argidx,
4730  FunctionCallInfo fcinfo)
4731 {
4732  text *result;
4733  StringInfoData str;
4734  bool first_arg = true;
4735  int i;
4736 
4737  /*
4738  * concat(VARIADIC some-array) is essentially equivalent to
4739  * array_to_text(), ie concat the array elements with the given separator.
4740  * So we just pass the case off to that code.
4741  */
4742  if (get_fn_expr_variadic(fcinfo->flinfo))
4743  {
4744  ArrayType *arr;
4745 
4746  /* Should have just the one argument */
4747  Assert(argidx == PG_NARGS() - 1);
4748 
4749  /* concat(VARIADIC NULL) is defined as NULL */
4750  if (PG_ARGISNULL(argidx))
4751  return NULL;
4752 
4753  /*
4754  * Non-null argument had better be an array. We assume that any call
4755  * context that could let get_fn_expr_variadic return true will have
4756  * checked that a VARIADIC-labeled parameter actually is an array. So
4757  * it should be okay to just Assert that it's an array rather than
4758  * doing a full-fledged error check.
4759  */
4761 
4762  /* OK, safe to fetch the array value */
4763  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4764 
4765  /*
4766  * And serialize the array. We tell array_to_text to ignore null
4767  * elements, which matches the behavior of the loop below.
4768  */
4769  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4770  }
4771 
4772  /* Normal case without explicit VARIADIC marker */
4773  initStringInfo(&str);
4774 
4775  for (i = argidx; i < PG_NARGS(); i++)
4776  {
4777  if (!PG_ARGISNULL(i))
4778  {
4780  Oid valtype;
4781  Oid typOutput;
4782  bool typIsVarlena;
4783 
4784  /* add separator if appropriate */
4785  if (first_arg)
4786  first_arg = false;
4787  else
4788  appendStringInfoString(&str, sepstr);
4789 
4790  /* call the appropriate type output function, append the result */
4791  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4792  if (!OidIsValid(valtype))
4793  elog(ERROR, "could not determine data type of concat() input");
4794  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4796  OidOutputFunctionCall(typOutput, value));
4797  }
4798  }
4799 
4800  result = cstring_to_text_with_len(str.data, str.len);
4801  pfree(str.data);
4802 
4803  return result;
4804 }
4805 
4806 /*
4807  * Concatenate all arguments. NULL arguments are ignored.
4808  */
4809 Datum
4811 {
4812  text *result;
4813 
4814  result = concat_internal("", 0, fcinfo);
4815  if (result == NULL)
4816  PG_RETURN_NULL();
4817  PG_RETURN_TEXT_P(result);
4818 }
4819 
4820 /*
4821  * Concatenate all but first argument value with separators. The first
4822  * parameter is used as the separator. NULL arguments are ignored.
4823  */
4824 Datum
4826 {
4827  char *sep;
4828  text *result;
4829 
4830  /* return NULL when separator is NULL */
4831  if (PG_ARGISNULL(0))
4832  PG_RETURN_NULL();
4834 
4835  result = concat_internal(sep, 1, fcinfo);
4836  if (result == NULL)
4837  PG_RETURN_NULL();
4838  PG_RETURN_TEXT_P(result);
4839 }
4840 
4841 /*
4842  * Return first n characters in the string. When n is negative,
4843  * return all but last |n| characters.
4844  */
4845 Datum
4847 {
4848  text *str = PG_GETARG_TEXT_PP(0);
4849  const char *p = VARDATA_ANY(str);
4850  int len = VARSIZE_ANY_EXHDR(str);
4851  int n = PG_GETARG_INT32(1);
4852  int rlen;
4853 
4854  if (n < 0)
4855  n = pg_mbstrlen_with_len(p, len) + n;
4856  rlen = pg_mbcharcliplen(p, len, n);
4857 
4859 }
4860 
4861 /*
4862  * Return last n characters in the string. When n is negative,
4863  * return all but first |n| characters.
4864  */
4865 Datum
4867 {
4868  text *str = PG_GETARG_TEXT_PP(0);
4869  const char *p = VARDATA_ANY(str);
4870  int len = VARSIZE_ANY_EXHDR(str);
4871  int n = PG_GETARG_INT32(1);
4872  int off;
4873 
4874  if (n < 0)
4875  n = -n;
4876  else
4877  n = pg_mbstrlen_with_len(p, len) - n;
4878  off = pg_mbcharcliplen(p, len, n);
4879 
4880  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4881 }
4882 
4883 /*
4884  * Return reversed string
4885  */
4886 Datum
4888 {
4889  text *str = PG_GETARG_TEXT_PP(0);
4890  const char *p = VARDATA_ANY(str);
4891  int len = VARSIZE_ANY_EXHDR(str);
4892  const char *endp = p + len;
4893  text *result;
4894  char *dst;
4895 
4896  result = palloc(len + VARHDRSZ);
4897  dst = (char *) VARDATA(result) + len;
4898  SET_VARSIZE(result, len + VARHDRSZ);
4899 
4901  {
4902  /* multibyte version */
4903  while (p < endp)
4904  {
4905  int sz;
4906 
4907  sz = pg_mblen(p);
4908  dst -= sz;
4909  memcpy(dst, p, sz);
4910  p += sz;
4911  }
4912  }
4913  else
4914  {
4915  /* single byte version */
4916  while (p < endp)
4917  *(--dst) = *p++;
4918  }
4919 
4920  PG_RETURN_TEXT_P(result);
4921 }
4922 
4923 
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */

/*
 * Step ptr forward by one character and raise an error if that reaches
 * end_ptr, i.e. the format string ended in the middle of a specifier.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    do { \
        if (++(ptr) >= (end_ptr)) \
            ereport(ERROR, \
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
                     errmsg("unterminated format() type specifier"), \
                     errhint("For a single \"%%\" use \"%%%%\"."))); \
    } while (0)
4937 
4938 /*
4939  * Returns a formatted string
4940  */
4941 Datum
4943 {
4944  text *fmt;
4945  StringInfoData str;
4946  const char *cp;
4947  const char *start_ptr;
4948  const char *end_ptr;
4949  text *result;
4950  int arg;
4951  bool funcvariadic;
4952  int nargs;
4953  Datum *elements = NULL;
4954  bool *nulls = NULL;
4955  Oid element_type = InvalidOid;
4956  Oid prev_type = InvalidOid;
4957  Oid prev_width_type = InvalidOid;
4958  FmgrInfo typoutputfinfo;
4959  FmgrInfo typoutputinfo_width;
4960 
4961  /* When format string is null, immediately return null */
4962  if (PG_ARGISNULL(0))
4963  PG_RETURN_NULL();
4964 
4965  /* If argument is marked VARIADIC, expand array into elements */
4966  if (get_fn_expr_variadic(fcinfo->flinfo))
4967  {
4968  ArrayType *arr;
4969  int16 elmlen;
4970  bool elmbyval;
4971  char elmalign;
4972  int nitems;
4973 
4974  /* Should have just the one argument */
4975  Assert(PG_NARGS() == 2);
4976 
4977  /* If argument is NULL, we treat it as zero-length array */
4978  if (PG_ARGISNULL(1))
4979  nitems = 0;
4980  else
4981  {
4982  /*
4983  * Non-null argument had better be an array. We assume that any
4984  * call context that could let get_fn_expr_variadic return true
4985  * will have checked that a VARIADIC-labeled parameter actually is
4986  * an array. So it should be okay to just Assert that it's an
4987  * array rather than doing a full-fledged error check.
4988  */
4990 
4991  /* OK, safe to fetch the array value */
4992  arr = PG_GETARG_ARRAYTYPE_P(1);
4993 
4994  /* Get info about array element type */
4995  element_type = ARR_ELEMTYPE(arr);
4996  get_typlenbyvalalign(element_type,
4997  &elmlen, &elmbyval, &elmalign);
4998 
4999  /* Extract all array elements */
5000  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5001  &elements, &nulls, &nitems);
5002  }
5003 
5004  nargs = nitems + 1;
5005  funcvariadic = true;
5006  }
5007  else
5008  {
5009  /* Non-variadic case, we'll process the arguments individually */
5010  nargs = PG_NARGS();
5011  funcvariadic = false;
5012  }
5013 
5014  /* Setup for main loop. */
5015  fmt = PG_GETARG_TEXT_PP(0);
5016  start_ptr = VARDATA_ANY(fmt);
5017  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5018  initStringInfo(&str);
5019  arg = 1; /* next argument position to print */
5020 
5021  /* Scan format string, looking for conversion specifiers. */
5022  for (cp = start_ptr; cp < end_ptr; cp++)
5023  {
5024  int argpos;
5025  int widthpos;
5026  int flags;
5027  int width;
5028  Datum value;
5029  bool isNull;
5030  Oid typid;
5031 
5032  /*
5033  * If it's not the start of a conversion specifier, just copy it to
5034  * the output buffer.
5035  */
5036  if (*cp != '%')
5037  {
5038  appendStringInfoCharMacro(&str, *cp);
5039  continue;
5040  }
5041 
5042  ADVANCE_PARSE_POINTER(cp, end_ptr);
5043 
5044  /* Easy case: %% outputs a single % */
5045  if (*cp == '%')
5046  {
5047  appendStringInfoCharMacro(&str, *cp);
5048  continue;
5049  }
5050 
5051  /* Parse the optional portions of the format specifier */
5052  cp = text_format_parse_format(cp, end_ptr,
5053  &argpos, &widthpos,
5054  &flags, &width);
5055 
5056  /*
5057  * Next we should see the main conversion specifier. Whether or not
5058  * an argument position was present, it's known that at least one
5059  * character remains in the string at this point. Experience suggests
5060  * that it's worth checking that that character is one of the expected
5061  * ones before we try to fetch arguments, so as to produce the least
5062  * confusing response to a mis-formatted specifier.
5063  */
5064  if (strchr("sIL", *cp) == NULL)
5065  ereport(ERROR,
5066  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5067  errmsg("unrecognized format() type specifier \"%c\"",
5068  *cp),
5069  errhint("For a single \"%%\" use \"%%%%\".")));
5070 
5071  /* If indirect width was specified, get its value */
5072  if (widthpos >= 0)
5073  {
5074  /* Collect the specified or next argument position */
5075  if (widthpos > 0)
5076  arg = widthpos;
5077  if (arg >= nargs)
5078  ereport(ERROR,
5079  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5080  errmsg("too few arguments for format()")));
5081 
5082  /* Get the value and type of the selected argument */
5083  if (!funcvariadic)
5084  {
5085  value = PG_GETARG_DATUM(arg);
5086  isNull = PG_ARGISNULL(arg);
5087  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5088  }
5089  else
5090  {
5091  value = elements[arg - 1];
5092  isNull = nulls[arg - 1];
5093  typid = element_type;
5094  }
5095  if (!OidIsValid(typid))
5096  elog(ERROR, "could not determine data type of format() input");
5097 
5098  arg++;
5099 
5100  /* We can treat NULL width the same as zero */
5101  if (isNull)
5102  width = 0;
5103  else if (typid == INT4OID)
5104  width = DatumGetInt32(value);
5105  else if (typid == INT2OID)
5106  width = DatumGetInt16(value);
5107  else
5108  {
5109  /* For less-usual datatypes, convert to text then to int */
5110  char *str;
5111 
5112  if (typid != prev_width_type)
5113  {
5114  Oid typoutputfunc;
5115  bool typIsVarlena;
5116 
5117  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5118  fmgr_info(typoutputfunc, &typoutputinfo_width);
5119  prev_width_type = typid;
5120  }
5121 
5122  str = OutputFunctionCall(&typoutputinfo_width, value);
5123 
5124  /* pg_atoi will complain about bad data or overflow */
5125  width = pg_atoi(str, sizeof(int), '\0');
5126 
5127  pfree(str);
5128  }
5129  }
5130 
5131  /* Collect the specified or next argument position */
5132  if (argpos > 0)
5133  arg = argpos;
5134  if (arg >= nargs)
5135  ereport(ERROR,
5136  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5137  errmsg("too few arguments for format()")));
5138 
5139  /* Get the value and type of the selected argument */
5140  if (!funcvariadic)
5141  {
5142  value = PG_GETARG_DATUM(arg);
5143  isNull = PG_ARGISNULL(arg);
5144  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5145  }
5146  else
5147  {
5148  value = elements[arg - 1];
5149  isNull = nulls[arg - 1];
5150  typid = element_type;
5151  }
5152  if (!OidIsValid(typid))
5153  elog(ERROR, "could not determine data type of format() input");
5154 
5155  arg++;
5156 
5157  /*
5158  * Get the appropriate typOutput function, reusing previous one if
5159  * same type as previous argument. That's particularly useful in the
5160  * variadic-array case, but often saves work even for ordinary calls.
5161  */
5162  if (typid != prev_type)
5163  {
5164  Oid typoutputfunc;
5165  bool typIsVarlena;
5166 
5167  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5168  fmgr_info(typoutputfunc, &typoutputfinfo);
5169  prev_type = typid;
5170  }
5171 
5172  /*
5173  * And now we can format the value.
5174  */
5175  switch (*cp)
5176  {
5177  case 's':
5178  case 'I':
5179  case 'L':
5180  text_format_string_conversion(&str, *cp, &typoutputfinfo,
5181  value, isNull,
5182  flags, width);
5183  break;
5184  default:
5185  /* should not get here, because of previous check */
5186  ereport(ERROR,
5187  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5188  errmsg("unrecognized format() type specifier \"%c\"",
5189  *cp),
5190  errhint("For a single \"%%\" use \"%%%%\".")));
5191  break;
5192  }
5193  }
5194 
5195  /* Don't need deconstruct_array results anymore. */
5196  if (elements != NULL)
5197  pfree(elements);
5198  if (nulls != NULL)
5199  pfree(nulls);
5200 
5201  /* Generate results. */
5202  result = cstring_to_text_with_len(str.data, str.len);
5203  pfree(str.data);
5204 
5205  PG_RETURN_TEXT_P(result);
5206 }
5207 
5208 /*
5209  * Parse contiguous digits as a decimal number.
5210  *
5211  * Returns true if some digits could be parsed.
5212  * The value is returned into *value, and *ptr is advanced to the next
5213  * character to be parsed.
5214  *
5215  * Note parsing invariant: at least one character is known available before
5216  * string end (end_ptr) at entry, and this is still true at exit.
5217  */
5218 static bool
5219 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5220 {
5221  bool found = false;
5222  const char *cp = *ptr;
5223  int val = 0;
5224 
5225  while (*cp >= '0' && *cp <= '9')
5226  {
5227  int newval = val * 10 + (*cp - '0');
5228 
5229  if (newval / 10 != val) /* overflow? */
5230  ereport(ERROR,
5231  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5232  errmsg("number is out of range")));
5233  val = newval;
5234  ADVANCE_PARSE_POINTER(cp, end_ptr);
5235  found = true;
5236  }
5237 
5238  *ptr = cp;
5239  *value = val;
5240 
5241  return found;
5242 }
5243 
5244 /*
5245  * Parse a format specifier (generally following the SUS printf spec).
5246  *
5247  * We have already advanced over the initial '%', and we are looking for
5248  * [argpos][flags][width]type (but the type character is not consumed here).
5249  *
5250  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5251  * Output parameters:
5252  * argpos: argument position for value to be printed. -1 means unspecified.
5253  * widthpos: argument position for width. Zero means the argument position
5254  * was unspecified (ie, take the next arg) and -1 means no width
5255  * argument (width was omitted or specified as a constant).
5256  * flags: bitmask of flags.
5257  * width: directly-specified width value. Zero means the width was omitted
5258  * (note it's not necessary to distinguish this case from an explicit
5259  * zero width value).
5260  *
5261  * The function result is the next character position to be parsed, ie, the
5262  * location where the type character is/should be.
5263  *
5264  * Note parsing invariant: at least one character is known available before
5265  * string end (end_ptr) at entry, and this is still true at exit.
5266  */
5267 static const char *
5268 text_format_parse_format(const char *start_ptr, const char *end_ptr,
5269  int *argpos, int *widthpos,
5270  int *flags, int *width)
5271 {
5272  const char *cp = start_ptr;
5273  int n;
5274 
5275  /* set defaults for output parameters */
5276  *argpos = -1;
5277  *widthpos = -1;
5278  *flags = 0;
5279  *width = 0;
5280 
5281  /* try to identify first number */
5282  if (text_format_parse_digits(&cp, end_ptr, &n))
5283  {
5284  if (*cp != '$')
5285  {
5286  /* Must be just a width and a type, so we're done */
5287  *width = n;
5288  return cp;
5289  }
5290  /* The number was argument position */
5291  *argpos = n;
5292  /* Explicit 0 for argument index is immediately refused */
5293  if (n == 0)
5294  ereport(ERROR,
5295  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5296  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5297  ADVANCE_PARSE_POINTER(cp, end_ptr);
5298  }
5299 
5300  /* Handle flags (only minus is supported now) */
5301  while (*cp == '-')
5302  {
5303  *flags |= TEXT_FORMAT_FLAG_MINUS;
5304  ADVANCE_PARSE_POINTER(cp, end_ptr);
5305  }
5306 
5307  if (*cp == '*')
5308  {
5309  /* Handle indirect width */
5310  ADVANCE_PARSE_POINTER(cp, end_ptr);
5311  if (text_format_parse_digits(&cp, end_ptr, &n))
5312  {
5313  /* number in this position must be closed by $ */
5314  if (*cp != '$')
5315  ereport(ERROR,
5316  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5317  errmsg("width argument position must be ended by \"$\"")));
5318  /* The number was width argument position */
5319  *widthpos = n;
5320  /* Explicit 0 for argument index is immediately refused */
5321  if (n == 0)
5322  ereport(ERROR,
5323  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5324  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5325  ADVANCE_PARSE_POINTER(cp, end_ptr);
5326  }
5327  else
5328  *widthpos = 0; /* width's argument position is unspecified */
5329  }
5330  else
5331  {
5332  /* Check for direct width specification */
5333  if (text_format_parse_digits(&cp, end_ptr, &n))
5334  *width = n;
5335  }
5336 
5337  /* cp should now be pointing at type character */
5338  return cp;
5339 }
5340 
5341 /*
5342  * Format a %s, %I, or %L conversion
5343  */
5344 static void
5346  FmgrInfo *typOutputInfo,
5347  Datum value, bool isNull,
5348  int flags, int width)
5349 {
5350  char *str;
5351 
5352  /* Handle NULL arguments before trying to stringify the value. */
5353  if (isNull)
5354  {
5355  if (conversion == 's')
5356  text_format_append_string(buf, "", flags, width);
5357  else if (conversion == 'L')
5358  text_format_append_string(buf, "NULL", flags, width);
5359  else if (conversion == 'I')
5360  ereport(ERROR,
5361  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5362  errmsg("null values cannot be formatted as an SQL identifier")));
5363  return;
5364  }
5365 
5366  /* Stringify. */
5367  str = OutputFunctionCall(typOutputInfo, value);
5368 
5369  /* Escape. */
5370  if (conversion == 'I')
5371  {
5372  /* quote_identifier may or may not allocate a new string. */
5373  text_format_append_string(buf, quote_identifier(str), flags, width);
5374  }
5375  else if (conversion == 'L')
5376  {
5377  char *qstr = quote_literal_cstr(str);
5378 
5379  text_format_append_string(buf, qstr, flags, width);
5380  /* quote_literal_cstr() always allocates a new string */
5381  pfree(qstr);
5382  }
5383  else
5384  text_format_append_string(buf, str, flags, width);
5385 
5386  /* Cleanup. */
5387  pfree(str);
5388 }
5389 
5390 /*
5391  * Append str to buf, padding as directed by flags/width
5392  */
5393 static void
5395  int flags, int width)
5396 {
5397  bool align_to_left = false;
5398  int len;
5399 
5400  /* fast path for typical easy case */
5401  if (width == 0)
5402  {
5403  appendStringInfoString(buf, str);
5404  return;
5405  }
5406 
5407  if (width < 0)
5408  {
5409  /* Negative width: implicit '-' flag, then take absolute value */
5410  align_to_left = true;
5411  /* -INT_MIN is undefined */
5412  if (width <= INT_MIN)
5413  ereport(ERROR,
5414  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5415  errmsg("number is out of range")));
5416  width = -width;
5417  }
5418  else if (flags & TEXT_FORMAT_FLAG_MINUS)
5419  align_to_left = true;
5420 
5421  len = pg_mbstrlen(str);
5422  if (align_to_left)
5423  {
5424  /* left justify */
5425  appendStringInfoString(buf, str);
5426  if (len < width)
5427  appendStringInfoSpaces(buf, width - len);
5428  }
5429  else
5430  {
5431  /* right justify */
5432  if (len < width)
5433  appendStringInfoSpaces(buf, width - len);
5434  appendStringInfoString(buf, str);
5435  }
5436 }
5437 
5438 /*
5439  * text_format_nv - nonvariadic wrapper for text_format function.
5440  *
5441  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5442  * which checks that all built-in functions that share the implementing C
5443  * function take the same number of arguments.
5444  */
5445 Datum
5447 {
5448  return text_format(fcinfo);
5449 }
5450 
/*
 * Helper function for Levenshtein distance functions.  Faster than memcmp(),
 * for this use case.
 *
 * Returns true when the first len bytes of s1 and s2 are identical (always
 * true for len <= 0).  Bytes are compared from the last one backwards.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
    while (len > 0)
    {
        len--;
        if (s1[len] != s2[len])
            return false;
    }
    return true;
}
5466 
5467 /* Expand each Levenshtein distance variant */
5468 #include "levenshtein.c"
5469 #define LEVENSHTEIN_LESS_EQUAL
5470 #include "levenshtein.c"
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2693
#define PG_CACHE_LINE_SIZE
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4138
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2829
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2026
Value * makeString(char *str)
Definition: value.c:53
#define COLLPROVIDER_ICU
Definition: pg_collation.h:85
signed short int16
Definition: c.h:255
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:107
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:351
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:321
#define DatumGetUInt32(X)
Definition: postgres.h:492
#define NIL
Definition: pg_list.h:69
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:4942
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1993
int length(const List *list)
Definition: list.c:1271
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:4929
Definition: fmgr.h:56
text * replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob)
Definition: varlena.c:3893
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:313
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3099
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:873
Datum split_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4028
int errhint(const char *fmt,...)
Definition: elog.c:987
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1022
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2632
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define VARDATA(PTR)
Definition: postgres.h:303
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
MemoryContext fn_mcxt
Definition: fmgr.h:65
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:144
#define MD5_HASH_LEN
Definition: varlena.c:4554
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:10284
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1702
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:2645
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:268
#define DatumGetInt32(X)
Definition: postgres.h:478
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:2677
#define HEXBASE
Definition: varlena.c:4500
#define TEXTOID
Definition: pg_type.h:324
#define VARSIZE(PTR)
Definition: postgres.h:304
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3672
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:3574
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:5345
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2021
#define PointerGetDatum(X)
Definition: postgres.h:562
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:131
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:530
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
static void text_position_setup(text *t1, text *t2, TextPositionState *state)
Definition: varlena.c:1119
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:359
#define VARHDRSZ
Definition: c.h:445
Datum md5_bytea(PG_FUNCTION_ARGS)
Definition: varlena.c:4581
char * pstrdup(const char *in)
Definition: mcxt.c:1077
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:519
regoff_t rm_so
Definition: regex.h:85
#define DatumGetTextPP(X)
Definition: fmgr.h:256
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
StringInfoData * StringInfo
Definition: stringinfo.h:43
#define Min(x, y)
Definition: c.h:806
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
union pg_locale_struct::@120 info
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:278
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2226
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2201
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:267
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:2763
#define INT4OID
Definition: pg_type.h:316
void canonicalize_path(char *path)
Definition: path.c:254
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2051
int errcode(int sqlerrcode)
Definition: elog.c:575
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:163
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
return result
Definition: formatting.c:1632
#define DatumGetByteaPP(X)
Definition: fmgr.h:255
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3067
pg_wchar * wstr2
Definition: varlena.c:52
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:482
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4680
Datum md5_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4557
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:329
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3424
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:2838
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3470
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:622
#define OidIsValid(objectId)
Definition: c.h:538
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1777
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:379
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:213
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1034
bool trace_sort
Definition: tuplesort.c:155
#define PG_GET_COLLATION()
Definition: fmgr.h:163
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2908
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:4810
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:660
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:5394
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4357
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:4825
regoff_t rm_eo
Definition: regex.h:86
signed int int32
Definition: c.h:256
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:2796
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1956
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1667
static int32 text_length(Datum str)
Definition: varlena.c:640
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:805
bool typbyval
Definition: array.h:221
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:187
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:4530
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:314
static Datum text_to_array_internal(PG_FUNCTION_ARGS)
Definition: varlena.c:4163
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:3534
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3372
FmgrInfo * flinfo
Definition: fmgr.h:79
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:244
#define wcscoll_l
Definition: win32.h:348
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:127
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:65
unsigned hex_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:126
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4341
void pfree(void *pointer)
Definition: mcxt.c:950
Size toast_raw_datum_size(Datum value)
Definition: tuptoaster.c:353
#define REG_OKAY
Definition: regex.h:137
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:78
Datum string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4706
Datum textoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:1011
#define ERROR
Definition: elog.h:43
char * s1
static bool check_replace_text_has_escape_char(const text *replace_text)
Definition: varlena.c:3755
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1128
#define DatumGetCString(X)
Definition: postgres.h:572
Size toast_datum_size(Datum value)
Definition: tuptoaster.c:409
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:1909
Datum byteage(PG_FUNCTION_ARGS)
Definition: varlena.c:3594
#define ARR_DIMS(a)
Definition: array.h:275
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
MemoryContext ssup_cxt
Definition: sortsupport.h:66
struct varlena * pg_detoast_datum_packed(struct varlena *datum)
Definition: fmgr.c:1863
static int text_position_next(int start_pos, TextPositionState *state)
Definition: varlena.c:1231
Datum text_to_array_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4152
#define MAXPGPATH
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:831
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:255
static int charlen_to_bytelen(const char *p, int n)
Definition: varlena.c:735
static text * text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
Definition: varlena.c:814
Datum unknownrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:587
static text * array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string)
Definition: varlena.c:4383
Definition: c.h:493
static void appendStringInfoText(StringInfo str, const text *t)
Definition: varlena.c:3658
Datum text_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:2574
#define INT2OID
Definition: pg_type.h:308
Datum texteq(PG_FUNCTION_ARGS)
Definition: varlena.c:1638
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:157
#define ARR_DATA_PTR(a)
Definition: array.h:303
hyperLogLogState abbr_card
Definition: varlena.c:73
Datum text_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:2586
Datum textne(PG_FUNCTION_ARGS)
Definition: varlena.c:1673
int16 typlen
Definition: array.h:220
pg_locale_t locale
Definition: varlena.c:76
static char * buf
Definition: pg_test_fsync.c:66
#define DatumBigEndianToNative(x)
Definition: pg_bswap.h:65
#define memmove(d, s, c)
Definition: c.h:1058
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:161
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3247
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:75
char typdelim
Definition: array.h:223
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition: varlena.c:2920
Datum text_name(PG_FUNCTION_ARGS)
Definition: varlena.c:3149
static text * text_catenate(text *t1, text *t2)
Definition: varlena.c:694
#define DatumGetInt16(X)
Definition: postgres.h:450
#define DatumGetBool(X)
Definition: postgres.h:399
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
Definition: geqo_px.c:46
unsigned int uint32
Definition: c.h:268
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:192
void * ssup_extra
Definition: sortsupport.h:87
Datum textpos(PG_FUNCTION_ARGS)
Definition: varlena.c:1072
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
Datum text_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:795
int bytea_output
Definition: varlena.c:41
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:137
static int text_cmp(text *arg1, text *arg2, Oid collid)
Definition: varlena.c:1613
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3001
#define S(n, x)
Definition: sha1.c:55
#define PG_RETURN_ARRAYTYPE_P(x)
Definition: array.h:246
Datum pg_column_size(PG_FUNCTION_ARGS)
Definition: varlena.c:4602
Datum text_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:1732
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:172
#define ereport(elevel, rest)
Definition: elog.h:122
static int internal_text_pattern_compare(text *arg1, text *arg2)
Definition: varlena.c:2607
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5055
static bool text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
Definition: varlena.c:5219
unsigned int pg_wchar
Definition: mbprint.c:31
#define DatumGetVarStringPP(X)
Definition: varlena.c:92
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3189
#define byte(x, n)
Definition: rijndael.c:68
Datum textcat(PG_FUNCTION_ARGS)
Definition: varlena.c:679
List * lappend(List *list, void *datum)
Definition: list.c:128
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3172
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:588
#define MaxAllocSize
Definition: memutils.h:40
int skiptable[256]
Definition: varlena.c:57
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:442
void varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
Definition: varlena.c:1804
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1265
Datum text_le(PG_FUNCTION_ARGS)
Definition: varlena.c:1717
Datum hash_uint32(uint32 k)
Definition: hashfunc.c:512
uint8 bits8
Definition: c.h:275
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:781
#define TextDatumGetCString(d)
Definition: builtins.h:92
void * palloc0(Size size)
Definition: mcxt.c:878
Datum text_format_nv(PG_FUNCTION_ARGS)
Definition: varlena.c:5446
char * s2
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:319
uintptr_t Datum
Definition: postgres.h:372
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
Datum text_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:4887
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:2815
int bpchartruelen(char *s, int len)
Definition: varchar.c:660
#define REGEXP_REPLACE_BACKREF_CNT
Definition: varlena.c:3882
void appendStringInfoSpaces(StringInfo str, int count)
Definition: stringinfo.c:187
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:785
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:185
Datum text_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:1747
#define VARSIZE_ANY(PTR)
Definition: postgres.h:334
#define strxfrm_l
Definition: win32.h:347
static void text_position_cleanup(TextPositionState *state)
Definition: varlena.c:1365
Datum byteacmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3614
#define InvalidOid
Definition: postgres_ext.h:36
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:734
Datum to_hex32(PG_FUNCTION_ARGS)
Definition: varlena.c:4506
hyperLogLogState full_card
Definition: varlena.c:74
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:330
#define Max(x, y)
Definition: c.h:800
text * cstring_to_text(const char *s)
Definition: varlena.c:149
Datum unknownsend(PG_FUNCTION_ARGS)
Definition: varlena.c:602
#define PG_ARGISNULL(n)
Definition: fmgr.h:174
#define NULL
Definition: c.h:229
bool pg_md5_hash(const void *buff, size_t len, char *hexsum)
Definition: md5.c:293
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
Datum hash_any(register const unsigned char *k, register int keylen)
Definition: hashfunc.c:307
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:570
Datum text_right(PG_FUNCTION_ARGS)
Definition: varlena.c:4866
static text * concat_internal(const char *sepstr, int argidx, FunctionCallInfo fcinfo)
Definition: varlena.c:4729
int varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
Definition: varlena.c:1382
Oid typioparam
Definition: array.h:224
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:322
Datum unknownin(PG_FUNCTION_ARGS)
Definition: varlena.c:563
size_t Size
Definition: c.h:356
static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: varlena.c:2460
static bool rest_of_char_same(const char *s1, const char *s2, int len)
Definition: varlena.c:5456
Datum text_pattern_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:2629
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:548
#define newval
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:272
Datum byteane(PG_FUNCTION_ARGS)
Definition: varlena.c:3502
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:166
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:4018
Datum textin(PG_FUNCTION_ARGS)
Definition: varlena.c:508
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:225
#define PG_NARGS()
Definition: fmgr.h:168
#define C_COLLATION_OID
Definition: pg_collation.h:78
void * fn_extra
Definition: fmgr.h:64
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)
Definition: varlena.c:3788
#define ARR_NDIM(a)
Definition: array.h:271
Datum byteapos(PG_FUNCTION_ARGS)
Definition: varlena.c:2957
#define TEXTBUFLEN
Definition: varlena.c:83
Oid typiofunc
Definition: array.h:225
#define DatumGetPointer(X)
Definition: postgres.h:555
char typalign
Definition: array.h:222
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3475
char * text_to_cstring(const text *t)
Definition: varlena.c:182
pg_wchar * wstr1
Definition: varlena.c:51
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:4991
#define DatumGetBpCharPP(X)
Definition: fmgr.h:257
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2557
Datum bttextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:1762
Datum unknownout(PG_FUNCTION_ARGS)
Definition: varlena.c:575
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:172
int16 get_typlen(Oid typid)
Definition: lsyscache.c:1947
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1747
Datum bytearecv(PG_FUNCTION_ARGS)
Definition: varlena.c:423
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
void * palloc(Size size)
Definition: mcxt.c:849
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:71
static StringInfo makeStringAggState(FunctionCallInfo fcinfo)
Definition: varlena.c:4656
FmgrInfo proc
Definition: array.h:226
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:450
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:707
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:2897
Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:2709
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:2732
void list_free(List *list)
Definition: list.c:1133
int i
Oid element_type
Definition: array.h:219
#define REG_NOMATCH
Definition: regex.h:138
#define NameStr(name)
Definition: c.h:499
static char * locale
Definition: