PostgreSQL Source Code  git master
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/hash.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_collation.h"
23 #include "catalog/pg_type.h"
24 #include "common/int.h"
25 #include "lib/hyperloglog.h"
26 #include "libpq/pqformat.h"
27 #include "miscadmin.h"
28 #include "parser/scansup.h"
29 #include "port/pg_bswap.h"
30 #include "regex/regex.h"
31 #include "utils/builtins.h"
32 #include "utils/bytea.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/pg_locale.h"
36 #include "utils/sortsupport.h"
37 #include "utils/varlena.h"
38 
39 
40 /* GUC variable */
42 
/*
 * File-local aliases for the generic varlena struct.  They are the pointer
 * targets of the DatumGetUnknownP* and DatumGetVarStringP* macros in this
 * file.
 */
typedef struct varlena unknown,
			VarString;
45 
46 typedef struct
47 {
48  bool use_wchar; /* T if multibyte encoding */
49  char *str1; /* use these if not use_wchar */
50  char *str2; /* note: these point to original texts */
51  pg_wchar *wstr1; /* use these if use_wchar */
52  pg_wchar *wstr2; /* note: these are palloc'd */
53  int len1; /* string lengths in logical characters */
54  int len2;
55  /* Skip table for Boyer-Moore-Horspool search algorithm: */
56  int skiptablemask; /* mask for ANDing with skiptable subscripts */
57  int skiptable[256]; /* skip distance for given mismatched char */
59 
60 typedef struct
61 {
62  char *buf1; /* 1st string, or abbreviation original string
63  * buf */
64  char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
65  int buflen1;
66  int buflen2;
67  int last_len1; /* Length of last buf1 string/strxfrm() input */
68  int last_len2; /* Length of last buf2 string/strxfrm() blob */
69  int last_returned; /* Last comparison result (cache) */
70  bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
71  bool collate_c;
72  bool bpchar; /* Sorting bpchar, not varchar/text/bytea? */
73  hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
74  hyperLogLogState full_card; /* Full key cardinality state */
75  double prop_card; /* Required cardinality proportion */
78 
/*
 * Scratch-buffer size: big enough that most strings fit, yet small enough
 * that we are comfortable placing a buffer of this size on the stack.
 */
#define TEXTBUFLEN		1024

/* Datum <-> pointer helpers for the "unknown" alias of struct varlena */
#define DatumGetUnknownP(d)			((unknown *) PG_DETOAST_DATUM(d))
#define DatumGetUnknownPCopy(d)		((unknown *) PG_DETOAST_DATUM_COPY(d))
#define PG_GETARG_UNKNOWN_P(argno)	DatumGetUnknownP(PG_GETARG_DATUM(argno))
#define PG_GETARG_UNKNOWN_P_COPY(argno) DatumGetUnknownPCopy(PG_GETARG_DATUM(argno))
#define PG_RETURN_UNKNOWN_P(ptr)	PG_RETURN_POINTER(ptr)

/* Datum -> pointer helpers for the "VarString" alias of struct varlena */
#define DatumGetVarStringP(d)		((VarString *) PG_DETOAST_DATUM(d))
#define DatumGetVarStringPP(d)		((VarString *) PG_DETOAST_DATUM_PACKED(d))
93 
94 static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
95 static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
96 static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
97 static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
98 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
99 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
100 static int32 text_length(Datum str);
101 static text *text_catenate(text *t1, text *t2);
102 static text *text_substring(Datum str,
103  int32 start,
104  int32 length,
105  bool length_not_specified);
106 static text *text_overlay(text *t1, text *t2, int sp, int sl);
107 static int text_position(text *t1, text *t2);
108 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
109 static int text_position_next(int start_pos, TextPositionState *state);
111 static int text_cmp(text *arg1, text *arg2, Oid collid);
112 static bytea *bytea_catenate(bytea *t1, bytea *t2);
114  int S,
115  int L,
116  bool length_not_specified);
117 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
118 static void appendStringInfoText(StringInfo str, const text *t);
121  const char *fldsep, const char *null_string);
123 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
124  int *value);
125 static const char *text_format_parse_format(const char *start_ptr,
126  const char *end_ptr,
127  int *argpos, int *widthpos,
128  int *flags, int *width);
129 static void text_format_string_conversion(StringInfo buf, char conversion,
130  FmgrInfo *typOutputInfo,
131  Datum value, bool isNull,
132  int flags, int width);
133 static void text_format_append_string(StringInfo buf, const char *str,
134  int flags, int width);
135 
136 
137 /*****************************************************************************
138  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
139  *****************************************************************************/
140 
141 /*
142  * cstring_to_text
143  *
144  * Create a text value from a null-terminated C string.
145  *
146  * The new text value is freshly palloc'd with a full-size VARHDR.
147  */
148 text *
149 cstring_to_text(const char *s)
150 {
151  return cstring_to_text_with_len(s, strlen(s));
152 }
153 
154 /*
155  * cstring_to_text_with_len
156  *
157  * Same as cstring_to_text except the caller specifies the string length;
158  * the string need not be null_terminated.
159  */
160 text *
161 cstring_to_text_with_len(const char *s, int len)
162 {
163  text *result = (text *) palloc(len + VARHDRSZ);
164 
165  SET_VARSIZE(result, len + VARHDRSZ);
166  memcpy(VARDATA(result), s, len);
167 
168  return result;
169 }
170 
171 /*
172  * text_to_cstring
173  *
174  * Create a palloc'd, null-terminated C string from a text value.
175  *
176  * We support being passed a compressed or toasted text value.
177  * This is a bit bogus since such values shouldn't really be referred to as
178  * "text *", but it seems useful for robustness. If we didn't handle that
179  * case here, we'd need another routine that did, anyway.
180  */
181 char *
183 {
184  /* must cast away the const, unfortunately */
185  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
186  int len = VARSIZE_ANY_EXHDR(tunpacked);
187  char *result;
188 
189  result = (char *) palloc(len + 1);
190  memcpy(result, VARDATA_ANY(tunpacked), len);
191  result[len] = '\0';
192 
193  if (tunpacked != t)
194  pfree(tunpacked);
195 
196  return result;
197 }
198 
199 /*
200  * text_to_cstring_buffer
201  *
202  * Copy a text value into a caller-supplied buffer of size dst_len.
203  *
204  * The text string is truncated if necessary to fit. The result is
205  * guaranteed null-terminated (unless dst_len == 0).
206  *
207  * We support being passed a compressed or toasted text value.
208  * This is a bit bogus since such values shouldn't really be referred to as
209  * "text *", but it seems useful for robustness. If we didn't handle that
210  * case here, we'd need another routine that did, anyway.
211  */
212 void
213 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
214 {
215  /* must cast away the const, unfortunately */
216  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
217  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
218 
219  if (dst_len > 0)
220  {
221  dst_len--;
222  if (dst_len >= src_len)
223  dst_len = src_len;
224  else /* ensure truncation is encoding-safe */
225  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
226  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
227  dst[dst_len] = '\0';
228  }
229 
230  if (srcunpacked != src)
231  pfree(srcunpacked);
232 }
233 
234 
235 /*****************************************************************************
236  * USER I/O ROUTINES *
237  *****************************************************************************/
238 
239 
/* Convert between an ASCII digit character and its numeric value */
#define VAL(ch)			((ch) - '0')
#define DIG(num)		((num) + '0')
242 
243 /*
244  * byteain - converts from printable representation of byte array
245  *
246  * Non-printable characters must be passed as '\nnn' (octal) and are
247  * converted to internal form. '\' must be passed as '\\'.
248  * ereport(ERROR, ...) if bad form.
249  *
250  * BUGS:
251  * The input is scanned twice.
252  * The error checking of input is minimal.
253  */
254 Datum
256 {
257  char *inputText = PG_GETARG_CSTRING(0);
258  char *tp;
259  char *rp;
260  int bc;
261  bytea *result;
262 
263  /* Recognize hex input */
264  if (inputText[0] == '\\' && inputText[1] == 'x')
265  {
266  size_t len = strlen(inputText);
267 
268  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
269  result = palloc(bc);
270  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
271  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
272 
273  PG_RETURN_BYTEA_P(result);
274  }
275 
276  /* Else, it's the traditional escaped style */
277  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
278  {
279  if (tp[0] != '\\')
280  tp++;
281  else if ((tp[0] == '\\') &&
282  (tp[1] >= '0' && tp[1] <= '3') &&
283  (tp[2] >= '0' && tp[2] <= '7') &&
284  (tp[3] >= '0' && tp[3] <= '7'))
285  tp += 4;
286  else if ((tp[0] == '\\') &&
287  (tp[1] == '\\'))
288  tp += 2;
289  else
290  {
291  /*
292  * one backslash, not followed by another or ### valid octal
293  */
294  ereport(ERROR,
295  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
296  errmsg("invalid input syntax for type %s", "bytea")));
297  }
298  }
299 
300  bc += VARHDRSZ;
301 
302  result = (bytea *) palloc(bc);
303  SET_VARSIZE(result, bc);
304 
305  tp = inputText;
306  rp = VARDATA(result);
307  while (*tp != '\0')
308  {
309  if (tp[0] != '\\')
310  *rp++ = *tp++;
311  else if ((tp[0] == '\\') &&
312  (tp[1] >= '0' && tp[1] <= '3') &&
313  (tp[2] >= '0' && tp[2] <= '7') &&
314  (tp[3] >= '0' && tp[3] <= '7'))
315  {
316  bc = VAL(tp[1]);
317  bc <<= 3;
318  bc += VAL(tp[2]);
319  bc <<= 3;
320  *rp++ = bc + VAL(tp[3]);
321 
322  tp += 4;
323  }
324  else if ((tp[0] == '\\') &&
325  (tp[1] == '\\'))
326  {
327  *rp++ = '\\';
328  tp += 2;
329  }
330  else
331  {
332  /*
333  * We should never get here. The first pass should not allow it.
334  */
335  ereport(ERROR,
336  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
337  errmsg("invalid input syntax for type %s", "bytea")));
338  }
339  }
340 
341  PG_RETURN_BYTEA_P(result);
342 }
343 
344 /*
345  * byteaout - converts to printable representation of byte array
346  *
347  * In the traditional escaped format, non-printable characters are
348  * printed as '\nnn' (octal) and '\' as '\\'.
349  */
350 Datum
352 {
353  bytea *vlena = PG_GETARG_BYTEA_PP(0);
354  char *result;
355  char *rp;
356 
358  {
359  /* Print hex format */
360  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
361  *rp++ = '\\';
362  *rp++ = 'x';
363  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
364  }
365  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
366  {
367  /* Print traditional escaped format */
368  char *vp;
369  int len;
370  int i;
371 
372  len = 1; /* empty string has 1 char */
373  vp = VARDATA_ANY(vlena);
374  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
375  {
376  if (*vp == '\\')
377  len += 2;
378  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
379  len += 4;
380  else
381  len++;
382  }
383  rp = result = (char *) palloc(len);
384  vp = VARDATA_ANY(vlena);
385  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
386  {
387  if (*vp == '\\')
388  {
389  *rp++ = '\\';
390  *rp++ = '\\';
391  }
392  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
393  {
394  int val; /* holds unprintable chars */
395 
396  val = *vp;
397  rp[0] = '\\';
398  rp[3] = DIG(val & 07);
399  val >>= 3;
400  rp[2] = DIG(val & 07);
401  val >>= 3;
402  rp[1] = DIG(val & 03);
403  rp += 4;
404  }
405  else
406  *rp++ = *vp;
407  }
408  }
409  else
410  {
411  elog(ERROR, "unrecognized bytea_output setting: %d",
412  bytea_output);
413  rp = result = NULL; /* keep compiler quiet */
414  }
415  *rp = '\0';
416  PG_RETURN_CSTRING(result);
417 }
418 
419 /*
420  * bytearecv - converts external binary format to bytea
421  */
422 Datum
424 {
426  bytea *result;
427  int nbytes;
428 
429  nbytes = buf->len - buf->cursor;
430  result = (bytea *) palloc(nbytes + VARHDRSZ);
431  SET_VARSIZE(result, nbytes + VARHDRSZ);
432  pq_copymsgbytes(buf, VARDATA(result), nbytes);
433  PG_RETURN_BYTEA_P(result);
434 }
435 
436 /*
437  * byteasend - converts bytea to binary format
438  *
439  * This is a special case: just copy the input...
440  */
441 Datum
443 {
444  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
445 
446  PG_RETURN_BYTEA_P(vlena);
447 }
448 
449 Datum
451 {
453 
454  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
455 
456  /* Append the value unless null. */
457  if (!PG_ARGISNULL(1))
458  {
460 
461  /* On the first time through, we ignore the delimiter. */
462  if (state == NULL)
463  state = makeStringAggState(fcinfo);
464  else if (!PG_ARGISNULL(2))
465  {
466  bytea *delim = PG_GETARG_BYTEA_PP(2);
467 
469  }
470 
472  }
473 
474  /*
475  * The transition type for string_agg() is declared to be "internal",
476  * which is a pass-by-value type the same size as a pointer.
477  */
478  PG_RETURN_POINTER(state);
479 }
480 
481 Datum
483 {
485 
486  /* cannot be called directly because of internal-type argument */
487  Assert(AggCheckCallContext(fcinfo, NULL));
488 
489  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
490 
491  if (state != NULL)
492  {
493  bytea *result;
494 
495  result = (bytea *) palloc(state->len + VARHDRSZ);
496  SET_VARSIZE(result, state->len + VARHDRSZ);
497  memcpy(VARDATA(result), state->data, state->len);
498  PG_RETURN_BYTEA_P(result);
499  }
500  else
501  PG_RETURN_NULL();
502 }
503 
504 /*
505  * textin - converts "..." to internal representation
506  */
507 Datum
509 {
510  char *inputText = PG_GETARG_CSTRING(0);
511 
512  PG_RETURN_TEXT_P(cstring_to_text(inputText));
513 }
514 
515 /*
516  * textout - converts internal representation to "..."
517  */
518 Datum
520 {
521  Datum txt = PG_GETARG_DATUM(0);
522 
524 }
525 
526 /*
527  * textrecv - converts external binary format to text
528  */
529 Datum
531 {
533  text *result;
534  char *str;
535  int nbytes;
536 
537  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
538 
539  result = cstring_to_text_with_len(str, nbytes);
540  pfree(str);
541  PG_RETURN_TEXT_P(result);
542 }
543 
544 /*
545  * textsend - converts text to binary format
546  */
547 Datum
549 {
550  text *t = PG_GETARG_TEXT_PP(0);
552 
553  pq_begintypsend(&buf);
556 }
557 
558 
559 /*
560  * unknownin - converts "..." to internal representation
561  */
562 Datum
564 {
565  char *str = PG_GETARG_CSTRING(0);
566 
567  /* representation is same as cstring */
569 }
570 
571 /*
572  * unknownout - converts internal representation to "..."
573  */
574 Datum
576 {
577  /* representation is same as cstring */
578  char *str = PG_GETARG_CSTRING(0);
579 
581 }
582 
583 /*
584  * unknownrecv - converts external binary format to unknown
585  */
586 Datum
588 {
590  char *str;
591  int nbytes;
592 
593  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
594  /* representation is same as cstring */
595  PG_RETURN_CSTRING(str);
596 }
597 
598 /*
599  * unknownsend - converts unknown to binary format
600  */
601 Datum
603 {
604  /* representation is same as cstring */
605  char *str = PG_GETARG_CSTRING(0);
607 
608  pq_begintypsend(&buf);
609  pq_sendtext(&buf, str, strlen(str));
611 }
612 
613 
614 /* ========== PUBLIC ROUTINES ========== */
615 
616 /*
617  * textlen -
618  * returns the logical length of a text*
619  * (which is less than the VARSIZE of the text*)
620  */
621 Datum
623 {
625 
626  /* try to avoid decompressing argument */
628 }
629 
630 /*
631  * text_length -
632  * Does the real work for textlen()
633  *
634  * This is broken out so it can be called directly by other string processing
635  * functions. Note that the argument is passed as a Datum, to indicate that
636  * it may still be in compressed form. We can avoid decompressing it at all
637  * in some cases.
638  */
639 static int32
641 {
642  /* fastpath when max encoding length is one */
645  else
646  {
647  text *t = DatumGetTextPP(str);
648 
650  VARSIZE_ANY_EXHDR(t)));
651  }
652 }
653 
654 /*
655  * textoctetlen -
656  * returns the physical length of a text*
657  * (which is less than the VARSIZE of the text*)
658  */
659 Datum
661 {
663 
664  /* We need not detoast the input at all */
666 }
667 
668 /*
669  * textcat -
670  * takes two text* and returns a text* that is the concatenation of
671  * the two.
672  *
673  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
674  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
675  * Allocate space for output in all cases.
676  * XXX - thomas 1997-07-10
677  */
678 Datum
680 {
681  text *t1 = PG_GETARG_TEXT_PP(0);
682  text *t2 = PG_GETARG_TEXT_PP(1);
683 
685 }
686 
687 /*
688  * text_catenate
689  * Guts of textcat(), broken out so it can be used by other functions
690  *
691  * Arguments can be in short-header form, but not compressed or out-of-line
692  */
693 static text *
695 {
696  text *result;
697  int len1,
698  len2,
699  len;
700  char *ptr;
701 
702  len1 = VARSIZE_ANY_EXHDR(t1);
703  len2 = VARSIZE_ANY_EXHDR(t2);
704 
705  /* paranoia ... probably should throw error instead? */
706  if (len1 < 0)
707  len1 = 0;
708  if (len2 < 0)
709  len2 = 0;
710 
711  len = len1 + len2 + VARHDRSZ;
712  result = (text *) palloc(len);
713 
714  /* Set size of result string... */
715  SET_VARSIZE(result, len);
716 
717  /* Fill data field of result string... */
718  ptr = VARDATA(result);
719  if (len1 > 0)
720  memcpy(ptr, VARDATA_ANY(t1), len1);
721  if (len2 > 0)
722  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
723 
724  return result;
725 }
726 
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * It is caller's responsibility that there actually are n characters;
 * the string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	if (pg_database_encoding_max_length() == 1)
	{
		/* Optimization for single-byte encodings */
		return n;
	}
	else
	{
		const char *s;

		/* step over n characters, one pg_mblen() at a time */
		for (s = p; n > 0; n--)
			s += pg_mblen(s);

		return s - p;
	}
}
752 
753 /*
754  * text_substr()
755  * Return a substring starting at the specified position.
756  * - thomas 1997-12-31
757  *
758  * Input:
759  * - string
760  * - starting position (is one-based)
761  * - string length
762  *
763  * If the starting position is zero or less, then return from the start of the string
764  * adjusting the length to be consistent with the "negative start" per SQL.
765  * If the length is less than zero, return the remaining string.
766  *
767  * Added multibyte support.
768  * - Tatsuo Ishii 1998-4-21
769  * Changed behavior if starting position is less than one to conform to SQL behavior.
770  * Formerly returned the entire string; now returns a portion.
771  * - Thomas Lockhart 1998-12-10
772  * Now uses faster TOAST-slicing interface
773  * - John Gray 2002-02-22
774  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
775  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
776  * error; if E < 1, return '', not entire string). Fixed MB related bug when
777  * S > LC and < LC + 4 sometimes garbage characters are returned.
778  * - Joe Conway 2002-08-10
779  */
780 Datum
782 {
784  PG_GETARG_INT32(1),
785  PG_GETARG_INT32(2),
786  false));
787 }
788 
789 /*
790  * text_substr_no_len -
791  * Wrapper to avoid opr_sanity failure due to
792  * one function accepting a different number of args.
793  */
794 Datum
796 {
798  PG_GETARG_INT32(1),
799  -1, true));
800 }
801 
802 /*
803  * text_substring -
804  * Does the real work for text_substr() and text_substr_no_len()
805  *
806  * This is broken out so it can be called directly by other string processing
807  * functions. Note that the argument is passed as a Datum, to indicate that
808  * it may still be in compressed/toasted form. We can avoid detoasting all
809  * of it in some cases.
810  *
811  * The result is always a freshly palloc'd datum.
812  */
813 static text *
814 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
815 {
817  int32 S = start; /* start position */
818  int32 S1; /* adjusted start position */
819  int32 L1; /* adjusted substring length */
820 
821  /* life is easy if the encoding max length is 1 */
822  if (eml == 1)
823  {
824  S1 = Max(S, 1);
825 
826  if (length_not_specified) /* special case - get length to end of
827  * string */
828  L1 = -1;
829  else
830  {
831  /* end position */
832  int E = S + length;
833 
834  /*
835  * A negative value for L is the only way for the end position to
836  * be before the start. SQL99 says to throw an error.
837  */
838  if (E < S)
839  ereport(ERROR,
840  (errcode(ERRCODE_SUBSTRING_ERROR),
841  errmsg("negative substring length not allowed")));
842 
843  /*
844  * A zero or negative value for the end position can happen if the
845  * start was negative or one. SQL99 says to return a zero-length
846  * string.
847  */
848  if (E < 1)
849  return cstring_to_text("");
850 
851  L1 = E - S1;
852  }
853 
854  /*
855  * If the start position is past the end of the string, SQL99 says to
856  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
857  * that for us. Convert to zero-based starting position
858  */
859  return DatumGetTextPSlice(str, S1 - 1, L1);
860  }
861  else if (eml > 1)
862  {
863  /*
864  * When encoding max length is > 1, we can't get LC without
865  * detoasting, so we'll grab a conservatively large slice now and go
866  * back later to do the right thing
867  */
868  int32 slice_start;
869  int32 slice_size;
870  int32 slice_strlen;
871  text *slice;
872  int32 E1;
873  int32 i;
874  char *p;
875  char *s;
876  text *ret;
877 
878  /*
879  * if S is past the end of the string, the tuple toaster will return a
880  * zero-length string to us
881  */
882  S1 = Max(S, 1);
883 
884  /*
885  * We need to start at position zero because there is no way to know
886  * in advance which byte offset corresponds to the supplied start
887  * position.
888  */
889  slice_start = 0;
890 
891  if (length_not_specified) /* special case - get length to end of
892  * string */
893  slice_size = L1 = -1;
894  else
895  {
896  int E = S + length;
897 
898  /*
899  * A negative value for L is the only way for the end position to
900  * be before the start. SQL99 says to throw an error.
901  */
902  if (E < S)
903  ereport(ERROR,
904  (errcode(ERRCODE_SUBSTRING_ERROR),
905  errmsg("negative substring length not allowed")));
906 
907  /*
908  * A zero or negative value for the end position can happen if the
909  * start was negative or one. SQL99 says to return a zero-length
910  * string.
911  */
912  if (E < 1)
913  return cstring_to_text("");
914 
915  /*
916  * if E is past the end of the string, the tuple toaster will
917  * truncate the length for us
918  */
919  L1 = E - S1;
920 
921  /*
922  * Total slice size in bytes can't be any longer than the start
923  * position plus substring length times the encoding max length.
924  */
925  slice_size = (S1 + L1) * eml;
926  }
927 
928  /*
929  * If we're working with an untoasted source, no need to do an extra
930  * copying step.
931  */
934  slice = DatumGetTextPSlice(str, slice_start, slice_size);
935  else
936  slice = (text *) DatumGetPointer(str);
937 
938  /* see if we got back an empty string */
939  if (VARSIZE_ANY_EXHDR(slice) == 0)
940  {
941  if (slice != (text *) DatumGetPointer(str))
942  pfree(slice);
943  return cstring_to_text("");
944  }
945 
946  /* Now we can get the actual length of the slice in MB characters */
947  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
948  VARSIZE_ANY_EXHDR(slice));
949 
950  /*
951  * Check that the start position wasn't > slice_strlen. If so, SQL99
952  * says to return a zero-length string.
953  */
954  if (S1 > slice_strlen)
955  {
956  if (slice != (text *) DatumGetPointer(str))
957  pfree(slice);
958  return cstring_to_text("");
959  }
960 
961  /*
962  * Adjust L1 and E1 now that we know the slice string length. Again
963  * remember that S1 is one based, and slice_start is zero based.
964  */
965  if (L1 > -1)
966  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
967  else
968  E1 = slice_start + 1 + slice_strlen;
969 
970  /*
971  * Find the start position in the slice; remember S1 is not zero based
972  */
973  p = VARDATA_ANY(slice);
974  for (i = 0; i < S1 - 1; i++)
975  p += pg_mblen(p);
976 
977  /* hang onto a pointer to our start position */
978  s = p;
979 
980  /*
981  * Count the actual bytes used by the substring of the requested
982  * length.
983  */
984  for (i = S1; i < E1; i++)
985  p += pg_mblen(p);
986 
987  ret = (text *) palloc(VARHDRSZ + (p - s));
988  SET_VARSIZE(ret, VARHDRSZ + (p - s));
989  memcpy(VARDATA(ret), s, (p - s));
990 
991  if (slice != (text *) DatumGetPointer(str))
992  pfree(slice);
993 
994  return ret;
995  }
996  else
997  elog(ERROR, "invalid backend encoding: encoding max length < 1");
998 
999  /* not reached: suppress compiler warning */
1000  return NULL;
1001 }
1002 
1003 /*
1004  * textoverlay
1005  * Replace specified substring of first string with second
1006  *
1007  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1008  * This code is a direct implementation of what the standard says.
1009  */
1010 Datum
1012 {
1013  text *t1 = PG_GETARG_TEXT_PP(0);
1014  text *t2 = PG_GETARG_TEXT_PP(1);
1015  int sp = PG_GETARG_INT32(2); /* substring start position */
1016  int sl = PG_GETARG_INT32(3); /* substring length */
1017 
1018  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1019 }
1020 
1021 Datum
1023 {
1024  text *t1 = PG_GETARG_TEXT_PP(0);
1025  text *t2 = PG_GETARG_TEXT_PP(1);
1026  int sp = PG_GETARG_INT32(2); /* substring start position */
1027  int sl;
1028 
1029  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1030  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1031 }
1032 
1033 static text *
1034 text_overlay(text *t1, text *t2, int sp, int sl)
1035 {
1036  text *result;
1037  text *s1;
1038  text *s2;
1039  int sp_pl_sl;
1040 
1041  /*
1042  * Check for possible integer-overflow cases. For negative sp, throw a
1043  * "substring length" error because that's what should be expected
1044  * according to the spec's definition of OVERLAY().
1045  */
1046  if (sp <= 0)
1047  ereport(ERROR,
1048  (errcode(ERRCODE_SUBSTRING_ERROR),
1049  errmsg("negative substring length not allowed")));
1050  if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1051  ereport(ERROR,
1052  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1053  errmsg("integer out of range")));
1054 
1055  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1056  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1057  result = text_catenate(s1, t2);
1058  result = text_catenate(result, s2);
1059 
1060  return result;
1061 }
1062 
1063 /*
1064  * textpos -
1065  * Return the position of the specified substring.
1066  * Implements the SQL POSITION() function.
1067  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1068  * - thomas 1997-07-27
1069  */
1070 Datum
1072 {
1073  text *str = PG_GETARG_TEXT_PP(0);
1074  text *search_str = PG_GETARG_TEXT_PP(1);
1075 
1076  PG_RETURN_INT32((int32) text_position(str, search_str));
1077 }
1078 
1079 /*
1080  * text_position -
1081  * Does the real work for textpos()
1082  *
1083  * Inputs:
1084  * t1 - string to be searched
1085  * t2 - pattern to match within t1
1086  * Result:
1087  * Character index of the first matched char, starting from 1,
1088  * or 0 if no match.
1089  *
1090  * This is broken out so it can be called directly by other string processing
1091  * functions.
1092  */
1093 static int
1095 {
1097  int result;
1098 
1099  text_position_setup(t1, t2, &state);
1100  result = text_position_next(1, &state);
1101  text_position_cleanup(&state);
1102  return result;
1103 }
1104 
1105 
1106 /*
1107  * text_position_setup, text_position_next, text_position_cleanup -
1108  * Component steps of text_position()
1109  *
1110  * These are broken out so that a string can be efficiently searched for
1111  * multiple occurrences of the same pattern. text_position_next may be
1112  * called multiple times with increasing values of start_pos, which is
1113  * the 1-based character position to start the search from. The "state"
1114  * variable is normally just a local variable in the caller.
1115  */
1116 
1117 static void
1119 {
1120  int len1 = VARSIZE_ANY_EXHDR(t1);
1121  int len2 = VARSIZE_ANY_EXHDR(t2);
1122 
1124  {
1125  /* simple case - single byte encoding */
1126  state->use_wchar = false;
1127  state->str1 = VARDATA_ANY(t1);
1128  state->str2 = VARDATA_ANY(t2);
1129  state->len1 = len1;
1130  state->len2 = len2;
1131  }
1132  else
1133  {
1134  /* not as simple - multibyte encoding */
1135  pg_wchar *p1,
1136  *p2;
1137 
1138  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1139  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1140  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1141  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1142 
1143  state->use_wchar = true;
1144  state->wstr1 = p1;
1145  state->wstr2 = p2;
1146  state->len1 = len1;
1147  state->len2 = len2;
1148  }
1149 
1150  /*
1151  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1152  * notes we use the terminology that the "haystack" is the string to be
1153  * searched (t1) and the "needle" is the pattern being sought (t2).
1154  *
1155  * If the needle is empty or bigger than the haystack then there is no
1156  * point in wasting cycles initializing the table. We also choose not to
1157  * use B-M-H for needles of length 1, since the skip table can't possibly
1158  * save anything in that case.
1159  */
1160  if (len1 >= len2 && len2 > 1)
1161  {
1162  int searchlength = len1 - len2;
1163  int skiptablemask;
1164  int last;
1165  int i;
1166 
1167  /*
1168  * First we must determine how much of the skip table to use. The
1169  * declaration of TextPositionState allows up to 256 elements, but for
1170  * short search problems we don't really want to have to initialize so
1171  * many elements --- it would take too long in comparison to the
1172  * actual search time. So we choose a useful skip table size based on
1173  * the haystack length minus the needle length. The closer the needle
1174  * length is to the haystack length the less useful skipping becomes.
1175  *
1176  * Note: since we use bit-masking to select table elements, the skip
1177  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1178  */
1179  if (searchlength < 16)
1180  skiptablemask = 3;
1181  else if (searchlength < 64)
1182  skiptablemask = 7;
1183  else if (searchlength < 128)
1184  skiptablemask = 15;
1185  else if (searchlength < 512)
1186  skiptablemask = 31;
1187  else if (searchlength < 2048)
1188  skiptablemask = 63;
1189  else if (searchlength < 4096)
1190  skiptablemask = 127;
1191  else
1192  skiptablemask = 255;
1193  state->skiptablemask = skiptablemask;
1194 
1195  /*
1196  * Initialize the skip table. We set all elements to the needle
1197  * length, since this is the correct skip distance for any character
1198  * not found in the needle.
1199  */
1200  for (i = 0; i <= skiptablemask; i++)
1201  state->skiptable[i] = len2;
1202 
1203  /*
1204  * Now examine the needle. For each character except the last one,
1205  * set the corresponding table element to the appropriate skip
1206  * distance. Note that when two characters share the same skip table
1207  * entry, the one later in the needle must determine the skip
1208  * distance.
1209  */
1210  last = len2 - 1;
1211 
1212  if (!state->use_wchar)
1213  {
1214  const char *str2 = state->str2;
1215 
1216  for (i = 0; i < last; i++)
1217  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1218  }
1219  else
1220  {
1221  const pg_wchar *wstr2 = state->wstr2;
1222 
1223  for (i = 0; i < last; i++)
1224  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1225  }
1226  }
1227 }
1228 
1229 static int
1231 {
1232  int haystack_len = state->len1;
1233  int needle_len = state->len2;
1234  int skiptablemask = state->skiptablemask;
1235 
1236  Assert(start_pos > 0); /* else caller error */
1237 
1238  if (needle_len <= 0)
1239  return start_pos; /* result for empty pattern */
1240 
1241  start_pos--; /* adjust for zero based arrays */
1242 
1243  /* Done if the needle can't possibly fit */
1244  if (haystack_len < start_pos + needle_len)
1245  return 0;
1246 
1247  if (!state->use_wchar)
1248  {
1249  /* simple case - single byte encoding */
1250  const char *haystack = state->str1;
1251  const char *needle = state->str2;
1252  const char *haystack_end = &haystack[haystack_len];
1253  const char *hptr;
1254 
1255  if (needle_len == 1)
1256  {
1257  /* No point in using B-M-H for a one-character needle */
1258  char nchar = *needle;
1259 
1260  hptr = &haystack[start_pos];
1261  while (hptr < haystack_end)
1262  {
1263  if (*hptr == nchar)
1264  return hptr - haystack + 1;
1265  hptr++;
1266  }
1267  }
1268  else
1269  {
1270  const char *needle_last = &needle[needle_len - 1];
1271 
1272  /* Start at startpos plus the length of the needle */
1273  hptr = &haystack[start_pos + needle_len - 1];
1274  while (hptr < haystack_end)
1275  {
1276  /* Match the needle scanning *backward* */
1277  const char *nptr;
1278  const char *p;
1279 
1280  nptr = needle_last;
1281  p = hptr;
1282  while (*nptr == *p)
1283  {
1284  /* Matched it all? If so, return 1-based position */
1285  if (nptr == needle)
1286  return p - haystack + 1;
1287  nptr--, p--;
1288  }
1289 
1290  /*
1291  * No match, so use the haystack char at hptr to decide how
1292  * far to advance. If the needle had any occurrence of that
1293  * character (or more precisely, one sharing the same
1294  * skiptable entry) before its last character, then we advance
1295  * far enough to align the last such needle character with
1296  * that haystack position. Otherwise we can advance by the
1297  * whole needle length.
1298  */
1299  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1300  }
1301  }
1302  }
1303  else
1304  {
1305  /* The multibyte char version. This works exactly the same way. */
1306  const pg_wchar *haystack = state->wstr1;
1307  const pg_wchar *needle = state->wstr2;
1308  const pg_wchar *haystack_end = &haystack[haystack_len];
1309  const pg_wchar *hptr;
1310 
1311  if (needle_len == 1)
1312  {
1313  /* No point in using B-M-H for a one-character needle */
1314  pg_wchar nchar = *needle;
1315 
1316  hptr = &haystack[start_pos];
1317  while (hptr < haystack_end)
1318  {
1319  if (*hptr == nchar)
1320  return hptr - haystack + 1;
1321  hptr++;
1322  }
1323  }
1324  else
1325  {
1326  const pg_wchar *needle_last = &needle[needle_len - 1];
1327 
1328  /* Start at startpos plus the length of the needle */
1329  hptr = &haystack[start_pos + needle_len - 1];
1330  while (hptr < haystack_end)
1331  {
1332  /* Match the needle scanning *backward* */
1333  const pg_wchar *nptr;
1334  const pg_wchar *p;
1335 
1336  nptr = needle_last;
1337  p = hptr;
1338  while (*nptr == *p)
1339  {
1340  /* Matched it all? If so, return 1-based position */
1341  if (nptr == needle)
1342  return p - haystack + 1;
1343  nptr--, p--;
1344  }
1345 
1346  /*
1347  * No match, so use the haystack char at hptr to decide how
1348  * far to advance. If the needle had any occurrence of that
1349  * character (or more precisely, one sharing the same
1350  * skiptable entry) before its last character, then we advance
1351  * far enough to align the last such needle character with
1352  * that haystack position. Otherwise we can advance by the
1353  * whole needle length.
1354  */
1355  hptr += state->skiptable[*hptr & skiptablemask];
1356  }
1357  }
1358  }
1359 
1360  return 0; /* not found */
1361 }
1362 
1363 static void
1365 {
1366  if (state->use_wchar)
1367  {
1368  pfree(state->wstr1);
1369  pfree(state->wstr2);
1370  }
1371 }
1372 
1373 /* varstr_cmp()
1374  * Comparison function for text strings with given lengths.
1375  * Includes locale support, but must copy strings to temporary memory
1376  * to allow null-termination for inputs to strcoll().
1377  * Returns an integer less than, equal to, or greater than zero, indicating
1378  * whether arg1 is less than, equal to, or greater than arg2.
1379  */
1380 int
1381 varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
1382 {
1383  int result;
1384 
1385  /*
1386  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1387  * have to do some memory copying. This turns out to be significantly
1388  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1389  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1390  */
1391  if (lc_collate_is_c(collid))
1392  {
1393  result = memcmp(arg1, arg2, Min(len1, len2));
1394  if ((result == 0) && (len1 != len2))
1395  result = (len1 < len2) ? -1 : 1;
1396  }
1397  else
1398  {
1399  char a1buf[TEXTBUFLEN];
1400  char a2buf[TEXTBUFLEN];
1401  char *a1p,
1402  *a2p;
1403  pg_locale_t mylocale = 0;
1404 
1405  if (collid != DEFAULT_COLLATION_OID)
1406  {
1407  if (!OidIsValid(collid))
1408  {
1409  /*
1410  * This typically means that the parser could not resolve a
1411  * conflict of implicit collations, so report it that way.
1412  */
1413  ereport(ERROR,
1414  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1415  errmsg("could not determine which collation to use for string comparison"),
1416  errhint("Use the COLLATE clause to set the collation explicitly.")));
1417  }
1418  mylocale = pg_newlocale_from_collation(collid);
1419  }
1420 
1421  /*
1422  * memcmp() can't tell us which of two unequal strings sorts first,
1423  * but it's a cheap way to tell if they're equal. Testing shows that
1424  * memcmp() followed by strcoll() is only trivially slower than
1425  * strcoll() by itself, so we don't lose much if this doesn't work out
1426  * very often, and if it does - for example, because there are many
1427  * equal strings in the input - then we win big by avoiding expensive
1428  * collation-aware comparisons.
1429  */
1430  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1431  return 0;
1432 
1433 #ifdef WIN32
1434  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1435  if (GetDatabaseEncoding() == PG_UTF8
1436  && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
1437  {
1438  int a1len;
1439  int a2len;
1440  int r;
1441 
1442  if (len1 >= TEXTBUFLEN / 2)
1443  {
1444  a1len = len1 * 2 + 2;
1445  a1p = palloc(a1len);
1446  }
1447  else
1448  {
1449  a1len = TEXTBUFLEN;
1450  a1p = a1buf;
1451  }
1452  if (len2 >= TEXTBUFLEN / 2)
1453  {
1454  a2len = len2 * 2 + 2;
1455  a2p = palloc(a2len);
1456  }
1457  else
1458  {
1459  a2len = TEXTBUFLEN;
1460  a2p = a2buf;
1461  }
1462 
1463  /* stupid Microsloth API does not work for zero-length input */
1464  if (len1 == 0)
1465  r = 0;
1466  else
1467  {
1468  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1469  (LPWSTR) a1p, a1len / 2);
1470  if (!r)
1471  ereport(ERROR,
1472  (errmsg("could not convert string to UTF-16: error code %lu",
1473  GetLastError())));
1474  }
1475  ((LPWSTR) a1p)[r] = 0;
1476 
1477  if (len2 == 0)
1478  r = 0;
1479  else
1480  {
1481  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1482  (LPWSTR) a2p, a2len / 2);
1483  if (!r)
1484  ereport(ERROR,
1485  (errmsg("could not convert string to UTF-16: error code %lu",
1486  GetLastError())));
1487  }
1488  ((LPWSTR) a2p)[r] = 0;
1489 
1490  errno = 0;
1491 #ifdef HAVE_LOCALE_T
1492  if (mylocale)
1493  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
1494  else
1495 #endif
1496  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1497  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1498  * headers */
1499  ereport(ERROR,
1500  (errmsg("could not compare Unicode strings: %m")));
1501 
1502  /*
1503  * In some locales wcscoll() can claim that nonidentical strings
1504  * are equal. Believing that would be bad news for a number of
1505  * reasons, so we follow Perl's lead and sort "equal" strings
1506  * according to strcmp (on the UTF-8 representation).
1507  */
1508  if (result == 0)
1509  {
1510  result = memcmp(arg1, arg2, Min(len1, len2));
1511  if ((result == 0) && (len1 != len2))
1512  result = (len1 < len2) ? -1 : 1;
1513  }
1514 
1515  if (a1p != a1buf)
1516  pfree(a1p);
1517  if (a2p != a2buf)
1518  pfree(a2p);
1519 
1520  return result;
1521  }
1522 #endif /* WIN32 */
1523 
1524  if (len1 >= TEXTBUFLEN)
1525  a1p = (char *) palloc(len1 + 1);
1526  else
1527  a1p = a1buf;
1528  if (len2 >= TEXTBUFLEN)
1529  a2p = (char *) palloc(len2 + 1);
1530  else
1531  a2p = a2buf;
1532 
1533  memcpy(a1p, arg1, len1);
1534  a1p[len1] = '\0';
1535  memcpy(a2p, arg2, len2);
1536  a2p[len2] = '\0';
1537 
1538  if (mylocale)
1539  {
1540  if (mylocale->provider == COLLPROVIDER_ICU)
1541  {
1542 #ifdef USE_ICU
1543 #ifdef HAVE_UCOL_STRCOLLUTF8
1544  if (GetDatabaseEncoding() == PG_UTF8)
1545  {
1546  UErrorCode status;
1547 
1548  status = U_ZERO_ERROR;
1549  result = ucol_strcollUTF8(mylocale->info.icu.ucol,
1550  arg1, len1,
1551  arg2, len2,
1552  &status);
1553  if (U_FAILURE(status))
1554  ereport(ERROR,
1555  (errmsg("collation failed: %s", u_errorName(status))));
1556  }
1557  else
1558 #endif
1559  {
1560  int32_t ulen1,
1561  ulen2;
1562  UChar *uchar1,
1563  *uchar2;
1564 
1565  ulen1 = icu_to_uchar(&uchar1, arg1, len1);
1566  ulen2 = icu_to_uchar(&uchar2, arg2, len2);
1567 
1568  result = ucol_strcoll(mylocale->info.icu.ucol,
1569  uchar1, ulen1,
1570  uchar2, ulen2);
1571 
1572  pfree(uchar1);
1573  pfree(uchar2);
1574  }
1575 #else /* not USE_ICU */
1576  /* shouldn't happen */
1577  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1578 #endif /* not USE_ICU */
1579  }
1580  else
1581  {
1582 #ifdef HAVE_LOCALE_T
1583  result = strcoll_l(a1p, a2p, mylocale->info.lt);
1584 #else
1585  /* shouldn't happen */
1586  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1587 #endif
1588  }
1589  }
1590  else
1591  result = strcoll(a1p, a2p);
1592 
1593  /*
1594  * In some locales strcoll() can claim that nonidentical strings are
1595  * equal. Believing that would be bad news for a number of reasons,
1596  * so we follow Perl's lead and sort "equal" strings according to
1597  * strcmp().
1598  */
1599  if (result == 0)
1600  result = strcmp(a1p, a2p);
1601 
1602  if (a1p != a1buf)
1603  pfree(a1p);
1604  if (a2p != a2buf)
1605  pfree(a2p);
1606  }
1607 
1608  return result;
1609 }
1610 
1611 /* text_cmp()
1612  * Internal comparison function for text strings.
1613  * Returns -1, 0 or 1
1614  */
1615 static int
1616 text_cmp(text *arg1, text *arg2, Oid collid)
1617 {
1618  char *a1p,
1619  *a2p;
1620  int len1,
1621  len2;
1622 
1623  a1p = VARDATA_ANY(arg1);
1624  a2p = VARDATA_ANY(arg2);
1625 
1626  len1 = VARSIZE_ANY_EXHDR(arg1);
1627  len2 = VARSIZE_ANY_EXHDR(arg2);
1628 
1629  return varstr_cmp(a1p, len1, a2p, len2, collid);
1630 }
1631 
1632 /*
1633  * Comparison functions for text strings.
1634  *
1635  * Note: btree indexes need these routines not to leak memory; therefore,
1636  * be careful to free working copies of toasted datums. Most places don't
1637  * need to be so careful.
1638  */
1639 
1640 Datum
1642 {
1643  Datum arg1 = PG_GETARG_DATUM(0);
1644  Datum arg2 = PG_GETARG_DATUM(1);
1645  bool result;
1646  Size len1,
1647  len2;
1648 
1649  /*
1650  * Since we only care about equality or not-equality, we can avoid all the
1651  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1652  * don't even have to do a bitwise comparison if we can show the lengths
1653  * of the strings are unequal; which might save us from having to detoast
1654  * one or both values.
1655  */
1656  len1 = toast_raw_datum_size(arg1);
1657  len2 = toast_raw_datum_size(arg2);
1658  if (len1 != len2)
1659  result = false;
1660  else
1661  {
1662  text *targ1 = DatumGetTextPP(arg1);
1663  text *targ2 = DatumGetTextPP(arg2);
1664 
1665  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1666  len1 - VARHDRSZ) == 0);
1667 
1668  PG_FREE_IF_COPY(targ1, 0);
1669  PG_FREE_IF_COPY(targ2, 1);
1670  }
1671 
1672  PG_RETURN_BOOL(result);
1673 }
1674 
1675 Datum
1677 {
1678  Datum arg1 = PG_GETARG_DATUM(0);
1679  Datum arg2 = PG_GETARG_DATUM(1);
1680  bool result;
1681  Size len1,
1682  len2;
1683 
1684  /* See comment in texteq() */
1685  len1 = toast_raw_datum_size(arg1);
1686  len2 = toast_raw_datum_size(arg2);
1687  if (len1 != len2)
1688  result = true;
1689  else
1690  {
1691  text *targ1 = DatumGetTextPP(arg1);
1692  text *targ2 = DatumGetTextPP(arg2);
1693 
1694  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1695  len1 - VARHDRSZ) != 0);
1696 
1697  PG_FREE_IF_COPY(targ1, 0);
1698  PG_FREE_IF_COPY(targ2, 1);
1699  }
1700 
1701  PG_RETURN_BOOL(result);
1702 }
1703 
1704 Datum
1706 {
1707  text *arg1 = PG_GETARG_TEXT_PP(0);
1708  text *arg2 = PG_GETARG_TEXT_PP(1);
1709  bool result;
1710 
1711  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1712 
1713  PG_FREE_IF_COPY(arg1, 0);
1714  PG_FREE_IF_COPY(arg2, 1);
1715 
1716  PG_RETURN_BOOL(result);
1717 }
1718 
1719 Datum
1721 {
1722  text *arg1 = PG_GETARG_TEXT_PP(0);
1723  text *arg2 = PG_GETARG_TEXT_PP(1);
1724  bool result;
1725 
1726  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1727 
1728  PG_FREE_IF_COPY(arg1, 0);
1729  PG_FREE_IF_COPY(arg2, 1);
1730 
1731  PG_RETURN_BOOL(result);
1732 }
1733 
1734 Datum
1736 {
1737  text *arg1 = PG_GETARG_TEXT_PP(0);
1738  text *arg2 = PG_GETARG_TEXT_PP(1);
1739  bool result;
1740 
1741  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1742 
1743  PG_FREE_IF_COPY(arg1, 0);
1744  PG_FREE_IF_COPY(arg2, 1);
1745 
1746  PG_RETURN_BOOL(result);
1747 }
1748 
1749 Datum
1751 {
1752  text *arg1 = PG_GETARG_TEXT_PP(0);
1753  text *arg2 = PG_GETARG_TEXT_PP(1);
1754  bool result;
1755 
1756  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1757 
1758  PG_FREE_IF_COPY(arg1, 0);
1759  PG_FREE_IF_COPY(arg2, 1);
1760 
1761  PG_RETURN_BOOL(result);
1762 }
1763 
1764 Datum
1766 {
1767  Datum arg1 = PG_GETARG_DATUM(0);
1768  Datum arg2 = PG_GETARG_DATUM(1);
1769  bool result;
1770  Size len1,
1771  len2;
1772 
1773  len1 = toast_raw_datum_size(arg1);
1774  len2 = toast_raw_datum_size(arg2);
1775  if (len2 > len1)
1776  result = false;
1777  else
1778  {
1779  text *targ1 = DatumGetTextPP(arg1);
1780  text *targ2 = DatumGetTextPP(arg2);
1781 
1782  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1783  VARSIZE_ANY_EXHDR(targ2)) == 0);
1784 
1785  PG_FREE_IF_COPY(targ1, 0);
1786  PG_FREE_IF_COPY(targ2, 1);
1787  }
1788 
1789  PG_RETURN_BOOL(result);
1790 }
1791 
1792 Datum
1794 {
1795  text *arg1 = PG_GETARG_TEXT_PP(0);
1796  text *arg2 = PG_GETARG_TEXT_PP(1);
1797  int32 result;
1798 
1799  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1800 
1801  PG_FREE_IF_COPY(arg1, 0);
1802  PG_FREE_IF_COPY(arg2, 1);
1803 
1804  PG_RETURN_INT32(result);
1805 }
1806 
1807 Datum
1809 {
1811  Oid collid = ssup->ssup_collation;
1812  MemoryContext oldcontext;
1813 
1814  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1815 
1816  /* Use generic string SortSupport */
1817  varstr_sortsupport(ssup, collid, false);
1818 
1819  MemoryContextSwitchTo(oldcontext);
1820 
1821  PG_RETURN_VOID();
1822 }
1823 
1824 /*
1825  * Generic sortsupport interface for character type's operator classes.
1826  * Includes locale support, and support for BpChar semantics (i.e. removing
1827  * trailing spaces before comparison).
1828  *
1829  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1830  * same representation. Callers that always use the C collation (e.g.
1831  * non-collatable type callers like bytea) may have NUL bytes in their strings;
1832  * this will not work with any other collation, though.
1833  */
1834 void
1836 {
1837  bool abbreviate = ssup->abbreviate;
1838  bool collate_c = false;
1839  VarStringSortSupport *sss;
1840  pg_locale_t locale = 0;
1841 
1842  /*
1843  * If possible, set ssup->comparator to a function which can be used to
1844  * directly compare two datums. If we can do this, we'll avoid the
1845  * overhead of a trip through the fmgr layer for every comparison, which
1846  * can be substantial.
1847  *
1848  * Most typically, we'll set the comparator to varstrfastcmp_locale, which
1849  * uses strcoll() to perform comparisons and knows about the special
1850  * requirements of BpChar callers. However, if LC_COLLATE = C, we can
1851  * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
1852  * both of which use memcmp() rather than strcoll().
1853  */
1854  if (lc_collate_is_c(collid))
1855  {
1856  if (!bpchar)
1857  ssup->comparator = varstrfastcmp_c;
1858  else
1859  ssup->comparator = bpcharfastcmp_c;
1860 
1861  collate_c = true;
1862  }
1863  else
1864  {
1865  /*
1866  * We need a collation-sensitive comparison. To make things faster,
1867  * we'll figure out the collation based on the locale id and cache the
1868  * result.
1869  */
1870  if (collid != DEFAULT_COLLATION_OID)
1871  {
1872  if (!OidIsValid(collid))
1873  {
1874  /*
1875  * This typically means that the parser could not resolve a
1876  * conflict of implicit collations, so report it that way.
1877  */
1878  ereport(ERROR,
1879  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1880  errmsg("could not determine which collation to use for string comparison"),
1881  errhint("Use the COLLATE clause to set the collation explicitly.")));
1882  }
1883  locale = pg_newlocale_from_collation(collid);
1884  }
1885 
1886  /*
1887  * There is a further exception on Windows. When the database
1888  * encoding is UTF-8 and we are not using the C collation, complex
1889  * hacks are required. We don't currently have a comparator that
1890  * handles that case, so we fall back on the slow method of having the
1891  * sort code invoke bttextcmp() (in the case of text) via the fmgr
1892  * trampoline. ICU locales work just the same on Windows, however.
1893  */
1894 #ifdef WIN32
1895  if (GetDatabaseEncoding() == PG_UTF8 &&
1896  !(locale && locale->provider == COLLPROVIDER_ICU))
1897  return;
1898 #endif
1899 
1901  }
1902 
1903  /*
1904  * Unfortunately, it seems that abbreviation for non-C collations is
1905  * broken on many common platforms; testing of multiple versions of glibc
1906  * reveals that, for many locales, strcoll() and strxfrm() do not return
1907  * consistent results, which is fatal to this optimization. While no
1908  * other libc other than Cygwin has so far been shown to have a problem,
1909  * we take the conservative course of action for right now and disable
1910  * this categorically. (Users who are certain this isn't a problem on
1911  * their system can define TRUST_STRXFRM.)
1912  *
1913  * Even apart from the risk of broken locales, it's possible that there
1914  * are platforms where the use of abbreviated keys should be disabled at
1915  * compile time. Having only 4 byte datums could make worst-case
1916  * performance drastically more likely, for example. Moreover, macOS's
1917  * strxfrm() implementation is known to not effectively concentrate a
1918  * significant amount of entropy from the original string in earlier
1919  * transformed blobs. It's possible that other supported platforms are
1920  * similarly encumbered. So, if we ever get past disabling this
1921  * categorically, we may still want or need to disable it for particular
1922  * platforms.
1923  */
1924 #ifndef TRUST_STRXFRM
1925  if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
1926  abbreviate = false;
1927 #endif
1928 
1929  /*
1930  * If we're using abbreviated keys, or if we're using a locale-aware
1931  * comparison, we need to initialize a StringSortSupport object. Both
1932  * cases will make use of the temporary buffers we initialize here for
1933  * scratch space (and to detect requirement for BpChar semantics from
1934  * caller), and the abbreviation case requires additional state.
1935  */
1936  if (abbreviate || !collate_c)
1937  {
1938  sss = palloc(sizeof(VarStringSortSupport));
1939  sss->buf1 = palloc(TEXTBUFLEN);
1940  sss->buflen1 = TEXTBUFLEN;
1941  sss->buf2 = palloc(TEXTBUFLEN);
1942  sss->buflen2 = TEXTBUFLEN;
1943  /* Start with invalid values */
1944  sss->last_len1 = -1;
1945  sss->last_len2 = -1;
1946  /* Initialize */
1947  sss->last_returned = 0;
1948  sss->locale = locale;
1949 
1950  /*
1951  * To avoid somehow confusing a strxfrm() blob and an original string,
1952  * constantly keep track of the variety of data that buf1 and buf2
1953  * currently contain.
1954  *
1955  * Comparisons may be interleaved with conversion calls. Frequently,
1956  * conversions and comparisons are batched into two distinct phases,
1957  * but the correctness of caching cannot hinge upon this. For
1958  * comparison caching, buffer state is only trusted if cache_blob is
1959  * found set to false, whereas strxfrm() caching only trusts the state
1960  * when cache_blob is found set to true.
1961  *
1962  * Arbitrarily initialize cache_blob to true.
1963  */
1964  sss->cache_blob = true;
1965  sss->collate_c = collate_c;
1966  sss->bpchar = bpchar;
1967  ssup->ssup_extra = sss;
1968 
1969  /*
1970  * If possible, plan to use the abbreviated keys optimization. The
1971  * core code may switch back to authoritative comparator should
1972  * abbreviation be aborted.
1973  */
1974  if (abbreviate)
1975  {
1976  sss->prop_card = 0.20;
1977  initHyperLogLog(&sss->abbr_card, 10);
1978  initHyperLogLog(&sss->full_card, 10);
1979  ssup->abbrev_full_comparator = ssup->comparator;
1980  ssup->comparator = varstrcmp_abbrev;
1983  }
1984  }
1985 }
1986 
1987 /*
1988  * sortsupport comparison func (for C locale case)
1989  */
1990 static int
1992 {
1993  VarString *arg1 = DatumGetVarStringPP(x);
1994  VarString *arg2 = DatumGetVarStringPP(y);
1995  char *a1p,
1996  *a2p;
1997  int len1,
1998  len2,
1999  result;
2000 
2001  a1p = VARDATA_ANY(arg1);
2002  a2p = VARDATA_ANY(arg2);
2003 
2004  len1 = VARSIZE_ANY_EXHDR(arg1);
2005  len2 = VARSIZE_ANY_EXHDR(arg2);
2006 
2007  result = memcmp(a1p, a2p, Min(len1, len2));
2008  if ((result == 0) && (len1 != len2))
2009  result = (len1 < len2) ? -1 : 1;
2010 
2011  /* We can't afford to leak memory here. */
2012  if (PointerGetDatum(arg1) != x)
2013  pfree(arg1);
2014  if (PointerGetDatum(arg2) != y)
2015  pfree(arg2);
2016 
2017  return result;
2018 }
2019 
2020 /*
2021  * sortsupport comparison func (for BpChar C locale case)
2022  *
2023  * BpChar outsources its sortsupport to this module. Specialization for the
2024  * varstr_sortsupport BpChar case, modeled on
2025  * internal_bpchar_pattern_compare().
2026  */
2027 static int
2029 {
2030  BpChar *arg1 = DatumGetBpCharPP(x);
2031  BpChar *arg2 = DatumGetBpCharPP(y);
2032  char *a1p,
2033  *a2p;
2034  int len1,
2035  len2,
2036  result;
2037 
2038  a1p = VARDATA_ANY(arg1);
2039  a2p = VARDATA_ANY(arg2);
2040 
2041  len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2042  len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2043 
2044  result = memcmp(a1p, a2p, Min(len1, len2));
2045  if ((result == 0) && (len1 != len2))
2046  result = (len1 < len2) ? -1 : 1;
2047 
2048  /* We can't afford to leak memory here. */
2049  if (PointerGetDatum(arg1) != x)
2050  pfree(arg1);
2051  if (PointerGetDatum(arg2) != y)
2052  pfree(arg2);
2053 
2054  return result;
2055 }
2056 
2057 /*
2058  * sortsupport comparison func (for locale case)
2059  */
2060 static int
2062 {
2063  VarString *arg1 = DatumGetVarStringPP(x);
2064  VarString *arg2 = DatumGetVarStringPP(y);
2065  bool arg1_match;
2067 
2068  /* working state */
2069  char *a1p,
2070  *a2p;
2071  int len1,
2072  len2,
2073  result;
2074 
2075  a1p = VARDATA_ANY(arg1);
2076  a2p = VARDATA_ANY(arg2);
2077 
2078  len1 = VARSIZE_ANY_EXHDR(arg1);
2079  len2 = VARSIZE_ANY_EXHDR(arg2);
2080 
2081  /* Fast pre-check for equality, as discussed in varstr_cmp() */
2082  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2083  {
2084  /*
2085  * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2086  * last_len2. Existing contents of buffers might still be used by
2087  * next call.
2088  *
2089  * It's fine to allow the comparison of BpChar padding bytes here,
2090  * even though that implies that the memcmp() will usually be
2091  * performed for BpChar callers (though multibyte characters could
2092  * still prevent that from occurring). The memcmp() is still very
2093  * cheap, and BpChar's funny semantics have us remove trailing spaces
2094  * (not limited to padding), so we need make no distinction between
2095  * padding space characters and "real" space characters.
2096  */
2097  result = 0;
2098  goto done;
2099  }
2100 
2101  if (sss->bpchar)
2102  {
2103  /* Get true number of bytes, ignoring trailing spaces */
2104  len1 = bpchartruelen(a1p, len1);
2105  len2 = bpchartruelen(a2p, len2);
2106  }
2107 
2108  if (len1 >= sss->buflen1)
2109  {
2110  pfree(sss->buf1);
2111  sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2112  sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2113  }
2114  if (len2 >= sss->buflen2)
2115  {
2116  pfree(sss->buf2);
2117  sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2118  sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2119  }
2120 
2121  /*
2122  * We're likely to be asked to compare the same strings repeatedly, and
2123  * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2124  * comparisons, even though in general there is no reason to think that
2125  * that will work out (every string datum may be unique). Caching does
2126  * not slow things down measurably when it doesn't work out, and can speed
2127  * things up by rather a lot when it does. In part, this is because the
2128  * memcmp() compares data from cachelines that are needed in L1 cache even
2129  * when the last comparison's result cannot be reused.
2130  */
2131  arg1_match = true;
2132  if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2133  {
2134  arg1_match = false;
2135  memcpy(sss->buf1, a1p, len1);
2136  sss->buf1[len1] = '\0';
2137  sss->last_len1 = len1;
2138  }
2139 
2140  /*
2141  * If we're comparing the same two strings as last time, we can return the
2142  * same answer without calling strcoll() again. This is more likely than
2143  * it seems (at least with moderate to low cardinality sets), because
2144  * quicksort compares the same pivot against many values.
2145  */
2146  if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2147  {
2148  memcpy(sss->buf2, a2p, len2);
2149  sss->buf2[len2] = '\0';
2150  sss->last_len2 = len2;
2151  }
2152  else if (arg1_match && !sss->cache_blob)
2153  {
2154  /* Use result cached following last actual strcoll() call */
2155  result = sss->last_returned;
2156  goto done;
2157  }
2158 
2159  if (sss->locale)
2160  {
2161  if (sss->locale->provider == COLLPROVIDER_ICU)
2162  {
2163 #ifdef USE_ICU
2164 #ifdef HAVE_UCOL_STRCOLLUTF8
2165  if (GetDatabaseEncoding() == PG_UTF8)
2166  {
2167  UErrorCode status;
2168 
2169  status = U_ZERO_ERROR;
2170  result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
2171  a1p, len1,
2172  a2p, len2,
2173  &status);
2174  if (U_FAILURE(status))
2175  ereport(ERROR,
2176  (errmsg("collation failed: %s", u_errorName(status))));
2177  }
2178  else
2179 #endif
2180  {
2181  int32_t ulen1,
2182  ulen2;
2183  UChar *uchar1,
2184  *uchar2;
2185 
2186  ulen1 = icu_to_uchar(&uchar1, a1p, len1);
2187  ulen2 = icu_to_uchar(&uchar2, a2p, len2);
2188 
2189  result = ucol_strcoll(sss->locale->info.icu.ucol,
2190  uchar1, ulen1,
2191  uchar2, ulen2);
2192 
2193  pfree(uchar1);
2194  pfree(uchar2);
2195  }
2196 #else /* not USE_ICU */
2197  /* shouldn't happen */
2198  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2199 #endif /* not USE_ICU */
2200  }
2201  else
2202  {
2203 #ifdef HAVE_LOCALE_T
2204  result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
2205 #else
2206  /* shouldn't happen */
2207  elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2208 #endif
2209  }
2210  }
2211  else
2212  result = strcoll(sss->buf1, sss->buf2);
2213 
2214  /*
2215  * In some locales strcoll() can claim that nonidentical strings are
2216  * equal. Believing that would be bad news for a number of reasons, so we
2217  * follow Perl's lead and sort "equal" strings according to strcmp().
2218  */
2219  if (result == 0)
2220  result = strcmp(sss->buf1, sss->buf2);
2221 
2222  /* Cache result, perhaps saving an expensive strcoll() call next time */
2223  sss->cache_blob = false;
2224  sss->last_returned = result;
2225 done:
2226  /* We can't afford to leak memory here. */
2227  if (PointerGetDatum(arg1) != x)
2228  pfree(arg1);
2229  if (PointerGetDatum(arg2) != y)
2230  pfree(arg2);
2231 
2232  return result;
2233 }
2234 
2235 /*
2236  * Abbreviated key comparison func
2237  */
2238 static int
2240 {
2241  /*
2242  * When 0 is returned, the core system will call varstrfastcmp_c()
2243  * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale(). Even a
2244  * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2245  * authoritatively, for the same reason that there is a strcoll()
2246  * tie-breaker call to strcmp() in varstr_cmp().
2247  */
2248  if (x > y)
2249  return 1;
2250  else if (x == y)
2251  return 0;
2252  else
2253  return -1;
2254 }
2255 
2256 /*
2257  * Conversion routine for sortsupport. Converts original to abbreviated key
2258  * representation. Our encoding strategy is simple -- pack the first 8 bytes
2259  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2260  * stored in reverse order), and treat it as an unsigned integer. When the "C"
2261  * locale is used, or in case of bytea, just memcpy() from original instead.
2262  */
/*
 * Build the abbreviated key for "original" and return it as a Datum.
 *
 * Outline of the visible code:
 *	- the result Datum is zero-filled, then up to sizeof(Datum) leading
 *	  bytes are copied into it, either straight from the string
 *	  (sss->collate_c) or from a strxfrm()/ICU sort-key blob kept in
 *	  sss->buf2;
 *	- sss->buf1/buf2 are grown on demand and the previous input/blob pair is
 *	  reused when the same string is seen again;
 *	- two HyperLogLog counters (sss->full_card, sss->abbr_card) are updated
 *	  for use by varstr_abbrev_abort();
 *	- the result is passed through DatumBigEndianToNative(), and any
 *	  detoasted copy of the input is freed.
 *
 * NOTE(review): three lines are missing from this listing (embedded numbers
 * 2264, 2266 and 2356): the function name/parameter list, the declaration
 * of "sss", and the tail of the ICU "if" condition that guards the
 * icu_to_uchar() call. Confirm all three against upstream before relying on
 * this text.
 */
2263 static Datum
2265 {
2267  VarString *authoritative = DatumGetVarStringPP(original);
2268  char *authoritative_data = VARDATA_ANY(authoritative);
2269 
2270  /* working state */
2271  Datum res;
2272  char *pres;
2273  int len;
2274  uint32 hash;
2275 
2276  pres = (char *) &res;
2277  /* memset(), so any non-overwritten bytes are NUL */
2278  memset(pres, 0, sizeof(Datum));
2279  len = VARSIZE_ANY_EXHDR(authoritative);
2280 
2281  /* Get number of bytes, ignoring trailing spaces */
2282  if (sss->bpchar)
2283  len = bpchartruelen(authoritative_data, len);
2284 
2285  /*
2286  * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2287  * abbreviate keys. The full comparator for the C locale is always
2288  * memcmp(). It would be incorrect to allow bytea callers (callers that
2289  * always force the C collation -- bytea isn't a collatable type, but this
2290  * approach is convenient) to use strxfrm(). This is because bytea
2291  * strings may contain NUL bytes. Besides, this should be faster, too.
2292  *
2293  * More generally, it's okay that bytea callers can have NUL bytes in
2294  * strings because varstrcmp_abbrev() need not make a distinction between
2295  * terminating NUL bytes, and NUL bytes representing actual NULs in the
2296  * authoritative representation. Hopefully a comparison at or past one
2297  * abbreviated key's terminating NUL byte will resolve the comparison
2298  * without consulting the authoritative representation; specifically, some
2299  * later non-NUL byte in the longer string can resolve the comparison
2300  * against a subsequent terminating NUL in the shorter string. There will
2301  * usually be what is effectively a "length-wise" resolution there and
2302  * then.
2303  *
2304  * If that doesn't work out -- if all bytes in the longer string
2305  * positioned at or past the offset of the smaller string's (first)
2306  * terminating NUL are actually representative of NUL bytes in the
2307  * authoritative binary string (perhaps with some *terminating* NUL bytes
2308  * towards the end of the longer string iff it happens to still be small)
2309  * -- then an authoritative tie-breaker will happen, and do the right
2310  * thing: explicitly consider string length.
2311  */
2312  if (sss->collate_c)
2313  memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2314  else
2315  {
2316  Size bsize;
2317 #ifdef USE_ICU
2318  int32_t ulen = -1;
2319  UChar *uchar = NULL;
2320 #endif
2321 
2322  /*
2323  * We're not using the C collation, so fall back on strxfrm or ICU
2324  * analogs.
2325  */
2326 
2327  /* By convention, we use buffer 1 to store and NUL-terminate */
2328  if (len >= sss->buflen1)
2329  {
2330  pfree(sss->buf1);
2331  sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2332  sss->buf1 = palloc(sss->buflen1);
2333  }
2334 
2335  /* Might be able to reuse strxfrm() blob from last call */
2336  if (sss->last_len1 == len && sss->cache_blob &&
2337  memcmp(sss->buf1, authoritative_data, len) == 0)
2338  {
2339  memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2340  /* No change affecting cardinality, so no hashing required */
2341  goto done;
2342  }
2343 
2344  memcpy(sss->buf1, authoritative_data, len);
2345 
2346  /*
2347  * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
2348  * necessary for ICU, but doesn't hurt.
2349  */
2350  sss->buf1[len] = '\0';
2351  sss->last_len1 = len;
2352 
2353 #ifdef USE_ICU
2354  /* When using ICU and not UTF8, convert string to UChar. */
  /* NOTE(review): the last line of this condition is missing from the listing. */
2355  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
2357  ulen = icu_to_uchar(&uchar, sss->buf1, len);
2358 #endif
2359 
2360  /*
2361  * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
2362  * and try again. Both of these functions have the result buffer
2363  * content undefined if the result did not fit, so we need to retry
2364  * until everything fits, even though we only need the first few bytes
2365  * in the end. When using ucol_nextSortKeyPart(), however, we only
2366  * ask for as many bytes as we actually need.
2367  */
2368  for (;;)
2369  {
2370 #ifdef USE_ICU
2371  if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
2372  {
2373  /*
2374  * When using UTF8, use the iteration interface so we only
2375  * need to produce as many bytes as we actually need.
2376  */
2377  if (GetDatabaseEncoding() == PG_UTF8)
2378  {
2379  UCharIterator iter;
2380  uint32_t state[2];
2381  UErrorCode status;
2382 
2383  uiter_setUTF8(&iter, sss->buf1, len);
2384  state[0] = state[1] = 0; /* won't need that again */
2385  status = U_ZERO_ERROR;
2386  bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
2387  &iter,
2388  state,
2389  (uint8_t *) sss->buf2,
2390  Min(sizeof(Datum), sss->buflen2),
2391  &status);
2392  if (U_FAILURE(status))
2393  ereport(ERROR,
2394  (errmsg("sort key generation failed: %s",
2395  u_errorName(status))));
2396  }
2397  else
2398  bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
2399  uchar, ulen,
2400  (uint8_t *) sss->buf2, sss->buflen2);
2401  }
2402  else
2403 #endif
2404 #ifdef HAVE_LOCALE_T
2405  if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
2406  bsize = strxfrm_l(sss->buf2, sss->buf1,
2407  sss->buflen2, sss->locale->info.lt);
2408  else
2409 #endif
2410  bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2411 
  /* Remember the blob length for the reuse path above; exit once it fit. */
2412  sss->last_len2 = bsize;
2413  if (bsize < sss->buflen2)
2414  break;
2415 
2416  /*
2417  * Grow buffer and retry.
2418  */
2419  pfree(sss->buf2);
2420  sss->buflen2 = Max(bsize + 1,
2421  Min(sss->buflen2 * 2, MaxAllocSize));
2422  sss->buf2 = palloc(sss->buflen2);
2423  }
2424 
2425  /*
2426  * Every Datum byte is always compared. This is safe because the
2427  * strxfrm() blob is itself NUL terminated, leaving no danger of
2428  * misinterpreting any NUL bytes not intended to be interpreted as
2429  * logically representing termination.
2430  *
2431  * (Actually, even if there were NUL bytes in the blob it would be
2432  * okay. See remarks on bytea case above.)
2433  */
2434  memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2435 
2436 #ifdef USE_ICU
2437  if (uchar)
2438  pfree(uchar);
2439 #endif
2440  }
2441 
2442  /*
2443  * Maintain approximate cardinality of both abbreviated keys and original,
2444  * authoritative keys using HyperLogLog. Used as cheap insurance against
2445  * the worst case, where we do many string transformations for no saving
2446  * in full strcoll()-based comparisons. These statistics are used by
2447  * varstr_abbrev_abort().
2448  *
2449  * First, Hash key proper, or a significant fraction of it. Mix in length
2450  * in order to compensate for cases where differences are past
2451  * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2452  */
2453  hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2454  Min(len, PG_CACHE_LINE_SIZE)));
2455 
2456  if (len > PG_CACHE_LINE_SIZE)
2457  hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2458 
2459  addHyperLogLog(&sss->full_card, hash);
2460 
2461  /* Hash abbreviated key */
2462 #if SIZEOF_DATUM == 8
2463  {
2464  uint32 lohalf,
2465  hihalf;
2466 
  /* Fold the two 32-bit halves together so that all 8 bytes feed the hash. */
2467  lohalf = (uint32) res;
2468  hihalf = (uint32) (res >> 32);
2469  hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2470  }
2471 #else /* SIZEOF_DATUM != 8 */
2472  hash = DatumGetUInt32(hash_uint32((uint32) res));
2473 #endif
2474 
2475  addHyperLogLog(&sss->abbr_card, hash);
2476 
2477  /* Cache result, perhaps saving an expensive strxfrm() call next time */
2478  sss->cache_blob = true;
  /* The blob-reuse path jumps here, skipping both HyperLogLog updates. */
2479 done:
2480 
2481  /*
2482  * Byteswap on little-endian machines.
2483  *
2484  * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2485  * comparator) works correctly on all platforms. If we didn't do this,
2486  * the comparator would have to call memcmp() with a pair of pointers to
2487  * the first byte of each abbreviated key, which is slower.
2488  */
2489  res = DatumBigEndianToNative(res);
2490 
2491  /* Don't leak memory here */
2492  if (PointerGetDatum(authoritative) != original)
2493  pfree(authoritative);
2494 
2495  return res;
2496 }
2497 
2498 /*
2499  * Callback for estimating effectiveness of abbreviated key optimization, using
2500  * heuristic rules. Returns value indicating if the abbreviation optimization
2501  * should be aborted, based on its projected effectiveness.
2502  */
/*
 * Decide whether to give up on abbreviated keys.
 *
 * memtupcount: number of tuples seen so far; fewer than 100 always yields
 *		"false" (keep going).
 * ssup: sort support state; must have ssup->abbreviate set (asserted).
 *
 * Returns false while the estimated number of distinct abbreviated keys
 * exceeds key_distinct * sss->prop_card, true (abort) otherwise. As a side
 * effect, sss->prop_card is multiplied by 0.65 on a "keep going" decision
 * once memtupcount exceeds 10000.
 *
 * NOTE(review): the declaration of "sss" (embedded line 2506) is missing
 * from this listing; confirm it against upstream.
 */
2503 static bool
2504 varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2505 {
2507  double abbrev_distinct,
2508  key_distinct;
2509 
2510  Assert(ssup->abbreviate);
2511 
2512  /* Have a little patience */
2513  if (memtupcount < 100)
2514  return false;
2515 
2516  abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2517  key_distinct = estimateHyperLogLog(&sss->full_card);
2518 
2519  /*
2520  * Clamp cardinality estimates to at least one distinct value. While
2521  * NULLs are generally disregarded, if only NULL values were seen so far,
2522  * that might misrepresent costs if we failed to clamp.
2523  */
2524  if (abbrev_distinct <= 1.0)
2525  abbrev_distinct = 1.0;
2526 
2527  if (key_distinct <= 1.0)
2528  key_distinct = 1.0;
2529 
2530  /*
2531  * In the worst case all abbreviated keys are identical, while at the same
2532  * time there are differences within full key strings not captured in
2533  * abbreviations.
2534  */
2535 #ifdef TRACE_SORT
2536  if (trace_sort)
2537  {
2538  double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2539 
2540  elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2541  "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2542  memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2543  sss->prop_card);
2544  }
2545 #endif
2546 
2547  /*
2548  * If the number of distinct abbreviated keys approximately matches the
2549  * number of distinct authoritative original keys, that's reason enough to
2550  * proceed. We can win even with a very low cardinality set if most
2551  * tie-breakers only memcmp(). This is by far the most important
2552  * consideration.
2553  *
2554  * While comparisons that are resolved at the abbreviated key level are
2555  * considerably cheaper than tie-breakers resolved with memcmp(), both of
2556  * those two outcomes are so much cheaper than a full strcoll() once
2557  * sorting is underway that it doesn't seem worth it to weigh abbreviated
2558  * cardinality against the overall size of the set in order to more
2559  * accurately model costs. Assume that an abbreviated comparison, and an
2560  * abbreviated comparison with a cheap memcmp()-based authoritative
2561  * resolution are equivalent.
2562  */
2563  if (abbrev_distinct > key_distinct * sss->prop_card)
2564  {
2565  /*
2566  * When we have exceeded 10,000 tuples, decay required cardinality
2567  * aggressively for next call.
2568  *
2569  * This is useful because the number of comparisons required on
2570  * average increases at a linearithmic rate, and at roughly 10,000
2571  * tuples that factor will start to dominate over the linear costs of
2572  * string transformation (this is a conservative estimate). The decay
2573  * rate is chosen to be a little less aggressive than halving -- which
2574  * (since we're called at points at which memtupcount has doubled)
2575  * would never see the cost model actually abort past the first call
2576  * following a decay. This decay rate is mostly a precaution against
2577  * a sudden, violent swing in how well abbreviated cardinality tracks
2578  * full key cardinality. The decay also serves to prevent a marginal
2579  * case from being aborted too late, when too much has already been
2580  * invested in string transformation.
2581  *
2582  * It's possible for sets of several million distinct strings with
2583  * mere tens of thousands of distinct abbreviated keys to still
2584  * benefit very significantly. This will generally occur provided
2585  * each abbreviated key is a proxy for a roughly uniform number of the
2586  * set's full keys. If it isn't so, we hope to catch that early and
2587  * abort. If it isn't caught early, by the time the problem is
2588  * apparent it's probably not worth aborting.
2589  */
2590  if (memtupcount > 10000)
2591  sss->prop_card *= 0.65;
2592 
2593  return false;
2594  }
2595 
2596  /*
2597  * Abort abbreviation strategy.
2598  *
2599  * The worst case, where all abbreviated keys are identical while all
2600  * original strings differ will typically only see a regression of about
2601  * 10% in execution time for small to medium sized lists of strings.
2602  * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2603  * often expect very large improvements, particularly with sets of strings
2604  * of moderately high to high abbreviated cardinality. There is little to
2605  * lose but much to gain, which our strategy reflects.
2606  */
2607 #ifdef TRACE_SORT
2608  if (trace_sort)
2609  elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2610  "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2611  memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2612 #endif
2613 
2614  return true;
2615 }
2616 
/*
 * Return whichever of the two text arguments compares greater under the
 * call's collation; when text_cmp() reports them equal or arg1 smaller,
 * arg2 is returned.
 *
 * NOTE(review): the function-name line (embedded line 2618) is missing from
 * this listing; presumably this is text_larger() -- confirm upstream.
 */
2617 Datum
2619 {
2620  text *arg1 = PG_GETARG_TEXT_PP(0);
2621  text *arg2 = PG_GETARG_TEXT_PP(1);
2622  text *result;
2623 
2624  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2625 
2626  PG_RETURN_TEXT_P(result);
2627 }
2628 
/*
 * Return whichever of the two text arguments compares smaller under the
 * call's collation; when text_cmp() reports them equal or arg1 greater,
 * arg2 is returned.
 *
 * NOTE(review): the function-name line (embedded line 2630) is missing from
 * this listing; presumably this is text_smaller() -- confirm upstream.
 */
2629 Datum
2631 {
2632  text *arg1 = PG_GETARG_TEXT_PP(0);
2633  text *arg2 = PG_GETARG_TEXT_PP(1);
2634  text *result;
2635 
2636  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2637 
2638  PG_RETURN_TEXT_P(result);
2639 }
2640 
2641 
2642 /*
2643  * The following operators support character-by-character comparison
2644  * of text datums, to allow building indexes suitable for LIKE clauses.
2645  * Note that the regular texteq/textne comparison operators, and regular
2646  * support functions 1 and 2 with "C" collation are assumed to be
2647  * compatible with these!
2648  */
2649 
/*
 * Byte-wise comparison of arg1 and arg2: memcmp() over the common prefix,
 * with the shorter string sorting first on a tie. Returns the memcmp()
 * result when the prefixes differ (any negative/positive value), otherwise
 * -1, 1 or 0 according to the lengths.
 *
 * NOTE(review): the name/parameter line (embedded line 2651) is missing
 * from this listing; the callers below use the name
 * internal_text_pattern_compare(arg1, arg2).
 */
2650 static int
2652 {
2653  int result;
2654  int len1,
2655  len2;
2656 
2657  len1 = VARSIZE_ANY_EXHDR(arg1);
2658  len2 = VARSIZE_ANY_EXHDR(arg2);
2659 
2660  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2661  if (result != 0)
2662  return result;
2663  else if (len1 < len2)
2664  return -1;
2665  else if (len1 > len2)
2666  return 1;
2667  else
2668  return 0;
2669 }
2670 
2671 
/*
 * Pattern-comparison "less than": true when
 * internal_text_pattern_compare(arg1, arg2) is negative. Detoasted copies
 * of the arguments are released before returning.
 *
 * NOTE(review): the function-name line (embedded line 2673) is missing from
 * this listing -- confirm the name upstream.
 */
2672 Datum
2674 {
2675  text *arg1 = PG_GETARG_TEXT_PP(0);
2676  text *arg2 = PG_GETARG_TEXT_PP(1);
2677  int result;
2678 
2679  result = internal_text_pattern_compare(arg1, arg2);
2680 
2681  PG_FREE_IF_COPY(arg1, 0);
2682  PG_FREE_IF_COPY(arg2, 1);
2683 
2684  PG_RETURN_BOOL(result < 0);
2685 }
2686 
2687 
/*
 * Pattern-comparison "less than or equal": true when
 * internal_text_pattern_compare(arg1, arg2) is zero or negative.
 *
 * NOTE(review): the function-name line (embedded line 2689) is missing from
 * this listing -- confirm the name upstream.
 */
2688 Datum
2690 {
2691  text *arg1 = PG_GETARG_TEXT_PP(0);
2692  text *arg2 = PG_GETARG_TEXT_PP(1);
2693  int result;
2694 
2695  result = internal_text_pattern_compare(arg1, arg2);
2696 
2697  PG_FREE_IF_COPY(arg1, 0);
2698  PG_FREE_IF_COPY(arg2, 1);
2699 
2700  PG_RETURN_BOOL(result <= 0);
2701 }
2702 
2703 
/*
 * Pattern-comparison "greater than or equal": true when
 * internal_text_pattern_compare(arg1, arg2) is zero or positive.
 *
 * NOTE(review): the function-name line (embedded line 2705) is missing from
 * this listing -- confirm the name upstream.
 */
2704 Datum
2706 {
2707  text *arg1 = PG_GETARG_TEXT_PP(0);
2708  text *arg2 = PG_GETARG_TEXT_PP(1);
2709  int result;
2710 
2711  result = internal_text_pattern_compare(arg1, arg2);
2712 
2713  PG_FREE_IF_COPY(arg1, 0);
2714  PG_FREE_IF_COPY(arg2, 1);
2715 
2716  PG_RETURN_BOOL(result >= 0);
2717 }
2718 
2719 
/*
 * Pattern-comparison "greater than": true when
 * internal_text_pattern_compare(arg1, arg2) is positive.
 *
 * NOTE(review): the function-name line (embedded line 2721) is missing from
 * this listing -- confirm the name upstream.
 */
2720 Datum
2722 {
2723  text *arg1 = PG_GETARG_TEXT_PP(0);
2724  text *arg2 = PG_GETARG_TEXT_PP(1);
2725  int result;
2726 
2727  result = internal_text_pattern_compare(arg1, arg2);
2728 
2729  PG_FREE_IF_COPY(arg1, 0);
2730  PG_FREE_IF_COPY(arg2, 1);
2731 
2732  PG_RETURN_BOOL(result > 0);
2733 }
2734 
2735 
/*
 * Three-way pattern comparison: returns the raw int32 result of
 * internal_text_pattern_compare(arg1, arg2) (negative, zero or positive).
 *
 * NOTE(review): the function-name line (embedded line 2737) is missing from
 * this listing -- confirm the name upstream.
 */
2736 Datum
2738 {
2739  text *arg1 = PG_GETARG_TEXT_PP(0);
2740  text *arg2 = PG_GETARG_TEXT_PP(1);
2741  int result;
2742 
2743  result = internal_text_pattern_compare(arg1, arg2);
2744 
2745  PG_FREE_IF_COPY(arg1, 0);
2746  PG_FREE_IF_COPY(arg2, 1);
2747 
2748  PG_RETURN_INT32(result);
2749 }
2750 
2751 
/*
 * Sort-support setup for pattern comparison: runs varstr_sortsupport() with
 * the "C" collation forced, inside ssup->ssup_cxt, then restores the
 * previous memory context. Returns void.
 *
 * NOTE(review): the function-name line and the declaration of "ssup"
 * (embedded lines 2753 and 2755) are missing from this listing -- confirm
 * both upstream.
 */
2752 Datum
2754 {
2756  MemoryContext oldcontext;
2757 
2758  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2759 
2760  /* Use generic string SortSupport, forcing "C" collation */
2761  varstr_sortsupport(ssup, C_COLLATION_OID, false);
2762 
2763  MemoryContextSwitchTo(oldcontext);
2764 
2765  PG_RETURN_VOID();
2766 }
2767 
2768 
2769 /*-------------------------------------------------------------
2770  * byteaoctetlen
2771  *
2772  * get the number of bytes contained in an instance of type 'bytea'
2773  *-------------------------------------------------------------
2774  */
/*
 * Takes argument 0 as a raw Datum, deliberately without detoasting it.
 *
 * NOTE(review): both the function-name line and the return statement
 * (embedded lines 2776 and 2781) are missing from this listing, so the
 * actual length computation is not visible here -- confirm upstream.
 */
2775 Datum
2777 {
2778  Datum str = PG_GETARG_DATUM(0);
2779 
2780  /* We need not detoast the input at all */
2782 }
2783 
2784 /*
2785  * byteacat -
2786  * takes two bytea* and returns a bytea* that is the concatenation of
2787  * the two.
2788  *
2789  * Cloned from textcat and modified as required.
2790  */
/*
 * Fetches the two bytea arguments (possibly in short-header form).
 *
 * NOTE(review): the function-name line and the return statement (embedded
 * lines 2792 and 2797) are missing from this listing; per the header
 * comment the result is the concatenation of t1 and t2, presumably produced
 * by bytea_catenate() -- confirm upstream.
 */
2791 Datum
2793 {
2794  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2795  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2796 
2798 }
2799 
2800 /*
2801  * bytea_catenate
2802  * Guts of byteacat(), broken out so it can be used by other functions
2803  *
2804  * Arguments can be in short-header form, but not compressed or out-of-line
2805  */
/*
 * Allocate and return a new bytea holding the bytes of t1 followed by the
 * bytes of t2. The result always has a full-size (VARHDRSZ) header; the
 * inputs are read with the *_ANY macros, so they may use short headers.
 *
 * NOTE(review): the name/parameter line (embedded line 2807) is missing
 * from this listing; the body uses parameters t1 and t2, and callers in
 * this file use the name bytea_catenate().
 */
2806 static bytea *
2808 {
2809  bytea *result;
2810  int len1,
2811  len2,
2812  len;
2813  char *ptr;
2814 
2815  len1 = VARSIZE_ANY_EXHDR(t1);
2816  len2 = VARSIZE_ANY_EXHDR(t2);
2817 
2818  /* paranoia ... probably should throw error instead? */
2819  if (len1 < 0)
2820  len1 = 0;
2821  if (len2 < 0)
2822  len2 = 0;
2823 
  /* Total allocation = both payloads plus one full-size varlena header. */
2824  len = len1 + len2 + VARHDRSZ;
2825  result = (bytea *) palloc(len);
2826 
2827  /* Set size of result string... */
2828  SET_VARSIZE(result, len);
2829 
2830  /* Fill data field of result string... */
2831  ptr = VARDATA(result);
2832  if (len1 > 0)
2833  memcpy(ptr, VARDATA_ANY(t1), len1);
2834  if (len2 > 0)
2835  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2836 
2837  return result;
2838 }
2839 
/*
 * Convert a C string to a bytea pointer by passing it through byteain().
 */
2840 #define PG_STR_GET_BYTEA(str_) \
2841  DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2842 
2843 /*
2844  * bytea_substr()
2845  * Return a substring starting at the specified position.
2846  * Cloned from text_substr and modified as required.
2847  *
2848  * Input:
2849  * - string
2850  * - starting position (is one-based)
2851  * - string length (optional)
2852  *
2853  * If the starting position is zero or less, then return from the start of the string
2854  * adjusting the length to be consistent with the "negative start" per SQL.
2855  * If the length is less than zero, an ERROR is thrown. If no third argument
2856  * (length) is provided, the length to the end of the string is assumed.
2857  */
/*
 * Three-argument form: passes int32 arguments 1 and 2 (start position and
 * length) plus length_not_specified = false.
 *
 * NOTE(review): the function-name line and the first line of the return
 * statement (embedded lines 2859 and 2861) are missing from this listing;
 * presumably the call is to bytea_substring() with argument 0 as the
 * string -- confirm upstream.
 */
2858 Datum
2860 {
2862  PG_GETARG_INT32(1),
2863  PG_GETARG_INT32(2),
2864  false));
2865 }
2866 
2867 /*
2868  * bytea_substr_no_len -
2869  * Wrapper to avoid opr_sanity failure due to
2870  * one function accepting a different number of args.
2871  */
/*
 * Two-argument form: passes int32 argument 1 (start position), a dummy
 * length of -1, and length_not_specified = true.
 *
 * NOTE(review): the function-name line and the first line of the return
 * statement (embedded lines 2873 and 2875) are missing from this listing;
 * presumably the call is to bytea_substring() with argument 0 as the
 * string -- confirm upstream.
 */
2872 Datum
2874 {
2876  PG_GETARG_INT32(1),
2877  -1,
2878  true));
2879 }
2880 
2881 static bytea *
2883  int S,
2884  int L,
2885  bool length_not_specified)
2886 {
2887  int S1; /* adjusted start position */
2888  int L1; /* adjusted substring length */
2889 
2890  S1 = Max(S, 1);
2891 
2892  if (length_not_specified)
2893  {
2894  /*
2895  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2896  * end of the string if we pass it a negative value for length.
2897  */
2898  L1 = -1;
2899  }
2900  else
2901  {
2902  /* end position */
2903  int E = S + L;
2904 
2905  /*
2906  * A negative value for L is the only way for the end position to be
2907  * before the start. SQL99 says to throw an error.
2908  */
2909  if (E < S)
2910  ereport(ERROR,
2911  (errcode(ERRCODE_SUBSTRING_ERROR),
2912  errmsg("negative substring length not allowed")));
2913 
2914  /*
2915  * A zero or negative value for the end position can happen if the
2916  * start was negative or one. SQL99 says to return a zero-length
2917  * string.
2918  */
2919  if (E < 1)
2920  return PG_STR_GET_BYTEA("");
2921 
2922  L1 = E - S1;
2923  }
2924 
2925  /*
2926  * If the start position is past the end of the string, SQL99 says to
2927  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2928  * us. Convert to zero-based starting position
2929  */
2930  return DatumGetByteaPSlice(str, S1 - 1, L1);
2931 }
2932 
2933 /*
2934  * byteaoverlay
2935  * Replace specified substring of first string with second
2936  *
2937  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2938  * This code is a direct implementation of what the standard says.
2939  */
/*
 * Four-argument form: fetches both bytea values, the start position (sp)
 * and the length (sl), and returns bytea_overlay(t1, t2, sp, sl).
 *
 * NOTE(review): the function-name line (embedded line 2941) is missing from
 * this listing; presumably byteaoverlay() -- confirm upstream.
 */
2940 Datum
2942 {
2943  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2944  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2945  int sp = PG_GETARG_INT32(2); /* substring start position */
2946  int sl = PG_GETARG_INT32(3); /* substring length */
2947 
2948  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2949 }
2950 
/*
 * Three-argument form: like the four-argument variant, but the length of
 * the replaced span defaults to the byte length of t2.
 *
 * NOTE(review): the function-name line (embedded line 2952) is missing from
 * this listing; presumably byteaoverlay_no_len() -- confirm upstream.
 */
2951 Datum
2953 {
2954  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2955  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2956  int sp = PG_GETARG_INT32(2); /* substring start position */
2957  int sl;
2958 
2959  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2960  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2961 }
2962 
2963 static bytea *
2964 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2965 {
2966  bytea *result;
2967  bytea *s1;
2968  bytea *s2;
2969  int sp_pl_sl;
2970 
2971  /*
2972  * Check for possible integer-overflow cases. For negative sp, throw a
2973  * "substring length" error because that's what should be expected
2974  * according to the spec's definition of OVERLAY().
2975  */
2976  if (sp <= 0)
2977  ereport(ERROR,
2978  (errcode(ERRCODE_SUBSTRING_ERROR),
2979  errmsg("negative substring length not allowed")));
2980  if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
2981  ereport(ERROR,
2982  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2983  errmsg("integer out of range")));
2984 
2985  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2986  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2987  result = bytea_catenate(s1, t2);
2988  result = bytea_catenate(result, s2);
2989 
2990  return result;
2991 }
2992 
2993 /*
2994  * byteapos -
2995  * Return the position of the specified substring.
2996  * Implements the SQL POSITION() function.
2997  * Cloned from textpos and modified as required.
2998  */
/*
 * Naive forward search for t2 inside t1.
 *
 * Returns the one-based byte offset of the first match, 0 when there is no
 * match (including when t2 is longer than t1), and 1 when t2 is empty.
 *
 * NOTE(review): the function-name line (embedded line 3000) is missing from
 * this listing; per the header comment this is byteapos().
 */
2999 Datum
3001 {
3002  bytea *t1 = PG_GETARG_BYTEA_PP(0);
3003  bytea *t2 = PG_GETARG_BYTEA_PP(1);
3004  int pos;
3005  int px,
3006  p;
3007  int len1,
3008  len2;
3009  char *p1,
3010  *p2;
3011 
3012  len1 = VARSIZE_ANY_EXHDR(t1);
3013  len2 = VARSIZE_ANY_EXHDR(t2);
3014 
3015  if (len2 <= 0)
3016  PG_RETURN_INT32(1); /* result for empty pattern */
3017 
3018  p1 = VARDATA_ANY(t1);
3019  p2 = VARDATA_ANY(t2);
3020 
3021  pos = 0;
  /* px is the last start offset at which t2 can still fit inside t1. */
3022  px = (len1 - len2);
3023  for (p = 0; p <= px; p++)
3024  {
  /* Cheap first-byte test before the full memcmp(). */
3025  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
3026  {
3027  pos = p + 1;
3028  break;
3029  };
3030  p1++;
3031  };
3032 
3033  PG_RETURN_INT32(pos);
3034 }
3035 
3036 /*-------------------------------------------------------------
3037  * byteaGetByte
3038  *
3039  * this routine treats "bytea" as an array of bytes.
3040  * It returns the Nth byte (a number between 0 and 255).
3041  *-------------------------------------------------------------
3042  */
/*
 * Return byte number n (zero-based) of the bytea as an int32 in 0..255.
 * Raises ERRCODE_ARRAY_SUBSCRIPT_ERROR when n is outside 0..len-1.
 *
 * NOTE(review): the function-name line (embedded line 3044) is missing from
 * this listing; per the header comment this is byteaGetByte().
 */
3043 Datum
3045 {
3046  bytea *v = PG_GETARG_BYTEA_PP(0);
3047  int32 n = PG_GETARG_INT32(1);
3048  int len;
3049  int byte;
3050 
3051  len = VARSIZE_ANY_EXHDR(v);
3052 
3053  if (n < 0 || n >= len)
3054  ereport(ERROR,
3055  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3056  errmsg("index %d out of valid range, 0..%d",
3057  n, len - 1)));
3058 
  /* Read through unsigned char so the value is never negative. */
3059  byte = ((unsigned char *) VARDATA_ANY(v))[n];
3060 
3061  PG_RETURN_INT32(byte);
3062 }
3063 
3064 /*-------------------------------------------------------------
3065  * byteaGetBit
3066  *
3067  * This routine treats a "bytea" type like an array of bits.
3068  * It returns the value of the Nth bit (0 or 1).
3069  *
3070  *-------------------------------------------------------------
3071  */
/*
 * Return bit number n (zero-based) of the bytea as 0 or 1. Bit n lives in
 * byte n / 8, and bit n % 8 == 0 is that byte's least significant bit.
 * Raises ERRCODE_ARRAY_SUBSCRIPT_ERROR when n is outside 0..len*8-1.
 *
 * NOTE(review): the function-name line (embedded line 3073) is missing from
 * this listing; per the header comment this is byteaGetBit().
 *
 * NOTE(review): "len * 8" is evaluated in int, so it would overflow for
 * values with 2^28 or more bytes -- confirm whether such inputs can reach
 * this function.
 */
3072 Datum
3074 {
3075  bytea *v = PG_GETARG_BYTEA_PP(0);
3076  int32 n = PG_GETARG_INT32(1);
3077  int byteNo,
3078  bitNo;
3079  int len;
3080  int byte;
3081 
3082  len = VARSIZE_ANY_EXHDR(v);
3083 
3084  if (n < 0 || n >= len * 8)
3085  ereport(ERROR,
3086  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3087  errmsg("index %d out of valid range, 0..%d",
3088  n, len * 8 - 1)));
3089 
3090  byteNo = n / 8;
3091  bitNo = n % 8;
3092 
3093  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3094 
3095  if (byte & (1 << bitNo))
3096  PG_RETURN_INT32(1);
3097  else
3098  PG_RETURN_INT32(0);
3099 }
3100 
3101 /*-------------------------------------------------------------
3102  * byteaSetByte
3103  *
3104  * Given an instance of type 'bytea' creates a new one with
3105  * the Nth byte set to the given value.
3106  *
3107  *-------------------------------------------------------------
3108  */
/*
 * Return the bytea fetched via PG_GETARG_BYTEA_P_COPY with byte n
 * (zero-based) replaced by newByte. Raises ERRCODE_ARRAY_SUBSCRIPT_ERROR
 * when n is outside 0..len-1. newByte is not range-checked; the store
 * through an unsigned char pointer keeps only its low 8 bits.
 *
 * NOTE(review): the function-name line (embedded line 3110) is missing from
 * this listing; per the header comment this is byteaSetByte().
 */
3109 Datum
3111 {
3112  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3113  int32 n = PG_GETARG_INT32(1);
3114  int32 newByte = PG_GETARG_INT32(2);
3115  int len;
3116 
3117  len = VARSIZE(res) - VARHDRSZ;
3118 
3119  if (n < 0 || n >= len)
3120  ereport(ERROR,
3121  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3122  errmsg("index %d out of valid range, 0..%d",
3123  n, len - 1)));
3124 
3125  /*
3126  * Now set the byte.
3127  */
3128  ((unsigned char *) VARDATA(res))[n] = newByte;
3129 
3130  PG_RETURN_BYTEA_P(res);
3131 }
3132 
3133 /*-------------------------------------------------------------
3134  * byteaSetBit
3135  *
3136  * Given an instance of type 'bytea' creates a new one with
3137  * the Nth bit set to the given value.
3138  *
3139  *-------------------------------------------------------------
3140  */
/*
 * Return the bytea fetched via PG_GETARG_BYTEA_P_COPY with bit n
 * (zero-based) set to newBit. Bit n lives in byte n / 8, with n % 8 == 0
 * being that byte's least significant bit.
 *
 * Raises ERRCODE_ARRAY_SUBSCRIPT_ERROR when n is outside 0..len*8-1 and
 * ERRCODE_INVALID_PARAMETER_VALUE when newBit is neither 0 nor 1. The index
 * check runs first.
 *
 * NOTE(review): the function-name line (embedded line 3142) is missing from
 * this listing; per the header comment this is byteaSetBit().
 *
 * NOTE(review): "len * 8" is evaluated in int, so it would overflow for
 * values with 2^28 or more bytes -- confirm whether such inputs can reach
 * this function.
 */
3141 Datum
3143 {
3144  bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3145  int32 n = PG_GETARG_INT32(1);
3146  int32 newBit = PG_GETARG_INT32(2);
3147  int len;
3148  int oldByte,
3149  newByte;
3150  int byteNo,
3151  bitNo;
3152 
3153  len = VARSIZE(res) - VARHDRSZ;
3154 
3155  if (n < 0 || n >= len * 8)
3156  ereport(ERROR,
3157  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3158  errmsg("index %d out of valid range, 0..%d",
3159  n, len * 8 - 1)));
3160 
3161  byteNo = n / 8;
3162  bitNo = n % 8;
3163 
3164  /*
3165  * sanity check!
3166  */
3167  if (newBit != 0 && newBit != 1)
3168  ereport(ERROR,
3169  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3170  errmsg("new bit must be 0 or 1")));
3171 
3172  /*
3173  * Update the byte.
3174  */
3175  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3176 
  /* Clear the target bit with AND-NOT, or set it with OR. */
3177  if (newBit == 0)
3178  newByte = oldByte & (~(1 << bitNo));
3179  else
3180  newByte = oldByte | (1 << bitNo);
3181 
3182  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3183 
3184  PG_RETURN_BYTEA_P(res);
3185 }
3186 
3187 
3188 /* text_name()
3189  * Converts a text type to a Name type.
3190  */
/*
 * Copy the text argument into a freshly allocated, zero-filled
 * NAMEDATALEN-byte Name. Input of NAMEDATALEN bytes or more is clipped with
 * pg_mbcliplen() to at most NAMEDATALEN - 1 bytes first.
 *
 * NOTE(review): the function-name line (embedded line 3192) is missing from
 * this listing; per the header comment this is text_name().
 */
3191 Datum
3193 {
3194  text *s = PG_GETARG_TEXT_PP(0);
3195  Name result;
3196  int len;
3197 
3198  len = VARSIZE_ANY_EXHDR(s);
3199 
3200  /* Truncate oversize input */
3201  if (len >= NAMEDATALEN)
3202  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3203 
3204  /* We use palloc0 here to ensure result is zero-padded */
3205  result = (Name) palloc0(NAMEDATALEN);
3206  memcpy(NameStr(*result), VARDATA_ANY(s), len);
3207 
3208  PG_RETURN_NAME(result);
3209 }
3210 
3211 /* name_text()
3212  * Converts a Name type to a text type.
3213  */
/*
 * Fetches the Name argument.
 *
 * NOTE(review): the function-name line and the return statement (embedded
 * lines 3215 and 3219) are missing from this listing; per the header
 * comment the Name is converted to text -- confirm the exact call upstream.
 */
3214 Datum
3216 {
3217  Name s = PG_GETARG_NAME(0);
3218 
3220 }
3221 
3222 
3223 /*
3224  * textToQualifiedNameList - convert a text object to list of names
3225  *
3226  * This implements the input parsing needed by nextval() and other
3227  * functions that take a text parameter representing a qualified name.
3228  * We split the name at dots, downcase if not double-quoted, and
3229  * truncate names if they're too long.
3230  */
/*
 * Split the text value at dots with SplitIdentifierString() and return a
 * new List of String nodes, one per name component, each holding its own
 * pstrdup'd copy. Raises ERRCODE_INVALID_NAME ("invalid name syntax") when
 * the input fails to parse or yields no names. The scratch C string and the
 * intermediate list are freed before returning.
 *
 * NOTE(review): the name/parameter line (embedded line 3232) is missing
 * from this listing; the body uses a parameter named "textval", and the
 * header comment names the function textToQualifiedNameList.
 */
3231 List *
3233 {
3234  char *rawname;
3235  List *result = NIL;
3236  List *namelist;
3237  ListCell *l;
3238 
3239  /* Convert to C string (handles possible detoasting). */
3240  /* Note we rely on being able to modify rawname below. */
3241  rawname = text_to_cstring(textval);
3242 
3243  if (!SplitIdentifierString(rawname, '.', &namelist))
3244  ereport(ERROR,
3245  (errcode(ERRCODE_INVALID_NAME),
3246  errmsg("invalid name syntax")));
3247 
3248  if (namelist == NIL)
3249  ereport(ERROR,
3250  (errcode(ERRCODE_INVALID_NAME),
3251  errmsg("invalid name syntax")));
3252 
3253  foreach(l, namelist)
3254  {
3255  char *curname = (char *) lfirst(l);
3256 
  /* Copy each name: namelist entries point into rawname, freed below. */
3257  result = lappend(result, makeString(pstrdup(curname)));
3258  }
3259 
3260  pfree(rawname);
3261  list_free(namelist);
3262 
3263  return result;
3264 }
3265 
3266 /*
3267  * SplitIdentifierString --- parse a string containing identifiers
3268  *
3269  * This is the guts of textToQualifiedNameList, and is exported for use in
3270  * other situations such as parsing GUC variables. In the GUC case, it's
3271  * important to avoid memory leaks, so the API is designed to minimize the
3272  * amount of stuff that needs to be allocated and freed.
3273  *
3274  * Inputs:
3275  * rawstring: the input string; must be overwritable! On return, it's
3276  * been modified to contain the separated identifiers.
3277  * separator: the separator punctuation expected between identifiers
3278  * (typically '.' or ','). Whitespace may also appear around
3279  * identifiers.
3280  * Outputs:
3281  * namelist: filled with a palloc'd list of pointers to identifiers within
3282  * rawstring. Caller should list_free() this even on error return.
3283  *
3284  * Returns true if okay, false if there is a syntax error in the string.
3285  *
3286  * Note that an empty string is considered okay here, though not in
3287  * textToQualifiedNameList.
3288  */
3289 bool
3290 SplitIdentifierString(char *rawstring, char separator,
3291  List **namelist)
3292 {
3293  char *nextp = rawstring;
3294  bool done = false;
3295 
3296  *namelist = NIL;
3297 
3298  while (scanner_isspace(*nextp))
3299  nextp++; /* skip leading whitespace */
3300 
3301  if (*nextp == '\0')
3302  return true; /* allow empty string */
3303 
3304  /* At the top of the loop, we are at start of a new identifier. */
3305  do
3306  {
3307  char *curname;
3308  char *endp;
3309 
3310  if (*nextp == '"')
3311  {
3312  /* Quoted name --- collapse quote-quote pairs, no downcasing */
3313  curname = nextp + 1;
3314  for (;;)
3315  {
3316  endp = strchr(nextp + 1, '"');
3317  if (endp == NULL)
3318  return false; /* mismatched quotes */
3319  if (endp[1] != '"')
3320  break; /* found end of quoted name */
3321  /* Collapse adjacent quotes into one quote, and look again */
3322  memmove(endp, endp + 1, strlen(endp));
3323  nextp = endp;
3324  }
3325  /* endp now points at the terminating quote */
3326  nextp = endp + 1;
3327  }
3328  else
3329  {
3330  /* Unquoted name --- extends to separator or whitespace */
3331  char *downname;
3332  int len;
3333 
3334  curname = nextp;
3335  while (*nextp && *nextp != separator &&
3336  !scanner_isspace(*nextp))
3337  nextp++;
3338  endp = nextp;
3339  if (curname == nextp)
3340  return false; /* empty unquoted name not allowed */
3341 
3342  /*
3343  * Downcase the identifier, using same code as main lexer does.
3344  *
3345  * XXX because we want to overwrite the input in-place, we cannot
3346  * support a downcasing transformation that increases the string
3347  * length. This is not a problem given the current implementation
3348  * of downcase_truncate_identifier, but we'll probably have to do
3349  * something about this someday.
3350  */
3351  len = endp - curname;
3352  downname = downcase_truncate_identifier(curname, len, false);
3353  Assert(strlen(downname) <= len);
3354  strncpy(curname, downname, len); /* strncpy is required here */
3355  pfree(downname);
3356  }
3357 
3358  while (scanner_isspace(*nextp))
3359  nextp++; /* skip trailing whitespace */
3360 
3361  if (*nextp == separator)
3362  {
3363  nextp++;
3364  while (scanner_isspace(*nextp))
3365  nextp++; /* skip leading whitespace for next */
3366  /* we expect another name, so done remains false */
3367  }
3368  else if (*nextp == '\0')
3369  done = true;
3370  else
3371  return false; /* invalid syntax */
3372 
3373  /* Now safe to overwrite separator with a null */
3374  *endp = '\0';
3375 
3376  /* Truncate name if it's overlength */
3377  truncate_identifier(curname, strlen(curname), false);
3378 
3379  /*
3380  * Finished isolating current name --- add it to list
3381  */
3382  *namelist = lappend(*namelist, curname);
3383 
3384  /* Loop back if we didn't reach end of string */
3385  } while (!done);
3386 
3387  return true;
3388 }
3389 
3390 
3391 /*
3392  * SplitDirectoriesString --- parse a string containing file/directory names
3393  *
3394  * This works fine on file names too; the function name is historical.
3395  *
3396  * This is similar to SplitIdentifierString, except that the parsing
3397  * rules are meant to handle pathnames instead of identifiers: there is
3398  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3399  * and we apply canonicalize_path() to each extracted string. Because of the
3400  * last, the returned strings are separately palloc'd rather than being
3401  * pointers into rawstring --- but we still scribble on rawstring.
3402  *
3403  * Inputs:
3404  * rawstring: the input string; must be modifiable!
3405  * separator: the separator punctuation expected between directories
3406  * (typically ',' or ';'). Whitespace may also appear around
3407  * directories.
3408  * Outputs:
3409  * namelist: filled with a palloc'd list of directory names.
3410  * Caller should list_free_deep() this even on error return.
3411  *
3412  * Returns true if okay, false if there is a syntax error in the string.
3413  *
3414  * Note that an empty string is considered okay here.
3415  */
3416 bool
3417 SplitDirectoriesString(char *rawstring, char separator,
3418  List **namelist)
3419 {
3420  char *nextp = rawstring;
3421  bool done = false;
3422 
3423  *namelist = NIL;
3424 
3425  while (scanner_isspace(*nextp))
3426  nextp++; /* skip leading whitespace */
3427 
3428  if (*nextp == '\0')
3429  return true; /* allow empty string */
3430 
3431  /* At the top of the loop, we are at start of a new directory. */
3432  do
3433  {
3434  char *curname;
3435  char *endp;
3436 
3437  if (*nextp == '"')
3438  {
3439  /* Quoted name --- collapse quote-quote pairs */
3440  curname = nextp + 1;
3441  for (;;)
3442  {
3443  endp = strchr(nextp + 1, '"');
3444  if (endp == NULL)
3445  return false; /* mismatched quotes */
3446  if (endp[1] != '"')
3447  break; /* found end of quoted name */
3448  /* Collapse adjacent quotes into one quote, and look again */
3449  memmove(endp, endp + 1, strlen(endp));
3450  nextp = endp;
3451  }
3452  /* endp now points at the terminating quote */
3453  nextp = endp + 1;
3454  }
3455  else
3456  {
3457  /* Unquoted name --- extends to separator or end of string */
3458  curname = endp = nextp;
3459  while (*nextp && *nextp != separator)
3460  {
3461  /* trailing whitespace should not be included in name */
3462  if (!scanner_isspace(*nextp))
3463  endp = nextp + 1;
3464  nextp++;
3465  }
3466  if (curname == endp)
3467  return false; /* empty unquoted name not allowed */
3468  }
3469 
3470  while (scanner_isspace(*nextp))
3471  nextp++; /* skip trailing whitespace */
3472 
3473  if (*nextp == separator)
3474  {
3475  nextp++;
3476  while (scanner_isspace(*nextp))
3477  nextp++; /* skip leading whitespace for next */
3478  /* we expect another name, so done remains false */
3479  }
3480  else if (*nextp == '\0')
3481  done = true;
3482  else
3483  return false; /* invalid syntax */
3484 
3485  /* Now safe to overwrite separator with a null */
3486  *endp = '\0';
3487 
3488  /* Truncate path if it's overlength */
3489  if (strlen(curname) >= MAXPGPATH)
3490  curname[MAXPGPATH - 1] = '\0';
3491 
3492  /*
3493  * Finished isolating current name --- add it to list
3494  */
3495  curname = pstrdup(curname);
3496  canonicalize_path(curname);
3497  *namelist = lappend(*namelist, curname);
3498 
3499  /* Loop back if we didn't reach end of string */
3500  } while (!done);
3501 
3502  return true;
3503 }
3504 
3505 
3506 /*****************************************************************************
3507  * Comparison Functions used for bytea
3508  *
3509  * Note: btree indexes need these routines not to leak memory; therefore,
3510  * be careful to free working copies of toasted datums. Most places don't
3511  * need to be so careful.
3512  *****************************************************************************/
3513 
3514 Datum
3516 {
3517  Datum arg1 = PG_GETARG_DATUM(0);
3518  Datum arg2 = PG_GETARG_DATUM(1);
3519  bool result;
3520  Size len1,
3521  len2;
3522 
3523  /*
3524  * We can use a fast path for unequal lengths, which might save us from
3525  * having to detoast one or both values.
3526  */
3527  len1 = toast_raw_datum_size(arg1);
3528  len2 = toast_raw_datum_size(arg2);
3529  if (len1 != len2)
3530  result = false;
3531  else
3532  {
3533  bytea *barg1 = DatumGetByteaPP(arg1);
3534  bytea *barg2 = DatumGetByteaPP(arg2);
3535 
3536  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3537  len1 - VARHDRSZ) == 0);
3538 
3539  PG_FREE_IF_COPY(barg1, 0);
3540  PG_FREE_IF_COPY(barg2, 1);
3541  }
3542 
3543  PG_RETURN_BOOL(result);
3544 }
3545 
3546 Datum
3548 {
3549  Datum arg1 = PG_GETARG_DATUM(0);
3550  Datum arg2 = PG_GETARG_DATUM(1);
3551  bool result;
3552  Size len1,
3553  len2;
3554 
3555  /*
3556  * We can use a fast path for unequal lengths, which might save us from
3557  * having to detoast one or both values.
3558  */
3559  len1 = toast_raw_datum_size(arg1);
3560  len2 = toast_raw_datum_size(arg2);
3561  if (len1 != len2)
3562  result = true;
3563  else
3564  {
3565  bytea *barg1 = DatumGetByteaPP(arg1);
3566  bytea *barg2 = DatumGetByteaPP(arg2);
3567 
3568  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3569  len1 - VARHDRSZ) != 0);
3570 
3571  PG_FREE_IF_COPY(barg1, 0);
3572  PG_FREE_IF_COPY(barg2, 1);
3573  }
3574 
3575  PG_RETURN_BOOL(result);
3576 }
3577 
3578 Datum
3580 {
3581  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3582  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3583  int len1,
3584  len2;
3585  int cmp;
3586 
3587  len1 = VARSIZE_ANY_EXHDR(arg1);
3588  len2 = VARSIZE_ANY_EXHDR(arg2);
3589 
3590  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3591 
3592  PG_FREE_IF_COPY(arg1, 0);
3593  PG_FREE_IF_COPY(arg2, 1);
3594 
3595  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3596 }
3597 
3598 Datum
3600 {
3601  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3602  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3603  int len1,
3604  len2;
3605  int cmp;
3606 
3607  len1 = VARSIZE_ANY_EXHDR(arg1);
3608  len2 = VARSIZE_ANY_EXHDR(arg2);
3609 
3610  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3611 
3612  PG_FREE_IF_COPY(arg1, 0);
3613  PG_FREE_IF_COPY(arg2, 1);
3614 
3615  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3616 }
3617 
3618 Datum
3620 {
3621  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3622  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3623  int len1,
3624  len2;
3625  int cmp;
3626 
3627  len1 = VARSIZE_ANY_EXHDR(arg1);
3628  len2 = VARSIZE_ANY_EXHDR(arg2);
3629 
3630  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3631 
3632  PG_FREE_IF_COPY(arg1, 0);
3633  PG_FREE_IF_COPY(arg2, 1);
3634 
3635  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3636 }
3637 
3638 Datum
3640 {
3641  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3642  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3643  int len1,
3644  len2;
3645  int cmp;
3646 
3647  len1 = VARSIZE_ANY_EXHDR(arg1);
3648  len2 = VARSIZE_ANY_EXHDR(arg2);
3649 
3650  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3651 
3652  PG_FREE_IF_COPY(arg1, 0);
3653  PG_FREE_IF_COPY(arg2, 1);
3654 
3655  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
3656 }
3657 
3658 Datum
3660 {
3661  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3662  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3663  int len1,
3664  len2;
3665  int cmp;
3666 
3667  len1 = VARSIZE_ANY_EXHDR(arg1);
3668  len2 = VARSIZE_ANY_EXHDR(arg2);
3669 
3670  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3671  if ((cmp == 0) && (len1 != len2))
3672  cmp = (len1 < len2) ? -1 : 1;
3673 
3674  PG_FREE_IF_COPY(arg1, 0);
3675  PG_FREE_IF_COPY(arg2, 1);
3676 
3677  PG_RETURN_INT32(cmp);
3678 }
3679 
3680 Datum
3682 {
3684  MemoryContext oldcontext;
3685 
3686  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3687 
3688  /* Use generic string SortSupport, forcing "C" collation */
3689  varstr_sortsupport(ssup, C_COLLATION_OID, false);
3690 
3691  MemoryContextSwitchTo(oldcontext);
3692 
3693  PG_RETURN_VOID();
3694 }
3695 
3696 /*
3697  * appendStringInfoText
3698  *
3699  * Append a text to str.
3700  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
3701  */
3702 static void
3704 {
3706 }
3707 
3708 /*
3709  * replace_text
3710  * replace all occurrences of 'old_sub_str' in 'orig_str'
3711  * with 'new_sub_str' to form 'new_str'
3712  *
3713  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
3714  * otherwise returns 'new_str'
3715  */
3716 Datum
3718 {
3719  text *src_text = PG_GETARG_TEXT_PP(0);
3720  text *from_sub_text = PG_GETARG_TEXT_PP(1);
3721  text *to_sub_text = PG_GETARG_TEXT_PP(2);
3722  int src_text_len;
3723  int from_sub_text_len;
3725  text *ret_text;
3726  int start_posn;
3727  int curr_posn;
3728  int chunk_len;
3729  char *start_ptr;
3731 
3732  text_position_setup(src_text, from_sub_text, &state);
3733 
3734  /*
3735  * Note: we check the converted string length, not the original, because
3736  * they could be different if the input contained invalid encoding.
3737  */
3738  src_text_len = state.len1;
3739  from_sub_text_len = state.len2;
3740 
3741  /* Return unmodified source string if empty source or pattern */
3742  if (src_text_len < 1 || from_sub_text_len < 1)
3743  {
3744  text_position_cleanup(&state);
3745  PG_RETURN_TEXT_P(src_text);
3746  }
3747 
3748  start_posn = 1;
3749  curr_posn = text_position_next(1, &state);
3750 
3751  /* When the from_sub_text is not found, there is nothing to do. */
3752  if (curr_posn == 0)
3753  {
3754  text_position_cleanup(&state);
3755  PG_RETURN_TEXT_P(src_text);
3756  }
3757 
3758  /* start_ptr points to the start_posn'th character of src_text */
3759  start_ptr = VARDATA_ANY(src_text);
3760 
3761  initStringInfo(&str);
3762 
3763  do
3764  {
3766 
3767  /* copy the data skipped over by last text_position_next() */
3768  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3769  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3770 
3771  appendStringInfoText(&str, to_sub_text);
3772 
3773  start_posn = curr_posn;
3774  start_ptr += chunk_len;
3775  start_posn += from_sub_text_len;
3776  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3777 
3778  curr_posn = text_position_next(start_posn, &state);
3779  }
3780  while (curr_posn > 0);
3781 
3782  /* copy trailing data */
3783  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3784  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3785 
3786  text_position_cleanup(&state);
3787 
3788  ret_text = cstring_to_text_with_len(str.data, str.len);
3789  pfree(str.data);
3790 
3791  PG_RETURN_TEXT_P(ret_text);
3792 }
3793 
3794 /*
3795  * check_replace_text_has_escape_char
3796  *
3797  * check whether replace_text contains escape char.
3798  */
3799 static bool
3801 {
3802  const char *p = VARDATA_ANY(replace_text);
3803  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3804 
3806  {
3807  for (; p < p_end; p++)
3808  {
3809  if (*p == '\\')
3810  return true;
3811  }
3812  }
3813  else
3814  {
3815  for (; p < p_end; p += pg_mblen(p))
3816  {
3817  if (*p == '\\')
3818  return true;
3819  }
3820  }
3821 
3822  return false;
3823 }
3824 
3825 /*
3826  * appendStringInfoRegexpSubstr
3827  *
3828  * Append replace_text to str, substituting regexp back references for
3829  * \n escapes. start_ptr is the start of the match in the source string,
3830  * at logical character position data_pos.
3831  */
3832 static void
3834  regmatch_t *pmatch,
3835  char *start_ptr, int data_pos)
3836 {
3837  const char *p = VARDATA_ANY(replace_text);
3838  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3839  int eml = pg_database_encoding_max_length();
3840 
3841  for (;;)
3842  {
3843  const char *chunk_start = p;
3844  int so;
3845  int eo;
3846 
3847  /* Find next escape char. */
3848  if (eml == 1)
3849  {
3850  for (; p < p_end && *p != '\\'; p++)
3851  /* nothing */ ;
3852  }
3853  else
3854  {
3855  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3856  /* nothing */ ;
3857  }
3858 
3859  /* Copy the text we just scanned over, if any. */
3860  if (p > chunk_start)
3861  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3862 
3863  /* Done if at end of string, else advance over escape char. */
3864  if (p >= p_end)
3865  break;
3866  p++;
3867 
3868  if (p >= p_end)
3869  {
3870  /* Escape at very end of input. Treat same as unexpected char */
3871  appendStringInfoChar(str, '\\');
3872  break;
3873  }
3874 
3875  if (*p >= '1' && *p <= '9')
3876  {
3877  /* Use the back reference of regexp. */
3878  int idx = *p - '0';
3879 
3880  so = pmatch[idx].rm_so;
3881  eo = pmatch[idx].rm_eo;
3882  p++;
3883  }
3884  else if (*p == '&')
3885  {
3886  /* Use the entire matched string. */
3887  so = pmatch[0].rm_so;
3888  eo = pmatch[0].rm_eo;
3889  p++;
3890  }
3891  else if (*p == '\\')
3892  {
3893  /* \\ means transfer one \ to output. */
3894  appendStringInfoChar(str, '\\');
3895  p++;
3896  continue;
3897  }
3898  else
3899  {
3900  /*
3901  * If escape char is not followed by any expected char, just treat
3902  * it as ordinary data to copy. (XXX would it be better to throw
3903  * an error?)
3904  */
3905  appendStringInfoChar(str, '\\');
3906  continue;
3907  }
3908 
3909  if (so != -1 && eo != -1)
3910  {
3911  /*
3912  * Copy the text that is back reference of regexp. Note so and eo
3913  * are counted in characters not bytes.
3914  */
3915  char *chunk_start;
3916  int chunk_len;
3917 
3918  Assert(so >= data_pos);
3919  chunk_start = start_ptr;
3920  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3921  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3922  appendBinaryStringInfo(str, chunk_start, chunk_len);
3923  }
3924  }
3925 }
3926 
3927 #define REGEXP_REPLACE_BACKREF_CNT 10
3928 
3929 /*
3930  * replace_text_regexp
3931  *
3932  * replace text that matches to regexp in src_text to replace_text.
3933  *
3934  * Note: to avoid having to include regex.h in builtins.h, we declare
3935  * the regexp argument as void *, but really it's regex_t *.
3936  */
3937 text *
3938 replace_text_regexp(text *src_text, void *regexp,
3939  text *replace_text, bool glob)
3940 {
3941  text *ret_text;
3942  regex_t *re = (regex_t *) regexp;
3943  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3946  pg_wchar *data;
3947  size_t data_len;
3948  int search_start;
3949  int data_pos;
3950  char *start_ptr;
3951  bool have_escape;
3952 
3953  initStringInfo(&buf);
3954 
3955  /* Convert data string to wide characters. */
3956  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3957  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3958 
3959  /* Check whether replace_text has escape char. */
3960  have_escape = check_replace_text_has_escape_char(replace_text);
3961 
3962  /* start_ptr points to the data_pos'th character of src_text */
3963  start_ptr = (char *) VARDATA_ANY(src_text);
3964  data_pos = 0;
3965 
3966  search_start = 0;
3967  while (search_start <= data_len)
3968  {
3969  int regexec_result;
3970 
3972 
3973  regexec_result = pg_regexec(re,
3974  data,
3975  data_len,
3976  search_start,
3977  NULL, /* no details */
3979  pmatch,
3980  0);
3981 
3982  if (regexec_result == REG_NOMATCH)
3983  break;
3984 
3985  if (regexec_result != REG_OKAY)
3986  {
3987  char errMsg[100];
3988 
3990  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3991  ereport(ERROR,
3992  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3993  errmsg("regular expression failed: %s", errMsg)));
3994  }
3995 
3996  /*
3997  * Copy the text to the left of the match position. Note we are given
3998  * character not byte indexes.
3999  */
4000  if (pmatch[0].rm_so - data_pos > 0)
4001  {
4002  int chunk_len;
4003 
4004  chunk_len = charlen_to_bytelen(start_ptr,
4005  pmatch[0].rm_so - data_pos);
4006  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4007 
4008  /*
4009  * Advance start_ptr over that text, to avoid multiple rescans of
4010  * it if the replace_text contains multiple back-references.
4011  */
4012  start_ptr += chunk_len;
4013  data_pos = pmatch[0].rm_so;
4014  }
4015 
4016  /*
4017  * Copy the replace_text. Process back references when the
4018  * replace_text has escape characters.
4019  */
4020  if (have_escape)
4021  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
4022  start_ptr, data_pos);
4023  else
4024  appendStringInfoText(&buf, replace_text);
4025 
4026  /* Advance start_ptr and data_pos over the matched text. */
4027  start_ptr += charlen_to_bytelen(start_ptr,
4028  pmatch[0].rm_eo - data_pos);
4029  data_pos = pmatch[0].rm_eo;
4030 
4031  /*
4032  * When global option is off, replace the first instance only.
4033  */
4034  if (!glob)
4035  break;
4036 
4037  /*
4038  * Advance search position. Normally we start the next search at the
4039  * end of the previous match; but if the match was of zero length, we
4040  * have to advance by one character, or we'd just find the same match
4041  * again.
4042  */
4043  search_start = data_pos;
4044  if (pmatch[0].rm_so == pmatch[0].rm_eo)
4045  search_start++;
4046  }
4047 
4048  /*
4049  * Copy the text to the right of the last match.
4050  */
4051  if (data_pos < data_len)
4052  {
4053  int chunk_len;
4054 
4055  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4056  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4057  }
4058 
4059  ret_text = cstring_to_text_with_len(buf.data, buf.len);
4060  pfree(buf.data);
4061  pfree(data);
4062 
4063  return ret_text;
4064 }
4065 
4066 /*
4067  * split_text
4068  * parse input string
4069  * return ord item (1 based)
4070  * based on provided field separator
4071  */
4072 Datum
4074 {
4075  text *inputstring = PG_GETARG_TEXT_PP(0);
4076  text *fldsep = PG_GETARG_TEXT_PP(1);
4077  int fldnum = PG_GETARG_INT32(2);
4078  int inputstring_len;
4079  int fldsep_len;
4081  int start_posn;
4082  int end_posn;
4083  text *result_text;
4084 
4085  /* field number is 1 based */
4086  if (fldnum < 1)
4087  ereport(ERROR,
4088  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4089  errmsg("field position must be greater than zero")));
4090 
4091  text_position_setup(inputstring, fldsep, &state);
4092 
4093  /*
4094  * Note: we check the converted string length, not the original, because
4095  * they could be different if the input contained invalid encoding.
4096  */
4097  inputstring_len = state.len1;
4098  fldsep_len = state.len2;
4099 
4100  /* return empty string for empty input string */
4101  if (inputstring_len < 1)
4102  {
4103  text_position_cleanup(&state);
4105  }
4106 
4107  /* empty field separator */
4108  if (fldsep_len < 1)
4109  {
4110  text_position_cleanup(&state);
4111  /* if first field, return input string, else empty string */
4112  if (fldnum == 1)
4113  PG_RETURN_TEXT_P(inputstring);
4114  else
4116  }
4117 
4118  /* identify bounds of first field */
4119  start_posn = 1;
4120  end_posn = text_position_next(1, &state);
4121 
4122  /* special case if fldsep not found at all */
4123  if (end_posn == 0)
4124  {
4125  text_position_cleanup(&state);
4126  /* if field 1 requested, return input string, else empty string */
4127  if (fldnum == 1)
4128  PG_RETURN_TEXT_P(inputstring);
4129  else
4131  }
4132 
4133  while (end_posn > 0 && --fldnum > 0)
4134  {
4135  /* identify bounds of next field */
4136  start_posn = end_posn + fldsep_len;
4137  end_posn = text_position_next(start_posn, &state);
4138  }
4139 
4140  text_position_cleanup(&state);
4141 
4142  if (fldnum > 0)
4143  {
4144  /* N'th field separator not found */
4145  /* if last field requested, return it, else empty string */
4146  if (fldnum == 1)
4147  result_text = text_substring(PointerGetDatum(inputstring),
4148  start_posn,
4149  -1,
4150  true);
4151  else
4152  result_text = cstring_to_text("");
4153  }
4154  else
4155  {
4156  /* non-last field requested */
4157  result_text = text_substring(PointerGetDatum(inputstring),
4158  start_posn,
4159  end_posn - start_posn,
4160  false);
4161  }
4162 
4163  PG_RETURN_TEXT_P(result_text);
4164 }
4165 
4166 /*
4167  * Convenience function to return true when two text params are equal.
4168  */
4169 static bool
4170 text_isequal(text *txt1, text *txt2)
4171 {
4173  PointerGetDatum(txt1),
4174  PointerGetDatum(txt2)));
4175 }
4176 
4177 /*
4178  * text_to_array
4179  * parse input string and return text array of elements,
4180  * based on provided field separator
4181  */
4182 Datum
4184 {
4185  return text_to_array_internal(fcinfo);
4186 }
4187 
4188 /*
4189  * text_to_array_null
4190  * parse input string and return text array of elements,
4191  * based on provided field separator and null string
4192  *
4193  * This is a separate entry point only to prevent the regression tests from
4194  * complaining about different argument sets for the same internal function.
4195  */
4196 Datum
4198 {
4199  return text_to_array_internal(fcinfo);
4200 }
4201 
4202 /*
4203  * common code for text_to_array and text_to_array_null functions
4204  *
4205  * These are not strict so we have to test for null inputs explicitly.
4206  */
4207 static Datum
4209 {
4210  text *inputstring;
4211  text *fldsep;
4212  text *null_string;
4213  int inputstring_len;
4214  int fldsep_len;
4215  char *start_ptr;
4216  text *result_text;
4217  bool is_null;
4218  ArrayBuildState *astate = NULL;
4219 
4220  /* when input string is NULL, then result is NULL too */
4221  if (PG_ARGISNULL(0))
4222  PG_RETURN_NULL();
4223 
4224  inputstring = PG_GETARG_TEXT_PP(0);
4225 
4226  /* fldsep can be NULL */
4227  if (!PG_ARGISNULL(1))
4228  fldsep = PG_GETARG_TEXT_PP(1);
4229  else
4230  fldsep = NULL;
4231 
4232  /* null_string can be NULL or omitted */
4233  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4234  null_string = PG_GETARG_TEXT_PP(2);
4235  else
4236  null_string = NULL;
4237 
4238  if (fldsep != NULL)
4239  {
4240  /*
4241  * Normal case with non-null fldsep. Use the text_position machinery
4242  * to search for occurrences of fldsep.
4243  */
4245  int fldnum;
4246  int start_posn;
4247  int end_posn;
4248  int chunk_len;
4249 
4250  text_position_setup(inputstring, fldsep, &state);
4251 
4252  /*
4253  * Note: we check the converted string length, not the original,
4254  * because they could be different if the input contained invalid
4255  * encoding.
4256  */
4257  inputstring_len = state.len1;
4258  fldsep_len = state.len2;
4259 
4260  /* return empty array for empty input string */
4261  if (inputstring_len < 1)
4262  {
4263  text_position_cleanup(&state);
4265  }
4266 
4267  /*
4268  * empty field separator: return the input string as a one-element
4269  * array
4270  */
4271  if (fldsep_len < 1)
4272  {
4273  Datum elems[1];
4274  bool nulls[1];
4275  int dims[1];
4276  int lbs[1];
4277 
4278  text_position_cleanup(&state);
4279  /* single element can be a NULL too */
4280  is_null = null_string ? text_isequal(inputstring, null_string) : false;
4281 
4282  elems[0] = PointerGetDatum(inputstring);
4283  nulls[0] = is_null;
4284  dims[0] = 1;
4285  lbs[0] = 1;
4286  /* XXX: this hardcodes assumptions about the text type */
4288  1, dims, lbs,
4289  TEXTOID, -1, false, 'i'));
4290  }
4291 
4292  start_posn = 1;
4293  /* start_ptr points to the start_posn'th character of inputstring */
4294  start_ptr = VARDATA_ANY(inputstring);
4295 
4296  for (fldnum = 1;; fldnum++) /* field number is 1 based */
4297  {
4299 
4300  end_posn = text_position_next(start_posn, &state);
4301 
4302  if (end_posn == 0)
4303  {
4304  /* fetch last field */
4305  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4306  }
4307  else
4308  {
4309  /* fetch non-last field */
4310  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
4311  }
4312 
4313  /* must build a temp text datum to pass to accumArrayResult */
4314  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4315  is_null = null_string ? text_isequal(result_text, null_string) : false;
4316 
4317  /* stash away this field */
4318  astate = accumArrayResult(astate,
4319  PointerGetDatum(result_text),
4320  is_null,
4321  TEXTOID,
4323 
4324  pfree(result_text);
4325 
4326  if (end_posn == 0)
4327  break;
4328 
4329  start_posn = end_posn;
4330  start_ptr += chunk_len;
4331  start_posn += fldsep_len;
4332  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
4333  }
4334 
4335  text_position_cleanup(&state);
4336  }
4337  else
4338  {
4339  /*
4340  * When fldsep is NULL, each character in the inputstring becomes an
4341  * element in the result array. The separator is effectively the
4342  * space between characters.
4343  */
4344  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4345 
4346  /* return empty array for empty input string */
4347  if (inputstring_len < 1)
4349 
4350  start_ptr = VARDATA_ANY(inputstring);
4351 
4352  while (inputstring_len > 0)
4353  {
4354  int chunk_len = pg_mblen(start_ptr);
4355 
4357 
4358  /* must build a temp text datum to pass to accumArrayResult */
4359  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4360  is_null = null_string ? text_isequal(result_text, null_string) : false;
4361 
4362  /* stash away this field */
4363  astate = accumArrayResult(astate,
4364  PointerGetDatum(result_text),
4365  is_null,
4366  TEXTOID,
4368 
4369  pfree(result_text);
4370 
4371  start_ptr += chunk_len;
4372  inputstring_len -= chunk_len;
4373  }
4374  }
4375 
4378 }
4379 
4380 /*
4381  * array_to_text
4382  * concatenate Cstring representation of input array elements
4383  * using provided field separator
4384  */
4385 Datum
4387 {
4389  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4390 
4391  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4392 }
4393 
4394 /*
4395  * array_to_text_null
4396  * concatenate Cstring representation of input array elements
4397  * using provided field separator and null string
4398  *
4399  * This version is not strict so we have to test for null inputs explicitly.
4400  */
4401 Datum
4403 {
4404  ArrayType *v;
4405  char *fldsep;
4406  char *null_string;
4407 
4408  /* returns NULL when first or second parameter is NULL */
4409  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4410  PG_RETURN_NULL();
4411 
4412  v = PG_GETARG_ARRAYTYPE_P(0);
4413  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4414 
4415  /* NULL null string is passed through as a null pointer */
4416  if (!PG_ARGISNULL(2))
4417  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4418  else
4419  null_string = NULL;
4420 
4421  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4422 }
4423 
4424 /*
4425  * common code for array_to_text and array_to_text_null functions
4426  */
4427 static text *
4429  const char *fldsep, const char *null_string)
4430 {
4431  text *result;
4432  int nitems,
4433  *dims,
4434  ndims;
4435  Oid element_type;
4436  int typlen;
4437  bool typbyval;
4438  char typalign;
4440  bool printed = false;
4441  char *p;
4442  bits8 *bitmap;
4443  int bitmask;
4444  int i;
4445  ArrayMetaState *my_extra;
4446 
4447  ndims = ARR_NDIM(v);
4448  dims = ARR_DIMS(v);
4449  nitems = ArrayGetNItems(ndims, dims);
4450 
4451  /* if there are no elements, return an empty string */
4452  if (nitems == 0)
4453  return cstring_to_text_with_len("", 0);
4454 
4455  element_type = ARR_ELEMTYPE(v);
4456  initStringInfo(&buf);
4457 
4458  /*
4459  * We arrange to look up info about element type, including its output
4460  * conversion proc, only once per series of calls, assuming the element
4461  * type doesn't change underneath us.
4462  */
4463  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4464  if (my_extra == NULL)
4465  {
4466  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4467  sizeof(ArrayMetaState));
4468  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4469  my_extra->element_type = ~element_type;
4470  }
4471 
4472  if (my_extra->element_type != element_type)
4473  {
4474  /*
4475  * Get info about element type, including its output conversion proc
4476  */
4477  get_type_io_data(element_type, IOFunc_output,
4478  &my_extra->typlen, &my_extra->typbyval,
4479  &my_extra->typalign, &my_extra->typdelim,
4480  &my_extra->typioparam, &my_extra->typiofunc);
4481  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4482  fcinfo->flinfo->fn_mcxt);
4483  my_extra->element_type = element_type;
4484  }
4485  typlen = my_extra->typlen;
4486  typbyval = my_extra->typbyval;
4487  typalign = my_extra->typalign;
4488 
4489  p = ARR_DATA_PTR(v);
4490  bitmap = ARR_NULLBITMAP(v);
4491  bitmask = 1;
4492 
4493  for (i = 0; i < nitems; i++)
4494  {
4495  Datum itemvalue;
4496  char *value;
4497 
4498  /* Get source element, checking for NULL */
4499  if (bitmap && (*bitmap & bitmask) == 0)
4500  {
4501  /* if null_string is NULL, we just ignore null elements */
4502  if (null_string != NULL)
4503  {
4504  if (printed)
4505  appendStringInfo(&buf, "%s%s", fldsep, null_string);
4506  else
4507  appendStringInfoString(&buf, null_string);
4508  printed = true;
4509  }
4510  }
4511  else
4512  {
4513  itemvalue = fetch_att(p, typbyval, typlen);
4514 
4515  value = OutputFunctionCall(&my_extra->proc, itemvalue);
4516 
4517  if (printed)
4518  appendStringInfo(&buf, "%s%s", fldsep, value);
4519  else
4520  appendStringInfoString(&buf, value);
4521  printed = true;
4522 
4523  p = att_addlength_pointer(p, typlen, p);
4524  p = (char *) att_align_nominal(p, typalign);
4525  }
4526 
4527  /* advance bitmap pointer if any */
4528  if (bitmap)
4529  {
4530  bitmask <<= 1;
4531  if (bitmask == 0x100)
4532  {
4533  bitmap++;
4534  bitmask = 1;
4535  }
4536  }
4537  }
4538 
4539  result = cstring_to_text_with_len(buf.data, buf.len);
4540  pfree(buf.data);
4541 
4542  return result;
4543 }
4544 
4545 #define HEXBASE 16
4546 /*
4547  * Convert an int32 to a string containing a base 16 (hex) representation of
4548  * the number.
4549  */
4550 Datum
4552 {
4554  char *ptr;
4555  const char *digits = "0123456789abcdef";
4556  char buf[32]; /* bigger than needed, but reasonable */
4557 
4558  ptr = buf + sizeof(buf) - 1;
4559  *ptr = '\0';
4560 
4561  do
4562  {
4563  *--ptr = digits[value % HEXBASE];
4564  value /= HEXBASE;
4565  } while (ptr > buf && value);
4566 
4568 }
4569 
4570 /*
4571  * Convert an int64 to a string containing a base 16 (hex) representation of
4572  * the number.
4573  */
4574 Datum
4576 {
4577  uint64 value = (uint64) PG_GETARG_INT64(0);
4578  char *ptr;
4579  const char *digits = "0123456789abcdef";
4580  char buf[32]; /* bigger than needed, but reasonable */
4581 
4582  ptr = buf + sizeof(buf) - 1;
4583  *ptr = '\0';
4584 
4585  do
4586  {
4587  *--ptr = digits[value % HEXBASE];
4588  value /= HEXBASE;
4589  } while (ptr > buf && value);
4590 
4592 }
4593 
4594 /*
4595  * Return the size of a datum, possibly compressed
4596  *
4597  * Works on any data type
4598  */
4599 Datum
4601 {
4603  int32 result;
4604  int typlen;
4605 
4606  /* On first call, get the input type's typlen, and save at *fn_extra */
4607  if (fcinfo->flinfo->fn_extra == NULL)
4608  {
4609  /* Lookup the datatype of the supplied argument */
4610  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
4611 
4612  typlen = get_typlen(argtypeid);
4613  if (typlen == 0) /* should not happen */
4614  elog(ERROR, "cache lookup failed for type %u", argtypeid);
4615 
4616  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4617  sizeof(int));
4618  *((int *) fcinfo->flinfo->fn_extra) = typlen;
4619  }
4620  else
4621  typlen = *((int *) fcinfo->flinfo->fn_extra);
4622 
4623  if (typlen == -1)
4624  {
4625  /* varlena type, possibly toasted */
4626  result = toast_datum_size(value);
4627  }
4628  else if (typlen == -2)
4629  {
4630  /* cstring */
4631  result = strlen(DatumGetCString(value)) + 1;
4632  }
4633  else
4634  {
4635  /* ordinary fixed-width type */
4636  result = typlen;
4637  }
4638 
4639  PG_RETURN_INT32(result);
4640 }
4641 
4642 /*
4643  * string_agg - Concatenates values and returns string.
4644  *
4645  * Syntax: string_agg(value text, delimiter text) RETURNS text
4646  *
4647  * Note: Any NULL values are ignored. The first-call delimiter isn't
4648  * actually used at all, and on subsequent calls the delimiter precedes
4649  * the associated value.
4650  */
4651 
4652 /* subroutine to initialize state */
4653 static StringInfo
4655 {
4656  StringInfo state;
4657  MemoryContext aggcontext;
4658  MemoryContext oldcontext;
4659 
4660  if (!AggCheckCallContext(fcinfo, &aggcontext))
4661  {
4662  /* cannot be called directly because of internal-type argument */
4663  elog(ERROR, "string_agg_transfn called in non-aggregate context");
4664  }
4665 
4666  /*
4667  * Create state in aggregate context. It'll stay there across subsequent
4668  * calls.
4669  */
4670  oldcontext = MemoryContextSwitchTo(aggcontext);
4671  state = makeStringInfo();
4672  MemoryContextSwitchTo(oldcontext);
4673 
4674  return state;
4675 }
4676 
4677 Datum
4679 {
4680  StringInfo state;
4681 
4682  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4683 
4684  /* Append the value unless null. */
4685  if (!PG_ARGISNULL(1))
4686  {
4687  /* On the first time through, we ignore the delimiter. */
4688  if (state == NULL)
4689  state = makeStringAggState(fcinfo);
4690  else if (!PG_ARGISNULL(2))
4691  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
4692 
4693  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
4694  }
4695 
4696  /*
4697  * The transition type for string_agg() is declared to be "internal",
4698  * which is a pass-by-value type the same size as a pointer.
4699  */
4700  PG_RETURN_POINTER(state);
4701 }
4702 
4703 Datum
4705 {
4706  StringInfo state;
4707 
4708  /* cannot be called directly because of internal-type argument */
4709  Assert(AggCheckCallContext(fcinfo, NULL));
4710 
4711  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4712 
4713  if (state != NULL)
4715  else
4716  PG_RETURN_NULL();
4717 }
4718 
4719 /*
4720  * Prepare cache with fmgr info for the output functions of the datatypes of
4721  * the arguments of a concat-like function, beginning with argument "argidx".
4722  * (Arguments before that will have corresponding slots in the resulting
4723  * FmgrInfo array, but we don't fill those slots.)
4724  */
4725 static FmgrInfo *
4727 {
4728  FmgrInfo *foutcache;
4729  int i;
4730 
4731  /* We keep the info in fn_mcxt so it survives across calls */
4732  foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4733  PG_NARGS() * sizeof(FmgrInfo));
4734 
4735  for (i = argidx; i < PG_NARGS(); i++)
4736  {
4737  Oid valtype;
4738  Oid typOutput;
4739  bool typIsVarlena;
4740 
4741  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4742  if (!OidIsValid(valtype))
4743  elog(ERROR, "could not determine data type of concat() input");
4744 
4745  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4746  fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
4747  }
4748 
4749  fcinfo->flinfo->fn_extra = foutcache;
4750 
4751  return foutcache;
4752 }
4753 
4754 /*
4755  * Implementation of both concat() and concat_ws().
4756  *
4757  * sepstr is the separator string to place between values.
4758  * argidx identifies the first argument to concatenate (counting from zero);
4759  * note that this must be constant across any one series of calls.
4760  *
4761  * Returns NULL if result should be NULL, else text value.
4762  */
4763 static text *
4764 concat_internal(const char *sepstr, int argidx,
4765  FunctionCallInfo fcinfo)
4766 {
4767  text *result;
4769  FmgrInfo *foutcache;
4770  bool first_arg = true;
4771  int i;
4772 
4773  /*
4774  * concat(VARIADIC some-array) is essentially equivalent to
4775  * array_to_text(), ie concat the array elements with the given separator.
4776  * So we just pass the case off to that code.
4777  */
4778  if (get_fn_expr_variadic(fcinfo->flinfo))
4779  {
4780  ArrayType *arr;
4781 
4782  /* Should have just the one argument */
4783  Assert(argidx == PG_NARGS() - 1);
4784 
4785  /* concat(VARIADIC NULL) is defined as NULL */
4786  if (PG_ARGISNULL(argidx))
4787  return NULL;
4788 
4789  /*
4790  * Non-null argument had better be an array. We assume that any call
4791  * context that could let get_fn_expr_variadic return true will have
4792  * checked that a VARIADIC-labeled parameter actually is an array. So
4793  * it should be okay to just Assert that it's an array rather than
4794  * doing a full-fledged error check.
4795  */
4797 
4798  /* OK, safe to fetch the array value */
4799  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4800 
4801  /*
4802  * And serialize the array. We tell array_to_text to ignore null
4803  * elements, which matches the behavior of the loop below.
4804  */
4805  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4806  }
4807 
4808  /* Normal case without explicit VARIADIC marker */
4809  initStringInfo(&str);
4810 
4811  /* Get output function info, building it if first time through */
4812  foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
4813  if (foutcache == NULL)
4814  foutcache = build_concat_foutcache(fcinfo, argidx);
4815 
4816  for (i = argidx; i < PG_NARGS(); i++)
4817  {
4818  if (!PG_ARGISNULL(i))
4819  {
4821 
4822  /* add separator if appropriate */
4823  if (first_arg)
4824  first_arg = false;
4825  else
4826  appendStringInfoString(&str, sepstr);
4827 
4828  /* call the appropriate type output function, append the result */
4830  OutputFunctionCall(&foutcache[i], value));
4831  }
4832  }
4833 
4834  result = cstring_to_text_with_len(str.data, str.len);
4835  pfree(str.data);
4836 
4837  return result;
4838 }
4839 
4840 /*
4841  * Concatenate all arguments. NULL arguments are ignored.
4842  */
4843 Datum
4845 {
4846  text *result;
4847 
4848  result = concat_internal("", 0, fcinfo);
4849  if (result == NULL)
4850  PG_RETURN_NULL();
4851  PG_RETURN_TEXT_P(result);
4852 }
4853 
4854 /*
4855  * Concatenate all but first argument value with separators. The first
4856  * parameter is used as the separator. NULL arguments are ignored.
4857  */
4858 Datum
4860 {
4861  char *sep;
4862  text *result;
4863 
4864  /* return NULL when separator is NULL */
4865  if (PG_ARGISNULL(0))
4866  PG_RETURN_NULL();
4868 
4869  result = concat_internal(sep, 1, fcinfo);
4870  if (result == NULL)
4871  PG_RETURN_NULL();
4872  PG_RETURN_TEXT_P(result);
4873 }
4874 
4875 /*
4876  * Return first n characters in the string. When n is negative,
4877  * return all but last |n| characters.
4878  */
4879 Datum
4881 {
4882  text *str = PG_GETARG_TEXT_PP(0);
4883  const char *p = VARDATA_ANY(str);
4884  int len = VARSIZE_ANY_EXHDR(str);
4885  int n = PG_GETARG_INT32(1);
4886  int rlen;
4887 
4888  if (n < 0)
4889  n = pg_mbstrlen_with_len(p, len) + n;
4890  rlen = pg_mbcharcliplen(p, len, n);
4891 
4893 }
4894 
4895 /*
4896  * Return last n characters in the string. When n is negative,
4897  * return all but first |n| characters.
4898  */
4899 Datum
4901 {
4902  text *str = PG_GETARG_TEXT_PP(0);
4903  const char *p = VARDATA_ANY(str);
4904  int len = VARSIZE_ANY_EXHDR(str);
4905  int n = PG_GETARG_INT32(1);
4906  int off;
4907 
4908  if (n < 0)
4909  n = -n;
4910  else
4911  n = pg_mbstrlen_with_len(p, len) - n;
4912  off = pg_mbcharcliplen(p, len, n);
4913 
4914  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4915 }
4916 
4917 /*
4918  * Return reversed string
4919  */
4920 Datum
4922 {
4923  text *str = PG_GETARG_TEXT_PP(0);
4924  const char *p = VARDATA_ANY(str);
4925  int len = VARSIZE_ANY_EXHDR(str);
4926  const char *endp = p + len;
4927  text *result;
4928  char *dst;
4929 
4930  result = palloc(len + VARHDRSZ);
4931  dst = (char *) VARDATA(result) + len;
4932  SET_VARSIZE(result, len + VARHDRSZ);
4933 
4935  {
4936  /* multibyte version */
4937  while (p < endp)
4938  {
4939  int sz;
4940 
4941  sz = pg_mblen(p);
4942  dst -= sz;
4943  memcpy(dst, p, sz);
4944  p += sz;
4945  }
4946  }
4947  else
4948  {
4949  /* single byte version */
4950  while (p < endp)
4951  *(--dst) = *p++;
4952  }
4953 
4954  PG_RETURN_TEXT_P(result);
4955 }
4956 
4957 
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS	0x0001	/* is minus flag present? */

/*
 * Step "ptr" forward by one position, modifying it in place.  If the new
 * position reaches or passes "end_ptr", raise an "unterminated format() type
 * specifier" error instead of letting the caller read past the string end.
 * Wrapped in do { ... } while (0) so it can be used as a single statement.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (++(ptr) >= (end_ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format() type specifier"), \
					 errhint("For a single \"%%\" use \"%%%%\"."))); \
	} while (0)
4971 
4972 /*
4973  * Returns a formatted string
4974  */
4975 Datum
4977 {
4978  text *fmt;
4980  const char *cp;
4981  const char *start_ptr;
4982  const char *end_ptr;
4983  text *result;
4984  int arg;
4985  bool funcvariadic;
4986  int nargs;
4987  Datum *elements = NULL;
4988  bool *nulls = NULL;
4989  Oid element_type = InvalidOid;
4990  Oid prev_type = InvalidOid;
4991  Oid prev_width_type = InvalidOid;
4992  FmgrInfo typoutputfinfo;
4993  FmgrInfo typoutputinfo_width;
4994 
4995  /* When format string is null, immediately return null */
4996  if (PG_ARGISNULL(0))
4997  PG_RETURN_NULL();
4998 
4999  /* If argument is marked VARIADIC, expand array into elements */
5000  if (get_fn_expr_variadic(fcinfo->flinfo))
5001  {
5002  ArrayType *arr;
5003  int16 elmlen;
5004  bool elmbyval;
5005  char elmalign;
5006  int nitems;
5007 
5008  /* Should have just the one argument */
5009  Assert(PG_NARGS() == 2);
5010 
5011  /* If argument is NULL, we treat it as zero-length array */
5012  if (PG_ARGISNULL(1))
5013  nitems = 0;
5014  else
5015  {
5016  /*
5017  * Non-null argument had better be an array. We assume that any
5018  * call context that could let get_fn_expr_variadic return true
5019  * will have checked that a VARIADIC-labeled parameter actually is
5020  * an array. So it should be okay to just Assert that it's an
5021  * array rather than doing a full-fledged error check.
5022  */
5024 
5025  /* OK, safe to fetch the array value */
5026  arr = PG_GETARG_ARRAYTYPE_P(1);
5027 
5028  /* Get info about array element type */
5029  element_type = ARR_ELEMTYPE(arr);
5030  get_typlenbyvalalign(element_type,
5031  &elmlen, &elmbyval, &elmalign);
5032 
5033  /* Extract all array elements */
5034  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5035  &elements, &nulls, &nitems);
5036  }
5037 
5038  nargs = nitems + 1;
5039  funcvariadic = true;
5040  }
5041  else
5042  {
5043  /* Non-variadic case, we'll process the arguments individually */
5044  nargs = PG_NARGS();
5045  funcvariadic = false;
5046  }
5047 
5048  /* Setup for main loop. */
5049  fmt = PG_GETARG_TEXT_PP(0);
5050  start_ptr = VARDATA_ANY(fmt);
5051  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5052  initStringInfo(&str);
5053  arg = 1; /* next argument position to print */
5054 
5055  /* Scan format string, looking for conversion specifiers. */
5056  for (cp = start_ptr; cp < end_ptr; cp++)
5057  {
5058  int argpos;
5059  int widthpos;
5060  int flags;
5061  int width;
5062  Datum value;
5063  bool isNull;
5064  Oid typid;
5065 
5066  /*
5067  * If it's not the start of a conversion specifier, just copy it to
5068  * the output buffer.
5069  */
5070  if (*cp != '%')
5071  {
5072  appendStringInfoCharMacro(&str, *cp);
5073  continue;
5074  }
5075 
5076  ADVANCE_PARSE_POINTER(cp, end_ptr);
5077 
5078  /* Easy case: %% outputs a single % */
5079  if (*cp == '%')
5080  {
5081  appendStringInfoCharMacro(&str, *cp);
5082  continue;
5083  }
5084 
5085  /* Parse the optional portions of the format specifier */
5086  cp = text_format_parse_format(cp, end_ptr,
5087  &argpos, &widthpos,
5088  &flags, &width);
5089 
5090  /*
5091  * Next we should see the main conversion specifier. Whether or not
5092  * an argument position was present, it's known that at least one
5093  * character remains in the string at this point. Experience suggests
5094  * that it's worth checking that that character is one of the expected
5095  * ones before we try to fetch arguments, so as to produce the least
5096  * confusing response to a mis-formatted specifier.
5097  */
5098  if (strchr("sIL", *cp) == NULL)
5099  ereport(ERROR,
5100  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5101  errmsg("unrecognized format() type specifier \"%c\"",
5102  *cp),
5103  errhint("For a single \"%%\" use \"%%%%\".")));
5104 
5105  /* If indirect width was specified, get its value */
5106  if (widthpos >= 0)
5107  {
5108  /* Collect the specified or next argument position */
5109  if (widthpos > 0)
5110  arg = widthpos;
5111  if (arg >= nargs)
5112  ereport(ERROR,
5113  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5114  errmsg("too few arguments for format()")));
5115 
5116  /* Get the value and type of the selected argument */
5117  if (!funcvariadic)
5118  {
5119  value = PG_GETARG_DATUM(arg);
5120  isNull = PG_ARGISNULL(arg);
5121  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5122  }
5123  else
5124  {
5125  value = elements[arg - 1];
5126  isNull = nulls[arg - 1];
5127  typid = element_type;
5128  }
5129  if (!OidIsValid(typid))
5130  elog(ERROR, "could not determine data type of format() input");
5131 
5132  arg++;
5133 
5134  /* We can treat NULL width the same as zero */
5135  if (isNull)
5136  width = 0;
5137  else if (typid == INT4OID)
5138  width = DatumGetInt32(value);
5139  else if (typid == INT2OID)
5140  width = DatumGetInt16(value);
5141  else
5142  {
5143  /* For less-usual datatypes, convert to text then to int */
5144  char *str;
5145 
5146  if (typid != prev_width_type)
5147  {
5148  Oid typoutputfunc;
5149  bool typIsVarlena;
5150 
5151  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5152  fmgr_info(typoutputfunc, &typoutputinfo_width);
5153  prev_width_type = typid;
5154  }
5155 
5156  str = OutputFunctionCall(&typoutputinfo_width, value);
5157 
5158  /* pg_atoi will complain about bad data or overflow */
5159  width = pg_atoi(str, sizeof(int), '\0');
5160 
5161  pfree(str);
5162  }
5163  }
5164 
5165  /* Collect the specified or next argument position */
5166  if (argpos > 0)
5167  arg = argpos;
5168  if (arg >= nargs)
5169  ereport(ERROR,
5170  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5171  errmsg("too few arguments for format()")));
5172 
5173  /* Get the value and type of the selected argument */
5174  if (!funcvariadic)
5175  {
5176  value = PG_GETARG_DATUM(arg);
5177  isNull = PG_ARGISNULL(arg);
5178  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5179  }
5180  else
5181  {
5182  value = elements[arg - 1];
5183  isNull = nulls[arg - 1];
5184  typid = element_type;
5185  }
5186  if (!OidIsValid(typid))
5187  elog(ERROR, "could not determine data type of format() input");
5188 
5189  arg++;
5190 
5191  /*
5192  * Get the appropriate typOutput function, reusing previous one if
5193  * same type as previous argument. That's particularly useful in the
5194  * variadic-array case, but often saves work even for ordinary calls.
5195  */
5196  if (typid != prev_type)
5197  {
5198  Oid typoutputfunc;
5199  bool typIsVarlena;
5200 
5201  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5202  fmgr_info(typoutputfunc, &typoutputfinfo);
5203  prev_type = typid;
5204  }
5205 
5206  /*
5207  * And now we can format the value.
5208  */
5209  switch (*cp)
5210  {
5211  case 's':
5212  case 'I':
5213  case 'L':
5214  text_format_string_conversion(&str, *cp, &typoutputfinfo,
5215  value, isNull,
5216  flags, width);
5217  break;
5218  default:
5219  /* should not get here, because of previous check */
5220  ereport(ERROR,
5221  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5222  errmsg("unrecognized format() type specifier \"%c\"",
5223  *cp),
5224  errhint("For a single \"%%\" use \"%%%%\".")));
5225  break;
5226  }
5227  }
5228 
5229  /* Don't need deconstruct_array results anymore. */
5230  if (elements != NULL)
5231  pfree(elements);
5232  if (nulls != NULL)
5233  pfree(nulls);
5234 
5235  /* Generate results. */
5236  result = cstring_to_text_with_len(str.data, str.len);
5237  pfree(str.data);
5238 
5239  PG_RETURN_TEXT_P(result);
5240 }
5241 
5242 /*
5243  * Parse contiguous digits as a decimal number.
5244  *
5245  * Returns true if some digits could be parsed.
5246  * The value is returned into *value, and *ptr is advanced to the next
5247  * character to be parsed.
5248  *
5249  * Note parsing invariant: at least one character is known available before
5250  * string end (end_ptr) at entry, and this is still true at exit.
5251  */
5252 static bool
5253 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5254 {
5255  bool found = false;
5256  const char *cp = *ptr;
5257  int val = 0;
5258 
5259  while (*cp >= '0' && *cp <= '9')
5260  {
5261  int8 digit = (*cp - '0');
5262 
5263  if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
5264  unlikely(pg_add_s32_overflow(val, digit, &val)))
5265  ereport(ERROR,
5266  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5267  errmsg("number is out of range")));
5268  ADVANCE_PARSE_POINTER(cp, end_ptr);
5269  found = true;
5270  }
5271 
5272  *ptr = cp;
5273  *value = val;
5274 
5275  return found;
5276 }
5277 
5278 /*
5279  * Parse a format specifier (generally following the SUS printf spec).
5280  *
5281  * We have already advanced over the initial '%', and we are looking for
5282  * [argpos][flags][width]type (but the type character is not consumed here).
5283  *
5284  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5285  * Output parameters:
5286  * argpos: argument position for value to be printed. -1 means unspecified.
5287  * widthpos: argument position for width. Zero means the argument position
5288  * was unspecified (ie, take the next arg) and -1 means no width
5289  * argument (width was omitted or specified as a constant).
5290  * flags: bitmask of flags.
5291  * width: directly-specified width value. Zero means the width was omitted
5292  * (note it's not necessary to distinguish this case from an explicit
5293  * zero width value).
5294  *
5295  * The function result is the next character position to be parsed, ie, the
5296  * location where the type character is/should be.
5297  *
5298  * Note parsing invariant: at least one character is known available before
5299  * string end (end_ptr) at entry, and this is still true at exit.
5300  */
5301 static const char *
5302 text_format_parse_format(const char *start_ptr, const char *end_ptr,
5303  int *argpos, int *widthpos,
5304  int *flags, int *width)
5305 {
5306  const char *cp = start_ptr;
5307  int n;
5308 
5309  /* set defaults for output parameters */
5310  *argpos = -1;
5311  *widthpos = -1;
5312  *flags = 0;
5313  *width = 0;
5314 
5315  /* try to identify first number */
5316  if (text_format_parse_digits(&cp, end_ptr, &n))
5317  {
5318  if (*cp != '$')
5319  {
5320  /* Must be just a width and a type, so we're done */
5321  *width = n;
5322  return cp;
5323  }
5324  /* The number was argument position */
5325  *argpos = n;
5326  /* Explicit 0 for argument index is immediately refused */
5327  if (n == 0)
5328  ereport(ERROR,
5329  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5330  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5331  ADVANCE_PARSE_POINTER(cp, end_ptr);
5332  }
5333 
5334  /* Handle flags (only minus is supported now) */
5335  while (*cp == '-')
5336  {
5337  *flags |= TEXT_FORMAT_FLAG_MINUS;
5338  ADVANCE_PARSE_POINTER(cp, end_ptr);
5339  }
5340 
5341  if (*cp == '*')
5342  {
5343  /* Handle indirect width */
5344  ADVANCE_PARSE_POINTER(cp, end_ptr);
5345  if (text_format_parse_digits(&cp, end_ptr, &n))
5346  {
5347  /* number in this position must be closed by $ */
5348  if (*cp != '$')
5349  ereport(ERROR,
5350  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5351  errmsg("width argument position must be ended by \"$\"")));
5352  /* The number was width argument position */
5353  *widthpos = n;
5354  /* Explicit 0 for argument index is immediately refused */
5355  if (n == 0)
5356  ereport(ERROR,
5357  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5358  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5359  ADVANCE_PARSE_POINTER(cp, end_ptr);
5360  }
5361  else
5362  *widthpos = 0; /* width's argument position is unspecified */
5363  }
5364  else
5365  {
5366  /* Check for direct width specification */
5367  if (text_format_parse_digits(&cp, end_ptr, &n))
5368  *width = n;
5369  }
5370 
5371  /* cp should now be pointing at type character */
5372  return cp;
5373 }
5374 
5375 /*
5376  * Format a %s, %I, or %L conversion
5377  */
5378 static void
5380  FmgrInfo *typOutputInfo,
5381  Datum value, bool isNull,
5382  int flags, int width)
5383 {
5384  char *str;
5385 
5386  /* Handle NULL arguments before trying to stringify the value. */
5387  if (isNull)
5388  {
5389  if (conversion == 's')
5390  text_format_append_string(buf, "", flags, width);
5391  else if (conversion == 'L')
5392  text_format_append_string(buf, "NULL", flags, width);
5393  else if (conversion == 'I')
5394  ereport(ERROR,
5395  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5396  errmsg("null values cannot be formatted as an SQL identifier")));
5397  return;
5398  }
5399 
5400  /* Stringify. */
5401  str = OutputFunctionCall(typOutputInfo, value);
5402 
5403  /* Escape. */
5404  if (conversion == 'I')
5405  {
5406  /* quote_identifier may or may not allocate a new string. */
5407  text_format_append_string(buf, quote_identifier(str), flags, width);
5408  }
5409  else if (conversion == 'L')
5410  {
5411  char *qstr = quote_literal_cstr(str);
5412 
5413  text_format_append_string(buf, qstr, flags, width);
5414  /* quote_literal_cstr() always allocates a new string */
5415  pfree(qstr);
5416  }
5417  else
5418  text_format_append_string(buf, str, flags, width);
5419 
5420  /* Cleanup. */
5421  pfree(str);
5422 }
5423 
5424 /*
5425  * Append str to buf, padding as directed by flags/width
5426  */
5427 static void
5429  int flags, int width)
5430 {
5431  bool align_to_left = false;
5432  int len;
5433 
5434  /* fast path for typical easy case */
5435  if (width == 0)
5436  {
5437  appendStringInfoString(buf, str);
5438  return;
5439  }
5440 
5441  if (width < 0)
5442  {
5443  /* Negative width: implicit '-' flag, then take absolute value */
5444  align_to_left = true;
5445  /* -INT_MIN is undefined */
5446  if (width <= INT_MIN)
5447  ereport(ERROR,
5448  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5449  errmsg("number is out of range")));
5450  width = -width;
5451  }
5452  else if (flags & TEXT_FORMAT_FLAG_MINUS)
5453  align_to_left = true;
5454 
5455  len = pg_mbstrlen(str);
5456  if (align_to_left)
5457  {
5458  /* left justify */
5459  appendStringInfoString(buf, str);
5460  if (len < width)
5461  appendStringInfoSpaces(buf, width - len);
5462  }
5463  else
5464  {
5465  /* right justify */
5466  if (len < width)
5467  appendStringInfoSpaces(buf, width - len);
5468  appendStringInfoString(buf, str);
5469  }
5470 }
5471 
5472 /*
5473  * text_format_nv - nonvariadic wrapper for text_format function.
5474  *
5475  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5476  * which checks that all built-in functions that share the implementing C
5477  * function take the same number of arguments.
5478  */
5479 Datum
5481 {
5482  return text_format(fcinfo);
5483 }
5484 
/*
 * Byte-wise equality check used by the Levenshtein distance functions;
 * faster than memcmp() for this use case.
 *
 * Compares the first "len" bytes of s1 and s2, walking from the last byte
 * toward the first, and returns false at the first difference.  A len of
 * zero or less compares nothing and returns true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
5500 
5501 /* Expand each Levenshtein distance variant */
5502 #include "levenshtein.c"
5503 #define LEVENSHTEIN_LESS_EQUAL
5504 #include "levenshtein.c"
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2737
#define PG_CACHE_LINE_SIZE
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4183
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2873
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2061
Value * makeString(char *str)
Definition: value.c:53
signed short int16
Definition: c.h:312
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:351
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:326
#define DatumGetUInt32(X)
Definition: postgres.h:471
#define NIL
Definition: pg_list.h:69
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:4976
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2028
int length(const List *list)
Definition: list.c:1333
union pg_locale_struct::@130 info
#define PG_GETARG_INT32(n)
Definition: fmgr.h:239
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:4963
Definition: fmgr.h:56
text * replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob)
Definition: varlena.c:3938
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:312
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3142
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:862
Datum split_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4073
int errhint(const char *fmt,...)
Definition: elog.c:987
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1022
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2650
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
MemoryContext fn_mcxt
Definition: fmgr.h:65
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:144
int16 typlen
Definition: pg_type.h:55
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:10488
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1705
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:2689
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:273
#define DatumGetInt32(X)
Definition: postgres.h:457
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:144
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:2721
#define HEXBASE
Definition: varlena.c:4545
#define VARSIZE(PTR)
Definition: postgres.h:303
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3717
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:3619
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:5379
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2025
#define PointerGetDatum(X)
Definition: postgres.h:541
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:131
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:530
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:238
static void text_position_setup(text *t1, text *t2, TextPositionState *state)
Definition: varlena.c:1118
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:328
#define VARHDRSZ
Definition: c.h:522
char * pstrdup(const char *in)
Definition: mcxt.c:1161
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:519
regoff_t rm_so
Definition: regex.h:85
#define DatumGetTextPP(X)
Definition: fmgr.h:261
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
StringInfoData * StringInfo
Definition: stringinfo.h:43
#define Min(x, y)
Definition: c.h:857
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:283
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2264
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2239
#define PG_RETURN_INT32(x)
Definition: fmgr.h:319
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:272
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:2807
void canonicalize_path(char *path)
Definition: path.c:254
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2129
int errcode(int sqlerrcode)
Definition: elog.c:575
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:174
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
#define DatumGetByteaPP(X)
Definition: fmgr.h:260
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:246
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3110
pg_wchar * wstr2
Definition: varlena.c:52
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:482
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4678
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:335
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3398
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:2882
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3515
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:622
#define OidIsValid(objectId)
Definition: c.h:605
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1808
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:348
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:213
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1034
bool trace_sort
Definition: tuplesort.c:130
#define PG_GET_COLLATION()
Definition: fmgr.h:168
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2952
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:4844
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:660
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:5428
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4402
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:4859
regoff_t rm_eo
Definition: regex.h:86
signed int int32
Definition: c.h:313
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:2840
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1991
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1753
static int32 text_length(Datum str)
Definition: varlena.c:640
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:794
bool typbyval
Definition: array.h:225
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:187
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:4575
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:313
static Datum text_to_array_internal(PG_FUNCTION_ARGS)
Definition: varlena.c:4208
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:3579
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3417
FmgrInfo * flinfo
Definition: fmgr.h:79
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:248
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:127
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:65
unsigned hex_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:126
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4386
void pfree(void *pointer)
Definition: mcxt.c:1031
Size toast_raw_datum_size(Datum value)
Definition: tuptoaster.c:353
#define REG_OKAY
Definition: regex.h:137
char typalign
Definition: pg_type.h:167
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:78
Datum string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4704
Datum textoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:1011
#define ERROR
Definition: elog.h:43
char * s1
static bool check_replace_text_has_escape_char(const text *replace_text)
Definition: varlena.c:3800
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1128
#define DatumGetCString(X)
Definition: postgres.h:551
Size toast_datum_size(Datum value)
Definition: tuptoaster.c:409
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1381
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:1995
Datum byteage(PG_FUNCTION_ARGS)
Definition: varlena.c:3639
#define ARR_DIMS(a)
Definition: array.h:279
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:124
MemoryContext ssup_cxt
Definition: sortsupport.h:66
struct varlena * pg_detoast_datum_packed(struct varlena *datum)
Definition: fmgr.c:1949
static int text_position_next(int start_pos, TextPositionState *state)
Definition: varlena.c:1230
Datum text_to_array_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4197
#define MAXPGPATH
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:820
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:255
static int charlen_to_bytelen(const char *p, int n)
Definition: varlena.c:735
static text * text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
Definition: varlena.c:814
Datum unknownrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:587
static text * array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string)
Definition: varlena.c:4428
Definition: c.h:570
static void appendStringInfoText(StringInfo str, const text *t)
Definition: varlena.c:3703
Datum text_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:2618
Datum texteq(PG_FUNCTION_ARGS)
Definition: varlena.c:1641
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:107
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:157
#define ARR_DATA_PTR(a)
Definition: array.h:307
hyperLogLogState abbr_card
Definition: varlena.c:73
Datum text_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:2630
Datum textne(PG_FUNCTION_ARGS)
Definition: varlena.c:1676
int16 typlen
Definition: array.h:224
pg_locale_t locale
Definition: varlena.c:76
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:192
static char * buf
Definition: pg_test_fsync.c:67
#define DatumBigEndianToNative(x)
Definition: pg_bswap.h:149
#define memmove(d, s, c)
Definition: c.h:1135
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:161
#define strcoll_l
Definition: win32_port.h:397
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3290
char typdelim
Definition: array.h:227
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition: varlena.c:2964
Datum text_name(PG_FUNCTION_ARGS)
Definition: varlena.c:3192
static text * text_catenate(text *t1, text *t2)
Definition: varlena.c:694
#define DatumGetInt16(X)
Definition: postgres.h:429
#define DatumGetBool(X)
Definition: postgres.h:378
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:173
unsigned int uint32
Definition: c.h:325
void * ssup_extra
Definition: sortsupport.h:87
Datum textpos(PG_FUNCTION_ARGS)
Definition: varlena.c:1071
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
Datum text_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:795
int bytea_output
Definition: varlena.c:41
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:134
static int text_cmp(text *arg1, text *arg2, Oid collid)
Definition: varlena.c:1616
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3044
#define S(n, x)
Definition: sha1.c:55
#define PG_RETURN_ARRAYTYPE_P(x)
Definition: array.h:250
Datum pg_column_size(PG_FUNCTION_ARGS)
Definition: varlena.c:4600
Datum text_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:1735
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:172
#define ereport(elevel, rest)
Definition: elog.h:122
static int internal_text_pattern_compare(text *arg1, text *arg2)
Definition: varlena.c:2651
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5106
static bool text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
Definition: varlena.c:5253
unsigned int pg_wchar
Definition: mbprint.c:31
Datum text_starts_with(PG_FUNCTION_ARGS)
Definition: varlena.c:1765
#define DatumGetVarStringPP(X)
Definition: varlena.c:92
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3232
#define byte(x, n)
Definition: rijndael.c:68
Datum textcat(PG_FUNCTION_ARGS)
Definition: varlena.c:679
List * lappend(List *list, void *datum)
Definition: list.c:128
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3215
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:548
#define MaxAllocSize
Definition: memutils.h:40
int skiptable[256]
Definition: varlena.c:57
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:442
signed char int8
Definition: c.h:311
void varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
Definition: varlena.c:1835
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1275
Datum text_le(PG_FUNCTION_ARGS)
Definition: varlena.c:1720
Datum hash_uint32(uint32 k)
Definition: hashfunc.c:893
uint8 bits8
Definition: c.h:332
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:781
#define TextDatumGetCString(d)
Definition: builtins.h:96
void * palloc0(Size size)
Definition: mcxt.c:955
Datum text_format_nv(PG_FUNCTION_ARGS)
Definition: varlena.c:5480
char * s2
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:324
uintptr_t Datum
Definition: postgres.h:367
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
Datum text_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:4921
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:2859
int bpchartruelen(char *s, int len)
Definition: varchar.c:660
bool scanner_isspace(char ch)
Definition: scansup.c:221
#define wcscoll_l
Definition: win32_port.h:399
#define REGEXP_REPLACE_BACKREF_CNT
Definition: varlena.c:3927
static struct @131 value
void appendStringInfoSpaces(StringInfo str, int count)
Definition: stringinfo.c:187
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:774
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:185
Datum text_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:1750
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
static void text_position_cleanup(TextPositionState *state)
Definition: varlena.c:1364
Datum byteacmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3659
#define InvalidOid
Definition: postgres_ext.h:36
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:723
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
Datum to_hex32(PG_FUNCTION_ARGS)
Definition: varlena.c:4551
hyperLogLogState full_card
Definition: varlena.c:74
#define PG_RETURN_VOID()
Definition: fmgr.h:314
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:336
#define Max(x, y)
Definition: c.h:851
text * cstring_to_text(const char *s)
Definition: varlena.c:149
Datum unknownsend(PG_FUNCTION_ARGS)
Definition: varlena.c:602
#define PG_ARGISNULL(n)
Definition: fmgr.h:179
#define Assert(condition)
Definition: c.h:699
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
Datum hash_any(register const unsigned char *k, register int keylen)
Definition: hashfunc.c:428
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:530
Datum text_right(PG_FUNCTION_ARGS)
Definition: varlena.c:4900
static text * concat_internal(const char *sepstr, int argidx, FunctionCallInfo fcinfo)
Definition: varlena.c:4764
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:183
Oid typioparam
Definition: array.h:228
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:98
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:327
Datum unknownin(PG_FUNCTION_ARGS)
Definition: varlena.c:563
size_t Size
Definition: c.h:433
static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: varlena.c:2504
static bool rest_of_char_same(const char *s1, const char *s2, int len)
Definition: varlena.c:5490
Datum text_pattern_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:2673
struct FmgrInfo FmgrInfo
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:548
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:277
Datum byteane(PG_FUNCTION_ARGS)
Definition: varlena.c:3547
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:166
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:3492
Datum textin(PG_FUNCTION_ARGS)
Definition: varlena.c:508
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define PG_NARGS()
Definition: fmgr.h:173
void * fn_extra
Definition: fmgr.h:64
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)
Definition: varlena.c:3833
#define ARR_NDIM(a)
Definition: array.h:275
Datum byteapos(PG_FUNCTION_ARGS)
Definition: varlena.c:3000
#define TEXTBUFLEN
Definition: varlena.c:83
Oid typiofunc
Definition: array.h:229
#define DatumGetPointer(X)
Definition: postgres.h:534
char typalign
Definition: array.h:226
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3449
char * text_to_cstring(const text *t)
Definition: varlena.c:182
pg_wchar * wstr1
Definition: varlena.c:51
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5042
#define DatumGetBpCharPP(X)
Definition: fmgr.h:262
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2575
Datum bttextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:1793
Datum unknownout(PG_FUNCTION_ARGS)
Definition: varlena.c:575
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:172
int16 get_typlen(Oid typid)
Definition: lsyscache.c:1951
Datum bytearecv(PG_FUNCTION_ARGS)
Definition: varlena.c:423
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:924
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:71
static StringInfo makeStringAggState(FunctionCallInfo fcinfo)
Definition: varlena.c:4654
FmgrInfo proc
Definition: array.h:230
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:450
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:771
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:2941
Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:2753
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:2776
void list_free(List *list)
Definition: list.c:1133
int i
static FmgrInfo * build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
Definition: varlena.c:4726
Oid element_type
Definition: array.h:223
#define REG_NOMATCH
Definition: regex.h:138
#define NameStr(name)
Definition: c.h:576
static char * locale
Definition: initdb.c:124
void * arg
#define unlikely(x)
Definition: c.h:208
static bool text_isequal(text *txt1, text *txt2)
Definition: varlena.c:4170
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:247
#define VAL(CH)
Definition: varlena.c:240
int int32_t
Definition: crypt.c:108
Definition: c.h:516
#define PG_FUNCTION_ARGS
Definition: fmgr.h:163
Datum text_left(PG_FUNCTION_ARGS)
Definition: varlena.c:4880
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
#define strxfrm_l
Definition: win32_port.h:398
#define elog
Definition: elog.h:219
static const char * text_format_parse_format(const char *start_ptr, const char *end_ptr, int *argpos, int *widthpos, int *flags, int *width)
Definition: varlena.c:5302
Datum byteaGetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3073
Datum bpchar(PG_FUNCTION_ARGS)
Definition: varchar.c:267
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225
ArrayType * construct_md_array(Datum *elems, bool *nulls, int ndims, int *dims, int *lbs, Oid elmtype, int elmlen, bool elmbyval, char elmalign)
Definition: arrayfuncs.c:3314
NameData * Name
Definition: c.h:574
#define PG_GETARG_INT64(n)
Definition: fmgr.h:252
Datum byteale(PG_FUNCTION_ARGS)
Definition: varlena.c:3599