PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/hash.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_collation.h"
23 #include "catalog/pg_type.h"
24 #include "common/md5.h"
25 #include "lib/hyperloglog.h"
26 #include "libpq/pqformat.h"
27 #include "miscadmin.h"
28 #include "parser/scansup.h"
29 #include "port/pg_bswap.h"
30 #include "regex/regex.h"
31 #include "utils/builtins.h"
32 #include "utils/bytea.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/pg_locale.h"
36 #include "utils/sortsupport.h"
37 #include "utils/varlena.h"
38 
39 
40 /* GUC variable */
42 
43 typedef struct varlena unknown;
44 typedef struct varlena VarString;
45 
46 typedef struct
47 {
48  bool use_wchar; /* T if multibyte encoding */
49  char *str1; /* use these if not use_wchar */
50  char *str2; /* note: these point to original texts */
51  pg_wchar *wstr1; /* use these if use_wchar */
52  pg_wchar *wstr2; /* note: these are palloc'd */
53  int len1; /* string lengths in logical characters */
54  int len2;
55  /* Skip table for Boyer-Moore-Horspool search algorithm: */
56  int skiptablemask; /* mask for ANDing with skiptable subscripts */
57  int skiptable[256]; /* skip distance for given mismatched char */
59 
60 typedef struct
61 {
62  char *buf1; /* 1st string, or abbreviation original string
63  * buf */
64  char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
65  int buflen1;
66  int buflen2;
67  int last_len1; /* Length of last buf1 string/strxfrm() input */
68  int last_len2; /* Length of last buf2 string/strxfrm() blob */
69  int last_returned; /* Last comparison result (cache) */
70  bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
71  bool collate_c;
72  bool bpchar; /* Sorting bpchar, not varchar/text/bytea? */
73  hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
74  hyperLogLogState full_card; /* Full key cardinality state */
75  double prop_card; /* Required cardinality proportion */
76 #ifdef HAVE_LOCALE_T
78 #endif
80 
81 /*
82  * This should be large enough that most strings will fit, but small enough
83  * that we feel comfortable putting it on the stack
84  */
85 #define TEXTBUFLEN 1024
86 
87 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
88 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
89 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
90 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
91 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
92 
93 #define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X))
94 #define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X))
95 
96 static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
97 static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
98 static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
99 static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
100 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
101 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
102 static int32 text_length(Datum str);
103 static text *text_catenate(text *t1, text *t2);
104 static text *text_substring(Datum str,
105  int32 start,
106  int32 length,
107  bool length_not_specified);
108 static text *text_overlay(text *t1, text *t2, int sp, int sl);
109 static int text_position(text *t1, text *t2);
110 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
111 static int text_position_next(int start_pos, TextPositionState *state);
113 static int text_cmp(text *arg1, text *arg2, Oid collid);
114 static bytea *bytea_catenate(bytea *t1, bytea *t2);
115 static bytea *bytea_substring(Datum str,
116  int S,
117  int L,
118  bool length_not_specified);
119 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
120 static void appendStringInfoText(StringInfo str, const text *t);
123  const char *fldsep, const char *null_string);
125 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
126  int *value);
127 static const char *text_format_parse_format(const char *start_ptr,
128  const char *end_ptr,
129  int *argpos, int *widthpos,
130  int *flags, int *width);
131 static void text_format_string_conversion(StringInfo buf, char conversion,
132  FmgrInfo *typOutputInfo,
133  Datum value, bool isNull,
134  int flags, int width);
135 static void text_format_append_string(StringInfo buf, const char *str,
136  int flags, int width);
137 
138 
139 /*****************************************************************************
140  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
141  *****************************************************************************/
142 
143 /*
144  * cstring_to_text
145  *
146  * Create a text value from a null-terminated C string.
147  *
148  * The new text value is freshly palloc'd with a full-size VARHDR.
149  */
150 text *
151 cstring_to_text(const char *s)
152 {
153  return cstring_to_text_with_len(s, strlen(s));
154 }
155 
156 /*
157  * cstring_to_text_with_len
158  *
159  * Same as cstring_to_text except the caller specifies the string length;
160  * the string need not be null_terminated.
161  */
162 text *
163 cstring_to_text_with_len(const char *s, int len)
164 {
165  text *result = (text *) palloc(len + VARHDRSZ);
166 
167  SET_VARSIZE(result, len + VARHDRSZ);
168  memcpy(VARDATA(result), s, len);
169 
170  return result;
171 }
172 
173 /*
174  * text_to_cstring
175  *
176  * Create a palloc'd, null-terminated C string from a text value.
177  *
178  * We support being passed a compressed or toasted text value.
179  * This is a bit bogus since such values shouldn't really be referred to as
180  * "text *", but it seems useful for robustness. If we didn't handle that
181  * case here, we'd need another routine that did, anyway.
182  */
183 char *
185 {
186  /* must cast away the const, unfortunately */
187  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
188  int len = VARSIZE_ANY_EXHDR(tunpacked);
189  char *result;
190 
191  result = (char *) palloc(len + 1);
192  memcpy(result, VARDATA_ANY(tunpacked), len);
193  result[len] = '\0';
194 
195  if (tunpacked != t)
196  pfree(tunpacked);
197 
198  return result;
199 }
200 
201 /*
202  * text_to_cstring_buffer
203  *
204  * Copy a text value into a caller-supplied buffer of size dst_len.
205  *
206  * The text string is truncated if necessary to fit. The result is
207  * guaranteed null-terminated (unless dst_len == 0).
208  *
209  * We support being passed a compressed or toasted text value.
210  * This is a bit bogus since such values shouldn't really be referred to as
211  * "text *", but it seems useful for robustness. If we didn't handle that
212  * case here, we'd need another routine that did, anyway.
213  */
214 void
215 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
216 {
217  /* must cast away the const, unfortunately */
218  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
219  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
220 
221  if (dst_len > 0)
222  {
223  dst_len--;
224  if (dst_len >= src_len)
225  dst_len = src_len;
226  else /* ensure truncation is encoding-safe */
227  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
228  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
229  dst[dst_len] = '\0';
230  }
231 
232  if (srcunpacked != src)
233  pfree(srcunpacked);
234 }
235 
236 
237 /*****************************************************************************
238  * USER I/O ROUTINES *
239  *****************************************************************************/
240 
241 
242 #define VAL(CH) ((CH) - '0')
243 #define DIG(VAL) ((VAL) + '0')
244 
245 /*
246  * byteain - converts from printable representation of byte array
247  *
248  * Non-printable characters must be passed as '\nnn' (octal) and are
249  * converted to internal form. '\' must be passed as '\\'.
250  * ereport(ERROR, ...) if bad form.
251  *
252  * BUGS:
253  * The input is scanned twice.
254  * The error checking of input is minimal.
255  */
256 Datum
258 {
259  char *inputText = PG_GETARG_CSTRING(0);
260  char *tp;
261  char *rp;
262  int bc;
263  bytea *result;
264 
265  /* Recognize hex input */
266  if (inputText[0] == '\\' && inputText[1] == 'x')
267  {
268  size_t len = strlen(inputText);
269 
270  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
271  result = palloc(bc);
272  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
273  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
274 
275  PG_RETURN_BYTEA_P(result);
276  }
277 
278  /* Else, it's the traditional escaped style */
279  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
280  {
281  if (tp[0] != '\\')
282  tp++;
283  else if ((tp[0] == '\\') &&
284  (tp[1] >= '0' && tp[1] <= '3') &&
285  (tp[2] >= '0' && tp[2] <= '7') &&
286  (tp[3] >= '0' && tp[3] <= '7'))
287  tp += 4;
288  else if ((tp[0] == '\\') &&
289  (tp[1] == '\\'))
290  tp += 2;
291  else
292  {
293  /*
294  * one backslash, not followed by another or ### valid octal
295  */
296  ereport(ERROR,
297  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
298  errmsg("invalid input syntax for type %s", "bytea")));
299  }
300  }
301 
302  bc += VARHDRSZ;
303 
304  result = (bytea *) palloc(bc);
305  SET_VARSIZE(result, bc);
306 
307  tp = inputText;
308  rp = VARDATA(result);
309  while (*tp != '\0')
310  {
311  if (tp[0] != '\\')
312  *rp++ = *tp++;
313  else if ((tp[0] == '\\') &&
314  (tp[1] >= '0' && tp[1] <= '3') &&
315  (tp[2] >= '0' && tp[2] <= '7') &&
316  (tp[3] >= '0' && tp[3] <= '7'))
317  {
318  bc = VAL(tp[1]);
319  bc <<= 3;
320  bc += VAL(tp[2]);
321  bc <<= 3;
322  *rp++ = bc + VAL(tp[3]);
323 
324  tp += 4;
325  }
326  else if ((tp[0] == '\\') &&
327  (tp[1] == '\\'))
328  {
329  *rp++ = '\\';
330  tp += 2;
331  }
332  else
333  {
334  /*
335  * We should never get here. The first pass should not allow it.
336  */
337  ereport(ERROR,
338  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
339  errmsg("invalid input syntax for type %s", "bytea")));
340  }
341  }
342 
343  PG_RETURN_BYTEA_P(result);
344 }
345 
346 /*
347  * byteaout - converts to printable representation of byte array
348  *
349  * In the traditional escaped format, non-printable characters are
350  * printed as '\nnn' (octal) and '\' as '\\'.
351  */
352 Datum
354 {
355  bytea *vlena = PG_GETARG_BYTEA_PP(0);
356  char *result;
357  char *rp;
358 
360  {
361  /* Print hex format */
362  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
363  *rp++ = '\\';
364  *rp++ = 'x';
365  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
366  }
367  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
368  {
369  /* Print traditional escaped format */
370  char *vp;
371  int len;
372  int i;
373 
374  len = 1; /* empty string has 1 char */
375  vp = VARDATA_ANY(vlena);
376  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
377  {
378  if (*vp == '\\')
379  len += 2;
380  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
381  len += 4;
382  else
383  len++;
384  }
385  rp = result = (char *) palloc(len);
386  vp = VARDATA_ANY(vlena);
387  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
388  {
389  if (*vp == '\\')
390  {
391  *rp++ = '\\';
392  *rp++ = '\\';
393  }
394  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
395  {
396  int val; /* holds unprintable chars */
397 
398  val = *vp;
399  rp[0] = '\\';
400  rp[3] = DIG(val & 07);
401  val >>= 3;
402  rp[2] = DIG(val & 07);
403  val >>= 3;
404  rp[1] = DIG(val & 03);
405  rp += 4;
406  }
407  else
408  *rp++ = *vp;
409  }
410  }
411  else
412  {
413  elog(ERROR, "unrecognized bytea_output setting: %d",
414  bytea_output);
415  rp = result = NULL; /* keep compiler quiet */
416  }
417  *rp = '\0';
418  PG_RETURN_CSTRING(result);
419 }
420 
421 /*
422  * bytearecv - converts external binary format to bytea
423  */
424 Datum
426 {
428  bytea *result;
429  int nbytes;
430 
431  nbytes = buf->len - buf->cursor;
432  result = (bytea *) palloc(nbytes + VARHDRSZ);
433  SET_VARSIZE(result, nbytes + VARHDRSZ);
434  pq_copymsgbytes(buf, VARDATA(result), nbytes);
435  PG_RETURN_BYTEA_P(result);
436 }
437 
438 /*
439  * byteasend - converts bytea to binary format
440  *
441  * This is a special case: just copy the input...
442  */
443 Datum
445 {
446  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
447 
448  PG_RETURN_BYTEA_P(vlena);
449 }
450 
451 Datum
453 {
455 
456  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
457 
458  /* Append the value unless null. */
459  if (!PG_ARGISNULL(1))
460  {
462 
463  /* On the first time through, we ignore the delimiter. */
464  if (state == NULL)
465  state = makeStringAggState(fcinfo);
466  else if (!PG_ARGISNULL(2))
467  {
468  bytea *delim = PG_GETARG_BYTEA_PP(2);
469 
471  }
472 
474  }
475 
476  /*
477  * The transition type for string_agg() is declared to be "internal",
478  * which is a pass-by-value type the same size as a pointer.
479  */
480  PG_RETURN_POINTER(state);
481 }
482 
483 Datum
485 {
487 
488  /* cannot be called directly because of internal-type argument */
489  Assert(AggCheckCallContext(fcinfo, NULL));
490 
491  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
492 
493  if (state != NULL)
494  {
495  bytea *result;
496 
497  result = (bytea *) palloc(state->len + VARHDRSZ);
498  SET_VARSIZE(result, state->len + VARHDRSZ);
499  memcpy(VARDATA(result), state->data, state->len);
500  PG_RETURN_BYTEA_P(result);
501  }
502  else
503  PG_RETURN_NULL();
504 }
505 
506 /*
507  * textin - converts "..." to internal representation
508  */
509 Datum
511 {
512  char *inputText = PG_GETARG_CSTRING(0);
513 
514  PG_RETURN_TEXT_P(cstring_to_text(inputText));
515 }
516 
517 /*
518  * textout - converts internal representation to "..."
519  */
520 Datum
522 {
523  Datum txt = PG_GETARG_DATUM(0);
524 
526 }
527 
528 /*
529  * textrecv - converts external binary format to text
530  */
531 Datum
533 {
535  text *result;
536  char *str;
537  int nbytes;
538 
539  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
540 
541  result = cstring_to_text_with_len(str, nbytes);
542  pfree(str);
543  PG_RETURN_TEXT_P(result);
544 }
545 
546 /*
547  * textsend - converts text to binary format
548  */
549 Datum
551 {
552  text *t = PG_GETARG_TEXT_PP(0);
554 
555  pq_begintypsend(&buf);
558 }
559 
560 
561 /*
562  * unknownin - converts "..." to internal representation
563  */
564 Datum
566 {
567  char *str = PG_GETARG_CSTRING(0);
568 
569  /* representation is same as cstring */
571 }
572 
573 /*
574  * unknownout - converts internal representation to "..."
575  */
576 Datum
578 {
579  /* representation is same as cstring */
580  char *str = PG_GETARG_CSTRING(0);
581 
583 }
584 
585 /*
586  * unknownrecv - converts external binary format to unknown
587  */
588 Datum
590 {
592  char *str;
593  int nbytes;
594 
595  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
596  /* representation is same as cstring */
597  PG_RETURN_CSTRING(str);
598 }
599 
600 /*
601  * unknownsend - converts unknown to binary format
602  */
603 Datum
605 {
606  /* representation is same as cstring */
607  char *str = PG_GETARG_CSTRING(0);
609 
610  pq_begintypsend(&buf);
611  pq_sendtext(&buf, str, strlen(str));
613 }
614 
615 
616 /* ========== PUBLIC ROUTINES ========== */
617 
618 /*
619  * textlen -
620  * returns the logical length of a text*
621  * (which is less than the VARSIZE of the text*)
622  */
623 Datum
625 {
626  Datum str = PG_GETARG_DATUM(0);
627 
628  /* try to avoid decompressing argument */
630 }
631 
632 /*
633  * text_length -
634  * Does the real work for textlen()
635  *
636  * This is broken out so it can be called directly by other string processing
637  * functions. Note that the argument is passed as a Datum, to indicate that
638  * it may still be in compressed form. We can avoid decompressing it at all
639  * in some cases.
640  */
641 static int32
643 {
644  /* fastpath when max encoding length is one */
647  else
648  {
649  text *t = DatumGetTextPP(str);
650 
652  VARSIZE_ANY_EXHDR(t)));
653  }
654 }
655 
656 /*
657  * textoctetlen -
658  * returns the physical length of a text*
659  * (which is less than the VARSIZE of the text*)
660  */
661 Datum
663 {
664  Datum str = PG_GETARG_DATUM(0);
665 
666  /* We need not detoast the input at all */
668 }
669 
670 /*
671  * textcat -
672  * takes two text* and returns a text* that is the concatenation of
673  * the two.
674  *
675  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
676  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
677  * Allocate space for output in all cases.
678  * XXX - thomas 1997-07-10
679  */
680 Datum
682 {
683  text *t1 = PG_GETARG_TEXT_PP(0);
684  text *t2 = PG_GETARG_TEXT_PP(1);
685 
687 }
688 
689 /*
690  * text_catenate
691  * Guts of textcat(), broken out so it can be used by other functions
692  *
693  * Arguments can be in short-header form, but not compressed or out-of-line
694  */
695 static text *
697 {
698  text *result;
699  int len1,
700  len2,
701  len;
702  char *ptr;
703 
704  len1 = VARSIZE_ANY_EXHDR(t1);
705  len2 = VARSIZE_ANY_EXHDR(t2);
706 
707  /* paranoia ... probably should throw error instead? */
708  if (len1 < 0)
709  len1 = 0;
710  if (len2 < 0)
711  len2 = 0;
712 
713  len = len1 + len2 + VARHDRSZ;
714  result = (text *) palloc(len);
715 
716  /* Set size of result string... */
717  SET_VARSIZE(result, len);
718 
719  /* Fill data field of result string... */
720  ptr = VARDATA(result);
721  if (len1 > 0)
722  memcpy(ptr, VARDATA_ANY(t1), len1);
723  if (len2 > 0)
724  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
725 
726  return result;
727 }
728 
729 /*
730  * charlen_to_bytelen()
731  * Compute the number of bytes occupied by n characters starting at *p
732  *
733  * It is caller's responsibility that there actually are n characters;
734  * the string need not be null-terminated.
735  */
736 static int
737 charlen_to_bytelen(const char *p, int n)
738 {
740  {
741  /* Optimization for single-byte encodings */
742  return n;
743  }
744  else
745  {
746  const char *s;
747 
748  for (s = p; n > 0; n--)
749  s += pg_mblen(s);
750 
751  return s - p;
752  }
753 }
754 
755 /*
756  * text_substr()
757  * Return a substring starting at the specified position.
758  * - thomas 1997-12-31
759  *
760  * Input:
761  * - string
762  * - starting position (is one-based)
763  * - string length
764  *
765  * If the starting position is zero or less, then return from the start of the string
766  * adjusting the length to be consistent with the "negative start" per SQL.
767  * If the length is less than zero, return the remaining string.
768  *
769  * Added multibyte support.
770  * - Tatsuo Ishii 1998-4-21
771  * Changed behavior if starting position is less than one to conform to SQL behavior.
772  * Formerly returned the entire string; now returns a portion.
773  * - Thomas Lockhart 1998-12-10
774  * Now uses faster TOAST-slicing interface
775  * - John Gray 2002-02-22
776  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
777  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
778  * error; if E < 1, return '', not entire string). Fixed MB related bug when
779  * S > LC and < LC + 4 sometimes garbage characters are returned.
780  * - Joe Conway 2002-08-10
781  */
782 Datum
784 {
786  PG_GETARG_INT32(1),
787  PG_GETARG_INT32(2),
788  false));
789 }
790 
791 /*
792  * text_substr_no_len -
793  * Wrapper to avoid opr_sanity failure due to
794  * one function accepting a different number of args.
795  */
796 Datum
798 {
800  PG_GETARG_INT32(1),
801  -1, true));
802 }
803 
804 /*
805  * text_substring -
806  * Does the real work for text_substr() and text_substr_no_len()
807  *
808  * This is broken out so it can be called directly by other string processing
809  * functions. Note that the argument is passed as a Datum, to indicate that
810  * it may still be in compressed/toasted form. We can avoid detoasting all
811  * of it in some cases.
812  *
813  * The result is always a freshly palloc'd datum.
814  */
815 static text *
816 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
817 {
819  int32 S = start; /* start position */
820  int32 S1; /* adjusted start position */
821  int32 L1; /* adjusted substring length */
822 
823  /* life is easy if the encoding max length is 1 */
824  if (eml == 1)
825  {
826  S1 = Max(S, 1);
827 
828  if (length_not_specified) /* special case - get length to end of
829  * string */
830  L1 = -1;
831  else
832  {
833  /* end position */
834  int E = S + length;
835 
836  /*
837  * A negative value for L is the only way for the end position to
838  * be before the start. SQL99 says to throw an error.
839  */
840  if (E < S)
841  ereport(ERROR,
842  (errcode(ERRCODE_SUBSTRING_ERROR),
843  errmsg("negative substring length not allowed")));
844 
845  /*
846  * A zero or negative value for the end position can happen if the
847  * start was negative or one. SQL99 says to return a zero-length
848  * string.
849  */
850  if (E < 1)
851  return cstring_to_text("");
852 
853  L1 = E - S1;
854  }
855 
856  /*
857  * If the start position is past the end of the string, SQL99 says to
858  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
859  * that for us. Convert to zero-based starting position
860  */
861  return DatumGetTextPSlice(str, S1 - 1, L1);
862  }
863  else if (eml > 1)
864  {
865  /*
866  * When encoding max length is > 1, we can't get LC without
867  * detoasting, so we'll grab a conservatively large slice now and go
868  * back later to do the right thing
869  */
870  int32 slice_start;
871  int32 slice_size;
872  int32 slice_strlen;
873  text *slice;
874  int32 E1;
875  int32 i;
876  char *p;
877  char *s;
878  text *ret;
879 
880  /*
881  * if S is past the end of the string, the tuple toaster will return a
882  * zero-length string to us
883  */
884  S1 = Max(S, 1);
885 
886  /*
887  * We need to start at position zero because there is no way to know
888  * in advance which byte offset corresponds to the supplied start
889  * position.
890  */
891  slice_start = 0;
892 
893  if (length_not_specified) /* special case - get length to end of
894  * string */
895  slice_size = L1 = -1;
896  else
897  {
898  int E = S + length;
899 
900  /*
901  * A negative value for L is the only way for the end position to
902  * be before the start. SQL99 says to throw an error.
903  */
904  if (E < S)
905  ereport(ERROR,
906  (errcode(ERRCODE_SUBSTRING_ERROR),
907  errmsg("negative substring length not allowed")));
908 
909  /*
910  * A zero or negative value for the end position can happen if the
911  * start was negative or one. SQL99 says to return a zero-length
912  * string.
913  */
914  if (E < 1)
915  return cstring_to_text("");
916 
917  /*
918  * if E is past the end of the string, the tuple toaster will
919  * truncate the length for us
920  */
921  L1 = E - S1;
922 
923  /*
924  * Total slice size in bytes can't be any longer than the start
925  * position plus substring length times the encoding max length.
926  */
927  slice_size = (S1 + L1) * eml;
928  }
929 
930  /*
931  * If we're working with an untoasted source, no need to do an extra
932  * copying step.
933  */
936  slice = DatumGetTextPSlice(str, slice_start, slice_size);
937  else
938  slice = (text *) DatumGetPointer(str);
939 
940  /* see if we got back an empty string */
941  if (VARSIZE_ANY_EXHDR(slice) == 0)
942  {
943  if (slice != (text *) DatumGetPointer(str))
944  pfree(slice);
945  return cstring_to_text("");
946  }
947 
948  /* Now we can get the actual length of the slice in MB characters */
949  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
950  VARSIZE_ANY_EXHDR(slice));
951 
952  /*
953  * Check that the start position wasn't > slice_strlen. If so, SQL99
954  * says to return a zero-length string.
955  */
956  if (S1 > slice_strlen)
957  {
958  if (slice != (text *) DatumGetPointer(str))
959  pfree(slice);
960  return cstring_to_text("");
961  }
962 
963  /*
964  * Adjust L1 and E1 now that we know the slice string length. Again
965  * remember that S1 is one based, and slice_start is zero based.
966  */
967  if (L1 > -1)
968  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
969  else
970  E1 = slice_start + 1 + slice_strlen;
971 
972  /*
973  * Find the start position in the slice; remember S1 is not zero based
974  */
975  p = VARDATA_ANY(slice);
976  for (i = 0; i < S1 - 1; i++)
977  p += pg_mblen(p);
978 
979  /* hang onto a pointer to our start position */
980  s = p;
981 
982  /*
983  * Count the actual bytes used by the substring of the requested
984  * length.
985  */
986  for (i = S1; i < E1; i++)
987  p += pg_mblen(p);
988 
989  ret = (text *) palloc(VARHDRSZ + (p - s));
990  SET_VARSIZE(ret, VARHDRSZ + (p - s));
991  memcpy(VARDATA(ret), s, (p - s));
992 
993  if (slice != (text *) DatumGetPointer(str))
994  pfree(slice);
995 
996  return ret;
997  }
998  else
999  elog(ERROR, "invalid backend encoding: encoding max length < 1");
1000 
1001  /* not reached: suppress compiler warning */
1002  return NULL;
1003 }
1004 
1005 /*
1006  * textoverlay
1007  * Replace specified substring of first string with second
1008  *
1009  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1010  * This code is a direct implementation of what the standard says.
1011  */
1012 Datum
1014 {
1015  text *t1 = PG_GETARG_TEXT_PP(0);
1016  text *t2 = PG_GETARG_TEXT_PP(1);
1017  int sp = PG_GETARG_INT32(2); /* substring start position */
1018  int sl = PG_GETARG_INT32(3); /* substring length */
1019 
1020  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1021 }
1022 
1023 Datum
1025 {
1026  text *t1 = PG_GETARG_TEXT_PP(0);
1027  text *t2 = PG_GETARG_TEXT_PP(1);
1028  int sp = PG_GETARG_INT32(2); /* substring start position */
1029  int sl;
1030 
1031  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1032  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1033 }
1034 
1035 static text *
1036 text_overlay(text *t1, text *t2, int sp, int sl)
1037 {
1038  text *result;
1039  text *s1;
1040  text *s2;
1041  int sp_pl_sl;
1042 
1043  /*
1044  * Check for possible integer-overflow cases. For negative sp, throw a
1045  * "substring length" error because that's what should be expected
1046  * according to the spec's definition of OVERLAY().
1047  */
1048  if (sp <= 0)
1049  ereport(ERROR,
1050  (errcode(ERRCODE_SUBSTRING_ERROR),
1051  errmsg("negative substring length not allowed")));
1052  sp_pl_sl = sp + sl;
1053  if (sp_pl_sl <= sl)
1054  ereport(ERROR,
1055  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1056  errmsg("integer out of range")));
1057 
1058  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1059  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1060  result = text_catenate(s1, t2);
1061  result = text_catenate(result, s2);
1062 
1063  return result;
1064 }
1065 
1066 /*
1067  * textpos -
1068  * Return the position of the specified substring.
1069  * Implements the SQL POSITION() function.
1070  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1071  * - thomas 1997-07-27
1072  */
1073 Datum
1075 {
1076  text *str = PG_GETARG_TEXT_PP(0);
1077  text *search_str = PG_GETARG_TEXT_PP(1);
1078 
1079  PG_RETURN_INT32((int32) text_position(str, search_str));
1080 }
1081 
1082 /*
1083  * text_position -
1084  * Does the real work for textpos()
1085  *
1086  * Inputs:
1087  * t1 - string to be searched
1088  * t2 - pattern to match within t1
1089  * Result:
1090  * Character index of the first matched char, starting from 1,
1091  * or 0 if no match.
1092  *
1093  * This is broken out so it can be called directly by other string processing
1094  * functions.
1095  */
1096 static int
1098 {
1100  int result;
1101 
1102  text_position_setup(t1, t2, &state);
1103  result = text_position_next(1, &state);
1104  text_position_cleanup(&state);
1105  return result;
1106 }
1107 
1108 
1109 /*
1110  * text_position_setup, text_position_next, text_position_cleanup -
1111  * Component steps of text_position()
1112  *
1113  * These are broken out so that a string can be efficiently searched for
1114  * multiple occurrences of the same pattern. text_position_next may be
1115  * called multiple times with increasing values of start_pos, which is
1116  * the 1-based character position to start the search from. The "state"
1117  * variable is normally just a local variable in the caller.
1118  */
1119 
1120 static void
1122 {
1123  int len1 = VARSIZE_ANY_EXHDR(t1);
1124  int len2 = VARSIZE_ANY_EXHDR(t2);
1125 
1127  {
1128  /* simple case - single byte encoding */
1129  state->use_wchar = false;
1130  state->str1 = VARDATA_ANY(t1);
1131  state->str2 = VARDATA_ANY(t2);
1132  state->len1 = len1;
1133  state->len2 = len2;
1134  }
1135  else
1136  {
1137  /* not as simple - multibyte encoding */
1138  pg_wchar *p1,
1139  *p2;
1140 
1141  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1142  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1143  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1144  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1145 
1146  state->use_wchar = true;
1147  state->wstr1 = p1;
1148  state->wstr2 = p2;
1149  state->len1 = len1;
1150  state->len2 = len2;
1151  }
1152 
1153  /*
1154  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1155  * notes we use the terminology that the "haystack" is the string to be
1156  * searched (t1) and the "needle" is the pattern being sought (t2).
1157  *
1158  * If the needle is empty or bigger than the haystack then there is no
1159  * point in wasting cycles initializing the table. We also choose not to
1160  * use B-M-H for needles of length 1, since the skip table can't possibly
1161  * save anything in that case.
1162  */
1163  if (len1 >= len2 && len2 > 1)
1164  {
1165  int searchlength = len1 - len2;
1166  int skiptablemask;
1167  int last;
1168  int i;
1169 
1170  /*
1171  * First we must determine how much of the skip table to use. The
1172  * declaration of TextPositionState allows up to 256 elements, but for
1173  * short search problems we don't really want to have to initialize so
1174  * many elements --- it would take too long in comparison to the
1175  * actual search time. So we choose a useful skip table size based on
1176  * the haystack length minus the needle length. The closer the needle
1177  * length is to the haystack length the less useful skipping becomes.
1178  *
1179  * Note: since we use bit-masking to select table elements, the skip
1180  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1181  */
1182  if (searchlength < 16)
1183  skiptablemask = 3;
1184  else if (searchlength < 64)
1185  skiptablemask = 7;
1186  else if (searchlength < 128)
1187  skiptablemask = 15;
1188  else if (searchlength < 512)
1189  skiptablemask = 31;
1190  else if (searchlength < 2048)
1191  skiptablemask = 63;
1192  else if (searchlength < 4096)
1193  skiptablemask = 127;
1194  else
1195  skiptablemask = 255;
1196  state->skiptablemask = skiptablemask;
1197 
1198  /*
1199  * Initialize the skip table. We set all elements to the needle
1200  * length, since this is the correct skip distance for any character
1201  * not found in the needle.
1202  */
1203  for (i = 0; i <= skiptablemask; i++)
1204  state->skiptable[i] = len2;
1205 
1206  /*
1207  * Now examine the needle. For each character except the last one,
1208  * set the corresponding table element to the appropriate skip
1209  * distance. Note that when two characters share the same skip table
1210  * entry, the one later in the needle must determine the skip
1211  * distance.
1212  */
1213  last = len2 - 1;
1214 
1215  if (!state->use_wchar)
1216  {
1217  const char *str2 = state->str2;
1218 
1219  for (i = 0; i < last; i++)
1220  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1221  }
1222  else
1223  {
1224  const pg_wchar *wstr2 = state->wstr2;
1225 
1226  for (i = 0; i < last; i++)
1227  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1228  }
1229  }
1230 }
1231 
1232 static int
1234 {
1235  int haystack_len = state->len1;
1236  int needle_len = state->len2;
1237  int skiptablemask = state->skiptablemask;
1238 
1239  Assert(start_pos > 0); /* else caller error */
1240 
1241  if (needle_len <= 0)
1242  return start_pos; /* result for empty pattern */
1243 
1244  start_pos--; /* adjust for zero based arrays */
1245 
1246  /* Done if the needle can't possibly fit */
1247  if (haystack_len < start_pos + needle_len)
1248  return 0;
1249 
1250  if (!state->use_wchar)
1251  {
1252  /* simple case - single byte encoding */
1253  const char *haystack = state->str1;
1254  const char *needle = state->str2;
1255  const char *haystack_end = &haystack[haystack_len];
1256  const char *hptr;
1257 
1258  if (needle_len == 1)
1259  {
1260  /* No point in using B-M-H for a one-character needle */
1261  char nchar = *needle;
1262 
1263  hptr = &haystack[start_pos];
1264  while (hptr < haystack_end)
1265  {
1266  if (*hptr == nchar)
1267  return hptr - haystack + 1;
1268  hptr++;
1269  }
1270  }
1271  else
1272  {
1273  const char *needle_last = &needle[needle_len - 1];
1274 
1275  /* Start at startpos plus the length of the needle */
1276  hptr = &haystack[start_pos + needle_len - 1];
1277  while (hptr < haystack_end)
1278  {
1279  /* Match the needle scanning *backward* */
1280  const char *nptr;
1281  const char *p;
1282 
1283  nptr = needle_last;
1284  p = hptr;
1285  while (*nptr == *p)
1286  {
1287  /* Matched it all? If so, return 1-based position */
1288  if (nptr == needle)
1289  return p - haystack + 1;
1290  nptr--, p--;
1291  }
1292 
1293  /*
1294  * No match, so use the haystack char at hptr to decide how
1295  * far to advance. If the needle had any occurrence of that
1296  * character (or more precisely, one sharing the same
1297  * skiptable entry) before its last character, then we advance
1298  * far enough to align the last such needle character with
1299  * that haystack position. Otherwise we can advance by the
1300  * whole needle length.
1301  */
1302  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1303  }
1304  }
1305  }
1306  else
1307  {
1308  /* The multibyte char version. This works exactly the same way. */
1309  const pg_wchar *haystack = state->wstr1;
1310  const pg_wchar *needle = state->wstr2;
1311  const pg_wchar *haystack_end = &haystack[haystack_len];
1312  const pg_wchar *hptr;
1313 
1314  if (needle_len == 1)
1315  {
1316  /* No point in using B-M-H for a one-character needle */
1317  pg_wchar nchar = *needle;
1318 
1319  hptr = &haystack[start_pos];
1320  while (hptr < haystack_end)
1321  {
1322  if (*hptr == nchar)
1323  return hptr - haystack + 1;
1324  hptr++;
1325  }
1326  }
1327  else
1328  {
1329  const pg_wchar *needle_last = &needle[needle_len - 1];
1330 
1331  /* Start at startpos plus the length of the needle */
1332  hptr = &haystack[start_pos + needle_len - 1];
1333  while (hptr < haystack_end)
1334  {
1335  /* Match the needle scanning *backward* */
1336  const pg_wchar *nptr;
1337  const pg_wchar *p;
1338 
1339  nptr = needle_last;
1340  p = hptr;
1341  while (*nptr == *p)
1342  {
1343  /* Matched it all? If so, return 1-based position */
1344  if (nptr == needle)
1345  return p - haystack + 1;
1346  nptr--, p--;
1347  }
1348 
1349  /*
1350  * No match, so use the haystack char at hptr to decide how
1351  * far to advance. If the needle had any occurrence of that
1352  * character (or more precisely, one sharing the same
1353  * skiptable entry) before its last character, then we advance
1354  * far enough to align the last such needle character with
1355  * that haystack position. Otherwise we can advance by the
1356  * whole needle length.
1357  */
1358  hptr += state->skiptable[*hptr & skiptablemask];
1359  }
1360  }
1361  }
1362 
1363  return 0; /* not found */
1364 }
1365 
1366 static void
1368 {
1369  if (state->use_wchar)
1370  {
1371  pfree(state->wstr1);
1372  pfree(state->wstr2);
1373  }
1374 }
1375 
1376 /* varstr_cmp()
1377  * Comparison function for text strings with given lengths.
1378  * Includes locale support, but must copy strings to temporary memory
1379  * to allow null-termination for inputs to strcoll().
1380  * Returns an integer less than, equal to, or greater than zero, indicating
1381  * whether arg1 is less than, equal to, or greater than arg2.
1382  */
1383 int
1384 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1385 {
1386  int result;
1387 
1388  /*
1389  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1390  * have to do some memory copying. This turns out to be significantly
1391  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1392  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1393  */
1394  if (lc_collate_is_c(collid))
1395  {
1396  result = memcmp(arg1, arg2, Min(len1, len2));
1397  if ((result == 0) && (len1 != len2))
1398  result = (len1 < len2) ? -1 : 1;
1399  }
1400  else
1401  {
1402  char a1buf[TEXTBUFLEN];
1403  char a2buf[TEXTBUFLEN];
1404  char *a1p,
1405  *a2p;
1406 
1407 #ifdef HAVE_LOCALE_T
1408  pg_locale_t mylocale = 0;
1409 #endif
1410 
1411  if (collid != DEFAULT_COLLATION_OID)
1412  {
1413  if (!OidIsValid(collid))
1414  {
1415  /*
1416  * This typically means that the parser could not resolve a
1417  * conflict of implicit collations, so report it that way.
1418  */
1419  ereport(ERROR,
1420  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1421  errmsg("could not determine which collation to use for string comparison"),
1422  errhint("Use the COLLATE clause to set the collation explicitly.")));
1423  }
1424 #ifdef HAVE_LOCALE_T
1425  mylocale = pg_newlocale_from_collation(collid);
1426 #endif
1427  }
1428 
1429  /*
1430  * memcmp() can't tell us which of two unequal strings sorts first,
1431  * but it's a cheap way to tell if they're equal. Testing shows that
1432  * memcmp() followed by strcoll() is only trivially slower than
1433  * strcoll() by itself, so we don't lose much if this doesn't work out
1434  * very often, and if it does - for example, because there are many
1435  * equal strings in the input - then we win big by avoiding expensive
1436  * collation-aware comparisons.
1437  */
1438  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1439  return 0;
1440 
1441 #ifdef WIN32
1442  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1443  if (GetDatabaseEncoding() == PG_UTF8)
1444  {
1445  int a1len;
1446  int a2len;
1447  int r;
1448 
1449  if (len1 >= TEXTBUFLEN / 2)
1450  {
1451  a1len = len1 * 2 + 2;
1452  a1p = palloc(a1len);
1453  }
1454  else
1455  {
1456  a1len = TEXTBUFLEN;
1457  a1p = a1buf;
1458  }
1459  if (len2 >= TEXTBUFLEN / 2)
1460  {
1461  a2len = len2 * 2 + 2;
1462  a2p = palloc(a2len);
1463  }
1464  else
1465  {
1466  a2len = TEXTBUFLEN;
1467  a2p = a2buf;
1468  }
1469 
1470  /* stupid Microsloth API does not work for zero-length input */
1471  if (len1 == 0)
1472  r = 0;
1473  else
1474  {
1475  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1476  (LPWSTR) a1p, a1len / 2);
1477  if (!r)
1478  ereport(ERROR,
1479  (errmsg("could not convert string to UTF-16: error code %lu",
1480  GetLastError())));
1481  }
1482  ((LPWSTR) a1p)[r] = 0;
1483 
1484  if (len2 == 0)
1485  r = 0;
1486  else
1487  {
1488  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1489  (LPWSTR) a2p, a2len / 2);
1490  if (!r)
1491  ereport(ERROR,
1492  (errmsg("could not convert string to UTF-16: error code %lu",
1493  GetLastError())));
1494  }
1495  ((LPWSTR) a2p)[r] = 0;
1496 
1497  errno = 0;
1498 #ifdef HAVE_LOCALE_T
1499  if (mylocale)
1500  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
1501  else
1502 #endif
1503  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1504  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1505  * headers */
1506  ereport(ERROR,
1507  (errmsg("could not compare Unicode strings: %m")));
1508 
1509  /*
1510  * In some locales wcscoll() can claim that nonidentical strings
1511  * are equal. Believing that would be bad news for a number of
1512  * reasons, so we follow Perl's lead and sort "equal" strings
1513  * according to strcmp (on the UTF-8 representation).
1514  */
1515  if (result == 0)
1516  {
1517  result = memcmp(arg1, arg2, Min(len1, len2));
1518  if ((result == 0) && (len1 != len2))
1519  result = (len1 < len2) ? -1 : 1;
1520  }
1521 
1522  if (a1p != a1buf)
1523  pfree(a1p);
1524  if (a2p != a2buf)
1525  pfree(a2p);
1526 
1527  return result;
1528  }
1529 #endif /* WIN32 */
1530 
1531  if (len1 >= TEXTBUFLEN)
1532  a1p = (char *) palloc(len1 + 1);
1533  else
1534  a1p = a1buf;
1535  if (len2 >= TEXTBUFLEN)
1536  a2p = (char *) palloc(len2 + 1);
1537  else
1538  a2p = a2buf;
1539 
1540  memcpy(a1p, arg1, len1);
1541  a1p[len1] = '\0';
1542  memcpy(a2p, arg2, len2);
1543  a2p[len2] = '\0';
1544 
1545 #ifdef HAVE_LOCALE_T
1546  if (mylocale)
1547  result = strcoll_l(a1p, a2p, mylocale);
1548  else
1549 #endif
1550  result = strcoll(a1p, a2p);
1551 
1552  /*
1553  * In some locales strcoll() can claim that nonidentical strings are
1554  * equal. Believing that would be bad news for a number of reasons,
1555  * so we follow Perl's lead and sort "equal" strings according to
1556  * strcmp().
1557  */
1558  if (result == 0)
1559  result = strcmp(a1p, a2p);
1560 
1561  if (a1p != a1buf)
1562  pfree(a1p);
1563  if (a2p != a2buf)
1564  pfree(a2p);
1565  }
1566 
1567  return result;
1568 }
1569 
1570 /* text_cmp()
1571  * Internal comparison function for text strings.
1572  * Returns -1, 0 or 1
1573  */
1574 static int
1575 text_cmp(text *arg1, text *arg2, Oid collid)
1576 {
1577  char *a1p,
1578  *a2p;
1579  int len1,
1580  len2;
1581 
1582  a1p = VARDATA_ANY(arg1);
1583  a2p = VARDATA_ANY(arg2);
1584 
1585  len1 = VARSIZE_ANY_EXHDR(arg1);
1586  len2 = VARSIZE_ANY_EXHDR(arg2);
1587 
1588  return varstr_cmp(a1p, len1, a2p, len2, collid);
1589 }
1590 
1591 /*
1592  * Comparison functions for text strings.
1593  *
1594  * Note: btree indexes need these routines not to leak memory; therefore,
1595  * be careful to free working copies of toasted datums. Most places don't
1596  * need to be so careful.
1597  */
1598 
1599 Datum
1601 {
1602  Datum arg1 = PG_GETARG_DATUM(0);
1603  Datum arg2 = PG_GETARG_DATUM(1);
1604  bool result;
1605  Size len1,
1606  len2;
1607 
1608  /*
1609  * Since we only care about equality or not-equality, we can avoid all the
1610  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1611  * don't even have to do a bitwise comparison if we can show the lengths
1612  * of the strings are unequal; which might save us from having to detoast
1613  * one or both values.
1614  */
1615  len1 = toast_raw_datum_size(arg1);
1616  len2 = toast_raw_datum_size(arg2);
1617  if (len1 != len2)
1618  result = false;
1619  else
1620  {
1621  text *targ1 = DatumGetTextPP(arg1);
1622  text *targ2 = DatumGetTextPP(arg2);
1623 
1624  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1625  len1 - VARHDRSZ) == 0);
1626 
1627  PG_FREE_IF_COPY(targ1, 0);
1628  PG_FREE_IF_COPY(targ2, 1);
1629  }
1630 
1631  PG_RETURN_BOOL(result);
1632 }
1633 
1634 Datum
1636 {
1637  Datum arg1 = PG_GETARG_DATUM(0);
1638  Datum arg2 = PG_GETARG_DATUM(1);
1639  bool result;
1640  Size len1,
1641  len2;
1642 
1643  /* See comment in texteq() */
1644  len1 = toast_raw_datum_size(arg1);
1645  len2 = toast_raw_datum_size(arg2);
1646  if (len1 != len2)
1647  result = true;
1648  else
1649  {
1650  text *targ1 = DatumGetTextPP(arg1);
1651  text *targ2 = DatumGetTextPP(arg2);
1652 
1653  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1654  len1 - VARHDRSZ) != 0);
1655 
1656  PG_FREE_IF_COPY(targ1, 0);
1657  PG_FREE_IF_COPY(targ2, 1);
1658  }
1659 
1660  PG_RETURN_BOOL(result);
1661 }
1662 
1663 Datum
1665 {
1666  text *arg1 = PG_GETARG_TEXT_PP(0);
1667  text *arg2 = PG_GETARG_TEXT_PP(1);
1668  bool result;
1669 
1670  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1671 
1672  PG_FREE_IF_COPY(arg1, 0);
1673  PG_FREE_IF_COPY(arg2, 1);
1674 
1675  PG_RETURN_BOOL(result);
1676 }
1677 
1678 Datum
1680 {
1681  text *arg1 = PG_GETARG_TEXT_PP(0);
1682  text *arg2 = PG_GETARG_TEXT_PP(1);
1683  bool result;
1684 
1685  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1686 
1687  PG_FREE_IF_COPY(arg1, 0);
1688  PG_FREE_IF_COPY(arg2, 1);
1689 
1690  PG_RETURN_BOOL(result);
1691 }
1692 
1693 Datum
1695 {
1696  text *arg1 = PG_GETARG_TEXT_PP(0);
1697  text *arg2 = PG_GETARG_TEXT_PP(1);
1698  bool result;
1699 
1700  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1701 
1702  PG_FREE_IF_COPY(arg1, 0);
1703  PG_FREE_IF_COPY(arg2, 1);
1704 
1705  PG_RETURN_BOOL(result);
1706 }
1707 
1708 Datum
1710 {
1711  text *arg1 = PG_GETARG_TEXT_PP(0);
1712  text *arg2 = PG_GETARG_TEXT_PP(1);
1713  bool result;
1714 
1715  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1716 
1717  PG_FREE_IF_COPY(arg1, 0);
1718  PG_FREE_IF_COPY(arg2, 1);
1719 
1720  PG_RETURN_BOOL(result);
1721 }
1722 
1723 Datum
1725 {
1726  text *arg1 = PG_GETARG_TEXT_PP(0);
1727  text *arg2 = PG_GETARG_TEXT_PP(1);
1728  int32 result;
1729 
1730  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1731 
1732  PG_FREE_IF_COPY(arg1, 0);
1733  PG_FREE_IF_COPY(arg2, 1);
1734 
1735  PG_RETURN_INT32(result);
1736 }
1737 
1738 Datum
1740 {
1742  Oid collid = ssup->ssup_collation;
1743  MemoryContext oldcontext;
1744 
1745  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1746 
1747  /* Use generic string SortSupport */
1748  varstr_sortsupport(ssup, collid, false);
1749 
1750  MemoryContextSwitchTo(oldcontext);
1751 
1752  PG_RETURN_VOID();
1753 }
1754 
1755 /*
1756  * Generic sortsupport interface for character type's operator classes.
1757  * Includes locale support, and support for BpChar semantics (i.e. removing
1758  * trailing spaces before comparison).
1759  *
1760  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1761  * same representation. Callers that always use the C collation (e.g.
1762  * non-collatable type callers like bytea) may have NUL bytes in their strings;
1763  * this will not work with any other collation, though.
1764  */
1765 void
1767 {
1768  bool abbreviate = ssup->abbreviate;
1769  bool collate_c = false;
1770  VarStringSortSupport *sss;
1771 
1772 #ifdef HAVE_LOCALE_T
1773  pg_locale_t locale = 0;
1774 #endif
1775 
1776  /*
1777  * If possible, set ssup->comparator to a function which can be used to
1778  * directly compare two datums. If we can do this, we'll avoid the
1779  * overhead of a trip through the fmgr layer for every comparison, which
1780  * can be substantial.
1781  *
1782  * Most typically, we'll set the comparator to varstrfastcmp_locale, which
1783  * uses strcoll() to perform comparisons and knows about the special
1784  * requirements of BpChar callers. However, if LC_COLLATE = C, we can
1785  * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
1786  * both of which use memcmp() rather than strcoll().
1787  *
1788  * There is a further exception on Windows. When the database encoding is
1789  * UTF-8 and we are not using the C collation, complex hacks are required.
1790  * We don't currently have a comparator that handles that case, so we fall
1791  * back on the slow method of having the sort code invoke bttextcmp() (in
1792  * the case of text) via the fmgr trampoline.
1793  */
1794  if (lc_collate_is_c(collid))
1795  {
1796  if (!bpchar)
1797  ssup->comparator = varstrfastcmp_c;
1798  else
1799  ssup->comparator = bpcharfastcmp_c;
1800 
1801  collate_c = true;
1802  }
1803 #ifdef WIN32
1804  else if (GetDatabaseEncoding() == PG_UTF8)
1805  return;
1806 #endif
1807  else
1808  {
1810 
1811  /*
1812  * We need a collation-sensitive comparison. To make things faster,
1813  * we'll figure out the collation based on the locale id and cache the
1814  * result.
1815  */
1816  if (collid != DEFAULT_COLLATION_OID)
1817  {
1818  if (!OidIsValid(collid))
1819  {
1820  /*
1821  * This typically means that the parser could not resolve a
1822  * conflict of implicit collations, so report it that way.
1823  */
1824  ereport(ERROR,
1825  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1826  errmsg("could not determine which collation to use for string comparison"),
1827  errhint("Use the COLLATE clause to set the collation explicitly.")));
1828  }
1829 #ifdef HAVE_LOCALE_T
1830  locale = pg_newlocale_from_collation(collid);
1831 #endif
1832  }
1833  }
1834 
1835  /*
1836  * Unfortunately, it seems that abbreviation for non-C collations is
1837  * broken on many common platforms; testing of multiple versions of glibc
1838  * reveals that, for many locales, strcoll() and strxfrm() do not return
1839  * consistent results, which is fatal to this optimization. While no
1840  * other libc other than Cygwin has so far been shown to have a problem,
1841  * we take the conservative course of action for right now and disable
1842  * this categorically. (Users who are certain this isn't a problem on
1843  * their system can define TRUST_STRXFRM.)
1844  *
1845  * Even apart from the risk of broken locales, it's possible that there
1846  * are platforms where the use of abbreviated keys should be disabled at
1847  * compile time. Having only 4 byte datums could make worst-case
1848  * performance drastically more likely, for example. Moreover, macOS's
1849  * strxfrm() implementation is known to not effectively concentrate a
1850  * significant amount of entropy from the original string in earlier
1851  * transformed blobs. It's possible that other supported platforms are
1852  * similarly encumbered. So, if we ever get past disabling this
1853  * categorically, we may still want or need to disable it for particular
1854  * platforms.
1855  */
1856 #ifndef TRUST_STRXFRM
1857  if (!collate_c)
1858  abbreviate = false;
1859 #endif
1860 
1861  /*
1862  * If we're using abbreviated keys, or if we're using a locale-aware
1863  * comparison, we need to initialize a StringSortSupport object. Both
1864  * cases will make use of the temporary buffers we initialize here for
1865  * scratch space (and to detect requirement for BpChar semantics from
1866  * caller), and the abbreviation case requires additional state.
1867  */
1868  if (abbreviate || !collate_c)
1869  {
1870  sss = palloc(sizeof(VarStringSortSupport));
1871  sss->buf1 = palloc(TEXTBUFLEN);
1872  sss->buflen1 = TEXTBUFLEN;
1873  sss->buf2 = palloc(TEXTBUFLEN);
1874  sss->buflen2 = TEXTBUFLEN;
1875  /* Start with invalid values */
1876  sss->last_len1 = -1;
1877  sss->last_len2 = -1;
1878  /* Initialize */
1879  sss->last_returned = 0;
1880 #ifdef HAVE_LOCALE_T
1881  sss->locale = locale;
1882 #endif
1883 
1884  /*
1885  * To avoid somehow confusing a strxfrm() blob and an original string,
1886  * constantly keep track of the variety of data that buf1 and buf2
1887  * currently contain.
1888  *
1889  * Comparisons may be interleaved with conversion calls. Frequently,
1890  * conversions and comparisons are batched into two distinct phases,
1891  * but the correctness of caching cannot hinge upon this. For
1892  * comparison caching, buffer state is only trusted if cache_blob is
1893  * found set to false, whereas strxfrm() caching only trusts the state
1894  * when cache_blob is found set to true.
1895  *
1896  * Arbitrarily initialize cache_blob to true.
1897  */
1898  sss->cache_blob = true;
1899  sss->collate_c = collate_c;
1900  sss->bpchar = bpchar;
1901  ssup->ssup_extra = sss;
1902 
1903  /*
1904  * If possible, plan to use the abbreviated keys optimization. The
1905  * core code may switch back to authoritative comparator should
1906  * abbreviation be aborted.
1907  */
1908  if (abbreviate)
1909  {
1910  sss->prop_card = 0.20;
1911  initHyperLogLog(&sss->abbr_card, 10);
1912  initHyperLogLog(&sss->full_card, 10);
1913  ssup->abbrev_full_comparator = ssup->comparator;
1914  ssup->comparator = varstrcmp_abbrev;
1917  }
1918  }
1919 }
1920 
1921 /*
1922  * sortsupport comparison func (for C locale case)
1923  */
1924 static int
1926 {
1927  VarString *arg1 = DatumGetVarStringPP(x);
1928  VarString *arg2 = DatumGetVarStringPP(y);
1929  char *a1p,
1930  *a2p;
1931  int len1,
1932  len2,
1933  result;
1934 
1935  a1p = VARDATA_ANY(arg1);
1936  a2p = VARDATA_ANY(arg2);
1937 
1938  len1 = VARSIZE_ANY_EXHDR(arg1);
1939  len2 = VARSIZE_ANY_EXHDR(arg2);
1940 
1941  result = memcmp(a1p, a2p, Min(len1, len2));
1942  if ((result == 0) && (len1 != len2))
1943  result = (len1 < len2) ? -1 : 1;
1944 
1945  /* We can't afford to leak memory here. */
1946  if (PointerGetDatum(arg1) != x)
1947  pfree(arg1);
1948  if (PointerGetDatum(arg2) != y)
1949  pfree(arg2);
1950 
1951  return result;
1952 }
1953 
1954 /*
1955  * sortsupport comparison func (for BpChar C locale case)
1956  *
1957  * BpChar outsources its sortsupport to this module. Specialization for the
1958  * varstr_sortsupport BpChar case, modeled on
1959  * internal_bpchar_pattern_compare().
1960  */
1961 static int
1963 {
1964  BpChar *arg1 = DatumGetBpCharPP(x);
1965  BpChar *arg2 = DatumGetBpCharPP(y);
1966  char *a1p,
1967  *a2p;
1968  int len1,
1969  len2,
1970  result;
1971 
1972  a1p = VARDATA_ANY(arg1);
1973  a2p = VARDATA_ANY(arg2);
1974 
1975  len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
1976  len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
1977 
1978  result = memcmp(a1p, a2p, Min(len1, len2));
1979  if ((result == 0) && (len1 != len2))
1980  result = (len1 < len2) ? -1 : 1;
1981 
1982  /* We can't afford to leak memory here. */
1983  if (PointerGetDatum(arg1) != x)
1984  pfree(arg1);
1985  if (PointerGetDatum(arg2) != y)
1986  pfree(arg2);
1987 
1988  return result;
1989 }
1990 
1991 /*
1992  * sortsupport comparison func (for locale case)
1993  */
1994 static int
1996 {
1997  VarString *arg1 = DatumGetVarStringPP(x);
1998  VarString *arg2 = DatumGetVarStringPP(y);
1999  bool arg1_match;
2001 
2002  /* working state */
2003  char *a1p,
2004  *a2p;
2005  int len1,
2006  len2,
2007  result;
2008 
2009  a1p = VARDATA_ANY(arg1);
2010  a2p = VARDATA_ANY(arg2);
2011 
2012  len1 = VARSIZE_ANY_EXHDR(arg1);
2013  len2 = VARSIZE_ANY_EXHDR(arg2);
2014 
2015  /* Fast pre-check for equality, as discussed in varstr_cmp() */
2016  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2017  {
2018  /*
2019  * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2020  * last_len2. Existing contents of buffers might still be used by
2021  * next call.
2022  *
2023  * It's fine to allow the comparison of BpChar padding bytes here,
2024  * even though that implies that the memcmp() will usually be
2025  * performed for BpChar callers (though multibyte characters could
2026  * still prevent that from occurring). The memcmp() is still very
2027  * cheap, and BpChar's funny semantics have us remove trailing spaces
2028  * (not limited to padding), so we need make no distinction between
2029  * padding space characters and "real" space characters.
2030  */
2031  result = 0;
2032  goto done;
2033  }
2034 
2035  if (sss->bpchar)
2036  {
2037  /* Get true number of bytes, ignoring trailing spaces */
2038  len1 = bpchartruelen(a1p, len1);
2039  len2 = bpchartruelen(a2p, len2);
2040  }
2041 
2042  if (len1 >= sss->buflen1)
2043  {
2044  pfree(sss->buf1);
2045  sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2046  sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2047  }
2048  if (len2 >= sss->buflen2)
2049  {
2050  pfree(sss->buf2);
2051  sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2052  sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2053  }
2054 
2055  /*
2056  * We're likely to be asked to compare the same strings repeatedly, and
2057  * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2058  * comparisons, even though in general there is no reason to think that
2059  * that will work out (every string datum may be unique). Caching does
2060  * not slow things down measurably when it doesn't work out, and can speed
2061  * things up by rather a lot when it does. In part, this is because the
2062  * memcmp() compares data from cachelines that are needed in L1 cache even
2063  * when the last comparison's result cannot be reused.
2064  */
2065  arg1_match = true;
2066  if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2067  {
2068  arg1_match = false;
2069  memcpy(sss->buf1, a1p, len1);
2070  sss->buf1[len1] = '\0';
2071  sss->last_len1 = len1;
2072  }
2073 
2074  /*
2075  * If we're comparing the same two strings as last time, we can return the
2076  * same answer without calling strcoll() again. This is more likely than
2077  * it seems (at least with moderate to low cardinality sets), because
2078  * quicksort compares the same pivot against many values.
2079  */
2080  if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2081  {
2082  memcpy(sss->buf2, a2p, len2);
2083  sss->buf2[len2] = '\0';
2084  sss->last_len2 = len2;
2085  }
2086  else if (arg1_match && !sss->cache_blob)
2087  {
2088  /* Use result cached following last actual strcoll() call */
2089  result = sss->last_returned;
2090  goto done;
2091  }
2092 
2093 #ifdef HAVE_LOCALE_T
2094  if (sss->locale)
2095  result = strcoll_l(sss->buf1, sss->buf2, sss->locale);
2096  else
2097 #endif
2098  result = strcoll(sss->buf1, sss->buf2);
2099 
2100  /*
2101  * In some locales strcoll() can claim that nonidentical strings are
2102  * equal. Believing that would be bad news for a number of reasons, so we
2103  * follow Perl's lead and sort "equal" strings according to strcmp().
2104  */
2105  if (result == 0)
2106  result = strcmp(sss->buf1, sss->buf2);
2107 
2108  /* Cache result, perhaps saving an expensive strcoll() call next time */
2109  sss->cache_blob = false;
2110  sss->last_returned = result;
2111 done:
2112  /* We can't afford to leak memory here. */
2113  if (PointerGetDatum(arg1) != x)
2114  pfree(arg1);
2115  if (PointerGetDatum(arg2) != y)
2116  pfree(arg2);
2117 
2118  return result;
2119 }
2120 
2121 /*
2122  * Abbreviated key comparison func
2123  */
2124 static int
2126 {
2127  /*
2128  * When 0 is returned, the core system will call varstrfastcmp_c()
2129  * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale(). Even a
2130  * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2131  * authoritatively, for the same reason that there is a strcoll()
2132  * tie-breaker call to strcmp() in varstr_cmp().
2133  */
2134  if (x > y)
2135  return 1;
2136  else if (x == y)
2137  return 0;
2138  else
2139  return -1;
2140 }
2141 
2142 /*
2143  * Conversion routine for sortsupport. Converts original to abbreviated key
2144  * representation. Our encoding strategy is simple -- pack the first 8 bytes
2145  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2146  * stored in reverse order), and treat it as an unsigned integer. When the "C"
2147  * locale is used, or in case of bytea, just memcpy() from original instead.
2148  */
2149 static Datum
2151 {
2153  VarString *authoritative = DatumGetVarStringPP(original);
2154  char *authoritative_data = VARDATA_ANY(authoritative);
2155 
2156  /* working state */
2157  Datum res;
2158  char *pres;
2159  int len;
2160  uint32 hash;
2161 
2162  pres = (char *) &res;
2163  /* memset(), so any non-overwritten bytes are NUL */
2164  memset(pres, 0, sizeof(Datum));
2165  len = VARSIZE_ANY_EXHDR(authoritative);
2166 
2167  /* Get number of bytes, ignoring trailing spaces */
2168  if (sss->bpchar)
2169  len = bpchartruelen(authoritative_data, len);
2170 
2171  /*
2172  * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2173  * abbreviate keys. The full comparator for the C locale is always
2174  * memcmp(). It would be incorrect to allow bytea callers (callers that
2175  * always force the C collation -- bytea isn't a collatable type, but this
2176  * approach is convenient) to use strxfrm(). This is because bytea
2177  * strings may contain NUL bytes. Besides, this should be faster, too.
2178  *
2179  * More generally, it's okay that bytea callers can have NUL bytes in
2180  * strings because varstrcmp_abbrev() need not make a distinction between
2181  * terminating NUL bytes, and NUL bytes representing actual NULs in the
2182  * authoritative representation. Hopefully a comparison at or past one
2183  * abbreviated key's terminating NUL byte will resolve the comparison
2184  * without consulting the authoritative representation; specifically, some
2185  * later non-NUL byte in the longer string can resolve the comparison
2186  * against a subsequent terminating NUL in the shorter string. There will
2187  * usually be what is effectively a "length-wise" resolution there and
2188  * then.
2189  *
2190  * If that doesn't work out -- if all bytes in the longer string
2191  * positioned at or past the offset of the smaller string's (first)
2192  * terminating NUL are actually representative of NUL bytes in the
2193  * authoritative binary string (perhaps with some *terminating* NUL bytes
2194  * towards the end of the longer string iff it happens to still be small)
2195  * -- then an authoritative tie-breaker will happen, and do the right
2196  * thing: explicitly consider string length.
2197  */
2198  if (sss->collate_c)
2199  memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2200  else
2201  {
2202  Size bsize;
2203 
2204  /*
2205  * We're not using the C collation, so fall back on strxfrm.
2206  */
2207 
2208  /* By convention, we use buffer 1 to store and NUL-terminate */
2209  if (len >= sss->buflen1)
2210  {
2211  pfree(sss->buf1);
2212  sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2213  sss->buf1 = palloc(sss->buflen1);
2214  }
2215 
2216  /* Might be able to reuse strxfrm() blob from last call */
2217  if (sss->last_len1 == len && sss->cache_blob &&
2218  memcmp(sss->buf1, authoritative_data, len) == 0)
2219  {
2220  memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2221  /* No change affecting cardinality, so no hashing required */
2222  goto done;
2223  }
2224 
2225  /* Just like strcoll(), strxfrm() expects a NUL-terminated string */
2226  memcpy(sss->buf1, authoritative_data, len);
2227  sss->buf1[len] = '\0';
2228  sss->last_len1 = len;
2229 
2230  for (;;)
2231  {
2232 #ifdef HAVE_LOCALE_T
2233  if (sss->locale)
2234  bsize = strxfrm_l(sss->buf2, sss->buf1,
2235  sss->buflen2, sss->locale);
2236  else
2237 #endif
2238  bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2239 
2240  sss->last_len2 = bsize;
2241  if (bsize < sss->buflen2)
2242  break;
2243 
2244  /*
2245  * The C standard states that the contents of the buffer is now
2246  * unspecified. Grow buffer, and retry.
2247  */
2248  pfree(sss->buf2);
2249  sss->buflen2 = Max(bsize + 1,
2250  Min(sss->buflen2 * 2, MaxAllocSize));
2251  sss->buf2 = palloc(sss->buflen2);
2252  }
2253 
2254  /*
2255  * Every Datum byte is always compared. This is safe because the
2256  * strxfrm() blob is itself NUL terminated, leaving no danger of
2257  * misinterpreting any NUL bytes not intended to be interpreted as
2258  * logically representing termination.
2259  *
2260  * (Actually, even if there were NUL bytes in the blob it would be
2261  * okay. See remarks on bytea case above.)
2262  */
2263  memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2264  }
2265 
2266  /*
2267  * Maintain approximate cardinality of both abbreviated keys and original,
2268  * authoritative keys using HyperLogLog. Used as cheap insurance against
2269  * the worst case, where we do many string transformations for no saving
2270  * in full strcoll()-based comparisons. These statistics are used by
2271  * varstr_abbrev_abort().
2272  *
2273  * First, Hash key proper, or a significant fraction of it. Mix in length
2274  * in order to compensate for cases where differences are past
2275  * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2276  */
2277  hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2278  Min(len, PG_CACHE_LINE_SIZE)));
2279 
2280  if (len > PG_CACHE_LINE_SIZE)
2281  hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2282 
2283  addHyperLogLog(&sss->full_card, hash);
2284 
2285  /* Hash abbreviated key */
2286 #if SIZEOF_DATUM == 8
2287  {
2288  uint32 lohalf,
2289  hihalf;
2290 
2291  lohalf = (uint32) res;
2292  hihalf = (uint32) (res >> 32);
2293  hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2294  }
2295 #else /* SIZEOF_DATUM != 8 */
2296  hash = DatumGetUInt32(hash_uint32((uint32) res));
2297 #endif
2298 
2299  addHyperLogLog(&sss->abbr_card, hash);
2300 
2301  /* Cache result, perhaps saving an expensive strxfrm() call next time */
2302  sss->cache_blob = true;
2303 done:
2304 
2305  /*
2306  * Byteswap on little-endian machines.
2307  *
2308  * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2309  * comparator) works correctly on all platforms. If we didn't do this,
2310  * the comparator would have to call memcmp() with a pair of pointers to
2311  * the first byte of each abbreviated key, which is slower.
2312  */
2313  res = DatumBigEndianToNative(res);
2314 
2315  /* Don't leak memory here */
2316  if (PointerGetDatum(authoritative) != original)
2317  pfree(authoritative);
2318 
2319  return res;
2320 }
2321 
2322 /*
2323  * Callback for estimating effectiveness of abbreviated key optimization, using
2324  * heuristic rules. Returns value indicating if the abbreviation optimization
2325  * should be aborted, based on its projected effectiveness.
2326  */
2327 static bool
2328 varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2329 {
2331  double abbrev_distinct,
2332  key_distinct;
2333 
2334  Assert(ssup->abbreviate);
2335 
2336  /* Have a little patience */
2337  if (memtupcount < 100)
2338  return false;
2339 
2340  abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2341  key_distinct = estimateHyperLogLog(&sss->full_card);
2342 
2343  /*
2344  * Clamp cardinality estimates to at least one distinct value. While
2345  * NULLs are generally disregarded, if only NULL values were seen so far,
2346  * that might misrepresent costs if we failed to clamp.
2347  */
2348  if (abbrev_distinct <= 1.0)
2349  abbrev_distinct = 1.0;
2350 
2351  if (key_distinct <= 1.0)
2352  key_distinct = 1.0;
2353 
2354  /*
2355  * In the worst case all abbreviated keys are identical, while at the same
2356  * time there are differences within full key strings not captured in
2357  * abbreviations.
2358  */
2359 #ifdef TRACE_SORT
2360  if (trace_sort)
2361  {
2362  double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2363 
2364  elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2365  "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2366  memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2367  sss->prop_card);
2368  }
2369 #endif
2370 
2371  /*
2372  * If the number of distinct abbreviated keys approximately matches the
2373  * number of distinct authoritative original keys, that's reason enough to
2374  * proceed. We can win even with a very low cardinality set if most
2375  * tie-breakers only memcmp(). This is by far the most important
2376  * consideration.
2377  *
2378  * While comparisons that are resolved at the abbreviated key level are
2379  * considerably cheaper than tie-breakers resolved with memcmp(), both of
2380  * those two outcomes are so much cheaper than a full strcoll() once
2381  * sorting is underway that it doesn't seem worth it to weigh abbreviated
2382  * cardinality against the overall size of the set in order to more
2383  * accurately model costs. Assume that an abbreviated comparison, and an
2384  * abbreviated comparison with a cheap memcmp()-based authoritative
2385  * resolution are equivalent.
2386  */
2387  if (abbrev_distinct > key_distinct * sss->prop_card)
2388  {
2389  /*
2390  * When we have exceeded 10,000 tuples, decay required cardinality
2391  * aggressively for next call.
2392  *
2393  * This is useful because the number of comparisons required on
2394  * average increases at a linearithmic rate, and at roughly 10,000
2395  * tuples that factor will start to dominate over the linear costs of
2396  * string transformation (this is a conservative estimate). The decay
2397  * rate is chosen to be a little less aggressive than halving -- which
2398  * (since we're called at points at which memtupcount has doubled)
2399  * would never see the cost model actually abort past the first call
2400  * following a decay. This decay rate is mostly a precaution against
2401  * a sudden, violent swing in how well abbreviated cardinality tracks
2402  * full key cardinality. The decay also serves to prevent a marginal
2403  * case from being aborted too late, when too much has already been
2404  * invested in string transformation.
2405  *
2406  * It's possible for sets of several million distinct strings with
2407  * mere tens of thousands of distinct abbreviated keys to still
2408  * benefit very significantly. This will generally occur provided
2409  * each abbreviated key is a proxy for a roughly uniform number of the
2410  * set's full keys. If it isn't so, we hope to catch that early and
2411  * abort. If it isn't caught early, by the time the problem is
2412  * apparent it's probably not worth aborting.
2413  */
2414  if (memtupcount > 10000)
2415  sss->prop_card *= 0.65;
2416 
2417  return false;
2418  }
2419 
2420  /*
2421  * Abort abbreviation strategy.
2422  *
2423  * The worst case, where all abbreviated keys are identical while all
2424  * original strings differ will typically only see a regression of about
2425  * 10% in execution time for small to medium sized lists of strings.
2426  * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2427  * often expect very large improvements, particularly with sets of strings
2428  * of moderately high to high abbreviated cardinality. There is little to
2429  * lose but much to gain, which our strategy reflects.
2430  */
2431 #ifdef TRACE_SORT
2432  if (trace_sort)
2433  elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2434  "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2435  memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2436 #endif
2437 
2438  return true;
2439 }
2440 
2441 Datum
2443 {
2444  text *arg1 = PG_GETARG_TEXT_PP(0);
2445  text *arg2 = PG_GETARG_TEXT_PP(1);
2446  text *result;
2447 
2448  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2449 
2450  PG_RETURN_TEXT_P(result);
2451 }
2452 
2453 Datum
2455 {
2456  text *arg1 = PG_GETARG_TEXT_PP(0);
2457  text *arg2 = PG_GETARG_TEXT_PP(1);
2458  text *result;
2459 
2460  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2461 
2462  PG_RETURN_TEXT_P(result);
2463 }
2464 
2465 
2466 /*
2467  * The following operators support character-by-character comparison
2468  * of text datums, to allow building indexes suitable for LIKE clauses.
2469  * Note that the regular texteq/textne comparison operators, and regular
2470  * support functions 1 and 2 with "C" collation are assumed to be
2471  * compatible with these!
2472  */
2473 
2474 static int
2476 {
2477  int result;
2478  int len1,
2479  len2;
2480 
2481  len1 = VARSIZE_ANY_EXHDR(arg1);
2482  len2 = VARSIZE_ANY_EXHDR(arg2);
2483 
2484  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2485  if (result != 0)
2486  return result;
2487  else if (len1 < len2)
2488  return -1;
2489  else if (len1 > len2)
2490  return 1;
2491  else
2492  return 0;
2493 }
2494 
2495 
2496 Datum
2498 {
2499  text *arg1 = PG_GETARG_TEXT_PP(0);
2500  text *arg2 = PG_GETARG_TEXT_PP(1);
2501  int result;
2502 
2503  result = internal_text_pattern_compare(arg1, arg2);
2504 
2505  PG_FREE_IF_COPY(arg1, 0);
2506  PG_FREE_IF_COPY(arg2, 1);
2507 
2508  PG_RETURN_BOOL(result < 0);
2509 }
2510 
2511 
2512 Datum
2514 {
2515  text *arg1 = PG_GETARG_TEXT_PP(0);
2516  text *arg2 = PG_GETARG_TEXT_PP(1);
2517  int result;
2518 
2519  result = internal_text_pattern_compare(arg1, arg2);
2520 
2521  PG_FREE_IF_COPY(arg1, 0);
2522  PG_FREE_IF_COPY(arg2, 1);
2523 
2524  PG_RETURN_BOOL(result <= 0);
2525 }
2526 
2527 
2528 Datum
2530 {
2531  text *arg1 = PG_GETARG_TEXT_PP(0);
2532  text *arg2 = PG_GETARG_TEXT_PP(1);
2533  int result;
2534 
2535  result = internal_text_pattern_compare(arg1, arg2);
2536 
2537  PG_FREE_IF_COPY(arg1, 0);
2538  PG_FREE_IF_COPY(arg2, 1);
2539 
2540  PG_RETURN_BOOL(result >= 0);
2541 }
2542 
2543 
2544 Datum
2546 {
2547  text *arg1 = PG_GETARG_TEXT_PP(0);
2548  text *arg2 = PG_GETARG_TEXT_PP(1);
2549  int result;
2550 
2551  result = internal_text_pattern_compare(arg1, arg2);
2552 
2553  PG_FREE_IF_COPY(arg1, 0);
2554  PG_FREE_IF_COPY(arg2, 1);
2555 
2556  PG_RETURN_BOOL(result > 0);
2557 }
2558 
2559 
2560 Datum
2562 {
2563  text *arg1 = PG_GETARG_TEXT_PP(0);
2564  text *arg2 = PG_GETARG_TEXT_PP(1);
2565  int result;
2566 
2567  result = internal_text_pattern_compare(arg1, arg2);
2568 
2569  PG_FREE_IF_COPY(arg1, 0);
2570  PG_FREE_IF_COPY(arg2, 1);
2571 
2572  PG_RETURN_INT32(result);
2573 }
2574 
2575 
2576 Datum
2578 {
2580  MemoryContext oldcontext;
2581 
2582  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2583 
2584  /* Use generic string SortSupport, forcing "C" collation */
2585  varstr_sortsupport(ssup, C_COLLATION_OID, false);
2586 
2587  MemoryContextSwitchTo(oldcontext);
2588 
2589  PG_RETURN_VOID();
2590 }
2591 
2592 
2593 /*-------------------------------------------------------------
2594  * byteaoctetlen
2595  *
2596  * get the number of bytes contained in an instance of type 'bytea'
2597  *-------------------------------------------------------------
2598  */
2599 Datum
2601 {
2602  Datum str = PG_GETARG_DATUM(0);
2603 
2604  /* We need not detoast the input at all */
2606 }
2607 
2608 /*
2609  * byteacat -
2610  * takes two bytea* and returns a bytea* that is the concatenation of
2611  * the two.
2612  *
2613  * Cloned from textcat and modified as required.
2614  */
2615 Datum
2617 {
2618  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2619  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2620 
2622 }
2623 
2624 /*
2625  * bytea_catenate
2626  * Guts of byteacat(), broken out so it can be used by other functions
2627  *
2628  * Arguments can be in short-header form, but not compressed or out-of-line
2629  */
2630 static bytea *
2632 {
2633  bytea *result;
2634  int len1,
2635  len2,
2636  len;
2637  char *ptr;
2638 
2639  len1 = VARSIZE_ANY_EXHDR(t1);
2640  len2 = VARSIZE_ANY_EXHDR(t2);
2641 
2642  /* paranoia ... probably should throw error instead? */
2643  if (len1 < 0)
2644  len1 = 0;
2645  if (len2 < 0)
2646  len2 = 0;
2647 
2648  len = len1 + len2 + VARHDRSZ;
2649  result = (bytea *) palloc(len);
2650 
2651  /* Set size of result string... */
2652  SET_VARSIZE(result, len);
2653 
2654  /* Fill data field of result string... */
2655  ptr = VARDATA(result);
2656  if (len1 > 0)
2657  memcpy(ptr, VARDATA_ANY(t1), len1);
2658  if (len2 > 0)
2659  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2660 
2661  return result;
2662 }
2663 
/* Build a bytea value from a C string by passing it through byteain(). */
#define PG_STR_GET_BYTEA(str_) \
    DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2666 
2667 /*
2668  * bytea_substr()
2669  * Return a substring starting at the specified position.
2670  * Cloned from text_substr and modified as required.
2671  *
2672  * Input:
2673  * - string
2674  * - starting position (is one-based)
2675  * - string length (optional)
2676  *
2677  * If the starting position is zero or less, then return from the start of the string
2678  * adjusting the length to be consistent with the "negative start" per SQL.
2679  * If the length is less than zero, an ERROR is thrown. If no third argument
2680  * (length) is provided, the length to the end of the string is assumed.
2681  */
2682 Datum
2684 {
2686  PG_GETARG_INT32(1),
2687  PG_GETARG_INT32(2),
2688  false));
2689 }
2690 
2691 /*
2692  * bytea_substr_no_len -
2693  * Wrapper to avoid opr_sanity failure due to
2694  * one function accepting a different number of args.
2695  */
2696 Datum
2698 {
2700  PG_GETARG_INT32(1),
2701  -1,
2702  true));
2703 }
2704 
2705 static bytea *
2707  int S,
2708  int L,
2709  bool length_not_specified)
2710 {
2711  int S1; /* adjusted start position */
2712  int L1; /* adjusted substring length */
2713 
2714  S1 = Max(S, 1);
2715 
2716  if (length_not_specified)
2717  {
2718  /*
2719  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2720  * end of the string if we pass it a negative value for length.
2721  */
2722  L1 = -1;
2723  }
2724  else
2725  {
2726  /* end position */
2727  int E = S + L;
2728 
2729  /*
2730  * A negative value for L is the only way for the end position to be
2731  * before the start. SQL99 says to throw an error.
2732  */
2733  if (E < S)
2734  ereport(ERROR,
2735  (errcode(ERRCODE_SUBSTRING_ERROR),
2736  errmsg("negative substring length not allowed")));
2737 
2738  /*
2739  * A zero or negative value for the end position can happen if the
2740  * start was negative or one. SQL99 says to return a zero-length
2741  * string.
2742  */
2743  if (E < 1)
2744  return PG_STR_GET_BYTEA("");
2745 
2746  L1 = E - S1;
2747  }
2748 
2749  /*
2750  * If the start position is past the end of the string, SQL99 says to
2751  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2752  * us. Convert to zero-based starting position
2753  */
2754  return DatumGetByteaPSlice(str, S1 - 1, L1);
2755 }
2756 
2757 /*
2758  * byteaoverlay
2759  * Replace specified substring of first string with second
2760  *
2761  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2762  * This code is a direct implementation of what the standard says.
2763  */
2764 Datum
2766 {
2767  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2768  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2769  int sp = PG_GETARG_INT32(2); /* substring start position */
2770  int sl = PG_GETARG_INT32(3); /* substring length */
2771 
2772  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2773 }
2774 
2775 Datum
2777 {
2778  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2779  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2780  int sp = PG_GETARG_INT32(2); /* substring start position */
2781  int sl;
2782 
2783  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2784  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2785 }
2786 
2787 static bytea *
2788 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2789 {
2790  bytea *result;
2791  bytea *s1;
2792  bytea *s2;
2793  int sp_pl_sl;
2794 
2795  /*
2796  * Check for possible integer-overflow cases. For negative sp, throw a
2797  * "substring length" error because that's what should be expected
2798  * according to the spec's definition of OVERLAY().
2799  */
2800  if (sp <= 0)
2801  ereport(ERROR,
2802  (errcode(ERRCODE_SUBSTRING_ERROR),
2803  errmsg("negative substring length not allowed")));
2804  sp_pl_sl = sp + sl;
2805  if (sp_pl_sl <= sl)
2806  ereport(ERROR,
2807  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2808  errmsg("integer out of range")));
2809 
2810  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2811  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2812  result = bytea_catenate(s1, t2);
2813  result = bytea_catenate(result, s2);
2814 
2815  return result;
2816 }
2817 
2818 /*
2819  * byteapos -
2820  * Return the position of the specified substring.
2821  * Implements the SQL POSITION() function.
2822  * Cloned from textpos and modified as required.
2823  */
2824 Datum
2826 {
2827  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2828  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2829  int pos;
2830  int px,
2831  p;
2832  int len1,
2833  len2;
2834  char *p1,
2835  *p2;
2836 
2837  len1 = VARSIZE_ANY_EXHDR(t1);
2838  len2 = VARSIZE_ANY_EXHDR(t2);
2839 
2840  if (len2 <= 0)
2841  PG_RETURN_INT32(1); /* result for empty pattern */
2842 
2843  p1 = VARDATA_ANY(t1);
2844  p2 = VARDATA_ANY(t2);
2845 
2846  pos = 0;
2847  px = (len1 - len2);
2848  for (p = 0; p <= px; p++)
2849  {
2850  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2851  {
2852  pos = p + 1;
2853  break;
2854  };
2855  p1++;
2856  };
2857 
2858  PG_RETURN_INT32(pos);
2859 }
2860 
2861 /*-------------------------------------------------------------
2862  * byteaGetByte
2863  *
2864  * this routine treats "bytea" as an array of bytes.
2865  * It returns the Nth byte (a number between 0 and 255).
2866  *-------------------------------------------------------------
2867  */
2868 Datum
2870 {
2871  bytea *v = PG_GETARG_BYTEA_PP(0);
2872  int32 n = PG_GETARG_INT32(1);
2873  int len;
2874  int byte;
2875 
2876  len = VARSIZE_ANY_EXHDR(v);
2877 
2878  if (n < 0 || n >= len)
2879  ereport(ERROR,
2880  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2881  errmsg("index %d out of valid range, 0..%d",
2882  n, len - 1)));
2883 
2884  byte = ((unsigned char *) VARDATA_ANY(v))[n];
2885 
2886  PG_RETURN_INT32(byte);
2887 }
2888 
2889 /*-------------------------------------------------------------
2890  * byteaGetBit
2891  *
2892  * This routine treats a "bytea" type like an array of bits.
2893  * It returns the value of the Nth bit (0 or 1).
2894  *
2895  *-------------------------------------------------------------
2896  */
2897 Datum
2899 {
2900  bytea *v = PG_GETARG_BYTEA_PP(0);
2901  int32 n = PG_GETARG_INT32(1);
2902  int byteNo,
2903  bitNo;
2904  int len;
2905  int byte;
2906 
2907  len = VARSIZE_ANY_EXHDR(v);
2908 
2909  if (n < 0 || n >= len * 8)
2910  ereport(ERROR,
2911  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2912  errmsg("index %d out of valid range, 0..%d",
2913  n, len * 8 - 1)));
2914 
2915  byteNo = n / 8;
2916  bitNo = n % 8;
2917 
2918  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
2919 
2920  if (byte & (1 << bitNo))
2921  PG_RETURN_INT32(1);
2922  else
2923  PG_RETURN_INT32(0);
2924 }
2925 
2926 /*-------------------------------------------------------------
2927  * byteaSetByte
2928  *
2929  * Given an instance of type 'bytea' creates a new one with
2930  * the Nth byte set to the given value.
2931  *
2932  *-------------------------------------------------------------
2933  */
2934 Datum
2936 {
2937  bytea *v = PG_GETARG_BYTEA_P(0);
2938  int32 n = PG_GETARG_INT32(1);
2939  int32 newByte = PG_GETARG_INT32(2);
2940  int len;
2941  bytea *res;
2942 
2943  len = VARSIZE(v) - VARHDRSZ;
2944 
2945  if (n < 0 || n >= len)
2946  ereport(ERROR,
2947  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2948  errmsg("index %d out of valid range, 0..%d",
2949  n, len - 1)));
2950 
2951  /*
2952  * Make a copy of the original varlena.
2953  */
2954  res = (bytea *) palloc(VARSIZE(v));
2955  memcpy((char *) res, (char *) v, VARSIZE(v));
2956 
2957  /*
2958  * Now set the byte.
2959  */
2960  ((unsigned char *) VARDATA(res))[n] = newByte;
2961 
2962  PG_RETURN_BYTEA_P(res);
2963 }
2964 
2965 /*-------------------------------------------------------------
2966  * byteaSetBit
2967  *
2968  * Given an instance of type 'bytea' creates a new one with
2969  * the Nth bit set to the given value.
2970  *
2971  *-------------------------------------------------------------
2972  */
2973 Datum
2975 {
2976  bytea *v = PG_GETARG_BYTEA_P(0);
2977  int32 n = PG_GETARG_INT32(1);
2978  int32 newBit = PG_GETARG_INT32(2);
2979  bytea *res;
2980  int len;
2981  int oldByte,
2982  newByte;
2983  int byteNo,
2984  bitNo;
2985 
2986  len = VARSIZE(v) - VARHDRSZ;
2987 
2988  if (n < 0 || n >= len * 8)
2989  ereport(ERROR,
2990  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2991  errmsg("index %d out of valid range, 0..%d",
2992  n, len * 8 - 1)));
2993 
2994  byteNo = n / 8;
2995  bitNo = n % 8;
2996 
2997  /*
2998  * sanity check!
2999  */
3000  if (newBit != 0 && newBit != 1)
3001  ereport(ERROR,
3002  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3003  errmsg("new bit must be 0 or 1")));
3004 
3005  /*
3006  * Make a copy of the original varlena.
3007  */
3008  res = (bytea *) palloc(VARSIZE(v));
3009  memcpy((char *) res, (char *) v, VARSIZE(v));
3010 
3011  /*
3012  * Update the byte.
3013  */
3014  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3015 
3016  if (newBit == 0)
3017  newByte = oldByte & (~(1 << bitNo));
3018  else
3019  newByte = oldByte | (1 << bitNo);
3020 
3021  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3022 
3023  PG_RETURN_BYTEA_P(res);
3024 }
3025 
3026 
3027 /* text_name()
3028  * Converts a text type to a Name type.
3029  */
3030 Datum
3032 {
3033  text *s = PG_GETARG_TEXT_PP(0);
3034  Name result;
3035  int len;
3036 
3037  len = VARSIZE_ANY_EXHDR(s);
3038 
3039  /* Truncate oversize input */
3040  if (len >= NAMEDATALEN)
3041  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3042 
3043  /* We use palloc0 here to ensure result is zero-padded */
3044  result = (Name) palloc0(NAMEDATALEN);
3045  memcpy(NameStr(*result), VARDATA_ANY(s), len);
3046 
3047  PG_RETURN_NAME(result);
3048 }
3049 
3050 /* name_text()
3051  * Converts a Name type to a text type.
3052  */
3053 Datum
3055 {
3056  Name s = PG_GETARG_NAME(0);
3057 
3059 }
3060 
3061 
3062 /*
3063  * textToQualifiedNameList - convert a text object to list of names
3064  *
3065  * This implements the input parsing needed by nextval() and other
3066  * functions that take a text parameter representing a qualified name.
3067  * We split the name at dots, downcase if not double-quoted, and
3068  * truncate names if they're too long.
3069  */
3070 List *
3072 {
3073  char *rawname;
3074  List *result = NIL;
3075  List *namelist;
3076  ListCell *l;
3077 
3078  /* Convert to C string (handles possible detoasting). */
3079  /* Note we rely on being able to modify rawname below. */
3080  rawname = text_to_cstring(textval);
3081 
3082  if (!SplitIdentifierString(rawname, '.', &namelist))
3083  ereport(ERROR,
3084  (errcode(ERRCODE_INVALID_NAME),
3085  errmsg("invalid name syntax")));
3086 
3087  if (namelist == NIL)
3088  ereport(ERROR,
3089  (errcode(ERRCODE_INVALID_NAME),
3090  errmsg("invalid name syntax")));
3091 
3092  foreach(l, namelist)
3093  {
3094  char *curname = (char *) lfirst(l);
3095 
3096  result = lappend(result, makeString(pstrdup(curname)));
3097  }
3098 
3099  pfree(rawname);
3100  list_free(namelist);
3101 
3102  return result;
3103 }
3104 
3105 /*
3106  * SplitIdentifierString --- parse a string containing identifiers
3107  *
3108  * This is the guts of textToQualifiedNameList, and is exported for use in
3109  * other situations such as parsing GUC variables. In the GUC case, it's
3110  * important to avoid memory leaks, so the API is designed to minimize the
3111  * amount of stuff that needs to be allocated and freed.
3112  *
3113  * Inputs:
3114  * rawstring: the input string; must be overwritable! On return, it's
3115  * been modified to contain the separated identifiers.
3116  * separator: the separator punctuation expected between identifiers
3117  * (typically '.' or ','). Whitespace may also appear around
3118  * identifiers.
3119  * Outputs:
3120  * namelist: filled with a palloc'd list of pointers to identifiers within
3121  * rawstring. Caller should list_free() this even on error return.
3122  *
3123  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3124  *
3125  * Note that an empty string is considered okay here, though not in
3126  * textToQualifiedNameList.
3127  */
3128 bool
3129 SplitIdentifierString(char *rawstring, char separator,
3130  List **namelist)
3131 {
3132  char *nextp = rawstring;
3133  bool done = false;
3134 
3135  *namelist = NIL;
3136 
3137  while (isspace((unsigned char) *nextp))
3138  nextp++; /* skip leading whitespace */
3139 
3140  if (*nextp == '\0')
3141  return true; /* allow empty string */
3142 
3143  /* At the top of the loop, we are at start of a new identifier. */
3144  do
3145  {
3146  char *curname;
3147  char *endp;
3148 
3149  if (*nextp == '"')
3150  {
3151  /* Quoted name --- collapse quote-quote pairs, no downcasing */
3152  curname = nextp + 1;
3153  for (;;)
3154  {
3155  endp = strchr(nextp + 1, '"');
3156  if (endp == NULL)
3157  return false; /* mismatched quotes */
3158  if (endp[1] != '"')
3159  break; /* found end of quoted name */
3160  /* Collapse adjacent quotes into one quote, and look again */
3161  memmove(endp, endp + 1, strlen(endp));
3162  nextp = endp;
3163  }
3164  /* endp now points at the terminating quote */
3165  nextp = endp + 1;
3166  }
3167  else
3168  {
3169  /* Unquoted name --- extends to separator or whitespace */
3170  char *downname;
3171  int len;
3172 
3173  curname = nextp;
3174  while (*nextp && *nextp != separator &&
3175  !isspace((unsigned char) *nextp))
3176  nextp++;
3177  endp = nextp;
3178  if (curname == nextp)
3179  return false; /* empty unquoted name not allowed */
3180 
3181  /*
3182  * Downcase the identifier, using same code as main lexer does.
3183  *
3184  * XXX because we want to overwrite the input in-place, we cannot
3185  * support a downcasing transformation that increases the string
3186  * length. This is not a problem given the current implementation
3187  * of downcase_truncate_identifier, but we'll probably have to do
3188  * something about this someday.
3189  */
3190  len = endp - curname;
3191  downname = downcase_truncate_identifier(curname, len, false);
3192  Assert(strlen(downname) <= len);
3193  strncpy(curname, downname, len); /* strncpy is required here */
3194  pfree(downname);
3195  }
3196 
3197  while (isspace((unsigned char) *nextp))
3198  nextp++; /* skip trailing whitespace */
3199 
3200  if (*nextp == separator)
3201  {
3202  nextp++;
3203  while (isspace((unsigned char) *nextp))
3204  nextp++; /* skip leading whitespace for next */
3205  /* we expect another name, so done remains false */
3206  }
3207  else if (*nextp == '\0')
3208  done = true;
3209  else
3210  return false; /* invalid syntax */
3211 
3212  /* Now safe to overwrite separator with a null */
3213  *endp = '\0';
3214 
3215  /* Truncate name if it's overlength */
3216  truncate_identifier(curname, strlen(curname), false);
3217 
3218  /*
3219  * Finished isolating current name --- add it to list
3220  */
3221  *namelist = lappend(*namelist, curname);
3222 
3223  /* Loop back if we didn't reach end of string */
3224  } while (!done);
3225 
3226  return true;
3227 }
3228 
3229 
3230 /*
3231  * SplitDirectoriesString --- parse a string containing directory names
3232  *
3233  * This is similar to SplitIdentifierString, except that the parsing
3234  * rules are meant to handle pathnames instead of identifiers: there is
3235  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3236  * and we apply canonicalize_path() to each extracted string. Because of the
3237  * last, the returned strings are separately palloc'd rather than being
3238  * pointers into rawstring --- but we still scribble on rawstring.
3239  *
3240  * Inputs:
3241  * rawstring: the input string; must be modifiable!
3242  * separator: the separator punctuation expected between directories
3243  * (typically ',' or ';'). Whitespace may also appear around
3244  * directories.
3245  * Outputs:
3246  * namelist: filled with a palloc'd list of directory names.
3247  * Caller should list_free_deep() this even on error return.
3248  *
3249  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3250  *
3251  * Note that an empty string is considered okay here.
3252  */
3253 bool
3254 SplitDirectoriesString(char *rawstring, char separator,
3255  List **namelist)
3256 {
3257  char *nextp = rawstring;
3258  bool done = false;
3259 
3260  *namelist = NIL;
3261 
3262  while (isspace((unsigned char) *nextp))
3263  nextp++; /* skip leading whitespace */
3264 
3265  if (*nextp == '\0')
3266  return true; /* allow empty string */
3267 
3268  /* At the top of the loop, we are at start of a new directory. */
3269  do
3270  {
3271  char *curname;
3272  char *endp;
3273 
3274  if (*nextp == '"')
3275  {
3276  /* Quoted name --- collapse quote-quote pairs */
3277  curname = nextp + 1;
3278  for (;;)
3279  {
3280  endp = strchr(nextp + 1, '"');
3281  if (endp == NULL)
3282  return false; /* mismatched quotes */
3283  if (endp[1] != '"')
3284  break; /* found end of quoted name */
3285  /* Collapse adjacent quotes into one quote, and look again */
3286  memmove(endp, endp + 1, strlen(endp));
3287  nextp = endp;
3288  }
3289  /* endp now points at the terminating quote */
3290  nextp = endp + 1;
3291  }
3292  else
3293  {
3294  /* Unquoted name --- extends to separator or end of string */
3295  curname = endp = nextp;
3296  while (*nextp && *nextp != separator)
3297  {
3298  /* trailing whitespace should not be included in name */
3299  if (!isspace((unsigned char) *nextp))
3300  endp = nextp + 1;
3301  nextp++;
3302  }
3303  if (curname == endp)
3304  return false; /* empty unquoted name not allowed */
3305  }
3306 
3307  while (isspace((unsigned char) *nextp))
3308  nextp++; /* skip trailing whitespace */
3309 
3310  if (*nextp == separator)
3311  {
3312  nextp++;
3313  while (isspace((unsigned char) *nextp))
3314  nextp++; /* skip leading whitespace for next */
3315  /* we expect another name, so done remains false */
3316  }
3317  else if (*nextp == '\0')
3318  done = true;
3319  else
3320  return false; /* invalid syntax */
3321 
3322  /* Now safe to overwrite separator with a null */
3323  *endp = '\0';
3324 
3325  /* Truncate path if it's overlength */
3326  if (strlen(curname) >= MAXPGPATH)
3327  curname[MAXPGPATH - 1] = '\0';
3328 
3329  /*
3330  * Finished isolating current name --- add it to list
3331  */
3332  curname = pstrdup(curname);
3333  canonicalize_path(curname);
3334  *namelist = lappend(*namelist, curname);
3335 
3336  /* Loop back if we didn't reach end of string */
3337  } while (!done);
3338 
3339  return true;
3340 }
3341 
3342 
3343 /*****************************************************************************
3344  * Comparison Functions used for bytea
3345  *
3346  * Note: btree indexes need these routines not to leak memory; therefore,
3347  * be careful to free working copies of toasted datums. Most places don't
3348  * need to be so careful.
3349  *****************************************************************************/
3350 
3351 Datum
3353 {
3354  Datum arg1 = PG_GETARG_DATUM(0);
3355  Datum arg2 = PG_GETARG_DATUM(1);
3356  bool result;
3357  Size len1,
3358  len2;
3359 
3360  /*
3361  * We can use a fast path for unequal lengths, which might save us from
3362  * having to detoast one or both values.
3363  */
3364  len1 = toast_raw_datum_size(arg1);
3365  len2 = toast_raw_datum_size(arg2);
3366  if (len1 != len2)
3367  result = false;
3368  else
3369  {
3370  bytea *barg1 = DatumGetByteaPP(arg1);
3371  bytea *barg2 = DatumGetByteaPP(arg2);
3372 
3373  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3374  len1 - VARHDRSZ) == 0);
3375 
3376  PG_FREE_IF_COPY(barg1, 0);
3377  PG_FREE_IF_COPY(barg2, 1);
3378  }
3379 
3380  PG_RETURN_BOOL(result);
3381 }
3382 
3383 Datum
3385 {
3386  Datum arg1 = PG_GETARG_DATUM(0);
3387  Datum arg2 = PG_GETARG_DATUM(1);
3388  bool result;
3389  Size len1,
3390  len2;
3391 
3392  /*
3393  * We can use a fast path for unequal lengths, which might save us from
3394  * having to detoast one or both values.
3395  */
3396  len1 = toast_raw_datum_size(arg1);
3397  len2 = toast_raw_datum_size(arg2);
3398  if (len1 != len2)
3399  result = true;
3400  else
3401  {
3402  bytea *barg1 = DatumGetByteaPP(arg1);
3403  bytea *barg2 = DatumGetByteaPP(arg2);
3404 
3405  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3406  len1 - VARHDRSZ) != 0);
3407 
3408  PG_FREE_IF_COPY(barg1, 0);
3409  PG_FREE_IF_COPY(barg2, 1);
3410  }
3411 
3412  PG_RETURN_BOOL(result);
3413 }
3414 
3415 Datum
3417 {
3418  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3419  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3420  int len1,
3421  len2;
3422  int cmp;
3423 
3424  len1 = VARSIZE_ANY_EXHDR(arg1);
3425  len2 = VARSIZE_ANY_EXHDR(arg2);
3426 
3427  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3428 
3429  PG_FREE_IF_COPY(arg1, 0);
3430  PG_FREE_IF_COPY(arg2, 1);
3431 
3432  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3433 }
3434 
3435 Datum
3437 {
3438  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3439  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3440  int len1,
3441  len2;
3442  int cmp;
3443 
3444  len1 = VARSIZE_ANY_EXHDR(arg1);
3445  len2 = VARSIZE_ANY_EXHDR(arg2);
3446 
3447  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3448 
3449  PG_FREE_IF_COPY(arg1, 0);
3450  PG_FREE_IF_COPY(arg2, 1);
3451 
3452  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3453 }
3454 
3455 Datum
3457 {
3458  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3459  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3460  int len1,
3461  len2;
3462  int cmp;
3463 
3464  len1 = VARSIZE_ANY_EXHDR(arg1);
3465  len2 = VARSIZE_ANY_EXHDR(arg2);
3466 
3467  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3468 
3469  PG_FREE_IF_COPY(arg1, 0);
3470  PG_FREE_IF_COPY(arg2, 1);
3471 
3472  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3473 }
3474 
3475 Datum
3477 {
3478  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3479  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3480  int len1,
3481  len2;
3482  int cmp;
3483 
3484  len1 = VARSIZE_ANY_EXHDR(arg1);
3485  len2 = VARSIZE_ANY_EXHDR(arg2);
3486 
3487  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3488 
3489  PG_FREE_IF_COPY(arg1, 0);
3490  PG_FREE_IF_COPY(arg2, 1);
3491 
3492  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
3493 }
3494 
3495 Datum
3497 {
3498  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3499  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3500  int len1,
3501  len2;
3502  int cmp;
3503 
3504  len1 = VARSIZE_ANY_EXHDR(arg1);
3505  len2 = VARSIZE_ANY_EXHDR(arg2);
3506 
3507  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3508  if ((cmp == 0) && (len1 != len2))
3509  cmp = (len1 < len2) ? -1 : 1;
3510 
3511  PG_FREE_IF_COPY(arg1, 0);
3512  PG_FREE_IF_COPY(arg2, 1);
3513 
3514  PG_RETURN_INT32(cmp);
3515 }
3516 
3517 Datum
3519 {
3521  MemoryContext oldcontext;
3522 
3523  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3524 
3525  /* Use generic string SortSupport, forcing "C" collation */
3526  varstr_sortsupport(ssup, C_COLLATION_OID, false);
3527 
3528  MemoryContextSwitchTo(oldcontext);
3529 
3530  PG_RETURN_VOID();
3531 }
3532 
3533 /*
3534  * appendStringInfoText
3535  *
3536  * Append a text to str.
3537  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
3538  */
3539 static void
3541 {
3543 }
3544 
3545 /*
3546  * replace_text
3547  * replace all occurrences of 'old_sub_str' in 'orig_str'
3548  * with 'new_sub_str' to form 'new_str'
3549  *
3550  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
3551  * otherwise returns 'new_str'
3552  */
3553 Datum
3555 {
3556  text *src_text = PG_GETARG_TEXT_PP(0);
3557  text *from_sub_text = PG_GETARG_TEXT_PP(1);
3558  text *to_sub_text = PG_GETARG_TEXT_PP(2);
3559  int src_text_len;
3560  int from_sub_text_len;
3562  text *ret_text;
3563  int start_posn;
3564  int curr_posn;
3565  int chunk_len;
3566  char *start_ptr;
3567  StringInfoData str;
3568 
3569  text_position_setup(src_text, from_sub_text, &state);
3570 
3571  /*
3572  * Note: we check the converted string length, not the original, because
3573  * they could be different if the input contained invalid encoding.
3574  */
3575  src_text_len = state.len1;
3576  from_sub_text_len = state.len2;
3577 
3578  /* Return unmodified source string if empty source or pattern */
3579  if (src_text_len < 1 || from_sub_text_len < 1)
3580  {
3581  text_position_cleanup(&state);
3582  PG_RETURN_TEXT_P(src_text);
3583  }
3584 
3585  start_posn = 1;
3586  curr_posn = text_position_next(1, &state);
3587 
3588  /* When the from_sub_text is not found, there is nothing to do. */
3589  if (curr_posn == 0)
3590  {
3591  text_position_cleanup(&state);
3592  PG_RETURN_TEXT_P(src_text);
3593  }
3594 
3595  /* start_ptr points to the start_posn'th character of src_text */
3596  start_ptr = VARDATA_ANY(src_text);
3597 
3598  initStringInfo(&str);
3599 
3600  do
3601  {
3603 
3604  /* copy the data skipped over by last text_position_next() */
3605  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3606  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3607 
3608  appendStringInfoText(&str, to_sub_text);
3609 
3610  start_posn = curr_posn;
3611  start_ptr += chunk_len;
3612  start_posn += from_sub_text_len;
3613  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3614 
3615  curr_posn = text_position_next(start_posn, &state);
3616  }
3617  while (curr_posn > 0);
3618 
3619  /* copy trailing data */
3620  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3621  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3622 
3623  text_position_cleanup(&state);
3624 
3625  ret_text = cstring_to_text_with_len(str.data, str.len);
3626  pfree(str.data);
3627 
3628  PG_RETURN_TEXT_P(ret_text);
3629 }
3630 
3631 /*
3632  * check_replace_text_has_escape_char
3633  *
3634  * check whether replace_text contains escape char.
3635  */
3636 static bool
3638 {
3639  const char *p = VARDATA_ANY(replace_text);
3640  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3641 
3643  {
3644  for (; p < p_end; p++)
3645  {
3646  if (*p == '\\')
3647  return true;
3648  }
3649  }
3650  else
3651  {
3652  for (; p < p_end; p += pg_mblen(p))
3653  {
3654  if (*p == '\\')
3655  return true;
3656  }
3657  }
3658 
3659  return false;
3660 }
3661 
3662 /*
3663  * appendStringInfoRegexpSubstr
3664  *
3665  * Append replace_text to str, substituting regexp back references for
3666  * \n escapes. start_ptr is the start of the match in the source string,
3667  * at logical character position data_pos.
3668  */
3669 static void
3671  regmatch_t *pmatch,
3672  char *start_ptr, int data_pos)
3673 {
3674  const char *p = VARDATA_ANY(replace_text);
3675  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3676  int eml = pg_database_encoding_max_length();
3677 
3678  for (;;)
3679  {
3680  const char *chunk_start = p;
3681  int so;
3682  int eo;
3683 
3684  /* Find next escape char. */
3685  if (eml == 1)
3686  {
3687  for (; p < p_end && *p != '\\'; p++)
3688  /* nothing */ ;
3689  }
3690  else
3691  {
3692  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3693  /* nothing */ ;
3694  }
3695 
3696  /* Copy the text we just scanned over, if any. */
3697  if (p > chunk_start)
3698  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3699 
3700  /* Done if at end of string, else advance over escape char. */
3701  if (p >= p_end)
3702  break;
3703  p++;
3704 
3705  if (p >= p_end)
3706  {
3707  /* Escape at very end of input. Treat same as unexpected char */
3708  appendStringInfoChar(str, '\\');
3709  break;
3710  }
3711 
3712  if (*p >= '1' && *p <= '9')
3713  {
3714  /* Use the back reference of regexp. */
3715  int idx = *p - '0';
3716 
3717  so = pmatch[idx].rm_so;
3718  eo = pmatch[idx].rm_eo;
3719  p++;
3720  }
3721  else if (*p == '&')
3722  {
3723  /* Use the entire matched string. */
3724  so = pmatch[0].rm_so;
3725  eo = pmatch[0].rm_eo;
3726  p++;
3727  }
3728  else if (*p == '\\')
3729  {
3730  /* \\ means transfer one \ to output. */
3731  appendStringInfoChar(str, '\\');
3732  p++;
3733  continue;
3734  }
3735  else
3736  {
3737  /*
3738  * If escape char is not followed by any expected char, just treat
3739  * it as ordinary data to copy. (XXX would it be better to throw
3740  * an error?)
3741  */
3742  appendStringInfoChar(str, '\\');
3743  continue;
3744  }
3745 
3746  if (so != -1 && eo != -1)
3747  {
3748  /*
3749  * Copy the text that is back reference of regexp. Note so and eo
3750  * are counted in characters not bytes.
3751  */
3752  char *chunk_start;
3753  int chunk_len;
3754 
3755  Assert(so >= data_pos);
3756  chunk_start = start_ptr;
3757  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3758  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3759  appendBinaryStringInfo(str, chunk_start, chunk_len);
3760  }
3761  }
3762 }
3763 
3764 #define REGEXP_REPLACE_BACKREF_CNT 10
3765 
3766 /*
3767  * replace_text_regexp
3768  *
3769  * replace text that matches to regexp in src_text to replace_text.
3770  *
3771  * Note: to avoid having to include regex.h in builtins.h, we declare
3772  * the regexp argument as void *, but really it's regex_t *.
3773  */
3774 text *
3775 replace_text_regexp(text *src_text, void *regexp,
3776  text *replace_text, bool glob)
3777 {
3778  text *ret_text;
3779  regex_t *re = (regex_t *) regexp;
3780  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3783  pg_wchar *data;
3784  size_t data_len;
3785  int search_start;
3786  int data_pos;
3787  char *start_ptr;
3788  bool have_escape;
3789 
3790  initStringInfo(&buf);
3791 
3792  /* Convert data string to wide characters. */
3793  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3794  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3795 
3796  /* Check whether replace_text has escape char. */
3797  have_escape = check_replace_text_has_escape_char(replace_text);
3798 
3799  /* start_ptr points to the data_pos'th character of src_text */
3800  start_ptr = (char *) VARDATA_ANY(src_text);
3801  data_pos = 0;
3802 
3803  search_start = 0;
3804  while (search_start <= data_len)
3805  {
3806  int regexec_result;
3807 
3809 
3810  regexec_result = pg_regexec(re,
3811  data,
3812  data_len,
3813  search_start,
3814  NULL, /* no details */
3816  pmatch,
3817  0);
3818 
3819  if (regexec_result == REG_NOMATCH)
3820  break;
3821 
3822  if (regexec_result != REG_OKAY)
3823  {
3824  char errMsg[100];
3825 
3827  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3828  ereport(ERROR,
3829  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3830  errmsg("regular expression failed: %s", errMsg)));
3831  }
3832 
3833  /*
3834  * Copy the text to the left of the match position. Note we are given
3835  * character not byte indexes.
3836  */
3837  if (pmatch[0].rm_so - data_pos > 0)
3838  {
3839  int chunk_len;
3840 
3841  chunk_len = charlen_to_bytelen(start_ptr,
3842  pmatch[0].rm_so - data_pos);
3843  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3844 
3845  /*
3846  * Advance start_ptr over that text, to avoid multiple rescans of
3847  * it if the replace_text contains multiple back-references.
3848  */
3849  start_ptr += chunk_len;
3850  data_pos = pmatch[0].rm_so;
3851  }
3852 
3853  /*
3854  * Copy the replace_text. Process back references when the
3855  * replace_text has escape characters.
3856  */
3857  if (have_escape)
3858  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3859  start_ptr, data_pos);
3860  else
3861  appendStringInfoText(&buf, replace_text);
3862 
3863  /* Advance start_ptr and data_pos over the matched text. */
3864  start_ptr += charlen_to_bytelen(start_ptr,
3865  pmatch[0].rm_eo - data_pos);
3866  data_pos = pmatch[0].rm_eo;
3867 
3868  /*
3869  * When global option is off, replace the first instance only.
3870  */
3871  if (!glob)
3872  break;
3873 
3874  /*
3875  * Advance search position. Normally we start the next search at the
3876  * end of the previous match; but if the match was of zero length, we
3877  * have to advance by one character, or we'd just find the same match
3878  * again.
3879  */
3880  search_start = data_pos;
3881  if (pmatch[0].rm_so == pmatch[0].rm_eo)
3882  search_start++;
3883  }
3884 
3885  /*
3886  * Copy the text to the right of the last match.
3887  */
3888  if (data_pos < data_len)
3889  {
3890  int chunk_len;
3891 
3892  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3893  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3894  }
3895 
3896  ret_text = cstring_to_text_with_len(buf.data, buf.len);
3897  pfree(buf.data);
3898  pfree(data);
3899 
3900  return ret_text;
3901 }
3902 
3903 /*
3904  * split_text
3905  * parse input string
3906  * return ord item (1 based)
3907  * based on provided field separator
3908  */
3909 Datum
3911 {
3912  text *inputstring = PG_GETARG_TEXT_PP(0);
3913  text *fldsep = PG_GETARG_TEXT_PP(1);
3914  int fldnum = PG_GETARG_INT32(2);
3915  int inputstring_len;
3916  int fldsep_len;
3918  int start_posn;
3919  int end_posn;
3920  text *result_text;
3921 
3922  /* field number is 1 based */
3923  if (fldnum < 1)
3924  ereport(ERROR,
3925  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3926  errmsg("field position must be greater than zero")));
3927 
3928  text_position_setup(inputstring, fldsep, &state);
3929 
3930  /*
3931  * Note: we check the converted string length, not the original, because
3932  * they could be different if the input contained invalid encoding.
3933  */
3934  inputstring_len = state.len1;
3935  fldsep_len = state.len2;
3936 
3937  /* return empty string for empty input string */
3938  if (inputstring_len < 1)
3939  {
3940  text_position_cleanup(&state);
3942  }
3943 
3944  /* empty field separator */
3945  if (fldsep_len < 1)
3946  {
3947  text_position_cleanup(&state);
3948  /* if first field, return input string, else empty string */
3949  if (fldnum == 1)
3950  PG_RETURN_TEXT_P(inputstring);
3951  else
3953  }
3954 
3955  /* identify bounds of first field */
3956  start_posn = 1;
3957  end_posn = text_position_next(1, &state);
3958 
3959  /* special case if fldsep not found at all */
3960  if (end_posn == 0)
3961  {
3962  text_position_cleanup(&state);
3963  /* if field 1 requested, return input string, else empty string */
3964  if (fldnum == 1)
3965  PG_RETURN_TEXT_P(inputstring);
3966  else
3968  }
3969 
3970  while (end_posn > 0 && --fldnum > 0)
3971  {
3972  /* identify bounds of next field */
3973  start_posn = end_posn + fldsep_len;
3974  end_posn = text_position_next(start_posn, &state);
3975  }
3976 
3977  text_position_cleanup(&state);
3978 
3979  if (fldnum > 0)
3980  {
3981  /* N'th field separator not found */
3982  /* if last field requested, return it, else empty string */
3983  if (fldnum == 1)
3984  result_text = text_substring(PointerGetDatum(inputstring),
3985  start_posn,
3986  -1,
3987  true);
3988  else
3989  result_text = cstring_to_text("");
3990  }
3991  else
3992  {
3993  /* non-last field requested */
3994  result_text = text_substring(PointerGetDatum(inputstring),
3995  start_posn,
3996  end_posn - start_posn,
3997  false);
3998  }
3999 
4000  PG_RETURN_TEXT_P(result_text);
4001 }
4002 
4003 /*
4004  * Convenience function to return true when two text params are equal.
4005  */
4006 static bool
4007 text_isequal(text *txt1, text *txt2)
4008 {
4010  PointerGetDatum(txt1),
4011  PointerGetDatum(txt2)));
4012 }
4013 
4014 /*
4015  * text_to_array
4016  * parse input string and return text array of elements,
4017  * based on provided field separator
4018  */
4019 Datum
4021 {
4022  return text_to_array_internal(fcinfo);
4023 }
4024 
4025 /*
4026  * text_to_array_null
4027  * parse input string and return text array of elements,
4028  * based on provided field separator and null string
4029  *
4030  * This is a separate entry point only to prevent the regression tests from
4031  * complaining about different argument sets for the same internal function.
4032  */
4033 Datum
4035 {
4036  return text_to_array_internal(fcinfo);
4037 }
4038 
4039 /*
4040  * common code for text_to_array and text_to_array_null functions
4041  *
4042  * These are not strict so we have to test for null inputs explicitly.
4043  */
4044 static Datum
4046 {
4047  text *inputstring;
4048  text *fldsep;
4049  text *null_string;
4050  int inputstring_len;
4051  int fldsep_len;
4052  char *start_ptr;
4053  text *result_text;
4054  bool is_null;
4055  ArrayBuildState *astate = NULL;
4056 
4057  /* when input string is NULL, then result is NULL too */
4058  if (PG_ARGISNULL(0))
4059  PG_RETURN_NULL();
4060 
4061  inputstring = PG_GETARG_TEXT_PP(0);
4062 
4063  /* fldsep can be NULL */
4064  if (!PG_ARGISNULL(1))
4065  fldsep = PG_GETARG_TEXT_PP(1);
4066  else
4067  fldsep = NULL;
4068 
4069  /* null_string can be NULL or omitted */
4070  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4071  null_string = PG_GETARG_TEXT_PP(2);
4072  else
4073  null_string = NULL;
4074 
4075  if (fldsep != NULL)
4076  {
4077  /*
4078  * Normal case with non-null fldsep. Use the text_position machinery
4079  * to search for occurrences of fldsep.
4080  */
4082  int fldnum;
4083  int start_posn;
4084  int end_posn;
4085  int chunk_len;
4086 
4087  text_position_setup(inputstring, fldsep, &state);
4088 
4089  /*
4090  * Note: we check the converted string length, not the original,
4091  * because they could be different if the input contained invalid
4092  * encoding.
4093  */
4094  inputstring_len = state.len1;
4095  fldsep_len = state.len2;
4096 
4097  /* return empty array for empty input string */
4098  if (inputstring_len < 1)
4099  {
4100  text_position_cleanup(&state);
4102  }
4103 
4104  /*
4105  * empty field separator: return the input string as a one-element
4106  * array
4107  */
4108  if (fldsep_len < 1)
4109  {
4110  text_position_cleanup(&state);
4111  /* single element can be a NULL too */
4112  is_null = null_string ? text_isequal(inputstring, null_string) : false;
4114  PointerGetDatum(inputstring),
4115  is_null, 1));
4116  }
4117 
4118  start_posn = 1;
4119  /* start_ptr points to the start_posn'th character of inputstring */
4120  start_ptr = VARDATA_ANY(inputstring);
4121 
4122  for (fldnum = 1;; fldnum++) /* field number is 1 based */
4123  {
4125 
4126  end_posn = text_position_next(start_posn, &state);
4127 
4128  if (end_posn == 0)
4129  {
4130  /* fetch last field */
4131  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4132  }
4133  else
4134  {
4135  /* fetch non-last field */
4136  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
4137  }
4138 
4139  /* must build a temp text datum to pass to accumArrayResult */
4140  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4141  is_null = null_string ? text_isequal(result_text, null_string) : false;
4142 
4143  /* stash away this field */
4144  astate = accumArrayResult(astate,
4145  PointerGetDatum(result_text),
4146  is_null,
4147  TEXTOID,
4149 
4150  pfree(result_text);
4151 
4152  if (end_posn == 0)
4153  break;
4154 
4155  start_posn = end_posn;
4156  start_ptr += chunk_len;
4157  start_posn += fldsep_len;
4158  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
4159  }
4160 
4161  text_position_cleanup(&state);
4162  }
4163  else
4164  {
4165  /*
4166  * When fldsep is NULL, each character in the inputstring becomes an
4167  * element in the result array. The separator is effectively the
4168  * space between characters.
4169  */
4170  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4171 
4172  /* return empty array for empty input string */
4173  if (inputstring_len < 1)
4175 
4176  start_ptr = VARDATA_ANY(inputstring);
4177 
4178  while (inputstring_len > 0)
4179  {
4180  int chunk_len = pg_mblen(start_ptr);
4181 
4183 
4184  /* must build a temp text datum to pass to accumArrayResult */
4185  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4186  is_null = null_string ? text_isequal(result_text, null_string) : false;
4187 
4188  /* stash away this field */
4189  astate = accumArrayResult(astate,
4190  PointerGetDatum(result_text),
4191  is_null,
4192  TEXTOID,
4194 
4195  pfree(result_text);
4196 
4197  start_ptr += chunk_len;
4198  inputstring_len -= chunk_len;
4199  }
4200  }
4201 
4204 }
4205 
4206 /*
4207  * array_to_text
4208  * concatenate Cstring representation of input array elements
4209  * using provided field separator
4210  */
4211 Datum
4213 {
4215  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4216 
4217  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4218 }
4219 
4220 /*
4221  * array_to_text_null
4222  * concatenate Cstring representation of input array elements
4223  * using provided field separator and null string
4224  *
4225  * This version is not strict so we have to test for null inputs explicitly.
4226  */
4227 Datum
4229 {
4230  ArrayType *v;
4231  char *fldsep;
4232  char *null_string;
4233 
4234  /* returns NULL when first or second parameter is NULL */
4235  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4236  PG_RETURN_NULL();
4237 
4238  v = PG_GETARG_ARRAYTYPE_P(0);
4239  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4240 
4241  /* NULL null string is passed through as a null pointer */
4242  if (!PG_ARGISNULL(2))
4243  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4244  else
4245  null_string = NULL;
4246 
4247  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4248 }
4249 
4250 /*
4251  * common code for array_to_text and array_to_text_null functions
4252  */
4253 static text *
4255  const char *fldsep, const char *null_string)
4256 {
4257  text *result;
4258  int nitems,
4259  *dims,
4260  ndims;
4261  Oid element_type;
4262  int typlen;
4263  bool typbyval;
4264  char typalign;
4266  bool printed = false;
4267  char *p;
4268  bits8 *bitmap;
4269  int bitmask;
4270  int i;
4271  ArrayMetaState *my_extra;
4272 
4273  ndims = ARR_NDIM(v);
4274  dims = ARR_DIMS(v);
4275  nitems = ArrayGetNItems(ndims, dims);
4276 
4277  /* if there are no elements, return an empty string */
4278  if (nitems == 0)
4279  return cstring_to_text_with_len("", 0);
4280 
4281  element_type = ARR_ELEMTYPE(v);
4282  initStringInfo(&buf);
4283 
4284  /*
4285  * We arrange to look up info about element type, including its output
4286  * conversion proc, only once per series of calls, assuming the element
4287  * type doesn't change underneath us.
4288  */
4289  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4290  if (my_extra == NULL)
4291  {
4292  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4293  sizeof(ArrayMetaState));
4294  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4295  my_extra->element_type = ~element_type;
4296  }
4297 
4298  if (my_extra->element_type != element_type)
4299  {
4300  /*
4301  * Get info about element type, including its output conversion proc
4302  */
4303  get_type_io_data(element_type, IOFunc_output,
4304  &my_extra->typlen, &my_extra->typbyval,
4305  &my_extra->typalign, &my_extra->typdelim,
4306  &my_extra->typioparam, &my_extra->typiofunc);
4307  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4308  fcinfo->flinfo->fn_mcxt);
4309  my_extra->element_type = element_type;
4310  }
4311  typlen = my_extra->typlen;
4312  typbyval = my_extra->typbyval;
4313  typalign = my_extra->typalign;
4314 
4315  p = ARR_DATA_PTR(v);
4316  bitmap = ARR_NULLBITMAP(v);
4317  bitmask = 1;
4318 
4319  for (i = 0; i < nitems; i++)
4320  {
4321  Datum itemvalue;
4322  char *value;
4323 
4324  /* Get source element, checking for NULL */
4325  if (bitmap && (*bitmap & bitmask) == 0)
4326  {
4327  /* if null_string is NULL, we just ignore null elements */
4328  if (null_string != NULL)
4329  {
4330  if (printed)
4331  appendStringInfo(&buf, "%s%s", fldsep, null_string);
4332  else
4333  appendStringInfoString(&buf, null_string);
4334  printed = true;
4335  }
4336  }
4337  else
4338  {
4339  itemvalue = fetch_att(p, typbyval, typlen);
4340 
4341  value = OutputFunctionCall(&my_extra->proc, itemvalue);
4342 
4343  if (printed)
4344  appendStringInfo(&buf, "%s%s", fldsep, value);
4345  else
4346  appendStringInfoString(&buf, value);
4347  printed = true;
4348 
4349  p = att_addlength_pointer(p, typlen, p);
4350  p = (char *) att_align_nominal(p, typalign);
4351  }
4352 
4353  /* advance bitmap pointer if any */
4354  if (bitmap)
4355  {
4356  bitmask <<= 1;
4357  if (bitmask == 0x100)
4358  {
4359  bitmap++;
4360  bitmask = 1;
4361  }
4362  }
4363  }
4364 
4365  result = cstring_to_text_with_len(buf.data, buf.len);
4366  pfree(buf.data);
4367 
4368  return result;
4369 }
4370 
4371 #define HEXBASE 16
4372 /*
4373  * Convert an int32 to a string containing a base 16 (hex) representation of
4374  * the number.
4375  */
4376 Datum
4378 {
4380  char *ptr;
4381  const char *digits = "0123456789abcdef";
4382  char buf[32]; /* bigger than needed, but reasonable */
4383 
4384  ptr = buf + sizeof(buf) - 1;
4385  *ptr = '\0';
4386 
4387  do
4388  {
4389  *--ptr = digits[value % HEXBASE];
4390  value /= HEXBASE;
4391  } while (ptr > buf && value);
4392 
4394 }
4395 
4396 /*
4397  * Convert an int64 to a string containing a base 16 (hex) representation of
4398  * the number.
4399  */
4400 Datum
4402 {
4403  uint64 value = (uint64) PG_GETARG_INT64(0);
4404  char *ptr;
4405  const char *digits = "0123456789abcdef";
4406  char buf[32]; /* bigger than needed, but reasonable */
4407 
4408  ptr = buf + sizeof(buf) - 1;
4409  *ptr = '\0';
4410 
4411  do
4412  {
4413  *--ptr = digits[value % HEXBASE];
4414  value /= HEXBASE;
4415  } while (ptr > buf && value);
4416 
4418 }
4419 
4420 /*
4421  * Create an md5 hash of a text string and return it as hex
4422  *
4423  * md5 produces a 16 byte (128 bit) hash; double it for hex
4424  */
4425 #define MD5_HASH_LEN 32
4426 
4427 Datum
4429 {
4430  text *in_text = PG_GETARG_TEXT_PP(0);
4431  size_t len;
4432  char hexsum[MD5_HASH_LEN + 1];
4433 
4434  /* Calculate the length of the buffer using varlena metadata */
4435  len = VARSIZE_ANY_EXHDR(in_text);
4436 
4437  /* get the hash result */
4438  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
4439  ereport(ERROR,
4440  (errcode(ERRCODE_OUT_OF_MEMORY),
4441  errmsg("out of memory")));
4442 
4443  /* convert to text and return it */
4445 }
4446 
4447 /*
4448  * Create an md5 hash of a bytea field and return it as a hex string:
4449  * 16-byte md5 digest is represented in 32 hex characters.
4450  */
4451 Datum
4453 {
4454  bytea *in = PG_GETARG_BYTEA_PP(0);
4455  size_t len;
4456  char hexsum[MD5_HASH_LEN + 1];
4457 
4458  len = VARSIZE_ANY_EXHDR(in);
4459  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
4460  ereport(ERROR,
4461  (errcode(ERRCODE_OUT_OF_MEMORY),
4462  errmsg("out of memory")));
4463 
4465 }
4466 
4467 /*
4468  * Return the size of a datum, possibly compressed
4469  *
4470  * Works on any data type
4471  */
4472 Datum
4474 {
4476  int32 result;
4477  int typlen;
4478 
4479  /* On first call, get the input type's typlen, and save at *fn_extra */
4480  if (fcinfo->flinfo->fn_extra == NULL)
4481  {
4482  /* Lookup the datatype of the supplied argument */
4483  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
4484 
4485  typlen = get_typlen(argtypeid);
4486  if (typlen == 0) /* should not happen */
4487  elog(ERROR, "cache lookup failed for type %u", argtypeid);
4488 
4489  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4490  sizeof(int));
4491  *((int *) fcinfo->flinfo->fn_extra) = typlen;
4492  }
4493  else
4494  typlen = *((int *) fcinfo->flinfo->fn_extra);
4495 
4496  if (typlen == -1)
4497  {
4498  /* varlena type, possibly toasted */
4499  result = toast_datum_size(value);
4500  }
4501  else if (typlen == -2)
4502  {
4503  /* cstring */
4504  result = strlen(DatumGetCString(value)) + 1;
4505  }
4506  else
4507  {
4508  /* ordinary fixed-width type */
4509  result = typlen;
4510  }
4511 
4512  PG_RETURN_INT32(result);
4513 }
4514 
4515 /*
4516  * string_agg - Concatenates values and returns string.
4517  *
4518  * Syntax: string_agg(value text, delimiter text) RETURNS text
4519  *
4520  * Note: Any NULL values are ignored. The first-call delimiter isn't
4521  * actually used at all, and on subsequent calls the delimiter precedes
4522  * the associated value.
4523  */
4524 
4525 /* subroutine to initialize state */
4526 static StringInfo
4528 {
4529  StringInfo state;
4530  MemoryContext aggcontext;
4531  MemoryContext oldcontext;
4532 
4533  if (!AggCheckCallContext(fcinfo, &aggcontext))
4534  {
4535  /* cannot be called directly because of internal-type argument */
4536  elog(ERROR, "string_agg_transfn called in non-aggregate context");
4537  }
4538 
4539  /*
4540  * Create state in aggregate context. It'll stay there across subsequent
4541  * calls.
4542  */
4543  oldcontext = MemoryContextSwitchTo(aggcontext);
4544  state = makeStringInfo();
4545  MemoryContextSwitchTo(oldcontext);
4546 
4547  return state;
4548 }
4549 
4550 Datum
4552 {
4553  StringInfo state;
4554 
4555  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4556 
4557  /* Append the value unless null. */
4558  if (!PG_ARGISNULL(1))
4559  {
4560  /* On the first time through, we ignore the delimiter. */
4561  if (state == NULL)
4562  state = makeStringAggState(fcinfo);
4563  else if (!PG_ARGISNULL(2))
4564  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
4565 
4566  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
4567  }
4568 
4569  /*
4570  * The transition type for string_agg() is declared to be "internal",
4571  * which is a pass-by-value type the same size as a pointer.
4572  */
4573  PG_RETURN_POINTER(state);
4574 }
4575 
4576 Datum
4578 {
4579  StringInfo state;
4580 
4581  /* cannot be called directly because of internal-type argument */
4582  Assert(AggCheckCallContext(fcinfo, NULL));
4583 
4584  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4585 
4586  if (state != NULL)
4588  else
4589  PG_RETURN_NULL();
4590 }
4591 
4592 /*
4593  * Implementation of both concat() and concat_ws().
4594  *
4595  * sepstr is the separator string to place between values.
4596  * argidx identifies the first argument to concatenate (counting from zero).
4597  * Returns NULL if result should be NULL, else text value.
4598  */
4599 static text *
4600 concat_internal(const char *sepstr, int argidx,
4601  FunctionCallInfo fcinfo)
4602 {
4603  text *result;
4604  StringInfoData str;
4605  bool first_arg = true;
4606  int i;
4607 
4608  /*
4609  * concat(VARIADIC some-array) is essentially equivalent to
4610  * array_to_text(), ie concat the array elements with the given separator.
4611  * So we just pass the case off to that code.
4612  */
4613  if (get_fn_expr_variadic(fcinfo->flinfo))
4614  {
4615  ArrayType *arr;
4616 
4617  /* Should have just the one argument */
4618  Assert(argidx == PG_NARGS() - 1);
4619 
4620  /* concat(VARIADIC NULL) is defined as NULL */
4621  if (PG_ARGISNULL(argidx))
4622  return NULL;
4623 
4624  /*
4625  * Non-null argument had better be an array. We assume that any call
4626  * context that could let get_fn_expr_variadic return true will have
4627  * checked that a VARIADIC-labeled parameter actually is an array. So
4628  * it should be okay to just Assert that it's an array rather than
4629  * doing a full-fledged error check.
4630  */
4632 
4633  /* OK, safe to fetch the array value */
4634  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4635 
4636  /*
4637  * And serialize the array. We tell array_to_text to ignore null
4638  * elements, which matches the behavior of the loop below.
4639  */
4640  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4641  }
4642 
4643  /* Normal case without explicit VARIADIC marker */
4644  initStringInfo(&str);
4645 
4646  for (i = argidx; i < PG_NARGS(); i++)
4647  {
4648  if (!PG_ARGISNULL(i))
4649  {
4651  Oid valtype;
4652  Oid typOutput;
4653  bool typIsVarlena;
4654 
4655  /* add separator if appropriate */
4656  if (first_arg)
4657  first_arg = false;
4658  else
4659  appendStringInfoString(&str, sepstr);
4660 
4661  /* call the appropriate type output function, append the result */
4662  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4663  if (!OidIsValid(valtype))
4664  elog(ERROR, "could not determine data type of concat() input");
4665  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4667  OidOutputFunctionCall(typOutput, value));
4668  }
4669  }
4670 
4671  result = cstring_to_text_with_len(str.data, str.len);
4672  pfree(str.data);
4673 
4674  return result;
4675 }
4676 
4677 /*
4678  * Concatenate all arguments. NULL arguments are ignored.
4679  */
4680 Datum
4682 {
4683  text *result;
4684 
4685  result = concat_internal("", 0, fcinfo);
4686  if (result == NULL)
4687  PG_RETURN_NULL();
4688  PG_RETURN_TEXT_P(result);
4689 }
4690 
4691 /*
4692  * Concatenate all but first argument value with separators. The first
4693  * parameter is used as the separator. NULL arguments are ignored.
4694  */
4695 Datum
4697 {
4698  char *sep;
4699  text *result;
4700 
4701  /* return NULL when separator is NULL */
4702  if (PG_ARGISNULL(0))
4703  PG_RETURN_NULL();
4705 
4706  result = concat_internal(sep, 1, fcinfo);
4707  if (result == NULL)
4708  PG_RETURN_NULL();
4709  PG_RETURN_TEXT_P(result);
4710 }
4711 
4712 /*
4713  * Return first n characters in the string. When n is negative,
4714  * return all but last |n| characters.
4715  */
4716 Datum
4718 {
4719  text *str = PG_GETARG_TEXT_PP(0);
4720  const char *p = VARDATA_ANY(str);
4721  int len = VARSIZE_ANY_EXHDR(str);
4722  int n = PG_GETARG_INT32(1);
4723  int rlen;
4724 
4725  if (n < 0)
4726  n = pg_mbstrlen_with_len(p, len) + n;
4727  rlen = pg_mbcharcliplen(p, len, n);
4728 
4730 }
4731 
4732 /*
4733  * Return last n characters in the string. When n is negative,
4734  * return all but first |n| characters.
4735  */
4736 Datum
4738 {
4739  text *str = PG_GETARG_TEXT_PP(0);
4740  const char *p = VARDATA_ANY(str);
4741  int len = VARSIZE_ANY_EXHDR(str);
4742  int n = PG_GETARG_INT32(1);
4743  int off;
4744 
4745  if (n < 0)
4746  n = -n;
4747  else
4748  n = pg_mbstrlen_with_len(p, len) - n;
4749  off = pg_mbcharcliplen(p, len, n);
4750 
4751  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4752 }
4753 
4754 /*
4755  * Return reversed string
4756  */
4757 Datum
4759 {
4760  text *str = PG_GETARG_TEXT_PP(0);
4761  const char *p = VARDATA_ANY(str);
4762  int len = VARSIZE_ANY_EXHDR(str);
4763  const char *endp = p + len;
4764  text *result;
4765  char *dst;
4766 
4767  result = palloc(len + VARHDRSZ);
4768  dst = (char *) VARDATA(result) + len;
4769  SET_VARSIZE(result, len + VARHDRSZ);
4770 
4772  {
4773  /* multibyte version */
4774  while (p < endp)
4775  {
4776  int sz;
4777 
4778  sz = pg_mblen(p);
4779  dst -= sz;
4780  memcpy(dst, p, sz);
4781  p += sz;
4782  }
4783  }
4784  else
4785  {
4786  /* single byte version */
4787  while (p < endp)
4788  *(--dst) = *p++;
4789  }
4790 
4791  PG_RETURN_TEXT_P(result);
4792 }
4793 
4794 
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS	0x0001	/* is minus flag present? */

/*
 * Step ptr to the next character of the format string, raising an error if
 * that would reach end_ptr.  This maintains the parsing invariant that at
 * least one unread character is always available.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (++(ptr) >= (end_ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format() type specifier"), \
					 errhint("For a single \"%%\" use \"%%%%\"."))); \
	} while (0)
4808 
4809 /*
4810  * Returns a formatted string
4811  */
4812 Datum
4814 {
4815  text *fmt;
4816  StringInfoData str;
4817  const char *cp;
4818  const char *start_ptr;
4819  const char *end_ptr;
4820  text *result;
4821  int arg;
4822  bool funcvariadic;
4823  int nargs;
4824  Datum *elements = NULL;
4825  bool *nulls = NULL;
4826  Oid element_type = InvalidOid;
4827  Oid prev_type = InvalidOid;
4828  Oid prev_width_type = InvalidOid;
4829  FmgrInfo typoutputfinfo;
4830  FmgrInfo typoutputinfo_width;
4831 
4832  /* When format string is null, immediately return null */
4833  if (PG_ARGISNULL(0))
4834  PG_RETURN_NULL();
4835 
4836  /* If argument is marked VARIADIC, expand array into elements */
4837  if (get_fn_expr_variadic(fcinfo->flinfo))
4838  {
4839  ArrayType *arr;
4840  int16 elmlen;
4841  bool elmbyval;
4842  char elmalign;
4843  int nitems;
4844 
4845  /* Should have just the one argument */
4846  Assert(PG_NARGS() == 2);
4847 
4848  /* If argument is NULL, we treat it as zero-length array */
4849  if (PG_ARGISNULL(1))
4850  nitems = 0;
4851  else
4852  {
4853  /*
4854  * Non-null argument had better be an array. We assume that any
4855  * call context that could let get_fn_expr_variadic return true
4856  * will have checked that a VARIADIC-labeled parameter actually is
4857  * an array. So it should be okay to just Assert that it's an
4858  * array rather than doing a full-fledged error check.
4859  */
4861 
4862  /* OK, safe to fetch the array value */
4863  arr = PG_GETARG_ARRAYTYPE_P(1);
4864 
4865  /* Get info about array element type */
4866  element_type = ARR_ELEMTYPE(arr);
4867  get_typlenbyvalalign(element_type,
4868  &elmlen, &elmbyval, &elmalign);
4869 
4870  /* Extract all array elements */
4871  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
4872  &elements, &nulls, &nitems);
4873  }
4874 
4875  nargs = nitems + 1;
4876  funcvariadic = true;
4877  }
4878  else
4879  {
4880  /* Non-variadic case, we'll process the arguments individually */
4881  nargs = PG_NARGS();
4882  funcvariadic = false;
4883  }
4884 
4885  /* Setup for main loop. */
4886  fmt = PG_GETARG_TEXT_PP(0);
4887  start_ptr = VARDATA_ANY(fmt);
4888  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
4889  initStringInfo(&str);
4890  arg = 1; /* next argument position to print */
4891 
4892  /* Scan format string, looking for conversion specifiers. */
4893  for (cp = start_ptr; cp < end_ptr; cp++)
4894  {
4895  int argpos;
4896  int widthpos;
4897  int flags;
4898  int width;
4899  Datum value;
4900  bool isNull;
4901  Oid typid;
4902 
4903  /*
4904  * If it's not the start of a conversion specifier, just copy it to
4905  * the output buffer.
4906  */
4907  if (*cp != '%')
4908  {
4909  appendStringInfoCharMacro(&str, *cp);
4910  continue;
4911  }
4912 
4913  ADVANCE_PARSE_POINTER(cp, end_ptr);
4914 
4915  /* Easy case: %% outputs a single % */
4916  if (*cp == '%')
4917  {
4918  appendStringInfoCharMacro(&str, *cp);
4919  continue;
4920  }
4921 
4922  /* Parse the optional portions of the format specifier */
4923  cp = text_format_parse_format(cp, end_ptr,
4924  &argpos, &widthpos,
4925  &flags, &width);
4926 
4927  /*
4928  * Next we should see the main conversion specifier. Whether or not
4929  * an argument position was present, it's known that at least one
4930  * character remains in the string at this point. Experience suggests
4931  * that it's worth checking that that character is one of the expected
4932  * ones before we try to fetch arguments, so as to produce the least
4933  * confusing response to a mis-formatted specifier.
4934  */
4935  if (strchr("sIL", *cp) == NULL)
4936  ereport(ERROR,
4937  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4938  errmsg("unrecognized format() type specifier \"%c\"",
4939  *cp),
4940  errhint("For a single \"%%\" use \"%%%%\".")));
4941 
4942  /* If indirect width was specified, get its value */
4943  if (widthpos >= 0)
4944  {
4945  /* Collect the specified or next argument position */
4946  if (widthpos > 0)
4947  arg = widthpos;
4948  if (arg >= nargs)
4949  ereport(ERROR,
4950  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4951  errmsg("too few arguments for format()")));
4952 
4953  /* Get the value and type of the selected argument */
4954  if (!funcvariadic)
4955  {
4956  value = PG_GETARG_DATUM(arg);
4957  isNull = PG_ARGISNULL(arg);
4958  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4959  }
4960  else
4961  {
4962  value = elements[arg - 1];
4963  isNull = nulls[arg - 1];
4964  typid = element_type;
4965  }
4966  if (!OidIsValid(typid))
4967  elog(ERROR, "could not determine data type of format() input");
4968 
4969  arg++;
4970 
4971  /* We can treat NULL width the same as zero */
4972  if (isNull)
4973  width = 0;
4974  else if (typid == INT4OID)
4975  width = DatumGetInt32(value);
4976  else if (typid == INT2OID)
4977  width = DatumGetInt16(value);
4978  else
4979  {
4980  /* For less-usual datatypes, convert to text then to int */
4981  char *str;
4982 
4983  if (typid != prev_width_type)
4984  {
4985  Oid typoutputfunc;
4986  bool typIsVarlena;
4987 
4988  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4989  fmgr_info(typoutputfunc, &typoutputinfo_width);
4990  prev_width_type = typid;
4991  }
4992 
4993  str = OutputFunctionCall(&typoutputinfo_width, value);
4994 
4995  /* pg_atoi will complain about bad data or overflow */
4996  width = pg_atoi(str, sizeof(int), '\0');
4997 
4998  pfree(str);
4999  }
5000  }
5001 
5002  /* Collect the specified or next argument position */
5003  if (argpos > 0)
5004  arg = argpos;
5005  if (arg >= nargs)
5006  ereport(ERROR,
5007  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5008  errmsg("too few arguments for format()")));
5009 
5010  /* Get the value and type of the selected argument */
5011  if (!funcvariadic)
5012  {
5013  value = PG_GETARG_DATUM(arg);
5014  isNull = PG_ARGISNULL(arg);
5015  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5016  }
5017  else
5018  {
5019  value = elements[arg - 1];
5020  isNull = nulls[arg - 1];
5021  typid = element_type;
5022  }
5023  if (!OidIsValid(typid))
5024  elog(ERROR, "could not determine data type of format() input");
5025 
5026  arg++;
5027 
5028  /*
5029  * Get the appropriate typOutput function, reusing previous one if
5030  * same type as previous argument. That's particularly useful in the
5031  * variadic-array case, but often saves work even for ordinary calls.
5032  */
5033  if (typid != prev_type)
5034  {
5035  Oid typoutputfunc;
5036  bool typIsVarlena;
5037 
5038  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5039  fmgr_info(typoutputfunc, &typoutputfinfo);
5040  prev_type = typid;
5041  }
5042 
5043  /*
5044  * And now we can format the value.
5045  */
5046  switch (*cp)
5047  {
5048  case 's':
5049  case 'I':
5050  case 'L':
5051  text_format_string_conversion(&str, *cp, &typoutputfinfo,
5052  value, isNull,
5053  flags, width);
5054  break;
5055  default:
5056  /* should not get here, because of previous check */
5057  ereport(ERROR,
5058  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5059  errmsg("unrecognized format() type specifier \"%c\"",
5060  *cp),
5061  errhint("For a single \"%%\" use \"%%%%\".")));
5062  break;
5063  }
5064  }
5065 
5066  /* Don't need deconstruct_array results anymore. */
5067  if (elements != NULL)
5068  pfree(elements);
5069  if (nulls != NULL)
5070  pfree(nulls);
5071 
5072  /* Generate results. */
5073  result = cstring_to_text_with_len(str.data, str.len);
5074  pfree(str.data);
5075 
5076  PG_RETURN_TEXT_P(result);
5077 }
5078 
5079 /*
5080  * Parse contiguous digits as a decimal number.
5081  *
5082  * Returns true if some digits could be parsed.
5083  * The value is returned into *value, and *ptr is advanced to the next
5084  * character to be parsed.
5085  *
5086  * Note parsing invariant: at least one character is known available before
5087  * string end (end_ptr) at entry, and this is still true at exit.
5088  */
5089 static bool
5090 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5091 {
5092  bool found = false;
5093  const char *cp = *ptr;
5094  int val = 0;
5095 
5096  while (*cp >= '0' && *cp <= '9')
5097  {
5098  int newval = val * 10 + (*cp - '0');
5099 
5100  if (newval / 10 != val) /* overflow? */
5101  ereport(ERROR,
5102  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5103  errmsg("number is out of range")));
5104  val = newval;
5105  ADVANCE_PARSE_POINTER(cp, end_ptr);
5106  found = true;
5107  }
5108 
5109  *ptr = cp;
5110  *value = val;
5111 
5112  return found;
5113 }
5114 
5115 /*
5116  * Parse a format specifier (generally following the SUS printf spec).
5117  *
5118  * We have already advanced over the initial '%', and we are looking for
5119  * [argpos][flags][width]type (but the type character is not consumed here).
5120  *
5121  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5122  * Output parameters:
5123  * argpos: argument position for value to be printed. -1 means unspecified.
5124  * widthpos: argument position for width. Zero means the argument position
5125  * was unspecified (ie, take the next arg) and -1 means no width
5126  * argument (width was omitted or specified as a constant).
5127  * flags: bitmask of flags.
5128  * width: directly-specified width value. Zero means the width was omitted
5129  * (note it's not necessary to distinguish this case from an explicit
5130  * zero width value).
5131  *
5132  * The function result is the next character position to be parsed, ie, the
5133  * location where the type character is/should be.
5134  *
5135  * Note parsing invariant: at least one character is known available before
5136  * string end (end_ptr) at entry, and this is still true at exit.
5137  */
5138 static const char *
5139 text_format_parse_format(const char *start_ptr, const char *end_ptr,
5140  int *argpos, int *widthpos,
5141  int *flags, int *width)
5142 {
5143  const char *cp = start_ptr;
5144  int n;
5145 
5146  /* set defaults for output parameters */
5147  *argpos = -1;
5148  *widthpos = -1;
5149  *flags = 0;
5150  *width = 0;
5151 
5152  /* try to identify first number */
5153  if (text_format_parse_digits(&cp, end_ptr, &n))
5154  {
5155  if (*cp != '$')
5156  {
5157  /* Must be just a width and a type, so we're done */
5158  *width = n;
5159  return cp;
5160  }
5161  /* The number was argument position */
5162  *argpos = n;
5163  /* Explicit 0 for argument index is immediately refused */
5164  if (n == 0)
5165  ereport(ERROR,
5166  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5167  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5168  ADVANCE_PARSE_POINTER(cp, end_ptr);
5169  }
5170 
5171  /* Handle flags (only minus is supported now) */
5172  while (*cp == '-')
5173  {
5174  *flags |= TEXT_FORMAT_FLAG_MINUS;
5175  ADVANCE_PARSE_POINTER(cp, end_ptr);
5176  }
5177 
5178  if (*cp == '*')
5179  {
5180  /* Handle indirect width */
5181  ADVANCE_PARSE_POINTER(cp, end_ptr);
5182  if (text_format_parse_digits(&cp, end_ptr, &n))
5183  {
5184  /* number in this position must be closed by $ */
5185  if (*cp != '$')
5186  ereport(ERROR,
5187  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5188  errmsg("width argument position must be ended by \"$\"")));
5189  /* The number was width argument position */
5190  *widthpos = n;
5191  /* Explicit 0 for argument index is immediately refused */
5192  if (n == 0)
5193  ereport(ERROR,
5194  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5195  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5196  ADVANCE_PARSE_POINTER(cp, end_ptr);
5197  }
5198  else
5199  *widthpos = 0; /* width's argument position is unspecified */
5200  }
5201  else
5202  {
5203  /* Check for direct width specification */
5204  if (text_format_parse_digits(&cp, end_ptr, &n))
5205  *width = n;
5206  }
5207 
5208  /* cp should now be pointing at type character */
5209  return cp;
5210 }
5211 
5212 /*
5213  * Format a %s, %I, or %L conversion
5214  */
5215 static void
5217  FmgrInfo *typOutputInfo,
5218  Datum value, bool isNull,
5219  int flags, int width)
5220 {
5221  char *str;
5222 
5223  /* Handle NULL arguments before trying to stringify the value. */
5224  if (isNull)
5225  {
5226  if (conversion == 's')
5227  text_format_append_string(buf, "", flags, width);
5228  else if (conversion == 'L')
5229  text_format_append_string(buf, "NULL", flags, width);
5230  else if (conversion == 'I')
5231  ereport(ERROR,
5232  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5233  errmsg("null values cannot be formatted as an SQL identifier")));
5234  return;
5235  }
5236 
5237  /* Stringify. */
5238  str = OutputFunctionCall(typOutputInfo, value);
5239 
5240  /* Escape. */
5241  if (conversion == 'I')
5242  {
5243  /* quote_identifier may or may not allocate a new string. */
5244  text_format_append_string(buf, quote_identifier(str), flags, width);
5245  }
5246  else if (conversion == 'L')
5247  {
5248  char *qstr = quote_literal_cstr(str);
5249 
5250  text_format_append_string(buf, qstr, flags, width);
5251  /* quote_literal_cstr() always allocates a new string */
5252  pfree(qstr);
5253  }
5254  else
5255  text_format_append_string(buf, str, flags, width);
5256 
5257  /* Cleanup. */
5258  pfree(str);
5259 }
5260 
5261 /*
5262  * Append str to buf, padding as directed by flags/width
5263  */
5264 static void
5266  int flags, int width)
5267 {
5268  bool align_to_left = false;
5269  int len;
5270 
5271  /* fast path for typical easy case */
5272  if (width == 0)
5273  {
5274  appendStringInfoString(buf, str);
5275  return;
5276  }
5277 
5278  if (width < 0)
5279  {
5280  /* Negative width: implicit '-' flag, then take absolute value */
5281  align_to_left = true;
5282  /* -INT_MIN is undefined */
5283  if (width <= INT_MIN)
5284  ereport(ERROR,
5285  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5286  errmsg("number is out of range")));
5287  width = -width;
5288  }
5289  else if (flags & TEXT_FORMAT_FLAG_MINUS)
5290  align_to_left = true;
5291 
5292  len = pg_mbstrlen(str);
5293  if (align_to_left)
5294  {
5295  /* left justify */
5296  appendStringInfoString(buf, str);
5297  if (len < width)
5298  appendStringInfoSpaces(buf, width - len);
5299  }
5300  else
5301  {
5302  /* right justify */
5303  if (len < width)
5304  appendStringInfoSpaces(buf, width - len);
5305  appendStringInfoString(buf, str);
5306  }
5307 }
5308 
5309 /*
5310  * text_format_nv - nonvariadic wrapper for text_format function.
5311  *
5312  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5313  * which checks that all built-in functions that share the implementing C
5314  * function take the same number of arguments.
5315  */
5316 Datum
5318 {
5319  return text_format(fcinfo);
5320 }
5321 
/*
 * Helper function for Levenshtein distance functions. Faster than memcmp(),
 * for this use case.
 *
 * Compares the first len bytes of s1 and s2, scanning from the last byte
 * towards the first.  Returns true if all len bytes match (trivially true
 * when len <= 0), false at the first difference found.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	while (len > 0)
	{
		len--;
		if (s1[len] != s2[len])
			return false;
	}
	return true;
}
5337 
5338 /* Expand each Levenshtein distance variant */
5339 #include "levenshtein.c"
5340 #define LEVENSHTEIN_LESS_EQUAL
5341 #include "levenshtein.c"
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2561
#define PG_CACHE_LINE_SIZE
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4020
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2697
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1995
Value * makeString(char *str)
Definition: value.c:53
signed short int16
Definition: c.h:252
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:107
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:305
#define DatumGetUInt32(X)
Definition: postgres.h:494
#define NIL
Definition: pg_list.h:69
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:4813
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1962
int length(const List *list)
Definition: list.c:1271
#define PG_GETARG_INT32(n)
Definition: fmgr.h:225
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:4800
Definition: fmgr.h:53
text * replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob)
Definition: varlena.c:3775
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:315
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:2974
static struct @76 value
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:873
Datum split_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3910
int errhint(const char *fmt,...)
Definition: elog.c:987
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1024
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2600
#define VARDATA_ANY(PTR)
Definition: postgres.h:349
#define VARDATA(PTR)
Definition: postgres.h:305
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
MemoryContext fn_mcxt
Definition: fmgr.h:62
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:144
#define MD5_HASH_LEN
Definition: varlena.c:4425
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:9968
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1664
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:2513
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:263
#define DatumGetInt32(X)
Definition: postgres.h:480
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:2545
#define HEXBASE
Definition: varlena.c:4371
#define TEXTOID
Definition: pg_type.h:324
#define VARSIZE(PTR)
Definition: postgres.h:306
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3554
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:3456
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:5216
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:1989
#define PointerGetDatum(X)
Definition: postgres.h:564
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:131
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:532
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:224
static void text_position_setup(text *t1, text *t2, TextPositionState *state)
Definition: varlena.c:1121
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:359
#define VARHDRSZ
Definition: c.h:441
Datum md5_bytea(PG_FUNCTION_ARGS)
Definition: varlena.c:4452
char * pstrdup(const char *in)
Definition: mcxt.c:1165
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:521
regoff_t rm_so
Definition: regex.h:85
#define DatumGetTextPP(X)
Definition: fmgr.h:249
StringInfo makeStringInfo(void)
Definition: stringinfo.c:29
StringInfoData * StringInfo
Definition: stringinfo.h:46
#define Min(x, y)
Definition: c.h:802
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:277
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2150
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2125
#define PG_RETURN_INT32(x)
Definition: fmgr.h:298
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:262
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:2631
#define INT4OID
Definition: pg_type.h:316
void canonicalize_path(char *path)
Definition: path.c:254
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2362
int errcode(int sqlerrcode)
Definition: elog.c:575
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:163
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
#define DatumGetByteaPP(X)
Definition: fmgr.h:247
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:232
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:2935
pg_wchar * wstr2
Definition: varlena.c:52
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:484
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4551
Datum md5_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4428
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:313
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3424
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:2706
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3352
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:624
#define OidIsValid(objectId)
Definition: c.h:534
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1739
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:379
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:215
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1036
bool trace_sort
Definition: tuplesort.c:154
#define PG_GET_COLLATION()
Definition: fmgr.h:155
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2776
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:4681
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:662
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:5265
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4228
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:4696
regoff_t rm_eo
Definition: regex.h:86
signed int int32
Definition: c.h:253
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:2664
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1925
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:270
int pg_locale_t
Definition: pg_locale.h:71
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1926
static int32 text_length(Datum str)
Definition: varlena.c:642
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:805
bool typbyval
Definition: array.h:221
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:187
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:4401
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:316
#define PG_GETARG_BYTEA_P(n)
Definition: fmgr.h:267
static Datum text_to_array_internal(PG_FUNCTION_ARGS)
Definition: varlena.c:4045
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:3416
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3254
FmgrInfo * flinfo
Definition: fmgr.h:71
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:244
#define wcscoll_l
Definition: win32.h:358
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:135
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:65
unsigned hex_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:126
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4212
void pfree(void *pointer)
Definition: mcxt.c:992
Size toast_raw_datum_size(Datum value)
Definition: tuptoaster.c:353
#define REG_OKAY
Definition: regex.h:137
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:110
Datum string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4577
Datum textoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:1013
#define ERROR
Definition: elog.h:43
char * s1
static bool check_replace_text_has_escape_char(const text *replace_text)
Definition: varlena.c:3637
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1123
#define DatumGetCString(X)
Definition: postgres.h:574
Size toast_datum_size(Datum value)
Definition: tuptoaster.c:409
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:2220
Datum byteage(PG_FUNCTION_ARGS)
Definition: varlena.c:3476
#define ARR_DIMS(a)
Definition: array.h:275
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:159
MemoryContext ssup_cxt
Definition: sortsupport.h:66
struct varlena * pg_detoast_datum_packed(struct varlena *datum)
Definition: fmgr.c:2174
static int text_position_next(int start_pos, TextPositionState *state)
Definition: varlena.c:1233
Datum text_to_array_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4034
#define MAXPGPATH
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:831
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:257
static int charlen_to_bytelen(const char *p, int n)
Definition: varlena.c:737
static text * text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
Definition: varlena.c:816
Datum unknownrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:589
static text * array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string)
Definition: varlena.c:4254
Definition: c.h:489
static void appendStringInfoText(StringInfo str, const text *t)
Definition: varlena.c:3540
Datum text_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:2442
#define INT2OID
Definition: pg_type.h:308
Datum texteq(PG_FUNCTION_ARGS)
Definition: varlena.c:1600
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:189
#define ARR_DATA_PTR(a)
Definition: array.h:303
hyperLogLogState abbr_card
Definition: varlena.c:73
Datum text_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:2454
Datum textne(PG_FUNCTION_ARGS)
Definition: varlena.c:1635
int16 typlen
Definition: array.h:220
static char * buf
Definition: pg_test_fsync.c:65
#define DatumBigEndianToNative(x)
Definition: pg_bswap.h:65
#define memmove(d, s, c)
Definition: c.h:1058
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:163
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3129
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:68
char typdelim
Definition: array.h:223
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition: varlena.c:2788
Datum text_name(PG_FUNCTION_ARGS)
Definition: varlena.c:3031
static text * text_catenate(text *t1, text *t2)
Definition: varlena.c:696
#define DatumGetInt16(X)
Definition: postgres.h:452
#define DatumGetBool(X)
Definition: postgres.h:401
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
Definition: geqo_px.c:46
unsigned int uint32
Definition: c.h:265
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:192
void * ssup_extra
Definition: sortsupport.h:87
ArrayType * create_singleton_array(FunctionCallInfo fcinfo, Oid element_type, Datum element, bool isNull, int ndims)
Datum textpos(PG_FUNCTION_ARGS)
Definition: varlena.c:1074
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
Datum text_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:797
int bytea_output
Definition: varlena.c:41
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:169
static int text_cmp(text *arg1, text *arg2, Oid collid)
Definition: varlena.c:1575
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:2869
#define S(n, x)
Definition: sha1.c:55
#define PG_RETURN_ARRAYTYPE_P(x)
Definition: array.h:246
Datum pg_column_size(PG_FUNCTION_ARGS)
Definition: varlena.c:4473
Datum text_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:1694
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:172
#define ereport(elevel, rest)
Definition: elog.h:122
static int internal_text_pattern_compare(text *arg1, text *arg2)
Definition: varlena.c:2475
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5055
static bool text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
Definition: varlena.c:5090
unsigned int pg_wchar
Definition: mbprint.c:31
#define DatumGetVarStringPP(X)
Definition: varlena.c:94
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3071
#define byte(x, n)
Definition: rijndael.c:68
Datum textcat(PG_FUNCTION_ARGS)
Definition: varlena.c:681
List * lappend(List *list, void *datum)
Definition: list.c:128
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3054
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:588
#define MaxAllocSize
Definition: memutils.h:40
int skiptable[256]
Definition: varlena.c:57
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:201
void initStringInfo(StringInfo str)
Definition: stringinfo.c:65
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:444
void varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
Definition: varlena.c:1766
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1260
Datum text_le(PG_FUNCTION_ARGS)
Definition: varlena.c:1679
Datum hash_uint32(uint32 k)
Definition: hashfunc.c:512
uint8 bits8
Definition: c.h:272
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:783
#define TextDatumGetCString(d)
Definition: builtins.h:91
void * palloc0(Size size)
Definition: mcxt.c:920
Datum text_format_nv(PG_FUNCTION_ARGS)
Definition: varlena.c:5317
char * s2
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:303
uintptr_t Datum
Definition: postgres.h:374
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
Datum text_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:4758
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:2683
int bpchartruelen(char *s, int len)
Definition: varchar.c:660
#define REGEXP_REPLACE_BACKREF_CNT
Definition: varlena.c:3764
void appendStringInfoSpaces(StringInfo str, int count)
Definition: stringinfo.c:219
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:785
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:185
Datum text_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:1709
#define VARSIZE_ANY(PTR)
Definition: postgres.h:336
#define strxfrm_l
Definition: win32.h:357
static void text_position_cleanup(TextPositionState *state)
Definition: varlena.c:1367
Datum byteacmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3496
#define InvalidOid
Definition: postgres_ext.h:36
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:734
Datum to_hex32(PG_FUNCTION_ARGS)
Definition: varlena.c:4377
hyperLogLogState full_card
Definition: varlena.c:74
#define PG_RETURN_VOID()
Definition: fmgr.h:293
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:314
#define Max(x, y)
Definition: c.h:796
text * cstring_to_text(const char *s)
Definition: varlena.c:151
Datum unknownsend(PG_FUNCTION_ARGS)
Definition: varlena.c:604
#define PG_ARGISNULL(n)
Definition: fmgr.h:166
#define NULL
Definition: c.h:226
bool pg_md5_hash(const void *buff, size_t len, char *hexsum)
Definition: md5.c:293
#define Assert(condition)
Definition: c.h:671
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
Datum hash_any(register const unsigned char *k, register int keylen)
Definition: hashfunc.c:307
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:570
Datum text_right(PG_FUNCTION_ARGS)
Definition: varlena.c:4737
static text * concat_internal(const char *sepstr, int argidx, FunctionCallInfo fcinfo)
Definition: varlena.c:4600
int varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
Definition: varlena.c:1384
Oid typioparam
Definition: array.h:224
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:306
Datum unknownin(PG_FUNCTION_ARGS)
Definition: varlena.c:565
size_t Size
Definition: c.h:353
static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: varlena.c:2328
static bool rest_of_char_same(const char *s1, const char *s2, int len)
Definition: varlena.c:5327
Datum text_pattern_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:2497
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:550
#define newval
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:268
Datum byteane(PG_FUNCTION_ARGS)
Definition: varlena.c:3384
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:166
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:3660
Datum textin(PG_FUNCTION_ARGS)
Definition: varlena.c:510
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:216
#define PG_NARGS()
Definition: fmgr.h:160
#define C_COLLATION_OID
Definition: pg_collation.h:71
void * fn_extra
Definition: fmgr.h:61
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)
Definition: varlena.c:3670
#define ARR_NDIM(a)
Definition: array.h:271
Datum byteapos(PG_FUNCTION_ARGS)
Definition: varlena.c:2825
#define TEXTBUFLEN
Definition: varlena.c:85
Oid typiofunc
Definition: array.h:225
#define DatumGetPointer(X)
Definition: postgres.h:557
char typalign
Definition: array.h:222
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3475
char * text_to_cstring(const text *t)
Definition: varlena.c:184
pg_wchar * wstr1
Definition: varlena.c:51
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:4991
#define DatumGetBpCharPP(X)
Definition: fmgr.h:251
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2525
Datum bttextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:1724
Datum unknownout(PG_FUNCTION_ARGS)
Definition: varlena.c:577
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:172
int16 get_typlen(Oid typid)
Definition: lsyscache.c:1915
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:2006
Datum bytearecv(PG_FUNCTION_ARGS)
Definition: varlena.c:425
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:342
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:71
static StringInfo makeStringAggState(FunctionCallInfo fcinfo)
Definition: varlena.c:4527
FmgrInfo proc
Definition: array.h:226
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:452
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:749
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:2765
Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:2577
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:2600
void list_free(List *list)
Definition: list.c:1133
int i
Oid element_type
Definition: array.h:219
#define REG_NOMATCH
Definition: regex.h:138
#define NameStr(name)
Definition: c.h:495
static char * locale
Definition: initdb.c:122
void * arg
static bool text_isequal(text *txt1, text *txt2)
Definition: varlena.c:4007
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:233
#define VAL(CH)
Definition: varlena.c:242
Definition: c.h:435
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
Datum text_left(PG_FUNCTION_ARGS)
Definition: varlena.c:4717
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:330
#define elog
Definition: elog.h:219
static const char * text_format_parse_format(const char *start_ptr, const char *end_ptr, int *argpos, int *widthpos, int *flags, int *width)
Definition: varlena.c:5139
Datum byteaGetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:2898
Datum bpchar(PG_FUNCTION_ARGS)
Definition: varchar.c:267
#define strcoll_l
Definition: win32.h:356
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:183
NameData * Name
Definition: c.h:493
#define PG_GETARG_INT64(n)
Definition: fmgr.h:238
Datum byteale(PG_FUNCTION_ARGS)
Definition: varlena.c:3436
Definition: pg_list.h:45
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:541
#define ARR_ELEMTYPE(a)
Definition: array.h:273
#define ARR_NULLBITMAP(a)
Definition: array.h:281
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:240
Definition: regex.h:55
long val
Definition: informix.c:689
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:173
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:557
#define PG_RETURN_NULL()
Definition: fmgr.h:289
#define PG_RETURN_NAME(x)
Definition: fmgr.h:307
#define TEXT_FORMAT_FLAG_MINUS
Definition: varlena.c:4798
int32 pg_atoi(const char *s, int size, int c)
Definition: numutils.c:37
#define PG_GETARG_NAME(n)
Definition: fmgr.h:234
static int text_position(text *t1, text *t2)
Definition: varlena.c:1097
Datum text_pattern_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:2529
#define DIG(VAL)
Definition: varlena.c:243
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742
int digits
Definition: informix.c:691
Datum bytea_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:3518
Datum byteacat(PG_FUNCTION_ARGS)
Definition: varlena.c:2616
void get_type_io_data(Oid typid, IOFuncSelector which_func, int16 *typlen, bool *typbyval, char *typalign, char *typdelim, Oid *typioparam, Oid *func)
Definition: lsyscache.c:2043