PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/hash.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_collation.h"
23 #include "catalog/pg_type.h"
24 #include "lib/hyperloglog.h"
25 #include "libpq/md5.h"
26 #include "libpq/pqformat.h"
27 #include "miscadmin.h"
28 #include "parser/scansup.h"
29 #include "port/pg_bswap.h"
30 #include "regex/regex.h"
31 #include "utils/builtins.h"
32 #include "utils/bytea.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/pg_locale.h"
36 #include "utils/sortsupport.h"
37 
38 
39 /* GUC variable */
41 
42 typedef struct varlena unknown;
43 typedef struct varlena VarString;
44 
45 typedef struct
46 {
47  bool use_wchar; /* T if multibyte encoding */
48  char *str1; /* use these if not use_wchar */
49  char *str2; /* note: these point to original texts */
50  pg_wchar *wstr1; /* use these if use_wchar */
51  pg_wchar *wstr2; /* note: these are palloc'd */
52  int len1; /* string lengths in logical characters */
53  int len2;
54  /* Skip table for Boyer-Moore-Horspool search algorithm: */
55  int skiptablemask; /* mask for ANDing with skiptable subscripts */
56  int skiptable[256]; /* skip distance for given mismatched char */
58 
/*
 * Working state kept for a string sort: two reusable buffers, a cache of
 * the last comparison, and cardinality estimators for abbreviated keys.
 */
59 typedef struct
60 {
61  char *buf1; /* 1st string, or abbreviation original string
62  * buf */
63  char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
64  int buflen1; /* presumably the allocated size of buf1 -- TODO confirm */
65  int buflen2; /* presumably the allocated size of buf2 -- TODO confirm */
66  int last_len1; /* Length of last buf1 string/strxfrm() input */
67  int last_len2; /* Length of last buf2 string/strxfrm() blob */
68  int last_returned; /* Last comparison result (cache) */
69  bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
70  bool collate_c; /* presumably set when sorting in the C collation -- TODO confirm */
71  bool bpchar; /* Sorting bpchar, not varchar/text/bytea? */
72  hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
73  hyperLogLogState full_card; /* Full key cardinality state */
74  double prop_card; /* Required cardinality proportion */
75 #ifdef HAVE_LOCALE_T
77 #endif
79 
80 /*
81  * This should be large enough that most strings will fit, but small enough
82  * that we feel comfortable putting it on the stack
83  */
84 #define TEXTBUFLEN 1024
85 
86 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
87 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
88 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
89 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
90 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
91 
92 #define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X))
93 #define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X))
94 
95 static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
96 static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
97 static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
98 static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
99 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
100 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
101 static int32 text_length(Datum str);
102 static text *text_catenate(text *t1, text *t2);
103 static text *text_substring(Datum str,
104  int32 start,
105  int32 length,
106  bool length_not_specified);
107 static text *text_overlay(text *t1, text *t2, int sp, int sl);
108 static int text_position(text *t1, text *t2);
109 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
110 static int text_position_next(int start_pos, TextPositionState *state);
112 static int text_cmp(text *arg1, text *arg2, Oid collid);
113 static bytea *bytea_catenate(bytea *t1, bytea *t2);
114 static bytea *bytea_substring(Datum str,
115  int S,
116  int L,
117  bool length_not_specified);
118 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
119 static void appendStringInfoText(StringInfo str, const text *t);
122  const char *fldsep, const char *null_string);
124 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
125  int *value);
126 static const char *text_format_parse_format(const char *start_ptr,
127  const char *end_ptr,
128  int *argpos, int *widthpos,
129  int *flags, int *width);
130 static void text_format_string_conversion(StringInfo buf, char conversion,
131  FmgrInfo *typOutputInfo,
132  Datum value, bool isNull,
133  int flags, int width);
134 static void text_format_append_string(StringInfo buf, const char *str,
135  int flags, int width);
136 
137 
138 /*****************************************************************************
139  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
140  *****************************************************************************/
141 
142 /*
143  * cstring_to_text
144  *
145  * Create a text value from a null-terminated C string.
146  *
147  * The new text value is freshly palloc'd with a full-size VARHDR.
148  */
149 text *
150 cstring_to_text(const char *s)
151 {
152  return cstring_to_text_with_len(s, strlen(s));
153 }
154 
155 /*
156  * cstring_to_text_with_len
157  *
158  * Same as cstring_to_text except the caller specifies the string length;
159  * the string need not be null_terminated.
160  */
161 text *
162 cstring_to_text_with_len(const char *s, int len)
163 {
164  text *result = (text *) palloc(len + VARHDRSZ);
165 
166  SET_VARSIZE(result, len + VARHDRSZ);
167  memcpy(VARDATA(result), s, len);
168 
169  return result;
170 }
171 
172 /*
173  * text_to_cstring
174  *
175  * Create a palloc'd, null-terminated C string from a text value.
176  *
177  * We support being passed a compressed or toasted text value.
178  * This is a bit bogus since such values shouldn't really be referred to as
179  * "text *", but it seems useful for robustness. If we didn't handle that
180  * case here, we'd need another routine that did, anyway.
181  */
182 char *
184 {
185  /* must cast away the const, unfortunately */
186  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
187  int len = VARSIZE_ANY_EXHDR(tunpacked);
188  char *result;
189 
190  result = (char *) palloc(len + 1);
191  memcpy(result, VARDATA_ANY(tunpacked), len);
192  result[len] = '\0';
193 
194  if (tunpacked != t)
195  pfree(tunpacked);
196 
197  return result;
198 }
199 
200 /*
201  * text_to_cstring_buffer
202  *
203  * Copy a text value into a caller-supplied buffer of size dst_len.
204  *
205  * The text string is truncated if necessary to fit. The result is
206  * guaranteed null-terminated (unless dst_len == 0).
207  *
208  * We support being passed a compressed or toasted text value.
209  * This is a bit bogus since such values shouldn't really be referred to as
210  * "text *", but it seems useful for robustness. If we didn't handle that
211  * case here, we'd need another routine that did, anyway.
212  */
213 void
214 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
215 {
216  /* must cast away the const, unfortunately */
217  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
218  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
219 
220  if (dst_len > 0)
221  {
222  dst_len--;
223  if (dst_len >= src_len)
224  dst_len = src_len;
225  else /* ensure truncation is encoding-safe */
226  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
227  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
228  dst[dst_len] = '\0';
229  }
230 
231  if (srcunpacked != src)
232  pfree(srcunpacked);
233 }
234 
235 
236 /*****************************************************************************
237  * USER I/O ROUTINES *
238  *****************************************************************************/
239 
240 
241 #define VAL(CH) ((CH) - '0')
242 #define DIG(VAL) ((VAL) + '0')
243 
244 /*
245  * byteain - converts from printable representation of byte array
246  *
247  * Non-printable characters must be passed as '\nnn' (octal) and are
248  * converted to internal form. '\' must be passed as '\\'.
249  * ereport(ERROR, ...) if bad form.
250  *
251  * BUGS:
252  * The input is scanned twice.
253  * The error checking of input is minimal.
254  */
255 Datum
257 {
258  char *inputText = PG_GETARG_CSTRING(0);
259  char *tp;
260  char *rp;
261  int bc;
262  bytea *result;
263 
264  /* Recognize hex input */
265  if (inputText[0] == '\\' && inputText[1] == 'x')
266  {
267  size_t len = strlen(inputText);
268 
269  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
270  result = palloc(bc);
271  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
272  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
273 
274  PG_RETURN_BYTEA_P(result);
275  }
276 
277  /* Else, it's the traditional escaped style */
278  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
279  {
280  if (tp[0] != '\\')
281  tp++;
282  else if ((tp[0] == '\\') &&
283  (tp[1] >= '0' && tp[1] <= '3') &&
284  (tp[2] >= '0' && tp[2] <= '7') &&
285  (tp[3] >= '0' && tp[3] <= '7'))
286  tp += 4;
287  else if ((tp[0] == '\\') &&
288  (tp[1] == '\\'))
289  tp += 2;
290  else
291  {
292  /*
293  * one backslash, not followed by another or ### valid octal
294  */
295  ereport(ERROR,
296  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
297  errmsg("invalid input syntax for type bytea")));
298  }
299  }
300 
301  bc += VARHDRSZ;
302 
303  result = (bytea *) palloc(bc);
304  SET_VARSIZE(result, bc);
305 
306  tp = inputText;
307  rp = VARDATA(result);
308  while (*tp != '\0')
309  {
310  if (tp[0] != '\\')
311  *rp++ = *tp++;
312  else if ((tp[0] == '\\') &&
313  (tp[1] >= '0' && tp[1] <= '3') &&
314  (tp[2] >= '0' && tp[2] <= '7') &&
315  (tp[3] >= '0' && tp[3] <= '7'))
316  {
317  bc = VAL(tp[1]);
318  bc <<= 3;
319  bc += VAL(tp[2]);
320  bc <<= 3;
321  *rp++ = bc + VAL(tp[3]);
322 
323  tp += 4;
324  }
325  else if ((tp[0] == '\\') &&
326  (tp[1] == '\\'))
327  {
328  *rp++ = '\\';
329  tp += 2;
330  }
331  else
332  {
333  /*
334  * We should never get here. The first pass should not allow it.
335  */
336  ereport(ERROR,
337  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
338  errmsg("invalid input syntax for type bytea")));
339  }
340  }
341 
342  PG_RETURN_BYTEA_P(result);
343 }
344 
345 /*
346  * byteaout - converts to printable representation of byte array
347  *
348  * In the traditional escaped format, non-printable characters are
349  * printed as '\nnn' (octal) and '\' as '\\'.
350  */
351 Datum
353 {
354  bytea *vlena = PG_GETARG_BYTEA_PP(0);
355  char *result;
356  char *rp;
357 
359  {
360  /* Print hex format */
361  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
362  *rp++ = '\\';
363  *rp++ = 'x';
364  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
365  }
366  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
367  {
368  /* Print traditional escaped format */
369  char *vp;
370  int len;
371  int i;
372 
373  len = 1; /* empty string has 1 char */
374  vp = VARDATA_ANY(vlena);
375  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
376  {
377  if (*vp == '\\')
378  len += 2;
379  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
380  len += 4;
381  else
382  len++;
383  }
384  rp = result = (char *) palloc(len);
385  vp = VARDATA_ANY(vlena);
386  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
387  {
388  if (*vp == '\\')
389  {
390  *rp++ = '\\';
391  *rp++ = '\\';
392  }
393  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
394  {
395  int val; /* holds unprintable chars */
396 
397  val = *vp;
398  rp[0] = '\\';
399  rp[3] = DIG(val & 07);
400  val >>= 3;
401  rp[2] = DIG(val & 07);
402  val >>= 3;
403  rp[1] = DIG(val & 03);
404  rp += 4;
405  }
406  else
407  *rp++ = *vp;
408  }
409  }
410  else
411  {
412  elog(ERROR, "unrecognized bytea_output setting: %d",
413  bytea_output);
414  rp = result = NULL; /* keep compiler quiet */
415  }
416  *rp = '\0';
417  PG_RETURN_CSTRING(result);
418 }
419 
420 /*
421  * bytearecv - converts external binary format to bytea
422  */
423 Datum
425 {
427  bytea *result;
428  int nbytes;
429 
430  nbytes = buf->len - buf->cursor;
431  result = (bytea *) palloc(nbytes + VARHDRSZ);
432  SET_VARSIZE(result, nbytes + VARHDRSZ);
433  pq_copymsgbytes(buf, VARDATA(result), nbytes);
434  PG_RETURN_BYTEA_P(result);
435 }
436 
437 /*
438  * byteasend - converts bytea to binary format
439  *
440  * This is a special case: just copy the input...
441  */
442 Datum
444 {
445  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
446 
447  PG_RETURN_BYTEA_P(vlena);
448 }
449 
450 Datum
452 {
454 
455  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
456 
457  /* Append the value unless null. */
458  if (!PG_ARGISNULL(1))
459  {
461 
462  /* On the first time through, we ignore the delimiter. */
463  if (state == NULL)
464  state = makeStringAggState(fcinfo);
465  else if (!PG_ARGISNULL(2))
466  {
467  bytea *delim = PG_GETARG_BYTEA_PP(2);
468 
470  }
471 
473  }
474 
475  /*
476  * The transition type for string_agg() is declared to be "internal",
477  * which is a pass-by-value type the same size as a pointer.
478  */
479  PG_RETURN_POINTER(state);
480 }
481 
482 Datum
484 {
486 
487  /* cannot be called directly because of internal-type argument */
488  Assert(AggCheckCallContext(fcinfo, NULL));
489 
490  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
491 
492  if (state != NULL)
493  {
494  bytea *result;
495 
496  result = (bytea *) palloc(state->len + VARHDRSZ);
497  SET_VARSIZE(result, state->len + VARHDRSZ);
498  memcpy(VARDATA(result), state->data, state->len);
499  PG_RETURN_BYTEA_P(result);
500  }
501  else
502  PG_RETURN_NULL();
503 }
504 
505 /*
506  * textin - converts "..." to internal representation
507  */
508 Datum
510 {
511  char *inputText = PG_GETARG_CSTRING(0);
512 
513  PG_RETURN_TEXT_P(cstring_to_text(inputText));
514 }
515 
516 /*
517  * textout - converts internal representation to "..."
518  */
519 Datum
521 {
522  Datum txt = PG_GETARG_DATUM(0);
523 
525 }
526 
527 /*
528  * textrecv - converts external binary format to text
529  */
530 Datum
532 {
534  text *result;
535  char *str;
536  int nbytes;
537 
538  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
539 
540  result = cstring_to_text_with_len(str, nbytes);
541  pfree(str);
542  PG_RETURN_TEXT_P(result);
543 }
544 
545 /*
546  * textsend - converts text to binary format
547  */
548 Datum
550 {
551  text *t = PG_GETARG_TEXT_PP(0);
553 
554  pq_begintypsend(&buf);
557 }
558 
559 
560 /*
561  * unknownin - converts "..." to internal representation
562  */
563 Datum
565 {
566  char *str = PG_GETARG_CSTRING(0);
567 
568  /* representation is same as cstring */
570 }
571 
572 /*
573  * unknownout - converts internal representation to "..."
574  */
575 Datum
577 {
578  /* representation is same as cstring */
579  char *str = PG_GETARG_CSTRING(0);
580 
582 }
583 
584 /*
585  * unknownrecv - converts external binary format to unknown
586  */
587 Datum
589 {
591  char *str;
592  int nbytes;
593 
594  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
595  /* representation is same as cstring */
596  PG_RETURN_CSTRING(str);
597 }
598 
599 /*
600  * unknownsend - converts unknown to binary format
601  */
602 Datum
604 {
605  /* representation is same as cstring */
606  char *str = PG_GETARG_CSTRING(0);
608 
609  pq_begintypsend(&buf);
610  pq_sendtext(&buf, str, strlen(str));
612 }
613 
614 
615 /* ========== PUBLIC ROUTINES ========== */
616 
617 /*
618  * textlen -
619  * returns the logical length of a text*
620  * (which is less than the VARSIZE of the text*)
621  */
622 Datum
624 {
625  Datum str = PG_GETARG_DATUM(0);
626 
627  /* try to avoid decompressing argument */
629 }
630 
631 /*
632  * text_length -
633  * Does the real work for textlen()
634  *
635  * This is broken out so it can be called directly by other string processing
636  * functions. Note that the argument is passed as a Datum, to indicate that
637  * it may still be in compressed form. We can avoid decompressing it at all
638  * in some cases.
639  */
640 static int32
642 {
643  /* fastpath when max encoding length is one */
646  else
647  {
648  text *t = DatumGetTextPP(str);
649 
651  VARSIZE_ANY_EXHDR(t)));
652  }
653 }
654 
655 /*
656  * textoctetlen -
657  * returns the physical length of a text*
658  * (which is less than the VARSIZE of the text*)
659  */
660 Datum
662 {
663  Datum str = PG_GETARG_DATUM(0);
664 
665  /* We need not detoast the input at all */
667 }
668 
669 /*
670  * textcat -
671  * takes two text* and returns a text* that is the concatenation of
672  * the two.
673  *
674  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
675  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
676  * Allocate space for output in all cases.
677  * XXX - thomas 1997-07-10
678  */
679 Datum
681 {
682  text *t1 = PG_GETARG_TEXT_PP(0);
683  text *t2 = PG_GETARG_TEXT_PP(1);
684 
686 }
687 
688 /*
689  * text_catenate
690  * Guts of textcat(), broken out so it can be used by other functions
691  *
692  * Arguments can be in short-header form, but not compressed or out-of-line
693  */
694 static text *
696 {
697  text *result;
698  int len1,
699  len2,
700  len;
701  char *ptr;
702 
703  len1 = VARSIZE_ANY_EXHDR(t1);
704  len2 = VARSIZE_ANY_EXHDR(t2);
705 
706  /* paranoia ... probably should throw error instead? */
707  if (len1 < 0)
708  len1 = 0;
709  if (len2 < 0)
710  len2 = 0;
711 
712  len = len1 + len2 + VARHDRSZ;
713  result = (text *) palloc(len);
714 
715  /* Set size of result string... */
716  SET_VARSIZE(result, len);
717 
718  /* Fill data field of result string... */
719  ptr = VARDATA(result);
720  if (len1 > 0)
721  memcpy(ptr, VARDATA_ANY(t1), len1);
722  if (len2 > 0)
723  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
724 
725  return result;
726 }
727 
728 /*
729  * charlen_to_bytelen()
730  * Compute the number of bytes occupied by n characters starting at *p
731  *
732  * It is caller's responsibility that there actually are n characters;
733  * the string need not be null-terminated.
734  */
735 static int
736 charlen_to_bytelen(const char *p, int n)
737 {
739  {
740  /* Optimization for single-byte encodings */
741  return n;
742  }
743  else
744  {
745  const char *s;
746 
747  for (s = p; n > 0; n--)
748  s += pg_mblen(s);
749 
750  return s - p;
751  }
752 }
753 
754 /*
755  * text_substr()
756  * Return a substring starting at the specified position.
757  * - thomas 1997-12-31
758  *
759  * Input:
760  * - string
761  * - starting position (is one-based)
762  * - string length
763  *
764  * If the starting position is zero or less, then return from the start of the string
765  * adjusting the length to be consistent with the "negative start" per SQL.
766  * If the length is negative, an error is raised; if no length is given, return the remaining string.
767  *
768  * Added multibyte support.
769  * - Tatsuo Ishii 1998-4-21
770  * Changed behavior if starting position is less than one to conform to SQL behavior.
771  * Formerly returned the entire string; now returns a portion.
772  * - Thomas Lockhart 1998-12-10
773  * Now uses faster TOAST-slicing interface
774  * - John Gray 2002-02-22
775  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
776  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
777  * error; if E < 1, return '', not entire string). Fixed MB related bug when
778  * S > LC and < LC + 4 sometimes garbage characters are returned.
779  * - Joe Conway 2002-08-10
780  */
781 Datum
783 {
785  PG_GETARG_INT32(1),
786  PG_GETARG_INT32(2),
787  false));
788 }
789 
790 /*
791  * text_substr_no_len -
792  * Wrapper to avoid opr_sanity failure due to
793  * one function accepting a different number of args.
794  */
795 Datum
797 {
799  PG_GETARG_INT32(1),
800  -1, true));
801 }
802 
803 /*
804  * text_substring -
805  * Does the real work for text_substr() and text_substr_no_len()
806  *
807  * This is broken out so it can be called directly by other string processing
808  * functions. Note that the argument is passed as a Datum, to indicate that
809  * it may still be in compressed/toasted form. We can avoid detoasting all
810  * of it in some cases.
811  *
812  * The result is always a freshly palloc'd datum.
813  */
814 static text *
815 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
816 {
818  int32 S = start; /* start position */
819  int32 S1; /* adjusted start position */
820  int32 L1; /* adjusted substring length */
821 
822  /* life is easy if the encoding max length is 1 */
823  if (eml == 1)
824  {
825  S1 = Max(S, 1);
826 
827  if (length_not_specified) /* special case - get length to end of
828  * string */
829  L1 = -1;
830  else
831  {
832  /* end position */
833  int E = S + length;
834 
835  /*
836  * A negative value for L is the only way for the end position to
837  * be before the start. SQL99 says to throw an error.
838  */
839  if (E < S)
840  ereport(ERROR,
841  (errcode(ERRCODE_SUBSTRING_ERROR),
842  errmsg("negative substring length not allowed")));
843 
844  /*
845  * A zero or negative value for the end position can happen if the
846  * start was negative or one. SQL99 says to return a zero-length
847  * string.
848  */
849  if (E < 1)
850  return cstring_to_text("");
851 
852  L1 = E - S1;
853  }
854 
855  /*
856  * If the start position is past the end of the string, SQL99 says to
857  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
858  * that for us. Convert to zero-based starting position
859  */
860  return DatumGetTextPSlice(str, S1 - 1, L1);
861  }
862  else if (eml > 1)
863  {
864  /*
865  * When encoding max length is > 1, we can't get LC without
866  * detoasting, so we'll grab a conservatively large slice now and go
867  * back later to do the right thing
868  */
869  int32 slice_start;
870  int32 slice_size;
871  int32 slice_strlen;
872  text *slice;
873  int32 E1;
874  int32 i;
875  char *p;
876  char *s;
877  text *ret;
878 
879  /*
880  * if S is past the end of the string, the tuple toaster will return a
881  * zero-length string to us
882  */
883  S1 = Max(S, 1);
884 
885  /*
886  * We need to start at position zero because there is no way to know
887  * in advance which byte offset corresponds to the supplied start
888  * position.
889  */
890  slice_start = 0;
891 
892  if (length_not_specified) /* special case - get length to end of
893  * string */
894  slice_size = L1 = -1;
895  else
896  {
897  int E = S + length;
898 
899  /*
900  * A negative value for L is the only way for the end position to
901  * be before the start. SQL99 says to throw an error.
902  */
903  if (E < S)
904  ereport(ERROR,
905  (errcode(ERRCODE_SUBSTRING_ERROR),
906  errmsg("negative substring length not allowed")));
907 
908  /*
909  * A zero or negative value for the end position can happen if the
910  * start was negative or one. SQL99 says to return a zero-length
911  * string.
912  */
913  if (E < 1)
914  return cstring_to_text("");
915 
916  /*
917  * if E is past the end of the string, the tuple toaster will
918  * truncate the length for us
919  */
920  L1 = E - S1;
921 
922  /*
923  * Total slice size in bytes can't be any longer than the start
924  * position plus substring length times the encoding max length.
925  */
926  slice_size = (S1 + L1) * eml;
927  }
928 
929  /*
930  * If we're working with an untoasted source, no need to do an extra
931  * copying step.
932  */
935  slice = DatumGetTextPSlice(str, slice_start, slice_size);
936  else
937  slice = (text *) DatumGetPointer(str);
938 
939  /* see if we got back an empty string */
940  if (VARSIZE_ANY_EXHDR(slice) == 0)
941  {
942  if (slice != (text *) DatumGetPointer(str))
943  pfree(slice);
944  return cstring_to_text("");
945  }
946 
947  /* Now we can get the actual length of the slice in MB characters */
948  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
949  VARSIZE_ANY_EXHDR(slice));
950 
951  /*
952  * Check that the start position wasn't > slice_strlen. If so, SQL99
953  * says to return a zero-length string.
954  */
955  if (S1 > slice_strlen)
956  {
957  if (slice != (text *) DatumGetPointer(str))
958  pfree(slice);
959  return cstring_to_text("");
960  }
961 
962  /*
963  * Adjust L1 and E1 now that we know the slice string length. Again
964  * remember that S1 is one based, and slice_start is zero based.
965  */
966  if (L1 > -1)
967  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
968  else
969  E1 = slice_start + 1 + slice_strlen;
970 
971  /*
972  * Find the start position in the slice; remember S1 is not zero based
973  */
974  p = VARDATA_ANY(slice);
975  for (i = 0; i < S1 - 1; i++)
976  p += pg_mblen(p);
977 
978  /* hang onto a pointer to our start position */
979  s = p;
980 
981  /*
982  * Count the actual bytes used by the substring of the requested
983  * length.
984  */
985  for (i = S1; i < E1; i++)
986  p += pg_mblen(p);
987 
988  ret = (text *) palloc(VARHDRSZ + (p - s));
989  SET_VARSIZE(ret, VARHDRSZ + (p - s));
990  memcpy(VARDATA(ret), s, (p - s));
991 
992  if (slice != (text *) DatumGetPointer(str))
993  pfree(slice);
994 
995  return ret;
996  }
997  else
998  elog(ERROR, "invalid backend encoding: encoding max length < 1");
999 
1000  /* not reached: suppress compiler warning */
1001  return NULL;
1002 }
1003 
1004 /*
1005  * textoverlay
1006  * Replace specified substring of first string with second
1007  *
1008  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1009  * This code is a direct implementation of what the standard says.
1010  */
1011 Datum
1013 {
1014  text *t1 = PG_GETARG_TEXT_PP(0);
1015  text *t2 = PG_GETARG_TEXT_PP(1);
1016  int sp = PG_GETARG_INT32(2); /* substring start position */
1017  int sl = PG_GETARG_INT32(3); /* substring length */
1018 
1019  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1020 }
1021 
1022 Datum
1024 {
1025  text *t1 = PG_GETARG_TEXT_PP(0);
1026  text *t2 = PG_GETARG_TEXT_PP(1);
1027  int sp = PG_GETARG_INT32(2); /* substring start position */
1028  int sl;
1029 
1030  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1031  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1032 }
1033 
1034 static text *
1035 text_overlay(text *t1, text *t2, int sp, int sl)
1036 {
1037  text *result;
1038  text *s1;
1039  text *s2;
1040  int sp_pl_sl;
1041 
1042  /*
1043  * Check for possible integer-overflow cases. For negative sp, throw a
1044  * "substring length" error because that's what should be expected
1045  * according to the spec's definition of OVERLAY().
1046  */
1047  if (sp <= 0)
1048  ereport(ERROR,
1049  (errcode(ERRCODE_SUBSTRING_ERROR),
1050  errmsg("negative substring length not allowed")));
1051  sp_pl_sl = sp + sl;
1052  if (sp_pl_sl <= sl)
1053  ereport(ERROR,
1054  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1055  errmsg("integer out of range")));
1056 
1057  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1058  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1059  result = text_catenate(s1, t2);
1060  result = text_catenate(result, s2);
1061 
1062  return result;
1063 }
1064 
1065 /*
1066  * textpos -
1067  * Return the position of the specified substring.
1068  * Implements the SQL POSITION() function.
1069  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1070  * - thomas 1997-07-27
1071  */
1072 Datum
1074 {
1075  text *str = PG_GETARG_TEXT_PP(0);
1076  text *search_str = PG_GETARG_TEXT_PP(1);
1077 
1078  PG_RETURN_INT32((int32) text_position(str, search_str));
1079 }
1080 
1081 /*
1082  * text_position -
1083  * Does the real work for textpos()
1084  *
1085  * Inputs:
1086  * t1 - string to be searched
1087  * t2 - pattern to match within t1
1088  * Result:
1089  * Character index of the first matched char, starting from 1,
1090  * or 0 if no match.
1091  *
1092  * This is broken out so it can be called directly by other string processing
1093  * functions.
1094  */
1095 static int
1097 {
1099  int result;
1100 
1101  text_position_setup(t1, t2, &state);
1102  result = text_position_next(1, &state);
1103  text_position_cleanup(&state);
1104  return result;
1105 }
1106 
1107 
1108 /*
1109  * text_position_setup, text_position_next, text_position_cleanup -
1110  * Component steps of text_position()
1111  *
1112  * These are broken out so that a string can be efficiently searched for
1113  * multiple occurrences of the same pattern. text_position_next may be
1114  * called multiple times with increasing values of start_pos, which is
1115  * the 1-based character position to start the search from. The "state"
1116  * variable is normally just a local variable in the caller.
1117  */
1118 
1119 static void
1121 {
1122  int len1 = VARSIZE_ANY_EXHDR(t1);
1123  int len2 = VARSIZE_ANY_EXHDR(t2);
1124 
1126  {
1127  /* simple case - single byte encoding */
1128  state->use_wchar = false;
1129  state->str1 = VARDATA_ANY(t1);
1130  state->str2 = VARDATA_ANY(t2);
1131  state->len1 = len1;
1132  state->len2 = len2;
1133  }
1134  else
1135  {
1136  /* not as simple - multibyte encoding */
1137  pg_wchar *p1,
1138  *p2;
1139 
1140  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1141  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1142  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1143  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1144 
1145  state->use_wchar = true;
1146  state->wstr1 = p1;
1147  state->wstr2 = p2;
1148  state->len1 = len1;
1149  state->len2 = len2;
1150  }
1151 
1152  /*
1153  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1154  * notes we use the terminology that the "haystack" is the string to be
1155  * searched (t1) and the "needle" is the pattern being sought (t2).
1156  *
1157  * If the needle is empty or bigger than the haystack then there is no
1158  * point in wasting cycles initializing the table. We also choose not to
1159  * use B-M-H for needles of length 1, since the skip table can't possibly
1160  * save anything in that case.
1161  */
1162  if (len1 >= len2 && len2 > 1)
1163  {
1164  int searchlength = len1 - len2;
1165  int skiptablemask;
1166  int last;
1167  int i;
1168 
1169  /*
1170  * First we must determine how much of the skip table to use. The
1171  * declaration of TextPositionState allows up to 256 elements, but for
1172  * short search problems we don't really want to have to initialize so
1173  * many elements --- it would take too long in comparison to the
1174  * actual search time. So we choose a useful skip table size based on
1175  * the haystack length minus the needle length. The closer the needle
1176  * length is to the haystack length the less useful skipping becomes.
1177  *
1178  * Note: since we use bit-masking to select table elements, the skip
1179  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1180  */
1181  if (searchlength < 16)
1182  skiptablemask = 3;
1183  else if (searchlength < 64)
1184  skiptablemask = 7;
1185  else if (searchlength < 128)
1186  skiptablemask = 15;
1187  else if (searchlength < 512)
1188  skiptablemask = 31;
1189  else if (searchlength < 2048)
1190  skiptablemask = 63;
1191  else if (searchlength < 4096)
1192  skiptablemask = 127;
1193  else
1194  skiptablemask = 255;
1195  state->skiptablemask = skiptablemask;
1196 
1197  /*
1198  * Initialize the skip table. We set all elements to the needle
1199  * length, since this is the correct skip distance for any character
1200  * not found in the needle.
1201  */
1202  for (i = 0; i <= skiptablemask; i++)
1203  state->skiptable[i] = len2;
1204 
1205  /*
1206  * Now examine the needle. For each character except the last one,
1207  * set the corresponding table element to the appropriate skip
1208  * distance. Note that when two characters share the same skip table
1209  * entry, the one later in the needle must determine the skip
1210  * distance.
1211  */
1212  last = len2 - 1;
1213 
1214  if (!state->use_wchar)
1215  {
1216  const char *str2 = state->str2;
1217 
1218  for (i = 0; i < last; i++)
1219  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1220  }
1221  else
1222  {
1223  const pg_wchar *wstr2 = state->wstr2;
1224 
1225  for (i = 0; i < last; i++)
1226  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1227  }
1228  }
1229 }
1230 
1231 static int
1233 {
1234  int haystack_len = state->len1;
1235  int needle_len = state->len2;
1236  int skiptablemask = state->skiptablemask;
1237 
1238  Assert(start_pos > 0); /* else caller error */
1239 
1240  if (needle_len <= 0)
1241  return start_pos; /* result for empty pattern */
1242 
1243  start_pos--; /* adjust for zero based arrays */
1244 
1245  /* Done if the needle can't possibly fit */
1246  if (haystack_len < start_pos + needle_len)
1247  return 0;
1248 
1249  if (!state->use_wchar)
1250  {
1251  /* simple case - single byte encoding */
1252  const char *haystack = state->str1;
1253  const char *needle = state->str2;
1254  const char *haystack_end = &haystack[haystack_len];
1255  const char *hptr;
1256 
1257  if (needle_len == 1)
1258  {
1259  /* No point in using B-M-H for a one-character needle */
1260  char nchar = *needle;
1261 
1262  hptr = &haystack[start_pos];
1263  while (hptr < haystack_end)
1264  {
1265  if (*hptr == nchar)
1266  return hptr - haystack + 1;
1267  hptr++;
1268  }
1269  }
1270  else
1271  {
1272  const char *needle_last = &needle[needle_len - 1];
1273 
1274  /* Start at startpos plus the length of the needle */
1275  hptr = &haystack[start_pos + needle_len - 1];
1276  while (hptr < haystack_end)
1277  {
1278  /* Match the needle scanning *backward* */
1279  const char *nptr;
1280  const char *p;
1281 
1282  nptr = needle_last;
1283  p = hptr;
1284  while (*nptr == *p)
1285  {
1286  /* Matched it all? If so, return 1-based position */
1287  if (nptr == needle)
1288  return p - haystack + 1;
1289  nptr--, p--;
1290  }
1291 
1292  /*
1293  * No match, so use the haystack char at hptr to decide how
1294  * far to advance. If the needle had any occurrence of that
1295  * character (or more precisely, one sharing the same
1296  * skiptable entry) before its last character, then we advance
1297  * far enough to align the last such needle character with
1298  * that haystack position. Otherwise we can advance by the
1299  * whole needle length.
1300  */
1301  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1302  }
1303  }
1304  }
1305  else
1306  {
1307  /* The multibyte char version. This works exactly the same way. */
1308  const pg_wchar *haystack = state->wstr1;
1309  const pg_wchar *needle = state->wstr2;
1310  const pg_wchar *haystack_end = &haystack[haystack_len];
1311  const pg_wchar *hptr;
1312 
1313  if (needle_len == 1)
1314  {
1315  /* No point in using B-M-H for a one-character needle */
1316  pg_wchar nchar = *needle;
1317 
1318  hptr = &haystack[start_pos];
1319  while (hptr < haystack_end)
1320  {
1321  if (*hptr == nchar)
1322  return hptr - haystack + 1;
1323  hptr++;
1324  }
1325  }
1326  else
1327  {
1328  const pg_wchar *needle_last = &needle[needle_len - 1];
1329 
1330  /* Start at startpos plus the length of the needle */
1331  hptr = &haystack[start_pos + needle_len - 1];
1332  while (hptr < haystack_end)
1333  {
1334  /* Match the needle scanning *backward* */
1335  const pg_wchar *nptr;
1336  const pg_wchar *p;
1337 
1338  nptr = needle_last;
1339  p = hptr;
1340  while (*nptr == *p)
1341  {
1342  /* Matched it all? If so, return 1-based position */
1343  if (nptr == needle)
1344  return p - haystack + 1;
1345  nptr--, p--;
1346  }
1347 
1348  /*
1349  * No match, so use the haystack char at hptr to decide how
1350  * far to advance. If the needle had any occurrence of that
1351  * character (or more precisely, one sharing the same
1352  * skiptable entry) before its last character, then we advance
1353  * far enough to align the last such needle character with
1354  * that haystack position. Otherwise we can advance by the
1355  * whole needle length.
1356  */
1357  hptr += state->skiptable[*hptr & skiptablemask];
1358  }
1359  }
1360  }
1361 
1362  return 0; /* not found */
1363 }
1364 
1365 static void
1367 {
1368  if (state->use_wchar)
1369  {
1370  pfree(state->wstr1);
1371  pfree(state->wstr2);
1372  }
1373 }
1374 
1375 /* varstr_cmp()
1376  * Comparison function for text strings with given lengths.
1377  * Includes locale support, but must copy strings to temporary memory
1378  * to allow null-termination for inputs to strcoll().
1379  * Returns an integer less than, equal to, or greater than zero, indicating
1380  * whether arg1 is less than, equal to, or greater than arg2.
1381  */
1382 int
1383 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1384 {
1385  int result;
1386 
1387  /*
1388  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1389  * have to do some memory copying. This turns out to be significantly
1390  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1391  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1392  */
1393  if (lc_collate_is_c(collid))
1394  {
1395  result = memcmp(arg1, arg2, Min(len1, len2));
1396  if ((result == 0) && (len1 != len2))
1397  result = (len1 < len2) ? -1 : 1;
1398  }
1399  else
1400  {
1401  char a1buf[TEXTBUFLEN];
1402  char a2buf[TEXTBUFLEN];
1403  char *a1p,
1404  *a2p;
1405 
1406 #ifdef HAVE_LOCALE_T
1407  pg_locale_t mylocale = 0;
1408 #endif
1409 
1410  if (collid != DEFAULT_COLLATION_OID)
1411  {
1412  if (!OidIsValid(collid))
1413  {
1414  /*
1415  * This typically means that the parser could not resolve a
1416  * conflict of implicit collations, so report it that way.
1417  */
1418  ereport(ERROR,
1419  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1420  errmsg("could not determine which collation to use for string comparison"),
1421  errhint("Use the COLLATE clause to set the collation explicitly.")));
1422  }
1423 #ifdef HAVE_LOCALE_T
1424  mylocale = pg_newlocale_from_collation(collid);
1425 #endif
1426  }
1427 
1428  /*
1429  * memcmp() can't tell us which of two unequal strings sorts first,
1430  * but it's a cheap way to tell if they're equal. Testing shows that
1431  * memcmp() followed by strcoll() is only trivially slower than
1432  * strcoll() by itself, so we don't lose much if this doesn't work out
1433  * very often, and if it does - for example, because there are many
1434  * equal strings in the input - then we win big by avoiding expensive
1435  * collation-aware comparisons.
1436  */
1437  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1438  return 0;
1439 
1440 #ifdef WIN32
1441  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1442  if (GetDatabaseEncoding() == PG_UTF8)
1443  {
1444  int a1len;
1445  int a2len;
1446  int r;
1447 
1448  if (len1 >= TEXTBUFLEN / 2)
1449  {
1450  a1len = len1 * 2 + 2;
1451  a1p = palloc(a1len);
1452  }
1453  else
1454  {
1455  a1len = TEXTBUFLEN;
1456  a1p = a1buf;
1457  }
1458  if (len2 >= TEXTBUFLEN / 2)
1459  {
1460  a2len = len2 * 2 + 2;
1461  a2p = palloc(a2len);
1462  }
1463  else
1464  {
1465  a2len = TEXTBUFLEN;
1466  a2p = a2buf;
1467  }
1468 
1469  /* stupid Microsloth API does not work for zero-length input */
1470  if (len1 == 0)
1471  r = 0;
1472  else
1473  {
1474  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1475  (LPWSTR) a1p, a1len / 2);
1476  if (!r)
1477  ereport(ERROR,
1478  (errmsg("could not convert string to UTF-16: error code %lu",
1479  GetLastError())));
1480  }
1481  ((LPWSTR) a1p)[r] = 0;
1482 
1483  if (len2 == 0)
1484  r = 0;
1485  else
1486  {
1487  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1488  (LPWSTR) a2p, a2len / 2);
1489  if (!r)
1490  ereport(ERROR,
1491  (errmsg("could not convert string to UTF-16: error code %lu",
1492  GetLastError())));
1493  }
1494  ((LPWSTR) a2p)[r] = 0;
1495 
1496  errno = 0;
1497 #ifdef HAVE_LOCALE_T
1498  if (mylocale)
1499  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
1500  else
1501 #endif
1502  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1503  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1504  * headers */
1505  ereport(ERROR,
1506  (errmsg("could not compare Unicode strings: %m")));
1507 
1508  /*
1509  * In some locales wcscoll() can claim that nonidentical strings
1510  * are equal. Believing that would be bad news for a number of
1511  * reasons, so we follow Perl's lead and sort "equal" strings
1512  * according to strcmp (on the UTF-8 representation).
1513  */
1514  if (result == 0)
1515  {
1516  result = memcmp(arg1, arg2, Min(len1, len2));
1517  if ((result == 0) && (len1 != len2))
1518  result = (len1 < len2) ? -1 : 1;
1519  }
1520 
1521  if (a1p != a1buf)
1522  pfree(a1p);
1523  if (a2p != a2buf)
1524  pfree(a2p);
1525 
1526  return result;
1527  }
1528 #endif /* WIN32 */
1529 
1530  if (len1 >= TEXTBUFLEN)
1531  a1p = (char *) palloc(len1 + 1);
1532  else
1533  a1p = a1buf;
1534  if (len2 >= TEXTBUFLEN)
1535  a2p = (char *) palloc(len2 + 1);
1536  else
1537  a2p = a2buf;
1538 
1539  memcpy(a1p, arg1, len1);
1540  a1p[len1] = '\0';
1541  memcpy(a2p, arg2, len2);
1542  a2p[len2] = '\0';
1543 
1544 #ifdef HAVE_LOCALE_T
1545  if (mylocale)
1546  result = strcoll_l(a1p, a2p, mylocale);
1547  else
1548 #endif
1549  result = strcoll(a1p, a2p);
1550 
1551  /*
1552  * In some locales strcoll() can claim that nonidentical strings are
1553  * equal. Believing that would be bad news for a number of reasons,
1554  * so we follow Perl's lead and sort "equal" strings according to
1555  * strcmp().
1556  */
1557  if (result == 0)
1558  result = strcmp(a1p, a2p);
1559 
1560  if (a1p != a1buf)
1561  pfree(a1p);
1562  if (a2p != a2buf)
1563  pfree(a2p);
1564  }
1565 
1566  return result;
1567 }
1568 
1569 /* text_cmp()
1570  * Internal comparison function for text strings.
1571  * Returns -1, 0 or 1
1572  */
1573 static int
1574 text_cmp(text *arg1, text *arg2, Oid collid)
1575 {
1576  char *a1p,
1577  *a2p;
1578  int len1,
1579  len2;
1580 
1581  a1p = VARDATA_ANY(arg1);
1582  a2p = VARDATA_ANY(arg2);
1583 
1584  len1 = VARSIZE_ANY_EXHDR(arg1);
1585  len2 = VARSIZE_ANY_EXHDR(arg2);
1586 
1587  return varstr_cmp(a1p, len1, a2p, len2, collid);
1588 }
1589 
1590 /*
1591  * Comparison functions for text strings.
1592  *
1593  * Note: btree indexes need these routines not to leak memory; therefore,
1594  * be careful to free working copies of toasted datums. Most places don't
1595  * need to be so careful.
1596  */
1597 
1598 Datum
1600 {
1601  Datum arg1 = PG_GETARG_DATUM(0);
1602  Datum arg2 = PG_GETARG_DATUM(1);
1603  bool result;
1604  Size len1,
1605  len2;
1606 
1607  /*
1608  * Since we only care about equality or not-equality, we can avoid all the
1609  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1610  * don't even have to do a bitwise comparison if we can show the lengths
1611  * of the strings are unequal; which might save us from having to detoast
1612  * one or both values.
1613  */
1614  len1 = toast_raw_datum_size(arg1);
1615  len2 = toast_raw_datum_size(arg2);
1616  if (len1 != len2)
1617  result = false;
1618  else
1619  {
1620  text *targ1 = DatumGetTextPP(arg1);
1621  text *targ2 = DatumGetTextPP(arg2);
1622 
1623  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1624  len1 - VARHDRSZ) == 0);
1625 
1626  PG_FREE_IF_COPY(targ1, 0);
1627  PG_FREE_IF_COPY(targ2, 1);
1628  }
1629 
1630  PG_RETURN_BOOL(result);
1631 }
1632 
1633 Datum
1635 {
1636  Datum arg1 = PG_GETARG_DATUM(0);
1637  Datum arg2 = PG_GETARG_DATUM(1);
1638  bool result;
1639  Size len1,
1640  len2;
1641 
1642  /* See comment in texteq() */
1643  len1 = toast_raw_datum_size(arg1);
1644  len2 = toast_raw_datum_size(arg2);
1645  if (len1 != len2)
1646  result = true;
1647  else
1648  {
1649  text *targ1 = DatumGetTextPP(arg1);
1650  text *targ2 = DatumGetTextPP(arg2);
1651 
1652  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1653  len1 - VARHDRSZ) != 0);
1654 
1655  PG_FREE_IF_COPY(targ1, 0);
1656  PG_FREE_IF_COPY(targ2, 1);
1657  }
1658 
1659  PG_RETURN_BOOL(result);
1660 }
1661 
1662 Datum
1664 {
1665  text *arg1 = PG_GETARG_TEXT_PP(0);
1666  text *arg2 = PG_GETARG_TEXT_PP(1);
1667  bool result;
1668 
1669  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1670 
1671  PG_FREE_IF_COPY(arg1, 0);
1672  PG_FREE_IF_COPY(arg2, 1);
1673 
1674  PG_RETURN_BOOL(result);
1675 }
1676 
1677 Datum
1679 {
1680  text *arg1 = PG_GETARG_TEXT_PP(0);
1681  text *arg2 = PG_GETARG_TEXT_PP(1);
1682  bool result;
1683 
1684  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1685 
1686  PG_FREE_IF_COPY(arg1, 0);
1687  PG_FREE_IF_COPY(arg2, 1);
1688 
1689  PG_RETURN_BOOL(result);
1690 }
1691 
1692 Datum
1694 {
1695  text *arg1 = PG_GETARG_TEXT_PP(0);
1696  text *arg2 = PG_GETARG_TEXT_PP(1);
1697  bool result;
1698 
1699  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1700 
1701  PG_FREE_IF_COPY(arg1, 0);
1702  PG_FREE_IF_COPY(arg2, 1);
1703 
1704  PG_RETURN_BOOL(result);
1705 }
1706 
1707 Datum
1709 {
1710  text *arg1 = PG_GETARG_TEXT_PP(0);
1711  text *arg2 = PG_GETARG_TEXT_PP(1);
1712  bool result;
1713 
1714  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1715 
1716  PG_FREE_IF_COPY(arg1, 0);
1717  PG_FREE_IF_COPY(arg2, 1);
1718 
1719  PG_RETURN_BOOL(result);
1720 }
1721 
1722 Datum
1724 {
1725  text *arg1 = PG_GETARG_TEXT_PP(0);
1726  text *arg2 = PG_GETARG_TEXT_PP(1);
1727  int32 result;
1728 
1729  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1730 
1731  PG_FREE_IF_COPY(arg1, 0);
1732  PG_FREE_IF_COPY(arg2, 1);
1733 
1734  PG_RETURN_INT32(result);
1735 }
1736 
1737 Datum
1739 {
1741  Oid collid = ssup->ssup_collation;
1742  MemoryContext oldcontext;
1743 
1744  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1745 
1746  /* Use generic string SortSupport */
1747  varstr_sortsupport(ssup, collid, false);
1748 
1749  MemoryContextSwitchTo(oldcontext);
1750 
1751  PG_RETURN_VOID();
1752 }
1753 
1754 /*
1755  * Generic sortsupport interface for character type's operator classes.
1756  * Includes locale support, and support for BpChar semantics (i.e. removing
1757  * trailing spaces before comparison).
1758  *
1759  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1760  * same representation. Callers that always use the C collation (e.g.
1761  * non-collatable type callers like bytea) may have NUL bytes in their strings;
1762  * this will not work with any other collation, though.
1763  */
1764 void
1766 {
1767  bool abbreviate = ssup->abbreviate;
1768  bool collate_c = false;
1769  VarStringSortSupport *sss;
1770 
1771 #ifdef HAVE_LOCALE_T
1772  pg_locale_t locale = 0;
1773 #endif
1774 
1775  /*
1776  * If possible, set ssup->comparator to a function which can be used to
1777  * directly compare two datums. If we can do this, we'll avoid the
1778  * overhead of a trip through the fmgr layer for every comparison, which
1779  * can be substantial.
1780  *
1781  * Most typically, we'll set the comparator to varstrfastcmp_locale, which
1782  * uses strcoll() to perform comparisons and knows about the special
1783  * requirements of BpChar callers. However, if LC_COLLATE = C, we can
1784  * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
1785  * both of which use memcmp() rather than strcoll().
1786  *
1787  * There is a further exception on Windows. When the database encoding is
1788  * UTF-8 and we are not using the C collation, complex hacks are required.
1789  * We don't currently have a comparator that handles that case, so we fall
1790  * back on the slow method of having the sort code invoke bttextcmp() (in
1791  * the case of text) via the fmgr trampoline.
1792  */
1793  if (lc_collate_is_c(collid))
1794  {
1795  if (!bpchar)
1796  ssup->comparator = varstrfastcmp_c;
1797  else
1798  ssup->comparator = bpcharfastcmp_c;
1799 
1800  collate_c = true;
1801  }
1802 #ifdef WIN32
1803  else if (GetDatabaseEncoding() == PG_UTF8)
1804  return;
1805 #endif
1806  else
1807  {
1809 
1810  /*
1811  * We need a collation-sensitive comparison. To make things faster,
1812  * we'll figure out the collation based on the locale id and cache the
1813  * result.
1814  */
1815  if (collid != DEFAULT_COLLATION_OID)
1816  {
1817  if (!OidIsValid(collid))
1818  {
1819  /*
1820  * This typically means that the parser could not resolve a
1821  * conflict of implicit collations, so report it that way.
1822  */
1823  ereport(ERROR,
1824  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1825  errmsg("could not determine which collation to use for string comparison"),
1826  errhint("Use the COLLATE clause to set the collation explicitly.")));
1827  }
1828 #ifdef HAVE_LOCALE_T
1829  locale = pg_newlocale_from_collation(collid);
1830 #endif
1831  }
1832  }
1833 
1834  /*
1835  * Unfortunately, it seems that abbreviation for non-C collations is
1836  * broken on many common platforms; testing of multiple versions of glibc
1837  * reveals that, for many locales, strcoll() and strxfrm() do not return
1838  * consistent results, which is fatal to this optimization. While no
1839  * other libc other than Cygwin has so far been shown to have a problem,
1840  * we take the conservative course of action for right now and disable
1841  * this categorically. (Users who are certain this isn't a problem on
1842  * their system can define TRUST_STRXFRM.)
1843  *
1844  * Even apart from the risk of broken locales, it's possible that there
1845  * are platforms where the use of abbreviated keys should be disabled at
1846  * compile time. Having only 4 byte datums could make worst-case
1847  * performance drastically more likely, for example. Moreover, Darwin's
1848  * strxfrm() implementations is known to not effectively concentrate a
1849  * significant amount of entropy from the original string in earlier
1850  * transformed blobs. It's possible that other supported platforms are
1851  * similarly encumbered. So, if we ever get past disabling this
1852  * categorically, we may still want or need to disable it for particular
1853  * platforms.
1854  */
1855 #ifndef TRUST_STRXFRM
1856  if (!collate_c)
1857  abbreviate = false;
1858 #endif
1859 
1860  /*
1861  * If we're using abbreviated keys, or if we're using a locale-aware
1862  * comparison, we need to initialize a StringSortSupport object. Both
1863  * cases will make use of the temporary buffers we initialize here for
1864  * scratch space (and to detect requirement for BpChar semantics from
1865  * caller), and the abbreviation case requires additional state.
1866  */
1867  if (abbreviate || !collate_c)
1868  {
1869  sss = palloc(sizeof(VarStringSortSupport));
1870  sss->buf1 = palloc(TEXTBUFLEN);
1871  sss->buflen1 = TEXTBUFLEN;
1872  sss->buf2 = palloc(TEXTBUFLEN);
1873  sss->buflen2 = TEXTBUFLEN;
1874  /* Start with invalid values */
1875  sss->last_len1 = -1;
1876  sss->last_len2 = -1;
1877  /* Initialize */
1878  sss->last_returned = 0;
1879 #ifdef HAVE_LOCALE_T
1880  sss->locale = locale;
1881 #endif
1882 
1883  /*
1884  * To avoid somehow confusing a strxfrm() blob and an original string,
1885  * constantly keep track of the variety of data that buf1 and buf2
1886  * currently contain.
1887  *
1888  * Comparisons may be interleaved with conversion calls. Frequently,
1889  * conversions and comparisons are batched into two distinct phases,
1890  * but the correctness of caching cannot hinge upon this. For
1891  * comparison caching, buffer state is only trusted if cache_blob is
1892  * found set to false, whereas strxfrm() caching only trusts the state
1893  * when cache_blob is found set to true.
1894  *
1895  * Arbitrarily initialize cache_blob to true.
1896  */
1897  sss->cache_blob = true;
1898  sss->collate_c = collate_c;
1899  sss->bpchar = bpchar;
1900  ssup->ssup_extra = sss;
1901 
1902  /*
1903  * If possible, plan to use the abbreviated keys optimization. The
1904  * core code may switch back to authoritative comparator should
1905  * abbreviation be aborted.
1906  */
1907  if (abbreviate)
1908  {
1909  sss->prop_card = 0.20;
1910  initHyperLogLog(&sss->abbr_card, 10);
1911  initHyperLogLog(&sss->full_card, 10);
1912  ssup->abbrev_full_comparator = ssup->comparator;
1913  ssup->comparator = varstrcmp_abbrev;
1916  }
1917  }
1918 }
1919 
1920 /*
1921  * sortsupport comparison func (for C locale case)
1922  */
1923 static int
1925 {
1926  VarString *arg1 = DatumGetVarStringPP(x);
1927  VarString *arg2 = DatumGetVarStringPP(y);
1928  char *a1p,
1929  *a2p;
1930  int len1,
1931  len2,
1932  result;
1933 
1934  a1p = VARDATA_ANY(arg1);
1935  a2p = VARDATA_ANY(arg2);
1936 
1937  len1 = VARSIZE_ANY_EXHDR(arg1);
1938  len2 = VARSIZE_ANY_EXHDR(arg2);
1939 
1940  result = memcmp(a1p, a2p, Min(len1, len2));
1941  if ((result == 0) && (len1 != len2))
1942  result = (len1 < len2) ? -1 : 1;
1943 
1944  /* We can't afford to leak memory here. */
1945  if (PointerGetDatum(arg1) != x)
1946  pfree(arg1);
1947  if (PointerGetDatum(arg2) != y)
1948  pfree(arg2);
1949 
1950  return result;
1951 }
1952 
1953 /*
1954  * sortsupport comparison func (for BpChar C locale case)
1955  *
1956  * BpChar outsources its sortsupport to this module. Specialization for the
1957  * varstr_sortsupport BpChar case, modeled on
1958  * internal_bpchar_pattern_compare().
1959  */
1960 static int
1962 {
1963  BpChar *arg1 = DatumGetBpCharPP(x);
1964  BpChar *arg2 = DatumGetBpCharPP(y);
1965  char *a1p,
1966  *a2p;
1967  int len1,
1968  len2,
1969  result;
1970 
1971  a1p = VARDATA_ANY(arg1);
1972  a2p = VARDATA_ANY(arg2);
1973 
1974  len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
1975  len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
1976 
1977  result = memcmp(a1p, a2p, Min(len1, len2));
1978  if ((result == 0) && (len1 != len2))
1979  result = (len1 < len2) ? -1 : 1;
1980 
1981  /* We can't afford to leak memory here. */
1982  if (PointerGetDatum(arg1) != x)
1983  pfree(arg1);
1984  if (PointerGetDatum(arg2) != y)
1985  pfree(arg2);
1986 
1987  return result;
1988 }
1989 
1990 /*
1991  * sortsupport comparison func (for locale case)
1992  */
1993 static int
1995 {
1996  VarString *arg1 = DatumGetVarStringPP(x);
1997  VarString *arg2 = DatumGetVarStringPP(y);
1998  bool arg1_match;
2000 
2001  /* working state */
2002  char *a1p,
2003  *a2p;
2004  int len1,
2005  len2,
2006  result;
2007 
2008  a1p = VARDATA_ANY(arg1);
2009  a2p = VARDATA_ANY(arg2);
2010 
2011  len1 = VARSIZE_ANY_EXHDR(arg1);
2012  len2 = VARSIZE_ANY_EXHDR(arg2);
2013 
2014  /* Fast pre-check for equality, as discussed in varstr_cmp() */
2015  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2016  {
2017  /*
2018  * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2019  * last_len2. Existing contents of buffers might still be used by
2020  * next call.
2021  *
2022  * It's fine to allow the comparison of BpChar padding bytes here,
2023  * even though that implies that the memcmp() will usually be
2024  * performed for BpChar callers (though multibyte characters could
2025  * still prevent that from occurring). The memcmp() is still very
2026  * cheap, and BpChar's funny semantics have us remove trailing spaces
2027  * (not limited to padding), so we need make no distinction between
2028  * padding space characters and "real" space characters.
2029  */
2030  result = 0;
2031  goto done;
2032  }
2033 
2034  if (sss->bpchar)
2035  {
2036  /* Get true number of bytes, ignoring trailing spaces */
2037  len1 = bpchartruelen(a1p, len1);
2038  len2 = bpchartruelen(a2p, len2);
2039  }
2040 
2041  if (len1 >= sss->buflen1)
2042  {
2043  pfree(sss->buf1);
2044  sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2045  sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2046  }
2047  if (len2 >= sss->buflen2)
2048  {
2049  pfree(sss->buf2);
2050  sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2051  sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2052  }
2053 
2054  /*
2055  * We're likely to be asked to compare the same strings repeatedly, and
2056  * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2057  * comparisons, even though in general there is no reason to think that
2058  * that will work out (every string datum may be unique). Caching does
2059  * not slow things down measurably when it doesn't work out, and can speed
2060  * things up by rather a lot when it does. In part, this is because the
2061  * memcmp() compares data from cachelines that are needed in L1 cache even
2062  * when the last comparison's result cannot be reused.
2063  */
2064  arg1_match = true;
2065  if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2066  {
2067  arg1_match = false;
2068  memcpy(sss->buf1, a1p, len1);
2069  sss->buf1[len1] = '\0';
2070  sss->last_len1 = len1;
2071  }
2072 
2073  /*
2074  * If we're comparing the same two strings as last time, we can return the
2075  * same answer without calling strcoll() again. This is more likely than
2076  * it seems (at least with moderate to low cardinality sets), because
2077  * quicksort compares the same pivot against many values.
2078  */
2079  if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2080  {
2081  memcpy(sss->buf2, a2p, len2);
2082  sss->buf2[len2] = '\0';
2083  sss->last_len2 = len2;
2084  }
2085  else if (arg1_match && !sss->cache_blob)
2086  {
2087  /* Use result cached following last actual strcoll() call */
2088  result = sss->last_returned;
2089  goto done;
2090  }
2091 
2092 #ifdef HAVE_LOCALE_T
2093  if (sss->locale)
2094  result = strcoll_l(sss->buf1, sss->buf2, sss->locale);
2095  else
2096 #endif
2097  result = strcoll(sss->buf1, sss->buf2);
2098 
2099  /*
2100  * In some locales strcoll() can claim that nonidentical strings are
2101  * equal. Believing that would be bad news for a number of reasons, so we
2102  * follow Perl's lead and sort "equal" strings according to strcmp().
2103  */
2104  if (result == 0)
2105  result = strcmp(sss->buf1, sss->buf2);
2106 
2107  /* Cache result, perhaps saving an expensive strcoll() call next time */
2108  sss->cache_blob = false;
2109  sss->last_returned = result;
2110 done:
2111  /* We can't afford to leak memory here. */
2112  if (PointerGetDatum(arg1) != x)
2113  pfree(arg1);
2114  if (PointerGetDatum(arg2) != y)
2115  pfree(arg2);
2116 
2117  return result;
2118 }
2119 
2120 /*
2121  * Abbreviated key comparison func
2122  */
2123 static int
2125 {
2126  /*
2127  * When 0 is returned, the core system will call varstrfastcmp_c()
2128  * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale(). Even a
2129  * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2130  * authoritatively, for the same reason that there is a strcoll()
2131  * tie-breaker call to strcmp() in varstr_cmp().
2132  */
2133  if (x > y)
2134  return 1;
2135  else if (x == y)
2136  return 0;
2137  else
2138  return -1;
2139 }
2140 
2141 /*
2142  * Conversion routine for sortsupport. Converts original to abbreviated key
2143  * representation. Our encoding strategy is simple -- pack the first 8 bytes
2144  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2145  * stored in reverse order), and treat it as an unsigned integer. When the "C"
2146  * locale is used, or in case of bytea, just memcpy() from original instead.
2147  */
2148 static Datum
2150 {
2152  VarString *authoritative = DatumGetVarStringPP(original);
2153  char *authoritative_data = VARDATA_ANY(authoritative);
2154 
2155  /* working state */
2156  Datum res;
2157  char *pres;
2158  int len;
2159  uint32 hash;
2160 
2161  pres = (char *) &res;
2162  /* memset(), so any non-overwritten bytes are NUL */
2163  memset(pres, 0, sizeof(Datum));
2164  len = VARSIZE_ANY_EXHDR(authoritative);
2165 
2166  /* Get number of bytes, ignoring trailing spaces */
2167  if (sss->bpchar)
2168  len = bpchartruelen(authoritative_data, len);
2169 
2170  /*
2171  * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2172  * abbreviate keys. The full comparator for the C locale is always
2173  * memcmp(). It would be incorrect to allow bytea callers (callers that
2174  * always force the C collation -- bytea isn't a collatable type, but this
2175  * approach is convenient) to use strxfrm(). This is because bytea
2176  * strings may contain NUL bytes. Besides, this should be faster, too.
2177  *
2178  * More generally, it's okay that bytea callers can have NUL bytes in
2179  * strings because varstrcmp_abbrev() need not make a distinction between
2180  * terminating NUL bytes, and NUL bytes representing actual NULs in the
2181  * authoritative representation. Hopefully a comparison at or past one
2182  * abbreviated key's terminating NUL byte will resolve the comparison
2183  * without consulting the authoritative representation; specifically, some
2184  * later non-NUL byte in the longer string can resolve the comparison
2185  * against a subsequent terminating NUL in the shorter string. There will
2186  * usually be what is effectively a "length-wise" resolution there and
2187  * then.
2188  *
2189  * If that doesn't work out -- if all bytes in the longer string
2190  * positioned at or past the offset of the smaller string's (first)
2191  * terminating NUL are actually representative of NUL bytes in the
2192  * authoritative binary string (perhaps with some *terminating* NUL bytes
2193  * towards the end of the longer string iff it happens to still be small)
2194  * -- then an authoritative tie-breaker will happen, and do the right
2195  * thing: explicitly consider string length.
2196  */
2197  if (sss->collate_c)
2198  memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2199  else
2200  {
2201  Size bsize;
2202 
2203  /*
2204  * We're not using the C collation, so fall back on strxfrm.
2205  */
2206 
2207  /* By convention, we use buffer 1 to store and NUL-terminate */
2208  if (len >= sss->buflen1)
2209  {
2210  pfree(sss->buf1);
2211  sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2212  sss->buf1 = palloc(sss->buflen1);
2213  }
2214 
2215  /* Might be able to reuse strxfrm() blob from last call */
2216  if (sss->last_len1 == len && sss->cache_blob &&
2217  memcmp(sss->buf1, authoritative_data, len) == 0)
2218  {
2219  memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2220  /* No change affecting cardinality, so no hashing required */
2221  goto done;
2222  }
2223 
2224  /* Just like strcoll(), strxfrm() expects a NUL-terminated string */
2225  memcpy(sss->buf1, authoritative_data, len);
2226  sss->buf1[len] = '\0';
2227  sss->last_len1 = len;
2228 
2229  for (;;)
2230  {
2231 #ifdef HAVE_LOCALE_T
2232  if (sss->locale)
2233  bsize = strxfrm_l(sss->buf2, sss->buf1,
2234  sss->buflen2, sss->locale);
2235  else
2236 #endif
2237  bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2238 
2239  sss->last_len2 = bsize;
2240  if (bsize < sss->buflen2)
2241  break;
2242 
2243  /*
2244  * The C standard states that the contents of the buffer is now
2245  * unspecified. Grow buffer, and retry.
2246  */
2247  pfree(sss->buf2);
2248  sss->buflen2 = Max(bsize + 1,
2249  Min(sss->buflen2 * 2, MaxAllocSize));
2250  sss->buf2 = palloc(sss->buflen2);
2251  }
2252 
2253  /*
2254  * Every Datum byte is always compared. This is safe because the
2255  * strxfrm() blob is itself NUL terminated, leaving no danger of
2256  * misinterpreting any NUL bytes not intended to be interpreted as
2257  * logically representing termination.
2258  *
2259  * (Actually, even if there were NUL bytes in the blob it would be
2260  * okay. See remarks on bytea case above.)
2261  */
2262  memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2263  }
2264 
2265  /*
2266  * Maintain approximate cardinality of both abbreviated keys and original,
2267  * authoritative keys using HyperLogLog. Used as cheap insurance against
2268  * the worst case, where we do many string transformations for no saving
2269  * in full strcoll()-based comparisons. These statistics are used by
2270  * varstr_abbrev_abort().
2271  *
2272  * First, Hash key proper, or a significant fraction of it. Mix in length
2273  * in order to compensate for cases where differences are past
2274  * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2275  */
2276  hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2277  Min(len, PG_CACHE_LINE_SIZE)));
2278 
2279  if (len > PG_CACHE_LINE_SIZE)
2280  hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2281 
2282  addHyperLogLog(&sss->full_card, hash);
2283 
2284  /* Hash abbreviated key */
2285 #if SIZEOF_DATUM == 8
2286  {
2287  uint32 lohalf,
2288  hihalf;
2289 
2290  lohalf = (uint32) res;
2291  hihalf = (uint32) (res >> 32);
2292  hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2293  }
2294 #else /* SIZEOF_DATUM != 8 */
2295  hash = DatumGetUInt32(hash_uint32((uint32) res));
2296 #endif
2297 
2298  addHyperLogLog(&sss->abbr_card, hash);
2299 
2300  /* Cache result, perhaps saving an expensive strxfrm() call next time */
2301  sss->cache_blob = true;
2302 done:
2303 
2304  /*
2305  * Byteswap on little-endian machines.
2306  *
2307  * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2308  * comparator) works correctly on all platforms. If we didn't do this,
2309  * the comparator would have to call memcmp() with a pair of pointers to
2310  * the first byte of each abbreviated key, which is slower.
2311  */
2312  res = DatumBigEndianToNative(res);
2313 
2314  /* Don't leak memory here */
2315  if (PointerGetDatum(authoritative) != original)
2316  pfree(authoritative);
2317 
2318  return res;
2319 }
2320 
2321 /*
2322  * Callback for estimating effectiveness of abbreviated key optimization, using
2323  * heuristic rules. Returns value indicating if the abbreviation optimization
2324  * should be aborted, based on its projected effectiveness.
      *
      * memtupcount: tuple count supplied by the caller; compared against the
      *   100 and 10,000 thresholds below.
      * ssup: sort support state; ssup->abbreviate is asserted to be set.
      *
      * Returns false to keep abbreviating, true to abort.
      *
      * NOTE(review): "sss" is used throughout this function but its declaration
      * is not visible in this listing (the embedded numbering skips 2329).
      * Presumably it is per-sort state reached through ssup -- confirm against
      * the upstream file before relying on this block.
2325  */
2326 static bool
2327 varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2328 {
2330  double abbrev_distinct,
2331  key_distinct;
2332 
2333  Assert(ssup->abbreviate);
2334 
2335  /* Have a little patience */
      /* With fewer than 100 tuples we never abort. */
2336  if (memtupcount < 100)
2337  return false;
2338 
      /* Cardinality estimates come from the two HyperLogLog states. */
2339  abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2340  key_distinct = estimateHyperLogLog(&sss->full_card);
2341 
2342  /*
2343  * Clamp cardinality estimates to at least one distinct value. While
2344  * NULLs are generally disregarded, if only NULL values were seen so far,
2345  * that might misrepresent costs if we failed to clamp.
2346  */
2347  if (abbrev_distinct <= 1.0)
2348  abbrev_distinct = 1.0;
2349 
2350  if (key_distinct <= 1.0)
2351  key_distinct = 1.0;
2352 
2353  /*
2354  * In the worst case all abbreviated keys are identical, while at the same
2355  * time there are differences within full key strings not captured in
2356  * abbreviations.
2357  */
2358 #ifdef TRACE_SORT
2359  if (trace_sort)
2360  {
      /* Distinct abbreviated keys as a fraction of tuples seen; logged only. */
2361  double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2362 
2363  elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2364  "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2365  memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2366  sss->prop_card);
2367  }
2368 #endif
2369 
2370  /*
2371  * If the number of distinct abbreviated keys approximately matches the
2372  * number of distinct authoritative original keys, that's reason enough to
2373  * proceed. We can win even with a very low cardinality set if most
2374  * tie-breakers only memcmp(). This is by far the most important
2375  * consideration.
2376  *
2377  * While comparisons that are resolved at the abbreviated key level are
2378  * considerably cheaper than tie-breakers resolved with memcmp(), both of
2379  * those two outcomes are so much cheaper than a full strcoll() once
2380  * sorting is underway that it doesn't seem worth it to weigh abbreviated
2381  * cardinality against the overall size of the set in order to more
2382  * accurately model costs. Assume that an abbreviated comparison, and an
2383  * abbreviated comparison with a cheap memcmp()-based authoritative
2384  * resolution are equivalent.
2385  */
      /* Keep going while abbreviated cardinality exceeds the required share. */
2386  if (abbrev_distinct > key_distinct * sss->prop_card)
2387  {
2388  /*
2389  * When we have exceeded 10,000 tuples, decay required cardinality
2390  * aggressively for next call.
2391  *
2392  * This is useful because the number of comparisons required on
2393  * average increases at a linearithmic rate, and at roughly 10,000
2394  * tuples that factor will start to dominate over the linear costs of
2395  * string transformation (this is a conservative estimate). The decay
2396  * rate is chosen to be a little less aggressive than halving -- which
2397  * (since we're called at points at which memtupcount has doubled)
2398  * would never see the cost model actually abort past the first call
2399  * following a decay. This decay rate is mostly a precaution against
2400  * a sudden, violent swing in how well abbreviated cardinality tracks
2401  * full key cardinality. The decay also serves to prevent a marginal
2402  * case from being aborted too late, when too much has already been
2403  * invested in string transformation.
2404  *
2405  * It's possible for sets of several million distinct strings with
2406  * mere tens of thousands of distinct abbreviated keys to still
2407  * benefit very significantly. This will generally occur provided
2408  * each abbreviated key is a proxy for a roughly uniform number of the
2409  * set's full keys. If it isn't so, we hope to catch that early and
2410  * abort. If it isn't caught early, by the time the problem is
2411  * apparent it's probably not worth aborting.
2412  */
      /* Side effect: prop_card is scaled in place and persists across calls. */
2413  if (memtupcount > 10000)
2414  sss->prop_card *= 0.65;
2415 
2416  return false;
2417  }
2418 
2419  /*
2420  * Abort abbreviation strategy.
2421  *
2422  * The worst case, where all abbreviated keys are identical while all
2423  * original strings differ will typically only see a regression of about
2424  * 10% in execution time for small to medium sized lists of strings.
2425  * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2426  * often expect very large improvements, particularly with sets of strings
2427  * of moderately high to high abbreviated cardinality. There is little to
2428  * lose but much to gain, which our strategy reflects.
2429  */
2430 #ifdef TRACE_SORT
2431  if (trace_sort)
2432  elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2433  "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2434  memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2435 #endif
2436 
2437  return true;
2438 }
2439 
/*
 * Return the greater of two text arguments: arg1 when text_cmp() under the
 * call's collation yields a value > 0, otherwise arg2 (so arg2 is returned
 * when the two compare equal).
 *
 * NOTE(review): the declarator line between "Datum" and "{" is missing from
 * this listing (embedded number 2441).  The PG_GETARG_* usage implies an
 * fmgr-style function; presumably this is text_larger -- confirm upstream.
 */
2440 Datum
2442 {
2443  text *arg1 = PG_GETARG_TEXT_PP(0);
2444  text *arg2 = PG_GETARG_TEXT_PP(1);
2445  text *result;
2446 
2447  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2448 
2449  PG_RETURN_TEXT_P(result);
2450 }
2451 
/*
 * Return the lesser of two text arguments: arg1 when text_cmp() under the
 * call's collation yields a value < 0, otherwise arg2 (so arg2 is returned
 * when the two compare equal).
 *
 * NOTE(review): the declarator line between "Datum" and "{" is missing from
 * this listing (embedded number 2453).  Presumably this is text_smaller --
 * confirm upstream.
 */
2452 Datum
2454 {
2455  text *arg1 = PG_GETARG_TEXT_PP(0);
2456  text *arg2 = PG_GETARG_TEXT_PP(1);
2457  text *result;
2458 
2459  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2460 
2461  PG_RETURN_TEXT_P(result);
2462 }
2463 
2464 
2465 /*
2466  * The following operators support character-by-character comparison
2467  * of text datums, to allow building indexes suitable for LIKE clauses.
2468  * Note that the regular texteq/textne comparison operators, and regular
2469  * support functions 1 and 2 with "C" collation are assumed to be
2470  * compatible with these!
2471  */
2472 
/*
 * Three-way bytewise comparison of two text values.
 *
 * The common prefix (Min(len1, len2) bytes) is compared with memcmp(); a
 * nonzero result is returned as-is.  When the prefixes match, the shorter
 * value sorts first: returns -1, 1, or 0 for equal lengths.
 *
 * NOTE(review): the declarator line is missing from this listing (embedded
 * number 2474).  The callers below invoke
 * internal_text_pattern_compare(arg1, arg2), which presumably is this
 * function -- confirm upstream.
 */
2473 static int
2475 {
2476  int result;
2477  int len1,
2478  len2;
2479 
      /* Payload lengths, excluding the varlena header. */
2480  len1 = VARSIZE_ANY_EXHDR(arg1);
2481  len2 = VARSIZE_ANY_EXHDR(arg2);
2482 
2483  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2484  if (result != 0)
2485  return result;
2486  else if (len1 < len2)
2487  return -1;
2488  else if (len1 > len2)
2489  return 1;
2490  else
2491  return 0;
2492 }
2493 
2494 
/*
 * Boolean "less than" built on internal_text_pattern_compare(): returns true
 * when the comparison result is < 0.  Argument copies, if any were made, are
 * released with PG_FREE_IF_COPY before returning.
 *
 * NOTE(review): the declarator line is missing from this listing (embedded
 * number 2496); presumably text_pattern_lt -- confirm upstream.
 */
2495 Datum
2497 {
2498  text *arg1 = PG_GETARG_TEXT_PP(0);
2499  text *arg2 = PG_GETARG_TEXT_PP(1);
2500  int result;
2501 
2502  result = internal_text_pattern_compare(arg1, arg2);
2503 
2504  PG_FREE_IF_COPY(arg1, 0);
2505  PG_FREE_IF_COPY(arg2, 1);
2506 
2507  PG_RETURN_BOOL(result < 0);
2508 }
2509 
2510 
/*
 * Boolean "less than or equal" built on internal_text_pattern_compare():
 * returns true when the comparison result is <= 0.  Argument copies, if any
 * were made, are released with PG_FREE_IF_COPY before returning.
 *
 * NOTE(review): the declarator line is missing from this listing (embedded
 * number 2512); presumably text_pattern_le -- confirm upstream.
 */
2511 Datum
2513 {
2514  text *arg1 = PG_GETARG_TEXT_PP(0);
2515  text *arg2 = PG_GETARG_TEXT_PP(1);
2516  int result;
2517 
2518  result = internal_text_pattern_compare(arg1, arg2);
2519 
2520  PG_FREE_IF_COPY(arg1, 0);
2521  PG_FREE_IF_COPY(arg2, 1);
2522 
2523  PG_RETURN_BOOL(result <= 0);
2524 }
2525 
2526 
/*
 * Boolean "greater than or equal" built on internal_text_pattern_compare():
 * returns true when the comparison result is >= 0.  Argument copies, if any
 * were made, are released with PG_FREE_IF_COPY before returning.
 *
 * NOTE(review): the declarator line is missing from this listing (embedded
 * number 2528); presumably text_pattern_ge -- confirm upstream.
 */
2527 Datum
2529 {
2530  text *arg1 = PG_GETARG_TEXT_PP(0);
2531  text *arg2 = PG_GETARG_TEXT_PP(1);
2532  int result;
2533 
2534  result = internal_text_pattern_compare(arg1, arg2);
2535 
2536  PG_FREE_IF_COPY(arg1, 0);
2537  PG_FREE_IF_COPY(arg2, 1);
2538 
2539  PG_RETURN_BOOL(result >= 0);
2540 }
2541 
2542 
/*
 * Boolean "greater than" built on internal_text_pattern_compare(): returns
 * true when the comparison result is > 0.  Argument copies, if any were made,
 * are released with PG_FREE_IF_COPY before returning.
 *
 * NOTE(review): the declarator line is missing from this listing (embedded
 * number 2544); presumably text_pattern_gt -- confirm upstream.
 */
2543 Datum
2545 {
2546  text *arg1 = PG_GETARG_TEXT_PP(0);
2547  text *arg2 = PG_GETARG_TEXT_PP(1);
2548  int result;
2549 
2550  result = internal_text_pattern_compare(arg1, arg2);
2551 
2552  PG_FREE_IF_COPY(arg1, 0);
2553  PG_FREE_IF_COPY(arg2, 1);
2554 
2555  PG_RETURN_BOOL(result > 0);
2556 }
2557 
2558 
/*
 * Three-way comparison wrapper: returns the int32 result of
 * internal_text_pattern_compare() unchanged (negative, zero, or positive).
 * Argument copies, if any were made, are released with PG_FREE_IF_COPY.
 *
 * NOTE(review): the declarator line is missing from this listing (embedded
 * number 2560); presumably bttext_pattern_cmp -- confirm upstream.
 */
2559 Datum
2561 {
2562  text *arg1 = PG_GETARG_TEXT_PP(0);
2563  text *arg2 = PG_GETARG_TEXT_PP(1);
2564  int result;
2565 
2566  result = internal_text_pattern_compare(arg1, arg2);
2567 
2568  PG_FREE_IF_COPY(arg1, 0);
2569  PG_FREE_IF_COPY(arg2, 1);
2570 
2571  PG_RETURN_INT32(result);
2572 }
2573 
2574 
/*
 * Sort-support setup that forces the "C" collation: switches to
 * ssup->ssup_cxt, calls varstr_sortsupport() with C_COLLATION_OID, restores
 * the previous memory context, and returns void.
 *
 * NOTE(review): two lines are missing from this listing -- the declarator
 * (embedded number 2576) and the declaration of "ssup" (2578).  Presumably
 * this is bttext_pattern_sortsupport and ssup is fetched from argument 0 --
 * confirm upstream.
 */
2575 Datum
2577 {
2579  MemoryContext oldcontext;
2580 
      /* Run the setup call below with ssup_cxt as the current context. */
2581  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2582 
2583  /* Use generic string SortSupport, forcing "C" collation */
2584  varstr_sortsupport(ssup, C_COLLATION_OID, false);
2585 
2586  MemoryContextSwitchTo(oldcontext);
2587 
2588  PG_RETURN_VOID();
2589 }
2590 
2591 
2592 /*-------------------------------------------------------------
2593  * byteaoctetlen
2594  *
2595  * get the number of bytes contained in an instance of type 'bytea'
      *
      * The argument is fetched as a raw Datum so it need not be detoasted.
      *
      * NOTE(review): the declarator (embedded number 2599) and the statement
      * that computes and returns the length (2604) are missing from this
      * listing; restore both from the upstream file.
2596  *-------------------------------------------------------------
2597  */
2598 Datum
2600 {
2601  Datum str = PG_GETARG_DATUM(0);
2602 
2603  /* We need not detoast the input at all */
2605 }
2606 
2607 /*
2608  * byteacat -
2609  * takes two bytea* and returns a bytea* that is the concatenation of
2610  * the two.
2611  *
2612  * Cloned from textcat and modified as required.
      *
      * Both arguments are fetched with PG_GETARG_BYTEA_PP.
      *
      * NOTE(review): the declarator (embedded number 2615) and the return
      * statement (2620) are missing from this listing.  The bytea_catenate()
      * header below calls itself the "guts of byteacat()", so the missing
      * statement presumably returns bytea_catenate(t1, t2) -- confirm upstream.
2613  */
2614 Datum
2616 {
2617  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2618  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2619 
2621 }
2622 
2623 /*
2624  * bytea_catenate
2625  * Guts of byteacat(), broken out so it can be used by other functions
2626  *
2627  * Arguments can be in short-header form, but not compressed or out-of-line
      *
      * Returns a freshly palloc'd bytea holding t1's payload followed by t2's.
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 2630).  bytea_overlay() calls this as bytea_catenate(s1, t2) with
      * two bytea pointers, matching the t1/t2 used in the body.
      *
      * NOTE(review): len1 + len2 + VARHDRSZ is computed in int with no overflow
      * check -- presumably callers keep the inputs small enough; verify.
2628  */
2629 static bytea *
2631 {
2632  bytea *result;
2633  int len1,
2634  len2,
2635  len;
2636  char *ptr;
2637 
2638  len1 = VARSIZE_ANY_EXHDR(t1);
2639  len2 = VARSIZE_ANY_EXHDR(t2);
2640 
2641  /* paranoia ... probably should throw error instead? */
2642  if (len1 < 0)
2643  len1 = 0;
2644  if (len2 < 0)
2645  len2 = 0;
2646 
      /* Total size includes a full-size varlena header for the result. */
2647  len = len1 + len2 + VARHDRSZ;
2648  result = (bytea *) palloc(len);
2649 
2650  /* Set size of result string... */
2651  SET_VARSIZE(result, len);
2652 
2653  /* Fill data field of result string... */
2654  ptr = VARDATA(result);
2655  if (len1 > 0)
2656  memcpy(ptr, VARDATA_ANY(t1), len1);
2657  if (len2 > 0)
2658  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2659 
2660  return result;
2661 }
2662 
/*
 * Build a bytea from the C string str_ by invoking byteain through
 * DirectFunctionCall1.  Used below to produce a zero-length bytea from "".
 */
2663 #define PG_STR_GET_BYTEA(str_) \
2664  DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2665 
2666 /*
2667  * bytea_substr()
2668  * Return a substring starting at the specified position.
2669  * Cloned from text_substr and modified as required.
2670  *
2671  * Input:
2672  * - string
2673  * - starting position (is one-based)
2674  * - string length (optional)
2675  *
2676  * If the starting position is zero or less, then return from the start of the string
2677  * adjusting the length to be consistent with the "negative start" per SQL.
2678  * If the length is less than zero, an ERROR is thrown. If no third argument
2679  * (length) is provided, the length to the end of the string is assumed.
      *
      * This variant passes arguments 1 and 2 as int32 start and length, with
      * "false" as the last argument of the call (length was specified).
      *
      * NOTE(review): the declarator (embedded number 2682) and the first line
      * of the return expression (2684) are missing from this listing.  The
      * remaining argument list matches bytea_substring()'s
      * (S, L, length_not_specified) parameters, so that is presumably the
      * callee -- confirm upstream.
2680  */
2681 Datum
2683 {
2685  PG_GETARG_INT32(1),
2686  PG_GETARG_INT32(2),
2687  false));
2688 }
2689 
2690 /*
2691  * bytea_substr_no_len -
2692  * Wrapper to avoid opr_sanity failure due to
2693  * one function accepting a different number of args.
      *
      * Passes argument 1 as the int32 start, -1 as the length, and "true" as
      * the last argument of the call (length not specified).
      *
      * NOTE(review): the declarator (embedded number 2696) and the first line
      * of the return expression (2698) are missing from this listing;
      * presumably the callee is bytea_substring() -- confirm upstream.
2694  */
2695 Datum
2697 {
2699  PG_GETARG_INT32(1),
2700  -1,
2701  true));
2702 }
2703 
/*
 * Shared implementation of the bytea substring functions.
 *
 * S is the one-based start position, L the requested length, and
 * length_not_specified selects "to end of string" (L is then ignored).
 * The start is clamped to at least 1.  When a length is given, the end
 * position E = S + L is computed: E < S raises ERRCODE_SUBSTRING_ERROR,
 * E < 1 returns a zero-length bytea, otherwise the slice length is E - S1.
 * The slice itself is fetched with DatumGetByteaPSlice() using a zero-based
 * offset.
 *
 * NOTE(review): the first declarator line (function name and first
 * parameter, embedded number 2705) is missing from this listing.
 * bytea_overlay() calls this as bytea_substring(PointerGetDatum(t1), ...)
 * and the body uses "str", so the first parameter is presumably
 * "Datum str" -- confirm upstream.
 *
 * NOTE(review): S + L is plain int addition; for extreme inputs it can
 * overflow before the E < S test runs -- verify whether callers bound it.
 */
2704 static bytea *
2706  int S,
2707  int L,
2708  bool length_not_specified)
2709 {
2710  int S1; /* adjusted start position */
2711  int L1; /* adjusted substring length */
2712 
2713  S1 = Max(S, 1);
2714 
2715  if (length_not_specified)
2716  {
2717  /*
2718  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2719  * end of the string if we pass it a negative value for length.
2720  */
2721  L1 = -1;
2722  }
2723  else
2724  {
2725  /* end position */
2726  int E = S + L;
2727 
2728  /*
2729  * A negative value for L is the only way for the end position to be
2730  * before the start. SQL99 says to throw an error.
2731  */
2732  if (E < S)
2733  ereport(ERROR,
2734  (errcode(ERRCODE_SUBSTRING_ERROR),
2735  errmsg("negative substring length not allowed")));
2736 
2737  /*
2738  * A zero or negative value for the end position can happen if the
2739  * start was negative or one. SQL99 says to return a zero-length
2740  * string.
2741  */
2742  if (E < 1)
2743  return PG_STR_GET_BYTEA("");
2744 
      /* Length is measured from the clamped start, not the raw S. */
2745  L1 = E - S1;
2746  }
2747 
2748  /*
2749  * If the start position is past the end of the string, SQL99 says to
2750  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2751  * us. Convert to zero-based starting position
2752  */
2753  return DatumGetByteaPSlice(str, S1 - 1, L1);
2754 }
2755 
2756 /*
2757  * byteaoverlay
2758  * Replace specified substring of first string with second
2759  *
2760  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2761  * This code is a direct implementation of what the standard says.
      *
      * Arguments: 0 = target bytea, 1 = replacement bytea, 2 = start position,
      * 3 = length of the span to replace.  All work is done by bytea_overlay().
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 2764).
2762  */
2763 Datum
2765 {
2766  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2767  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2768  int sp = PG_GETARG_INT32(2); /* substring start position */
2769  int sl = PG_GETARG_INT32(3); /* substring length */
2770 
2771  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2772 }
2773 
/*
 * Three-argument form of the overlay function: the span length defaults to
 * the payload length of the replacement value t2, then bytea_overlay() does
 * the work.
 *
 * NOTE(review): the declarator line is missing from this listing (embedded
 * number 2775); presumably byteaoverlay_no_len -- confirm upstream.
 */
2774 Datum
2776 {
2777  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2778  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2779  int sp = PG_GETARG_INT32(2); /* substring start position */
2780  int sl;
2781 
2782  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2783  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2784 }
2785 
2786 static bytea *
2787 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2788 {
2789  bytea *result;
2790  bytea *s1;
2791  bytea *s2;
2792  int sp_pl_sl;
2793 
2794  /*
2795  * Check for possible integer-overflow cases. For negative sp, throw a
2796  * "substring length" error because that's what should be expected
2797  * according to the spec's definition of OVERLAY().
2798  */
2799  if (sp <= 0)
2800  ereport(ERROR,
2801  (errcode(ERRCODE_SUBSTRING_ERROR),
2802  errmsg("negative substring length not allowed")));
2803  sp_pl_sl = sp + sl;
2804  if (sp_pl_sl <= sl)
2805  ereport(ERROR,
2806  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2807  errmsg("integer out of range")));
2808 
2809  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2810  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2811  result = bytea_catenate(s1, t2);
2812  result = bytea_catenate(result, s2);
2813 
2814  return result;
2815 }
2816 
2817 /*
2818  * byteapos -
2819  * Return the position of the specified substring.
2820  * Implements the SQL POSITION() function.
2821  * Cloned from textpos and modified as required.
      *
      * Argument 0 (t1) is the value searched, argument 1 (t2) the pattern.
      * Returns 1 for an empty pattern, the one-based offset of the first match
      * otherwise, or 0 when there is no match (including when the pattern is
      * longer than t1, since the loop then never runs).
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 2824).
2822  */
2823 Datum
2825 {
2826  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2827  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2828  int pos;
2829  int px,
2830  p;
2831  int len1,
2832  len2;
2833  char *p1,
2834  *p2;
2835 
2836  len1 = VARSIZE_ANY_EXHDR(t1);
2837  len2 = VARSIZE_ANY_EXHDR(t2);
2838 
2839  if (len2 <= 0)
2840  PG_RETURN_INT32(1); /* result for empty pattern */
2841 
2842  p1 = VARDATA_ANY(t1);
2843  p2 = VARDATA_ANY(t2);
2844 
2845  pos = 0;
      /* Last start offset at which the whole pattern still fits in t1. */
2846  px = (len1 - len2);
2847  for (p = 0; p <= px; p++)
2848  {
      /* Cheap first-byte test before paying for the full memcmp(). */
2849  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2850  {
2851  pos = p + 1;
2852  break;
2853  };
2854  p1++;
2855  };
2856 
2857  PG_RETURN_INT32(pos);
2858 }
2859 
2860 /*-------------------------------------------------------------
2861  * byteaGetByte
2862  *
2863  * this routine treats "bytea" as an array of bytes.
2864  * It returns the Nth byte (a number between 0 and 255).
      *
      * N is zero-based.  Raises ERRCODE_ARRAY_SUBSCRIPT_ERROR when N is
      * negative or >= the payload length.
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 2868).
2865  *-------------------------------------------------------------
2866  */
2867 Datum
2869 {
2870  bytea *v = PG_GETARG_BYTEA_PP(0);
2871  int32 n = PG_GETARG_INT32(1);
2872  int len;
2873  int byte;
2874 
2875  len = VARSIZE_ANY_EXHDR(v);
2876 
2877  if (n < 0 || n >= len)
2878  ereport(ERROR,
2879  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2880  errmsg("index %d out of valid range, 0..%d",
2881  n, len - 1)));
2882 
      /* Read through unsigned char so the result is 0..255, never negative. */
2883  byte = ((unsigned char *) VARDATA_ANY(v))[n];
2884 
2885  PG_RETURN_INT32(byte);
2886 }
2887 
2888 /*-------------------------------------------------------------
2889  * byteaGetBit
2890  *
2891  * This routine treats a "bytea" type like an array of bits.
2892  * It returns the value of the Nth bit (0 or 1).
2893  *
      * N is zero-based; bit N lives in byte N / 8 at mask (1 << (N % 8)), so
      * bit 0 is the least significant bit of the first byte.  Raises
      * ERRCODE_ARRAY_SUBSCRIPT_ERROR when N is negative or >= len * 8.
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 2897).
      *
      * NOTE(review): len * 8 is int arithmetic; for very large values it could
      * overflow -- verify whether that is reachable.
2894  *-------------------------------------------------------------
2895  */
2896 Datum
2898 {
2899  bytea *v = PG_GETARG_BYTEA_PP(0);
2900  int32 n = PG_GETARG_INT32(1);
2901  int byteNo,
2902  bitNo;
2903  int len;
2904  int byte;
2905 
2906  len = VARSIZE_ANY_EXHDR(v);
2907 
2908  if (n < 0 || n >= len * 8)
2909  ereport(ERROR,
2910  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2911  errmsg("index %d out of valid range, 0..%d",
2912  n, len * 8 - 1)));
2913 
2914  byteNo = n / 8;
2915  bitNo = n % 8;
2916 
2917  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
2918 
2919  if (byte & (1 << bitNo))
2920  PG_RETURN_INT32(1);
2921  else
2922  PG_RETURN_INT32(0);
2923 }
2924 
2925 /*-------------------------------------------------------------
2926  * byteaSetByte
2927  *
2928  * Given an instance of type 'bytea' creates a new one with
2929  * the Nth byte set to the given value.
2930  *
      * N is zero-based.  Raises ERRCODE_ARRAY_SUBSCRIPT_ERROR when N is
      * negative or >= the payload length.  The input is not modified; the
      * result is a palloc'd copy.  The new value is stored through an
      * unsigned char, so only its low-order 8 bits are kept.
      *
      * Unlike the getters above, the argument is fetched with
      * PG_GETARG_BYTEA_P and accessed with VARSIZE/VARDATA.
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 2934).
2931  *-------------------------------------------------------------
2932  */
2933 Datum
2935 {
2936  bytea *v = PG_GETARG_BYTEA_P(0);
2937  int32 n = PG_GETARG_INT32(1);
2938  int32 newByte = PG_GETARG_INT32(2);
2939  int len;
2940  bytea *res;
2941 
2942  len = VARSIZE(v) - VARHDRSZ;
2943 
2944  if (n < 0 || n >= len)
2945  ereport(ERROR,
2946  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2947  errmsg("index %d out of valid range, 0..%d",
2948  n, len - 1)));
2949 
2950  /*
2951  * Make a copy of the original varlena.
2952  */
2953  res = (bytea *) palloc(VARSIZE(v));
2954  memcpy((char *) res, (char *) v, VARSIZE(v));
2955 
2956  /*
2957  * Now set the byte.
2958  */
2959  ((unsigned char *) VARDATA(res))[n] = newByte;
2960 
2961  PG_RETURN_BYTEA_P(res);
2962 }
2963 
2964 /*-------------------------------------------------------------
2965  * byteaSetBit
2966  *
2967  * Given an instance of type 'bytea' creates a new one with
2968  * the Nth bit set to the given value.
2969  *
      * N is zero-based; bit N lives in byte N / 8 at mask (1 << (N % 8)).
      * Raises ERRCODE_ARRAY_SUBSCRIPT_ERROR when N is out of range and
      * ERRCODE_INVALID_PARAMETER_VALUE when the new bit is neither 0 nor 1.
      * The range check runs first.  The input is not modified; the result is
      * a palloc'd copy.
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 2973).
      *
      * NOTE(review): len * 8 is int arithmetic; for very large values it could
      * overflow -- verify whether that is reachable.
2970  *-------------------------------------------------------------
2971  */
2972 Datum
2974 {
2975  bytea *v = PG_GETARG_BYTEA_P(0);
2976  int32 n = PG_GETARG_INT32(1);
2977  int32 newBit = PG_GETARG_INT32(2);
2978  bytea *res;
2979  int len;
2980  int oldByte,
2981  newByte;
2982  int byteNo,
2983  bitNo;
2984 
2985  len = VARSIZE(v) - VARHDRSZ;
2986 
2987  if (n < 0 || n >= len * 8)
2988  ereport(ERROR,
2989  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2990  errmsg("index %d out of valid range, 0..%d",
2991  n, len * 8 - 1)));
2992 
2993  byteNo = n / 8;
2994  bitNo = n % 8;
2995 
2996  /*
2997  * sanity check!
2998  */
2999  if (newBit != 0 && newBit != 1)
3000  ereport(ERROR,
3001  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3002  errmsg("new bit must be 0 or 1")));
3003 
3004  /*
3005  * Make a copy of the original varlena.
3006  */
3007  res = (bytea *) palloc(VARSIZE(v));
3008  memcpy((char *) res, (char *) v, VARSIZE(v));
3009 
3010  /*
3011  * Update the byte.
3012  */
3013  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3014 
      /* Clear or set just the addressed bit, leaving the other seven alone. */
3015  if (newBit == 0)
3016  newByte = oldByte & (~(1 << bitNo));
3017  else
3018  newByte = oldByte | (1 << bitNo);
3019 
3020  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3021 
3022  PG_RETURN_BYTEA_P(res);
3023 }
3024 
3025 
3026 /* text_name()
3027  * Converts a text type to a Name type.
      *
      * Input of NAMEDATALEN bytes or more is clipped with pg_mbcliplen() to at
      * most NAMEDATALEN - 1 bytes.  The result buffer is NAMEDATALEN bytes
      * from palloc0, so the bytes after the copied text are zero.
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 3030).
3028  */
3029 Datum
3031 {
3032  text *s = PG_GETARG_TEXT_PP(0);
3033  Name result;
3034  int len;
3035 
3036  len = VARSIZE_ANY_EXHDR(s);
3037 
3038  /* Truncate oversize input */
3039  if (len >= NAMEDATALEN)
3040  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3041 
3042  /* We use palloc0 here to ensure result is zero-padded */
3043  result = (Name) palloc0(NAMEDATALEN);
3044  memcpy(NameStr(*result), VARDATA_ANY(s), len);
3045 
3046  PG_RETURN_NAME(result);
3047 }
3048 
3049 /* name_text()
3050  * Converts a Name type to a text type.
      *
      * NOTE(review): the declarator (embedded number 3053) and the statement
      * that builds and returns the text value (3057) are missing from this
      * listing; only the argument fetch is visible.  Restore both from the
      * upstream file.
3051  */
3052 Datum
3054 {
3055  Name s = PG_GETARG_NAME(0);
3056 
3058 }
3059 
3060 
3061 /*
3062  * textToQualifiedNameList - convert a text object to list of names
3063  *
3064  * This implements the input parsing needed by nextval() and other
3065  * functions that take a text parameter representing a qualified name.
3066  * We split the name at dots, downcase if not double-quoted, and
3067  * truncate names if they're too long.
      *
      * Returns a List of makeString() nodes, each wrapping a pstrdup'd copy of
      * one name component.  Raises ERRCODE_INVALID_NAME ("invalid name syntax")
      * when SplitIdentifierString() rejects the input or yields no names.
      * The scratch C string and the intermediate list are freed on success.
      *
      * NOTE(review): the declarator line is missing from this listing (embedded
      * number 3070); the body reads a parameter named "textval", passed to
      * text_to_cstring(), so it is presumably a text pointer -- confirm.
3068  */
3069 List *
3071 {
3072  char *rawname;
3073  List *result = NIL;
3074  List *namelist;
3075  ListCell *l;
3076 
3077  /* Convert to C string (handles possible detoasting). */
3078  /* Note we rely on being able to modify rawname below. */
3079  rawname = text_to_cstring(textval);
3080 
3081  if (!SplitIdentifierString(rawname, '.', &namelist))
3082  ereport(ERROR,
3083  (errcode(ERRCODE_INVALID_NAME),
3084  errmsg("invalid name syntax")));
3085 
      /* An empty string parses successfully but is not a valid name here. */
3086  if (namelist == NIL)
3087  ereport(ERROR,
3088  (errcode(ERRCODE_INVALID_NAME),
3089  errmsg("invalid name syntax")));
3090 
3091  foreach(l, namelist)
3092  {
3093  char *curname = (char *) lfirst(l);
3094 
      /* Copy: curname points into rawname, which is freed below. */
3095  result = lappend(result, makeString(pstrdup(curname)));
3096  }
3097 
3098  pfree(rawname);
3099  list_free(namelist);
3100 
3101  return result;
3102 }
3103 
3104 /*
3105  * SplitIdentifierString --- parse a string containing identifiers
3106  *
3107  * This is the guts of textToQualifiedNameList, and is exported for use in
3108  * other situations such as parsing GUC variables. In the GUC case, it's
3109  * important to avoid memory leaks, so the API is designed to minimize the
3110  * amount of stuff that needs to be allocated and freed.
3111  *
3112  * Inputs:
3113  * rawstring: the input string; must be overwritable! On return, it's
3114  * been modified to contain the separated identifiers.
3115  * separator: the separator punctuation expected between identifiers
3116  * (typically '.' or ','). Whitespace may also appear around
3117  * identifiers.
3118  * Outputs:
3119  * namelist: filled with a palloc'd list of pointers to identifiers within
3120  * rawstring. Caller should list_free() this even on error return.
3121  *
3122  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3123  *
3124  * Note that an empty string is considered okay here, though not in
3125  * textToQualifiedNameList.
3126  */
3127 bool
3128 SplitIdentifierString(char *rawstring, char separator,
3129  List **namelist)
3130 {
3131  char *nextp = rawstring;
3132  bool done = false;
3133 
3134  *namelist = NIL;
3135 
3136  while (isspace((unsigned char) *nextp))
3137  nextp++; /* skip leading whitespace */
3138 
3139  if (*nextp == '\0')
3140  return true; /* allow empty string */
3141 
3142  /* At the top of the loop, we are at start of a new identifier. */
3143  do
3144  {
3145  char *curname;
3146  char *endp;
3147 
3148  if (*nextp == '"')
3149  {
3150  /* Quoted name --- collapse quote-quote pairs, no downcasing */
3151  curname = nextp + 1;
3152  for (;;)
3153  {
3154  endp = strchr(nextp + 1, '"');
3155  if (endp == NULL)
3156  return false; /* mismatched quotes */
3157  if (endp[1] != '"')
3158  break; /* found end of quoted name */
3159  /* Collapse adjacent quotes into one quote, and look again */
3160  memmove(endp, endp + 1, strlen(endp));
3161  nextp = endp;
3162  }
3163  /* endp now points at the terminating quote */
3164  nextp = endp + 1;
3165  }
3166  else
3167  {
3168  /* Unquoted name --- extends to separator or whitespace */
3169  char *downname;
3170  int len;
3171 
3172  curname = nextp;
3173  while (*nextp && *nextp != separator &&
3174  !isspace((unsigned char) *nextp))
3175  nextp++;
3176  endp = nextp;
3177  if (curname == nextp)
3178  return false; /* empty unquoted name not allowed */
3179 
3180  /*
3181  * Downcase the identifier, using same code as main lexer does.
3182  *
3183  * XXX because we want to overwrite the input in-place, we cannot
3184  * support a downcasing transformation that increases the string
3185  * length. This is not a problem given the current implementation
3186  * of downcase_truncate_identifier, but we'll probably have to do
3187  * something about this someday.
3188  */
3189  len = endp - curname;
3190  downname = downcase_truncate_identifier(curname, len, false);
3191  Assert(strlen(downname) <= len);
3192  strncpy(curname, downname, len); /* strncpy is required here */
3193  pfree(downname);
3194  }
3195 
3196  while (isspace((unsigned char) *nextp))
3197  nextp++; /* skip trailing whitespace */
3198 
3199  if (*nextp == separator)
3200  {
3201  nextp++;
3202  while (isspace((unsigned char) *nextp))
3203  nextp++; /* skip leading whitespace for next */
3204  /* we expect another name, so done remains false */
3205  }
3206  else if (*nextp == '\0')
3207  done = true;
3208  else
3209  return false; /* invalid syntax */
3210 
3211  /* Now safe to overwrite separator with a null */
3212  *endp = '\0';
3213 
3214  /* Truncate name if it's overlength */
3215  truncate_identifier(curname, strlen(curname), false);
3216 
3217  /*
3218  * Finished isolating current name --- add it to list
3219  */
3220  *namelist = lappend(*namelist, curname);
3221 
3222  /* Loop back if we didn't reach end of string */
3223  } while (!done);
3224 
3225  return true;
3226 }
3227 
3228 
3229 /*
3230  * SplitDirectoriesString --- parse a string containing directory names
3231  *
3232  * This is similar to SplitIdentifierString, except that the parsing
3233  * rules are meant to handle pathnames instead of identifiers: there is
3234  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3235  * and we apply canonicalize_path() to each extracted string. Because of the
3236  * last, the returned strings are separately palloc'd rather than being
3237  * pointers into rawstring --- but we still scribble on rawstring.
3238  *
3239  * Inputs:
3240  * rawstring: the input string; must be modifiable!
3241  * separator: the separator punctuation expected between directories
3242  * (typically ',' or ';'). Whitespace may also appear around
3243  * directories.
3244  * Outputs:
3245  * namelist: filled with a palloc'd list of directory names.
3246  * Caller should list_free_deep() this even on error return.
3247  *
3248  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3249  *
3250  * Note that an empty string is considered okay here.
3251  */
3252 bool
3253 SplitDirectoriesString(char *rawstring, char separator,
3254  List **namelist)
3255 {
3256  char *nextp = rawstring;
3257  bool done = false;
3258 
3259  *namelist = NIL;
3260 
3261  while (isspace((unsigned char) *nextp))
3262  nextp++; /* skip leading whitespace */
3263 
3264  if (*nextp == '\0')
3265  return true; /* allow empty string */
3266 
3267  /* At the top of the loop, we are at start of a new directory. */
3268  do
3269  {
3270  char *curname;
3271  char *endp;
3272 
3273  if (*nextp == '"')
3274  {
3275  /* Quoted name --- collapse quote-quote pairs */
3276  curname = nextp + 1;
3277  for (;;)
3278  {
3279  endp = strchr(nextp + 1, '"');
3280  if (endp == NULL)
3281  return false; /* mismatched quotes */
3282  if (endp[1] != '"')
3283  break; /* found end of quoted name */
3284  /* Collapse adjacent quotes into one quote, and look again */
3285  memmove(endp, endp + 1, strlen(endp));
3286  nextp = endp;
3287  }
3288  /* endp now points at the terminating quote */
3289  nextp = endp + 1;
3290  }
3291  else
3292  {
3293  /* Unquoted name --- extends to separator or end of string */
3294  curname = endp = nextp;
3295  while (*nextp && *nextp != separator)
3296  {
3297  /* trailing whitespace should not be included in name */
3298  if (!isspace((unsigned char) *nextp))
3299  endp = nextp + 1;
3300  nextp++;
3301  }
3302  if (curname == endp)
3303  return false; /* empty unquoted name not allowed */
3304  }
3305 
3306  while (isspace((unsigned char) *nextp))
3307  nextp++; /* skip trailing whitespace */
3308 
3309  if (*nextp == separator)
3310  {
3311  nextp++;
3312  while (isspace((unsigned char) *nextp))
3313  nextp++; /* skip leading whitespace for next */
3314  /* we expect another name, so done remains false */
3315  }
3316  else if (*nextp == '\0')
3317  done = true;
3318  else
3319  return false; /* invalid syntax */
3320 
3321  /* Now safe to overwrite separator with a null */
3322  *endp = '\0';
3323 
3324  /* Truncate path if it's overlength */
3325  if (strlen(curname) >= MAXPGPATH)
3326  curname[MAXPGPATH - 1] = '\0';
3327 
3328  /*
3329  * Finished isolating current name --- add it to list
3330  */
3331  curname = pstrdup(curname);
3332  canonicalize_path(curname);
3333  *namelist = lappend(*namelist, curname);
3334 
3335  /* Loop back if we didn't reach end of string */
3336  } while (!done);
3337 
3338  return true;
3339 }
3340 
3341 
3342 /*****************************************************************************
3343  * Comparison Functions used for bytea
3344  *
3345  * Note: btree indexes need these routines not to leak memory; therefore,
3346  * be careful to free working copies of toasted datums. Most places don't
3347  * need to be so careful.
3348  *****************************************************************************/
3349 
/*
 * Equality test for two bytea values.
 *
 * Raw sizes are obtained with toast_raw_datum_size(); differing sizes give
 * false without detoasting.  Otherwise both values are fetched and their
 * payloads (len1 - VARHDRSZ bytes) compared with memcmp().
 *
 * NOTE(review): the declarator line is missing from this listing (embedded
 * number 3351); presumably byteaeq -- confirm upstream.
 */
3350 Datum
3352 {
3353  Datum arg1 = PG_GETARG_DATUM(0);
3354  Datum arg2 = PG_GETARG_DATUM(1);
3355  bool result;
3356  Size len1,
3357  len2;
3358 
3359  /*
3360  * We can use a fast path for unequal lengths, which might save us from
3361  * having to detoast one or both values.
3362  */
3363  len1 = toast_raw_datum_size(arg1);
3364  len2 = toast_raw_datum_size(arg2);
3365  if (len1 != len2)
3366  result = false;
3367  else
3368  {
3369  bytea *barg1 = DatumGetByteaPP(arg1);
3370  bytea *barg2 = DatumGetByteaPP(arg2);
3371 
      /* Sizes are equal here, so len1 - VARHDRSZ serves for both payloads. */
3372  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3373  len1 - VARHDRSZ) == 0);
3374 
3375  PG_FREE_IF_COPY(barg1, 0);
3376  PG_FREE_IF_COPY(barg2, 1);
3377  }
3378 
3379  PG_RETURN_BOOL(result);
3380 }
3381 
3382 Datum
3384 {
3385  Datum arg1 = PG_GETARG_DATUM(0);
3386  Datum arg2 = PG_GETARG_DATUM(1);
3387  bool result;
3388  Size len1,
3389  len2;
3390 
3391  /*
3392  * We can use a fast path for unequal lengths, which might save us from
3393  * having to detoast one or both values.
3394  */
3395  len1 = toast_raw_datum_size(arg1);
3396  len2 = toast_raw_datum_size(arg2);
3397  if (len1 != len2)
3398  result = true;
3399  else
3400  {
3401  bytea *barg1 = DatumGetByteaPP(arg1);
3402  bytea *barg2 = DatumGetByteaPP(arg2);
3403 
3404  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3405  len1 - VARHDRSZ) != 0);
3406 
3407  PG_FREE_IF_COPY(barg1, 0);
3408  PG_FREE_IF_COPY(barg2, 1);
3409  }
3410 
3411  PG_RETURN_BOOL(result);
3412 }
3413 
3414 Datum
3416 {
3417  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3418  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3419  int len1,
3420  len2;
3421  int cmp;
3422 
3423  len1 = VARSIZE_ANY_EXHDR(arg1);
3424  len2 = VARSIZE_ANY_EXHDR(arg2);
3425 
3426  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3427 
3428  PG_FREE_IF_COPY(arg1, 0);
3429  PG_FREE_IF_COPY(arg2, 1);
3430 
3431  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3432 }
3433 
3434 Datum
3436 {
3437  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3438  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3439  int len1,
3440  len2;
3441  int cmp;
3442 
3443  len1 = VARSIZE_ANY_EXHDR(arg1);
3444  len2 = VARSIZE_ANY_EXHDR(arg2);
3445 
3446  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3447 
3448  PG_FREE_IF_COPY(arg1, 0);
3449  PG_FREE_IF_COPY(arg2, 1);
3450 
3451  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3452 }
3453 
3454 Datum
3456 {
3457  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3458  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3459  int len1,
3460  len2;
3461  int cmp;
3462 
3463  len1 = VARSIZE_ANY_EXHDR(arg1);
3464  len2 = VARSIZE_ANY_EXHDR(arg2);
3465 
3466  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3467 
3468  PG_FREE_IF_COPY(arg1, 0);
3469  PG_FREE_IF_COPY(arg2, 1);
3470 
3471  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3472 }
3473 
3474 Datum
3476 {
3477  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3478  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3479  int len1,
3480  len2;
3481  int cmp;
3482 
3483  len1 = VARSIZE_ANY_EXHDR(arg1);
3484  len2 = VARSIZE_ANY_EXHDR(arg2);
3485 
3486  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3487 
3488  PG_FREE_IF_COPY(arg1, 0);
3489  PG_FREE_IF_COPY(arg2, 1);
3490 
3491  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
3492 }
3493 
3494 Datum
3496 {
3497  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3498  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3499  int len1,
3500  len2;
3501  int cmp;
3502 
3503  len1 = VARSIZE_ANY_EXHDR(arg1);
3504  len2 = VARSIZE_ANY_EXHDR(arg2);
3505 
3506  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3507  if ((cmp == 0) && (len1 != len2))
3508  cmp = (len1 < len2) ? -1 : 1;
3509 
3510  PG_FREE_IF_COPY(arg1, 0);
3511  PG_FREE_IF_COPY(arg2, 1);
3512 
3513  PG_RETURN_INT32(cmp);
3514 }
3515 
3516 Datum
3518 {
3520  MemoryContext oldcontext;
3521 
3522  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3523 
3524  /* Use generic string SortSupport, forcing "C" collation */
3525  varstr_sortsupport(ssup, C_COLLATION_OID, false);
3526 
3527  MemoryContextSwitchTo(oldcontext);
3528 
3529  PG_RETURN_VOID();
3530 }
3531 
3532 /*
3533  * appendStringInfoText
3534  *
3535  * Append a text to str.
3536  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
3537  */
3538 static void
3540 {
3542 }
3543 
3544 /*
3545  * replace_text
3546  * replace all occurrences of 'old_sub_str' in 'orig_str'
3547  * with 'new_sub_str' to form 'new_str'
3548  *
3549  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
3550  * otherwise returns 'new_str'
3551  */
3552 Datum
3554 {
3555  text *src_text = PG_GETARG_TEXT_PP(0);
3556  text *from_sub_text = PG_GETARG_TEXT_PP(1);
3557  text *to_sub_text = PG_GETARG_TEXT_PP(2);
3558  int src_text_len;
3559  int from_sub_text_len;
3561  text *ret_text;
3562  int start_posn;
3563  int curr_posn;
3564  int chunk_len;
3565  char *start_ptr;
3566  StringInfoData str;
3567 
3568  text_position_setup(src_text, from_sub_text, &state);
3569 
3570  /*
3571  * Note: we check the converted string length, not the original, because
3572  * they could be different if the input contained invalid encoding.
3573  */
3574  src_text_len = state.len1;
3575  from_sub_text_len = state.len2;
3576 
3577  /* Return unmodified source string if empty source or pattern */
3578  if (src_text_len < 1 || from_sub_text_len < 1)
3579  {
3580  text_position_cleanup(&state);
3581  PG_RETURN_TEXT_P(src_text);
3582  }
3583 
3584  start_posn = 1;
3585  curr_posn = text_position_next(1, &state);
3586 
3587  /* When the from_sub_text is not found, there is nothing to do. */
3588  if (curr_posn == 0)
3589  {
3590  text_position_cleanup(&state);
3591  PG_RETURN_TEXT_P(src_text);
3592  }
3593 
3594  /* start_ptr points to the start_posn'th character of src_text */
3595  start_ptr = VARDATA_ANY(src_text);
3596 
3597  initStringInfo(&str);
3598 
3599  do
3600  {
3602 
3603  /* copy the data skipped over by last text_position_next() */
3604  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3605  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3606 
3607  appendStringInfoText(&str, to_sub_text);
3608 
3609  start_posn = curr_posn;
3610  start_ptr += chunk_len;
3611  start_posn += from_sub_text_len;
3612  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3613 
3614  curr_posn = text_position_next(start_posn, &state);
3615  }
3616  while (curr_posn > 0);
3617 
3618  /* copy trailing data */
3619  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3620  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3621 
3622  text_position_cleanup(&state);
3623 
3624  ret_text = cstring_to_text_with_len(str.data, str.len);
3625  pfree(str.data);
3626 
3627  PG_RETURN_TEXT_P(ret_text);
3628 }
3629 
3630 /*
3631  * check_replace_text_has_escape_char
3632  *
3633  * check whether replace_text contains escape char.
3634  */
3635 static bool
3637 {
3638  const char *p = VARDATA_ANY(replace_text);
3639  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3640 
3642  {
3643  for (; p < p_end; p++)
3644  {
3645  if (*p == '\\')
3646  return true;
3647  }
3648  }
3649  else
3650  {
3651  for (; p < p_end; p += pg_mblen(p))
3652  {
3653  if (*p == '\\')
3654  return true;
3655  }
3656  }
3657 
3658  return false;
3659 }
3660 
3661 /*
3662  * appendStringInfoRegexpSubstr
3663  *
3664  * Append replace_text to str, substituting regexp back references for
3665  * \n escapes. start_ptr is the start of the match in the source string,
3666  * at logical character position data_pos.
3667  */
3668 static void
3670  regmatch_t *pmatch,
3671  char *start_ptr, int data_pos)
3672 {
3673  const char *p = VARDATA_ANY(replace_text);
3674  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3675  int eml = pg_database_encoding_max_length();
3676 
3677  for (;;)
3678  {
3679  const char *chunk_start = p;
3680  int so;
3681  int eo;
3682 
3683  /* Find next escape char. */
3684  if (eml == 1)
3685  {
3686  for (; p < p_end && *p != '\\'; p++)
3687  /* nothing */ ;
3688  }
3689  else
3690  {
3691  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3692  /* nothing */ ;
3693  }
3694 
3695  /* Copy the text we just scanned over, if any. */
3696  if (p > chunk_start)
3697  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3698 
3699  /* Done if at end of string, else advance over escape char. */
3700  if (p >= p_end)
3701  break;
3702  p++;
3703 
3704  if (p >= p_end)
3705  {
3706  /* Escape at very end of input. Treat same as unexpected char */
3707  appendStringInfoChar(str, '\\');
3708  break;
3709  }
3710 
3711  if (*p >= '1' && *p <= '9')
3712  {
3713  /* Use the back reference of regexp. */
3714  int idx = *p - '0';
3715 
3716  so = pmatch[idx].rm_so;
3717  eo = pmatch[idx].rm_eo;
3718  p++;
3719  }
3720  else if (*p == '&')
3721  {
3722  /* Use the entire matched string. */
3723  so = pmatch[0].rm_so;
3724  eo = pmatch[0].rm_eo;
3725  p++;
3726  }
3727  else if (*p == '\\')
3728  {
3729  /* \\ means transfer one \ to output. */
3730  appendStringInfoChar(str, '\\');
3731  p++;
3732  continue;
3733  }
3734  else
3735  {
3736  /*
3737  * If escape char is not followed by any expected char, just treat
3738  * it as ordinary data to copy. (XXX would it be better to throw
3739  * an error?)
3740  */
3741  appendStringInfoChar(str, '\\');
3742  continue;
3743  }
3744 
3745  if (so != -1 && eo != -1)
3746  {
3747  /*
3748  * Copy the text that is back reference of regexp. Note so and eo
3749  * are counted in characters not bytes.
3750  */
3751  char *chunk_start;
3752  int chunk_len;
3753 
3754  Assert(so >= data_pos);
3755  chunk_start = start_ptr;
3756  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3757  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3758  appendBinaryStringInfo(str, chunk_start, chunk_len);
3759  }
3760  }
3761 }
3762 
3763 #define REGEXP_REPLACE_BACKREF_CNT 10
3764 
3765 /*
3766  * replace_text_regexp
3767  *
3768  * replace text that matches to regexp in src_text to replace_text.
3769  *
3770  * Note: to avoid having to include regex.h in builtins.h, we declare
3771  * the regexp argument as void *, but really it's regex_t *.
3772  */
3773 text *
3774 replace_text_regexp(text *src_text, void *regexp,
3775  text *replace_text, bool glob)
3776 {
3777  text *ret_text;
3778  regex_t *re = (regex_t *) regexp;
3779  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3782  pg_wchar *data;
3783  size_t data_len;
3784  int search_start;
3785  int data_pos;
3786  char *start_ptr;
3787  bool have_escape;
3788 
3789  initStringInfo(&buf);
3790 
3791  /* Convert data string to wide characters. */
3792  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3793  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3794 
3795  /* Check whether replace_text has escape char. */
3796  have_escape = check_replace_text_has_escape_char(replace_text);
3797 
3798  /* start_ptr points to the data_pos'th character of src_text */
3799  start_ptr = (char *) VARDATA_ANY(src_text);
3800  data_pos = 0;
3801 
3802  search_start = 0;
3803  while (search_start <= data_len)
3804  {
3805  int regexec_result;
3806 
3808 
3809  regexec_result = pg_regexec(re,
3810  data,
3811  data_len,
3812  search_start,
3813  NULL, /* no details */
3815  pmatch,
3816  0);
3817 
3818  if (regexec_result == REG_NOMATCH)
3819  break;
3820 
3821  if (regexec_result != REG_OKAY)
3822  {
3823  char errMsg[100];
3824 
3826  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3827  ereport(ERROR,
3828  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3829  errmsg("regular expression failed: %s", errMsg)));
3830  }
3831 
3832  /*
3833  * Copy the text to the left of the match position. Note we are given
3834  * character not byte indexes.
3835  */
3836  if (pmatch[0].rm_so - data_pos > 0)
3837  {
3838  int chunk_len;
3839 
3840  chunk_len = charlen_to_bytelen(start_ptr,
3841  pmatch[0].rm_so - data_pos);
3842  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3843 
3844  /*
3845  * Advance start_ptr over that text, to avoid multiple rescans of
3846  * it if the replace_text contains multiple back-references.
3847  */
3848  start_ptr += chunk_len;
3849  data_pos = pmatch[0].rm_so;
3850  }
3851 
3852  /*
3853  * Copy the replace_text. Process back references when the
3854  * replace_text has escape characters.
3855  */
3856  if (have_escape)
3857  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3858  start_ptr, data_pos);
3859  else
3860  appendStringInfoText(&buf, replace_text);
3861 
3862  /* Advance start_ptr and data_pos over the matched text. */
3863  start_ptr += charlen_to_bytelen(start_ptr,
3864  pmatch[0].rm_eo - data_pos);
3865  data_pos = pmatch[0].rm_eo;
3866 
3867  /*
3868  * When global option is off, replace the first instance only.
3869  */
3870  if (!glob)
3871  break;
3872 
3873  /*
3874  * Advance search position. Normally we start the next search at the
3875  * end of the previous match; but if the match was of zero length, we
3876  * have to advance by one character, or we'd just find the same match
3877  * again.
3878  */
3879  search_start = data_pos;
3880  if (pmatch[0].rm_so == pmatch[0].rm_eo)
3881  search_start++;
3882  }
3883 
3884  /*
3885  * Copy the text to the right of the last match.
3886  */
3887  if (data_pos < data_len)
3888  {
3889  int chunk_len;
3890 
3891  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3892  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3893  }
3894 
3895  ret_text = cstring_to_text_with_len(buf.data, buf.len);
3896  pfree(buf.data);
3897  pfree(data);
3898 
3899  return ret_text;
3900 }
3901 
3902 /*
3903  * split_text
3904  * parse input string
3905  * return ord item (1 based)
3906  * based on provided field separator
3907  */
3908 Datum
3910 {
3911  text *inputstring = PG_GETARG_TEXT_PP(0);
3912  text *fldsep = PG_GETARG_TEXT_PP(1);
3913  int fldnum = PG_GETARG_INT32(2);
3914  int inputstring_len;
3915  int fldsep_len;
3917  int start_posn;
3918  int end_posn;
3919  text *result_text;
3920 
3921  /* field number is 1 based */
3922  if (fldnum < 1)
3923  ereport(ERROR,
3924  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3925  errmsg("field position must be greater than zero")));
3926 
3927  text_position_setup(inputstring, fldsep, &state);
3928 
3929  /*
3930  * Note: we check the converted string length, not the original, because
3931  * they could be different if the input contained invalid encoding.
3932  */
3933  inputstring_len = state.len1;
3934  fldsep_len = state.len2;
3935 
3936  /* return empty string for empty input string */
3937  if (inputstring_len < 1)
3938  {
3939  text_position_cleanup(&state);
3941  }
3942 
3943  /* empty field separator */
3944  if (fldsep_len < 1)
3945  {
3946  text_position_cleanup(&state);
3947  /* if first field, return input string, else empty string */
3948  if (fldnum == 1)
3949  PG_RETURN_TEXT_P(inputstring);
3950  else
3952  }
3953 
3954  /* identify bounds of first field */
3955  start_posn = 1;
3956  end_posn = text_position_next(1, &state);
3957 
3958  /* special case if fldsep not found at all */
3959  if (end_posn == 0)
3960  {
3961  text_position_cleanup(&state);
3962  /* if field 1 requested, return input string, else empty string */
3963  if (fldnum == 1)
3964  PG_RETURN_TEXT_P(inputstring);
3965  else
3967  }
3968 
3969  while (end_posn > 0 && --fldnum > 0)
3970  {
3971  /* identify bounds of next field */
3972  start_posn = end_posn + fldsep_len;
3973  end_posn = text_position_next(start_posn, &state);
3974  }
3975 
3976  text_position_cleanup(&state);
3977 
3978  if (fldnum > 0)
3979  {
3980  /* N'th field separator not found */
3981  /* if last field requested, return it, else empty string */
3982  if (fldnum == 1)
3983  result_text = text_substring(PointerGetDatum(inputstring),
3984  start_posn,
3985  -1,
3986  true);
3987  else
3988  result_text = cstring_to_text("");
3989  }
3990  else
3991  {
3992  /* non-last field requested */
3993  result_text = text_substring(PointerGetDatum(inputstring),
3994  start_posn,
3995  end_posn - start_posn,
3996  false);
3997  }
3998 
3999  PG_RETURN_TEXT_P(result_text);
4000 }
4001 
4002 /*
4003  * Convenience function to return true when two text params are equal.
4004  */
4005 static bool
4006 text_isequal(text *txt1, text *txt2)
4007 {
4009  PointerGetDatum(txt1),
4010  PointerGetDatum(txt2)));
4011 }
4012 
4013 /*
4014  * text_to_array
4015  * parse input string and return text array of elements,
4016  * based on provided field separator
4017  */
4018 Datum
4020 {
4021  return text_to_array_internal(fcinfo);
4022 }
4023 
4024 /*
4025  * text_to_array_null
4026  * parse input string and return text array of elements,
4027  * based on provided field separator and null string
4028  *
4029  * This is a separate entry point only to prevent the regression tests from
4030  * complaining about different argument sets for the same internal function.
4031  */
4032 Datum
4034 {
4035  return text_to_array_internal(fcinfo);
4036 }
4037 
4038 /*
4039  * common code for text_to_array and text_to_array_null functions
4040  *
4041  * These are not strict so we have to test for null inputs explicitly.
4042  */
4043 static Datum
4045 {
4046  text *inputstring;
4047  text *fldsep;
4048  text *null_string;
4049  int inputstring_len;
4050  int fldsep_len;
4051  char *start_ptr;
4052  text *result_text;
4053  bool is_null;
4054  ArrayBuildState *astate = NULL;
4055 
4056  /* when input string is NULL, then result is NULL too */
4057  if (PG_ARGISNULL(0))
4058  PG_RETURN_NULL();
4059 
4060  inputstring = PG_GETARG_TEXT_PP(0);
4061 
4062  /* fldsep can be NULL */
4063  if (!PG_ARGISNULL(1))
4064  fldsep = PG_GETARG_TEXT_PP(1);
4065  else
4066  fldsep = NULL;
4067 
4068  /* null_string can be NULL or omitted */
4069  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4070  null_string = PG_GETARG_TEXT_PP(2);
4071  else
4072  null_string = NULL;
4073 
4074  if (fldsep != NULL)
4075  {
4076  /*
4077  * Normal case with non-null fldsep. Use the text_position machinery
4078  * to search for occurrences of fldsep.
4079  */
4081  int fldnum;
4082  int start_posn;
4083  int end_posn;
4084  int chunk_len;
4085 
4086  text_position_setup(inputstring, fldsep, &state);
4087 
4088  /*
4089  * Note: we check the converted string length, not the original,
4090  * because they could be different if the input contained invalid
4091  * encoding.
4092  */
4093  inputstring_len = state.len1;
4094  fldsep_len = state.len2;
4095 
4096  /* return empty array for empty input string */
4097  if (inputstring_len < 1)
4098  {
4099  text_position_cleanup(&state);
4101  }
4102 
4103  /*
4104  * empty field separator: return the input string as a one-element
4105  * array
4106  */
4107  if (fldsep_len < 1)
4108  {
4109  text_position_cleanup(&state);
4110  /* single element can be a NULL too */
4111  is_null = null_string ? text_isequal(inputstring, null_string) : false;
4113  PointerGetDatum(inputstring),
4114  is_null, 1));
4115  }
4116 
4117  start_posn = 1;
4118  /* start_ptr points to the start_posn'th character of inputstring */
4119  start_ptr = VARDATA_ANY(inputstring);
4120 
4121  for (fldnum = 1;; fldnum++) /* field number is 1 based */
4122  {
4124 
4125  end_posn = text_position_next(start_posn, &state);
4126 
4127  if (end_posn == 0)
4128  {
4129  /* fetch last field */
4130  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4131  }
4132  else
4133  {
4134  /* fetch non-last field */
4135  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
4136  }
4137 
4138  /* must build a temp text datum to pass to accumArrayResult */
4139  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4140  is_null = null_string ? text_isequal(result_text, null_string) : false;
4141 
4142  /* stash away this field */
4143  astate = accumArrayResult(astate,
4144  PointerGetDatum(result_text),
4145  is_null,
4146  TEXTOID,
4148 
4149  pfree(result_text);
4150 
4151  if (end_posn == 0)
4152  break;
4153 
4154  start_posn = end_posn;
4155  start_ptr += chunk_len;
4156  start_posn += fldsep_len;
4157  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
4158  }
4159 
4160  text_position_cleanup(&state);
4161  }
4162  else
4163  {
4164  /*
4165  * When fldsep is NULL, each character in the inputstring becomes an
4166  * element in the result array. The separator is effectively the
4167  * space between characters.
4168  */
4169  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4170 
4171  /* return empty array for empty input string */
4172  if (inputstring_len < 1)
4174 
4175  start_ptr = VARDATA_ANY(inputstring);
4176 
4177  while (inputstring_len > 0)
4178  {
4179  int chunk_len = pg_mblen(start_ptr);
4180 
4182 
4183  /* must build a temp text datum to pass to accumArrayResult */
4184  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4185  is_null = null_string ? text_isequal(result_text, null_string) : false;
4186 
4187  /* stash away this field */
4188  astate = accumArrayResult(astate,
4189  PointerGetDatum(result_text),
4190  is_null,
4191  TEXTOID,
4193 
4194  pfree(result_text);
4195 
4196  start_ptr += chunk_len;
4197  inputstring_len -= chunk_len;
4198  }
4199  }
4200 
4203 }
4204 
4205 /*
4206  * array_to_text
4207  * concatenate Cstring representation of input array elements
4208  * using provided field separator
4209  */
4210 Datum
4212 {
4214  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4215 
4216  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4217 }
4218 
4219 /*
4220  * array_to_text_null
4221  * concatenate Cstring representation of input array elements
4222  * using provided field separator and null string
4223  *
4224  * This version is not strict so we have to test for null inputs explicitly.
4225  */
4226 Datum
4228 {
4229  ArrayType *v;
4230  char *fldsep;
4231  char *null_string;
4232 
4233  /* returns NULL when first or second parameter is NULL */
4234  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4235  PG_RETURN_NULL();
4236 
4237  v = PG_GETARG_ARRAYTYPE_P(0);
4238  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4239 
4240  /* NULL null string is passed through as a null pointer */
4241  if (!PG_ARGISNULL(2))
4242  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4243  else
4244  null_string = NULL;
4245 
4246  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4247 }
4248 
4249 /*
4250  * common code for array_to_text and array_to_text_null functions
4251  */
4252 static text *
4254  const char *fldsep, const char *null_string)
4255 {
4256  text *result;
4257  int nitems,
4258  *dims,
4259  ndims;
4260  Oid element_type;
4261  int typlen;
4262  bool typbyval;
4263  char typalign;
4265  bool printed = false;
4266  char *p;
4267  bits8 *bitmap;
4268  int bitmask;
4269  int i;
4270  ArrayMetaState *my_extra;
4271 
4272  ndims = ARR_NDIM(v);
4273  dims = ARR_DIMS(v);
4274  nitems = ArrayGetNItems(ndims, dims);
4275 
4276  /* if there are no elements, return an empty string */
4277  if (nitems == 0)
4278  return cstring_to_text_with_len("", 0);
4279 
4280  element_type = ARR_ELEMTYPE(v);
4281  initStringInfo(&buf);
4282 
4283  /*
4284  * We arrange to look up info about element type, including its output
4285  * conversion proc, only once per series of calls, assuming the element
4286  * type doesn't change underneath us.
4287  */
4288  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4289  if (my_extra == NULL)
4290  {
4291  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4292  sizeof(ArrayMetaState));
4293  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4294  my_extra->element_type = ~element_type;
4295  }
4296 
4297  if (my_extra->element_type != element_type)
4298  {
4299  /*
4300  * Get info about element type, including its output conversion proc
4301  */
4302  get_type_io_data(element_type, IOFunc_output,
4303  &my_extra->typlen, &my_extra->typbyval,
4304  &my_extra->typalign, &my_extra->typdelim,
4305  &my_extra->typioparam, &my_extra->typiofunc);
4306  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4307  fcinfo->flinfo->fn_mcxt);
4308  my_extra->element_type = element_type;
4309  }
4310  typlen = my_extra->typlen;
4311  typbyval = my_extra->typbyval;
4312  typalign = my_extra->typalign;
4313 
4314  p = ARR_DATA_PTR(v);
4315  bitmap = ARR_NULLBITMAP(v);
4316  bitmask = 1;
4317 
4318  for (i = 0; i < nitems; i++)
4319  {
4320  Datum itemvalue;
4321  char *value;
4322 
4323  /* Get source element, checking for NULL */
4324  if (bitmap && (*bitmap & bitmask) == 0)
4325  {
4326  /* if null_string is NULL, we just ignore null elements */
4327  if (null_string != NULL)
4328  {
4329  if (printed)
4330  appendStringInfo(&buf, "%s%s", fldsep, null_string);
4331  else
4332  appendStringInfoString(&buf, null_string);
4333  printed = true;
4334  }
4335  }
4336  else
4337  {
4338  itemvalue = fetch_att(p, typbyval, typlen);
4339 
4340  value = OutputFunctionCall(&my_extra->proc, itemvalue);
4341 
4342  if (printed)
4343  appendStringInfo(&buf, "%s%s", fldsep, value);
4344  else
4345  appendStringInfoString(&buf, value);
4346  printed = true;
4347 
4348  p = att_addlength_pointer(p, typlen, p);
4349  p = (char *) att_align_nominal(p, typalign);
4350  }
4351 
4352  /* advance bitmap pointer if any */
4353  if (bitmap)
4354  {
4355  bitmask <<= 1;
4356  if (bitmask == 0x100)
4357  {
4358  bitmap++;
4359  bitmask = 1;
4360  }
4361  }
4362  }
4363 
4364  result = cstring_to_text_with_len(buf.data, buf.len);
4365  pfree(buf.data);
4366 
4367  return result;
4368 }
4369 
4370 #define HEXBASE 16
4371 /*
4372  * Convert an int32 to a string containing a base 16 (hex) representation of
4373  * the number.
4374  */
4375 Datum
4377 {
4379  char *ptr;
4380  const char *digits = "0123456789abcdef";
4381  char buf[32]; /* bigger than needed, but reasonable */
4382 
4383  ptr = buf + sizeof(buf) - 1;
4384  *ptr = '\0';
4385 
4386  do
4387  {
4388  *--ptr = digits[value % HEXBASE];
4389  value /= HEXBASE;
4390  } while (ptr > buf && value);
4391 
4393 }
4394 
4395 /*
4396  * Convert an int64 to a string containing a base 16 (hex) representation of
4397  * the number.
4398  */
4399 Datum
4401 {
4402  uint64 value = (uint64) PG_GETARG_INT64(0);
4403  char *ptr;
4404  const char *digits = "0123456789abcdef";
4405  char buf[32]; /* bigger than needed, but reasonable */
4406 
4407  ptr = buf + sizeof(buf) - 1;
4408  *ptr = '\0';
4409 
4410  do
4411  {
4412  *--ptr = digits[value % HEXBASE];
4413  value /= HEXBASE;
4414  } while (ptr > buf && value);
4415 
4417 }
4418 
4419 /*
4420  * Create an md5 hash of a text string and return it as hex
4421  *
4422  * md5 produces a 16 byte (128 bit) hash; double it for hex
4423  */
4424 #define MD5_HASH_LEN 32
4425 
4426 Datum
4428 {
4429  text *in_text = PG_GETARG_TEXT_PP(0);
4430  size_t len;
4431  char hexsum[MD5_HASH_LEN + 1];
4432 
4433  /* Calculate the length of the buffer using varlena metadata */
4434  len = VARSIZE_ANY_EXHDR(in_text);
4435 
4436  /* get the hash result */
4437  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
4438  ereport(ERROR,
4439  (errcode(ERRCODE_OUT_OF_MEMORY),
4440  errmsg("out of memory")));
4441 
4442  /* convert to text and return it */
4444 }
4445 
4446 /*
4447  * Create an md5 hash of a bytea field and return it as a hex string:
4448  * 16-byte md5 digest is represented in 32 hex characters.
4449  */
4450 Datum
4452 {
4453  bytea *in = PG_GETARG_BYTEA_PP(0);
4454  size_t len;
4455  char hexsum[MD5_HASH_LEN + 1];
4456 
4457  len = VARSIZE_ANY_EXHDR(in);
4458  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
4459  ereport(ERROR,
4460  (errcode(ERRCODE_OUT_OF_MEMORY),
4461  errmsg("out of memory")));
4462 
4464 }
4465 
4466 /*
4467  * Return the size of a datum, possibly compressed
4468  *
4469  * Works on any data type
4470  */
4471 Datum
4473 {
4475  int32 result;
4476  int typlen;
4477 
4478  /* On first call, get the input type's typlen, and save at *fn_extra */
4479  if (fcinfo->flinfo->fn_extra == NULL)
4480  {
4481  /* Lookup the datatype of the supplied argument */
4482  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
4483 
4484  typlen = get_typlen(argtypeid);
4485  if (typlen == 0) /* should not happen */
4486  elog(ERROR, "cache lookup failed for type %u", argtypeid);
4487 
4488  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4489  sizeof(int));
4490  *((int *) fcinfo->flinfo->fn_extra) = typlen;
4491  }
4492  else
4493  typlen = *((int *) fcinfo->flinfo->fn_extra);
4494 
4495  if (typlen == -1)
4496  {
4497  /* varlena type, possibly toasted */
4498  result = toast_datum_size(value);
4499  }
4500  else if (typlen == -2)
4501  {
4502  /* cstring */
4503  result = strlen(DatumGetCString(value)) + 1;
4504  }
4505  else
4506  {
4507  /* ordinary fixed-width type */
4508  result = typlen;
4509  }
4510 
4511  PG_RETURN_INT32(result);
4512 }
4513 
4514 /*
4515  * string_agg - Concatenates values and returns string.
4516  *
4517  * Syntax: string_agg(value text, delimiter text) RETURNS text
4518  *
4519  * Note: Any NULL values are ignored. The first-call delimiter isn't
4520  * actually used at all, and on subsequent calls the delimiter precedes
4521  * the associated value.
4522  */
4523 
4524 /* subroutine to initialize state */
4525 static StringInfo
4527 {
4528  StringInfo state;
4529  MemoryContext aggcontext;
4530  MemoryContext oldcontext;
4531 
4532  if (!AggCheckCallContext(fcinfo, &aggcontext))
4533  {
4534  /* cannot be called directly because of internal-type argument */
4535  elog(ERROR, "string_agg_transfn called in non-aggregate context");
4536  }
4537 
4538  /*
4539  * Create state in aggregate context. It'll stay there across subsequent
4540  * calls.
4541  */
4542  oldcontext = MemoryContextSwitchTo(aggcontext);
4543  state = makeStringInfo();
4544  MemoryContextSwitchTo(oldcontext);
4545 
4546  return state;
4547 }
4548 
4549 Datum
4551 {
4552  StringInfo state;
4553 
4554  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4555 
4556  /* Append the value unless null. */
4557  if (!PG_ARGISNULL(1))
4558  {
4559  /* On the first time through, we ignore the delimiter. */
4560  if (state == NULL)
4561  state = makeStringAggState(fcinfo);
4562  else if (!PG_ARGISNULL(2))
4563  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
4564 
4565  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
4566  }
4567 
4568  /*
4569  * The transition type for string_agg() is declared to be "internal",
4570  * which is a pass-by-value type the same size as a pointer.
4571  */
4572  PG_RETURN_POINTER(state);
4573 }
4574 
4575 Datum
4577 {
4578  StringInfo state;
4579 
4580  /* cannot be called directly because of internal-type argument */
4581  Assert(AggCheckCallContext(fcinfo, NULL));
4582 
4583  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4584 
4585  if (state != NULL)
4587  else
4588  PG_RETURN_NULL();
4589 }
4590 
4591 /*
4592  * Implementation of both concat() and concat_ws().
4593  *
4594  * sepstr is the separator string to place between values.
4595  * argidx identifies the first argument to concatenate (counting from zero).
4596  * Returns NULL if result should be NULL, else text value.
4597  */
4598 static text *
4599 concat_internal(const char *sepstr, int argidx,
4600  FunctionCallInfo fcinfo)
4601 {
4602  text *result;
4603  StringInfoData str;
4604  bool first_arg = true;
4605  int i;
4606 
4607  /*
4608  * concat(VARIADIC some-array) is essentially equivalent to
4609  * array_to_text(), ie concat the array elements with the given separator.
4610  * So we just pass the case off to that code.
4611  */
4612  if (get_fn_expr_variadic(fcinfo->flinfo))
4613  {
4614  ArrayType *arr;
4615 
4616  /* Should have just the one argument */
4617  Assert(argidx == PG_NARGS() - 1);
4618 
4619  /* concat(VARIADIC NULL) is defined as NULL */
4620  if (PG_ARGISNULL(argidx))
4621  return NULL;
4622 
4623  /*
4624  * Non-null argument had better be an array. We assume that any call
4625  * context that could let get_fn_expr_variadic return true will have
4626  * checked that a VARIADIC-labeled parameter actually is an array. So
4627  * it should be okay to just Assert that it's an array rather than
4628  * doing a full-fledged error check.
4629  */
4631 
4632  /* OK, safe to fetch the array value */
4633  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4634 
4635  /*
4636  * And serialize the array. We tell array_to_text to ignore null
4637  * elements, which matches the behavior of the loop below.
4638  */
4639  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4640  }
4641 
4642  /* Normal case without explicit VARIADIC marker */
4643  initStringInfo(&str);
4644 
4645  for (i = argidx; i < PG_NARGS(); i++)
4646  {
4647  if (!PG_ARGISNULL(i))
4648  {
4650  Oid valtype;
4651  Oid typOutput;
4652  bool typIsVarlena;
4653 
4654  /* add separator if appropriate */
4655  if (first_arg)
4656  first_arg = false;
4657  else
4658  appendStringInfoString(&str, sepstr);
4659 
4660  /* call the appropriate type output function, append the result */
4661  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4662  if (!OidIsValid(valtype))
4663  elog(ERROR, "could not determine data type of concat() input");
4664  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4666  OidOutputFunctionCall(typOutput, value));
4667  }
4668  }
4669 
4670  result = cstring_to_text_with_len(str.data, str.len);
4671  pfree(str.data);
4672 
4673  return result;
4674 }
4675 
4676 /*
4677  * Concatenate all arguments. NULL arguments are ignored.
4678  */
4679 Datum
4681 {
4682  text *result;
4683 
4684  result = concat_internal("", 0, fcinfo);
4685  if (result == NULL)
4686  PG_RETURN_NULL();
4687  PG_RETURN_TEXT_P(result);
4688 }
4689 
4690 /*
4691  * Concatenate all but first argument value with separators. The first
4692  * parameter is used as the separator. NULL arguments are ignored.
4693  */
4694 Datum
4696 {
4697  char *sep;
4698  text *result;
4699 
4700  /* return NULL when separator is NULL */
4701  if (PG_ARGISNULL(0))
4702  PG_RETURN_NULL();
4704 
4705  result = concat_internal(sep, 1, fcinfo);
4706  if (result == NULL)
4707  PG_RETURN_NULL();
4708  PG_RETURN_TEXT_P(result);
4709 }
4710 
4711 /*
4712  * Return first n characters in the string. When n is negative,
4713  * return all but last |n| characters.
4714  */
4715 Datum
4717 {
4718  text *str = PG_GETARG_TEXT_PP(0);
4719  const char *p = VARDATA_ANY(str);
4720  int len = VARSIZE_ANY_EXHDR(str);
4721  int n = PG_GETARG_INT32(1);
4722  int rlen;
4723 
4724  if (n < 0)
4725  n = pg_mbstrlen_with_len(p, len) + n;
4726  rlen = pg_mbcharcliplen(p, len, n);
4727 
4729 }
4730 
4731 /*
4732  * Return last n characters in the string. When n is negative,
4733  * return all but first |n| characters.
4734  */
4735 Datum
4737 {
4738  text *str = PG_GETARG_TEXT_PP(0);
4739  const char *p = VARDATA_ANY(str);
4740  int len = VARSIZE_ANY_EXHDR(str);
4741  int n = PG_GETARG_INT32(1);
4742  int off;
4743 
4744  if (n < 0)
4745  n = -n;
4746  else
4747  n = pg_mbstrlen_with_len(p, len) - n;
4748  off = pg_mbcharcliplen(p, len, n);
4749 
4750  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4751 }
4752 
4753 /*
4754  * Return reversed string
4755  */
4756 Datum
4758 {
4759  text *str = PG_GETARG_TEXT_PP(0);
4760  const char *p = VARDATA_ANY(str);
4761  int len = VARSIZE_ANY_EXHDR(str);
4762  const char *endp = p + len;
4763  text *result;
4764  char *dst;
4765 
4766  result = palloc(len + VARHDRSZ);
4767  dst = (char *) VARDATA(result) + len;
4768  SET_VARSIZE(result, len + VARHDRSZ);
4769 
4771  {
4772  /* multibyte version */
4773  while (p < endp)
4774  {
4775  int sz;
4776 
4777  sz = pg_mblen(p);
4778  dst -= sz;
4779  memcpy(dst, p, sz);
4780  p += sz;
4781  }
4782  }
4783  else
4784  {
4785  /* single byte version */
4786  while (p < endp)
4787  *(--dst) = *p++;
4788  }
4789 
4790  PG_RETURN_TEXT_P(result);
4791 }
4792 
4793 
4794 /*
4795  * Support macros for text_format()
4796  */
4797 #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus flag present? */
4798 
4799 #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
4800  do { \
4801  if (++(ptr) >= (end_ptr)) \
4802  ereport(ERROR, \
4803  (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
4804  errmsg("unterminated format() type specifier"), \
4805  errhint("For a single \"%%\" use \"%%%%\"."))); \
4806  } while (0)
4807 
4808 /*
4809  * Returns a formatted string
4810  */
4811 Datum
4813 {
4814  text *fmt;
4815  StringInfoData str;
4816  const char *cp;
4817  const char *start_ptr;
4818  const char *end_ptr;
4819  text *result;
4820  int arg;
4821  bool funcvariadic;
4822  int nargs;
4823  Datum *elements = NULL;
4824  bool *nulls = NULL;
4825  Oid element_type = InvalidOid;
4826  Oid prev_type = InvalidOid;
4827  Oid prev_width_type = InvalidOid;
4828  FmgrInfo typoutputfinfo;
4829  FmgrInfo typoutputinfo_width;
4830 
4831  /* When format string is null, immediately return null */
4832  if (PG_ARGISNULL(0))
4833  PG_RETURN_NULL();
4834 
4835  /* If argument is marked VARIADIC, expand array into elements */
4836  if (get_fn_expr_variadic(fcinfo->flinfo))
4837  {
4838  ArrayType *arr;
4839  int16 elmlen;
4840  bool elmbyval;
4841  char elmalign;
4842  int nitems;
4843 
4844  /* Should have just the one argument */
4845  Assert(PG_NARGS() == 2);
4846 
4847  /* If argument is NULL, we treat it as zero-length array */
4848  if (PG_ARGISNULL(1))
4849  nitems = 0;
4850  else
4851  {
4852  /*
4853  * Non-null argument had better be an array. We assume that any
4854  * call context that could let get_fn_expr_variadic return true
4855  * will have checked that a VARIADIC-labeled parameter actually is
4856  * an array. So it should be okay to just Assert that it's an
4857  * array rather than doing a full-fledged error check.
4858  */
4860 
4861  /* OK, safe to fetch the array value */
4862  arr = PG_GETARG_ARRAYTYPE_P(1);
4863 
4864  /* Get info about array element type */
4865  element_type = ARR_ELEMTYPE(arr);
4866  get_typlenbyvalalign(element_type,
4867  &elmlen, &elmbyval, &elmalign);
4868 
4869  /* Extract all array elements */
4870  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
4871  &elements, &nulls, &nitems);
4872  }
4873 
4874  nargs = nitems + 1;
4875  funcvariadic = true;
4876  }
4877  else
4878  {
4879  /* Non-variadic case, we'll process the arguments individually */
4880  nargs = PG_NARGS();
4881  funcvariadic = false;
4882  }
4883 
4884  /* Setup for main loop. */
4885  fmt = PG_GETARG_TEXT_PP(0);
4886  start_ptr = VARDATA_ANY(fmt);
4887  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
4888  initStringInfo(&str);
4889  arg = 1; /* next argument position to print */
4890 
4891  /* Scan format string, looking for conversion specifiers. */
4892  for (cp = start_ptr; cp < end_ptr; cp++)
4893  {
4894  int argpos;
4895  int widthpos;
4896  int flags;
4897  int width;
4898  Datum value;
4899  bool isNull;
4900  Oid typid;
4901 
4902  /*
4903  * If it's not the start of a conversion specifier, just copy it to
4904  * the output buffer.
4905  */
4906  if (*cp != '%')
4907  {
4908  appendStringInfoCharMacro(&str, *cp);
4909  continue;
4910  }
4911 
4912  ADVANCE_PARSE_POINTER(cp, end_ptr);
4913 
4914  /* Easy case: %% outputs a single % */
4915  if (*cp == '%')
4916  {
4917  appendStringInfoCharMacro(&str, *cp);
4918  continue;
4919  }
4920 
4921  /* Parse the optional portions of the format specifier */
4922  cp = text_format_parse_format(cp, end_ptr,
4923  &argpos, &widthpos,
4924  &flags, &width);
4925 
4926  /*
4927  * Next we should see the main conversion specifier. Whether or not
4928  * an argument position was present, it's known that at least one
4929  * character remains in the string at this point. Experience suggests
4930  * that it's worth checking that that character is one of the expected
4931  * ones before we try to fetch arguments, so as to produce the least
4932  * confusing response to a mis-formatted specifier.
4933  */
4934  if (strchr("sIL", *cp) == NULL)
4935  ereport(ERROR,
4936  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4937  errmsg("unrecognized format() type specifier \"%c\"",
4938  *cp),
4939  errhint("For a single \"%%\" use \"%%%%\".")));
4940 
4941  /* If indirect width was specified, get its value */
4942  if (widthpos >= 0)
4943  {
4944  /* Collect the specified or next argument position */
4945  if (widthpos > 0)
4946  arg = widthpos;
4947  if (arg >= nargs)
4948  ereport(ERROR,
4949  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4950  errmsg("too few arguments for format()")));
4951 
4952  /* Get the value and type of the selected argument */
4953  if (!funcvariadic)
4954  {
4955  value = PG_GETARG_DATUM(arg);
4956  isNull = PG_ARGISNULL(arg);
4957  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4958  }
4959  else
4960  {
4961  value = elements[arg - 1];
4962  isNull = nulls[arg - 1];
4963  typid = element_type;
4964  }
4965  if (!OidIsValid(typid))
4966  elog(ERROR, "could not determine data type of format() input");
4967 
4968  arg++;
4969 
4970  /* We can treat NULL width the same as zero */
4971  if (isNull)
4972  width = 0;
4973  else if (typid == INT4OID)
4974  width = DatumGetInt32(value);
4975  else if (typid == INT2OID)
4976  width = DatumGetInt16(value);
4977  else
4978  {
4979  /* For less-usual datatypes, convert to text then to int */
4980  char *str;
4981 
4982  if (typid != prev_width_type)
4983  {
4984  Oid typoutputfunc;
4985  bool typIsVarlena;
4986 
4987  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4988  fmgr_info(typoutputfunc, &typoutputinfo_width);
4989  prev_width_type = typid;
4990  }
4991 
4992  str = OutputFunctionCall(&typoutputinfo_width, value);
4993 
4994  /* pg_atoi will complain about bad data or overflow */
4995  width = pg_atoi(str, sizeof(int), '\0');
4996 
4997  pfree(str);
4998  }
4999  }
5000 
5001  /* Collect the specified or next argument position */
5002  if (argpos > 0)
5003  arg = argpos;
5004  if (arg >= nargs)
5005  ereport(ERROR,
5006  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5007  errmsg("too few arguments for format")));
5008 
5009  /* Get the value and type of the selected argument */
5010  if (!funcvariadic)
5011  {
5012  value = PG_GETARG_DATUM(arg);
5013  isNull = PG_ARGISNULL(arg);
5014  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5015  }
5016  else
5017  {
5018  value = elements[arg - 1];
5019  isNull = nulls[arg - 1];
5020  typid = element_type;
5021  }
5022  if (!OidIsValid(typid))
5023  elog(ERROR, "could not determine data type of format() input");
5024 
5025  arg++;
5026 
5027  /*
5028  * Get the appropriate typOutput function, reusing previous one if
5029  * same type as previous argument. That's particularly useful in the
5030  * variadic-array case, but often saves work even for ordinary calls.
5031  */
5032  if (typid != prev_type)
5033  {
5034  Oid typoutputfunc;
5035  bool typIsVarlena;
5036 
5037  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5038  fmgr_info(typoutputfunc, &typoutputfinfo);
5039  prev_type = typid;
5040  }
5041 
5042  /*
5043  * And now we can format the value.
5044  */
5045  switch (*cp)
5046  {
5047  case 's':
5048  case 'I':
5049  case 'L':
5050  text_format_string_conversion(&str, *cp, &typoutputfinfo,
5051  value, isNull,
5052  flags, width);
5053  break;
5054  default:
5055  /* should not get here, because of previous check */
5056  ereport(ERROR,
5057  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5058  errmsg("unrecognized format() type specifier \"%c\"",
5059  *cp),
5060  errhint("For a single \"%%\" use \"%%%%\".")));
5061  break;
5062  }
5063  }
5064 
5065  /* Don't need deconstruct_array results anymore. */
5066  if (elements != NULL)
5067  pfree(elements);
5068  if (nulls != NULL)
5069  pfree(nulls);
5070 
5071  /* Generate results. */
5072  result = cstring_to_text_with_len(str.data, str.len);
5073  pfree(str.data);
5074 
5075  PG_RETURN_TEXT_P(result);
5076 }
5077 
5078 /*
5079  * Parse contiguous digits as a decimal number.
5080  *
5081  * Returns true if some digits could be parsed.
5082  * The value is returned into *value, and *ptr is advanced to the next
5083  * character to be parsed.
5084  *
5085  * Note parsing invariant: at least one character is known available before
5086  * string end (end_ptr) at entry, and this is still true at exit.
5087  */
5088 static bool
5089 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5090 {
5091  bool found = false;
5092  const char *cp = *ptr;
5093  int val = 0;
5094 
5095  while (*cp >= '0' && *cp <= '9')
5096  {
5097  int newval = val * 10 + (*cp - '0');
5098 
5099  if (newval / 10 != val) /* overflow? */
5100  ereport(ERROR,
5101  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5102  errmsg("number is out of range")));
5103  val = newval;
5104  ADVANCE_PARSE_POINTER(cp, end_ptr);
5105  found = true;
5106  }
5107 
5108  *ptr = cp;
5109  *value = val;
5110 
5111  return found;
5112 }
5113 
5114 /*
5115  * Parse a format specifier (generally following the SUS printf spec).
5116  *
5117  * We have already advanced over the initial '%', and we are looking for
5118  * [argpos][flags][width]type (but the type character is not consumed here).
5119  *
5120  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5121  * Output parameters:
5122  * argpos: argument position for value to be printed. -1 means unspecified.
5123  * widthpos: argument position for width. Zero means the argument position
5124  * was unspecified (ie, take the next arg) and -1 means no width
5125  * argument (width was omitted or specified as a constant).
5126  * flags: bitmask of flags.
5127  * width: directly-specified width value. Zero means the width was omitted
5128  * (note it's not necessary to distinguish this case from an explicit
5129  * zero width value).
5130  *
5131  * The function result is the next character position to be parsed, ie, the
5132  * location where the type character is/should be.
5133  *
5134  * Note parsing invariant: at least one character is known available before
5135  * string end (end_ptr) at entry, and this is still true at exit.
5136  */
5137 static const char *
5138 text_format_parse_format(const char *start_ptr, const char *end_ptr,
5139  int *argpos, int *widthpos,
5140  int *flags, int *width)
5141 {
5142  const char *cp = start_ptr;
5143  int n;
5144 
5145  /* set defaults for output parameters */
5146  *argpos = -1;
5147  *widthpos = -1;
5148  *flags = 0;
5149  *width = 0;
5150 
5151  /* try to identify first number */
5152  if (text_format_parse_digits(&cp, end_ptr, &n))
5153  {
5154  if (*cp != '$')
5155  {
5156  /* Must be just a width and a type, so we're done */
5157  *width = n;
5158  return cp;
5159  }
5160  /* The number was argument position */
5161  *argpos = n;
5162  /* Explicit 0 for argument index is immediately refused */
5163  if (n == 0)
5164  ereport(ERROR,
5165  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5166  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5167  ADVANCE_PARSE_POINTER(cp, end_ptr);
5168  }
5169 
5170  /* Handle flags (only minus is supported now) */
5171  while (*cp == '-')
5172  {
5173  *flags |= TEXT_FORMAT_FLAG_MINUS;
5174  ADVANCE_PARSE_POINTER(cp, end_ptr);
5175  }
5176 
5177  if (*cp == '*')
5178  {
5179  /* Handle indirect width */
5180  ADVANCE_PARSE_POINTER(cp, end_ptr);
5181  if (text_format_parse_digits(&cp, end_ptr, &n))
5182  {
5183  /* number in this position must be closed by $ */
5184  if (*cp != '$')
5185  ereport(ERROR,
5186  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5187  errmsg("width argument position must be ended by \"$\"")));
5188  /* The number was width argument position */
5189  *widthpos = n;
5190  /* Explicit 0 for argument index is immediately refused */
5191  if (n == 0)
5192  ereport(ERROR,
5193  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5194  errmsg("format specifies argument 0, but arguments are numbered from 1")));
5195  ADVANCE_PARSE_POINTER(cp, end_ptr);
5196  }
5197  else
5198  *widthpos = 0; /* width's argument position is unspecified */
5199  }
5200  else
5201  {
5202  /* Check for direct width specification */
5203  if (text_format_parse_digits(&cp, end_ptr, &n))
5204  *width = n;
5205  }
5206 
5207  /* cp should now be pointing at type character */
5208  return cp;
5209 }
5210 
5211 /*
5212  * Format a %s, %I, or %L conversion
5213  */
5214 static void
5216  FmgrInfo *typOutputInfo,
5217  Datum value, bool isNull,
5218  int flags, int width)
5219 {
5220  char *str;
5221 
5222  /* Handle NULL arguments before trying to stringify the value. */
5223  if (isNull)
5224  {
5225  if (conversion == 's')
5226  text_format_append_string(buf, "", flags, width);
5227  else if (conversion == 'L')
5228  text_format_append_string(buf, "NULL", flags, width);
5229  else if (conversion == 'I')
5230  ereport(ERROR,
5231  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5232  errmsg("null values cannot be formatted as an SQL identifier")));
5233  return;
5234  }
5235 
5236  /* Stringify. */
5237  str = OutputFunctionCall(typOutputInfo, value);
5238 
5239  /* Escape. */
5240  if (conversion == 'I')
5241  {
5242  /* quote_identifier may or may not allocate a new string. */
5243  text_format_append_string(buf, quote_identifier(str), flags, width);
5244  }
5245  else if (conversion == 'L')
5246  {
5247  char *qstr = quote_literal_cstr(str);
5248 
5249  text_format_append_string(buf, qstr, flags, width);
5250  /* quote_literal_cstr() always allocates a new string */
5251  pfree(qstr);
5252  }
5253  else
5254  text_format_append_string(buf, str, flags, width);
5255 
5256  /* Cleanup. */
5257  pfree(str);
5258 }
5259 
5260 /*
5261  * Append str to buf, padding as directed by flags/width
5262  */
5263 static void
5265  int flags, int width)
5266 {
5267  bool align_to_left = false;
5268  int len;
5269 
5270  /* fast path for typical easy case */
5271  if (width == 0)
5272  {
5273  appendStringInfoString(buf, str);
5274  return;
5275  }
5276 
5277  if (width < 0)
5278  {
5279  /* Negative width: implicit '-' flag, then take absolute value */
5280  align_to_left = true;
5281  /* -INT_MIN is undefined */
5282  if (width <= INT_MIN)
5283  ereport(ERROR,
5284  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5285  errmsg("number is out of range")));
5286  width = -width;
5287  }
5288  else if (flags & TEXT_FORMAT_FLAG_MINUS)
5289  align_to_left = true;
5290 
5291  len = pg_mbstrlen(str);
5292  if (align_to_left)
5293  {
5294  /* left justify */
5295  appendStringInfoString(buf, str);
5296  if (len < width)
5297  appendStringInfoSpaces(buf, width - len);
5298  }
5299  else
5300  {
5301  /* right justify */
5302  if (len < width)
5303  appendStringInfoSpaces(buf, width - len);
5304  appendStringInfoString(buf, str);
5305  }
5306 }
5307 
5308 /*
5309  * text_format_nv - nonvariadic wrapper for text_format function.
5310  *
5311  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5312  * which checks that all built-in functions that share the implementing C
5313  * function take the same number of arguments.
5314  */
5315 Datum
5317 {
5318  return text_format(fcinfo);
5319 }
5320 
/*
 * Helper function for Levenshtein distance functions.  Reports whether the
 * first len bytes of s1 and s2 are identical, comparing from the last byte
 * back towards the first.  A len of zero or less compares nothing and yields
 * true.  Faster than memcmp(), for this use case.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
5336 
5337 /* Expand each Levenshtein distance variant */
5338 #include "levenshtein.c"
5339 #define LEVENSHTEIN_LESS_EQUAL
5340 #include "levenshtein.c"
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2560
#define PG_CACHE_LINE_SIZE
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4019
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2696
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1994
Value * makeString(char *str)
Definition: value.c:53
signed short int16
Definition: c.h:252
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:107
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:352
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:305
#define DatumGetUInt32(X)
Definition: postgres.h:494
#define NIL
Definition: pg_list.h:69
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:4812
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1961
int length(const List *list)
Definition: list.c:1271
#define PG_GETARG_INT32(n)
Definition: fmgr.h:225
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:4799
Definition: fmgr.h:53
text * replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob)
Definition: varlena.c:3774
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:315
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:2973
static struct @76 value
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:873
Datum split_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3909
int errhint(const char *fmt,...)
Definition: elog.c:987
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1023
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2600
#define VARDATA_ANY(PTR)
Definition: postgres.h:349
#define VARDATA(PTR)
Definition: postgres.h:305
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
MemoryContext fn_mcxt
Definition: fmgr.h:62
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:144
#define MD5_HASH_LEN
Definition: varlena.c:4424
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:9515
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1663
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:2512
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:263
#define DatumGetInt32(X)
Definition: postgres.h:480
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:2544
#define HEXBASE
Definition: varlena.c:4370
#define TEXTOID
Definition: pg_type.h:324
#define VARSIZE(PTR)
Definition: postgres.h:306
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3553
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:3455
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:5215
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:1989
#define PointerGetDatum(X)
Definition: postgres.h:564
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:131
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:531
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:224
static void text_position_setup(text *t1, text *t2, TextPositionState *state)
Definition: varlena.c:1120
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:358
#define VARHDRSZ
Definition: c.h:440
Datum md5_bytea(PG_FUNCTION_ARGS)
Definition: varlena.c:4451
char * pstrdup(const char *in)
Definition: mcxt.c:1168
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:520
regoff_t rm_so
Definition: regex.h:85
#define DatumGetTextPP(X)
Definition: fmgr.h:249
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
StringInfoData * StringInfo
Definition: stringinfo.h:43
#define Min(x, y)
Definition: c.h:798
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:277
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2149
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2124
#define PG_RETURN_INT32(x)
Definition: fmgr.h:298
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:262
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:2630
#define INT4OID
Definition: pg_type.h:316
void canonicalize_path(char *path)
Definition: path.c:254
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2455
int errcode(int sqlerrcode)
Definition: elog.c:575
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:162
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
#define DatumGetByteaPP(X)
Definition: fmgr.h:247
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:232
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:2934
pg_wchar * wstr2
Definition: varlena.c:51
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:483
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4550
Datum md5_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4427
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:313
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3424
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:2705
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3351
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:623
#define OidIsValid(objectId)
Definition: c.h:530
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1738
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:378
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:214
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1035
bool trace_sort
Definition: tuplesort.c:153
#define PG_GET_COLLATION()
Definition: fmgr.h:155
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2775
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:4680
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:661
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:5264
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4227
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:4695
regoff_t rm_eo
Definition: regex.h:86
signed int int32
Definition: c.h:253
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:2663
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1924
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:270
int pg_locale_t
Definition: pg_locale.h:71
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1943
static int32 text_length(Datum str)
Definition: varlena.c:641
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:805
bool typbyval
Definition: array.h:221
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:187
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:4400
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:316
#define PG_GETARG_BYTEA_P(n)
Definition: fmgr.h:267
static Datum text_to_array_internal(PG_FUNCTION_ARGS)
Definition: varlena.c:4044
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:3415
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3253
FmgrInfo * flinfo
Definition: fmgr.h:71
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:244
#define wcscoll_l
Definition: win32.h:358
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:127
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:65
unsigned hex_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:126
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4211
void pfree(void *pointer)
Definition: mcxt.c:995
Size toast_raw_datum_size(Datum value)
Definition: tuptoaster.c:351
#define REG_OKAY
Definition: regex.h:137
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:78
Datum string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4576
Datum textoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:1012
#define ERROR
Definition: elog.h:43
char * s1
static bool check_replace_text_has_escape_char(const text *replace_text)
Definition: varlena.c:3636
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1011
#define DatumGetCString(X)
Definition: postgres.h:574
Size toast_datum_size(Datum value)
Definition: tuptoaster.c:407
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:2313
Datum byteage(PG_FUNCTION_ARGS)
Definition: varlena.c:3475
#define ARR_DIMS(a)
Definition: array.h:275
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:160
MemoryContext ssup_cxt
Definition: sortsupport.h:66
struct varlena * pg_detoast_datum_packed(struct varlena *datum)
Definition: fmgr.c:2267
static int text_position_next(int start_pos, TextPositionState *state)
Definition: varlena.c:1232
Datum text_to_array_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4033
#define MAXPGPATH
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:831
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:256
static int charlen_to_bytelen(const char *p, int n)
Definition: varlena.c:736
static text * text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
Definition: varlena.c:815
Datum unknownrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:588
static text * array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string)
Definition: varlena.c:4253
Definition: c.h:488
static void appendStringInfoText(StringInfo str, const text *t)
Definition: varlena.c:3539
Datum text_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:2441
#define INT2OID
Definition: pg_type.h:308
Datum texteq(PG_FUNCTION_ARGS)
Definition: varlena.c:1599
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:157
#define ARR_DATA_PTR(a)
Definition: array.h:303
hyperLogLogState abbr_card
Definition: varlena.c:72
Datum text_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:2453
Datum textne(PG_FUNCTION_ARGS)
Definition: varlena.c:1634
int16 typlen
Definition: array.h:220
static char * buf
Definition: pg_test_fsync.c:65
#define DatumBigEndianToNative(x)
Definition: pg_bswap.h:64
#define memmove(d, s, c)
Definition: c.h:1038
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:162
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3128
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:68
char typdelim
Definition: array.h:223
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition: varlena.c:2787
Datum text_name(PG_FUNCTION_ARGS)
Definition: varlena.c:3030
static text * text_catenate(text *t1, text *t2)
Definition: varlena.c:695
#define DatumGetInt16(X)
Definition: postgres.h:452
#define DatumGetBool(X)
Definition: postgres.h:401
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
Definition: geqo_px.c:46
unsigned int uint32
Definition: c.h:265
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:192
void * ssup_extra
Definition: sortsupport.h:87
ArrayType * create_singleton_array(FunctionCallInfo fcinfo, Oid element_type, Datum element, bool isNull, int ndims)
Datum textpos(PG_FUNCTION_ARGS)
Definition: varlena.c:1073
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
Datum text_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:796
int bytea_output
Definition: varlena.c:40
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:170
static int text_cmp(text *arg1, text *arg2, Oid collid)
Definition: varlena.c:1574
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:2868
#define S(n, x)
Definition: sha1.c:55
#define PG_RETURN_ARRAYTYPE_P(x)
Definition: array.h:246
Datum pg_column_size(PG_FUNCTION_ARGS)
Definition: varlena.c:4472
Datum text_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:1693
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:172
#define ereport(elevel, rest)
Definition: elog.h:122
static int internal_text_pattern_compare(text *arg1, text *arg2)
Definition: varlena.c:2474
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5057
static bool text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
Definition: varlena.c:5089
unsigned int pg_wchar
Definition: mbprint.c:31
#define DatumGetVarStringPP(X)
Definition: varlena.c:93
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3070
#define byte(x, n)
Definition: rijndael.c:68
Datum textcat(PG_FUNCTION_ARGS)
Definition: varlena.c:680
List * lappend(List *list, void *datum)
Definition: list.c:128
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3053
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:587
#define MaxAllocSize
Definition: memutils.h:40
int skiptable[256]
Definition: varlena.c:56
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:443
void varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
Definition: varlena.c:1765
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1148
Datum text_le(PG_FUNCTION_ARGS)
Definition: varlena.c:1678
Datum hash_uint32(uint32 k)
Definition: hashfunc.c:513
uint8 bits8
Definition: c.h:272
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:782
#define TextDatumGetCString(d)
Definition: builtins.h:807
void * palloc0(Size size)
Definition: mcxt.c:923
Datum text_format_nv(PG_FUNCTION_ARGS)
Definition: varlena.c:5316
char * s2
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:303
uintptr_t Datum
Definition: postgres.h:374
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
Datum text_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:4757
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:2682
int bpchartruelen(char *s, int len)
Definition: varchar.c:660
#define REGEXP_REPLACE_BACKREF_CNT
Definition: varlena.c:3763
void appendStringInfoSpaces(StringInfo str, int count)
Definition: stringinfo.c:187
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:785
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:185
Datum text_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:1708
#define VARSIZE_ANY(PTR)
Definition: postgres.h:336
#define strxfrm_l
Definition: win32.h:357
static void text_position_cleanup(TextPositionState *state)
Definition: varlena.c:1366
Datum byteacmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3495
#define InvalidOid
Definition: postgres_ext.h:36
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:734
Datum to_hex32(PG_FUNCTION_ARGS)
Definition: varlena.c:4376
hyperLogLogState full_card
Definition: varlena.c:73
#define PG_RETURN_VOID()
Definition: fmgr.h:293
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:314
#define Max(x, y)
Definition: c.h:792
text * cstring_to_text(const char *s)
Definition: varlena.c:150
Datum unknownsend(PG_FUNCTION_ARGS)
Definition: varlena.c:603
#define PG_ARGISNULL(n)
Definition: fmgr.h:166
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:667
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:313
Datum hash_any(register const unsigned char *k, register int keylen)
Definition: hashfunc.c:308
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:569
Datum text_right(PG_FUNCTION_ARGS)
Definition: varlena.c:4736
static text * concat_internal(const char *sepstr, int argidx, FunctionCallInfo fcinfo)
Definition: varlena.c:4599
int varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
Definition: varlena.c:1383
Oid typioparam
Definition: array.h:224
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:306
Datum unknownin(PG_FUNCTION_ARGS)
Definition: varlena.c:564
size_t Size
Definition: c.h:352
static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: varlena.c:2327
static bool rest_of_char_same(const char *s1, const char *s2, int len)
Definition: varlena.c:5326
Datum text_pattern_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:2496
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:549
#define newval
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:268
Datum byteane(PG_FUNCTION_ARGS)
Definition: varlena.c:3383
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:166
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:3564
Datum textin(PG_FUNCTION_ARGS)
Definition: varlena.c:509
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:216
#define PG_NARGS()
Definition: fmgr.h:160
#define C_COLLATION_OID
Definition: pg_collation.h:71
void * fn_extra
Definition: fmgr.h:61
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)
Definition: varlena.c:3669
#define ARR_NDIM(a)
Definition: array.h:271
Datum byteapos(PG_FUNCTION_ARGS)
Definition: varlena.c:2824
#define TEXTBUFLEN
Definition: varlena.c:84
Oid typiofunc
Definition: array.h:225
#define DatumGetPointer(X)
Definition: postgres.h:557
char typalign
Definition: array.h:222
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3475
char * text_to_cstring(const text *t)
Definition: varlena.c:183
pg_wchar * wstr1
Definition: varlena.c:50
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:4993
#define DatumGetBpCharPP(X)
Definition: fmgr.h:251
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2525
Datum bttextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:1723
Datum unknownout(PG_FUNCTION_ARGS)
Definition: varlena.c:576
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:172
int16 get_typlen(Oid typid)
Definition: lsyscache.c:1915
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:2048
Datum bytearecv(PG_FUNCTION_ARGS)
Definition: varlena.c:424
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:342
void * palloc(Size size)
Definition: mcxt.c:894
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:71
static StringInfo makeStringAggState(FunctionCallInfo fcinfo)
Definition: varlena.c:4526
FmgrInfo proc
Definition: array.h:226
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:451
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:752
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:2764
Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:2576
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:2599
void list_free(List *list)
Definition: list.c:1133
int i
Oid element_type
Definition: array.h:219
#define REG_NOMATCH
Definition: regex.h:138
#define NameStr(name)
Definition: c.h:494
static char * locale
Definition: initdb.c:126
void * arg
static bool text_isequal(text *txt1, text *txt2)
Definition: varlena.c:4006
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:233
#define VAL(CH)
Definition: varlena.c:241
Definition: c.h:434
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
Datum text_left(PG_FUNCTION_ARGS)
Definition: varlena.c:4716
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:330
#define elog
Definition: elog.h:218
static const char * text_format_parse_format(const char *start_ptr, const char *end_ptr, int *argpos, int *widthpos, int *flags, int *width)
Definition: varlena.c:5138
Datum byteaGetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:2897
bool pg_md5_hash(const void *buff, size_t len, char *hexsum)
Definition: md5.c:290
Datum bpchar(PG_FUNCTION_ARGS)
Definition: varchar.c:266
#define strcoll_l
Definition: win32.h:356
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:183
NameData * Name
Definition: c.h:492
#define PG_GETARG_INT64(n)
Definition: fmgr.h:238
Datum byteale(PG_FUNCTION_ARGS)
Definition: varlena.c:3435
Definition: pg_list.h:45
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:541
#define ARR_ELEMTYPE(a)
Definition: array.h:273
#define ARR_NULLBITMAP(a)
Definition: array.h:281
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:208
Definition: regex.h:55
long val
Definition: informix.c:689
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:173
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:550
#define PG_RETURN_NULL()
Definition: fmgr.h:289
#define PG_RETURN_NAME(x)
Definition: fmgr.h:307
#define TEXT_FORMAT_FLAG_MINUS
Definition: varlena.c:4797
int32 pg_atoi(const char *s, int size, int c)
Definition: numutils.c:37
#define PG_GETARG_NAME(n)
Definition: fmgr.h:234
static int text_position(text *t1, text *t2)
Definition: varlena.c:1096
Datum text_pattern_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:2528
#define DIG(VAL)
Definition: varlena.c:242
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:702
int digits
Definition: informix.c:691
Datum bytea_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:3517
Datum byteacat(PG_FUNCTION_ARGS)
Definition: varlena.c:2615
void get_type_io_data(Oid typid, IOFuncSelector which_func, int16 *typlen, bool *typbyval, char *typalign, char *typdelim, Oid *typioparam, Oid *func)
Definition: lsyscache.c:2043