PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/hash.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_collation.h"
23 #include "catalog/pg_type.h"
24 #include "lib/hyperloglog.h"
25 #include "libpq/md5.h"
26 #include "libpq/pqformat.h"
27 #include "miscadmin.h"
28 #include "parser/scansup.h"
29 #include "regex/regex.h"
30 #include "utils/builtins.h"
31 #include "utils/bytea.h"
32 #include "utils/lsyscache.h"
33 #include "utils/memutils.h"
34 #include "utils/pg_locale.h"
35 #include "utils/sortsupport.h"
36 
37 
38 /* GUC variable */
40 
41 typedef struct varlena unknown;
42 
43 typedef struct
44 {
45  bool use_wchar; /* T if multibyte encoding */
46  char *str1; /* use these if not use_wchar */
47  char *str2; /* note: these point to original texts */
48  pg_wchar *wstr1; /* use these if use_wchar */
49  pg_wchar *wstr2; /* note: these are palloc'd */
50  int len1; /* string lengths in logical characters */
51  int len2;
52  /* Skip table for Boyer-Moore-Horspool search algorithm: */
53  int skiptablemask; /* mask for ANDing with skiptable subscripts */
54  int skiptable[256]; /* skip distance for given mismatched char */
56 
57 typedef struct
58 {
59  char *buf1; /* 1st string, or abbreviation original string
60  * buf */
61  char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
62  int buflen1;
63  int buflen2;
64  bool collate_c;
65  hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
66  hyperLogLogState full_card; /* Full key cardinality state */
67  double prop_card; /* Required cardinality proportion */
68 #ifdef HAVE_LOCALE_T
70 #endif
72 
73 /*
74  * This should be large enough that most strings will fit, but small enough
75  * that we feel comfortable putting it on the stack
76  */
77 #define TEXTBUFLEN 1024
78 
/*
 * Accessor macros for the "unknown" type, which this file declares as a
 * typedef of struct varlena.
 *
 * DatumGetUnknownP / DatumGetUnknownPCopy cast the result of
 * PG_DETOAST_DATUM / PG_DETOAST_DATUM_COPY to (unknown *).
 * PG_GETARG_UNKNOWN_P / PG_GETARG_UNKNOWN_P_COPY apply those to function
 * argument number n, fetched with PG_GETARG_DATUM.
 * PG_RETURN_UNKNOWN_P simply expands to PG_RETURN_POINTER.
 */
79 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
80 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
81 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
82 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
83 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
84 
85 static void btsortsupport_worker(SortSupport ssup, Oid collid);
86 static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
87 static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
88 static int bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup);
89 static Datum bttext_abbrev_convert(Datum original, SortSupport ssup);
90 static bool bttext_abbrev_abort(int memtupcount, SortSupport ssup);
91 static int32 text_length(Datum str);
92 static text *text_catenate(text *t1, text *t2);
93 static text *text_substring(Datum str,
94  int32 start,
95  int32 length,
96  bool length_not_specified);
97 static text *text_overlay(text *t1, text *t2, int sp, int sl);
98 static int text_position(text *t1, text *t2);
99 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
100 static int text_position_next(int start_pos, TextPositionState *state);
102 static int text_cmp(text *arg1, text *arg2, Oid collid);
103 static bytea *bytea_catenate(bytea *t1, bytea *t2);
104 static bytea *bytea_substring(Datum str,
105  int S,
106  int L,
107  bool length_not_specified);
108 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
109 static void appendStringInfoText(StringInfo str, const text *t);
112  const char *fldsep, const char *null_string);
114 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
115  int *value);
116 static const char *text_format_parse_format(const char *start_ptr,
117  const char *end_ptr,
118  int *argpos, int *widthpos,
119  int *flags, int *width);
120 static void text_format_string_conversion(StringInfo buf, char conversion,
121  FmgrInfo *typOutputInfo,
122  Datum value, bool isNull,
123  int flags, int width);
124 static void text_format_append_string(StringInfo buf, const char *str,
125  int flags, int width);
126 
127 
128 /*****************************************************************************
129  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
130  *****************************************************************************/
131 
132 /*
133  * cstring_to_text
134  *
135  * Create a text value from a null-terminated C string.
136  *
137  * The new text value is freshly palloc'd with a full-size VARHDR.
138  */
139 text *
140 cstring_to_text(const char *s)
141 {
142  return cstring_to_text_with_len(s, strlen(s));
143 }
144 
145 /*
146  * cstring_to_text_with_len
147  *
148  * Same as cstring_to_text except the caller specifies the string length;
149  * the string need not be null_terminated.
150  */
151 text *
152 cstring_to_text_with_len(const char *s, int len)
153 {
154  text *result = (text *) palloc(len + VARHDRSZ);
155 
156  SET_VARSIZE(result, len + VARHDRSZ);
157  memcpy(VARDATA(result), s, len);
158 
159  return result;
160 }
161 
162 /*
163  * text_to_cstring
164  *
165  * Create a palloc'd, null-terminated C string from a text value.
166  *
167  * We support being passed a compressed or toasted text value.
168  * This is a bit bogus since such values shouldn't really be referred to as
169  * "text *", but it seems useful for robustness. If we didn't handle that
170  * case here, we'd need another routine that did, anyway.
171  */
172 char *
174 {
175  /* must cast away the const, unfortunately */
176  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
177  int len = VARSIZE_ANY_EXHDR(tunpacked);
178  char *result;
179 
180  result = (char *) palloc(len + 1);
181  memcpy(result, VARDATA_ANY(tunpacked), len);
182  result[len] = '\0';
183 
184  if (tunpacked != t)
185  pfree(tunpacked);
186 
187  return result;
188 }
189 
190 /*
191  * text_to_cstring_buffer
192  *
193  * Copy a text value into a caller-supplied buffer of size dst_len.
194  *
195  * The text string is truncated if necessary to fit. The result is
196  * guaranteed null-terminated (unless dst_len == 0).
197  *
198  * We support being passed a compressed or toasted text value.
199  * This is a bit bogus since such values shouldn't really be referred to as
200  * "text *", but it seems useful for robustness. If we didn't handle that
201  * case here, we'd need another routine that did, anyway.
202  */
203 void
204 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
205 {
206  /* must cast away the const, unfortunately */
207  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
208  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
209 
210  if (dst_len > 0)
211  {
212  dst_len--;
213  if (dst_len >= src_len)
214  dst_len = src_len;
215  else /* ensure truncation is encoding-safe */
216  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
217  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
218  dst[dst_len] = '\0';
219  }
220 
221  if (srcunpacked != src)
222  pfree(srcunpacked);
223 }
224 
225 
226 /*****************************************************************************
227  * USER I/O ROUTINES *
228  *****************************************************************************/
229 
230 
/*
 * VAL maps a digit character to its numeric value by subtracting '0';
 * DIG is the inverse, mapping a small numeric value to its digit character.
 * byteain and byteaout below use them on octal digits of \nnn escapes.
 */
231 #define VAL(CH) ((CH) - '0')
232 #define DIG(VAL) ((VAL) + '0')
233 
234 /*
235  * byteain - converts from printable representation of byte array
236  *
237  * Non-printable characters must be passed as '\nnn' (octal) and are
238  * converted to internal form. '\' must be passed as '\\'.
239  * ereport(ERROR, ...) if bad form.
240  *
241  * BUGS:
242  * The input is scanned twice.
243  * The error checking of input is minimal.
244  */
245 Datum
247 {
248  char *inputText = PG_GETARG_CSTRING(0);
249  char *tp;
250  char *rp;
251  int bc;
252  bytea *result;
253 
254  /* Recognize hex input */
255  if (inputText[0] == '\\' && inputText[1] == 'x')
256  {
257  size_t len = strlen(inputText);
258 
259  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
260  result = palloc(bc);
261  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
262  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
263 
264  PG_RETURN_BYTEA_P(result);
265  }
266 
267  /* Else, it's the traditional escaped style */
268  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
269  {
270  if (tp[0] != '\\')
271  tp++;
272  else if ((tp[0] == '\\') &&
273  (tp[1] >= '0' && tp[1] <= '3') &&
274  (tp[2] >= '0' && tp[2] <= '7') &&
275  (tp[3] >= '0' && tp[3] <= '7'))
276  tp += 4;
277  else if ((tp[0] == '\\') &&
278  (tp[1] == '\\'))
279  tp += 2;
280  else
281  {
282  /*
283  * one backslash, not followed by another or ### valid octal
284  */
285  ereport(ERROR,
286  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
287  errmsg("invalid input syntax for type bytea")));
288  }
289  }
290 
291  bc += VARHDRSZ;
292 
293  result = (bytea *) palloc(bc);
294  SET_VARSIZE(result, bc);
295 
296  tp = inputText;
297  rp = VARDATA(result);
298  while (*tp != '\0')
299  {
300  if (tp[0] != '\\')
301  *rp++ = *tp++;
302  else if ((tp[0] == '\\') &&
303  (tp[1] >= '0' && tp[1] <= '3') &&
304  (tp[2] >= '0' && tp[2] <= '7') &&
305  (tp[3] >= '0' && tp[3] <= '7'))
306  {
307  bc = VAL(tp[1]);
308  bc <<= 3;
309  bc += VAL(tp[2]);
310  bc <<= 3;
311  *rp++ = bc + VAL(tp[3]);
312 
313  tp += 4;
314  }
315  else if ((tp[0] == '\\') &&
316  (tp[1] == '\\'))
317  {
318  *rp++ = '\\';
319  tp += 2;
320  }
321  else
322  {
323  /*
324  * We should never get here. The first pass should not allow it.
325  */
326  ereport(ERROR,
327  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
328  errmsg("invalid input syntax for type bytea")));
329  }
330  }
331 
332  PG_RETURN_BYTEA_P(result);
333 }
334 
335 /*
336  * byteaout - converts to printable representation of byte array
337  *
338  * In the traditional escaped format, non-printable characters are
339  * printed as '\nnn' (octal) and '\' as '\\'.
340  */
341 Datum
343 {
344  bytea *vlena = PG_GETARG_BYTEA_PP(0);
345  char *result;
346  char *rp;
347 
349  {
350  /* Print hex format */
351  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
352  *rp++ = '\\';
353  *rp++ = 'x';
354  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
355  }
356  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
357  {
358  /* Print traditional escaped format */
359  char *vp;
360  int len;
361  int i;
362 
363  len = 1; /* empty string has 1 char */
364  vp = VARDATA_ANY(vlena);
365  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
366  {
367  if (*vp == '\\')
368  len += 2;
369  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
370  len += 4;
371  else
372  len++;
373  }
374  rp = result = (char *) palloc(len);
375  vp = VARDATA_ANY(vlena);
376  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
377  {
378  if (*vp == '\\')
379  {
380  *rp++ = '\\';
381  *rp++ = '\\';
382  }
383  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
384  {
385  int val; /* holds unprintable chars */
386 
387  val = *vp;
388  rp[0] = '\\';
389  rp[3] = DIG(val & 07);
390  val >>= 3;
391  rp[2] = DIG(val & 07);
392  val >>= 3;
393  rp[1] = DIG(val & 03);
394  rp += 4;
395  }
396  else
397  *rp++ = *vp;
398  }
399  }
400  else
401  {
402  elog(ERROR, "unrecognized bytea_output setting: %d",
403  bytea_output);
404  rp = result = NULL; /* keep compiler quiet */
405  }
406  *rp = '\0';
407  PG_RETURN_CSTRING(result);
408 }
409 
410 /*
411  * bytearecv - converts external binary format to bytea
412  */
413 Datum
415 {
417  bytea *result;
418  int nbytes;
419 
420  nbytes = buf->len - buf->cursor;
421  result = (bytea *) palloc(nbytes + VARHDRSZ);
422  SET_VARSIZE(result, nbytes + VARHDRSZ);
423  pq_copymsgbytes(buf, VARDATA(result), nbytes);
424  PG_RETURN_BYTEA_P(result);
425 }
426 
427 /*
428  * byteasend - converts bytea to binary format
429  *
430  * This is a special case: just copy the input...
431  */
432 Datum
434 {
435  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
436 
437  PG_RETURN_BYTEA_P(vlena);
438 }
439 
440 Datum
442 {
444 
445  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
446 
447  /* Append the value unless null. */
448  if (!PG_ARGISNULL(1))
449  {
451 
452  /* On the first time through, we ignore the delimiter. */
453  if (state == NULL)
454  state = makeStringAggState(fcinfo);
455  else if (!PG_ARGISNULL(2))
456  {
457  bytea *delim = PG_GETARG_BYTEA_PP(2);
458 
460  }
461 
463  }
464 
465  /*
466  * The transition type for string_agg() is declared to be "internal",
467  * which is a pass-by-value type the same size as a pointer.
468  */
469  PG_RETURN_POINTER(state);
470 }
471 
472 Datum
474 {
476 
477  /* cannot be called directly because of internal-type argument */
478  Assert(AggCheckCallContext(fcinfo, NULL));
479 
480  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
481 
482  if (state != NULL)
483  {
484  bytea *result;
485 
486  result = (bytea *) palloc(state->len + VARHDRSZ);
487  SET_VARSIZE(result, state->len + VARHDRSZ);
488  memcpy(VARDATA(result), state->data, state->len);
489  PG_RETURN_BYTEA_P(result);
490  }
491  else
492  PG_RETURN_NULL();
493 }
494 
495 /*
496  * textin - converts "..." to internal representation
497  */
498 Datum
500 {
501  char *inputText = PG_GETARG_CSTRING(0);
502 
503  PG_RETURN_TEXT_P(cstring_to_text(inputText));
504 }
505 
506 /*
507  * textout - converts internal representation to "..."
508  */
509 Datum
511 {
512  Datum txt = PG_GETARG_DATUM(0);
513 
515 }
516 
517 /*
518  * textrecv - converts external binary format to text
519  */
520 Datum
522 {
524  text *result;
525  char *str;
526  int nbytes;
527 
528  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
529 
530  result = cstring_to_text_with_len(str, nbytes);
531  pfree(str);
532  PG_RETURN_TEXT_P(result);
533 }
534 
535 /*
536  * textsend - converts text to binary format
537  */
538 Datum
540 {
541  text *t = PG_GETARG_TEXT_PP(0);
543 
544  pq_begintypsend(&buf);
547 }
548 
549 
550 /*
551  * unknownin - converts "..." to internal representation
552  */
553 Datum
555 {
556  char *str = PG_GETARG_CSTRING(0);
557 
558  /* representation is same as cstring */
560 }
561 
562 /*
563  * unknownout - converts internal representation to "..."
564  */
565 Datum
567 {
568  /* representation is same as cstring */
569  char *str = PG_GETARG_CSTRING(0);
570 
572 }
573 
574 /*
575  * unknownrecv - converts external binary format to unknown
576  */
577 Datum
579 {
581  char *str;
582  int nbytes;
583 
584  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
585  /* representation is same as cstring */
586  PG_RETURN_CSTRING(str);
587 }
588 
589 /*
590  * unknownsend - converts unknown to binary format
591  */
592 Datum
594 {
595  /* representation is same as cstring */
596  char *str = PG_GETARG_CSTRING(0);
598 
599  pq_begintypsend(&buf);
600  pq_sendtext(&buf, str, strlen(str));
602 }
603 
604 
605 /* ========== PUBLIC ROUTINES ========== */
606 
607 /*
608  * textlen -
609  * returns the logical length of a text*
610  * (which is less than the VARSIZE of the text*)
611  */
612 Datum
614 {
615  Datum str = PG_GETARG_DATUM(0);
616 
617  /* try to avoid decompressing argument */
619 }
620 
621 /*
622  * text_length -
623  * Does the real work for textlen()
624  *
625  * This is broken out so it can be called directly by other string processing
626  * functions. Note that the argument is passed as a Datum, to indicate that
627  * it may still be in compressed form. We can avoid decompressing it at all
628  * in some cases.
629  */
630 static int32
632 {
633  /* fastpath when max encoding length is one */
636  else
637  {
638  text *t = DatumGetTextPP(str);
639 
641  VARSIZE_ANY_EXHDR(t)));
642  }
643 }
644 
645 /*
646  * textoctetlen -
647  * returns the physical length of a text*
648  * (which is less than the VARSIZE of the text*)
649  */
650 Datum
652 {
653  Datum str = PG_GETARG_DATUM(0);
654 
655  /* We need not detoast the input at all */
657 }
658 
659 /*
660  * textcat -
661  * takes two text* and returns a text* that is the concatenation of
662  * the two.
663  *
664  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
665  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
666  * Allocate space for output in all cases.
667  * XXX - thomas 1997-07-10
668  */
669 Datum
671 {
672  text *t1 = PG_GETARG_TEXT_PP(0);
673  text *t2 = PG_GETARG_TEXT_PP(1);
674 
676 }
677 
678 /*
679  * text_catenate
680  * Guts of textcat(), broken out so it can be used by other functions
681  *
682  * Arguments can be in short-header form, but not compressed or out-of-line
683  */
684 static text *
686 {
687  text *result;
688  int len1,
689  len2,
690  len;
691  char *ptr;
692 
693  len1 = VARSIZE_ANY_EXHDR(t1);
694  len2 = VARSIZE_ANY_EXHDR(t2);
695 
696  /* paranoia ... probably should throw error instead? */
697  if (len1 < 0)
698  len1 = 0;
699  if (len2 < 0)
700  len2 = 0;
701 
702  len = len1 + len2 + VARHDRSZ;
703  result = (text *) palloc(len);
704 
705  /* Set size of result string... */
706  SET_VARSIZE(result, len);
707 
708  /* Fill data field of result string... */
709  ptr = VARDATA(result);
710  if (len1 > 0)
711  memcpy(ptr, VARDATA_ANY(t1), len1);
712  if (len2 > 0)
713  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
714 
715  return result;
716 }
717 
718 /*
719  * charlen_to_bytelen()
720  * Compute the number of bytes occupied by n characters starting at *p
721  *
722  * It is caller's responsibility that there actually are n characters;
723  * the string need not be null-terminated.
724  */
725 static int
726 charlen_to_bytelen(const char *p, int n)
727 {
729  {
730  /* Optimization for single-byte encodings */
731  return n;
732  }
733  else
734  {
735  const char *s;
736 
737  for (s = p; n > 0; n--)
738  s += pg_mblen(s);
739 
740  return s - p;
741  }
742 }
743 
744 /*
745  * text_substr()
746  * Return a substring starting at the specified position.
747  * - thomas 1997-12-31
748  *
749  * Input:
750  * - string
751  * - starting position (is one-based)
752  * - string length
753  *
754  * If the starting position is zero or less, then return from the start of the string
755  * adjusting the length to be consistent with the "negative start" per SQL.
756  * If the length is less than zero, return the remaining string.
757  *
758  * Added multibyte support.
759  * - Tatsuo Ishii 1998-4-21
760  * Changed behavior if starting position is less than one to conform to SQL behavior.
761  * Formerly returned the entire string; now returns a portion.
762  * - Thomas Lockhart 1998-12-10
763  * Now uses faster TOAST-slicing interface
764  * - John Gray 2002-02-22
765  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
766  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
767  * error; if E < 1, return '', not entire string). Fixed MB related bug when
768  * S > LC and < LC + 4 sometimes garbage characters are returned.
769  * - Joe Conway 2002-08-10
770  */
771 Datum
773 {
775  PG_GETARG_INT32(1),
776  PG_GETARG_INT32(2),
777  false));
778 }
779 
780 /*
781  * text_substr_no_len -
782  * Wrapper to avoid opr_sanity failure due to
783  * one function accepting a different number of args.
784  */
785 Datum
787 {
789  PG_GETARG_INT32(1),
790  -1, true));
791 }
792 
793 /*
794  * text_substring -
795  * Does the real work for text_substr() and text_substr_no_len()
796  *
797  * This is broken out so it can be called directly by other string processing
798  * functions. Note that the argument is passed as a Datum, to indicate that
799  * it may still be in compressed/toasted form. We can avoid detoasting all
800  * of it in some cases.
801  *
802  * The result is always a freshly palloc'd datum.
803  */
804 static text *
805 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
806 {
808  int32 S = start; /* start position */
809  int32 S1; /* adjusted start position */
810  int32 L1; /* adjusted substring length */
811 
812  /* life is easy if the encoding max length is 1 */
813  if (eml == 1)
814  {
815  S1 = Max(S, 1);
816 
817  if (length_not_specified) /* special case - get length to end of
818  * string */
819  L1 = -1;
820  else
821  {
822  /* end position */
823  int E = S + length;
824 
825  /*
826  * A negative value for L is the only way for the end position to
827  * be before the start. SQL99 says to throw an error.
828  */
829  if (E < S)
830  ereport(ERROR,
831  (errcode(ERRCODE_SUBSTRING_ERROR),
832  errmsg("negative substring length not allowed")));
833 
834  /*
835  * A zero or negative value for the end position can happen if the
836  * start was negative or one. SQL99 says to return a zero-length
837  * string.
838  */
839  if (E < 1)
840  return cstring_to_text("");
841 
842  L1 = E - S1;
843  }
844 
845  /*
846  * If the start position is past the end of the string, SQL99 says to
847  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
848  * that for us. Convert to zero-based starting position
849  */
850  return DatumGetTextPSlice(str, S1 - 1, L1);
851  }
852  else if (eml > 1)
853  {
854  /*
855  * When encoding max length is > 1, we can't get LC without
856  * detoasting, so we'll grab a conservatively large slice now and go
857  * back later to do the right thing
858  */
859  int32 slice_start;
860  int32 slice_size;
861  int32 slice_strlen;
862  text *slice;
863  int32 E1;
864  int32 i;
865  char *p;
866  char *s;
867  text *ret;
868 
869  /*
870  * if S is past the end of the string, the tuple toaster will return a
871  * zero-length string to us
872  */
873  S1 = Max(S, 1);
874 
875  /*
876  * We need to start at position zero because there is no way to know
877  * in advance which byte offset corresponds to the supplied start
878  * position.
879  */
880  slice_start = 0;
881 
882  if (length_not_specified) /* special case - get length to end of
883  * string */
884  slice_size = L1 = -1;
885  else
886  {
887  int E = S + length;
888 
889  /*
890  * A negative value for L is the only way for the end position to
891  * be before the start. SQL99 says to throw an error.
892  */
893  if (E < S)
894  ereport(ERROR,
895  (errcode(ERRCODE_SUBSTRING_ERROR),
896  errmsg("negative substring length not allowed")));
897 
898  /*
899  * A zero or negative value for the end position can happen if the
900  * start was negative or one. SQL99 says to return a zero-length
901  * string.
902  */
903  if (E < 1)
904  return cstring_to_text("");
905 
906  /*
907  * if E is past the end of the string, the tuple toaster will
908  * truncate the length for us
909  */
910  L1 = E - S1;
911 
912  /*
913  * Total slice size in bytes can't be any longer than the start
914  * position plus substring length times the encoding max length.
915  */
916  slice_size = (S1 + L1) * eml;
917  }
918 
919  /*
920  * If we're working with an untoasted source, no need to do an extra
921  * copying step.
922  */
925  slice = DatumGetTextPSlice(str, slice_start, slice_size);
926  else
927  slice = (text *) DatumGetPointer(str);
928 
929  /* see if we got back an empty string */
930  if (VARSIZE_ANY_EXHDR(slice) == 0)
931  {
932  if (slice != (text *) DatumGetPointer(str))
933  pfree(slice);
934  return cstring_to_text("");
935  }
936 
937  /* Now we can get the actual length of the slice in MB characters */
938  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
939  VARSIZE_ANY_EXHDR(slice));
940 
941  /*
942  * Check that the start position wasn't > slice_strlen. If so, SQL99
943  * says to return a zero-length string.
944  */
945  if (S1 > slice_strlen)
946  {
947  if (slice != (text *) DatumGetPointer(str))
948  pfree(slice);
949  return cstring_to_text("");
950  }
951 
952  /*
953  * Adjust L1 and E1 now that we know the slice string length. Again
954  * remember that S1 is one based, and slice_start is zero based.
955  */
956  if (L1 > -1)
957  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
958  else
959  E1 = slice_start + 1 + slice_strlen;
960 
961  /*
962  * Find the start position in the slice; remember S1 is not zero based
963  */
964  p = VARDATA_ANY(slice);
965  for (i = 0; i < S1 - 1; i++)
966  p += pg_mblen(p);
967 
968  /* hang onto a pointer to our start position */
969  s = p;
970 
971  /*
972  * Count the actual bytes used by the substring of the requested
973  * length.
974  */
975  for (i = S1; i < E1; i++)
976  p += pg_mblen(p);
977 
978  ret = (text *) palloc(VARHDRSZ + (p - s));
979  SET_VARSIZE(ret, VARHDRSZ + (p - s));
980  memcpy(VARDATA(ret), s, (p - s));
981 
982  if (slice != (text *) DatumGetPointer(str))
983  pfree(slice);
984 
985  return ret;
986  }
987  else
988  elog(ERROR, "invalid backend encoding: encoding max length < 1");
989 
990  /* not reached: suppress compiler warning */
991  return NULL;
992 }
993 
994 /*
995  * textoverlay
996  * Replace specified substring of first string with second
997  *
998  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
999  * This code is a direct implementation of what the standard says.
1000  */
1001 Datum
1003 {
1004  text *t1 = PG_GETARG_TEXT_PP(0);
1005  text *t2 = PG_GETARG_TEXT_PP(1);
1006  int sp = PG_GETARG_INT32(2); /* substring start position */
1007  int sl = PG_GETARG_INT32(3); /* substring length */
1008 
1009  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1010 }
1011 
1012 Datum
1014 {
1015  text *t1 = PG_GETARG_TEXT_PP(0);
1016  text *t2 = PG_GETARG_TEXT_PP(1);
1017  int sp = PG_GETARG_INT32(2); /* substring start position */
1018  int sl;
1019 
1020  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1021  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1022 }
1023 
1024 static text *
1025 text_overlay(text *t1, text *t2, int sp, int sl)
1026 {
1027  text *result;
1028  text *s1;
1029  text *s2;
1030  int sp_pl_sl;
1031 
1032  /*
1033  * Check for possible integer-overflow cases. For negative sp, throw a
1034  * "substring length" error because that's what should be expected
1035  * according to the spec's definition of OVERLAY().
1036  */
1037  if (sp <= 0)
1038  ereport(ERROR,
1039  (errcode(ERRCODE_SUBSTRING_ERROR),
1040  errmsg("negative substring length not allowed")));
1041  sp_pl_sl = sp + sl;
1042  if (sp_pl_sl <= sl)
1043  ereport(ERROR,
1044  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1045  errmsg("integer out of range")));
1046 
1047  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1048  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1049  result = text_catenate(s1, t2);
1050  result = text_catenate(result, s2);
1051 
1052  return result;
1053 }
1054 
1055 /*
1056  * textpos -
1057  * Return the position of the specified substring.
1058  * Implements the SQL POSITION() function.
1059  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1060  * - thomas 1997-07-27
1061  */
1062 Datum
1064 {
1065  text *str = PG_GETARG_TEXT_PP(0);
1066  text *search_str = PG_GETARG_TEXT_PP(1);
1067 
1068  PG_RETURN_INT32((int32) text_position(str, search_str));
1069 }
1070 
1071 /*
1072  * text_position -
1073  * Does the real work for textpos()
1074  *
1075  * Inputs:
1076  * t1 - string to be searched
1077  * t2 - pattern to match within t1
1078  * Result:
1079  * Character index of the first matched char, starting from 1,
1080  * or 0 if no match.
1081  *
1082  * This is broken out so it can be called directly by other string processing
1083  * functions.
1084  */
1085 static int
1087 {
1089  int result;
1090 
1091  text_position_setup(t1, t2, &state);
1092  result = text_position_next(1, &state);
1093  text_position_cleanup(&state);
1094  return result;
1095 }
1096 
1097 
1098 /*
1099  * text_position_setup, text_position_next, text_position_cleanup -
1100  * Component steps of text_position()
1101  *
1102  * These are broken out so that a string can be efficiently searched for
1103  * multiple occurrences of the same pattern. text_position_next may be
1104  * called multiple times with increasing values of start_pos, which is
1105  * the 1-based character position to start the search from. The "state"
1106  * variable is normally just a local variable in the caller.
1107  */
1108 
1109 static void
1111 {
1112  int len1 = VARSIZE_ANY_EXHDR(t1);
1113  int len2 = VARSIZE_ANY_EXHDR(t2);
1114 
1116  {
1117  /* simple case - single byte encoding */
1118  state->use_wchar = false;
1119  state->str1 = VARDATA_ANY(t1);
1120  state->str2 = VARDATA_ANY(t2);
1121  state->len1 = len1;
1122  state->len2 = len2;
1123  }
1124  else
1125  {
1126  /* not as simple - multibyte encoding */
1127  pg_wchar *p1,
1128  *p2;
1129 
1130  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1131  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1132  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1133  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1134 
1135  state->use_wchar = true;
1136  state->wstr1 = p1;
1137  state->wstr2 = p2;
1138  state->len1 = len1;
1139  state->len2 = len2;
1140  }
1141 
1142  /*
1143  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1144  * notes we use the terminology that the "haystack" is the string to be
1145  * searched (t1) and the "needle" is the pattern being sought (t2).
1146  *
1147  * If the needle is empty or bigger than the haystack then there is no
1148  * point in wasting cycles initializing the table. We also choose not to
1149  * use B-M-H for needles of length 1, since the skip table can't possibly
1150  * save anything in that case.
1151  */
1152  if (len1 >= len2 && len2 > 1)
1153  {
1154  int searchlength = len1 - len2;
1155  int skiptablemask;
1156  int last;
1157  int i;
1158 
1159  /*
1160  * First we must determine how much of the skip table to use. The
1161  * declaration of TextPositionState allows up to 256 elements, but for
1162  * short search problems we don't really want to have to initialize so
1163  * many elements --- it would take too long in comparison to the
1164  * actual search time. So we choose a useful skip table size based on
1165  * the haystack length minus the needle length. The closer the needle
1166  * length is to the haystack length the less useful skipping becomes.
1167  *
1168  * Note: since we use bit-masking to select table elements, the skip
1169  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1170  */
1171  if (searchlength < 16)
1172  skiptablemask = 3;
1173  else if (searchlength < 64)
1174  skiptablemask = 7;
1175  else if (searchlength < 128)
1176  skiptablemask = 15;
1177  else if (searchlength < 512)
1178  skiptablemask = 31;
1179  else if (searchlength < 2048)
1180  skiptablemask = 63;
1181  else if (searchlength < 4096)
1182  skiptablemask = 127;
1183  else
1184  skiptablemask = 255;
1185  state->skiptablemask = skiptablemask;
1186 
1187  /*
1188  * Initialize the skip table. We set all elements to the needle
1189  * length, since this is the correct skip distance for any character
1190  * not found in the needle.
1191  */
1192  for (i = 0; i <= skiptablemask; i++)
1193  state->skiptable[i] = len2;
1194 
1195  /*
1196  * Now examine the needle. For each character except the last one,
1197  * set the corresponding table element to the appropriate skip
1198  * distance. Note that when two characters share the same skip table
1199  * entry, the one later in the needle must determine the skip
1200  * distance.
1201  */
1202  last = len2 - 1;
1203 
1204  if (!state->use_wchar)
1205  {
1206  const char *str2 = state->str2;
1207 
1208  for (i = 0; i < last; i++)
1209  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1210  }
1211  else
1212  {
1213  const pg_wchar *wstr2 = state->wstr2;
1214 
1215  for (i = 0; i < last; i++)
1216  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1217  }
1218  }
1219 }
1220 
1221 static int
1223 {
1224  int haystack_len = state->len1;
1225  int needle_len = state->len2;
1226  int skiptablemask = state->skiptablemask;
1227 
1228  Assert(start_pos > 0); /* else caller error */
1229 
1230  if (needle_len <= 0)
1231  return start_pos; /* result for empty pattern */
1232 
1233  start_pos--; /* adjust for zero based arrays */
1234 
1235  /* Done if the needle can't possibly fit */
1236  if (haystack_len < start_pos + needle_len)
1237  return 0;
1238 
1239  if (!state->use_wchar)
1240  {
1241  /* simple case - single byte encoding */
1242  const char *haystack = state->str1;
1243  const char *needle = state->str2;
1244  const char *haystack_end = &haystack[haystack_len];
1245  const char *hptr;
1246 
1247  if (needle_len == 1)
1248  {
1249  /* No point in using B-M-H for a one-character needle */
1250  char nchar = *needle;
1251 
1252  hptr = &haystack[start_pos];
1253  while (hptr < haystack_end)
1254  {
1255  if (*hptr == nchar)
1256  return hptr - haystack + 1;
1257  hptr++;
1258  }
1259  }
1260  else
1261  {
1262  const char *needle_last = &needle[needle_len - 1];
1263 
1264  /* Start at startpos plus the length of the needle */
1265  hptr = &haystack[start_pos + needle_len - 1];
1266  while (hptr < haystack_end)
1267  {
1268  /* Match the needle scanning *backward* */
1269  const char *nptr;
1270  const char *p;
1271 
1272  nptr = needle_last;
1273  p = hptr;
1274  while (*nptr == *p)
1275  {
1276  /* Matched it all? If so, return 1-based position */
1277  if (nptr == needle)
1278  return p - haystack + 1;
1279  nptr--, p--;
1280  }
1281 
1282  /*
1283  * No match, so use the haystack char at hptr to decide how
1284  * far to advance. If the needle had any occurrence of that
1285  * character (or more precisely, one sharing the same
1286  * skiptable entry) before its last character, then we advance
1287  * far enough to align the last such needle character with
1288  * that haystack position. Otherwise we can advance by the
1289  * whole needle length.
1290  */
1291  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1292  }
1293  }
1294  }
1295  else
1296  {
1297  /* The multibyte char version. This works exactly the same way. */
1298  const pg_wchar *haystack = state->wstr1;
1299  const pg_wchar *needle = state->wstr2;
1300  const pg_wchar *haystack_end = &haystack[haystack_len];
1301  const pg_wchar *hptr;
1302 
1303  if (needle_len == 1)
1304  {
1305  /* No point in using B-M-H for a one-character needle */
1306  pg_wchar nchar = *needle;
1307 
1308  hptr = &haystack[start_pos];
1309  while (hptr < haystack_end)
1310  {
1311  if (*hptr == nchar)
1312  return hptr - haystack + 1;
1313  hptr++;
1314  }
1315  }
1316  else
1317  {
1318  const pg_wchar *needle_last = &needle[needle_len - 1];
1319 
1320  /* Start at startpos plus the length of the needle */
1321  hptr = &haystack[start_pos + needle_len - 1];
1322  while (hptr < haystack_end)
1323  {
1324  /* Match the needle scanning *backward* */
1325  const pg_wchar *nptr;
1326  const pg_wchar *p;
1327 
1328  nptr = needle_last;
1329  p = hptr;
1330  while (*nptr == *p)
1331  {
1332  /* Matched it all? If so, return 1-based position */
1333  if (nptr == needle)
1334  return p - haystack + 1;
1335  nptr--, p--;
1336  }
1337 
1338  /*
1339  * No match, so use the haystack char at hptr to decide how
1340  * far to advance. If the needle had any occurrence of that
1341  * character (or more precisely, one sharing the same
1342  * skiptable entry) before its last character, then we advance
1343  * far enough to align the last such needle character with
1344  * that haystack position. Otherwise we can advance by the
1345  * whole needle length.
1346  */
1347  hptr += state->skiptable[*hptr & skiptablemask];
1348  }
1349  }
1350  }
1351 
1352  return 0; /* not found */
1353 }
1354 
1355 static void
1357 {
1358  if (state->use_wchar)
1359  {
1360  pfree(state->wstr1);
1361  pfree(state->wstr2);
1362  }
1363 }
1364 
1365 /* varstr_cmp()
1366  * Comparison function for text strings with given lengths.
1367  * Includes locale support, but must copy strings to temporary memory
1368  * to allow null-termination for inputs to strcoll().
1369  * Returns an integer less than, equal to, or greater than zero, indicating
1370  * whether arg1 is less than, equal to, or greater than arg2.
1371  */
1372 int
1373 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1374 {
1375  int result;
1376 
1377  /*
1378  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1379  * have to do some memory copying. This turns out to be significantly
1380  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1381  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1382  */
1383  if (lc_collate_is_c(collid))
1384  {
1385  result = memcmp(arg1, arg2, Min(len1, len2));
1386  if ((result == 0) && (len1 != len2))
1387  result = (len1 < len2) ? -1 : 1;
1388  }
1389  else
1390  {
1391  char a1buf[TEXTBUFLEN];
1392  char a2buf[TEXTBUFLEN];
1393  char *a1p,
1394  *a2p;
1395 
1396 #ifdef HAVE_LOCALE_T
1397  pg_locale_t mylocale = 0;
1398 #endif
1399 
1400  if (collid != DEFAULT_COLLATION_OID)
1401  {
1402  if (!OidIsValid(collid))
1403  {
1404  /*
1405  * This typically means that the parser could not resolve a
1406  * conflict of implicit collations, so report it that way.
1407  */
1408  ereport(ERROR,
1409  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1410  errmsg("could not determine which collation to use for string comparison"),
1411  errhint("Use the COLLATE clause to set the collation explicitly.")));
1412  }
1413 #ifdef HAVE_LOCALE_T
1414  mylocale = pg_newlocale_from_collation(collid);
1415 #endif
1416  }
1417 
1418  /*
1419  * memcmp() can't tell us which of two unequal strings sorts first,
1420  * but it's a cheap way to tell if they're equal. Testing shows that
1421  * memcmp() followed by strcoll() is only trivially slower than
1422  * strcoll() by itself, so we don't lose much if this doesn't work out
1423  * very often, and if it does - for example, because there are many
1424  * equal strings in the input - then we win big by avoiding expensive
1425  * collation-aware comparisons.
1426  */
1427  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1428  return 0;
1429 
1430 #ifdef WIN32
1431  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1432  if (GetDatabaseEncoding() == PG_UTF8)
1433  {
1434  int a1len;
1435  int a2len;
1436  int r;
1437 
1438  if (len1 >= TEXTBUFLEN / 2)
1439  {
1440  a1len = len1 * 2 + 2;
1441  a1p = palloc(a1len);
1442  }
1443  else
1444  {
1445  a1len = TEXTBUFLEN;
1446  a1p = a1buf;
1447  }
1448  if (len2 >= TEXTBUFLEN / 2)
1449  {
1450  a2len = len2 * 2 + 2;
1451  a2p = palloc(a2len);
1452  }
1453  else
1454  {
1455  a2len = TEXTBUFLEN;
1456  a2p = a2buf;
1457  }
1458 
1459  /* stupid Microsloth API does not work for zero-length input */
1460  if (len1 == 0)
1461  r = 0;
1462  else
1463  {
1464  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1465  (LPWSTR) a1p, a1len / 2);
1466  if (!r)
1467  ereport(ERROR,
1468  (errmsg("could not convert string to UTF-16: error code %lu",
1469  GetLastError())));
1470  }
1471  ((LPWSTR) a1p)[r] = 0;
1472 
1473  if (len2 == 0)
1474  r = 0;
1475  else
1476  {
1477  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1478  (LPWSTR) a2p, a2len / 2);
1479  if (!r)
1480  ereport(ERROR,
1481  (errmsg("could not convert string to UTF-16: error code %lu",
1482  GetLastError())));
1483  }
1484  ((LPWSTR) a2p)[r] = 0;
1485 
1486  errno = 0;
1487 #ifdef HAVE_LOCALE_T
1488  if (mylocale)
1489  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
1490  else
1491 #endif
1492  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1493  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1494  * headers */
1495  ereport(ERROR,
1496  (errmsg("could not compare Unicode strings: %m")));
1497 
1498  /*
1499  * In some locales wcscoll() can claim that nonidentical strings
1500  * are equal. Believing that would be bad news for a number of
1501  * reasons, so we follow Perl's lead and sort "equal" strings
1502  * according to strcmp (on the UTF-8 representation).
1503  */
1504  if (result == 0)
1505  {
1506  result = memcmp(arg1, arg2, Min(len1, len2));
1507  if ((result == 0) && (len1 != len2))
1508  result = (len1 < len2) ? -1 : 1;
1509  }
1510 
1511  if (a1p != a1buf)
1512  pfree(a1p);
1513  if (a2p != a2buf)
1514  pfree(a2p);
1515 
1516  return result;
1517  }
1518 #endif /* WIN32 */
1519 
1520  if (len1 >= TEXTBUFLEN)
1521  a1p = (char *) palloc(len1 + 1);
1522  else
1523  a1p = a1buf;
1524  if (len2 >= TEXTBUFLEN)
1525  a2p = (char *) palloc(len2 + 1);
1526  else
1527  a2p = a2buf;
1528 
1529  memcpy(a1p, arg1, len1);
1530  a1p[len1] = '\0';
1531  memcpy(a2p, arg2, len2);
1532  a2p[len2] = '\0';
1533 
1534 #ifdef HAVE_LOCALE_T
1535  if (mylocale)
1536  result = strcoll_l(a1p, a2p, mylocale);
1537  else
1538 #endif
1539  result = strcoll(a1p, a2p);
1540 
1541  /*
1542  * In some locales strcoll() can claim that nonidentical strings are
1543  * equal. Believing that would be bad news for a number of reasons,
1544  * so we follow Perl's lead and sort "equal" strings according to
1545  * strcmp().
1546  */
1547  if (result == 0)
1548  result = strcmp(a1p, a2p);
1549 
1550  if (a1p != a1buf)
1551  pfree(a1p);
1552  if (a2p != a2buf)
1553  pfree(a2p);
1554  }
1555 
1556  return result;
1557 }
1558 
1559 /* text_cmp()
1560  * Internal comparison function for text strings.
1561  * Returns -1, 0 or 1
1562  */
1563 static int
1564 text_cmp(text *arg1, text *arg2, Oid collid)
1565 {
1566  char *a1p,
1567  *a2p;
1568  int len1,
1569  len2;
1570 
1571  a1p = VARDATA_ANY(arg1);
1572  a2p = VARDATA_ANY(arg2);
1573 
1574  len1 = VARSIZE_ANY_EXHDR(arg1);
1575  len2 = VARSIZE_ANY_EXHDR(arg2);
1576 
1577  return varstr_cmp(a1p, len1, a2p, len2, collid);
1578 }
1579 
1580 /*
1581  * Comparison functions for text strings.
1582  *
1583  * Note: btree indexes need these routines not to leak memory; therefore,
1584  * be careful to free working copies of toasted datums. Most places don't
1585  * need to be so careful.
1586  */
1587 
1588 Datum
1590 {
1591  Datum arg1 = PG_GETARG_DATUM(0);
1592  Datum arg2 = PG_GETARG_DATUM(1);
1593  bool result;
1594  Size len1,
1595  len2;
1596 
1597  /*
1598  * Since we only care about equality or not-equality, we can avoid all the
1599  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1600  * don't even have to do a bitwise comparison if we can show the lengths
1601  * of the strings are unequal; which might save us from having to detoast
1602  * one or both values.
1603  */
1604  len1 = toast_raw_datum_size(arg1);
1605  len2 = toast_raw_datum_size(arg2);
1606  if (len1 != len2)
1607  result = false;
1608  else
1609  {
1610  text *targ1 = DatumGetTextPP(arg1);
1611  text *targ2 = DatumGetTextPP(arg2);
1612 
1613  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1614  len1 - VARHDRSZ) == 0);
1615 
1616  PG_FREE_IF_COPY(targ1, 0);
1617  PG_FREE_IF_COPY(targ2, 1);
1618  }
1619 
1620  PG_RETURN_BOOL(result);
1621 }
1622 
1623 Datum
1625 {
1626  Datum arg1 = PG_GETARG_DATUM(0);
1627  Datum arg2 = PG_GETARG_DATUM(1);
1628  bool result;
1629  Size len1,
1630  len2;
1631 
1632  /* See comment in texteq() */
1633  len1 = toast_raw_datum_size(arg1);
1634  len2 = toast_raw_datum_size(arg2);
1635  if (len1 != len2)
1636  result = true;
1637  else
1638  {
1639  text *targ1 = DatumGetTextPP(arg1);
1640  text *targ2 = DatumGetTextPP(arg2);
1641 
1642  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1643  len1 - VARHDRSZ) != 0);
1644 
1645  PG_FREE_IF_COPY(targ1, 0);
1646  PG_FREE_IF_COPY(targ2, 1);
1647  }
1648 
1649  PG_RETURN_BOOL(result);
1650 }
1651 
1652 Datum
1654 {
1655  text *arg1 = PG_GETARG_TEXT_PP(0);
1656  text *arg2 = PG_GETARG_TEXT_PP(1);
1657  bool result;
1658 
1659  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1660 
1661  PG_FREE_IF_COPY(arg1, 0);
1662  PG_FREE_IF_COPY(arg2, 1);
1663 
1664  PG_RETURN_BOOL(result);
1665 }
1666 
1667 Datum
1669 {
1670  text *arg1 = PG_GETARG_TEXT_PP(0);
1671  text *arg2 = PG_GETARG_TEXT_PP(1);
1672  bool result;
1673 
1674  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1675 
1676  PG_FREE_IF_COPY(arg1, 0);
1677  PG_FREE_IF_COPY(arg2, 1);
1678 
1679  PG_RETURN_BOOL(result);
1680 }
1681 
1682 Datum
1684 {
1685  text *arg1 = PG_GETARG_TEXT_PP(0);
1686  text *arg2 = PG_GETARG_TEXT_PP(1);
1687  bool result;
1688 
1689  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1690 
1691  PG_FREE_IF_COPY(arg1, 0);
1692  PG_FREE_IF_COPY(arg2, 1);
1693 
1694  PG_RETURN_BOOL(result);
1695 }
1696 
1697 Datum
1699 {
1700  text *arg1 = PG_GETARG_TEXT_PP(0);
1701  text *arg2 = PG_GETARG_TEXT_PP(1);
1702  bool result;
1703 
1704  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1705 
1706  PG_FREE_IF_COPY(arg1, 0);
1707  PG_FREE_IF_COPY(arg2, 1);
1708 
1709  PG_RETURN_BOOL(result);
1710 }
1711 
1712 Datum
1714 {
1715  text *arg1 = PG_GETARG_TEXT_PP(0);
1716  text *arg2 = PG_GETARG_TEXT_PP(1);
1717  int32 result;
1718 
1719  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1720 
1721  PG_FREE_IF_COPY(arg1, 0);
1722  PG_FREE_IF_COPY(arg2, 1);
1723 
1724  PG_RETURN_INT32(result);
1725 }
1726 
1727 Datum
1729 {
1731  Oid collid = ssup->ssup_collation;
1732  MemoryContext oldcontext;
1733 
1734  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1735 
1736  btsortsupport_worker(ssup, collid);
1737 
1738  MemoryContextSwitchTo(oldcontext);
1739 
1740  PG_RETURN_VOID();
1741 }
1742 
1743 static void
1745 {
1746  bool abbreviate = ssup->abbreviate;
1747  bool collate_c = false;
1748  TextSortSupport *tss;
1749 
1750 #ifdef HAVE_LOCALE_T
1751  pg_locale_t locale = 0;
1752 #endif
1753 
1754  /*
1755  * If possible, set ssup->comparator to a function which can be used to
1756  * directly compare two datums. If we can do this, we'll avoid the
1757  * overhead of a trip through the fmgr layer for every comparison, which
1758  * can be substantial.
1759  *
1760  * Most typically, we'll set the comparator to bttextfastcmp_locale, which
1761  * uses strcoll() to perform comparisons. However, if LC_COLLATE = C, we
1762  * can make things quite a bit faster with bttextfastcmp_c, which uses
1763  * memcmp() rather than strcoll().
1764  *
1765  * There is a further exception on Windows. When the database encoding is
1766  * UTF-8 and we are not using the C collation, complex hacks are required.
1767  * We don't currently have a comparator that handles that case, so we fall
1768  * back on the slow method of having the sort code invoke bttextcmp() via
1769  * the fmgr trampoline.
1770  */
1771  if (lc_collate_is_c(collid))
1772  {
1773  ssup->comparator = bttextfastcmp_c;
1774  collate_c = true;
1775  }
1776 #ifdef WIN32
1777  else if (GetDatabaseEncoding() == PG_UTF8)
1778  return;
1779 #endif
1780  else
1781  {
1783 
1784  /*
1785  * We need a collation-sensitive comparison. To make things faster,
1786  * we'll figure out the collation based on the locale id and cache the
1787  * result.
1788  */
1789  if (collid != DEFAULT_COLLATION_OID)
1790  {
1791  if (!OidIsValid(collid))
1792  {
1793  /*
1794  * This typically means that the parser could not resolve a
1795  * conflict of implicit collations, so report it that way.
1796  */
1797  ereport(ERROR,
1798  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1799  errmsg("could not determine which collation to use for string comparison"),
1800  errhint("Use the COLLATE clause to set the collation explicitly.")));
1801  }
1802 #ifdef HAVE_LOCALE_T
1803  locale = pg_newlocale_from_collation(collid);
1804 #endif
1805  }
1806  }
1807 
1808  /*
1809  * It's possible that there are platforms where the use of abbreviated
1810  * keys should be disabled at compile time. Having only 4 byte datums
1811  * could make worst-case performance drastically more likely, for example.
1812  * Moreover, Darwin's strxfrm() implementations is known to not
1813  * effectively concentrate a significant amount of entropy from the
1814  * original string in earlier transformed blobs. It's possible that other
1815  * supported platforms are similarly encumbered. However, even in those
1816  * cases, the abbreviated keys optimization may win, and if it doesn't,
1817  * the "abort abbreviation" code may rescue us. So, for now, we don't
1818  * disable this anywhere on the basis of performance.
1819  */
1820 
1821  /*
1822  * If we're using abbreviated keys, or if we're using a locale-aware
1823  * comparison, we need to initialize a TextSortSupport object. Both cases
1824  * will make use of the temporary buffers we initialize here for scratch
1825  * space, and the abbreviation case requires additional state.
1826  */
1827  if (abbreviate || !collate_c)
1828  {
1829  tss = palloc(sizeof(TextSortSupport));
1830  tss->buf1 = palloc(TEXTBUFLEN);
1831  tss->buflen1 = TEXTBUFLEN;
1832  tss->buf2 = palloc(TEXTBUFLEN);
1833  tss->buflen2 = TEXTBUFLEN;
1834 #ifdef HAVE_LOCALE_T
1835  tss->locale = locale;
1836 #endif
1837  tss->collate_c = collate_c;
1838  ssup->ssup_extra = tss;
1839 
1840  /*
1841  * If possible, plan to use the abbreviated keys optimization. The
1842  * core code may switch back to authoritative comparator should
1843  * abbreviation be aborted.
1844  */
1845  if (abbreviate)
1846  {
1847  tss->prop_card = 0.20;
1848  initHyperLogLog(&tss->abbr_card, 10);
1849  initHyperLogLog(&tss->full_card, 10);
1850  ssup->abbrev_full_comparator = ssup->comparator;
1851  ssup->comparator = bttextcmp_abbrev;
1854  }
1855  }
1856 }
1857 
1858 /*
1859  * sortsupport comparison func (for C locale case)
1860  */
1861 static int
1863 {
1864  text *arg1 = DatumGetTextPP(x);
1865  text *arg2 = DatumGetTextPP(y);
1866  char *a1p,
1867  *a2p;
1868  int len1,
1869  len2,
1870  result;
1871 
1872  a1p = VARDATA_ANY(arg1);
1873  a2p = VARDATA_ANY(arg2);
1874 
1875  len1 = VARSIZE_ANY_EXHDR(arg1);
1876  len2 = VARSIZE_ANY_EXHDR(arg2);
1877 
1878  result = memcmp(a1p, a2p, Min(len1, len2));
1879  if ((result == 0) && (len1 != len2))
1880  result = (len1 < len2) ? -1 : 1;
1881 
1882  /* We can't afford to leak memory here. */
1883  if (PointerGetDatum(arg1) != x)
1884  pfree(arg1);
1885  if (PointerGetDatum(arg2) != y)
1886  pfree(arg2);
1887 
1888  return result;
1889 }
1890 
1891 /*
1892  * sortsupport comparison func (for locale case)
1893  */
1894 static int
1896 {
1897  text *arg1 = DatumGetTextPP(x);
1898  text *arg2 = DatumGetTextPP(y);
1899  TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
1900 
1901  /* working state */
1902  char *a1p,
1903  *a2p;
1904  int len1,
1905  len2,
1906  result;
1907 
1908  a1p = VARDATA_ANY(arg1);
1909  a2p = VARDATA_ANY(arg2);
1910 
1911  len1 = VARSIZE_ANY_EXHDR(arg1);
1912  len2 = VARSIZE_ANY_EXHDR(arg2);
1913 
1914  /* Fast pre-check for equality, as discussed in varstr_cmp() */
1915  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
1916  {
1917  result = 0;
1918  goto done;
1919  }
1920 
1921  if (len1 >= tss->buflen1)
1922  {
1923  pfree(tss->buf1);
1924  tss->buflen1 = Max(len1 + 1, Min(tss->buflen1 * 2, MaxAllocSize));
1925  tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
1926  }
1927  if (len2 >= tss->buflen2)
1928  {
1929  pfree(tss->buf2);
1930  tss->buflen2 = Max(len2 + 1, Min(tss->buflen2 * 2, MaxAllocSize));
1931  tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
1932  }
1933 
1934  memcpy(tss->buf1, a1p, len1);
1935  tss->buf1[len1] = '\0';
1936  memcpy(tss->buf2, a2p, len2);
1937  tss->buf2[len2] = '\0';
1938 
1939 #ifdef HAVE_LOCALE_T
1940  if (tss->locale)
1941  result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
1942  else
1943 #endif
1944  result = strcoll(tss->buf1, tss->buf2);
1945 
1946  /*
1947  * In some locales strcoll() can claim that nonidentical strings are
1948  * equal. Believing that would be bad news for a number of reasons, so we
1949  * follow Perl's lead and sort "equal" strings according to strcmp().
1950  */
1951  if (result == 0)
1952  result = strcmp(tss->buf1, tss->buf2);
1953 
1954 done:
1955  /* We can't afford to leak memory here. */
1956  if (PointerGetDatum(arg1) != x)
1957  pfree(arg1);
1958  if (PointerGetDatum(arg2) != y)
1959  pfree(arg2);
1960 
1961  return result;
1962 }
1963 
1964 /*
1965  * Abbreviated key comparison func
1966  */
1967 static int
1969 {
1970  char *a = (char *) &x;
1971  char *b = (char *) &y;
1972  int result;
1973 
1974  result = memcmp(a, b, sizeof(Datum));
1975 
1976  /*
1977  * When result = 0, the core system will call bttextfastcmp_c() or
1978  * bttextfastcmp_locale(). Even a strcmp() on two non-truncated strxfrm()
1979  * blobs cannot indicate *equality* authoritatively, for the same reason
1980  * that there is a strcoll() tie-breaker call to strcmp() in varstr_cmp().
1981  */
1982  return result;
1983 }
1984 
1985 /*
1986  * Conversion routine for sortsupport. Converts original text to abbreviated
1987  * key representation. Our encoding strategy is simple -- pack the first 8
1988  * bytes of a strxfrm() blob into a Datum.
1989  */
1990 static Datum
1992 {
1993  TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
1994  text *authoritative = DatumGetTextPP(original);
1995  char *authoritative_data = VARDATA_ANY(authoritative);
1996 
1997  /* working state */
1998  Datum res;
1999  char *pres;
2000  int len;
2001  uint32 hash;
2002 
2003  /*
2004  * Abbreviated key representation is a pass-by-value Datum that is treated
2005  * as a char array by the specialized comparator bttextcmp_abbrev().
2006  */
2007  pres = (char *) &res;
2008  /* memset(), so any non-overwritten bytes are NUL */
2009  memset(pres, 0, sizeof(Datum));
2010  len = VARSIZE_ANY_EXHDR(authoritative);
2011 
2012  /*
2013  * If we're using the C collation, use memcmp(), rather than strxfrm(), to
2014  * abbreviate keys. The full comparator for the C locale is always
2015  * memcmp(), and we can't risk having this give a different answer.
2016  * Besides, this should be faster, too.
2017  */
2018  if (tss->collate_c)
2019  memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2020  else
2021  {
2022  Size bsize;
2023 
2024  /*
2025  * We're not using the C collation, so fall back on strxfrm.
2026  */
2027 
2028  /* By convention, we use buffer 1 to store and NUL-terminate text */
2029  if (len >= tss->buflen1)
2030  {
2031  pfree(tss->buf1);
2032  tss->buflen1 = Max(len + 1, Min(tss->buflen1 * 2, MaxAllocSize));
2033  tss->buf1 = palloc(tss->buflen1);
2034  }
2035 
2036  /* Just like strcoll(), strxfrm() expects a NUL-terminated string */
2037  memcpy(tss->buf1, VARDATA_ANY(authoritative), len);
2038  tss->buf1[len] = '\0';
2039 
2040  /* Don't leak memory here */
2041  if (PointerGetDatum(authoritative) != original)
2042  pfree(authoritative);
2043 
2044  for (;;)
2045  {
2046 #ifdef HAVE_LOCALE_T
2047  if (tss->locale)
2048  bsize = strxfrm_l(tss->buf2, tss->buf1,
2049  tss->buflen2, tss->locale);
2050  else
2051 #endif
2052  bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2);
2053 
2054  if (bsize < tss->buflen2)
2055  break;
2056 
2057  /*
2058  * The C standard states that the contents of the buffer is now
2059  * unspecified. Grow buffer, and retry.
2060  */
2061  pfree(tss->buf2);
2062  tss->buflen2 = Max(bsize + 1,
2063  Min(tss->buflen2 * 2, MaxAllocSize));
2064  tss->buf2 = palloc(tss->buflen2);
2065  }
2066 
2067  /*
2068  * Every Datum byte is always compared. This is safe because the
2069  * strxfrm() blob is itself NUL terminated, leaving no danger of
2070  * misinterpreting any NUL bytes not intended to be interpreted as
2071  * logically representing termination.
2072  */
2073  memcpy(pres, tss->buf2, Min(sizeof(Datum), bsize));
2074  }
2075 
2076  /*
2077  * Maintain approximate cardinality of both abbreviated keys and original,
2078  * authoritative keys using HyperLogLog. Used as cheap insurance against
2079  * the worst case, where we do many string transformations for no saving
2080  * in full strcoll()-based comparisons. These statistics are used by
2081  * bttext_abbrev_abort().
2082  *
2083  * First, Hash key proper, or a significant fraction of it. Mix in length
2084  * in order to compensate for cases where differences are past
2085  * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2086  */
2087  hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2088  Min(len, PG_CACHE_LINE_SIZE)));
2089 
2090  if (len > PG_CACHE_LINE_SIZE)
2091  hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2092 
2093  addHyperLogLog(&tss->full_card, hash);
2094 
2095  /* Hash abbreviated key */
2096 #if SIZEOF_DATUM == 8
2097  {
2098  uint32 lohalf,
2099  hihalf;
2100 
2101  lohalf = (uint32) res;
2102  hihalf = (uint32) (res >> 32);
2103  hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2104  }
2105 #else /* SIZEOF_DATUM != 8 */
2106  hash = DatumGetUInt32(hash_uint32((uint32) res));
2107 #endif
2108 
2109  addHyperLogLog(&tss->abbr_card, hash);
2110 
2111  return res;
2112 }
2113 
2114 /*
2115  * Callback for estimating effectiveness of abbreviated key optimization, using
2116  * heuristic rules. Returns value indicating if the abbreviation optimization
2117  * should be aborted, based on its projected effectiveness.
2118  */
2119 static bool
2120 bttext_abbrev_abort(int memtupcount, SortSupport ssup)
2121 {
2122  TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
2123  double abbrev_distinct,
2124  key_distinct;
2125 
2126  Assert(ssup->abbreviate);
2127 
2128  /* Have a little patience */
2129  if (memtupcount < 100)
2130  return false;
2131 
2132  abbrev_distinct = estimateHyperLogLog(&tss->abbr_card);
2133  key_distinct = estimateHyperLogLog(&tss->full_card);
2134 
2135  /*
2136  * Clamp cardinality estimates to at least one distinct value. While
2137  * NULLs are generally disregarded, if only NULL values were seen so far,
2138  * that might misrepresent costs if we failed to clamp.
2139  */
2140  if (abbrev_distinct <= 1.0)
2141  abbrev_distinct = 1.0;
2142 
2143  if (key_distinct <= 1.0)
2144  key_distinct = 1.0;
2145 
2146  /*
2147  * In the worst case all abbreviated keys are identical, while at the same
2148  * time there are differences within full key strings not captured in
2149  * abbreviations.
2150  */
2151 #ifdef TRACE_SORT
2152  if (trace_sort)
2153  {
2154  double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2155 
2156  elog(LOG, "bttext_abbrev: abbrev_distinct after %d: %f "
2157  "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2158  memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2159  tss->prop_card);
2160  }
2161 #endif
2162 
2163  /*
2164  * If the number of distinct abbreviated keys approximately matches the
2165  * number of distinct authoritative original keys, that's reason enough to
2166  * proceed. We can win even with a very low cardinality set if most
2167  * tie-breakers only memcmp(). This is by far the most important
2168  * consideration.
2169  *
2170  * While comparisons that are resolved at the abbreviated key level are
2171  * considerably cheaper than tie-breakers resolved with memcmp(), both of
2172  * those two outcomes are so much cheaper than a full strcoll() once
2173  * sorting is underway that it doesn't seem worth it to weigh abbreviated
2174  * cardinality against the overall size of the set in order to more
2175  * accurately model costs. Assume that an abbreviated comparison, and an
2176  * abbreviated comparison with a cheap memcmp()-based authoritative
2177  * resolution are equivalent.
2178  */
2179  if (abbrev_distinct > key_distinct * tss->prop_card)
2180  {
2181  /*
2182  * When we have exceeded 10,000 tuples, decay required cardinality
2183  * aggressively for next call.
2184  *
2185  * This is useful because the number of comparisons required on
2186  * average increases at a linearithmic rate, and at roughly 10,000
2187  * tuples that factor will start to dominate over the linear costs of
2188  * string transformation (this is a conservative estimate). The decay
2189  * rate is chosen to be a little less aggressive than halving -- which
2190  * (since we're called at points at which memtupcount has doubled)
2191  * would never see the cost model actually abort past the first call
2192  * following a decay. This decay rate is mostly a precaution against
2193  * a sudden, violent swing in how well abbreviated cardinality tracks
2194  * full key cardinality. The decay also serves to prevent a marginal
2195  * case from being aborted too late, when too much has already been
2196  * invested in string transformation.
2197  *
2198  * It's possible for sets of several million distinct strings with
2199  * mere tens of thousands of distinct abbreviated keys to still
2200  * benefit very significantly. This will generally occur provided
2201  * each abbreviated key is a proxy for a roughly uniform number of the
2202  * set's full keys. If it isn't so, we hope to catch that early and
2203  * abort. If it isn't caught early, by the time the problem is
2204  * apparent it's probably not worth aborting.
2205  */
2206  if (memtupcount > 10000)
2207  tss->prop_card *= 0.65;
2208 
2209  return false;
2210  }
2211 
2212  /*
2213  * Abort abbreviation strategy.
2214  *
2215  * The worst case, where all abbreviated keys are identical while all
2216  * original strings differ will typically only see a regression of about
2217  * 10% in execution time for small to medium sized lists of strings.
2218  * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2219  * often expect very large improvements, particularly with sets of strings
2220  * of moderately high to high abbreviated cardinality. There is little to
2221  * lose but much to gain, which our strategy reflects.
2222  */
2223 #ifdef TRACE_SORT
2224  if (trace_sort)
2225  elog(LOG, "bttext_abbrev: aborted abbreviation at %d "
2226  "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2227  memtupcount, abbrev_distinct, key_distinct, tss->prop_card);
2228 #endif
2229 
2230  return true;
2231 }
2232 
2233 Datum
2235 {
2236  text *arg1 = PG_GETARG_TEXT_PP(0);
2237  text *arg2 = PG_GETARG_TEXT_PP(1);
2238  text *result;
2239 
2240  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2241 
2242  PG_RETURN_TEXT_P(result);
2243 }
2244 
2245 Datum
2247 {
2248  text *arg1 = PG_GETARG_TEXT_PP(0);
2249  text *arg2 = PG_GETARG_TEXT_PP(1);
2250  text *result;
2251 
2252  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2253 
2254  PG_RETURN_TEXT_P(result);
2255 }
2256 
2257 
2258 /*
2259  * The following operators support character-by-character comparison
2260  * of text datums, to allow building indexes suitable for LIKE clauses.
2261  * Note that the regular texteq/textne comparison operators are assumed
2262  * to be compatible with these!
2263  */
2264 
2265 static int
2267 {
2268  int result;
2269  int len1,
2270  len2;
2271 
2272  len1 = VARSIZE_ANY_EXHDR(arg1);
2273  len2 = VARSIZE_ANY_EXHDR(arg2);
2274 
2275  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2276  if (result != 0)
2277  return result;
2278  else if (len1 < len2)
2279  return -1;
2280  else if (len1 > len2)
2281  return 1;
2282  else
2283  return 0;
2284 }
2285 
2286 
2287 Datum
2289 {
2290  text *arg1 = PG_GETARG_TEXT_PP(0);
2291  text *arg2 = PG_GETARG_TEXT_PP(1);
2292  int result;
2293 
2294  result = internal_text_pattern_compare(arg1, arg2);
2295 
2296  PG_FREE_IF_COPY(arg1, 0);
2297  PG_FREE_IF_COPY(arg2, 1);
2298 
2299  PG_RETURN_BOOL(result < 0);
2300 }
2301 
2302 
2303 Datum
2305 {
2306  text *arg1 = PG_GETARG_TEXT_PP(0);
2307  text *arg2 = PG_GETARG_TEXT_PP(1);
2308  int result;
2309 
2310  result = internal_text_pattern_compare(arg1, arg2);
2311 
2312  PG_FREE_IF_COPY(arg1, 0);
2313  PG_FREE_IF_COPY(arg2, 1);
2314 
2315  PG_RETURN_BOOL(result <= 0);
2316 }
2317 
2318 
2319 Datum
2321 {
2322  text *arg1 = PG_GETARG_TEXT_PP(0);
2323  text *arg2 = PG_GETARG_TEXT_PP(1);
2324  int result;
2325 
2326  result = internal_text_pattern_compare(arg1, arg2);
2327 
2328  PG_FREE_IF_COPY(arg1, 0);
2329  PG_FREE_IF_COPY(arg2, 1);
2330 
2331  PG_RETURN_BOOL(result >= 0);
2332 }
2333 
2334 
2335 Datum
2337 {
2338  text *arg1 = PG_GETARG_TEXT_PP(0);
2339  text *arg2 = PG_GETARG_TEXT_PP(1);
2340  int result;
2341 
2342  result = internal_text_pattern_compare(arg1, arg2);
2343 
2344  PG_FREE_IF_COPY(arg1, 0);
2345  PG_FREE_IF_COPY(arg2, 1);
2346 
2347  PG_RETURN_BOOL(result > 0);
2348 }
2349 
2350 
2351 Datum
2353 {
2354  text *arg1 = PG_GETARG_TEXT_PP(0);
2355  text *arg2 = PG_GETARG_TEXT_PP(1);
2356  int result;
2357 
2358  result = internal_text_pattern_compare(arg1, arg2);
2359 
2360  PG_FREE_IF_COPY(arg1, 0);
2361  PG_FREE_IF_COPY(arg2, 1);
2362 
2363  PG_RETURN_INT32(result);
2364 }
2365 
2366 
2367 /*-------------------------------------------------------------
2368  * byteaoctetlen
2369  *
2370  * get the number of bytes contained in an instance of type 'bytea'
2371  *-------------------------------------------------------------
2372  */
2373 Datum
2375 {
2376  Datum str = PG_GETARG_DATUM(0);
2377 
2378  /* We need not detoast the input at all */
2380 }
2381 
2382 /*
2383  * byteacat -
2384  * takes two bytea* and returns a bytea* that is the concatenation of
2385  * the two.
2386  *
2387  * Cloned from textcat and modified as required.
2388  */
2389 Datum
2391 {
2392  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2393  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2394 
2396 }
2397 
2398 /*
2399  * bytea_catenate
2400  * Guts of byteacat(), broken out so it can be used by other functions
2401  *
2402  * Arguments can be in short-header form, but not compressed or out-of-line
2403  */
2404 static bytea *
2406 {
2407  bytea *result;
2408  int len1,
2409  len2,
2410  len;
2411  char *ptr;
2412 
2413  len1 = VARSIZE_ANY_EXHDR(t1);
2414  len2 = VARSIZE_ANY_EXHDR(t2);
2415 
2416  /* paranoia ... probably should throw error instead? */
2417  if (len1 < 0)
2418  len1 = 0;
2419  if (len2 < 0)
2420  len2 = 0;
2421 
2422  len = len1 + len2 + VARHDRSZ;
2423  result = (bytea *) palloc(len);
2424 
2425  /* Set size of result string... */
2426  SET_VARSIZE(result, len);
2427 
2428  /* Fill data field of result string... */
2429  ptr = VARDATA(result);
2430  if (len1 > 0)
2431  memcpy(ptr, VARDATA_ANY(t1), len1);
2432  if (len2 > 0)
2433  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2434 
2435  return result;
2436 }
2437 
2438 #define PG_STR_GET_BYTEA(str_) \
2439  DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2440 
2441 /*
2442  * bytea_substr()
2443  * Return a substring starting at the specified position.
2444  * Cloned from text_substr and modified as required.
2445  *
2446  * Input:
2447  * - string
2448  * - starting position (is one-based)
2449  * - string length (optional)
2450  *
2451  * If the starting position is zero or less, then return from the start of the string
2452  * adjusting the length to be consistent with the "negative start" per SQL.
2453  * If the length is less than zero, an ERROR is thrown. If no third argument
2454  * (length) is provided, the length to the end of the string is assumed.
2455  */
2456 Datum
2458 {
2460  PG_GETARG_INT32(1),
2461  PG_GETARG_INT32(2),
2462  false));
2463 }
2464 
2465 /*
2466  * bytea_substr_no_len -
2467  * Wrapper to avoid opr_sanity failure due to
2468  * one function accepting a different number of args.
2469  */
2470 Datum
2472 {
2474  PG_GETARG_INT32(1),
2475  -1,
2476  true));
2477 }
2478 
2479 static bytea *
2481  int S,
2482  int L,
2483  bool length_not_specified)
2484 {
2485  int S1; /* adjusted start position */
2486  int L1; /* adjusted substring length */
2487 
2488  S1 = Max(S, 1);
2489 
2490  if (length_not_specified)
2491  {
2492  /*
2493  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2494  * end of the string if we pass it a negative value for length.
2495  */
2496  L1 = -1;
2497  }
2498  else
2499  {
2500  /* end position */
2501  int E = S + L;
2502 
2503  /*
2504  * A negative value for L is the only way for the end position to be
2505  * before the start. SQL99 says to throw an error.
2506  */
2507  if (E < S)
2508  ereport(ERROR,
2509  (errcode(ERRCODE_SUBSTRING_ERROR),
2510  errmsg("negative substring length not allowed")));
2511 
2512  /*
2513  * A zero or negative value for the end position can happen if the
2514  * start was negative or one. SQL99 says to return a zero-length
2515  * string.
2516  */
2517  if (E < 1)
2518  return PG_STR_GET_BYTEA("");
2519 
2520  L1 = E - S1;
2521  }
2522 
2523  /*
2524  * If the start position is past the end of the string, SQL99 says to
2525  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2526  * us. Convert to zero-based starting position
2527  */
2528  return DatumGetByteaPSlice(str, S1 - 1, L1);
2529 }
2530 
2531 /*
2532  * byteaoverlay
2533  * Replace specified substring of first string with second
2534  *
2535  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2536  * This code is a direct implementation of what the standard says.
2537  */
2538 Datum
2540 {
2541  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2542  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2543  int sp = PG_GETARG_INT32(2); /* substring start position */
2544  int sl = PG_GETARG_INT32(3); /* substring length */
2545 
2546  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2547 }
2548 
2549 Datum
2551 {
2552  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2553  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2554  int sp = PG_GETARG_INT32(2); /* substring start position */
2555  int sl;
2556 
2557  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2558  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2559 }
2560 
2561 static bytea *
2562 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2563 {
2564  bytea *result;
2565  bytea *s1;
2566  bytea *s2;
2567  int sp_pl_sl;
2568 
2569  /*
2570  * Check for possible integer-overflow cases. For negative sp, throw a
2571  * "substring length" error because that's what should be expected
2572  * according to the spec's definition of OVERLAY().
2573  */
2574  if (sp <= 0)
2575  ereport(ERROR,
2576  (errcode(ERRCODE_SUBSTRING_ERROR),
2577  errmsg("negative substring length not allowed")));
2578  sp_pl_sl = sp + sl;
2579  if (sp_pl_sl <= sl)
2580  ereport(ERROR,
2581  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2582  errmsg("integer out of range")));
2583 
2584  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2585  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2586  result = bytea_catenate(s1, t2);
2587  result = bytea_catenate(result, s2);
2588 
2589  return result;
2590 }
2591 
2592 /*
2593  * byteapos -
2594  * Return the position of the specified substring.
2595  * Implements the SQL POSITION() function.
2596  * Cloned from textpos and modified as required.
2597  */
2598 Datum
2600 {
2601  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2602  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2603  int pos;
2604  int px,
2605  p;
2606  int len1,
2607  len2;
2608  char *p1,
2609  *p2;
2610 
2611  len1 = VARSIZE_ANY_EXHDR(t1);
2612  len2 = VARSIZE_ANY_EXHDR(t2);
2613 
2614  if (len2 <= 0)
2615  PG_RETURN_INT32(1); /* result for empty pattern */
2616 
2617  p1 = VARDATA_ANY(t1);
2618  p2 = VARDATA_ANY(t2);
2619 
2620  pos = 0;
2621  px = (len1 - len2);
2622  for (p = 0; p <= px; p++)
2623  {
2624  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2625  {
2626  pos = p + 1;
2627  break;
2628  };
2629  p1++;
2630  };
2631 
2632  PG_RETURN_INT32(pos);
2633 }
2634 
2635 /*-------------------------------------------------------------
2636  * byteaGetByte
2637  *
2638  * this routine treats "bytea" as an array of bytes.
2639  * It returns the Nth byte (a number between 0 and 255).
2640  *-------------------------------------------------------------
2641  */
2642 Datum
2644 {
2645  bytea *v = PG_GETARG_BYTEA_PP(0);
2646  int32 n = PG_GETARG_INT32(1);
2647  int len;
2648  int byte;
2649 
2650  len = VARSIZE_ANY_EXHDR(v);
2651 
2652  if (n < 0 || n >= len)
2653  ereport(ERROR,
2654  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2655  errmsg("index %d out of valid range, 0..%d",
2656  n, len - 1)));
2657 
2658  byte = ((unsigned char *) VARDATA_ANY(v))[n];
2659 
2660  PG_RETURN_INT32(byte);
2661 }
2662 
2663 /*-------------------------------------------------------------
2664  * byteaGetBit
2665  *
2666  * This routine treats a "bytea" type like an array of bits.
2667  * It returns the value of the Nth bit (0 or 1).
2668  *
2669  *-------------------------------------------------------------
2670  */
2671 Datum
2673 {
2674  bytea *v = PG_GETARG_BYTEA_PP(0);
2675  int32 n = PG_GETARG_INT32(1);
2676  int byteNo,
2677  bitNo;
2678  int len;
2679  int byte;
2680 
2681  len = VARSIZE_ANY_EXHDR(v);
2682 
2683  if (n < 0 || n >= len * 8)
2684  ereport(ERROR,
2685  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2686  errmsg("index %d out of valid range, 0..%d",
2687  n, len * 8 - 1)));
2688 
2689  byteNo = n / 8;
2690  bitNo = n % 8;
2691 
2692  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
2693 
2694  if (byte & (1 << bitNo))
2695  PG_RETURN_INT32(1);
2696  else
2697  PG_RETURN_INT32(0);
2698 }
2699 
2700 /*-------------------------------------------------------------
2701  * byteaSetByte
2702  *
2703  * Given an instance of type 'bytea' creates a new one with
2704  * the Nth byte set to the given value.
2705  *
2706  *-------------------------------------------------------------
2707  */
2708 Datum
2710 {
2711  bytea *v = PG_GETARG_BYTEA_P(0);
2712  int32 n = PG_GETARG_INT32(1);
2713  int32 newByte = PG_GETARG_INT32(2);
2714  int len;
2715  bytea *res;
2716 
2717  len = VARSIZE(v) - VARHDRSZ;
2718 
2719  if (n < 0 || n >= len)
2720  ereport(ERROR,
2721  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2722  errmsg("index %d out of valid range, 0..%d",
2723  n, len - 1)));
2724 
2725  /*
2726  * Make a copy of the original varlena.
2727  */
2728  res = (bytea *) palloc(VARSIZE(v));
2729  memcpy((char *) res, (char *) v, VARSIZE(v));
2730 
2731  /*
2732  * Now set the byte.
2733  */
2734  ((unsigned char *) VARDATA(res))[n] = newByte;
2735 
2736  PG_RETURN_BYTEA_P(res);
2737 }
2738 
2739 /*-------------------------------------------------------------
2740  * byteaSetBit
2741  *
2742  * Given an instance of type 'bytea' creates a new one with
2743  * the Nth bit set to the given value.
2744  *
2745  *-------------------------------------------------------------
2746  */
2747 Datum
2749 {
2750  bytea *v = PG_GETARG_BYTEA_P(0);
2751  int32 n = PG_GETARG_INT32(1);
2752  int32 newBit = PG_GETARG_INT32(2);
2753  bytea *res;
2754  int len;
2755  int oldByte,
2756  newByte;
2757  int byteNo,
2758  bitNo;
2759 
2760  len = VARSIZE(v) - VARHDRSZ;
2761 
2762  if (n < 0 || n >= len * 8)
2763  ereport(ERROR,
2764  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2765  errmsg("index %d out of valid range, 0..%d",
2766  n, len * 8 - 1)));
2767 
2768  byteNo = n / 8;
2769  bitNo = n % 8;
2770 
2771  /*
2772  * sanity check!
2773  */
2774  if (newBit != 0 && newBit != 1)
2775  ereport(ERROR,
2776  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2777  errmsg("new bit must be 0 or 1")));
2778 
2779  /*
2780  * Make a copy of the original varlena.
2781  */
2782  res = (bytea *) palloc(VARSIZE(v));
2783  memcpy((char *) res, (char *) v, VARSIZE(v));
2784 
2785  /*
2786  * Update the byte.
2787  */
2788  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
2789 
2790  if (newBit == 0)
2791  newByte = oldByte & (~(1 << bitNo));
2792  else
2793  newByte = oldByte | (1 << bitNo);
2794 
2795  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
2796 
2797  PG_RETURN_BYTEA_P(res);
2798 }
2799 
2800 
2801 /* text_name()
2802  * Converts a text type to a Name type.
2803  */
2804 Datum
2806 {
2807  text *s = PG_GETARG_TEXT_PP(0);
2808  Name result;
2809  int len;
2810 
2811  len = VARSIZE_ANY_EXHDR(s);
2812 
2813  /* Truncate oversize input */
2814  if (len >= NAMEDATALEN)
2815  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
2816 
2817  /* We use palloc0 here to ensure result is zero-padded */
2818  result = (Name) palloc0(NAMEDATALEN);
2819  memcpy(NameStr(*result), VARDATA_ANY(s), len);
2820 
2821  PG_RETURN_NAME(result);
2822 }
2823 
2824 /* name_text()
2825  * Converts a Name type to a text type.
2826  */
2827 Datum
2829 {
2830  Name s = PG_GETARG_NAME(0);
2831 
2833 }
2834 
2835 
2836 /*
2837  * textToQualifiedNameList - convert a text object to list of names
2838  *
2839  * This implements the input parsing needed by nextval() and other
2840  * functions that take a text parameter representing a qualified name.
2841  * We split the name at dots, downcase if not double-quoted, and
2842  * truncate names if they're too long.
2843  */
2844 List *
2846 {
2847  char *rawname;
2848  List *result = NIL;
2849  List *namelist;
2850  ListCell *l;
2851 
2852  /* Convert to C string (handles possible detoasting). */
2853  /* Note we rely on being able to modify rawname below. */
2854  rawname = text_to_cstring(textval);
2855 
2856  if (!SplitIdentifierString(rawname, '.', &namelist))
2857  ereport(ERROR,
2858  (errcode(ERRCODE_INVALID_NAME),
2859  errmsg("invalid name syntax")));
2860 
2861  if (namelist == NIL)
2862  ereport(ERROR,
2863  (errcode(ERRCODE_INVALID_NAME),
2864  errmsg("invalid name syntax")));
2865 
2866  foreach(l, namelist)
2867  {
2868  char *curname = (char *) lfirst(l);
2869 
2870  result = lappend(result, makeString(pstrdup(curname)));
2871  }
2872 
2873  pfree(rawname);
2874  list_free(namelist);
2875 
2876  return result;
2877 }
2878 
2879 /*
2880  * SplitIdentifierString --- parse a string containing identifiers
2881  *
2882  * This is the guts of textToQualifiedNameList, and is exported for use in
2883  * other situations such as parsing GUC variables. In the GUC case, it's
2884  * important to avoid memory leaks, so the API is designed to minimize the
2885  * amount of stuff that needs to be allocated and freed.
2886  *
2887  * Inputs:
2888  * rawstring: the input string; must be overwritable! On return, it's
2889  * been modified to contain the separated identifiers.
2890  * separator: the separator punctuation expected between identifiers
2891  * (typically '.' or ','). Whitespace may also appear around
2892  * identifiers.
2893  * Outputs:
2894  * namelist: filled with a palloc'd list of pointers to identifiers within
2895  * rawstring. Caller should list_free() this even on error return.
2896  *
2897  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2898  *
2899  * Note that an empty string is considered okay here, though not in
2900  * textToQualifiedNameList.
2901  */
2902 bool
2903 SplitIdentifierString(char *rawstring, char separator,
2904  List **namelist)
2905 {
2906  char *nextp = rawstring;
2907  bool done = false;
2908 
2909  *namelist = NIL;
2910 
2911  while (isspace((unsigned char) *nextp))
2912  nextp++; /* skip leading whitespace */
2913 
2914  if (*nextp == '\0')
2915  return true; /* allow empty string */
2916 
2917  /* At the top of the loop, we are at start of a new identifier. */
2918  do
2919  {
2920  char *curname;
2921  char *endp;
2922 
2923  if (*nextp == '\"')
2924  {
2925  /* Quoted name --- collapse quote-quote pairs, no downcasing */
2926  curname = nextp + 1;
2927  for (;;)
2928  {
2929  endp = strchr(nextp + 1, '\"');
2930  if (endp == NULL)
2931  return false; /* mismatched quotes */
2932  if (endp[1] != '\"')
2933  break; /* found end of quoted name */
2934  /* Collapse adjacent quotes into one quote, and look again */
2935  memmove(endp, endp + 1, strlen(endp));
2936  nextp = endp;
2937  }
2938  /* endp now points at the terminating quote */
2939  nextp = endp + 1;
2940  }
2941  else
2942  {
2943  /* Unquoted name --- extends to separator or whitespace */
2944  char *downname;
2945  int len;
2946 
2947  curname = nextp;
2948  while (*nextp && *nextp != separator &&
2949  !isspace((unsigned char) *nextp))
2950  nextp++;
2951  endp = nextp;
2952  if (curname == nextp)
2953  return false; /* empty unquoted name not allowed */
2954 
2955  /*
2956  * Downcase the identifier, using same code as main lexer does.
2957  *
2958  * XXX because we want to overwrite the input in-place, we cannot
2959  * support a downcasing transformation that increases the string
2960  * length. This is not a problem given the current implementation
2961  * of downcase_truncate_identifier, but we'll probably have to do
2962  * something about this someday.
2963  */
2964  len = endp - curname;
2965  downname = downcase_truncate_identifier(curname, len, false);
2966  Assert(strlen(downname) <= len);
2967  strncpy(curname, downname, len); /* strncpy is required here */
2968  pfree(downname);
2969  }
2970 
2971  while (isspace((unsigned char) *nextp))
2972  nextp++; /* skip trailing whitespace */
2973 
2974  if (*nextp == separator)
2975  {
2976  nextp++;
2977  while (isspace((unsigned char) *nextp))
2978  nextp++; /* skip leading whitespace for next */
2979  /* we expect another name, so done remains false */
2980  }
2981  else if (*nextp == '\0')
2982  done = true;
2983  else
2984  return false; /* invalid syntax */
2985 
2986  /* Now safe to overwrite separator with a null */
2987  *endp = '\0';
2988 
2989  /* Truncate name if it's overlength */
2990  truncate_identifier(curname, strlen(curname), false);
2991 
2992  /*
2993  * Finished isolating current name --- add it to list
2994  */
2995  *namelist = lappend(*namelist, curname);
2996 
2997  /* Loop back if we didn't reach end of string */
2998  } while (!done);
2999 
3000  return true;
3001 }
3002 
3003 
3004 /*
3005  * SplitDirectoriesString --- parse a string containing directory names
3006  *
3007  * This is similar to SplitIdentifierString, except that the parsing
3008  * rules are meant to handle pathnames instead of identifiers: there is
3009  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3010  * and we apply canonicalize_path() to each extracted string. Because of the
3011  * last, the returned strings are separately palloc'd rather than being
3012  * pointers into rawstring --- but we still scribble on rawstring.
3013  *
3014  * Inputs:
3015  * rawstring: the input string; must be modifiable!
3016  * separator: the separator punctuation expected between directories
3017  * (typically ',' or ';'). Whitespace may also appear around
3018  * directories.
3019  * Outputs:
3020  * namelist: filled with a palloc'd list of directory names.
3021  * Caller should list_free_deep() this even on error return.
3022  *
3023  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3024  *
3025  * Note that an empty string is considered okay here.
3026  */
3027 bool
3028 SplitDirectoriesString(char *rawstring, char separator,
3029  List **namelist)
3030 {
3031  char *nextp = rawstring;
3032  bool done = false;
3033 
3034  *namelist = NIL;
3035 
3036  while (isspace((unsigned char) *nextp))
3037  nextp++; /* skip leading whitespace */
3038 
3039  if (*nextp == '\0')
3040  return true; /* allow empty string */
3041 
3042  /* At the top of the loop, we are at start of a new directory. */
3043  do
3044  {
3045  char *curname;
3046  char *endp;
3047 
3048  if (*nextp == '\"')
3049  {
3050  /* Quoted name --- collapse quote-quote pairs */
3051  curname = nextp + 1;
3052  for (;;)
3053  {
3054  endp = strchr(nextp + 1, '\"');
3055  if (endp == NULL)
3056  return false; /* mismatched quotes */
3057  if (endp[1] != '\"')
3058  break; /* found end of quoted name */
3059  /* Collapse adjacent quotes into one quote, and look again */
3060  memmove(endp, endp + 1, strlen(endp));
3061  nextp = endp;
3062  }
3063  /* endp now points at the terminating quote */
3064  nextp = endp + 1;
3065  }
3066  else
3067  {
3068  /* Unquoted name --- extends to separator or end of string */
3069  curname = endp = nextp;
3070  while (*nextp && *nextp != separator)
3071  {
3072  /* trailing whitespace should not be included in name */
3073  if (!isspace((unsigned char) *nextp))
3074  endp = nextp + 1;
3075  nextp++;
3076  }
3077  if (curname == endp)
3078  return false; /* empty unquoted name not allowed */
3079  }
3080 
3081  while (isspace((unsigned char) *nextp))
3082  nextp++; /* skip trailing whitespace */
3083 
3084  if (*nextp == separator)
3085  {
3086  nextp++;
3087  while (isspace((unsigned char) *nextp))
3088  nextp++; /* skip leading whitespace for next */
3089  /* we expect another name, so done remains false */
3090  }
3091  else if (*nextp == '\0')
3092  done = true;
3093  else
3094  return false; /* invalid syntax */
3095 
3096  /* Now safe to overwrite separator with a null */
3097  *endp = '\0';
3098 
3099  /* Truncate path if it's overlength */
3100  if (strlen(curname) >= MAXPGPATH)
3101  curname[MAXPGPATH - 1] = '\0';
3102 
3103  /*
3104  * Finished isolating current name --- add it to list
3105  */
3106  curname = pstrdup(curname);
3107  canonicalize_path(curname);
3108  *namelist = lappend(*namelist, curname);
3109 
3110  /* Loop back if we didn't reach end of string */
3111  } while (!done);
3112 
3113  return true;
3114 }
3115 
3116 
3117 /*****************************************************************************
3118  * Comparison Functions used for bytea
3119  *
3120  * Note: btree indexes need these routines not to leak memory; therefore,
3121  * be careful to free working copies of toasted datums. Most places don't
3122  * need to be so careful.
3123  *****************************************************************************/
3124 
3125 Datum
3127 {
3128  Datum arg1 = PG_GETARG_DATUM(0);
3129  Datum arg2 = PG_GETARG_DATUM(1);
3130  bool result;
3131  Size len1,
3132  len2;
3133 
3134  /*
3135  * We can use a fast path for unequal lengths, which might save us from
3136  * having to detoast one or both values.
3137  */
3138  len1 = toast_raw_datum_size(arg1);
3139  len2 = toast_raw_datum_size(arg2);
3140  if (len1 != len2)
3141  result = false;
3142  else
3143  {
3144  bytea *barg1 = DatumGetByteaPP(arg1);
3145  bytea *barg2 = DatumGetByteaPP(arg2);
3146 
3147  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3148  len1 - VARHDRSZ) == 0);
3149 
3150  PG_FREE_IF_COPY(barg1, 0);
3151  PG_FREE_IF_COPY(barg2, 1);
3152  }
3153 
3154  PG_RETURN_BOOL(result);
3155 }
3156 
3157 Datum
3159 {
3160  Datum arg1 = PG_GETARG_DATUM(0);
3161  Datum arg2 = PG_GETARG_DATUM(1);
3162  bool result;
3163  Size len1,
3164  len2;
3165 
3166  /*
3167  * We can use a fast path for unequal lengths, which might save us from
3168  * having to detoast one or both values.
3169  */
3170  len1 = toast_raw_datum_size(arg1);
3171  len2 = toast_raw_datum_size(arg2);
3172  if (len1 != len2)
3173  result = true;
3174  else
3175  {
3176  bytea *barg1 = DatumGetByteaPP(arg1);
3177  bytea *barg2 = DatumGetByteaPP(arg2);
3178 
3179  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3180  len1 - VARHDRSZ) != 0);
3181 
3182  PG_FREE_IF_COPY(barg1, 0);
3183  PG_FREE_IF_COPY(barg2, 1);
3184  }
3185 
3186  PG_RETURN_BOOL(result);
3187 }
3188 
3189 Datum
3191 {
3192  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3193  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3194  int len1,
3195  len2;
3196  int cmp;
3197 
3198  len1 = VARSIZE_ANY_EXHDR(arg1);
3199  len2 = VARSIZE_ANY_EXHDR(arg2);
3200 
3201  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3202 
3203  PG_FREE_IF_COPY(arg1, 0);
3204  PG_FREE_IF_COPY(arg2, 1);
3205 
3206  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3207 }
3208 
3209 Datum
3211 {
3212  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3213  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3214  int len1,
3215  len2;
3216  int cmp;
3217 
3218  len1 = VARSIZE_ANY_EXHDR(arg1);
3219  len2 = VARSIZE_ANY_EXHDR(arg2);
3220 
3221  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3222 
3223  PG_FREE_IF_COPY(arg1, 0);
3224  PG_FREE_IF_COPY(arg2, 1);
3225 
3226  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3227 }
3228 
3229 Datum
3231 {
3232  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3233  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3234  int len1,
3235  len2;
3236  int cmp;
3237 
3238  len1 = VARSIZE_ANY_EXHDR(arg1);
3239  len2 = VARSIZE_ANY_EXHDR(arg2);
3240 
3241  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3242 
3243  PG_FREE_IF_COPY(arg1, 0);
3244  PG_FREE_IF_COPY(arg2, 1);
3245 
3246  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3247 }
3248 
3249 Datum
3251 {
3252  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3253  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3254  int len1,
3255  len2;
3256  int cmp;
3257 
3258  len1 = VARSIZE_ANY_EXHDR(arg1);
3259  len2 = VARSIZE_ANY_EXHDR(arg2);
3260 
3261  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3262 
3263  PG_FREE_IF_COPY(arg1, 0);
3264  PG_FREE_IF_COPY(arg2, 1);
3265 
3266  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
3267 }
3268 
3269 Datum
3271 {
3272  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3273  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3274  int len1,
3275  len2;
3276  int cmp;
3277 
3278  len1 = VARSIZE_ANY_EXHDR(arg1);
3279  len2 = VARSIZE_ANY_EXHDR(arg2);
3280 
3281  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3282  if ((cmp == 0) && (len1 != len2))
3283  cmp = (len1 < len2) ? -1 : 1;
3284 
3285  PG_FREE_IF_COPY(arg1, 0);
3286  PG_FREE_IF_COPY(arg2, 1);
3287 
3288  PG_RETURN_INT32(cmp);
3289 }
3290 
3291 /*
3292  * appendStringInfoText
3293  *
3294  * Append a text to str.
3295  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
3296  */
3297 static void
3299 {
3301 }
3302 
3303 /*
3304  * replace_text
3305  * replace all occurrences of 'old_sub_str' in 'orig_str'
3306  * with 'new_sub_str' to form 'new_str'
3307  *
3308  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
3309  * otherwise returns 'new_str'
3310  */
3311 Datum
3313 {
3314  text *src_text = PG_GETARG_TEXT_PP(0);
3315  text *from_sub_text = PG_GETARG_TEXT_PP(1);
3316  text *to_sub_text = PG_GETARG_TEXT_PP(2);
3317  int src_text_len;
3318  int from_sub_text_len;
3320  text *ret_text;
3321  int start_posn;
3322  int curr_posn;
3323  int chunk_len;
3324  char *start_ptr;
3325  StringInfoData str;
3326 
3327  text_position_setup(src_text, from_sub_text, &state);
3328 
3329  /*
3330  * Note: we check the converted string length, not the original, because
3331  * they could be different if the input contained invalid encoding.
3332  */
3333  src_text_len = state.len1;
3334  from_sub_text_len = state.len2;
3335 
3336  /* Return unmodified source string if empty source or pattern */
3337  if (src_text_len < 1 || from_sub_text_len < 1)
3338  {
3339  text_position_cleanup(&state);
3340  PG_RETURN_TEXT_P(src_text);
3341  }
3342 
3343  start_posn = 1;
3344  curr_posn = text_position_next(1, &state);
3345 
3346  /* When the from_sub_text is not found, there is nothing to do. */
3347  if (curr_posn == 0)
3348  {
3349  text_position_cleanup(&state);
3350  PG_RETURN_TEXT_P(src_text);
3351  }
3352 
3353  /* start_ptr points to the start_posn'th character of src_text */
3354  start_ptr = VARDATA_ANY(src_text);
3355 
3356  initStringInfo(&str);
3357 
3358  do
3359  {
3361 
3362  /* copy the data skipped over by last text_position_next() */
3363  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3364  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3365 
3366  appendStringInfoText(&str, to_sub_text);
3367 
3368  start_posn = curr_posn;
3369  start_ptr += chunk_len;
3370  start_posn += from_sub_text_len;
3371  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3372 
3373  curr_posn = text_position_next(start_posn, &state);
3374  }
3375  while (curr_posn > 0);
3376 
3377  /* copy trailing data */
3378  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3379  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3380 
3381  text_position_cleanup(&state);
3382 
3383  ret_text = cstring_to_text_with_len(str.data, str.len);
3384  pfree(str.data);
3385 
3386  PG_RETURN_TEXT_P(ret_text);
3387 }
3388 
3389 /*
3390  * check_replace_text_has_escape_char
3391  *
3392  * check whether replace_text contains escape char.
3393  */
3394 static bool
3396 {
3397  const char *p = VARDATA_ANY(replace_text);
3398  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3399 
3401  {
3402  for (; p < p_end; p++)
3403  {
3404  if (*p == '\\')
3405  return true;
3406  }
3407  }
3408  else
3409  {
3410  for (; p < p_end; p += pg_mblen(p))
3411  {
3412  if (*p == '\\')
3413  return true;
3414  }
3415  }
3416 
3417  return false;
3418 }
3419 
3420 /*
3421  * appendStringInfoRegexpSubstr
3422  *
3423  * Append replace_text to str, substituting regexp back references for
3424  * \n escapes. start_ptr is the start of the match in the source string,
3425  * at logical character position data_pos.
3426  */
3427 static void
3429  regmatch_t *pmatch,
3430  char *start_ptr, int data_pos)
3431 {
3432  const char *p = VARDATA_ANY(replace_text);
3433  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3434  int eml = pg_database_encoding_max_length();
3435 
3436  for (;;)
3437  {
3438  const char *chunk_start = p;
3439  int so;
3440  int eo;
3441 
3442  /* Find next escape char. */
3443  if (eml == 1)
3444  {
3445  for (; p < p_end && *p != '\\'; p++)
3446  /* nothing */ ;
3447  }
3448  else
3449  {
3450  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3451  /* nothing */ ;
3452  }
3453 
3454  /* Copy the text we just scanned over, if any. */
3455  if (p > chunk_start)
3456  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3457 
3458  /* Done if at end of string, else advance over escape char. */
3459  if (p >= p_end)
3460  break;
3461  p++;
3462 
3463  if (p >= p_end)
3464  {
3465  /* Escape at very end of input. Treat same as unexpected char */
3466  appendStringInfoChar(str, '\\');
3467  break;
3468  }
3469 
3470  if (*p >= '1' && *p <= '9')
3471  {
3472  /* Use the back reference of regexp. */
3473  int idx = *p - '0';
3474 
3475  so = pmatch[idx].rm_so;
3476  eo = pmatch[idx].rm_eo;
3477  p++;
3478  }
3479  else if (*p == '&')
3480  {
3481  /* Use the entire matched string. */
3482  so = pmatch[0].rm_so;
3483  eo = pmatch[0].rm_eo;
3484  p++;
3485  }
3486  else if (*p == '\\')
3487  {
3488  /* \\ means transfer one \ to output. */
3489  appendStringInfoChar(str, '\\');
3490  p++;
3491  continue;
3492  }
3493  else
3494  {
3495  /*
3496  * If escape char is not followed by any expected char, just treat
3497  * it as ordinary data to copy. (XXX would it be better to throw
3498  * an error?)
3499  */
3500  appendStringInfoChar(str, '\\');
3501  continue;
3502  }
3503 
3504  if (so != -1 && eo != -1)
3505  {
3506  /*
3507  * Copy the text that is back reference of regexp. Note so and eo
3508  * are counted in characters not bytes.
3509  */
3510  char *chunk_start;
3511  int chunk_len;
3512 
3513  Assert(so >= data_pos);
3514  chunk_start = start_ptr;
3515  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3516  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3517  appendBinaryStringInfo(str, chunk_start, chunk_len);
3518  }
3519  }
3520 }
3521 
3522 #define REGEXP_REPLACE_BACKREF_CNT 10
3523 
3524 /*
3525  * replace_text_regexp
3526  *
3527  * Replace the text in src_text that matches regexp with replace_text.
      *
      * If glob is false only the first match is replaced; otherwise the search
      * is repeated until pg_regexec reports REG_NOMATCH.  Any other non-REG_OKAY
      * result raises ERRCODE_INVALID_REGULAR_EXPRESSION.  The result is a new
      * text value built from an internal buffer; the working buffers are freed
      * before returning.
      *
      * When check_replace_text_has_escape_char() reports an escape character
      * in replace_text, each replacement is expanded through
      * appendStringInfoRegexpSubstr(); otherwise replace_text is copied as-is.
3528  *
3529  * Note: to avoid having to include regex.h in builtins.h, we declare
3530  * the regexp argument as void *, but really it's regex_t *.
      *
      * NOTE(review): "buf" and "pmatch" are used below but their declarations
      * are not visible in this listing (source lines 3539-3540 are absent), and
      * source lines 3566, 3573 and 3584 are absent as well -- confirm against
      * the real file.
3531  */
3532 text *
3533 replace_text_regexp(text *src_text, void *regexp,
3534  text *replace_text, bool glob)
3535 {
3536  text *ret_text;
3537  regex_t *re = (regex_t *) regexp;
3538  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3541  pg_wchar *data;
3542  size_t data_len;
3543  int search_start;
3544  int data_pos;
3545  char *start_ptr;
3546  bool have_escape;
3547 
3548  initStringInfo(&buf);
3549 
3550  /*
      * Convert data string to wide characters.  The buffer has room for one
      * pg_wchar per source byte, plus one.
      */
3551  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3552  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3553 
3554  /* Check whether replace_text has escape char. */
3555  have_escape = check_replace_text_has_escape_char(replace_text);
3556 
3557  /* start_ptr points to the data_pos'th character of src_text */
3558  start_ptr = (char *) VARDATA_ANY(src_text);
3559  data_pos = 0;
3560 
3561  search_start = 0;
      /*
       * "<=" lets a search start exactly at the end of the data (presumably
       * so a zero-length match there can still be found).
       */
3562  while (search_start <= data_len)
3563  {
3564  int regexec_result;
3565 
3567 
3568  regexec_result = pg_regexec(re,
3569  data,
3570  data_len,
3571  search_start,
3572  NULL, /* no details */
3574  pmatch,
3575  0);
3576 
3577  if (regexec_result == REG_NOMATCH)
3578  break;
3579 
3580  if (regexec_result != REG_OKAY)
3581  {
3582  char errMsg[100];
3583 
3585  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3586  ereport(ERROR,
3587  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3588  errmsg("regular expression failed: %s", errMsg)));
3589  }
3590 
3591  /*
3592  * Copy the text to the left of the match position. Note we are given
3593  * character not byte indexes.
3594  */
3595  if (pmatch[0].rm_so - data_pos > 0)
3596  {
3597  int chunk_len;
3598 
3599  chunk_len = charlen_to_bytelen(start_ptr,
3600  pmatch[0].rm_so - data_pos);
3601  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3602 
3603  /*
3604  * Advance start_ptr over that text, to avoid multiple rescans of
3605  * it if the replace_text contains multiple back-references.
3606  */
3607  start_ptr += chunk_len;
3608  data_pos = pmatch[0].rm_so;
3609  }
3610 
3611  /*
3612  * Copy the replace_text. Process back references when the
3613  * replace_text has escape characters.
3614  */
3615  if (have_escape)
3616  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3617  start_ptr, data_pos);
3618  else
3619  appendStringInfoText(&buf, replace_text);
3620 
3621  /* Advance start_ptr and data_pos over the matched text. */
3622  start_ptr += charlen_to_bytelen(start_ptr,
3623  pmatch[0].rm_eo - data_pos);
3624  data_pos = pmatch[0].rm_eo;
3625 
3626  /*
3627  * When global option is off, replace the first instance only.
3628  */
3629  if (!glob)
3630  break;
3631 
3632  /*
3633  * Advance search position. Normally we start the next search at the
3634  * end of the previous match; but if the match was of zero length, we
3635  * have to advance by one character, or we'd just find the same match
3636  * again.
3637  */
3638  search_start = data_pos;
3639  if (pmatch[0].rm_so == pmatch[0].rm_eo)
3640  search_start++;
3641  }
3642 
3643  /*
3644  * Copy the text to the right of the last match.  The remaining length is
      * measured in bytes, from start_ptr to the end of the src_text datum.
3645  */
3646  if (data_pos < data_len)
3647  {
3648  int chunk_len;
3649 
3650  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3651  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3652  }
3653 
3654  ret_text = cstring_to_text_with_len(buf.data, buf.len);
3655  pfree(buf.data);
3656  pfree(data);
3657 
3658  return ret_text;
3659 }
3660 
3661 /*
3662  * split_text
3663  * parse input string
3664  * return ord item (1 based)
3665  * based on provided field separator
      *
      * Arguments (fetched from fcinfo): 0 = input string (text),
      * 1 = field separator (text), 2 = field number (int32).
      *
      * A field number below 1 raises ERRCODE_INVALID_PARAMETER_VALUE.  With an
      * empty separator, or when the separator never occurs, field 1 is the
      * whole input string.  If the requested field is the last one it extends
      * to the end of the input; a field number beyond the last field yields
      * an empty string.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 3668), the declaration of "state" (3675) and the statements
      * on source lines 3699, 3710 and 3725 are absent from this listing; per the
      * adjacent comments the latter return an empty string -- confirm.
3666  */
3667 Datum
3669 {
3670  text *inputstring = PG_GETARG_TEXT_PP(0);
3671  text *fldsep = PG_GETARG_TEXT_PP(1);
3672  int fldnum = PG_GETARG_INT32(2);
3673  int inputstring_len;
3674  int fldsep_len;
3676  int start_posn;
3677  int end_posn;
3678  text *result_text;
3679 
3680  /* field number is 1 based */
3681  if (fldnum < 1)
3682  ereport(ERROR,
3683  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3684  errmsg("field position must be greater than zero")));
3685 
3686  text_position_setup(inputstring, fldsep, &state);
3687 
3688  /*
3689  * Note: we check the converted string length, not the original, because
3690  * they could be different if the input contained invalid encoding.
3691  */
3692  inputstring_len = state.len1;
3693  fldsep_len = state.len2;
3694 
3695  /* return empty string for empty input string */
3696  if (inputstring_len < 1)
3697  {
3698  text_position_cleanup(&state);
3700  }
3701 
3702  /* empty field separator */
3703  if (fldsep_len < 1)
3704  {
3705  text_position_cleanup(&state);
3706  /* if first field, return input string, else empty string */
3707  if (fldnum == 1)
3708  PG_RETURN_TEXT_P(inputstring);
3709  else
3711  }
3712 
3713  /* identify bounds of first field */
3714  start_posn = 1;
3715  end_posn = text_position_next(1, &state);
3716 
3717  /* special case if fldsep not found at all */
3718  if (end_posn == 0)
3719  {
3720  text_position_cleanup(&state);
3721  /* if field 1 requested, return input string, else empty string */
3722  if (fldnum == 1)
3723  PG_RETURN_TEXT_P(inputstring);
3724  else
3726  }
3727 
      /*
       * Step from separator to separator.  fldnum is decremented once per
       * field skipped, so on exit fldnum == 0 means the requested field is
       * delimited by a separator at end_posn, while fldnum > 0 means the
       * separators ran out first.
       */
3728  while (end_posn > 0 && --fldnum > 0)
3729  {
3730  /* identify bounds of next field */
3731  start_posn = end_posn + fldsep_len;
3732  end_posn = text_position_next(start_posn, &state);
3733  }
3734 
3735  text_position_cleanup(&state);
3736 
3737  if (fldnum > 0)
3738  {
3739  /* N'th field separator not found */
3740  /* if last field requested, return it, else empty string */
3741  if (fldnum == 1)
3742  result_text = text_substring(PointerGetDatum(inputstring),
3743  start_posn,
3744  -1,
3745  true);
3746  else
3747  result_text = cstring_to_text("");
3748  }
3749  else
3750  {
3751  /* non-last field requested */
3752  result_text = text_substring(PointerGetDatum(inputstring),
3753  start_posn,
3754  end_posn - start_posn,
3755  false);
3756  }
3757 
3758  PG_RETURN_TEXT_P(result_text);
3759 }
3760 
3761 /*
3762  * Convenience function to return true when two text params are equal.
      *
      * Both arguments are passed on as pointer Datums.
      *
      * NOTE(review): the first line of the return expression (source line
      * 3767), which names the comparison function being invoked, is absent
      * from this listing.
3763  */
3764 static bool
3765 text_isequal(text *txt1, text *txt2)
3766 {
3768  PointerGetDatum(txt1),
3769  PointerGetDatum(txt2)));
3770 }
3771 
3772 /*
3773  * text_to_array
3774  * parse input string and return text array of elements,
3775  * based on provided field separator
      *
      * Thin entry point: forwards fcinfo unchanged to text_to_array_internal()
      * and returns its result.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 3778) is absent from this listing.
3776  */
3777 Datum
3779 {
3780  return text_to_array_internal(fcinfo);
3781 }
3782 
3783 /*
3784  * text_to_array_null
3785  * parse input string and return text array of elements,
3786  * based on provided field separator and null string
3787  *
3788  * This is a separate entry point only to prevent the regression tests from
3789  * complaining about different argument sets for the same internal function.
      * Like text_to_array, it simply forwards fcinfo to
      * text_to_array_internal().
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 3792) is absent from this listing.
3790  */
3791 Datum
3793 {
3794  return text_to_array_internal(fcinfo);
3795 }
3796 
3797 /*
3798  * common code for text_to_array and text_to_array_null functions
3799  *
3800  * These are not strict so we have to test for null inputs explicitly.
      *
      * Arguments (fetched from fcinfo): 0 = input string, 1 = field separator,
      * 2 = optional null string.  A NULL input string gives a NULL result.  A
      * NULL separator splits the input into individual characters.  When a
      * null string is supplied, every field equal to it (per text_isequal) is
      * stored as a NULL element.
      *
      * NOTE(review): this listing is missing several source lines of this
      * function: the name/parameter line (3803), the declaration of "state"
      * (3839), the statements at 3859, 3871, 3882, 3932 and 3940, the last
      * argument line of both accumArrayResult() calls (3906, 3951), and the
      * final statement (3960-3961).  Per the adjacent comments, 3859 and 3932
      * return an empty array -- confirm against the real file.
3801  */
3802 static Datum
3804 {
3805  text *inputstring;
3806  text *fldsep;
3807  text *null_string;
3808  int inputstring_len;
3809  int fldsep_len;
3810  char *start_ptr;
3811  text *result_text;
3812  bool is_null;
3813  ArrayBuildState *astate = NULL;
3814 
3815  /* when input string is NULL, then result is NULL too */
3816  if (PG_ARGISNULL(0))
3817  PG_RETURN_NULL();
3818 
3819  inputstring = PG_GETARG_TEXT_PP(0);
3820 
3821  /* fldsep can be NULL */
3822  if (!PG_ARGISNULL(1))
3823  fldsep = PG_GETARG_TEXT_PP(1);
3824  else
3825  fldsep = NULL;
3826 
3827  /* null_string can be NULL or omitted */
3828  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
3829  null_string = PG_GETARG_TEXT_PP(2);
3830  else
3831  null_string = NULL;
3832 
3833  if (fldsep != NULL)
3834  {
3835  /*
3836  * Normal case with non-null fldsep. Use the text_position machinery
3837  * to search for occurrences of fldsep.
3838  */
3840  int fldnum;
3841  int start_posn;
3842  int end_posn;
3843  int chunk_len;
3844 
3845  text_position_setup(inputstring, fldsep, &state);
3846 
3847  /*
3848  * Note: we check the converted string length, not the original,
3849  * because they could be different if the input contained invalid
3850  * encoding.
3851  */
3852  inputstring_len = state.len1;
3853  fldsep_len = state.len2;
3854 
3855  /* return empty array for empty input string */
3856  if (inputstring_len < 1)
3857  {
3858  text_position_cleanup(&state);
3860  }
3861 
3862  /*
3863  * empty field separator: return the input string as a one-element
3864  * array
3865  */
3866  if (fldsep_len < 1)
3867  {
3868  text_position_cleanup(&state);
3869  /* single element can be a NULL too */
3870  is_null = null_string ? text_isequal(inputstring, null_string) : false;
3872  PointerGetDatum(inputstring),
3873  is_null, 1));
3874  }
3875 
3876  start_posn = 1;
3877  /* start_ptr points to the start_posn'th character of inputstring */
3878  start_ptr = VARDATA_ANY(inputstring);
3879 
3880  for (fldnum = 1;; fldnum++) /* field number is 1 based */
3881  {
3883 
3884  end_posn = text_position_next(start_posn, &state);
3885 
3886  if (end_posn == 0)
3887  {
3888  /* fetch last field: everything up to the end of the datum, in bytes */
3889  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
3890  }
3891  else
3892  {
3893  /* fetch non-last field: convert its character length to bytes */
3894  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
3895  }
3896 
3897  /* must build a temp text datum to pass to accumArrayResult */
3898  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3899  is_null = null_string ? text_isequal(result_text, null_string) : false;
3900 
3901  /* stash away this field */
3902  astate = accumArrayResult(astate,
3903  PointerGetDatum(result_text),
3904  is_null,
3905  TEXTOID,
3907 
3908  pfree(result_text);
3909 
3910  if (end_posn == 0)
3911  break;
3912 
      /*
       * Step past the field just stored and then past the separator,
       * keeping the character position (start_posn) and the byte pointer
       * (start_ptr) in sync.
       */
3913  start_posn = end_posn;
3914  start_ptr += chunk_len;
3915  start_posn += fldsep_len;
3916  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
3917  }
3918 
3919  text_position_cleanup(&state);
3920  }
3921  else
3922  {
3923  /*
3924  * When fldsep is NULL, each character in the inputstring becomes an
3925  * element in the result array. The separator is effectively the
3926  * space between characters.
      *
      * Here inputstring_len counts the remaining bytes, not characters.
3927  */
3928  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
3929 
3930  /* return empty array for empty input string */
3931  if (inputstring_len < 1)
3933 
3934  start_ptr = VARDATA_ANY(inputstring);
3935 
3936  while (inputstring_len > 0)
3937  {
3938  int chunk_len = pg_mblen(start_ptr);
3939 
3941 
3942  /* must build a temp text datum to pass to accumArrayResult */
3943  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3944  is_null = null_string ? text_isequal(result_text, null_string) : false;
3945 
3946  /* stash away this field */
3947  astate = accumArrayResult(astate,
3948  PointerGetDatum(result_text),
3949  is_null,
3950  TEXTOID,
3952 
3953  pfree(result_text);
3954 
3955  start_ptr += chunk_len;
3956  inputstring_len -= chunk_len;
3957  }
3958  }
3959 
3962 }
3963 
3964 /*
3965  * array_to_text
3966  * concatenate Cstring representation of input array elements
3967  * using provided field separator
      *
      * The separator is argument 1, converted to a C string.  NULL is passed
      * as the null string, so array_to_text_internal() skips null elements.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 3970) and the declaration of "v" (3972) are absent from
      * this listing.
3968  */
3969 Datum
3971 {
3973  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3974 
3975  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
3976 }
3977 
3978 /*
3979  * array_to_text_null
3980  * concatenate Cstring representation of input array elements
3981  * using provided field separator and null string
3982  *
3983  * This version is not strict so we have to test for null inputs explicitly.
      *
      * Arguments (fetched from fcinfo): 0 = array, 1 = field separator,
      * 2 = null string.  The result is NULL if argument 0 or 1 is NULL.  A NULL
      * argument 2 is forwarded as a null pointer, which makes
      * array_to_text_internal() skip null elements.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 3986) is absent from this listing.
3984  */
3985 Datum
3987 {
3988  ArrayType *v;
3989  char *fldsep;
3990  char *null_string;
3991 
3992  /* returns NULL when first or second parameter is NULL */
3993  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
3994  PG_RETURN_NULL();
3995 
3996  v = PG_GETARG_ARRAYTYPE_P(0);
3997  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3998 
3999  /* NULL null string is passed through as a null pointer */
4000  if (!PG_ARGISNULL(2))
4001  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4002  else
4003  null_string = NULL;
4004 
4005  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4006 }
4007 
4008 /*
4009  * common code for array_to_text and array_to_text_null functions
      *
      * Walks the elements of array v, converts each non-null element to a C
      * string with the element type's output function, and joins the pieces
      * with fldsep.  Null elements are skipped when null_string is NULL and
      * rendered as null_string otherwise.  An array with no elements yields an
      * empty text value.
      *
      * fcinfo is used only for fcinfo->flinfo: fn_extra caches the element
      * type's I/O data, allocated in fn_mcxt.
      *
      * NOTE(review): the first line of the parameter list (source line 4012)
      * and the declaration of "buf" (4023) are absent from this listing.
4010  */
4011 static text *
4013  const char *fldsep, const char *null_string)
4014 {
4015  text *result;
4016  int nitems,
4017  *dims,
4018  ndims;
4019  Oid element_type;
4020  int typlen;
4021  bool typbyval;
4022  char typalign;
4024  bool printed = false;
4025  char *p;
4026  bits8 *bitmap;
4027  int bitmask;
4028  int i;
4029  ArrayMetaState *my_extra;
4030 
4031  ndims = ARR_NDIM(v);
4032  dims = ARR_DIMS(v);
4033  nitems = ArrayGetNItems(ndims, dims);
4034 
4035  /* if there are no elements, return an empty string */
4036  if (nitems == 0)
4037  return cstring_to_text_with_len("", 0);
4038 
4039  element_type = ARR_ELEMTYPE(v);
4040  initStringInfo(&buf);
4041 
4042  /*
4043  * We arrange to look up info about element type, including its output
4044  * conversion proc, only once per series of calls, assuming the element
4045  * type doesn't change underneath us.
4046  */
4047  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4048  if (my_extra == NULL)
4049  {
4050  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4051  sizeof(ArrayMetaState));
4052  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
      /*
       * Seed the cache with a value that can never equal element_type, so
       * the lookup below is forced on first use.
       */
4053  my_extra->element_type = ~element_type;
4054  }
4055 
4056  if (my_extra->element_type != element_type)
4057  {
4058  /*
4059  * Get info about element type, including its output conversion proc
4060  */
4061  get_type_io_data(element_type, IOFunc_output,
4062  &my_extra->typlen, &my_extra->typbyval,
4063  &my_extra->typalign, &my_extra->typdelim,
4064  &my_extra->typioparam, &my_extra->typiofunc);
4065  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4066  fcinfo->flinfo->fn_mcxt);
4067  my_extra->element_type = element_type;
4068  }
4069  typlen = my_extra->typlen;
4070  typbyval = my_extra->typbyval;
4071  typalign = my_extra->typalign;
4072 
4073  p = ARR_DATA_PTR(v);
4074  bitmap = ARR_NULLBITMAP(v);
4075  bitmask = 1;
4076 
4077  for (i = 0; i < nitems; i++)
4078  {
4079  Datum itemvalue;
4080  char *value;
4081 
4082  /* Get source element, checking for NULL (a clear bitmap bit) */
4083  if (bitmap && (*bitmap & bitmask) == 0)
4084  {
4085  /* if null_string is NULL, we just ignore null elements */
4086  if (null_string != NULL)
4087  {
4088  if (printed)
4089  appendStringInfo(&buf, "%s%s", fldsep, null_string);
4090  else
4091  appendStringInfoString(&buf, null_string);
4092  printed = true;
4093  }
4094  }
4095  else
4096  {
4097  itemvalue = fetch_att(p, typbyval, typlen);
4098 
4099  value = OutputFunctionCall(&my_extra->proc, itemvalue);
4100 
      /* the separator goes before every item except the first one emitted */
4101  if (printed)
4102  appendStringInfo(&buf, "%s%s", fldsep, value);
4103  else
4104  appendStringInfoString(&buf, value);
4105  printed = true;
4106 
      /* only non-null elements advance the data pointer */
4107  p = att_addlength_pointer(p, typlen, p);
4108  p = (char *) att_align_nominal(p, typalign);
4109  }
4110 
4111  /* advance bitmap pointer if any */
4112  if (bitmap)
4113  {
4114  bitmask <<= 1;
      /* after eight bits, move on to the next bitmap byte */
4115  if (bitmask == 0x100)
4116  {
4117  bitmap++;
4118  bitmask = 1;
4119  }
4120  }
4121  }
4122 
4123  result = cstring_to_text_with_len(buf.data, buf.len);
4124  pfree(buf.data);
4125 
4126  return result;
4127 }
4128 
4129 #define HEXBASE 16
4130 /*
4131  * Convert an int32 to a string containing a base 16 (hex) representation of
4132  * the number.
      *
      * Digits are lower case and are written right-to-left into a local
      * buffer, starting just before its terminating NUL.  The do-while form
      * guarantees at least one digit is produced.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4135), the declaration of "value" (4137) and the final
      * statement (4151) are absent from this listing.
4133  */
4134 Datum
4136 {
4138  char *ptr;
4139  const char *digits = "0123456789abcdef";
4140  char buf[32]; /* bigger than needed, but reasonable */
4141 
4142  ptr = buf + sizeof(buf) - 1;
4143  *ptr = '\0';
4144 
4145  do
4146  {
4147  *--ptr = digits[value % HEXBASE];
4148  value /= HEXBASE;
4149  } while (ptr > buf && value);
4150 
4152 }
4153 
4154 /*
4155  * Convert an int64 to a string containing a base 16 (hex) representation of
4156  * the number.
      *
      * The argument is reinterpreted as uint64 before conversion.  Digits are
      * lower case and are written right-to-left into a local buffer, starting
      * just before its terminating NUL; at least one digit is always produced.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4159) and the final statement (4175) are absent from this
      * listing.
4157  */
4158 Datum
4160 {
4161  uint64 value = (uint64) PG_GETARG_INT64(0);
4162  char *ptr;
4163  const char *digits = "0123456789abcdef";
4164  char buf[32]; /* bigger than needed, but reasonable */
4165 
4166  ptr = buf + sizeof(buf) - 1;
4167  *ptr = '\0';
4168 
4169  do
4170  {
4171  *--ptr = digits[value % HEXBASE];
4172  value /= HEXBASE;
4173  } while (ptr > buf && value);
4174 
4176 }
4177 
4178 /*
4179  * Create an md5 hash of a text string and return it as hex
4180  *
4181  * md5 produces a 16 byte (128 bit) hash; double it for hex
      *
      * The hash is computed over the payload bytes of argument 0, excluding
      * the varlena header.  A false result from pg_md5_hash() is reported as
      * ERRCODE_OUT_OF_MEMORY.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4186) and the final statement (4202) are absent from this
      * listing.
4182  */
4183 #define MD5_HASH_LEN 32
4184 
4185 Datum
4187 {
4188  text *in_text = PG_GETARG_TEXT_PP(0);
4189  size_t len;
4190  char hexsum[MD5_HASH_LEN + 1]; /* one byte more than the hex digits */
4191 
4192  /* Calculate the length of the buffer using varlena metadata */
4193  len = VARSIZE_ANY_EXHDR(in_text);
4194 
4195  /* get the hash result */
4196  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
4197  ereport(ERROR,
4198  (errcode(ERRCODE_OUT_OF_MEMORY),
4199  errmsg("out of memory")));
4200 
4201  /* convert to text and return it */
4203 }
4204 
4205 /*
4206  * Create an md5 hash of a bytea field and return it as a hex string:
4207  * 16-byte md5 digest is represented in 32 hex characters.
      *
      * The hash is computed over the payload bytes of argument 0, excluding
      * the varlena header.  A false result from pg_md5_hash() is reported as
      * ERRCODE_OUT_OF_MEMORY.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4210) and the final statement (4222) are absent from this
      * listing.
4208  */
4209 Datum
4211 {
4212  bytea *in = PG_GETARG_BYTEA_PP(0);
4213  size_t len;
4214  char hexsum[MD5_HASH_LEN + 1];
4215 
4216  len = VARSIZE_ANY_EXHDR(in);
4217  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
4218  ereport(ERROR,
4219  (errcode(ERRCODE_OUT_OF_MEMORY),
4220  errmsg("out of memory")));
4221 
4223 }
4224 
4225 /*
4226  * Return the size of a datum, possibly compressed
4227  *
4228  * Works on any data type
      *
      * The argument type's typlen is looked up on the first call and cached in
      * fn_extra.  The int32 result is then:
      *   typlen == -1: toast_datum_size() of the value
      *   typlen == -2: strlen() of the C string, plus one
      *   otherwise:    typlen itself
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4231) and the declaration of "value" (4233) are absent
      * from this listing.
4229  */
4230 Datum
4232 {
4234  int32 result;
4235  int typlen;
4236 
4237  /* On first call, get the input type's typlen, and save at *fn_extra */
4238  if (fcinfo->flinfo->fn_extra == NULL)
4239  {
4240  /* Lookup the datatype of the supplied argument */
4241  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
4242 
4243  typlen = get_typlen(argtypeid);
4244  if (typlen == 0) /* should not happen */
4245  elog(ERROR, "cache lookup failed for type %u", argtypeid);
4246 
4247  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4248  sizeof(int));
4249  *((int *) fcinfo->flinfo->fn_extra) = typlen;
4250  }
4251  else
4252  typlen = *((int *) fcinfo->flinfo->fn_extra);
4253 
4254  if (typlen == -1)
4255  {
4256  /* varlena type, possibly toasted */
4257  result = toast_datum_size(value);
4258  }
4259  else if (typlen == -2)
4260  {
4261  /* cstring: count the bytes of the string plus one more */
4262  result = strlen(DatumGetCString(value)) + 1;
4263  }
4264  else
4265  {
4266  /* ordinary fixed-width type */
4267  result = typlen;
4268  }
4269 
4270  PG_RETURN_INT32(result);
4271 }
4272 
4273 /*
4274  * string_agg - Concatenates values and returns string.
4275  *
4276  * Syntax: string_agg(value text, delimiter text) RETURNS text
4277  *
4278  * Note: Any NULL values are ignored. The first-call delimiter isn't
4279  * actually used at all, and on subsequent calls the delimiter precedes
4280  * the associated value.
4281  */
4282 
4283 /*
      * subroutine to initialize state
      *
      * Raises an error unless AggCheckCallContext() accepts the call, then
      * creates an empty StringInfo inside the aggregate memory context and
      * returns it.  The caller's memory context is restored before returning.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4285) is absent from this listing.
      */
4284 static StringInfo
4286 {
4287  StringInfo state;
4288  MemoryContext aggcontext;
4289  MemoryContext oldcontext;
4290 
4291  if (!AggCheckCallContext(fcinfo, &aggcontext))
4292  {
4293  /* cannot be called directly because of internal-type argument */
4294  elog(ERROR, "string_agg_transfn called in non-aggregate context");
4295  }
4296 
4297  /*
4298  * Create state in aggregate context. It'll stay there across subsequent
4299  * calls.
4300  */
4301  oldcontext = MemoryContextSwitchTo(aggcontext);
4302  state = makeStringInfo();
4303  MemoryContextSwitchTo(oldcontext);
4304 
4305  return state;
4306 }
4307 
/*
 * Transition step: argument 0 is the running state (NULL until the first
 * non-null value arrives), argument 1 the value, argument 2 the delimiter.
 * A NULL value leaves the state untouched; a NULL delimiter is skipped.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (source line 4309) is absent from this listing.
 */
4308 Datum
4310 {
4311  StringInfo state;
4312 
4313  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4314 
4315  /* Append the value unless null. */
4316  if (!PG_ARGISNULL(1))
4317  {
4318  /* On the first time through, we ignore the delimiter. */
4319  if (state == NULL)
4320  state = makeStringAggState(fcinfo);
4321  else if (!PG_ARGISNULL(2))
4322  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
4323 
4324  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
4325  }
4326 
4327  /*
4328  * The transition type for string_agg() is declared to be "internal",
4329  * which is a pass-by-value type the same size as a pointer.
4330  */
4331  PG_RETURN_POINTER(state);
4332 }
4333 
/*
 * Final step: argument 0 is the accumulated state.  A NULL state (no
 * non-null value was ever appended) yields a NULL result.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (source line 4335) and the statement executed when state is not NULL
 * (4345) are absent from this listing.
 */
4334 Datum
4336 {
4337  StringInfo state;
4338 
4339  /* cannot be called directly because of internal-type argument */
4340  Assert(AggCheckCallContext(fcinfo, NULL));
4341 
4342  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4343 
4344  if (state != NULL)
4346  else
4347  PG_RETURN_NULL();
4348 }
4349 
4350 /*
4351  * Implementation of both concat() and concat_ws().
4352  *
4353  * sepstr is the separator string to place between values.
4354  * argidx identifies the first argument to concatenate (counting from zero).
4355  * Returns NULL if result should be NULL, else text value.
      *
      * NULL arguments are skipped and do not produce a separator.  In the
      * explicit-VARIADIC case the work is delegated to
      * array_to_text_internal(), and NULL is returned if the array argument
      * itself is NULL.
      *
      * NOTE(review): source lines 4389, 4408 and 4424 are absent from this
      * listing; "value" is used at 4425 without a visible declaration.
4356  */
4357 static text *
4358 concat_internal(const char *sepstr, int argidx,
4359  FunctionCallInfo fcinfo)
4360 {
4361  text *result;
4362  StringInfoData str;
4363  bool first_arg = true;
4364  int i;
4365 
4366  /*
4367  * concat(VARIADIC some-array) is essentially equivalent to
4368  * array_to_text(), ie concat the array elements with the given separator.
4369  * So we just pass the case off to that code.
4370  */
4371  if (get_fn_expr_variadic(fcinfo->flinfo))
4372  {
4373  ArrayType *arr;
4374 
4375  /* Should have just the one argument */
4376  Assert(argidx == PG_NARGS() - 1);
4377 
4378  /* concat(VARIADIC NULL) is defined as NULL */
4379  if (PG_ARGISNULL(argidx))
4380  return NULL;
4381 
4382  /*
4383  * Non-null argument had better be an array. We assume that any call
4384  * context that could let get_fn_expr_variadic return true will have
4385  * checked that a VARIADIC-labeled parameter actually is an array. So
4386  * it should be okay to just Assert that it's an array rather than
4387  * doing a full-fledged error check.
4388  */
4390 
4391  /* OK, safe to fetch the array value */
4392  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4393 
4394  /*
4395  * And serialize the array. We tell array_to_text to ignore null
4396  * elements, which matches the behavior of the loop below.
4397  */
4398  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4399  }
4400 
4401  /* Normal case without explicit VARIADIC marker */
4402  initStringInfo(&str);
4403 
4404  for (i = argidx; i < PG_NARGS(); i++)
4405  {
4406  if (!PG_ARGISNULL(i))
4407  {
4409  Oid valtype;
4410  Oid typOutput;
4411  bool typIsVarlena;
4412 
4413  /* add separator if appropriate: before every value but the first */
4414  if (first_arg)
4415  first_arg = false;
4416  else
4417  appendStringInfoString(&str, sepstr);
4418 
4419  /* call the appropriate type output function, append the result */
4420  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4421  if (!OidIsValid(valtype))
4422  elog(ERROR, "could not determine data type of concat() input");
4423  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4425  OidOutputFunctionCall(typOutput, value));
4426  }
4427  }
4428 
4429  result = cstring_to_text_with_len(str.data, str.len);
4430  pfree(str.data);
4431 
4432  return result;
4433 }
4434 
4435 /*
4436  * Concatenate all arguments. NULL arguments are ignored.
      *
      * Calls concat_internal() with an empty separator, starting at argument
      * 0.  A NULL result from concat_internal() becomes a SQL NULL.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4439) is absent from this listing.
4437  */
4438 Datum
4440 {
4441  text *result;
4442 
4443  result = concat_internal("", 0, fcinfo);
4444  if (result == NULL)
4445  PG_RETURN_NULL();
4446  PG_RETURN_TEXT_P(result);
4447 }
4448 
4449 /*
4450  * Concatenate all but first argument value with separators. The first
4451  * parameter is used as the separator. NULL arguments are ignored.
      *
      * A NULL separator gives a NULL result.  Otherwise concat_internal() is
      * called starting at argument 1, and a NULL result from it becomes a SQL
      * NULL.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4454) and the statement that assigns "sep" (4462) are
      * absent from this listing.
4452  */
4453 Datum
4455 {
4456  char *sep;
4457  text *result;
4458 
4459  /* return NULL when separator is NULL */
4460  if (PG_ARGISNULL(0))
4461  PG_RETURN_NULL();
4463 
4464  result = concat_internal(sep, 1, fcinfo);
4465  if (result == NULL)
4466  PG_RETURN_NULL();
4467  PG_RETURN_TEXT_P(result);
4468 }
4469 
4470 /*
4471  * Return first n characters in the string. When n is negative,
4472  * return all but last |n| characters.
      *
      * n counts characters, not bytes.  rlen is the byte length obtained from
      * pg_mbcharcliplen() for that many characters.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4475) and the final statement (4487) are absent from this
      * listing.
4473  */
4474 Datum
4476 {
4477  text *str = PG_GETARG_TEXT_PP(0);
4478  const char *p = VARDATA_ANY(str);
4479  int len = VARSIZE_ANY_EXHDR(str);
4480  int n = PG_GETARG_INT32(1);
4481  int rlen;
4482 
      /* negative n: turn "all but the last |n|" into a count from the start */
4483  if (n < 0)
4484  n = pg_mbstrlen_with_len(p, len) + n;
4485  rlen = pg_mbcharcliplen(p, len, n);
4486 
4488 }
4489 
4490 /*
4491  * Return last n characters in the string. When n is negative,
4492  * return all but first |n| characters.
      *
      * n is converted into the number of leading characters to skip; "off" is
      * the byte offset pg_mbcharcliplen() reports for that many characters,
      * and the result is everything from that offset to the end.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4495) is absent from this listing.
      *
      * NOTE(review): "n = -n" cannot represent the negation of INT_MIN, so
      * that one input is not handled like other negative values -- confirm
      * whether it needs special treatment.
4493  */
4494 Datum
4496 {
4497  text *str = PG_GETARG_TEXT_PP(0);
4498  const char *p = VARDATA_ANY(str);
4499  int len = VARSIZE_ANY_EXHDR(str);
4500  int n = PG_GETARG_INT32(1);
4501  int off;
4502 
4503  if (n < 0)
4504  n = -n;
4505  else
4506  n = pg_mbstrlen_with_len(p, len) - n;
4507  off = pg_mbcharcliplen(p, len, n);
4508 
4509  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4510 }
4511 
4512 /*
4513  * Return reversed string
      *
      * The result has the same byte length as the input.  The input is read
      * front to back while "dst" fills the result from its end backwards.  In
      * the multibyte branch each character's bytes are copied as a unit, so
      * the byte order inside a character is preserved.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4516) and the condition that selects between the
      * multibyte and single-byte branches (4529) are absent from this listing.
4514  */
4515 Datum
4517 {
4518  text *str = PG_GETARG_TEXT_PP(0);
4519  const char *p = VARDATA_ANY(str);
4520  int len = VARSIZE_ANY_EXHDR(str);
4521  const char *endp = p + len;
4522  text *result;
4523  char *dst;
4524 
4525  result = palloc(len + VARHDRSZ);
      /* dst starts one past the last payload byte of the result */
4526  dst = (char *) VARDATA(result) + len;
4527  SET_VARSIZE(result, len + VARHDRSZ);
4528 
4530  {
4531  /* multibyte version */
4532  while (p < endp)
4533  {
4534  int sz;
4535 
4536  sz = pg_mblen(p);
4537  dst -= sz;
4538  memcpy(dst, p, sz);
4539  p += sz;
4540  }
4541  }
4542  else
4543  {
4544  /* single byte version */
4545  while (p < endp)
4546  *(--dst) = *p++;
4547  }
4548 
4549  PG_RETURN_TEXT_P(result);
4550 }
4551 
4552 
4553 /*
4554  * Support macros for text_format()
      *
      * ADVANCE_PARSE_POINTER pre-increments ptr and raises
      * ERRCODE_INVALID_PARAMETER_VALUE ("unterminated format specifier") if
      * that moves it to or past end_ptr.  So after a successful use, ptr still
      * points at a valid character.  ptr is modified in place and must be an
      * lvalue.
4555  */
4556 #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus flag present? */
4557 
4558 #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
4559  do { \
4560  if (++(ptr) >= (end_ptr)) \
4561  ereport(ERROR, \
4562  (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
4563  errmsg("unterminated format specifier"))); \
4564  } while (0)
4565 
4566 /*
4567  * Returns a formatted string
      *
      * Argument 0 is the format string; a NULL format gives a NULL result.
      * The remaining arguments supply the values, taken either directly from
      * the call or, when the call is marked VARIADIC, from the elements of the
      * single array argument (a NULL array counts as zero elements).
      *
      * Characters other than '%' are copied through, and "%%" emits one '%'.
      * Any other specifier is parsed by text_format_parse_format() and must
      * end in one of the conversion characters 's', 'I' or 'L'.  Anything
      * else, or running out of arguments, raises
      * ERRCODE_INVALID_PARAMETER_VALUE.
      *
      * "arg" tracks the next argument position to consume.  An explicit
      * position in the specifier (argpos / widthpos > 0) resets it, and each
      * fetch advances it by one.
      *
      * NOTE(review): the line carrying the function name and parameter list
      * (source line 4570) and source line 4617 (in the VARIADIC branch) are
      * absent from this listing.
4568  */
4569 Datum
4571 {
4572  text *fmt;
4573  StringInfoData str;
4574  const char *cp;
4575  const char *start_ptr;
4576  const char *end_ptr;
4577  text *result;
4578  int arg;
4579  bool funcvariadic;
4580  int nargs;
4581  Datum *elements = NULL;
4582  bool *nulls = NULL;
4583  Oid element_type = InvalidOid;
4584  Oid prev_type = InvalidOid;
4585  Oid prev_width_type = InvalidOid;
4586  FmgrInfo typoutputfinfo;
4587  FmgrInfo typoutputinfo_width;
4588 
4589  /* When format string is null, immediately return null */
4590  if (PG_ARGISNULL(0))
4591  PG_RETURN_NULL();
4592 
4593  /* If argument is marked VARIADIC, expand array into elements */
4594  if (get_fn_expr_variadic(fcinfo->flinfo))
4595  {
4596  ArrayType *arr;
4597  int16 elmlen;
4598  bool elmbyval;
4599  char elmalign;
4600  int nitems;
4601 
4602  /* Should have just the one argument */
4603  Assert(PG_NARGS() == 2);
4604 
4605  /* If argument is NULL, we treat it as zero-length array */
4606  if (PG_ARGISNULL(1))
4607  nitems = 0;
4608  else
4609  {
4610  /*
4611  * Non-null argument had better be an array. We assume that any
4612  * call context that could let get_fn_expr_variadic return true
4613  * will have checked that a VARIADIC-labeled parameter actually is
4614  * an array. So it should be okay to just Assert that it's an
4615  * array rather than doing a full-fledged error check.
4616  */
4618 
4619  /* OK, safe to fetch the array value */
4620  arr = PG_GETARG_ARRAYTYPE_P(1);
4621 
4622  /* Get info about array element type */
4623  element_type = ARR_ELEMTYPE(arr);
4624  get_typlenbyvalalign(element_type,
4625  &elmlen, &elmbyval, &elmalign);
4626 
4627  /* Extract all array elements */
4628  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
4629  &elements, &nulls, &nitems);
4630  }
4631 
      /* count the format string too, so nargs is comparable in both cases */
4632  nargs = nitems + 1;
4633  funcvariadic = true;
4634  }
4635  else
4636  {
4637  /* Non-variadic case, we'll process the arguments individually */
4638  nargs = PG_NARGS();
4639  funcvariadic = false;
4640  }
4641 
4642  /* Setup for main loop. */
4643  fmt = PG_GETARG_TEXT_PP(0);
4644  start_ptr = VARDATA_ANY(fmt);
4645  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
4646  initStringInfo(&str);
4647  arg = 1; /* next argument position to print */
4648 
4649  /* Scan format string, looking for conversion specifiers. */
4650  for (cp = start_ptr; cp < end_ptr; cp++)
4651  {
4652  int argpos;
4653  int widthpos;
4654  int flags;
4655  int width;
4656  Datum value;
4657  bool isNull;
4658  Oid typid;
4659 
4660  /*
4661  * If it's not the start of a conversion specifier, just copy it to
4662  * the output buffer.
4663  */
4664  if (*cp != '%')
4665  {
4666  appendStringInfoCharMacro(&str, *cp);
4667  continue;
4668  }
4669 
4670  ADVANCE_PARSE_POINTER(cp, end_ptr);
4671 
4672  /* Easy case: %% outputs a single % */
4673  if (*cp == '%')
4674  {
4675  appendStringInfoCharMacro(&str, *cp);
4676  continue;
4677  }
4678 
4679  /* Parse the optional portions of the format specifier */
4680  cp = text_format_parse_format(cp, end_ptr,
4681  &argpos, &widthpos,
4682  &flags, &width);
4683 
4684  /*
4685  * Next we should see the main conversion specifier. Whether or not
4686  * an argument position was present, it's known that at least one
4687  * character remains in the string at this point. Experience suggests
4688  * that it's worth checking that that character is one of the expected
4689  * ones before we try to fetch arguments, so as to produce the least
4690  * confusing response to a mis-formatted specifier.
4691  */
4692  if (strchr("sIL", *cp) == NULL)
4693  ereport(ERROR,
4694  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4695  errmsg("unrecognized conversion type specifier \"%c\"",
4696  *cp)));
4697 
4698  /* If indirect width was specified, get its value */
4699  if (widthpos >= 0)
4700  {
4701  /* Collect the specified or next argument position */
4702  if (widthpos > 0)
4703  arg = widthpos;
4704  if (arg >= nargs)
4705  ereport(ERROR,
4706  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4707  errmsg("too few arguments for format")));
4708 
4709  /* Get the value and type of the selected argument */
4710  if (!funcvariadic)
4711  {
4712  value = PG_GETARG_DATUM(arg);
4713  isNull = PG_ARGISNULL(arg);
4714  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4715  }
4716  else
4717  {
      /* array elements are 0-based while arg positions start at 1 */
4718  value = elements[arg - 1];
4719  isNull = nulls[arg - 1];
4720  typid = element_type;
4721  }
4722  if (!OidIsValid(typid))
4723  elog(ERROR, "could not determine data type of format() input");
4724 
4725  arg++;
4726 
4727  /* We can treat NULL width the same as zero */
4728  if (isNull)
4729  width = 0;
4730  else if (typid == INT4OID)
4731  width = DatumGetInt32(value);
4732  else if (typid == INT2OID)
4733  width = DatumGetInt16(value);
4734  else
4735  {
4736  /* For less-usual datatypes, convert to text then to int */
4737  char *str;
4738 
      /* reuse the output-function lookup when the width type repeats */
4739  if (typid != prev_width_type)
4740  {
4741  Oid typoutputfunc;
4742  bool typIsVarlena;
4743 
4744  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4745  fmgr_info(typoutputfunc, &typoutputinfo_width);
4746  prev_width_type = typid;
4747  }
4748 
4749  str = OutputFunctionCall(&typoutputinfo_width, value);
4750 
4751  /* pg_atoi will complain about bad data or overflow */
4752  width = pg_atoi(str, sizeof(int), '\0');
4753 
4754  pfree(str);
4755  }
4756  }
4757 
4758  /* Collect the specified or next argument position */
4759  if (argpos > 0)
4760  arg = argpos;
4761  if (arg >= nargs)
4762  ereport(ERROR,
4763  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4764  errmsg("too few arguments for format")));
4765 
4766  /* Get the value and type of the selected argument */
4767  if (!funcvariadic)
4768  {
4769  value = PG_GETARG_DATUM(arg);
4770  isNull = PG_ARGISNULL(arg);
4771  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4772  }
4773  else
4774  {
4775  value = elements[arg - 1];
4776  isNull = nulls[arg - 1];
4777  typid = element_type;
4778  }
4779  if (!OidIsValid(typid))
4780  elog(ERROR, "could not determine data type of format() input");
4781 
4782  arg++;
4783 
4784  /*
4785  * Get the appropriate typOutput function, reusing previous one if
4786  * same type as previous argument. That's particularly useful in the
4787  * variadic-array case, but often saves work even for ordinary calls.
4788  */
4789  if (typid != prev_type)
4790  {
4791  Oid typoutputfunc;
4792  bool typIsVarlena;
4793 
4794  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4795  fmgr_info(typoutputfunc, &typoutputfinfo);
4796  prev_type = typid;
4797  }
4798 
4799  /*
4800  * And now we can format the value.
4801  */
4802  switch (*cp)
4803  {
4804  case 's':
4805  case 'I':
4806  case 'L':
4807  text_format_string_conversion(&str, *cp, &typoutputfinfo,
4808  value, isNull,
4809  flags, width);
4810  break;
4811  default:
4812  /* should not get here, because of previous check */
4813  ereport(ERROR,
4814  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4815  errmsg("unrecognized conversion type specifier \"%c\"",
4816  *cp)));
4817  break;
4818  }
4819  }
4820 
4821  /* Don't need deconstruct_array results anymore. */
4822  if (elements != NULL)
4823  pfree(elements);
4824  if (nulls != NULL)
4825  pfree(nulls);
4826 
4827  /* Generate results. */
4828  result = cstring_to_text_with_len(str.data, str.len);
4829  pfree(str.data);
4830 
4831  PG_RETURN_TEXT_P(result);
4832 }
4833 
4834 /*
4835  * Parse contiguous digits as a decimal number.
4836  *
4837  * Returns true if some digits could be parsed.
4838  * The value is returned into *value, and *ptr is advanced to the next
4839  * character to be parsed.
4840  *
4841  * Note parsing invariant: at least one character is known available before
4842  * string end (end_ptr) at entry, and this is still true at exit.
4843  */
4844 static bool
4845 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
4846 {
4847  bool found = false;
4848  const char *cp = *ptr;
4849  int val = 0;
4850 
4851  while (*cp >= '0' && *cp <= '9')
4852  {
4853  int newval = val * 10 + (*cp - '0');
4854 
4855  if (newval / 10 != val) /* overflow? */
4856  ereport(ERROR,
4857  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4858  errmsg("number is out of range")));
4859  val = newval;
4860  ADVANCE_PARSE_POINTER(cp, end_ptr);
4861  found = true;
4862  }
4863 
4864  *ptr = cp;
4865  *value = val;
4866 
4867  return found;
4868 }
4869 
4870 /*
4871  * Parse a format specifier (generally following the SUS printf spec).
4872  *
4873  * We have already advanced over the initial '%', and we are looking for
4874  * [argpos][flags][width]type (but the type character is not consumed here).
4875  *
4876  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
4877  * Output parameters:
4878  * argpos: argument position for value to be printed. -1 means unspecified.
4879  * widthpos: argument position for width. Zero means the argument position
4880  * was unspecified (ie, take the next arg) and -1 means no width
4881  * argument (width was omitted or specified as a constant).
4882  * flags: bitmask of flags.
4883  * width: directly-specified width value. Zero means the width was omitted
4884  * (note it's not necessary to distinguish this case from an explicit
4885  * zero width value).
4886  *
4887  * The function result is the next character position to be parsed, ie, the
4888  * location where the type character is/should be.
4889  *
4890  * Note parsing invariant: at least one character is known available before
4891  * string end (end_ptr) at entry, and this is still true at exit.
4892  */
4893 static const char *
4894 text_format_parse_format(const char *start_ptr, const char *end_ptr,
4895  int *argpos, int *widthpos,
4896  int *flags, int *width)
4897 {
4898  const char *cp = start_ptr;
4899  int n;
4900 
4901  /* set defaults for output parameters */
4902  *argpos = -1;
4903  *widthpos = -1;
4904  *flags = 0;
4905  *width = 0;
4906 
4907  /* try to identify first number */
4908  if (text_format_parse_digits(&cp, end_ptr, &n))
4909  {
4910  if (*cp != '$')
4911  {
4912  /* Must be just a width and a type, so we're done */
4913  *width = n;
4914  return cp;
4915  }
4916  /* The number was argument position */
4917  *argpos = n;
4918  /* Explicit 0 for argument index is immediately refused */
4919  if (n == 0)
4920  ereport(ERROR,
4921  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4922  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4923  ADVANCE_PARSE_POINTER(cp, end_ptr);
4924  }
4925 
4926  /* Handle flags (only minus is supported now) */
4927  while (*cp == '-')
4928  {
4929  *flags |= TEXT_FORMAT_FLAG_MINUS;
4930  ADVANCE_PARSE_POINTER(cp, end_ptr);
4931  }
4932 
4933  if (*cp == '*')
4934  {
4935  /* Handle indirect width */
4936  ADVANCE_PARSE_POINTER(cp, end_ptr);
4937  if (text_format_parse_digits(&cp, end_ptr, &n))
4938  {
4939  /* number in this position must be closed by $ */
4940  if (*cp != '$')
4941  ereport(ERROR,
4942  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4943  errmsg("width argument position must be ended by \"$\"")));
4944  /* The number was width argument position */
4945  *widthpos = n;
4946  /* Explicit 0 for argument index is immediately refused */
4947  if (n == 0)
4948  ereport(ERROR,
4949  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4950  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4951  ADVANCE_PARSE_POINTER(cp, end_ptr);
4952  }
4953  else
4954  *widthpos = 0; /* width's argument position is unspecified */
4955  }
4956  else
4957  {
4958  /* Check for direct width specification */
4959  if (text_format_parse_digits(&cp, end_ptr, &n))
4960  *width = n;
4961  }
4962 
4963  /* cp should now be pointing at type character */
4964  return cp;
4965 }
4966 
4967 /*
4968  * Format a %s, %I, or %L conversion
4969  */
4970 static void
4972  FmgrInfo *typOutputInfo,
4973  Datum value, bool isNull,
4974  int flags, int width)
4975 {
4976  char *str;
4977 
4978  /* Handle NULL arguments before trying to stringify the value. */
4979  if (isNull)
4980  {
4981  if (conversion == 's')
4982  text_format_append_string(buf, "", flags, width);
4983  else if (conversion == 'L')
4984  text_format_append_string(buf, "NULL", flags, width);
4985  else if (conversion == 'I')
4986  ereport(ERROR,
4987  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4988  errmsg("null values cannot be formatted as an SQL identifier")));
4989  return;
4990  }
4991 
4992  /* Stringify. */
4993  str = OutputFunctionCall(typOutputInfo, value);
4994 
4995  /* Escape. */
4996  if (conversion == 'I')
4997  {
4998  /* quote_identifier may or may not allocate a new string. */
4999  text_format_append_string(buf, quote_identifier(str), flags, width);
5000  }
5001  else if (conversion == 'L')
5002  {
5003  char *qstr = quote_literal_cstr(str);
5004 
5005  text_format_append_string(buf, qstr, flags, width);
5006  /* quote_literal_cstr() always allocates a new string */
5007  pfree(qstr);
5008  }
5009  else
5010  text_format_append_string(buf, str, flags, width);
5011 
5012  /* Cleanup. */
5013  pfree(str);
5014 }
5015 
5016 /*
5017  * Append str to buf, padding as directed by flags/width
5018  */
5019 static void
5021  int flags, int width)
5022 {
5023  bool align_to_left = false;
5024  int len;
5025 
5026  /* fast path for typical easy case */
5027  if (width == 0)
5028  {
5029  appendStringInfoString(buf, str);
5030  return;
5031  }
5032 
5033  if (width < 0)
5034  {
5035  /* Negative width: implicit '-' flag, then take absolute value */
5036  align_to_left = true;
5037  /* -INT_MIN is undefined */
5038  if (width <= INT_MIN)
5039  ereport(ERROR,
5040  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5041  errmsg("number is out of range")));
5042  width = -width;
5043  }
5044  else if (flags & TEXT_FORMAT_FLAG_MINUS)
5045  align_to_left = true;
5046 
5047  len = pg_mbstrlen(str);
5048  if (align_to_left)
5049  {
5050  /* left justify */
5051  appendStringInfoString(buf, str);
5052  if (len < width)
5053  appendStringInfoSpaces(buf, width - len);
5054  }
5055  else
5056  {
5057  /* right justify */
5058  if (len < width)
5059  appendStringInfoSpaces(buf, width - len);
5060  appendStringInfoString(buf, str);
5061  }
5062 }
5063 
5064 /*
5065  * text_format_nv - nonvariadic wrapper for text_format function.
5066  *
5067  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5068  * which checks that all built-in functions that share the implementing C
5069  * function take the same number of arguments.
5070  */
5071 Datum
5073 {
5074  return text_format(fcinfo);
5075 }
5076 
/*
 * Helper function for Levenshtein distance functions: report whether the
 * first len bytes at s1 and s2 are identical.  Intended to be faster than
 * memcmp() for this use case.
 *
 * Bytes are compared starting from the last one and working back toward the
 * first.  A len of zero or less compares nothing and yields true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
5092 
5093 /* Expand each Levenshtein distance variant */
5094 #include "levenshtein.c"
5095 #define LEVENSHTEIN_LESS_EQUAL
5096 #include "levenshtein.c"
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2352
#define PG_CACHE_LINE_SIZE
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:3778
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2471
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
Value * makeString(char *str)
Definition: value.c:53
signed short int16
Definition: c.h:228
hyperLogLogState abbr_card
Definition: varlena.c:65
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:107
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:342
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:305
#define DatumGetUInt32(X)
Definition: postgres.h:494
#define NIL
Definition: pg_list.h:69
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:4570
int length(const List *list)
Definition: list.c:1274
#define PG_GETARG_INT32(n)
Definition: fmgr.h:225
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:4558
Definition: fmgr.h:53
text * replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob)
Definition: varlena.c:3533
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:315
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:2748
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:873
Datum split_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3668
int errhint(const char *fmt,...)
Definition: elog.c:978
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1013
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2536
#define VARDATA_ANY(PTR)
Definition: postgres.h:349
#define VARDATA(PTR)
Definition: postgres.h:305
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
MemoryContext fn_mcxt
Definition: fmgr.h:62
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:144
#define MD5_HASH_LEN
Definition: varlena.c:4183
char * buf1
Definition: varlena.c:59
static Datum bttext_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:1991
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:9300
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1653
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:2304
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:263
#define DatumGetInt32(X)
Definition: postgres.h:480
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:2336
#define HEXBASE
Definition: varlena.c:4129
#define TEXTOID
Definition: pg_type.h:324
#define VARSIZE(PTR)
Definition: postgres.h:306
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3312
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:3230
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:4971
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:1925
#define PointerGetDatum(X)
Definition: postgres.h:564
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:131
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:521
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:224
static void text_position_setup(text *t1, text *t2, TextPositionState *state)
Definition: varlena.c:1110
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:358
#define VARHDRSZ
Definition: c.h:421
Datum md5_bytea(PG_FUNCTION_ARGS)
Definition: varlena.c:4210
char * pstrdup(const char *in)
Definition: mcxt.c:1080
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:510
static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1862
regoff_t rm_so
Definition: regex.h:85
#define DatumGetTextPP(X)
Definition: fmgr.h:249
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
StringInfoData * StringInfo
Definition: stringinfo.h:43
#define Min(x, y)
Definition: c.h:779
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:298
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:262
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:2405
#define INT4OID
Definition: pg_type.h:316
void canonicalize_path(char *path)
Definition: path.c:220
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2455
int errcode(int sqlerrcode)
Definition: elog.c:569
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:162
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
#define DatumGetByteaPP(X)
Definition: fmgr.h:247
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:232
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:2709
pg_wchar * wstr2
Definition: varlena.c:49
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:473
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4309
Datum md5_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4186
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:313
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3398
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:2480
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3126
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:613
#define OidIsValid(objectId)
Definition: c.h:511
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1728
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:378
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:204
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1025
bool trace_sort
Definition: tuplesort.c:128
#define PG_GET_COLLATION()
Definition: fmgr.h:155
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2550
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:4439
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:651
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:5020
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:3986
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:4454
regoff_t rm_eo
Definition: regex.h:86
signed int int32
Definition: c.h:229
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:2438
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:270
int pg_locale_t
Definition: pg_locale.h:70
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1943
static int32 text_length(Datum str)
Definition: varlena.c:631
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:805
bool typbyval
Definition: array.h:221
#define NAMEDATALEN
double prop_card
Definition: varlena.c:67
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:177
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:4159
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:316
#define PG_GETARG_BYTEA_P(n)
Definition: fmgr.h:267
static void btsortsupport_worker(SortSupport ssup, Oid collid)
Definition: varlena.c:1744
static Datum text_to_array_internal(PG_FUNCTION_ARGS)
Definition: varlena.c:3803
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:3190
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3028
FmgrInfo * flinfo
Definition: fmgr.h:71
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:244
#define wcscoll_l
Definition: win32.h:345
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:127
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:65
unsigned hex_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:126
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3970
void pfree(void *pointer)
Definition: mcxt.c:913
Size toast_raw_datum_size(Datum value)
Definition: tuptoaster.c:351
#define REG_OKAY
Definition: regex.h:137
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:78
Datum string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:4335
Datum textoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:1002
#define ERROR
Definition: elog.h:41
char * s1
static bool check_replace_text_has_escape_char(const text *replace_text)
Definition: varlena.c:3395
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:941
#define DatumGetCString(X)
Definition: postgres.h:574
Size toast_datum_size(Datum value)
Definition: tuptoaster.c:407
static struct @72 value
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:2313
Datum byteage(PG_FUNCTION_ARGS)
Definition: varlena.c:3250
#define ARR_DIMS(a)
Definition: array.h:275
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:160
MemoryContext ssup_cxt
Definition: sortsupport.h:66
struct varlena * pg_detoast_datum_packed(struct varlena *datum)
Definition: fmgr.c:2267
static int text_position_next(int start_pos, TextPositionState *state)
Definition: varlena.c:1222
Datum text_to_array_null(PG_FUNCTION_ARGS)
Definition: varlena.c:3792
#define MAXPGPATH
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:831
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:246
static int charlen_to_bytelen(const char *p, int n)
Definition: varlena.c:726
static text * text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
Definition: varlena.c:805
Datum unknownrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:578
static text * array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string)
Definition: varlena.c:4012
Definition: c.h:469
static void appendStringInfoText(StringInfo str, const text *t)
Definition: varlena.c:3298
Datum text_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:2234
#define INT2OID
Definition: pg_type.h:308
Datum texteq(PG_FUNCTION_ARGS)
Definition: varlena.c:1589
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:157
#define ARR_DATA_PTR(a)
Definition: array.h:303
Datum text_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:2246
Datum textne(PG_FUNCTION_ARGS)
Definition: varlena.c:1624
int16 typlen
Definition: array.h:220
static char * buf
Definition: pg_test_fsync.c:65
#define memmove(d, s, c)
Definition: c.h:1047
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:152
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:2903
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:68
char typdelim
Definition: array.h:223
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition: varlena.c:2562
MemoryContext MemoryContextSwitchTo(MemoryContext context)
Datum text_name(PG_FUNCTION_ARGS)
Definition: varlena.c:2805
static text * text_catenate(text *t1, text *t2)
Definition: varlena.c:685
#define DatumGetInt16(X)
Definition: postgres.h:452
#define DatumGetBool(X)
Definition: postgres.h:401
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
Definition: geqo_px.c:46
bool collate_c
Definition: varlena.c:64
char * buf2
Definition: varlena.c:61
unsigned int uint32
Definition: c.h:241
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:191
void * ssup_extra
Definition: sortsupport.h:87
ArrayType * create_singleton_array(FunctionCallInfo fcinfo, Oid element_type, Datum element, bool isNull, int ndims)
Datum textpos(PG_FUNCTION_ARGS)
Definition: varlena.c:1063
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
MemoryContext CurrentMemoryContext
Definition: mcxt.c:40
Datum text_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:786
int bytea_output
Definition: varlena.c:39
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:170
static int text_cmp(text *arg1, text *arg2, Oid collid)
Definition: varlena.c:1564
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:2643
#define S(n, x)
Definition: sha1.c:55
#define PG_RETURN_ARRAYTYPE_P(x)
Definition: array.h:246
Datum pg_column_size(PG_FUNCTION_ARGS)
Definition: varlena.c:4231
Datum text_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:1683
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:172
#define ereport(elevel, rest)
Definition: elog.h:132
static int internal_text_pattern_compare(text *arg1, text *arg2)
Definition: varlena.c:2266
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5031
static bool text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
Definition: varlena.c:4845
unsigned int pg_wchar
Definition: mbprint.c:30
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:2845
#define byte(x, n)
Definition: rijndael.c:68
Datum textcat(PG_FUNCTION_ARGS)
Definition: varlena.c:670
List * lappend(List *list, void *datum)
Definition: list.c:131
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:2828
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:587
#define MaxAllocSize
Definition: memutils.h:40
int skiptable[256]
Definition: varlena.c:54
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:433
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1078
Datum text_le(PG_FUNCTION_ARGS)
Definition: varlena.c:1668
Datum hash_uint32(uint32 k)
Definition: hashfunc.c:510
uint8 bits8
Definition: c.h:248
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:772
#define TextDatumGetCString(d)
Definition: builtins.h:777
void * palloc0(Size size)
Definition: mcxt.c:841
Datum text_format_nv(PG_FUNCTION_ARGS)
Definition: varlena.c:5072
char * s2
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:303
uintptr_t Datum
Definition: postgres.h:374
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
Datum text_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:4516
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:2457
static int bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1968
#define REGEXP_REPLACE_BACKREF_CNT
Definition: varlena.c:3522
void appendStringInfoSpaces(StringInfo str, int count)
Definition: stringinfo.c:187
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:785
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:139
Datum text_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:1698
#define VARSIZE_ANY(PTR)
Definition: postgres.h:336
#define strxfrm_l
Definition: win32.h:344
static void text_position_cleanup(TextPositionState *state)
Definition: varlena.c:1356
Datum byteacmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3270
#define InvalidOid
Definition: postgres_ext.h:36
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:734
Datum to_hex32(PG_FUNCTION_ARGS)
Definition: varlena.c:4135
#define PG_RETURN_VOID()
Definition: fmgr.h:293
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:314
#define Max(x, y)
Definition: c.h:773
text * cstring_to_text(const char *s)
Definition: varlena.c:140
Datum unknownsend(PG_FUNCTION_ARGS)
Definition: varlena.c:593
#define PG_ARGISNULL(n)
Definition: fmgr.h:166
#define NULL
Definition: c.h:202
#define Assert(condition)
Definition: c.h:648
#define lfirst(lc)
Definition: pg_list.h:115
Definition: regguts.h:308
Datum hash_any(register const unsigned char *k, register int keylen)
Definition: hashfunc.c:305
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:569
Datum text_right(PG_FUNCTION_ARGS)
Definition: varlena.c:4495
static text * concat_internal(const char *sepstr, int argidx, FunctionCallInfo fcinfo)
Definition: varlena.c:4358
int varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
Definition: varlena.c:1373
Oid typioparam
Definition: array.h:224
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:306
Datum unknownin(PG_FUNCTION_ARGS)
Definition: varlena.c:554
static bool bttext_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: varlena.c:2120
size_t Size
Definition: c.h:333
static bool rest_of_char_same(const char *s1, const char *s2, int len)
Definition: varlena.c:5082
Datum text_pattern_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:2288
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:539
#define newval
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:268
Datum byteane(PG_FUNCTION_ARGS)
Definition: varlena.c:3158
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:120
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:2773
Datum textin(PG_FUNCTION_ARGS)
Definition: varlena.c:499
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:216
#define PG_NARGS()
Definition: fmgr.h:160
void * fn_extra
Definition: fmgr.h:61
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)
Definition: varlena.c:3428
#define ARR_NDIM(a)
Definition: array.h:271
Datum byteapos(PG_FUNCTION_ARGS)
Definition: varlena.c:2599
#define TEXTBUFLEN
Definition: varlena.c:77
Oid typiofunc
Definition: array.h:225
#define DatumGetPointer(X)
Definition: postgres.h:557
char typalign
Definition: array.h:222
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3449
char * text_to_cstring(const text *t)
Definition: varlena.c:173
pg_wchar * wstr1
Definition: varlena.c:48
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:4967
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2461
Datum bttextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:1713
Datum unknownout(PG_FUNCTION_ARGS)
Definition: varlena.c:566
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:167
int16 get_typlen(Oid typid)
Definition: lsyscache.c:1851
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:2048
Datum bytearecv(PG_FUNCTION_ARGS)
Definition: varlena.c:414
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:342
void * palloc(Size size)
Definition: mcxt.c:812
int errmsg(const char *fmt,...)
Definition: elog.c:791
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:71
static StringInfo makeStringAggState(FunctionCallInfo fcinfo)
Definition: varlena.c:4285
hyperLogLogState full_card
Definition: varlena.c:66
FmgrInfo proc
Definition: array.h:226
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:441
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:670
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:2539
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:2374
void list_free(List *list)
Definition: list.c:1136
int i
Oid element_type
Definition: array.h:219
#define REG_NOMATCH
Definition: regex.h:138
#define NameStr(name)
Definition: c.h:475
static char * locale
Definition: initdb.c:108
void * arg
static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1895
static bool text_isequal(text *txt1, text *txt2)
Definition: varlena.c:3765
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:233
#define VAL(CH)
Definition: varlena.c:231
Definition: c.h:415
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
Datum text_left(PG_FUNCTION_ARGS)
Definition: varlena.c:4475
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:96
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:330
#define elog
Definition: elog.h:228
static const char * text_format_parse_format(const char *start_ptr, const char *end_ptr, int *argpos, int *widthpos, int *flags, int *width)
Definition: varlena.c:4894
Datum byteaGetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:2672
bool pg_md5_hash(const void *buff, size_t len, char *hexsum)
Definition: md5.c:290
#define strcoll_l
Definition: win32.h:343
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:182
NameData * Name
Definition: c.h:473
#define PG_GETARG_INT64(n)
Definition: fmgr.h:238
Datum byteale(PG_FUNCTION_ARGS)
Definition: varlena.c:3210
Definition: pg_list.h:45
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:376
#define ARR_ELEMTYPE(a)
Definition: array.h:273
#define ARR_NULLBITMAP(a)
Definition: array.h:281
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:208
Definition: regex.h:55
long val
Definition: informix.c:685
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:172
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:550
#define PG_RETURN_NULL()
Definition: fmgr.h:289
#define PG_RETURN_NAME(x)
Definition: fmgr.h:307
#define TEXT_FORMAT_FLAG_MINUS
Definition: varlena.c:4556
int32 pg_atoi(const char *s, int size, int c)
Definition: numutils.c:37
#define PG_GETARG_NAME(n)
Definition: fmgr.h:234
static int text_position(text *t1, text *t2)
Definition: varlena.c:1086
Datum text_pattern_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:2320
#define DIG(VAL)
Definition: varlena.c:232
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:676
int digits
Definition: informix.c:687
Datum byteacat(PG_FUNCTION_ARGS)
Definition: varlena.c:2390
void get_type_io_data(Oid typid, IOFuncSelector which_func, int16 *typlen, bool *typbyval, char *typalign, char *typdelim, Oid *typioparam, Oid *func)
Definition: lsyscache.c:1979