PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/tuptoaster.h"
21 #include "catalog/pg_collation.h"
22 #include "catalog/pg_type.h"
23 #include "libpq/md5.h"
24 #include "libpq/pqformat.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "regex/regex.h"
28 #include "utils/builtins.h"
29 #include "utils/bytea.h"
30 #include "utils/lsyscache.h"
31 #include "utils/memutils.h"
32 #include "utils/pg_locale.h"
33 #include "utils/sortsupport.h"
34 
35 
36 /* GUC variable */
38 
/* In this file the "unknown" type is simply an alias for struct varlena. */
39 typedef struct varlena unknown;
40 
41 typedef struct
42 {
43  bool use_wchar; /* T if multibyte encoding */
44  char *str1; /* use these if not use_wchar */
45  char *str2; /* note: these point to original texts */
46  pg_wchar *wstr1; /* use these if use_wchar */
47  pg_wchar *wstr2; /* note: these are palloc'd */
48  int len1; /* string lengths in logical characters */
49  int len2;
50  /* Skip table for Boyer-Moore-Horspool search algorithm: */
51  int skiptablemask; /* mask for ANDing with skiptable subscripts */
52  int skiptable[256]; /* skip distance for given mismatched char */
54 
/*
 * Holds two string buffers together with an integer length for each.
 *
 * NOTE(review): the member guarded by HAVE_LOCALE_T and the closing
 * "} <typedef name>;" line are absent from this listing; restore them from
 * the upstream file before compiling.
 */
55 typedef struct
56 {
57  char *buf1; /* 1st string */
58  char *buf2; /* 2nd string */
59  int buflen1; /* NOTE(review): presumably the allocated size of buf1 -- confirm */
60  int buflen2; /* NOTE(review): presumably the allocated size of buf2 -- confirm */
61 #ifdef HAVE_LOCALE_T
63 #endif
65 
66 /*
67  * This should be large enough that most strings will fit, but small enough
68  * that we feel comfortable putting it on the stack
69  */
70 #define TEXTBUFLEN 1024
71 
/*
 * Accessors for values of type "unknown".
 *
 * DatumGetUnknownP / DatumGetUnknownPCopy detoast a Datum (the second via
 * the _COPY detoast macro) and cast the result to unknown *.  The
 * PG_GETARG_ forms apply them to function argument n, and
 * PG_RETURN_UNKNOWN_P returns such a pointer.
 */
72 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
73 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
74 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
75 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
76 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
77 
/*
 * Forward declarations of file-local helpers.
 *
 * NOTE(review): this listing has lost some declaration lines.  The code
 * later in the file calls text_position_cleanup() and makeStringAggState(),
 * neither of which is declared here, and the line ending in
 * "const char *null_string);" below is the tail of a declaration whose
 * first line is absent.  Restore them from the upstream file.
 */
78 static void btsortsupport_worker(SortSupport ssup, Oid collid);
79 static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
80 static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
81 static int32 text_length(Datum str);
82 static text *text_catenate(text *t1, text *t2);
83 static text *text_substring(Datum str,
84  int32 start,
85  int32 length,
86  bool length_not_specified);
87 static text *text_overlay(text *t1, text *t2, int sp, int sl);
88 static int text_position(text *t1, text *t2);
89 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
90 static int text_position_next(int start_pos, TextPositionState *state);
92 static int text_cmp(text *arg1, text *arg2, Oid collid);
93 static bytea *bytea_catenate(bytea *t1, bytea *t2);
94 static bytea *bytea_substring(Datum str,
95  int S,
96  int L,
97  bool length_not_specified);
98 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
99 static void appendStringInfoText(StringInfo str, const text *t);
/* NOTE(review): orphaned continuation line; its opening line is missing. */
102  const char *fldsep, const char *null_string);
104 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
105  int *value);
106 static const char *text_format_parse_format(const char *start_ptr,
107  const char *end_ptr,
108  int *argpos, int *widthpos,
109  int *flags, int *width);
110 static void text_format_string_conversion(StringInfo buf, char conversion,
111  FmgrInfo *typOutputInfo,
112  Datum value, bool isNull,
113  int flags, int width);
114 static void text_format_append_string(StringInfo buf, const char *str,
115  int flags, int width);
116 
117 
118 /*****************************************************************************
119  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
120  *****************************************************************************/
121 
122 /*
123  * cstring_to_text
124  *
125  * Create a text value from a null-terminated C string.
126  *
127  * The new text value is freshly palloc'd with a full-size VARHDR.
128  */
129 text *
130 cstring_to_text(const char *s)
131 {
132  return cstring_to_text_with_len(s, strlen(s));
133 }
134 
135 /*
136  * cstring_to_text_with_len
137  *
138  * Same as cstring_to_text except the caller specifies the string length;
139  * the string need not be null_terminated.
140  */
141 text *
142 cstring_to_text_with_len(const char *s, int len)
143 {
144  text *result = (text *) palloc(len + VARHDRSZ);
145 
146  SET_VARSIZE(result, len + VARHDRSZ);
147  memcpy(VARDATA(result), s, len);
148 
149  return result;
150 }
151 
152 /*
153  * text_to_cstring
154  *
155  * Create a palloc'd, null-terminated C string from a text value.
156  *
157  * We support being passed a compressed or toasted text value.
158  * This is a bit bogus since such values shouldn't really be referred to as
159  * "text *", but it seems useful for robustness. If we didn't handle that
160  * case here, we'd need another routine that did, anyway.
161  */
162 char *
164 {
165  /* must cast away the const, unfortunately */
166  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
167  int len = VARSIZE_ANY_EXHDR(tunpacked);
168  char *result;
169 
170  result = (char *) palloc(len + 1);
171  memcpy(result, VARDATA_ANY(tunpacked), len);
172  result[len] = '\0';
173 
174  if (tunpacked != t)
175  pfree(tunpacked);
176 
177  return result;
178 }
179 
180 /*
181  * text_to_cstring_buffer
182  *
183  * Copy a text value into a caller-supplied buffer of size dst_len.
184  *
185  * The text string is truncated if necessary to fit. The result is
186  * guaranteed null-terminated (unless dst_len == 0).
187  *
188  * We support being passed a compressed or toasted text value.
189  * This is a bit bogus since such values shouldn't really be referred to as
190  * "text *", but it seems useful for robustness. If we didn't handle that
191  * case here, we'd need another routine that did, anyway.
192  */
193 void
194 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
195 {
196  /* must cast away the const, unfortunately */
197  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
198  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
199 
200  if (dst_len > 0)
201  {
202  dst_len--;
203  if (dst_len >= src_len)
204  dst_len = src_len;
205  else /* ensure truncation is encoding-safe */
206  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
207  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
208  dst[dst_len] = '\0';
209  }
210 
211  if (srcunpacked != src)
212  pfree(srcunpacked);
213 }
214 
215 
216 /*****************************************************************************
217  * USER I/O ROUTINES *
218  *****************************************************************************/
219 
220 
/* Convert between an ASCII digit character and its numeric value. */
#define VAL(c) ((c) - '0')
#define DIG(v) ((v) + '0')
223 
224 /*
225  * byteain - converts from printable representation of byte array
226  *
227  * Non-printable characters must be passed as '\nnn' (octal) and are
228  * converted to internal form. '\' must be passed as '\\'.
229  * ereport(ERROR, ...) if bad form.
230  *
231  * BUGS:
232  * The input is scanned twice.
233  * The error checking of input is minimal.
234  */
235 Datum
237 {
238  char *inputText = PG_GETARG_CSTRING(0);
239  char *tp;
240  char *rp;
241  int bc;
242  bytea *result;
243 
244  /* Recognize hex input */
245  if (inputText[0] == '\\' && inputText[1] == 'x')
246  {
247  size_t len = strlen(inputText);
248 
249  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
250  result = palloc(bc);
251  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
252  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
253 
254  PG_RETURN_BYTEA_P(result);
255  }
256 
257  /* Else, it's the traditional escaped style */
258  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
259  {
260  if (tp[0] != '\\')
261  tp++;
262  else if ((tp[0] == '\\') &&
263  (tp[1] >= '0' && tp[1] <= '3') &&
264  (tp[2] >= '0' && tp[2] <= '7') &&
265  (tp[3] >= '0' && tp[3] <= '7'))
266  tp += 4;
267  else if ((tp[0] == '\\') &&
268  (tp[1] == '\\'))
269  tp += 2;
270  else
271  {
272  /*
273  * one backslash, not followed by another or ### valid octal
274  */
275  ereport(ERROR,
276  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
277  errmsg("invalid input syntax for type bytea")));
278  }
279  }
280 
281  bc += VARHDRSZ;
282 
283  result = (bytea *) palloc(bc);
284  SET_VARSIZE(result, bc);
285 
286  tp = inputText;
287  rp = VARDATA(result);
288  while (*tp != '\0')
289  {
290  if (tp[0] != '\\')
291  *rp++ = *tp++;
292  else if ((tp[0] == '\\') &&
293  (tp[1] >= '0' && tp[1] <= '3') &&
294  (tp[2] >= '0' && tp[2] <= '7') &&
295  (tp[3] >= '0' && tp[3] <= '7'))
296  {
297  bc = VAL(tp[1]);
298  bc <<= 3;
299  bc += VAL(tp[2]);
300  bc <<= 3;
301  *rp++ = bc + VAL(tp[3]);
302 
303  tp += 4;
304  }
305  else if ((tp[0] == '\\') &&
306  (tp[1] == '\\'))
307  {
308  *rp++ = '\\';
309  tp += 2;
310  }
311  else
312  {
313  /*
314  * We should never get here. The first pass should not allow it.
315  */
316  ereport(ERROR,
317  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
318  errmsg("invalid input syntax for type bytea")));
319  }
320  }
321 
322  PG_RETURN_BYTEA_P(result);
323 }
324 
325 /*
326  * byteaout - converts to printable representation of byte array
327  *
328  * In the traditional escaped format, non-printable characters are
329  * printed as '\nnn' (octal) and '\' as '\\'.
330  */
/*
 * NOTE(review): two lines are absent from this listing -- the line with the
 * function name and parameter list, and the "if (...)" that opens the hex
 * branch (its counterpart below tests bytea_output against
 * BYTEA_OUTPUT_ESCAPE).  Restore both from the upstream file.
 */
331 Datum
333 {
334  bytea *vlena = PG_GETARG_BYTEA_PP(0);
335  char *result;
336  char *rp;
337 
339  {
340  /* Print hex format */
/* two output chars per input byte, plus 2 for the "\x" prefix and 1 for NUL */
341  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
342  *rp++ = '\\';
343  *rp++ = 'x';
344  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
345  }
346  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
347  {
348  /* Print traditional escaped format */
349  char *vp;
350  int len;
351  int i;
352 
/* first pass: compute the output size, starting at 1 for the trailing NUL */
353  len = 1; /* empty string has 1 char */
354  vp = VARDATA_ANY(vlena);
355  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
356  {
357  if (*vp == '\\')
358  len += 2;
359  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
360  len += 4;
361  else
362  len++;
363  }
/* second pass: write the escaped bytes using the same classification */
364  rp = result = (char *) palloc(len);
365  vp = VARDATA_ANY(vlena);
366  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
367  {
368  if (*vp == '\\')
369  {
370  *rp++ = '\\';
371  *rp++ = '\\';
372  }
373  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
374  {
375  int val; /* holds unprintable chars */
376 
/* emit backslash plus three octal digits, filled in low digit first */
377  val = *vp;
378  rp[0] = '\\';
379  rp[3] = DIG(val & 07);
380  val >>= 3;
381  rp[2] = DIG(val & 07);
382  val >>= 3;
383  rp[1] = DIG(val & 03);
384  rp += 4;
385  }
386  else
387  *rp++ = *vp;
388  }
389  }
390  else
391  {
392  elog(ERROR, "unrecognized bytea_output setting: %d",
393  bytea_output);
394  rp = result = NULL; /* keep compiler quiet */
395  }
/* both formats leave rp just past the last character written */
396  *rp = '\0';
397  PG_RETURN_CSTRING(result);
398 }
399 
400 /*
401  * bytearecv - converts external binary format to bytea
402  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * declaration of "buf", are absent from this listing; restore them from the
 * upstream file.
 */
403 Datum
405 {
407  bytea *result;
408  int nbytes;
409 
/* everything between the cursor and the end of buf becomes the payload */
410  nbytes = buf->len - buf->cursor;
411  result = (bytea *) palloc(nbytes + VARHDRSZ);
412  SET_VARSIZE(result, nbytes + VARHDRSZ);
413  pq_copymsgbytes(buf, VARDATA(result), nbytes);
414  PG_RETURN_BYTEA_P(result);
415 }
416 
417 /*
418  * byteasend - converts bytea to binary format
419  *
420  * This is a special case: just copy the input...
421  */
422 Datum
424 {
425  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
426 
427  PG_RETURN_BYTEA_P(vlena);
428 }
429 
/*
 * Aggregate transition step: argument 0 is the running state (a StringInfo,
 * or NULL on the first call), argument 1 the new value, argument 2 the
 * delimiter.
 *
 * NOTE(review): several lines are absent from this listing -- the function
 * name and parameter list, the declaration of "state", the fetch of
 * argument 1, and the statements that append the delimiter and the value to
 * the state.  Restore them from the upstream file.
 */
430 Datum
432 {
434 
435  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
436 
437  /* Append the value unless null. */
438  if (!PG_ARGISNULL(1))
439  {
441 
442  /* On the first time through, we ignore the delimiter. */
443  if (state == NULL)
444  state = makeStringAggState(fcinfo);
445  else if (!PG_ARGISNULL(2))
446  {
447  bytea *delim = PG_GETARG_BYTEA_PP(2);
448 
450  }
451 
453  }
454 
455  /*
456  * The transition type for string_agg() is declared to be "internal",
457  * which is a pass-by-value type the same size as a pointer.
458  */
459  PG_RETURN_POINTER(state);
460 }
461 
/*
 * Aggregate final step: copies the bytes accumulated in the StringInfo
 * state (argument 0) into a new bytea, or returns SQL NULL when the state
 * is NULL.
 *
 * NOTE(review): the line with the function name and parameter list, and the
 * declaration of "state", are absent from this listing; restore them from
 * the upstream file.
 */
462 Datum
464 {
466 
467  /* cannot be called directly because of internal-type argument */
468  Assert(AggCheckCallContext(fcinfo, NULL));
469 
470  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
471 
472  if (state != NULL)
473  {
474  bytea *result;
475 
/* allocate header plus state->len payload bytes and copy them over */
476  result = (bytea *) palloc(state->len + VARHDRSZ);
477  SET_VARSIZE(result, state->len + VARHDRSZ);
478  memcpy(VARDATA(result), state->data, state->len);
479  PG_RETURN_BYTEA_P(result);
480  }
481  else
482  PG_RETURN_NULL();
483 }
484 
485 /*
486  * textin - converts "..." to internal representation
487  */
488 Datum
490 {
491  char *inputText = PG_GETARG_CSTRING(0);
492 
493  PG_RETURN_TEXT_P(cstring_to_text(inputText));
494 }
495 
496 /*
497  * textout - converts internal representation to "..."
498  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * statement that produces the return value from "txt", are absent from this
 * listing; restore them from the upstream file.
 */
499 Datum
501 {
502  Datum txt = PG_GETARG_DATUM(0);
503 
505 }
506 
507 /*
508  * textrecv - converts external binary format to text
509  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * declaration of "buf", are absent from this listing; restore them from the
 * upstream file.
 */
510 Datum
512 {
514  text *result;
515  char *str;
516  int nbytes;
517 
/* request everything from the cursor to the end of buf; nbytes receives the length */
518  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
519 
/* build the text result from str, then release str */
520  result = cstring_to_text_with_len(str, nbytes);
521  pfree(str);
522  PG_RETURN_TEXT_P(result);
523 }
524 
525 /*
526  * textsend - converts text to binary format
527  */
/*
 * NOTE(review): the line with the function name and parameter list, the
 * declaration of "buf", and the statements that write "t" into buf and
 * return the result are absent from this listing; restore them from the
 * upstream file.
 */
528 Datum
530 {
531  text *t = PG_GETARG_TEXT_PP(0);
533 
534  pq_begintypsend(&buf);
537 }
538 
539 
540 /*
541  * unknownin - converts "..." to internal representation
542  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * return statement, are absent from this listing; restore them from the
 * upstream file.
 */
543 Datum
545 {
546  char *str = PG_GETARG_CSTRING(0);
547 
548  /* representation is same as cstring */
550 }
551 
552 /*
553  * unknownout - converts internal representation to "..."
554  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * return statement, are absent from this listing; restore them from the
 * upstream file.
 */
555 Datum
557 {
558  /* representation is same as cstring */
559  char *str = PG_GETARG_CSTRING(0);
560 
562 }
563 
564 /*
565  * unknownrecv - converts external binary format to unknown
566  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * declaration of "buf", are absent from this listing; restore them from the
 * upstream file.
 */
567 Datum
569 {
571  char *str;
572  int nbytes;
573 
/* request everything from the cursor to the end of buf; nbytes is not used afterwards */
574  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
575  /* representation is same as cstring */
576  PG_RETURN_CSTRING(str);
577 }
578 
579 /*
580  * unknownsend - converts unknown to binary format
581  */
/*
 * NOTE(review): the line with the function name and parameter list, the
 * declaration of "buf", and the return statement are absent from this
 * listing; restore them from the upstream file.
 */
582 Datum
584 {
585  /* representation is same as cstring */
586  char *str = PG_GETARG_CSTRING(0);
588 
589  pq_begintypsend(&buf);
/* the terminating NUL is excluded from the length passed to pq_sendtext */
590  pq_sendtext(&buf, str, strlen(str));
592 }
593 
594 
595 /* ========== PUBLIC ROUTINES ========== */
596 
597 /*
598  * textlen -
599  * returns the logical length of a text*
600  * (which is less than the VARSIZE of the text*)
601  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * return statement, are absent from this listing; restore them from the
 * upstream file.
 */
602 Datum
604 {
/* the argument is kept as a raw Datum rather than fetched as a text pointer */
605  Datum str = PG_GETARG_DATUM(0);
606 
607  /* try to avoid decompressing argument */
609 }
610 
611 /*
612  * text_length -
613  * Does the real work for textlen()
614  *
615  * This is broken out so it can be called directly by other string processing
616  * functions. Note that the argument is passed as a Datum, to indicate that
617  * it may still be in compressed form. We can avoid decompressing it at all
618  * in some cases.
619  */
/*
 * NOTE(review): several lines are absent from this listing -- the function
 * name and parameter list, the "if" branch implementing the fast path, and
 * the first line of the return expression in the else branch.  Restore them
 * from the upstream file.
 */
620 static int32
622 {
623  /* fastpath when max encoding length is one */
626  else
627  {
628  text *t = DatumGetTextPP(str);
629 
631  VARSIZE_ANY_EXHDR(t)));
632  }
633 }
634 
635 /*
636  * textoctetlen -
637  * returns the physical length of a text*
638  * (which is less than the VARSIZE of the text*)
639  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * return statement, are absent from this listing; restore them from the
 * upstream file.
 */
640 Datum
642 {
643  Datum str = PG_GETARG_DATUM(0);
644 
645  /* We need not detoast the input at all */
647 }
648 
649 /*
650  * textcat -
651  * takes two text* and returns a text* that is the concatenation of
652  * the two.
653  *
654  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
655  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
656  * Allocate space for output in all cases.
657  * XXX - thomas 1997-07-10
658  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * return statement that combines t1 and t2, are absent from this listing;
 * restore them from the upstream file.
 */
659 Datum
661 {
662  text *t1 = PG_GETARG_TEXT_PP(0);
663  text *t2 = PG_GETARG_TEXT_PP(1);
664 
666 }
667 
668 /*
669  * text_catenate
670  * Guts of textcat(), broken out so it can be used by other functions
671  *
672  * Arguments can be in short-header form, but not compressed or out-of-line
673  */
674 static text *
676 {
677  text *result;
678  int len1,
679  len2,
680  len;
681  char *ptr;
682 
683  len1 = VARSIZE_ANY_EXHDR(t1);
684  len2 = VARSIZE_ANY_EXHDR(t2);
685 
686  /* paranoia ... probably should throw error instead? */
687  if (len1 < 0)
688  len1 = 0;
689  if (len2 < 0)
690  len2 = 0;
691 
692  len = len1 + len2 + VARHDRSZ;
693  result = (text *) palloc(len);
694 
695  /* Set size of result string... */
696  SET_VARSIZE(result, len);
697 
698  /* Fill data field of result string... */
699  ptr = VARDATA(result);
700  if (len1 > 0)
701  memcpy(ptr, VARDATA_ANY(t1), len1);
702  if (len2 > 0)
703  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
704 
705  return result;
706 }
707 
708 /*
709  * charlen_to_bytelen()
710  * Compute the number of bytes occupied by n characters starting at *p
711  *
712  * It is caller's responsibility that there actually are n characters;
713  * the string need not be null-terminated.
714  */
/*
 * NOTE(review): the "if (...)" condition that selects the single-byte branch
 * is absent from this listing (the line after the opening brace); restore it
 * from the upstream file.
 */
715 static int
716 charlen_to_bytelen(const char *p, int n)
717 {
719  {
720  /* Optimization for single-byte encodings */
721  return n;
722  }
723  else
724  {
725  const char *s;
726 
/* step over n characters, advancing by the byte length pg_mblen reports for each */
727  for (s = p; n > 0; n--)
728  s += pg_mblen(s);
729 
/* bytes consumed = distance walked from the start */
730  return s - p;
731  }
732 }
733 
734 /*
735  * text_substr()
736  * Return a substring starting at the specified position.
737  * - thomas 1997-12-31
738  *
739  * Input:
740  * - string
741  * - starting position (is one-based)
742  * - string length
743  *
744  * If the starting position is zero or less, then return from the start of the string
745  * adjusting the length to be consistent with the "negative start" per SQL.
746  * If the length is less than zero, return the remaining string.
747  *
748  * Added multibyte support.
749  * - Tatsuo Ishii 1998-4-21
750  * Changed behavior if starting position is less than one to conform to SQL behavior.
751  * Formerly returned the entire string; now returns a portion.
752  * - Thomas Lockhart 1998-12-10
753  * Now uses faster TOAST-slicing interface
754  * - John Gray 2002-02-22
755  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
756  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
757  * error; if E < 1, return '', not entire string). Fixed MB related bug when
758  * S > LC and < LC + 4 sometimes garbage characters are returned.
759  * - Joe Conway 2002-08-10
760  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * first line of the return expression (the call that receives these
 * arguments), are absent from this listing; restore them from the upstream
 * file.
 */
761 Datum
763 {
/* arguments 1 and 2 are passed on as int32 values; the final flag is false */
765  PG_GETARG_INT32(1),
766  PG_GETARG_INT32(2),
767  false));
768 }
769 
770 /*
771  * text_substr_no_len -
772  * Wrapper to avoid opr_sanity failure due to
773  * one function accepting a different number of args.
774  */
/*
 * NOTE(review): the line with the function name and parameter list, and the
 * first line of the return expression (the call that receives these
 * arguments), are absent from this listing; restore them from the upstream
 * file.
 */
775 Datum
777 {
/* no length argument: -1 is passed together with a true final flag */
779  PG_GETARG_INT32(1),
780  -1, true));
781 }
782 
783 /*
784  * text_substring -
785  * Does the real work for text_substr() and text_substr_no_len()
786  *
787  * This is broken out so it can be called directly by other string processing
788  * functions. Note that the argument is passed as a Datum, to indicate that
789  * it may still be in compressed/toasted form. We can avoid detoasting all
790  * of it in some cases.
791  *
792  * The result is always a freshly palloc'd datum.
793  */
/*
 * Parameters:
 *	str					 - source value, passed as a Datum
 *	start				 - one-based start position
 *	length				 - requested length; ignored when
 *						   length_not_specified is true
 *	length_not_specified - true to take everything from start to the end
 *
 * Raises ERRCODE_SUBSTRING_ERROR when start + length is less than start.
 *
 * NOTE(review): two lines are absent from this listing -- the declaration of
 * "eml" at the top of the body, and the "if (...)" condition in the
 * multibyte branch that decides whether a slice is fetched or str is used
 * directly.  Restore both from the upstream file.
 */
794 static text *
795 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
796 {
798  int32 S = start; /* start position */
799  int32 S1; /* adjusted start position */
800  int32 L1; /* adjusted substring length */
801 
802  /* life is easy if the encoding max length is 1 */
803  if (eml == 1)
804  {
/* clamp the start to the first character */
805  S1 = Max(S, 1);
806 
807  if (length_not_specified) /* special case - get length to end of
808  * string */
809  L1 = -1;
810  else
811  {
812  /* end position */
813  int E = S + length;
814 
815  /*
816  * A negative value for L is the only way for the end position to
817  * be before the start. SQL99 says to throw an error.
818  */
819  if (E < S)
820  ereport(ERROR,
821  (errcode(ERRCODE_SUBSTRING_ERROR),
822  errmsg("negative substring length not allowed")));
823 
824  /*
825  * A zero or negative value for the end position can happen if the
826  * start was negative or one. SQL99 says to return a zero-length
827  * string.
828  */
829  if (E < 1)
830  return cstring_to_text("");
831 
832  L1 = E - S1;
833  }
834 
835  /*
836  * If the start position is past the end of the string, SQL99 says to
837  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
838  * that for us. Convert to zero-based starting position
839  */
840  return DatumGetTextPSlice(str, S1 - 1, L1);
841  }
842  else if (eml > 1)
843  {
844  /*
845  * When encoding max length is > 1, we can't get LC without
846  * detoasting, so we'll grab a conservatively large slice now and go
847  * back later to do the right thing
848  */
849  int32 slice_start;
850  int32 slice_size;
851  int32 slice_strlen;
852  text *slice;
853  int32 E1;
854  int32 i;
855  char *p;
856  char *s;
857  text *ret;
858 
859  /*
860  * if S is past the end of the string, the tuple toaster will return a
861  * zero-length string to us
862  */
863  S1 = Max(S, 1);
864 
865  /*
866  * We need to start at position zero because there is no way to know
867  * in advance which byte offset corresponds to the supplied start
868  * position.
869  */
870  slice_start = 0;
871 
872  if (length_not_specified) /* special case - get length to end of
873  * string */
874  slice_size = L1 = -1;
875  else
876  {
877  int E = S + length;
878 
879  /*
880  * A negative value for L is the only way for the end position to
881  * be before the start. SQL99 says to throw an error.
882  */
883  if (E < S)
884  ereport(ERROR,
885  (errcode(ERRCODE_SUBSTRING_ERROR),
886  errmsg("negative substring length not allowed")));
887 
888  /*
889  * A zero or negative value for the end position can happen if the
890  * start was negative or one. SQL99 says to return a zero-length
891  * string.
892  */
893  if (E < 1)
894  return cstring_to_text("");
895 
896  /*
897  * if E is past the end of the string, the tuple toaster will
898  * truncate the length for us
899  */
900  L1 = E - S1;
901 
902  /*
903  * Total slice size in bytes can't be any longer than the start
904  * position plus substring length times the encoding max length.
905  */
906  slice_size = (S1 + L1) * eml;
907  }
908 
909  /*
910  * If we're working with an untoasted source, no need to do an extra
911  * copying step.
912  */
/* NOTE(review): the "if (...)" guarding the next statement is missing here. */
915  slice = DatumGetTextPSlice(str, slice_start, slice_size);
916  else
917  slice = (text *) DatumGetPointer(str);
918 
919  /* see if we got back an empty string */
920  if (VARSIZE_ANY_EXHDR(slice) == 0)
921  {
/* free the slice only when it is a separate object from str */
922  if (slice != (text *) DatumGetPointer(str))
923  pfree(slice);
924  return cstring_to_text("");
925  }
926 
927  /* Now we can get the actual length of the slice in MB characters */
928  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
929  VARSIZE_ANY_EXHDR(slice));
930 
931  /*
932  * Check that the start position wasn't > slice_strlen. If so, SQL99
933  * says to return a zero-length string.
934  */
935  if (S1 > slice_strlen)
936  {
937  if (slice != (text *) DatumGetPointer(str))
938  pfree(slice);
939  return cstring_to_text("");
940  }
941 
942  /*
943  * Adjust L1 and E1 now that we know the slice string length. Again
944  * remember that S1 is one based, and slice_start is zero based.
945  */
946  if (L1 > -1)
947  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
948  else
949  E1 = slice_start + 1 + slice_strlen;
950 
951  /*
952  * Find the start position in the slice; remember S1 is not zero based
953  */
954  p = VARDATA_ANY(slice);
955  for (i = 0; i < S1 - 1; i++)
956  p += pg_mblen(p);
957 
958  /* hang onto a pointer to our start position */
959  s = p;
960 
961  /*
962  * Count the actual bytes used by the substring of the requested
963  * length.
964  */
965  for (i = S1; i < E1; i++)
966  p += pg_mblen(p);
967 
/* the result holds the p - s bytes between the start and end pointers */
968  ret = (text *) palloc(VARHDRSZ + (p - s));
969  SET_VARSIZE(ret, VARHDRSZ + (p - s));
970  memcpy(VARDATA(ret), s, (p - s));
971 
972  if (slice != (text *) DatumGetPointer(str))
973  pfree(slice);
974 
975  return ret;
976  }
977  else
978  elog(ERROR, "invalid backend encoding: encoding max length < 1");
979 
980  /* not reached: suppress compiler warning */
981  return NULL;
982 }
983 
984 /*
985  * textoverlay
986  * Replace specified substring of first string with second
987  *
988  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
989  * This code is a direct implementation of what the standard says.
990  */
991 Datum
993 {
994  text *t1 = PG_GETARG_TEXT_PP(0);
995  text *t2 = PG_GETARG_TEXT_PP(1);
996  int sp = PG_GETARG_INT32(2); /* substring start position */
997  int sl = PG_GETARG_INT32(3); /* substring length */
998 
999  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1000 }
1001 
/*
 * Three-argument overlay entry point: like the four-argument form, but the
 * substring length is taken to be the character length of the replacement
 * string t2.
 *
 * NOTE(review): the line with the function name and parameter list is absent
 * from this listing; restore it from the upstream file.
 */
1002 Datum
1004 {
1005  text *t1 = PG_GETARG_TEXT_PP(0);
1006  text *t2 = PG_GETARG_TEXT_PP(1);
1007  int sp = PG_GETARG_INT32(2); /* substring start position */
1008  int sl;
1009 
1010  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1011  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1012 }
1013 
1014 static text *
1015 text_overlay(text *t1, text *t2, int sp, int sl)
1016 {
1017  text *result;
1018  text *s1;
1019  text *s2;
1020  int sp_pl_sl;
1021 
1022  /*
1023  * Check for possible integer-overflow cases. For negative sp, throw a
1024  * "substring length" error because that's what should be expected
1025  * according to the spec's definition of OVERLAY().
1026  */
1027  if (sp <= 0)
1028  ereport(ERROR,
1029  (errcode(ERRCODE_SUBSTRING_ERROR),
1030  errmsg("negative substring length not allowed")));
1031  sp_pl_sl = sp + sl;
1032  if (sp_pl_sl <= sl)
1033  ereport(ERROR,
1034  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1035  errmsg("integer out of range")));
1036 
1037  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1038  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1039  result = text_catenate(s1, t2);
1040  result = text_catenate(result, s2);
1041 
1042  return result;
1043 }
1044 
1045 /*
1046  * textpos -
1047  * Return the position of the specified substring.
1048  * Implements the SQL POSITION() function.
1049  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1050  * - thomas 1997-07-27
1051  */
1052 Datum
1054 {
1055  text *str = PG_GETARG_TEXT_PP(0);
1056  text *search_str = PG_GETARG_TEXT_PP(1);
1057 
1058  PG_RETURN_INT32((int32) text_position(str, search_str));
1059 }
1060 
1061 /*
1062  * text_position -
1063  * Does the real work for textpos()
1064  *
1065  * Inputs:
1066  * t1 - string to be searched
1067  * t2 - pattern to match within t1
1068  * Result:
1069  * Character index of the first matched char, starting from 1,
1070  * or 0 if no match.
1071  *
1072  * This is broken out so it can be called directly by other string processing
1073  * functions.
1074  */
1075 static int
1077 {
1079  int result;
1080 
1081  text_position_setup(t1, t2, &state);
1082  result = text_position_next(1, &state);
1083  text_position_cleanup(&state);
1084  return result;
1085 }
1086 
1087 
1088 /*
1089  * text_position_setup, text_position_next, text_position_cleanup -
1090  * Component steps of text_position()
1091  *
1092  * These are broken out so that a string can be efficiently searched for
1093  * multiple occurrences of the same pattern. text_position_next may be
1094  * called multiple times with increasing values of start_pos, which is
1095  * the 1-based character position to start the search from. The "state"
1096  * variable is normally just a local variable in the caller.
1097  */
1098 
/*
 * Fills in *state for a search of t1 (haystack) for t2 (needle): records
 * the string pointers and lengths, and builds the skip table when the needle
 * is longer than one character and no longer than the haystack.
 *
 * NOTE(review): two lines are absent from this listing -- the line with the
 * function name and parameter list, and the "if (...)" condition that
 * selects the single-byte branch.  Restore both from the upstream file.
 */
1099 static void
1101 {
/* byte lengths for now; the multibyte branch replaces them with character counts */
1102  int len1 = VARSIZE_ANY_EXHDR(t1);
1103  int len2 = VARSIZE_ANY_EXHDR(t2);
1104 
1106  {
1107  /* simple case - single byte encoding */
1108  state->use_wchar = false;
1109  state->str1 = VARDATA_ANY(t1);
1110  state->str2 = VARDATA_ANY(t2);
1111  state->len1 = len1;
1112  state->len2 = len2;
1113  }
1114  else
1115  {
1116  /* not as simple - multibyte encoding */
1117  pg_wchar *p1,
1118  *p2;
1119 
/* allocate one pg_wchar per input byte plus one extra, then convert */
1120  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1121  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1122  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1123  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1124 
1125  state->use_wchar = true;
1126  state->wstr1 = p1;
1127  state->wstr2 = p2;
1128  state->len1 = len1;
1129  state->len2 = len2;
1130  }
1131 
1132  /*
1133  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1134  * notes we use the terminology that the "haystack" is the string to be
1135  * searched (t1) and the "needle" is the pattern being sought (t2).
1136  *
1137  * If the needle is empty or bigger than the haystack then there is no
1138  * point in wasting cycles initializing the table. We also choose not to
1139  * use B-M-H for needles of length 1, since the skip table can't possibly
1140  * save anything in that case.
1141  */
1142  if (len1 >= len2 && len2 > 1)
1143  {
1144  int searchlength = len1 - len2;
1145  int skiptablemask;
1146  int last;
1147  int i;
1148 
1149  /*
1150  * First we must determine how much of the skip table to use. The
1151  * declaration of TextPositionState allows up to 256 elements, but for
1152  * short search problems we don't really want to have to initialize so
1153  * many elements --- it would take too long in comparison to the
1154  * actual search time. So we choose a useful skip table size based on
1155  * the haystack length minus the needle length. The closer the needle
1156  * length is to the haystack length the less useful skipping becomes.
1157  *
1158  * Note: since we use bit-masking to select table elements, the skip
1159  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1160  */
1161  if (searchlength < 16)
1162  skiptablemask = 3;
1163  else if (searchlength < 64)
1164  skiptablemask = 7;
1165  else if (searchlength < 128)
1166  skiptablemask = 15;
1167  else if (searchlength < 512)
1168  skiptablemask = 31;
1169  else if (searchlength < 2048)
1170  skiptablemask = 63;
1171  else if (searchlength < 4096)
1172  skiptablemask = 127;
1173  else
1174  skiptablemask = 255;
1175  state->skiptablemask = skiptablemask;
1176 
1177  /*
1178  * Initialize the skip table. We set all elements to the needle
1179  * length, since this is the correct skip distance for any character
1180  * not found in the needle.
1181  */
1182  for (i = 0; i <= skiptablemask; i++)
1183  state->skiptable[i] = len2;
1184 
1185  /*
1186  * Now examine the needle. For each character except the last one,
1187  * set the corresponding table element to the appropriate skip
1188  * distance. Note that when two characters share the same skip table
1189  * entry, the one later in the needle must determine the skip
1190  * distance.
1191  */
1192  last = len2 - 1;
1193 
1194  if (!state->use_wchar)
1195  {
1196  const char *str2 = state->str2;
1197 
/* the cast to unsigned char keeps the table subscript non-negative */
1198  for (i = 0; i < last; i++)
1199  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1200  }
1201  else
1202  {
1203  const pg_wchar *wstr2 = state->wstr2;
1204 
1205  for (i = 0; i < last; i++)
1206  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1207  }
1208  }
1209 }
1210 
1211 static int
1213 {
1214  int haystack_len = state->len1;
1215  int needle_len = state->len2;
1216  int skiptablemask = state->skiptablemask;
1217 
1218  Assert(start_pos > 0); /* else caller error */
1219 
1220  if (needle_len <= 0)
1221  return start_pos; /* result for empty pattern */
1222 
1223  start_pos--; /* adjust for zero based arrays */
1224 
1225  /* Done if the needle can't possibly fit */
1226  if (haystack_len < start_pos + needle_len)
1227  return 0;
1228 
1229  if (!state->use_wchar)
1230  {
1231  /* simple case - single byte encoding */
1232  const char *haystack = state->str1;
1233  const char *needle = state->str2;
1234  const char *haystack_end = &haystack[haystack_len];
1235  const char *hptr;
1236 
1237  if (needle_len == 1)
1238  {
1239  /* No point in using B-M-H for a one-character needle */
1240  char nchar = *needle;
1241 
1242  hptr = &haystack[start_pos];
1243  while (hptr < haystack_end)
1244  {
1245  if (*hptr == nchar)
1246  return hptr - haystack + 1;
1247  hptr++;
1248  }
1249  }
1250  else
1251  {
1252  const char *needle_last = &needle[needle_len - 1];
1253 
1254  /* Start at startpos plus the length of the needle */
1255  hptr = &haystack[start_pos + needle_len - 1];
1256  while (hptr < haystack_end)
1257  {
1258  /* Match the needle scanning *backward* */
1259  const char *nptr;
1260  const char *p;
1261 
1262  nptr = needle_last;
1263  p = hptr;
1264  while (*nptr == *p)
1265  {
1266  /* Matched it all? If so, return 1-based position */
1267  if (nptr == needle)
1268  return p - haystack + 1;
1269  nptr--, p--;
1270  }
1271 
1272  /*
1273  * No match, so use the haystack char at hptr to decide how
1274  * far to advance. If the needle had any occurrence of that
1275  * character (or more precisely, one sharing the same
1276  * skiptable entry) before its last character, then we advance
1277  * far enough to align the last such needle character with
1278  * that haystack position. Otherwise we can advance by the
1279  * whole needle length.
1280  */
1281  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1282  }
1283  }
1284  }
1285  else
1286  {
1287  /* The multibyte char version. This works exactly the same way. */
1288  const pg_wchar *haystack = state->wstr1;
1289  const pg_wchar *needle = state->wstr2;
1290  const pg_wchar *haystack_end = &haystack[haystack_len];
1291  const pg_wchar *hptr;
1292 
1293  if (needle_len == 1)
1294  {
1295  /* No point in using B-M-H for a one-character needle */
1296  pg_wchar nchar = *needle;
1297 
1298  hptr = &haystack[start_pos];
1299  while (hptr < haystack_end)
1300  {
1301  if (*hptr == nchar)
1302  return hptr - haystack + 1;
1303  hptr++;
1304  }
1305  }
1306  else
1307  {
1308  const pg_wchar *needle_last = &needle[needle_len - 1];
1309 
1310  /* Start at startpos plus the length of the needle */
1311  hptr = &haystack[start_pos + needle_len - 1];
1312  while (hptr < haystack_end)
1313  {
1314  /* Match the needle scanning *backward* */
1315  const pg_wchar *nptr;
1316  const pg_wchar *p;
1317 
1318  nptr = needle_last;
1319  p = hptr;
1320  while (*nptr == *p)
1321  {
1322  /* Matched it all? If so, return 1-based position */
1323  if (nptr == needle)
1324  return p - haystack + 1;
1325  nptr--, p--;
1326  }
1327 
1328  /*
1329  * No match, so use the haystack char at hptr to decide how
1330  * far to advance. If the needle had any occurrence of that
1331  * character (or more precisely, one sharing the same
1332  * skiptable entry) before its last character, then we advance
1333  * far enough to align the last such needle character with
1334  * that haystack position. Otherwise we can advance by the
1335  * whole needle length.
1336  */
1337  hptr += state->skiptable[*hptr & skiptablemask];
1338  }
1339  }
1340  }
1341 
1342  return 0; /* not found */
1343 }
1344 
1345 static void
1347 {
1348  if (state->use_wchar)
1349  {
1350  pfree(state->wstr1);
1351  pfree(state->wstr2);
1352  }
1353 }
1354 
1355 /* varstr_cmp()
1356  * Comparison function for text strings with given lengths.
1357  * Includes locale support, but must copy strings to temporary memory
1358  * to allow null-termination for inputs to strcoll().
1359  * Returns an integer less than, equal to, or greater than zero, indicating
1360  * whether arg1 is less than, equal to, or greater than arg2.
1361  */
1362 int
1363 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1364 {
1365  int result;
1366 
1367  /*
1368  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1369  * have to do some memory copying. This turns out to be significantly
1370  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1371  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1372  */
1373  if (lc_collate_is_c(collid))
1374  {
1375  result = memcmp(arg1, arg2, Min(len1, len2));
1376  if ((result == 0) && (len1 != len2))
1377  result = (len1 < len2) ? -1 : 1;
1378  }
1379  else
1380  {
1381  char a1buf[TEXTBUFLEN];
1382  char a2buf[TEXTBUFLEN];
1383  char *a1p,
1384  *a2p;
1385 
1386 #ifdef HAVE_LOCALE_T
1387  pg_locale_t mylocale = 0;
1388 #endif
1389 
1390  if (collid != DEFAULT_COLLATION_OID)
1391  {
1392  if (!OidIsValid(collid))
1393  {
1394  /*
1395  * This typically means that the parser could not resolve a
1396  * conflict of implicit collations, so report it that way.
1397  */
1398  ereport(ERROR,
1399  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1400  errmsg("could not determine which collation to use for string comparison"),
1401  errhint("Use the COLLATE clause to set the collation explicitly.")));
1402  }
1403 #ifdef HAVE_LOCALE_T
1404  mylocale = pg_newlocale_from_collation(collid);
1405 #endif
1406  }
1407 
1408  /*
1409  * memcmp() can't tell us which of two unequal strings sorts first, but
1410  * it's a cheap way to tell if they're equal. Testing shows that
1411  * memcmp() followed by strcoll() is only trivially slower than
1412  * strcoll() by itself, so we don't lose much if this doesn't work out
1413  * very often, and if it does - for example, because there are many
1414  * equal strings in the input - then we win big by avoiding expensive
1415  * collation-aware comparisons.
1416  */
1417  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1418  return 0;
1419 
1420 #ifdef WIN32
1421  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1422  if (GetDatabaseEncoding() == PG_UTF8)
1423  {
1424  int a1len;
1425  int a2len;
1426  int r;
1427 
1428  if (len1 >= TEXTBUFLEN / 2)
1429  {
1430  a1len = len1 * 2 + 2;
1431  a1p = palloc(a1len);
1432  }
1433  else
1434  {
1435  a1len = TEXTBUFLEN;
1436  a1p = a1buf;
1437  }
1438  if (len2 >= TEXTBUFLEN / 2)
1439  {
1440  a2len = len2 * 2 + 2;
1441  a2p = palloc(a2len);
1442  }
1443  else
1444  {
1445  a2len = TEXTBUFLEN;
1446  a2p = a2buf;
1447  }
1448 
1449  /* stupid Microsloth API does not work for zero-length input */
1450  if (len1 == 0)
1451  r = 0;
1452  else
1453  {
1454  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1455  (LPWSTR) a1p, a1len / 2);
1456  if (!r)
1457  ereport(ERROR,
1458  (errmsg("could not convert string to UTF-16: error code %lu",
1459  GetLastError())));
1460  }
1461  ((LPWSTR) a1p)[r] = 0;
1462 
1463  if (len2 == 0)
1464  r = 0;
1465  else
1466  {
1467  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1468  (LPWSTR) a2p, a2len / 2);
1469  if (!r)
1470  ereport(ERROR,
1471  (errmsg("could not convert string to UTF-16: error code %lu",
1472  GetLastError())));
1473  }
1474  ((LPWSTR) a2p)[r] = 0;
1475 
1476  errno = 0;
1477 #ifdef HAVE_LOCALE_T
1478  if (mylocale)
1479  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
1480  else
1481 #endif
1482  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1483  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1484  * headers */
1485  ereport(ERROR,
1486  (errmsg("could not compare Unicode strings: %m")));
1487 
1488  /*
1489  * In some locales wcscoll() can claim that nonidentical strings
1490  * are equal. Believing that would be bad news for a number of
1491  * reasons, so we follow Perl's lead and sort "equal" strings
1492  * according to strcmp (on the UTF-8 representation).
1493  */
1494  if (result == 0)
1495  {
1496  result = memcmp(arg1, arg2, Min(len1, len2));
1497  if ((result == 0) && (len1 != len2))
1498  result = (len1 < len2) ? -1 : 1;
1499  }
1500 
1501  if (a1p != a1buf)
1502  pfree(a1p);
1503  if (a2p != a2buf)
1504  pfree(a2p);
1505 
1506  return result;
1507  }
1508 #endif /* WIN32 */
1509 
1510  if (len1 >= TEXTBUFLEN)
1511  a1p = (char *) palloc(len1 + 1);
1512  else
1513  a1p = a1buf;
1514  if (len2 >= TEXTBUFLEN)
1515  a2p = (char *) palloc(len2 + 1);
1516  else
1517  a2p = a2buf;
1518 
1519  memcpy(a1p, arg1, len1);
1520  a1p[len1] = '\0';
1521  memcpy(a2p, arg2, len2);
1522  a2p[len2] = '\0';
1523 
1524 #ifdef HAVE_LOCALE_T
1525  if (mylocale)
1526  result = strcoll_l(a1p, a2p, mylocale);
1527  else
1528 #endif
1529  result = strcoll(a1p, a2p);
1530 
1531  /*
1532  * In some locales strcoll() can claim that nonidentical strings are
1533  * equal. Believing that would be bad news for a number of reasons,
1534  * so we follow Perl's lead and sort "equal" strings according to
1535  * strcmp().
1536  */
1537  if (result == 0)
1538  result = strcmp(a1p, a2p);
1539 
1540  if (a1p != a1buf)
1541  pfree(a1p);
1542  if (a2p != a2buf)
1543  pfree(a2p);
1544  }
1545 
1546  return result;
1547 }
1548 
1549 /* text_cmp()
1550  * Internal comparison function for text strings.
1551  * Returns -1, 0 or 1
1552  */
1553 static int
1554 text_cmp(text *arg1, text *arg2, Oid collid)
1555 {
1556  char *a1p,
1557  *a2p;
1558  int len1,
1559  len2;
1560 
1561  a1p = VARDATA_ANY(arg1);
1562  a2p = VARDATA_ANY(arg2);
1563 
1564  len1 = VARSIZE_ANY_EXHDR(arg1);
1565  len2 = VARSIZE_ANY_EXHDR(arg2);
1566 
1567  return varstr_cmp(a1p, len1, a2p, len2, collid);
1568 }
1569 
1570 /*
1571  * Comparison functions for text strings.
1572  *
1573  * Note: btree indexes need these routines not to leak memory; therefore,
1574  * be careful to free working copies of toasted datums. Most places don't
1575  * need to be so careful.
1576  */
1577 
1578 Datum
1580 {
1581  Datum arg1 = PG_GETARG_DATUM(0);
1582  Datum arg2 = PG_GETARG_DATUM(1);
1583  bool result;
1584  Size len1,
1585  len2;
1586 
1587  /*
1588  * Since we only care about equality or not-equality, we can avoid all the
1589  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1590  * don't even have to do a bitwise comparison if we can show the lengths
1591  * of the strings are unequal; which might save us from having to detoast
1592  * one or both values.
1593  */
1594  len1 = toast_raw_datum_size(arg1);
1595  len2 = toast_raw_datum_size(arg2);
1596  if (len1 != len2)
1597  result = false;
1598  else
1599  {
1600  text *targ1 = DatumGetTextPP(arg1);
1601  text *targ2 = DatumGetTextPP(arg2);
1602 
1603  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1604  len1 - VARHDRSZ) == 0);
1605 
1606  PG_FREE_IF_COPY(targ1, 0);
1607  PG_FREE_IF_COPY(targ2, 1);
1608  }
1609 
1610  PG_RETURN_BOOL(result);
1611 }
1612 
1613 Datum
1615 {
1616  Datum arg1 = PG_GETARG_DATUM(0);
1617  Datum arg2 = PG_GETARG_DATUM(1);
1618  bool result;
1619  Size len1,
1620  len2;
1621 
1622  /* See comment in texteq() */
1623  len1 = toast_raw_datum_size(arg1);
1624  len2 = toast_raw_datum_size(arg2);
1625  if (len1 != len2)
1626  result = true;
1627  else
1628  {
1629  text *targ1 = DatumGetTextPP(arg1);
1630  text *targ2 = DatumGetTextPP(arg2);
1631 
1632  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1633  len1 - VARHDRSZ) != 0);
1634 
1635  PG_FREE_IF_COPY(targ1, 0);
1636  PG_FREE_IF_COPY(targ2, 1);
1637  }
1638 
1639  PG_RETURN_BOOL(result);
1640 }
1641 
1642 Datum
1644 {
1645  text *arg1 = PG_GETARG_TEXT_PP(0);
1646  text *arg2 = PG_GETARG_TEXT_PP(1);
1647  bool result;
1648 
1649  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1650 
1651  PG_FREE_IF_COPY(arg1, 0);
1652  PG_FREE_IF_COPY(arg2, 1);
1653 
1654  PG_RETURN_BOOL(result);
1655 }
1656 
1657 Datum
1659 {
1660  text *arg1 = PG_GETARG_TEXT_PP(0);
1661  text *arg2 = PG_GETARG_TEXT_PP(1);
1662  bool result;
1663 
1664  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1665 
1666  PG_FREE_IF_COPY(arg1, 0);
1667  PG_FREE_IF_COPY(arg2, 1);
1668 
1669  PG_RETURN_BOOL(result);
1670 }
1671 
1672 Datum
1674 {
1675  text *arg1 = PG_GETARG_TEXT_PP(0);
1676  text *arg2 = PG_GETARG_TEXT_PP(1);
1677  bool result;
1678 
1679  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1680 
1681  PG_FREE_IF_COPY(arg1, 0);
1682  PG_FREE_IF_COPY(arg2, 1);
1683 
1684  PG_RETURN_BOOL(result);
1685 }
1686 
1687 Datum
1689 {
1690  text *arg1 = PG_GETARG_TEXT_PP(0);
1691  text *arg2 = PG_GETARG_TEXT_PP(1);
1692  bool result;
1693 
1694  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1695 
1696  PG_FREE_IF_COPY(arg1, 0);
1697  PG_FREE_IF_COPY(arg2, 1);
1698 
1699  PG_RETURN_BOOL(result);
1700 }
1701 
1702 Datum
1704 {
1705  text *arg1 = PG_GETARG_TEXT_PP(0);
1706  text *arg2 = PG_GETARG_TEXT_PP(1);
1707  int32 result;
1708 
1709  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1710 
1711  PG_FREE_IF_COPY(arg1, 0);
1712  PG_FREE_IF_COPY(arg2, 1);
1713 
1714  PG_RETURN_INT32(result);
1715 }
1716 
1717 Datum
1719 {
1721  Oid collid = ssup->ssup_collation;
1722  MemoryContext oldcontext;
1723 
1724  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1725 
1726  btsortsupport_worker(ssup, collid);
1727 
1728  MemoryContextSwitchTo(oldcontext);
1729 
1730  PG_RETURN_VOID();
1731 }
1732 
1733 static void
1735 {
1736  TextSortSupport *tss;
1737 
1738  /*
1739  * If LC_COLLATE = C, we can make things quite a bit faster by using
1740  * memcmp() rather than strcoll(). To minimize the per-comparison
1741  * overhead, we make this decision just once for the whole sort.
1742  */
1743  if (lc_collate_is_c(collid))
1744  {
1745  ssup->comparator = bttextfastcmp_c;
1746  return;
1747  }
1748 
1749  /*
1750  * WIN32 requires complex hacks when the database encoding is UTF-8 (except
1751  * when using the "C" collation). For now, we don't optimize that case.
1752  */
1753 #ifdef WIN32
1754  if (GetDatabaseEncoding() == PG_UTF8)
1755  return;
1756 #endif
1757 
1758  /*
1759  * We may need a collation-sensitive comparison. To make things faster,
1760  * we'll figure out the collation based on the locale id and cache the
1761  * result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
1762  * prepare one or two palloc'd buffers to use as temporary workspace. In
1763  * the ad-hoc comparison case we only use palloc'd buffers when we need
1764  * more space than we're comfortable allocating on the stack, but here we
1765  * can keep the buffers around for the whole sort, so it makes sense to
1766  * allocate them once and use them unconditionally.
1767  */
1768  tss = palloc(sizeof(TextSortSupport));
1769 #ifdef HAVE_LOCALE_T
1770  tss->locale = 0;
1771 #endif
1772 
1773  if (collid != DEFAULT_COLLATION_OID)
1774  {
1775  if (!OidIsValid(collid))
1776  {
1777  /*
1778  * This typically means that the parser could not resolve a
1779  * conflict of implicit collations, so report it that way.
1780  */
1781  ereport(ERROR,
1782  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1783  errmsg("could not determine which collation to use for string comparison"),
1784  errhint("Use the COLLATE clause to set the collation explicitly.")));
1785  }
1786 #ifdef HAVE_LOCALE_T
1787  tss->locale = pg_newlocale_from_collation(collid);
1788 #endif
1789  }
1790 
1791  tss->buf1 = palloc(TEXTBUFLEN);
1792  tss->buflen1 = TEXTBUFLEN;
1793  tss->buf2 = palloc(TEXTBUFLEN);
1794  tss->buflen2 = TEXTBUFLEN;
1795 
1796  ssup->ssup_extra = tss;
1798 }
1799 
1800 /*
1801  * sortsupport comparison func (for C locale case)
1802  */
1803 static int
1805 {
1806  text *arg1 = DatumGetTextPP(x);
1807  text *arg2 = DatumGetTextPP(y);
1808  char *a1p,
1809  *a2p;
1810  int len1,
1811  len2,
1812  result;
1813 
1814  a1p = VARDATA_ANY(arg1);
1815  a2p = VARDATA_ANY(arg2);
1816 
1817  len1 = VARSIZE_ANY_EXHDR(arg1);
1818  len2 = VARSIZE_ANY_EXHDR(arg2);
1819 
1820  result = memcmp(a1p, a2p, Min(len1, len2));
1821  if ((result == 0) && (len1 != len2))
1822  result = (len1 < len2) ? -1 : 1;
1823 
1824  /* We can't afford to leak memory here. */
1825  if (PointerGetDatum(arg1) != x)
1826  pfree(arg1);
1827  if (PointerGetDatum(arg2) != y)
1828  pfree(arg2);
1829 
1830  return result;
1831 }
1832 
1833 /*
1834  * sortsupport comparison func (for locale case)
1835  */
1836 static int
1838 {
1839  text *arg1 = DatumGetTextPP(x);
1840  text *arg2 = DatumGetTextPP(y);
1841  TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
1842 
1843  /* working state */
1844  char *a1p,
1845  *a2p;
1846  int len1,
1847  len2,
1848  result;
1849 
1850  a1p = VARDATA_ANY(arg1);
1851  a2p = VARDATA_ANY(arg2);
1852 
1853  len1 = VARSIZE_ANY_EXHDR(arg1);
1854  len2 = VARSIZE_ANY_EXHDR(arg2);
1855 
1856  /* Fast pre-check for equality, as discussed in varstr_cmp() */
1857  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
1858  {
1859  result = 0;
1860  goto done;
1861  }
1862 
1863  if (len1 >= tss->buflen1)
1864  {
1865  pfree(tss->buf1);
1866  tss->buflen1 = Max(len1 + 1, Min(tss->buflen1 * 2, MaxAllocSize));
1867  tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
1868  }
1869  if (len2 >= tss->buflen2)
1870  {
1871  pfree(tss->buf2);
1872  tss->buflen2 = Max(len2 + 1, Min(tss->buflen2 * 2, MaxAllocSize));
1873  tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
1874  }
1875 
1876  memcpy(tss->buf1, a1p, len1);
1877  tss->buf1[len1] = '\0';
1878  memcpy(tss->buf2, a2p, len2);
1879  tss->buf2[len2] = '\0';
1880 
1881 #ifdef HAVE_LOCALE_T
1882  if (tss->locale)
1883  result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
1884  else
1885 #endif
1886  result = strcoll(tss->buf1, tss->buf2);
1887 
1888  /*
1889  * In some locales strcoll() can claim that nonidentical strings are equal.
1890  * Believing that would be bad news for a number of reasons, so we follow
1891  * Perl's lead and sort "equal" strings according to strcmp().
1892  */
1893  if (result == 0)
1894  result = strcmp(tss->buf1, tss->buf2);
1895 
1896 done:
1897  /* We can't afford to leak memory here. */
1898  if (PointerGetDatum(arg1) != x)
1899  pfree(arg1);
1900  if (PointerGetDatum(arg2) != y)
1901  pfree(arg2);
1902 
1903  return result;
1904 }
1905 
1906 Datum
1908 {
1909  text *arg1 = PG_GETARG_TEXT_PP(0);
1910  text *arg2 = PG_GETARG_TEXT_PP(1);
1911  text *result;
1912 
1913  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
1914 
1915  PG_RETURN_TEXT_P(result);
1916 }
1917 
1918 Datum
1920 {
1921  text *arg1 = PG_GETARG_TEXT_PP(0);
1922  text *arg2 = PG_GETARG_TEXT_PP(1);
1923  text *result;
1924 
1925  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
1926 
1927  PG_RETURN_TEXT_P(result);
1928 }
1929 
1930 
1931 /*
1932  * The following operators support character-by-character comparison
1933  * of text datums, to allow building indexes suitable for LIKE clauses.
1934  * Note that the regular texteq/textne comparison operators are assumed
1935  * to be compatible with these!
1936  */
1937 
1938 static int
1940 {
1941  int result;
1942  int len1,
1943  len2;
1944 
1945  len1 = VARSIZE_ANY_EXHDR(arg1);
1946  len2 = VARSIZE_ANY_EXHDR(arg2);
1947 
1948  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1949  if (result != 0)
1950  return result;
1951  else if (len1 < len2)
1952  return -1;
1953  else if (len1 > len2)
1954  return 1;
1955  else
1956  return 0;
1957 }
1958 
1959 
1960 Datum
1962 {
1963  text *arg1 = PG_GETARG_TEXT_PP(0);
1964  text *arg2 = PG_GETARG_TEXT_PP(1);
1965  int result;
1966 
1967  result = internal_text_pattern_compare(arg1, arg2);
1968 
1969  PG_FREE_IF_COPY(arg1, 0);
1970  PG_FREE_IF_COPY(arg2, 1);
1971 
1972  PG_RETURN_BOOL(result < 0);
1973 }
1974 
1975 
1976 Datum
1978 {
1979  text *arg1 = PG_GETARG_TEXT_PP(0);
1980  text *arg2 = PG_GETARG_TEXT_PP(1);
1981  int result;
1982 
1983  result = internal_text_pattern_compare(arg1, arg2);
1984 
1985  PG_FREE_IF_COPY(arg1, 0);
1986  PG_FREE_IF_COPY(arg2, 1);
1987 
1988  PG_RETURN_BOOL(result <= 0);
1989 }
1990 
1991 
1992 Datum
1994 {
1995  text *arg1 = PG_GETARG_TEXT_PP(0);
1996  text *arg2 = PG_GETARG_TEXT_PP(1);
1997  int result;
1998 
1999  result = internal_text_pattern_compare(arg1, arg2);
2000 
2001  PG_FREE_IF_COPY(arg1, 0);
2002  PG_FREE_IF_COPY(arg2, 1);
2003 
2004  PG_RETURN_BOOL(result >= 0);
2005 }
2006 
2007 
2008 Datum
2010 {
2011  text *arg1 = PG_GETARG_TEXT_PP(0);
2012  text *arg2 = PG_GETARG_TEXT_PP(1);
2013  int result;
2014 
2015  result = internal_text_pattern_compare(arg1, arg2);
2016 
2017  PG_FREE_IF_COPY(arg1, 0);
2018  PG_FREE_IF_COPY(arg2, 1);
2019 
2020  PG_RETURN_BOOL(result > 0);
2021 }
2022 
2023 
2024 Datum
2026 {
2027  text *arg1 = PG_GETARG_TEXT_PP(0);
2028  text *arg2 = PG_GETARG_TEXT_PP(1);
2029  int result;
2030 
2031  result = internal_text_pattern_compare(arg1, arg2);
2032 
2033  PG_FREE_IF_COPY(arg1, 0);
2034  PG_FREE_IF_COPY(arg2, 1);
2035 
2036  PG_RETURN_INT32(result);
2037 }
2038 
2039 
2040 /*-------------------------------------------------------------
2041  * byteaoctetlen
2042  *
2043  * get the number of bytes contained in an instance of type 'bytea'
2044  *-------------------------------------------------------------
2045  */
2046 Datum
2048 {
2049  Datum str = PG_GETARG_DATUM(0);
2050 
2051  /* We need not detoast the input at all */
2053 }
2054 
2055 /*
2056  * byteacat -
2057  * takes two bytea* and returns a bytea* that is the concatenation of
2058  * the two.
2059  *
2060  * Cloned from textcat and modified as required.
2061  */
2062 Datum
2064 {
2065  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2066  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2067 
2069 }
2070 
2071 /*
2072  * bytea_catenate
2073  * Guts of byteacat(), broken out so it can be used by other functions
2074  *
2075  * Arguments can be in short-header form, but not compressed or out-of-line
2076  */
2077 static bytea *
2079 {
2080  bytea *result;
2081  int len1,
2082  len2,
2083  len;
2084  char *ptr;
2085 
2086  len1 = VARSIZE_ANY_EXHDR(t1);
2087  len2 = VARSIZE_ANY_EXHDR(t2);
2088 
2089  /* paranoia ... probably should throw error instead? */
2090  if (len1 < 0)
2091  len1 = 0;
2092  if (len2 < 0)
2093  len2 = 0;
2094 
2095  len = len1 + len2 + VARHDRSZ;
2096  result = (bytea *) palloc(len);
2097 
2098  /* Set size of result string... */
2099  SET_VARSIZE(result, len);
2100 
2101  /* Fill data field of result string... */
2102  ptr = VARDATA(result);
2103  if (len1 > 0)
2104  memcpy(ptr, VARDATA_ANY(t1), len1);
2105  if (len2 > 0)
2106  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2107 
2108  return result;
2109 }
2110 
2111 #define PG_STR_GET_BYTEA(str_) \
2112  DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2113 
2114 /*
2115  * bytea_substr()
2116  * Return a substring starting at the specified position.
2117  * Cloned from text_substr and modified as required.
2118  *
2119  * Input:
2120  * - string
2121  * - starting position (is one-based)
2122  * - string length (optional)
2123  *
2124  * If the starting position is zero or less, then return from the start of the string
2125  * adjusting the length to be consistent with the "negative start" per SQL.
2126  * If the length is less than zero, an ERROR is thrown. If no third argument
2127  * (length) is provided, the length to the end of the string is assumed.
2128  */
2129 Datum
2131 {
2133  PG_GETARG_INT32(1),
2134  PG_GETARG_INT32(2),
2135  false));
2136 }
2137 
2138 /*
2139  * bytea_substr_no_len -
2140  * Wrapper to avoid opr_sanity failure due to
2141  * one function accepting a different number of args.
2142  */
2143 Datum
2145 {
2147  PG_GETARG_INT32(1),
2148  -1,
2149  true));
2150 }
2151 
2152 static bytea *
2154  int S,
2155  int L,
2156  bool length_not_specified)
2157 {
2158  int S1; /* adjusted start position */
2159  int L1; /* adjusted substring length */
2160 
2161  S1 = Max(S, 1);
2162 
2163  if (length_not_specified)
2164  {
2165  /*
2166  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2167  * end of the string if we pass it a negative value for length.
2168  */
2169  L1 = -1;
2170  }
2171  else
2172  {
2173  /* end position */
2174  int E = S + L;
2175 
2176  /*
2177  * A negative value for L is the only way for the end position to be
2178  * before the start. SQL99 says to throw an error.
2179  */
2180  if (E < S)
2181  ereport(ERROR,
2182  (errcode(ERRCODE_SUBSTRING_ERROR),
2183  errmsg("negative substring length not allowed")));
2184 
2185  /*
2186  * A zero or negative value for the end position can happen if the
2187  * start was negative or one. SQL99 says to return a zero-length
2188  * string.
2189  */
2190  if (E < 1)
2191  return PG_STR_GET_BYTEA("");
2192 
2193  L1 = E - S1;
2194  }
2195 
2196  /*
2197  * If the start position is past the end of the string, SQL99 says to
2198  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2199  * us. Convert to zero-based starting position
2200  */
2201  return DatumGetByteaPSlice(str, S1 - 1, L1);
2202 }
2203 
2204 /*
2205  * byteaoverlay
2206  * Replace specified substring of first string with second
2207  *
2208  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2209  * This code is a direct implementation of what the standard says.
2210  */
2211 Datum
2213 {
2214  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2215  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2216  int sp = PG_GETARG_INT32(2); /* substring start position */
2217  int sl = PG_GETARG_INT32(3); /* substring length */
2218 
2219  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2220 }
2221 
2222 Datum
2224 {
2225  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2226  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2227  int sp = PG_GETARG_INT32(2); /* substring start position */
2228  int sl;
2229 
2230  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2231  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2232 }
2233 
2234 static bytea *
2235 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2236 {
2237  bytea *result;
2238  bytea *s1;
2239  bytea *s2;
2240  int sp_pl_sl;
2241 
2242  /*
2243  * Check for possible integer-overflow cases. For negative sp, throw a
2244  * "substring length" error because that's what should be expected
2245  * according to the spec's definition of OVERLAY().
2246  */
2247  if (sp <= 0)
2248  ereport(ERROR,
2249  (errcode(ERRCODE_SUBSTRING_ERROR),
2250  errmsg("negative substring length not allowed")));
2251  sp_pl_sl = sp + sl;
2252  if (sp_pl_sl <= sl)
2253  ereport(ERROR,
2254  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2255  errmsg("integer out of range")));
2256 
2257  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2258  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2259  result = bytea_catenate(s1, t2);
2260  result = bytea_catenate(result, s2);
2261 
2262  return result;
2263 }
2264 
2265 /*
2266  * byteapos -
2267  * Return the position of the specified substring.
2268  * Implements the SQL POSITION() function.
2269  * Cloned from textpos and modified as required.
2270  */
2271 Datum
2273 {
2274  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2275  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2276  int pos;
2277  int px,
2278  p;
2279  int len1,
2280  len2;
2281  char *p1,
2282  *p2;
2283 
2284  len1 = VARSIZE_ANY_EXHDR(t1);
2285  len2 = VARSIZE_ANY_EXHDR(t2);
2286 
2287  if (len2 <= 0)
2288  PG_RETURN_INT32(1); /* result for empty pattern */
2289 
2290  p1 = VARDATA_ANY(t1);
2291  p2 = VARDATA_ANY(t2);
2292 
2293  pos = 0;
2294  px = (len1 - len2);
2295  for (p = 0; p <= px; p++)
2296  {
2297  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2298  {
2299  pos = p + 1;
2300  break;
2301  };
2302  p1++;
2303  };
2304 
2305  PG_RETURN_INT32(pos);
2306 }
2307 
2308 /*-------------------------------------------------------------
2309  * byteaGetByte
2310  *
2311  * this routine treats "bytea" as an array of bytes.
2312  * It returns the Nth byte (a number between 0 and 255).
2313  *-------------------------------------------------------------
2314  */
2315 Datum
2317 {
2318  bytea *v = PG_GETARG_BYTEA_PP(0);
2319  int32 n = PG_GETARG_INT32(1);
2320  int len;
2321  int byte;
2322 
2323  len = VARSIZE_ANY_EXHDR(v);
2324 
2325  if (n < 0 || n >= len)
2326  ereport(ERROR,
2327  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2328  errmsg("index %d out of valid range, 0..%d",
2329  n, len - 1)));
2330 
2331  byte = ((unsigned char *) VARDATA_ANY(v))[n];
2332 
2333  PG_RETURN_INT32(byte);
2334 }
2335 
2336 /*-------------------------------------------------------------
2337  * byteaGetBit
2338  *
2339  * This routine treats a "bytea" type like an array of bits.
2340  * It returns the value of the Nth bit (0 or 1).
2341  *
2342  *-------------------------------------------------------------
2343  */
2344 Datum
2346 {
2347  bytea *v = PG_GETARG_BYTEA_PP(0);
2348  int32 n = PG_GETARG_INT32(1);
2349  int byteNo,
2350  bitNo;
2351  int len;
2352  int byte;
2353 
2354  len = VARSIZE_ANY_EXHDR(v);
2355 
2356  if (n < 0 || n >= len * 8)
2357  ereport(ERROR,
2358  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2359  errmsg("index %d out of valid range, 0..%d",
2360  n, len * 8 - 1)));
2361 
2362  byteNo = n / 8;
2363  bitNo = n % 8;
2364 
2365  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
2366 
2367  if (byte & (1 << bitNo))
2368  PG_RETURN_INT32(1);
2369  else
2370  PG_RETURN_INT32(0);
2371 }
2372 
2373 /*-------------------------------------------------------------
2374  * byteaSetByte
2375  *
2376  * Given an instance of type 'bytea' creates a new one with
2377  * the Nth byte set to the given value.
2378  *
2379  *-------------------------------------------------------------
2380  */
2381 Datum
2383 {
2384  bytea *v = PG_GETARG_BYTEA_P(0);
2385  int32 n = PG_GETARG_INT32(1);
2386  int32 newByte = PG_GETARG_INT32(2);
2387  int len;
2388  bytea *res;
2389 
2390  len = VARSIZE(v) - VARHDRSZ;
2391 
2392  if (n < 0 || n >= len)
2393  ereport(ERROR,
2394  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2395  errmsg("index %d out of valid range, 0..%d",
2396  n, len - 1)));
2397 
2398  /*
2399  * Make a copy of the original varlena.
2400  */
2401  res = (bytea *) palloc(VARSIZE(v));
2402  memcpy((char *) res, (char *) v, VARSIZE(v));
2403 
2404  /*
2405  * Now set the byte.
2406  */
2407  ((unsigned char *) VARDATA(res))[n] = newByte;
2408 
2409  PG_RETURN_BYTEA_P(res);
2410 }
2411 
2412 /*-------------------------------------------------------------
2413  * byteaSetBit
2414  *
2415  * Given an instance of type 'bytea' creates a new one with
2416  * the Nth bit set to the given value.
2417  *
2418  *-------------------------------------------------------------
2419  */
2420 Datum
2422 {
2423  bytea *v = PG_GETARG_BYTEA_P(0);
2424  int32 n = PG_GETARG_INT32(1);
2425  int32 newBit = PG_GETARG_INT32(2);
2426  bytea *res;
2427  int len;
2428  int oldByte,
2429  newByte;
2430  int byteNo,
2431  bitNo;
2432 
2433  len = VARSIZE(v) - VARHDRSZ;
2434 
2435  if (n < 0 || n >= len * 8)
2436  ereport(ERROR,
2437  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2438  errmsg("index %d out of valid range, 0..%d",
2439  n, len * 8 - 1)));
2440 
2441  byteNo = n / 8;
2442  bitNo = n % 8;
2443 
2444  /*
2445  * sanity check!
2446  */
2447  if (newBit != 0 && newBit != 1)
2448  ereport(ERROR,
2449  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2450  errmsg("new bit must be 0 or 1")));
2451 
2452  /*
2453  * Make a copy of the original varlena.
2454  */
2455  res = (bytea *) palloc(VARSIZE(v));
2456  memcpy((char *) res, (char *) v, VARSIZE(v));
2457 
2458  /*
2459  * Update the byte.
2460  */
2461  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
2462 
2463  if (newBit == 0)
2464  newByte = oldByte & (~(1 << bitNo));
2465  else
2466  newByte = oldByte | (1 << bitNo);
2467 
2468  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
2469 
2470  PG_RETURN_BYTEA_P(res);
2471 }
2472 
2473 
2474 /* text_name()
2475  * Converts a text type to a Name type.
2476  */
2477 Datum
2479 {
2480  text *s = PG_GETARG_TEXT_PP(0);
2481  Name result;
2482  int len;
2483 
2484  len = VARSIZE_ANY_EXHDR(s);
2485 
2486  /* Truncate oversize input */
2487  if (len >= NAMEDATALEN)
2488  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
2489 
2490  /* We use palloc0 here to ensure result is zero-padded */
2491  result = (Name) palloc0(NAMEDATALEN);
2492  memcpy(NameStr(*result), VARDATA_ANY(s), len);
2493 
2494  PG_RETURN_NAME(result);
2495 }
2496 
2497 /* name_text()
2498  * Converts a Name type to a text type.
2499  */
2500 Datum
2502 {
2503  Name s = PG_GETARG_NAME(0);
2504 
2506 }
2507 
2508 
2509 /*
2510  * textToQualifiedNameList - convert a text object to list of names
2511  *
2512  * This implements the input parsing needed by nextval() and other
2513  * functions that take a text parameter representing a qualified name.
2514  * We split the name at dots, downcase if not double-quoted, and
2515  * truncate names if they're too long.
2516  */
2517 List *
2519 {
2520  char *rawname;
2521  List *result = NIL;
2522  List *namelist;
2523  ListCell *l;
2524 
2525  /* Convert to C string (handles possible detoasting). */
2526  /* Note we rely on being able to modify rawname below. */
2527  rawname = text_to_cstring(textval);
2528 
2529  if (!SplitIdentifierString(rawname, '.', &namelist))
2530  ereport(ERROR,
2531  (errcode(ERRCODE_INVALID_NAME),
2532  errmsg("invalid name syntax")));
2533 
2534  if (namelist == NIL)
2535  ereport(ERROR,
2536  (errcode(ERRCODE_INVALID_NAME),
2537  errmsg("invalid name syntax")));
2538 
2539  foreach(l, namelist)
2540  {
2541  char *curname = (char *) lfirst(l);
2542 
2543  result = lappend(result, makeString(pstrdup(curname)));
2544  }
2545 
2546  pfree(rawname);
2547  list_free(namelist);
2548 
2549  return result;
2550 }
2551 
2552 /*
2553  * SplitIdentifierString --- parse a string containing identifiers
2554  *
2555  * This is the guts of textToQualifiedNameList, and is exported for use in
2556  * other situations such as parsing GUC variables. In the GUC case, it's
2557  * important to avoid memory leaks, so the API is designed to minimize the
2558  * amount of stuff that needs to be allocated and freed.
2559  *
2560  * Inputs:
2561  * rawstring: the input string; must be overwritable! On return, it's
2562  * been modified to contain the separated identifiers.
2563  * separator: the separator punctuation expected between identifiers
2564  * (typically '.' or ','). Whitespace may also appear around
2565  * identifiers.
2566  * Outputs:
2567  * namelist: filled with a palloc'd list of pointers to identifiers within
2568  * rawstring. Caller should list_free() this even on error return.
2569  *
2570  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2571  *
2572  * Note that an empty string is considered okay here, though not in
2573  * textToQualifiedNameList.
2574  */
2575 bool
2576 SplitIdentifierString(char *rawstring, char separator,
2577  List **namelist)
2578 {
2579  char *nextp = rawstring;
2580  bool done = false;
2581 
2582  *namelist = NIL;
2583 
2584  while (isspace((unsigned char) *nextp))
2585  nextp++; /* skip leading whitespace */
2586 
2587  if (*nextp == '\0')
2588  return true; /* allow empty string */
2589 
2590  /* At the top of the loop, we are at start of a new identifier. */
2591  do
2592  {
2593  char *curname;
2594  char *endp;
2595 
2596  if (*nextp == '\"')
2597  {
2598  /* Quoted name --- collapse quote-quote pairs, no downcasing */
2599  curname = nextp + 1;
2600  for (;;)
2601  {
2602  endp = strchr(nextp + 1, '\"');
2603  if (endp == NULL)
2604  return false; /* mismatched quotes */
2605  if (endp[1] != '\"')
2606  break; /* found end of quoted name */
2607  /* Collapse adjacent quotes into one quote, and look again */
2608  memmove(endp, endp + 1, strlen(endp));
2609  nextp = endp;
2610  }
2611  /* endp now points at the terminating quote */
2612  nextp = endp + 1;
2613  }
2614  else
2615  {
2616  /* Unquoted name --- extends to separator or whitespace */
2617  char *downname;
2618  int len;
2619 
2620  curname = nextp;
2621  while (*nextp && *nextp != separator &&
2622  !isspace((unsigned char) *nextp))
2623  nextp++;
2624  endp = nextp;
2625  if (curname == nextp)
2626  return false; /* empty unquoted name not allowed */
2627 
2628  /*
2629  * Downcase the identifier, using same code as main lexer does.
2630  *
2631  * XXX because we want to overwrite the input in-place, we cannot
2632  * support a downcasing transformation that increases the string
2633  * length. This is not a problem given the current implementation
2634  * of downcase_truncate_identifier, but we'll probably have to do
2635  * something about this someday.
2636  */
2637  len = endp - curname;
2638  downname = downcase_truncate_identifier(curname, len, false);
2639  Assert(strlen(downname) <= len);
2640  strncpy(curname, downname, len);
2641  pfree(downname);
2642  }
2643 
2644  while (isspace((unsigned char) *nextp))
2645  nextp++; /* skip trailing whitespace */
2646 
2647  if (*nextp == separator)
2648  {
2649  nextp++;
2650  while (isspace((unsigned char) *nextp))
2651  nextp++; /* skip leading whitespace for next */
2652  /* we expect another name, so done remains false */
2653  }
2654  else if (*nextp == '\0')
2655  done = true;
2656  else
2657  return false; /* invalid syntax */
2658 
2659  /* Now safe to overwrite separator with a null */
2660  *endp = '\0';
2661 
2662  /* Truncate name if it's overlength */
2663  truncate_identifier(curname, strlen(curname), false);
2664 
2665  /*
2666  * Finished isolating current name --- add it to list
2667  */
2668  *namelist = lappend(*namelist, curname);
2669 
2670  /* Loop back if we didn't reach end of string */
2671  } while (!done);
2672 
2673  return true;
2674 }
2675 
2676 
2677 /*
2678  * SplitDirectoriesString --- parse a string containing directory names
2679  *
2680  * This is similar to SplitIdentifierString, except that the parsing
2681  * rules are meant to handle pathnames instead of identifiers: there is
2682  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
2683  * and we apply canonicalize_path() to each extracted string. Because of the
2684  * last, the returned strings are separately palloc'd rather than being
2685  * pointers into rawstring --- but we still scribble on rawstring.
2686  *
2687  * Inputs:
2688  * rawstring: the input string; must be modifiable!
2689  * separator: the separator punctuation expected between directories
2690  * (typically ',' or ';'). Whitespace may also appear around
2691  * directories.
2692  * Outputs:
2693  * namelist: filled with a palloc'd list of directory names.
2694  * Caller should list_free_deep() this even on error return.
2695  *
2696  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2697  *
2698  * Note that an empty string is considered okay here.
2699  */
2700 bool
2701 SplitDirectoriesString(char *rawstring, char separator,
2702  List **namelist)
2703 {
2704  char *nextp = rawstring;
2705  bool done = false;
2706 
2707  *namelist = NIL;
2708 
2709  while (isspace((unsigned char) *nextp))
2710  nextp++; /* skip leading whitespace */
2711 
2712  if (*nextp == '\0')
2713  return true; /* allow empty string */
2714 
2715  /* At the top of the loop, we are at start of a new directory. */
2716  do
2717  {
2718  char *curname;
2719  char *endp;
2720 
2721  if (*nextp == '\"')
2722  {
2723  /* Quoted name --- collapse quote-quote pairs */
2724  curname = nextp + 1;
2725  for (;;)
2726  {
2727  endp = strchr(nextp + 1, '\"');
2728  if (endp == NULL)
2729  return false; /* mismatched quotes */
2730  if (endp[1] != '\"')
2731  break; /* found end of quoted name */
2732  /* Collapse adjacent quotes into one quote, and look again */
2733  memmove(endp, endp + 1, strlen(endp));
2734  nextp = endp;
2735  }
2736  /* endp now points at the terminating quote */
2737  nextp = endp + 1;
2738  }
2739  else
2740  {
2741  /* Unquoted name --- extends to separator or end of string */
2742  curname = endp = nextp;
2743  while (*nextp && *nextp != separator)
2744  {
2745  /* trailing whitespace should not be included in name */
2746  if (!isspace((unsigned char) *nextp))
2747  endp = nextp + 1;
2748  nextp++;
2749  }
2750  if (curname == endp)
2751  return false; /* empty unquoted name not allowed */
2752  }
2753 
2754  while (isspace((unsigned char) *nextp))
2755  nextp++; /* skip trailing whitespace */
2756 
2757  if (*nextp == separator)
2758  {
2759  nextp++;
2760  while (isspace((unsigned char) *nextp))
2761  nextp++; /* skip leading whitespace for next */
2762  /* we expect another name, so done remains false */
2763  }
2764  else if (*nextp == '\0')
2765  done = true;
2766  else
2767  return false; /* invalid syntax */
2768 
2769  /* Now safe to overwrite separator with a null */
2770  *endp = '\0';
2771 
2772  /* Truncate path if it's overlength */
2773  if (strlen(curname) >= MAXPGPATH)
2774  curname[MAXPGPATH - 1] = '\0';
2775 
2776  /*
2777  * Finished isolating current name --- add it to list
2778  */
2779  curname = pstrdup(curname);
2780  canonicalize_path(curname);
2781  *namelist = lappend(*namelist, curname);
2782 
2783  /* Loop back if we didn't reach end of string */
2784  } while (!done);
2785 
2786  return true;
2787 }
2788 
2789 
2790 /*****************************************************************************
2791  * Comparison Functions used for bytea
2792  *
2793  * Note: btree indexes need these routines not to leak memory; therefore,
2794  * be careful to free working copies of toasted datums. Most places don't
2795  * need to be so careful.
2796  *****************************************************************************/
2797 
2798 Datum
2800 {
2801  Datum arg1 = PG_GETARG_DATUM(0);
2802  Datum arg2 = PG_GETARG_DATUM(1);
2803  bool result;
2804  Size len1,
2805  len2;
2806 
2807  /*
2808  * We can use a fast path for unequal lengths, which might save us from
2809  * having to detoast one or both values.
2810  */
2811  len1 = toast_raw_datum_size(arg1);
2812  len2 = toast_raw_datum_size(arg2);
2813  if (len1 != len2)
2814  result = false;
2815  else
2816  {
2817  bytea *barg1 = DatumGetByteaPP(arg1);
2818  bytea *barg2 = DatumGetByteaPP(arg2);
2819 
2820  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
2821  len1 - VARHDRSZ) == 0);
2822 
2823  PG_FREE_IF_COPY(barg1, 0);
2824  PG_FREE_IF_COPY(barg2, 1);
2825  }
2826 
2827  PG_RETURN_BOOL(result);
2828 }
2829 
2830 Datum
2832 {
2833  Datum arg1 = PG_GETARG_DATUM(0);
2834  Datum arg2 = PG_GETARG_DATUM(1);
2835  bool result;
2836  Size len1,
2837  len2;
2838 
2839  /*
2840  * We can use a fast path for unequal lengths, which might save us from
2841  * having to detoast one or both values.
2842  */
2843  len1 = toast_raw_datum_size(arg1);
2844  len2 = toast_raw_datum_size(arg2);
2845  if (len1 != len2)
2846  result = true;
2847  else
2848  {
2849  bytea *barg1 = DatumGetByteaPP(arg1);
2850  bytea *barg2 = DatumGetByteaPP(arg2);
2851 
2852  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
2853  len1 - VARHDRSZ) != 0);
2854 
2855  PG_FREE_IF_COPY(barg1, 0);
2856  PG_FREE_IF_COPY(barg2, 1);
2857  }
2858 
2859  PG_RETURN_BOOL(result);
2860 }
2861 
2862 Datum
2864 {
2865  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2866  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2867  int len1,
2868  len2;
2869  int cmp;
2870 
2871  len1 = VARSIZE_ANY_EXHDR(arg1);
2872  len2 = VARSIZE_ANY_EXHDR(arg2);
2873 
2874  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2875 
2876  PG_FREE_IF_COPY(arg1, 0);
2877  PG_FREE_IF_COPY(arg2, 1);
2878 
2879  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
2880 }
2881 
2882 Datum
2884 {
2885  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2886  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2887  int len1,
2888  len2;
2889  int cmp;
2890 
2891  len1 = VARSIZE_ANY_EXHDR(arg1);
2892  len2 = VARSIZE_ANY_EXHDR(arg2);
2893 
2894  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2895 
2896  PG_FREE_IF_COPY(arg1, 0);
2897  PG_FREE_IF_COPY(arg2, 1);
2898 
2899  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
2900 }
2901 
2902 Datum
2904 {
2905  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2906  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2907  int len1,
2908  len2;
2909  int cmp;
2910 
2911  len1 = VARSIZE_ANY_EXHDR(arg1);
2912  len2 = VARSIZE_ANY_EXHDR(arg2);
2913 
2914  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2915 
2916  PG_FREE_IF_COPY(arg1, 0);
2917  PG_FREE_IF_COPY(arg2, 1);
2918 
2919  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
2920 }
2921 
2922 Datum
2924 {
2925  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2926  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2927  int len1,
2928  len2;
2929  int cmp;
2930 
2931  len1 = VARSIZE_ANY_EXHDR(arg1);
2932  len2 = VARSIZE_ANY_EXHDR(arg2);
2933 
2934  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2935 
2936  PG_FREE_IF_COPY(arg1, 0);
2937  PG_FREE_IF_COPY(arg2, 1);
2938 
2939  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
2940 }
2941 
2942 Datum
2944 {
2945  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2946  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2947  int len1,
2948  len2;
2949  int cmp;
2950 
2951  len1 = VARSIZE_ANY_EXHDR(arg1);
2952  len2 = VARSIZE_ANY_EXHDR(arg2);
2953 
2954  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2955  if ((cmp == 0) && (len1 != len2))
2956  cmp = (len1 < len2) ? -1 : 1;
2957 
2958  PG_FREE_IF_COPY(arg1, 0);
2959  PG_FREE_IF_COPY(arg2, 1);
2960 
2961  PG_RETURN_INT32(cmp);
2962 }
2963 
2964 /*
2965  * appendStringInfoText
2966  *
2967  * Append a text to str.
2968  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2969  */
2970 static void
2972 {
2974 }
2975 
2976 /*
2977  * replace_text
2978  * replace all occurrences of 'old_sub_str' in 'orig_str'
2979  * with 'new_sub_str' to form 'new_str'
2980  *
2981  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2982  * otherwise returns 'new_str'
2983  */
2984 Datum
2986 {
2987  text *src_text = PG_GETARG_TEXT_PP(0);
2988  text *from_sub_text = PG_GETARG_TEXT_PP(1);
2989  text *to_sub_text = PG_GETARG_TEXT_PP(2);
2990  int src_text_len;
2991  int from_sub_text_len;
2993  text *ret_text;
2994  int start_posn;
2995  int curr_posn;
2996  int chunk_len;
2997  char *start_ptr;
2998  StringInfoData str;
2999 
3000  text_position_setup(src_text, from_sub_text, &state);
3001 
3002  /*
3003  * Note: we check the converted string length, not the original, because
3004  * they could be different if the input contained invalid encoding.
3005  */
3006  src_text_len = state.len1;
3007  from_sub_text_len = state.len2;
3008 
3009  /* Return unmodified source string if empty source or pattern */
3010  if (src_text_len < 1 || from_sub_text_len < 1)
3011  {
3012  text_position_cleanup(&state);
3013  PG_RETURN_TEXT_P(src_text);
3014  }
3015 
3016  start_posn = 1;
3017  curr_posn = text_position_next(1, &state);
3018 
3019  /* When the from_sub_text is not found, there is nothing to do. */
3020  if (curr_posn == 0)
3021  {
3022  text_position_cleanup(&state);
3023  PG_RETURN_TEXT_P(src_text);
3024  }
3025 
3026  /* start_ptr points to the start_posn'th character of src_text */
3027  start_ptr = VARDATA_ANY(src_text);
3028 
3029  initStringInfo(&str);
3030 
3031  do
3032  {
3034 
3035  /* copy the data skipped over by last text_position_next() */
3036  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3037  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3038 
3039  appendStringInfoText(&str, to_sub_text);
3040 
3041  start_posn = curr_posn;
3042  start_ptr += chunk_len;
3043  start_posn += from_sub_text_len;
3044  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3045 
3046  curr_posn = text_position_next(start_posn, &state);
3047  }
3048  while (curr_posn > 0);
3049 
3050  /* copy trailing data */
3051  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3052  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3053 
3054  text_position_cleanup(&state);
3055 
3056  ret_text = cstring_to_text_with_len(str.data, str.len);
3057  pfree(str.data);
3058 
3059  PG_RETURN_TEXT_P(ret_text);
3060 }
3061 
3062 /*
3063  * check_replace_text_has_escape_char
3064  *
3065  * check whether replace_text contains escape char.
3066  */
3067 static bool
3069 {
3070  const char *p = VARDATA_ANY(replace_text);
3071  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3072 
3074  {
3075  for (; p < p_end; p++)
3076  {
3077  if (*p == '\\')
3078  return true;
3079  }
3080  }
3081  else
3082  {
3083  for (; p < p_end; p += pg_mblen(p))
3084  {
3085  if (*p == '\\')
3086  return true;
3087  }
3088  }
3089 
3090  return false;
3091 }
3092 
3093 /*
3094  * appendStringInfoRegexpSubstr
3095  *
3096  * Append replace_text to str, substituting regexp back references for
3097  * \n escapes. start_ptr is the start of the match in the source string,
3098  * at logical character position data_pos.
3099  */
3100 static void
3102  regmatch_t *pmatch,
3103  char *start_ptr, int data_pos)
3104 {
3105  const char *p = VARDATA_ANY(replace_text);
3106  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3107  int eml = pg_database_encoding_max_length();
3108 
3109  for (;;)
3110  {
3111  const char *chunk_start = p;
3112  int so;
3113  int eo;
3114 
3115  /* Find next escape char. */
3116  if (eml == 1)
3117  {
3118  for (; p < p_end && *p != '\\'; p++)
3119  /* nothing */ ;
3120  }
3121  else
3122  {
3123  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3124  /* nothing */ ;
3125  }
3126 
3127  /* Copy the text we just scanned over, if any. */
3128  if (p > chunk_start)
3129  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3130 
3131  /* Done if at end of string, else advance over escape char. */
3132  if (p >= p_end)
3133  break;
3134  p++;
3135 
3136  if (p >= p_end)
3137  {
3138  /* Escape at very end of input. Treat same as unexpected char */
3139  appendStringInfoChar(str, '\\');
3140  break;
3141  }
3142 
3143  if (*p >= '1' && *p <= '9')
3144  {
3145  /* Use the back reference of regexp. */
3146  int idx = *p - '0';
3147 
3148  so = pmatch[idx].rm_so;
3149  eo = pmatch[idx].rm_eo;
3150  p++;
3151  }
3152  else if (*p == '&')
3153  {
3154  /* Use the entire matched string. */
3155  so = pmatch[0].rm_so;
3156  eo = pmatch[0].rm_eo;
3157  p++;
3158  }
3159  else if (*p == '\\')
3160  {
3161  /* \\ means transfer one \ to output. */
3162  appendStringInfoChar(str, '\\');
3163  p++;
3164  continue;
3165  }
3166  else
3167  {
3168  /*
3169  * If escape char is not followed by any expected char, just treat
3170  * it as ordinary data to copy. (XXX would it be better to throw
3171  * an error?)
3172  */
3173  appendStringInfoChar(str, '\\');
3174  continue;
3175  }
3176 
3177  if (so != -1 && eo != -1)
3178  {
3179  /*
3180  * Copy the text that is back reference of regexp. Note so and eo
3181  * are counted in characters not bytes.
3182  */
3183  char *chunk_start;
3184  int chunk_len;
3185 
3186  Assert(so >= data_pos);
3187  chunk_start = start_ptr;
3188  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3189  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3190  appendBinaryStringInfo(str, chunk_start, chunk_len);
3191  }
3192  }
3193 }
3194 
3195 #define REGEXP_REPLACE_BACKREF_CNT 10
3196 
3197 /*
3198  * replace_text_regexp
3199  *
3200  * replace text that matches to regexp in src_text to replace_text.
3201  *
3202  * Note: to avoid having to include regex.h in builtins.h, we declare
3203  * the regexp argument as void *, but really it's regex_t *.
3204  */
3205 text *
3206 replace_text_regexp(text *src_text, void *regexp,
3207  text *replace_text, bool glob)
3208 {
3209  text *ret_text;
3210  regex_t *re = (regex_t *) regexp;
3211  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3214  pg_wchar *data;
3215  size_t data_len;
3216  int search_start;
3217  int data_pos;
3218  char *start_ptr;
3219  bool have_escape;
3220 
3221  initStringInfo(&buf);
3222 
3223  /* Convert data string to wide characters. */
3224  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3225  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3226 
3227  /* Check whether replace_text has escape char. */
3228  have_escape = check_replace_text_has_escape_char(replace_text);
3229 
3230  /* start_ptr points to the data_pos'th character of src_text */
3231  start_ptr = (char *) VARDATA_ANY(src_text);
3232  data_pos = 0;
3233 
3234  search_start = 0;
3235  while (search_start <= data_len)
3236  {
3237  int regexec_result;
3238 
3240 
3241  regexec_result = pg_regexec(re,
3242  data,
3243  data_len,
3244  search_start,
3245  NULL, /* no details */
3247  pmatch,
3248  0);
3249 
3250  if (regexec_result == REG_NOMATCH)
3251  break;
3252 
3253  if (regexec_result != REG_OKAY)
3254  {
3255  char errMsg[100];
3256 
3258  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3259  ereport(ERROR,
3260  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3261  errmsg("regular expression failed: %s", errMsg)));
3262  }
3263 
3264  /*
3265  * Copy the text to the left of the match position. Note we are given
3266  * character not byte indexes.
3267  */
3268  if (pmatch[0].rm_so - data_pos > 0)
3269  {
3270  int chunk_len;
3271 
3272  chunk_len = charlen_to_bytelen(start_ptr,
3273  pmatch[0].rm_so - data_pos);
3274  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3275 
3276  /*
3277  * Advance start_ptr over that text, to avoid multiple rescans of
3278  * it if the replace_text contains multiple back-references.
3279  */
3280  start_ptr += chunk_len;
3281  data_pos = pmatch[0].rm_so;
3282  }
3283 
3284  /*
3285  * Copy the replace_text. Process back references when the
3286  * replace_text has escape characters.
3287  */
3288  if (have_escape)
3289  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3290  start_ptr, data_pos);
3291  else
3292  appendStringInfoText(&buf, replace_text);
3293 
3294  /* Advance start_ptr and data_pos over the matched text. */
3295  start_ptr += charlen_to_bytelen(start_ptr,
3296  pmatch[0].rm_eo - data_pos);
3297  data_pos = pmatch[0].rm_eo;
3298 
3299  /*
3300  * When global option is off, replace the first instance only.
3301  */
3302  if (!glob)
3303  break;
3304 
3305  /*
3306  * Advance search position. Normally we start the next search at the
3307  * end of the previous match; but if the match was of zero length, we
3308  * have to advance by one character, or we'd just find the same match
3309  * again.
3310  */
3311  search_start = data_pos;
3312  if (pmatch[0].rm_so == pmatch[0].rm_eo)
3313  search_start++;
3314  }
3315 
3316  /*
3317  * Copy the text to the right of the last match.
3318  */
3319  if (data_pos < data_len)
3320  {
3321  int chunk_len;
3322 
3323  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3324  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3325  }
3326 
3327  ret_text = cstring_to_text_with_len(buf.data, buf.len);
3328  pfree(buf.data);
3329  pfree(data);
3330 
3331  return ret_text;
3332 }
3333 
3334 /*
3335  * split_text
3336  * parse input string
3337  * return ord item (1 based)
3338  * based on provided field separator
3339  */
3340 Datum
3342 {
3343  text *inputstring = PG_GETARG_TEXT_PP(0);
3344  text *fldsep = PG_GETARG_TEXT_PP(1);
3345  int fldnum = PG_GETARG_INT32(2);
3346  int inputstring_len;
3347  int fldsep_len;
3349  int start_posn;
3350  int end_posn;
3351  text *result_text;
3352 
3353  /* field number is 1 based */
3354  if (fldnum < 1)
3355  ereport(ERROR,
3356  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3357  errmsg("field position must be greater than zero")));
3358 
3359  text_position_setup(inputstring, fldsep, &state);
3360 
3361  /*
3362  * Note: we check the converted string length, not the original, because
3363  * they could be different if the input contained invalid encoding.
3364  */
3365  inputstring_len = state.len1;
3366  fldsep_len = state.len2;
3367 
3368  /* return empty string for empty input string */
3369  if (inputstring_len < 1)
3370  {
3371  text_position_cleanup(&state);
3373  }
3374 
3375  /* empty field separator */
3376  if (fldsep_len < 1)
3377  {
3378  text_position_cleanup(&state);
3379  /* if first field, return input string, else empty string */
3380  if (fldnum == 1)
3381  PG_RETURN_TEXT_P(inputstring);
3382  else
3384  }
3385 
3386  /* identify bounds of first field */
3387  start_posn = 1;
3388  end_posn = text_position_next(1, &state);
3389 
3390  /* special case if fldsep not found at all */
3391  if (end_posn == 0)
3392  {
3393  text_position_cleanup(&state);
3394  /* if field 1 requested, return input string, else empty string */
3395  if (fldnum == 1)
3396  PG_RETURN_TEXT_P(inputstring);
3397  else
3399  }
3400 
3401  while (end_posn > 0 && --fldnum > 0)
3402  {
3403  /* identify bounds of next field */
3404  start_posn = end_posn + fldsep_len;
3405  end_posn = text_position_next(start_posn, &state);
3406  }
3407 
3408  text_position_cleanup(&state);
3409 
3410  if (fldnum > 0)
3411  {
3412  /* N'th field separator not found */
3413  /* if last field requested, return it, else empty string */
3414  if (fldnum == 1)
3415  result_text = text_substring(PointerGetDatum(inputstring),
3416  start_posn,
3417  -1,
3418  true);
3419  else
3420  result_text = cstring_to_text("");
3421  }
3422  else
3423  {
3424  /* non-last field requested */
3425  result_text = text_substring(PointerGetDatum(inputstring),
3426  start_posn,
3427  end_posn - start_posn,
3428  false);
3429  }
3430 
3431  PG_RETURN_TEXT_P(result_text);
3432 }
3433 
3434 /*
3435  * Convenience function to return true when two text params are equal.
3436  */
3437 static bool
3438 text_isequal(text *txt1, text *txt2)
3439 {
3441  PointerGetDatum(txt1),
3442  PointerGetDatum(txt2)));
3443 }
3444 
3445 /*
3446  * text_to_array
3447  * parse input string and return text array of elements,
3448  * based on provided field separator
3449  */
3450 Datum
3452 {
3453  return text_to_array_internal(fcinfo);
3454 }
3455 
3456 /*
3457  * text_to_array_null
3458  * parse input string and return text array of elements,
3459  * based on provided field separator and null string
3460  *
3461  * This is a separate entry point only to prevent the regression tests from
3462  * complaining about different argument sets for the same internal function.
3463  */
3464 Datum
3466 {
3467  return text_to_array_internal(fcinfo);
3468 }
3469 
3470 /*
3471  * common code for text_to_array and text_to_array_null functions
3472  *
3473  * These are not strict so we have to test for null inputs explicitly.
3474  */
3475 static Datum
3477 {
3478  text *inputstring;
3479  text *fldsep;
3480  text *null_string;
3481  int inputstring_len;
3482  int fldsep_len;
3483  char *start_ptr;
3484  text *result_text;
3485  bool is_null;
3486  ArrayBuildState *astate = NULL;
3487 
3488  /* when input string is NULL, then result is NULL too */
3489  if (PG_ARGISNULL(0))
3490  PG_RETURN_NULL();
3491 
3492  inputstring = PG_GETARG_TEXT_PP(0);
3493 
3494  /* fldsep can be NULL */
3495  if (!PG_ARGISNULL(1))
3496  fldsep = PG_GETARG_TEXT_PP(1);
3497  else
3498  fldsep = NULL;
3499 
3500  /* null_string can be NULL or omitted */
3501  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
3502  null_string = PG_GETARG_TEXT_PP(2);
3503  else
3504  null_string = NULL;
3505 
3506  if (fldsep != NULL)
3507  {
3508  /*
3509  * Normal case with non-null fldsep. Use the text_position machinery
3510  * to search for occurrences of fldsep.
3511  */
3513  int fldnum;
3514  int start_posn;
3515  int end_posn;
3516  int chunk_len;
3517 
3518  text_position_setup(inputstring, fldsep, &state);
3519 
3520  /*
3521  * Note: we check the converted string length, not the original,
3522  * because they could be different if the input contained invalid
3523  * encoding.
3524  */
3525  inputstring_len = state.len1;
3526  fldsep_len = state.len2;
3527 
3528  /* return empty array for empty input string */
3529  if (inputstring_len < 1)
3530  {
3531  text_position_cleanup(&state);
3533  }
3534 
3535  /*
3536  * empty field separator: return the input string as a one-element
3537  * array
3538  */
3539  if (fldsep_len < 1)
3540  {
3541  text_position_cleanup(&state);
3542  /* single element can be a NULL too */
3543  is_null = null_string ? text_isequal(inputstring, null_string) : false;
3545  PointerGetDatum(inputstring),
3546  is_null, 1));
3547  }
3548 
3549  start_posn = 1;
3550  /* start_ptr points to the start_posn'th character of inputstring */
3551  start_ptr = VARDATA_ANY(inputstring);
3552 
3553  for (fldnum = 1;; fldnum++) /* field number is 1 based */
3554  {
3556 
3557  end_posn = text_position_next(start_posn, &state);
3558 
3559  if (end_posn == 0)
3560  {
3561  /* fetch last field */
3562  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
3563  }
3564  else
3565  {
3566  /* fetch non-last field */
3567  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
3568  }
3569 
3570  /* must build a temp text datum to pass to accumArrayResult */
3571  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3572  is_null = null_string ? text_isequal(result_text, null_string) : false;
3573 
3574  /* stash away this field */
3575  astate = accumArrayResult(astate,
3576  PointerGetDatum(result_text),
3577  is_null,
3578  TEXTOID,
3580 
3581  pfree(result_text);
3582 
3583  if (end_posn == 0)
3584  break;
3585 
3586  start_posn = end_posn;
3587  start_ptr += chunk_len;
3588  start_posn += fldsep_len;
3589  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
3590  }
3591 
3592  text_position_cleanup(&state);
3593  }
3594  else
3595  {
3596  /*
3597  * When fldsep is NULL, each character in the inputstring becomes an
3598  * element in the result array. The separator is effectively the
3599  * space between characters.
3600  */
3601  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
3602 
3603  /* return empty array for empty input string */
3604  if (inputstring_len < 1)
3606 
3607  start_ptr = VARDATA_ANY(inputstring);
3608 
3609  while (inputstring_len > 0)
3610  {
3611  int chunk_len = pg_mblen(start_ptr);
3612 
3614 
3615  /* must build a temp text datum to pass to accumArrayResult */
3616  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3617  is_null = null_string ? text_isequal(result_text, null_string) : false;
3618 
3619  /* stash away this field */
3620  astate = accumArrayResult(astate,
3621  PointerGetDatum(result_text),
3622  is_null,
3623  TEXTOID,
3625 
3626  pfree(result_text);
3627 
3628  start_ptr += chunk_len;
3629  inputstring_len -= chunk_len;
3630  }
3631  }
3632 
3635 }
3636 
3637 /*
3638  * array_to_text
3639  * concatenate Cstring representation of input array elements
3640  * using provided field separator
3641  */
3642 Datum
3644 {
3646  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3647 
3648  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
3649 }
3650 
3651 /*
3652  * array_to_text_null
3653  * concatenate Cstring representation of input array elements
3654  * using provided field separator and null string
3655  *
3656  * This version is not strict so we have to test for null inputs explicitly.
3657  */
3658 Datum
3660 {
3661  ArrayType *v;
3662  char *fldsep;
3663  char *null_string;
3664 
3665  /* returns NULL when first or second parameter is NULL */
3666  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
3667  PG_RETURN_NULL();
3668 
3669  v = PG_GETARG_ARRAYTYPE_P(0);
3670  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3671 
3672  /* NULL null string is passed through as a null pointer */
3673  if (!PG_ARGISNULL(2))
3674  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
3675  else
3676  null_string = NULL;
3677 
3678  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
3679 }
3680 
3681 /*
3682  * common code for array_to_text and array_to_text_null functions
3683  */
3684 static text *
3686  const char *fldsep, const char *null_string)
3687 {
3688  text *result;
3689  int nitems,
3690  *dims,
3691  ndims;
3692  Oid element_type;
3693  int typlen;
3694  bool typbyval;
3695  char typalign;
3697  bool printed = false;
3698  char *p;
3699  bits8 *bitmap;
3700  int bitmask;
3701  int i;
3702  ArrayMetaState *my_extra;
3703 
3704  ndims = ARR_NDIM(v);
3705  dims = ARR_DIMS(v);
3706  nitems = ArrayGetNItems(ndims, dims);
3707 
3708  /* if there are no elements, return an empty string */
3709  if (nitems == 0)
3710  return cstring_to_text_with_len("", 0);
3711 
3712  element_type = ARR_ELEMTYPE(v);
3713  initStringInfo(&buf);
3714 
3715  /*
3716  * We arrange to look up info about element type, including its output
3717  * conversion proc, only once per series of calls, assuming the element
3718  * type doesn't change underneath us.
3719  */
3720  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
3721  if (my_extra == NULL)
3722  {
3723  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3724  sizeof(ArrayMetaState));
3725  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
3726  my_extra->element_type = ~element_type;
3727  }
3728 
3729  if (my_extra->element_type != element_type)
3730  {
3731  /*
3732  * Get info about element type, including its output conversion proc
3733  */
3734  get_type_io_data(element_type, IOFunc_output,
3735  &my_extra->typlen, &my_extra->typbyval,
3736  &my_extra->typalign, &my_extra->typdelim,
3737  &my_extra->typioparam, &my_extra->typiofunc);
3738  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
3739  fcinfo->flinfo->fn_mcxt);
3740  my_extra->element_type = element_type;
3741  }
3742  typlen = my_extra->typlen;
3743  typbyval = my_extra->typbyval;
3744  typalign = my_extra->typalign;
3745 
3746  p = ARR_DATA_PTR(v);
3747  bitmap = ARR_NULLBITMAP(v);
3748  bitmask = 1;
3749 
3750  for (i = 0; i < nitems; i++)
3751  {
3752  Datum itemvalue;
3753  char *value;
3754 
3755  /* Get source element, checking for NULL */
3756  if (bitmap && (*bitmap & bitmask) == 0)
3757  {
3758  /* if null_string is NULL, we just ignore null elements */
3759  if (null_string != NULL)
3760  {
3761  if (printed)
3762  appendStringInfo(&buf, "%s%s", fldsep, null_string);
3763  else
3764  appendStringInfoString(&buf, null_string);
3765  printed = true;
3766  }
3767  }
3768  else
3769  {
3770  itemvalue = fetch_att(p, typbyval, typlen);
3771 
3772  value = OutputFunctionCall(&my_extra->proc, itemvalue);
3773 
3774  if (printed)
3775  appendStringInfo(&buf, "%s%s", fldsep, value);
3776  else
3777  appendStringInfoString(&buf, value);
3778  printed = true;
3779 
3780  p = att_addlength_pointer(p, typlen, p);
3781  p = (char *) att_align_nominal(p, typalign);
3782  }
3783 
3784  /* advance bitmap pointer if any */
3785  if (bitmap)
3786  {
3787  bitmask <<= 1;
3788  if (bitmask == 0x100)
3789  {
3790  bitmap++;
3791  bitmask = 1;
3792  }
3793  }
3794  }
3795 
3796  result = cstring_to_text_with_len(buf.data, buf.len);
3797  pfree(buf.data);
3798 
3799  return result;
3800 }
3801 
3802 #define HEXBASE 16
3803 /*
3804  * Convert an int32 to a string containing a base 16 (hex) representation of
3805  * the number.
3806  */
3807 Datum
3809 {
3811  char *ptr;
3812  const char *digits = "0123456789abcdef";
3813  char buf[32]; /* bigger than needed, but reasonable */
3814 
3815  ptr = buf + sizeof(buf) - 1;
3816  *ptr = '\0';
3817 
3818  do
3819  {
3820  *--ptr = digits[value % HEXBASE];
3821  value /= HEXBASE;
3822  } while (ptr > buf && value);
3823 
3825 }
3826 
3827 /*
3828  * Convert an int64 to a string containing a base 16 (hex) representation of
3829  * the number.
3830  */
3831 Datum
3833 {
3834  uint64 value = (uint64) PG_GETARG_INT64(0);
3835  char *ptr;
3836  const char *digits = "0123456789abcdef";
3837  char buf[32]; /* bigger than needed, but reasonable */
3838 
3839  ptr = buf + sizeof(buf) - 1;
3840  *ptr = '\0';
3841 
3842  do
3843  {
3844  *--ptr = digits[value % HEXBASE];
3845  value /= HEXBASE;
3846  } while (ptr > buf && value);
3847 
3849 }
3850 
3851 /*
3852  * Create an md5 hash of a text string and return it as hex
3853  *
3854  * md5 produces a 16 byte (128 bit) hash; double it for hex
3855  */
3856 #define MD5_HASH_LEN 32
3857 
3858 Datum
3860 {
3861  text *in_text = PG_GETARG_TEXT_PP(0);
3862  size_t len;
3863  char hexsum[MD5_HASH_LEN + 1];
3864 
3865  /* Calculate the length of the buffer using varlena metadata */
3866  len = VARSIZE_ANY_EXHDR(in_text);
3867 
3868  /* get the hash result */
3869  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
3870  ereport(ERROR,
3871  (errcode(ERRCODE_OUT_OF_MEMORY),
3872  errmsg("out of memory")));
3873 
3874  /* convert to text and return it */
3876 }
3877 
3878 /*
3879  * Create an md5 hash of a bytea field and return it as a hex string:
3880  * 16-byte md5 digest is represented in 32 hex characters.
3881  */
3882 Datum
3884 {
3885  bytea *in = PG_GETARG_BYTEA_PP(0);
3886  size_t len;
3887  char hexsum[MD5_HASH_LEN + 1];
3888 
3889  len = VARSIZE_ANY_EXHDR(in);
3890  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
3891  ereport(ERROR,
3892  (errcode(ERRCODE_OUT_OF_MEMORY),
3893  errmsg("out of memory")));
3894 
3896 }
3897 
3898 /*
3899  * Return the size of a datum, possibly compressed
3900  *
3901  * Works on any data type
3902  */
3903 Datum
3905 {
3907  int32 result;
3908  int typlen;
3909 
3910  /* On first call, get the input type's typlen, and save at *fn_extra */
3911  if (fcinfo->flinfo->fn_extra == NULL)
3912  {
3913  /* Lookup the datatype of the supplied argument */
3914  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
3915 
3916  typlen = get_typlen(argtypeid);
3917  if (typlen == 0) /* should not happen */
3918  elog(ERROR, "cache lookup failed for type %u", argtypeid);
3919 
3920  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3921  sizeof(int));
3922  *((int *) fcinfo->flinfo->fn_extra) = typlen;
3923  }
3924  else
3925  typlen = *((int *) fcinfo->flinfo->fn_extra);
3926 
3927  if (typlen == -1)
3928  {
3929  /* varlena type, possibly toasted */
3930  result = toast_datum_size(value);
3931  }
3932  else if (typlen == -2)
3933  {
3934  /* cstring */
3935  result = strlen(DatumGetCString(value)) + 1;
3936  }
3937  else
3938  {
3939  /* ordinary fixed-width type */
3940  result = typlen;
3941  }
3942 
3943  PG_RETURN_INT32(result);
3944 }
3945 
3946 /*
3947  * string_agg - Concatenates values and returns string.
3948  *
3949  * Syntax: string_agg(value text, delimiter text) RETURNS text
3950  *
3951  * Note: Any NULL values are ignored. The first-call delimiter isn't
3952  * actually used at all, and on subsequent calls the delimiter precedes
3953  * the associated value.
3954  */
3955 
3956 /* subroutine to initialize state */
3957 static StringInfo
3959 {
3960  StringInfo state;
3961  MemoryContext aggcontext;
3962  MemoryContext oldcontext;
3963 
3964  if (!AggCheckCallContext(fcinfo, &aggcontext))
3965  {
3966  /* cannot be called directly because of internal-type argument */
3967  elog(ERROR, "string_agg_transfn called in non-aggregate context");
3968  }
3969 
3970  /*
3971  * Create state in aggregate context. It'll stay there across subsequent
3972  * calls.
3973  */
3974  oldcontext = MemoryContextSwitchTo(aggcontext);
3975  state = makeStringInfo();
3976  MemoryContextSwitchTo(oldcontext);
3977 
3978  return state;
3979 }
3980 
3981 Datum
3983 {
3984  StringInfo state;
3985 
3986  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
3987 
3988  /* Append the value unless null. */
3989  if (!PG_ARGISNULL(1))
3990  {
3991  /* On the first time through, we ignore the delimiter. */
3992  if (state == NULL)
3993  state = makeStringAggState(fcinfo);
3994  else if (!PG_ARGISNULL(2))
3995  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
3996 
3997  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
3998  }
3999 
4000  /*
4001  * The transition type for string_agg() is declared to be "internal",
4002  * which is a pass-by-value type the same size as a pointer.
4003  */
4004  PG_RETURN_POINTER(state);
4005 }
4006 
4007 Datum
4009 {
4010  StringInfo state;
4011 
4012  /* cannot be called directly because of internal-type argument */
4013  Assert(AggCheckCallContext(fcinfo, NULL));
4014 
4015  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4016 
4017  if (state != NULL)
4019  else
4020  PG_RETURN_NULL();
4021 }
4022 
4023 /*
4024  * Implementation of both concat() and concat_ws().
4025  *
4026  * sepstr is the separator string to place between values.
4027  * argidx identifies the first argument to concatenate (counting from zero).
4028  * Returns NULL if result should be NULL, else text value.
4029  */
4030 static text *
4031 concat_internal(const char *sepstr, int argidx,
4032  FunctionCallInfo fcinfo)
4033 {
4034  text *result;
4035  StringInfoData str;
4036  bool first_arg = true;
4037  int i;
4038 
4039  /*
4040  * concat(VARIADIC some-array) is essentially equivalent to
4041  * array_to_text(), ie concat the array elements with the given separator.
4042  * So we just pass the case off to that code.
4043  */
4044  if (get_fn_expr_variadic(fcinfo->flinfo))
4045  {
4046  ArrayType *arr;
4047 
4048  /* Should have just the one argument */
4049  Assert(argidx == PG_NARGS() - 1);
4050 
4051  /* concat(VARIADIC NULL) is defined as NULL */
4052  if (PG_ARGISNULL(argidx))
4053  return NULL;
4054 
4055  /*
4056  * Non-null argument had better be an array. We assume that any call
4057  * context that could let get_fn_expr_variadic return true will have
4058  * checked that a VARIADIC-labeled parameter actually is an array. So
4059  * it should be okay to just Assert that it's an array rather than
4060  * doing a full-fledged error check.
4061  */
4063 
4064  /* OK, safe to fetch the array value */
4065  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4066 
4067  /*
4068  * And serialize the array. We tell array_to_text to ignore null
4069  * elements, which matches the behavior of the loop below.
4070  */
4071  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4072  }
4073 
4074  /* Normal case without explicit VARIADIC marker */
4075  initStringInfo(&str);
4076 
4077  for (i = argidx; i < PG_NARGS(); i++)
4078  {
4079  if (!PG_ARGISNULL(i))
4080  {
4082  Oid valtype;
4083  Oid typOutput;
4084  bool typIsVarlena;
4085 
4086  /* add separator if appropriate */
4087  if (first_arg)
4088  first_arg = false;
4089  else
4090  appendStringInfoString(&str, sepstr);
4091 
4092  /* call the appropriate type output function, append the result */
4093  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4094  if (!OidIsValid(valtype))
4095  elog(ERROR, "could not determine data type of concat() input");
4096  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4098  OidOutputFunctionCall(typOutput, value));
4099  }
4100  }
4101 
4102  result = cstring_to_text_with_len(str.data, str.len);
4103  pfree(str.data);
4104 
4105  return result;
4106 }
4107 
4108 /*
4109  * Concatenate all arguments. NULL arguments are ignored.
4110  */
4111 Datum
4113 {
4114  text *result;
4115 
4116  result = concat_internal("", 0, fcinfo);
4117  if (result == NULL)
4118  PG_RETURN_NULL();
4119  PG_RETURN_TEXT_P(result);
4120 }
4121 
4122 /*
4123  * Concatenate all but first argument value with separators. The first
4124  * parameter is used as the separator. NULL arguments are ignored.
4125  */
4126 Datum
4128 {
4129  char *sep;
4130  text *result;
4131 
4132  /* return NULL when separator is NULL */
4133  if (PG_ARGISNULL(0))
4134  PG_RETURN_NULL();
4136 
4137  result = concat_internal(sep, 1, fcinfo);
4138  if (result == NULL)
4139  PG_RETURN_NULL();
4140  PG_RETURN_TEXT_P(result);
4141 }
4142 
4143 /*
4144  * Return first n characters in the string. When n is negative,
4145  * return all but last |n| characters.
4146  */
4147 Datum
4149 {
4150  text *str = PG_GETARG_TEXT_PP(0);
4151  const char *p = VARDATA_ANY(str);
4152  int len = VARSIZE_ANY_EXHDR(str);
4153  int n = PG_GETARG_INT32(1);
4154  int rlen;
4155 
4156  if (n < 0)
4157  n = pg_mbstrlen_with_len(p, len) + n;
4158  rlen = pg_mbcharcliplen(p, len, n);
4159 
4161 }
4162 
4163 /*
4164  * Return last n characters in the string. When n is negative,
4165  * return all but first |n| characters.
4166  */
4167 Datum
4169 {
4170  text *str = PG_GETARG_TEXT_PP(0);
4171  const char *p = VARDATA_ANY(str);
4172  int len = VARSIZE_ANY_EXHDR(str);
4173  int n = PG_GETARG_INT32(1);
4174  int off;
4175 
4176  if (n < 0)
4177  n = -n;
4178  else
4179  n = pg_mbstrlen_with_len(p, len) - n;
4180  off = pg_mbcharcliplen(p, len, n);
4181 
4182  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4183 }
4184 
4185 /*
4186  * Return reversed string
4187  */
4188 Datum
4190 {
4191  text *str = PG_GETARG_TEXT_PP(0);
4192  const char *p = VARDATA_ANY(str);
4193  int len = VARSIZE_ANY_EXHDR(str);
4194  const char *endp = p + len;
4195  text *result;
4196  char *dst;
4197 
4198  result = palloc(len + VARHDRSZ);
4199  dst = (char *) VARDATA(result) + len;
4200  SET_VARSIZE(result, len + VARHDRSZ);
4201 
4203  {
4204  /* multibyte version */
4205  while (p < endp)
4206  {
4207  int sz;
4208 
4209  sz = pg_mblen(p);
4210  dst -= sz;
4211  memcpy(dst, p, sz);
4212  p += sz;
4213  }
4214  }
4215  else
4216  {
4217  /* single byte version */
4218  while (p < endp)
4219  *(--dst) = *p++;
4220  }
4221 
4222  PG_RETURN_TEXT_P(result);
4223 }
4224 
4225 
/*
 * Support macros for text_format()
 */

/* Flag bit recorded when a format specifier carries a '-' flag. */
#define TEXT_FORMAT_FLAG_MINUS	0x0001

/*
 * Step the parse pointer forward by one byte.  If that brings it to or past
 * the end of the format string, report "unterminated format specifier".
 *
 * The pointer argument is incremented in place, so it must be an lvalue.
 */
#define ADVANCE_PARSE_POINTER(cur,limit) \
	do { \
		if (++(cur) >= (limit)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format specifier"))); \
	} while (0)
4238 
4239 /*
4240  * Returns a formatted string
4241  */
4242 Datum
4244 {
4245  text *fmt;
4246  StringInfoData str;
4247  const char *cp;
4248  const char *start_ptr;
4249  const char *end_ptr;
4250  text *result;
4251  int arg;
4252  bool funcvariadic;
4253  int nargs;
4254  Datum *elements = NULL;
4255  bool *nulls = NULL;
4256  Oid element_type = InvalidOid;
4257  Oid prev_type = InvalidOid;
4258  Oid prev_width_type = InvalidOid;
4259  FmgrInfo typoutputfinfo;
4260  FmgrInfo typoutputinfo_width;
4261 
4262  /* When format string is null, immediately return null */
4263  if (PG_ARGISNULL(0))
4264  PG_RETURN_NULL();
4265 
4266  /* If argument is marked VARIADIC, expand array into elements */
4267  if (get_fn_expr_variadic(fcinfo->flinfo))
4268  {
4269  ArrayType *arr;
4270  int16 elmlen;
4271  bool elmbyval;
4272  char elmalign;
4273  int nitems;
4274 
4275  /* Should have just the one argument */
4276  Assert(PG_NARGS() == 2);
4277 
4278  /* If argument is NULL, we treat it as zero-length array */
4279  if (PG_ARGISNULL(1))
4280  nitems = 0;
4281  else
4282  {
4283  /*
4284  * Non-null argument had better be an array. We assume that any
4285  * call context that could let get_fn_expr_variadic return true
4286  * will have checked that a VARIADIC-labeled parameter actually is
4287  * an array. So it should be okay to just Assert that it's an
4288  * array rather than doing a full-fledged error check.
4289  */
4291 
4292  /* OK, safe to fetch the array value */
4293  arr = PG_GETARG_ARRAYTYPE_P(1);
4294 
4295  /* Get info about array element type */
4296  element_type = ARR_ELEMTYPE(arr);
4297  get_typlenbyvalalign(element_type,
4298  &elmlen, &elmbyval, &elmalign);
4299 
4300  /* Extract all array elements */
4301  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
4302  &elements, &nulls, &nitems);
4303  }
4304 
4305  nargs = nitems + 1;
4306  funcvariadic = true;
4307  }
4308  else
4309  {
4310  /* Non-variadic case, we'll process the arguments individually */
4311  nargs = PG_NARGS();
4312  funcvariadic = false;
4313  }
4314 
4315  /* Setup for main loop. */
4316  fmt = PG_GETARG_TEXT_PP(0);
4317  start_ptr = VARDATA_ANY(fmt);
4318  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
4319  initStringInfo(&str);
4320  arg = 1; /* next argument position to print */
4321 
4322  /* Scan format string, looking for conversion specifiers. */
4323  for (cp = start_ptr; cp < end_ptr; cp++)
4324  {
4325  int argpos;
4326  int widthpos;
4327  int flags;
4328  int width;
4329  Datum value;
4330  bool isNull;
4331  Oid typid;
4332 
4333  /*
4334  * If it's not the start of a conversion specifier, just copy it to
4335  * the output buffer.
4336  */
4337  if (*cp != '%')
4338  {
4339  appendStringInfoCharMacro(&str, *cp);
4340  continue;
4341  }
4342 
4343  ADVANCE_PARSE_POINTER(cp, end_ptr);
4344 
4345  /* Easy case: %% outputs a single % */
4346  if (*cp == '%')
4347  {
4348  appendStringInfoCharMacro(&str, *cp);
4349  continue;
4350  }
4351 
4352  /* Parse the optional portions of the format specifier */
4353  cp = text_format_parse_format(cp, end_ptr,
4354  &argpos, &widthpos,
4355  &flags, &width);
4356 
4357  /*
4358  * Next we should see the main conversion specifier. Whether or not
4359  * an argument position was present, it's known that at least one
4360  * character remains in the string at this point. Experience suggests
4361  * that it's worth checking that that character is one of the expected
4362  * ones before we try to fetch arguments, so as to produce the least
4363  * confusing response to a mis-formatted specifier.
4364  */
4365  if (strchr("sIL", *cp) == NULL)
4366  ereport(ERROR,
4367  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4368  errmsg("unrecognized conversion type specifier \"%c\"",
4369  *cp)));
4370 
4371  /* If indirect width was specified, get its value */
4372  if (widthpos >= 0)
4373  {
4374  /* Collect the specified or next argument position */
4375  if (widthpos > 0)
4376  arg = widthpos;
4377  if (arg >= nargs)
4378  ereport(ERROR,
4379  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4380  errmsg("too few arguments for format")));
4381 
4382  /* Get the value and type of the selected argument */
4383  if (!funcvariadic)
4384  {
4385  value = PG_GETARG_DATUM(arg);
4386  isNull = PG_ARGISNULL(arg);
4387  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4388  }
4389  else
4390  {
4391  value = elements[arg - 1];
4392  isNull = nulls[arg - 1];
4393  typid = element_type;
4394  }
4395  if (!OidIsValid(typid))
4396  elog(ERROR, "could not determine data type of format() input");
4397 
4398  arg++;
4399 
4400  /* We can treat NULL width the same as zero */
4401  if (isNull)
4402  width = 0;
4403  else if (typid == INT4OID)
4404  width = DatumGetInt32(value);
4405  else if (typid == INT2OID)
4406  width = DatumGetInt16(value);
4407  else
4408  {
4409  /* For less-usual datatypes, convert to text then to int */
4410  char *str;
4411 
4412  if (typid != prev_width_type)
4413  {
4414  Oid typoutputfunc;
4415  bool typIsVarlena;
4416 
4417  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4418  fmgr_info(typoutputfunc, &typoutputinfo_width);
4419  prev_width_type = typid;
4420  }
4421 
4422  str = OutputFunctionCall(&typoutputinfo_width, value);
4423 
4424  /* pg_atoi will complain about bad data or overflow */
4425  width = pg_atoi(str, sizeof(int), '\0');
4426 
4427  pfree(str);
4428  }
4429  }
4430 
4431  /* Collect the specified or next argument position */
4432  if (argpos > 0)
4433  arg = argpos;
4434  if (arg >= nargs)
4435  ereport(ERROR,
4436  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4437  errmsg("too few arguments for format")));
4438 
4439  /* Get the value and type of the selected argument */
4440  if (!funcvariadic)
4441  {
4442  value = PG_GETARG_DATUM(arg);
4443  isNull = PG_ARGISNULL(arg);
4444  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4445  }
4446  else
4447  {
4448  value = elements[arg - 1];
4449  isNull = nulls[arg - 1];
4450  typid = element_type;
4451  }
4452  if (!OidIsValid(typid))
4453  elog(ERROR, "could not determine data type of format() input");
4454 
4455  arg++;
4456 
4457  /*
4458  * Get the appropriate typOutput function, reusing previous one if
4459  * same type as previous argument. That's particularly useful in the
4460  * variadic-array case, but often saves work even for ordinary calls.
4461  */
4462  if (typid != prev_type)
4463  {
4464  Oid typoutputfunc;
4465  bool typIsVarlena;
4466 
4467  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4468  fmgr_info(typoutputfunc, &typoutputfinfo);
4469  prev_type = typid;
4470  }
4471 
4472  /*
4473  * And now we can format the value.
4474  */
4475  switch (*cp)
4476  {
4477  case 's':
4478  case 'I':
4479  case 'L':
4480  text_format_string_conversion(&str, *cp, &typoutputfinfo,
4481  value, isNull,
4482  flags, width);
4483  break;
4484  default:
4485  /* should not get here, because of previous check */
4486  ereport(ERROR,
4487  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4488  errmsg("unrecognized conversion type specifier \"%c\"",
4489  *cp)));
4490  break;
4491  }
4492  }
4493 
4494  /* Don't need deconstruct_array results anymore. */
4495  if (elements != NULL)
4496  pfree(elements);
4497  if (nulls != NULL)
4498  pfree(nulls);
4499 
4500  /* Generate results. */
4501  result = cstring_to_text_with_len(str.data, str.len);
4502  pfree(str.data);
4503 
4504  PG_RETURN_TEXT_P(result);
4505 }
4506 
4507 /*
4508  * Parse contiguous digits as a decimal number.
4509  *
4510  * Returns true if some digits could be parsed.
4511  * The value is returned into *value, and *ptr is advanced to the next
4512  * character to be parsed.
4513  *
4514  * Note parsing invariant: at least one character is known available before
4515  * string end (end_ptr) at entry, and this is still true at exit.
4516  */
4517 static bool
4518 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
4519 {
4520  bool found = false;
4521  const char *cp = *ptr;
4522  int val = 0;
4523 
4524  while (*cp >= '0' && *cp <= '9')
4525  {
4526  int newval = val * 10 + (*cp - '0');
4527 
4528  if (newval / 10 != val) /* overflow? */
4529  ereport(ERROR,
4530  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4531  errmsg("number is out of range")));
4532  val = newval;
4533  ADVANCE_PARSE_POINTER(cp, end_ptr);
4534  found = true;
4535  }
4536 
4537  *ptr = cp;
4538  *value = val;
4539 
4540  return found;
4541 }
4542 
4543 /*
4544  * Parse a format specifier (generally following the SUS printf spec).
4545  *
4546  * We have already advanced over the initial '%', and we are looking for
4547  * [argpos][flags][width]type (but the type character is not consumed here).
4548  *
4549  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
4550  * Output parameters:
4551  * argpos: argument position for value to be printed. -1 means unspecified.
4552  * widthpos: argument position for width. Zero means the argument position
4553  * was unspecified (ie, take the next arg) and -1 means no width
4554  * argument (width was omitted or specified as a constant).
4555  * flags: bitmask of flags.
4556  * width: directly-specified width value. Zero means the width was omitted
4557  * (note it's not necessary to distinguish this case from an explicit
4558  * zero width value).
4559  *
4560  * The function result is the next character position to be parsed, ie, the
4561  * location where the type character is/should be.
4562  *
4563  * Note parsing invariant: at least one character is known available before
4564  * string end (end_ptr) at entry, and this is still true at exit.
4565  */
4566 static const char *
4567 text_format_parse_format(const char *start_ptr, const char *end_ptr,
4568  int *argpos, int *widthpos,
4569  int *flags, int *width)
4570 {
4571  const char *cp = start_ptr;
4572  int n;
4573 
4574  /* set defaults for output parameters */
4575  *argpos = -1;
4576  *widthpos = -1;
4577  *flags = 0;
4578  *width = 0;
4579 
4580  /* try to identify first number */
4581  if (text_format_parse_digits(&cp, end_ptr, &n))
4582  {
4583  if (*cp != '$')
4584  {
4585  /* Must be just a width and a type, so we're done */
4586  *width = n;
4587  return cp;
4588  }
4589  /* The number was argument position */
4590  *argpos = n;
4591  /* Explicit 0 for argument index is immediately refused */
4592  if (n == 0)
4593  ereport(ERROR,
4594  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4595  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4596  ADVANCE_PARSE_POINTER(cp, end_ptr);
4597  }
4598 
4599  /* Handle flags (only minus is supported now) */
4600  while (*cp == '-')
4601  {
4602  *flags |= TEXT_FORMAT_FLAG_MINUS;
4603  ADVANCE_PARSE_POINTER(cp, end_ptr);
4604  }
4605 
4606  if (*cp == '*')
4607  {
4608  /* Handle indirect width */
4609  ADVANCE_PARSE_POINTER(cp, end_ptr);
4610  if (text_format_parse_digits(&cp, end_ptr, &n))
4611  {
4612  /* number in this position must be closed by $ */
4613  if (*cp != '$')
4614  ereport(ERROR,
4615  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4616  errmsg("width argument position must be ended by \"$\"")));
4617  /* The number was width argument position */
4618  *widthpos = n;
4619  /* Explicit 0 for argument index is immediately refused */
4620  if (n == 0)
4621  ereport(ERROR,
4622  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4623  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4624  ADVANCE_PARSE_POINTER(cp, end_ptr);
4625  }
4626  else
4627  *widthpos = 0; /* width's argument position is unspecified */
4628  }
4629  else
4630  {
4631  /* Check for direct width specification */
4632  if (text_format_parse_digits(&cp, end_ptr, &n))
4633  *width = n;
4634  }
4635 
4636  /* cp should now be pointing at type character */
4637  return cp;
4638 }
4639 
4640 /*
4641  * Format a %s, %I, or %L conversion
4642  */
4643 static void
4645  FmgrInfo *typOutputInfo,
4646  Datum value, bool isNull,
4647  int flags, int width)
4648 {
4649  char *str;
4650 
4651  /* Handle NULL arguments before trying to stringify the value. */
4652  if (isNull)
4653  {
4654  if (conversion == 's')
4655  text_format_append_string(buf, "", flags, width);
4656  else if (conversion == 'L')
4657  text_format_append_string(buf, "NULL", flags, width);
4658  else if (conversion == 'I')
4659  ereport(ERROR,
4660  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4661  errmsg("null values cannot be formatted as an SQL identifier")));
4662  return;
4663  }
4664 
4665  /* Stringify. */
4666  str = OutputFunctionCall(typOutputInfo, value);
4667 
4668  /* Escape. */
4669  if (conversion == 'I')
4670  {
4671  /* quote_identifier may or may not allocate a new string. */
4672  text_format_append_string(buf, quote_identifier(str), flags, width);
4673  }
4674  else if (conversion == 'L')
4675  {
4676  char *qstr = quote_literal_cstr(str);
4677 
4678  text_format_append_string(buf, qstr, flags, width);
4679  /* quote_literal_cstr() always allocates a new string */
4680  pfree(qstr);
4681  }
4682  else
4683  text_format_append_string(buf, str, flags, width);
4684 
4685  /* Cleanup. */
4686  pfree(str);
4687 }
4688 
4689 /*
4690  * Append str to buf, padding as directed by flags/width
4691  */
4692 static void
4694  int flags, int width)
4695 {
4696  bool align_to_left = false;
4697  int len;
4698 
4699  /* fast path for typical easy case */
4700  if (width == 0)
4701  {
4702  appendStringInfoString(buf, str);
4703  return;
4704  }
4705 
4706  if (width < 0)
4707  {
4708  /* Negative width: implicit '-' flag, then take absolute value */
4709  align_to_left = true;
4710  /* -INT_MIN is undefined */
4711  if (width <= INT_MIN)
4712  ereport(ERROR,
4713  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4714  errmsg("number is out of range")));
4715  width = -width;
4716  }
4717  else if (flags & TEXT_FORMAT_FLAG_MINUS)
4718  align_to_left = true;
4719 
4720  len = pg_mbstrlen(str);
4721  if (align_to_left)
4722  {
4723  /* left justify */
4724  appendStringInfoString(buf, str);
4725  if (len < width)
4726  appendStringInfoSpaces(buf, width - len);
4727  }
4728  else
4729  {
4730  /* right justify */
4731  if (len < width)
4732  appendStringInfoSpaces(buf, width - len);
4733  appendStringInfoString(buf, str);
4734  }
4735 }
4736 
4737 /*
4738  * text_format_nv - nonvariadic wrapper for text_format function.
4739  *
4740  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
4741  * which checks that all built-in functions that share the implementing C
4742  * function take the same number of arguments.
4743  */
4744 Datum
4746 {
4747  return text_format(fcinfo);
4748 }
4749 
/*
 * Helper function for Levenshtein distance functions: report whether the
 * first len bytes of s1 and s2 are identical.  Faster than memcmp(), for
 * this use case.
 *
 * Bytes are compared from the last one back to the first.  A len of zero or
 * less compares nothing and yields true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
4765 
4766 /* Expand each Levenshtein distance variant */
4767 #include "levenshtein.c"
4768 #define LEVENSHTEIN_LESS_EQUAL
4769 #include "levenshtein.c"