PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/tuptoaster.h"
21 #include "catalog/pg_collation.h"
22 #include "catalog/pg_type.h"
23 #include "libpq/md5.h"
24 #include "libpq/pqformat.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "regex/regex.h"
28 #include "utils/builtins.h"
29 #include "utils/bytea.h"
30 #include "utils/lsyscache.h"
31 #include "utils/memutils.h"
32 #include "utils/pg_locale.h"
33 #include "utils/sortsupport.h"
34 
35 
36 /* GUC variable */
38 
39 typedef struct varlena unknown;
40 
41 typedef struct
42 {
43  bool use_wchar; /* T if multibyte encoding */
44  char *str1; /* use these if not use_wchar */
45  char *str2; /* note: these point to original texts */
46  pg_wchar *wstr1; /* use these if use_wchar */
47  pg_wchar *wstr2; /* note: these are palloc'd */
48  int len1; /* string lengths in logical characters */
49  int len2;
50  /* Skip table for Boyer-Moore-Horspool search algorithm: */
51  int skiptablemask; /* mask for ANDing with skiptable subscripts */
52  int skiptable[256]; /* skip distance for given mismatched char */
54 
55 typedef struct
56 {
57  char *buf1; /* 1st string */
58  char *buf2; /* 2nd string */
59  int buflen1;
60  int buflen2;
61 #ifdef HAVE_LOCALE_T
63 #endif
65 
66 /*
67  * This should be large enough that most strings will fit, but small enough
68  * that we feel comfortable putting it on the stack
69  */
70 #define TEXTBUFLEN 1024
71 
72 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
73 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
74 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
75 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
76 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
77 
78 static void btsortsupport_worker(SortSupport ssup, Oid collid);
79 static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
80 static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
81 static int32 text_length(Datum str);
82 static text *text_catenate(text *t1, text *t2);
83 static text *text_substring(Datum str,
84  int32 start,
85  int32 length,
86  bool length_not_specified);
87 static text *text_overlay(text *t1, text *t2, int sp, int sl);
88 static int text_position(text *t1, text *t2);
89 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
90 static int text_position_next(int start_pos, TextPositionState *state);
92 static int text_cmp(text *arg1, text *arg2, Oid collid);
93 static bytea *bytea_catenate(bytea *t1, bytea *t2);
94 static bytea *bytea_substring(Datum str,
95  int S,
96  int L,
97  bool length_not_specified);
98 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
99 static void appendStringInfoText(StringInfo str, const text *t);
102  const char *fldsep, const char *null_string);
104 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
105  int *value);
106 static const char *text_format_parse_format(const char *start_ptr,
107  const char *end_ptr,
108  int *argpos, int *widthpos,
109  int *flags, int *width);
110 static void text_format_string_conversion(StringInfo buf, char conversion,
111  FmgrInfo *typOutputInfo,
112  Datum value, bool isNull,
113  int flags, int width);
114 static void text_format_append_string(StringInfo buf, const char *str,
115  int flags, int width);
116 
117 
118 /*****************************************************************************
119  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
120  *****************************************************************************/
121 
122 /*
123  * cstring_to_text
124  *
125  * Create a text value from a null-terminated C string.
126  *
127  * The new text value is freshly palloc'd with a full-size VARHDR.
128  */
129 text *
130 cstring_to_text(const char *s)
131 {
132  return cstring_to_text_with_len(s, strlen(s));
133 }
134 
135 /*
136  * cstring_to_text_with_len
137  *
138  * Same as cstring_to_text except the caller specifies the string length;
139  * the string need not be null_terminated.
140  */
141 text *
142 cstring_to_text_with_len(const char *s, int len)
143 {
144  text *result = (text *) palloc(len + VARHDRSZ);
145 
146  SET_VARSIZE(result, len + VARHDRSZ);
147  memcpy(VARDATA(result), s, len);
148 
149  return result;
150 }
151 
152 /*
153  * text_to_cstring
154  *
155  * Create a palloc'd, null-terminated C string from a text value.
156  *
157  * We support being passed a compressed or toasted text value.
158  * This is a bit bogus since such values shouldn't really be referred to as
159  * "text *", but it seems useful for robustness. If we didn't handle that
160  * case here, we'd need another routine that did, anyway.
161  */
162 char *
164 {
165  /* must cast away the const, unfortunately */
166  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
167  int len = VARSIZE_ANY_EXHDR(tunpacked);
168  char *result;
169 
170  result = (char *) palloc(len + 1);
171  memcpy(result, VARDATA_ANY(tunpacked), len);
172  result[len] = '\0';
173 
174  if (tunpacked != t)
175  pfree(tunpacked);
176 
177  return result;
178 }
179 
180 /*
181  * text_to_cstring_buffer
182  *
183  * Copy a text value into a caller-supplied buffer of size dst_len.
184  *
185  * The text string is truncated if necessary to fit. The result is
186  * guaranteed null-terminated (unless dst_len == 0).
187  *
188  * We support being passed a compressed or toasted text value.
189  * This is a bit bogus since such values shouldn't really be referred to as
190  * "text *", but it seems useful for robustness. If we didn't handle that
191  * case here, we'd need another routine that did, anyway.
192  */
193 void
194 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
195 {
196  /* must cast away the const, unfortunately */
197  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
198  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
199 
200  if (dst_len > 0)
201  {
202  dst_len--;
203  if (dst_len >= src_len)
204  dst_len = src_len;
205  else /* ensure truncation is encoding-safe */
206  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
207  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
208  dst[dst_len] = '\0';
209  }
210 
211  if (srcunpacked != src)
212  pfree(srcunpacked);
213 }
214 
215 
216 /*****************************************************************************
217  * USER I/O ROUTINES *
218  *****************************************************************************/
219 
220 
/* VAL: numeric value of the digit character C (e.g. '7' -> 7) */
#define VAL(C) ((C) - '0')
/* DIG: digit character for the small integer V (e.g. 7 -> '7') */
#define DIG(V) ((V) + '0')
223 
224 /*
225  * byteain - converts from printable representation of byte array
226  *
227  * Non-printable characters must be passed as '\nnn' (octal) and are
228  * converted to internal form. '\' must be passed as '\\'.
229  * ereport(ERROR, ...) if bad form.
230  *
231  * BUGS:
232  * The input is scanned twice.
233  * The error checking of input is minimal.
234  */
235 Datum
237 {
238  char *inputText = PG_GETARG_CSTRING(0);
239  char *tp;
240  char *rp;
241  int bc;
242  bytea *result;
243 
244  /* Recognize hex input */
245  if (inputText[0] == '\\' && inputText[1] == 'x')
246  {
247  size_t len = strlen(inputText);
248 
249  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
250  result = palloc(bc);
251  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
252  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
253 
254  PG_RETURN_BYTEA_P(result);
255  }
256 
257  /* Else, it's the traditional escaped style */
258  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
259  {
260  if (tp[0] != '\\')
261  tp++;
262  else if ((tp[0] == '\\') &&
263  (tp[1] >= '0' && tp[1] <= '3') &&
264  (tp[2] >= '0' && tp[2] <= '7') &&
265  (tp[3] >= '0' && tp[3] <= '7'))
266  tp += 4;
267  else if ((tp[0] == '\\') &&
268  (tp[1] == '\\'))
269  tp += 2;
270  else
271  {
272  /*
273  * one backslash, not followed by another or ### valid octal
274  */
275  ereport(ERROR,
276  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
277  errmsg("invalid input syntax for type bytea")));
278  }
279  }
280 
281  bc += VARHDRSZ;
282 
283  result = (bytea *) palloc(bc);
284  SET_VARSIZE(result, bc);
285 
286  tp = inputText;
287  rp = VARDATA(result);
288  while (*tp != '\0')
289  {
290  if (tp[0] != '\\')
291  *rp++ = *tp++;
292  else if ((tp[0] == '\\') &&
293  (tp[1] >= '0' && tp[1] <= '3') &&
294  (tp[2] >= '0' && tp[2] <= '7') &&
295  (tp[3] >= '0' && tp[3] <= '7'))
296  {
297  bc = VAL(tp[1]);
298  bc <<= 3;
299  bc += VAL(tp[2]);
300  bc <<= 3;
301  *rp++ = bc + VAL(tp[3]);
302 
303  tp += 4;
304  }
305  else if ((tp[0] == '\\') &&
306  (tp[1] == '\\'))
307  {
308  *rp++ = '\\';
309  tp += 2;
310  }
311  else
312  {
313  /*
314  * We should never get here. The first pass should not allow it.
315  */
316  ereport(ERROR,
317  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
318  errmsg("invalid input syntax for type bytea")));
319  }
320  }
321 
322  PG_RETURN_BYTEA_P(result);
323 }
324 
325 /*
326  * byteaout - converts to printable representation of byte array
327  *
328  * In the traditional escaped format, non-printable characters are
329  * printed as '\nnn' (octal) and '\' as '\\'.
330  */
331 Datum
333 {
334  bytea *vlena = PG_GETARG_BYTEA_PP(0);
335  char *result;
336  char *rp;
337 
339  {
340  /* Print hex format */
341  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
342  *rp++ = '\\';
343  *rp++ = 'x';
344  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
345  }
346  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
347  {
348  /* Print traditional escaped format */
349  char *vp;
350  int len;
351  int i;
352 
353  len = 1; /* empty string has 1 char */
354  vp = VARDATA_ANY(vlena);
355  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
356  {
357  if (*vp == '\\')
358  len += 2;
359  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
360  len += 4;
361  else
362  len++;
363  }
364  rp = result = (char *) palloc(len);
365  vp = VARDATA_ANY(vlena);
366  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
367  {
368  if (*vp == '\\')
369  {
370  *rp++ = '\\';
371  *rp++ = '\\';
372  }
373  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
374  {
375  int val; /* holds unprintable chars */
376 
377  val = *vp;
378  rp[0] = '\\';
379  rp[3] = DIG(val & 07);
380  val >>= 3;
381  rp[2] = DIG(val & 07);
382  val >>= 3;
383  rp[1] = DIG(val & 03);
384  rp += 4;
385  }
386  else
387  *rp++ = *vp;
388  }
389  }
390  else
391  {
392  elog(ERROR, "unrecognized bytea_output setting: %d",
393  bytea_output);
394  rp = result = NULL; /* keep compiler quiet */
395  }
396  *rp = '\0';
397  PG_RETURN_CSTRING(result);
398 }
399 
400 /*
401  * bytearecv - converts external binary format to bytea
402  */
403 Datum
405 {
407  bytea *result;
408  int nbytes;
409 
410  nbytes = buf->len - buf->cursor;
411  result = (bytea *) palloc(nbytes + VARHDRSZ);
412  SET_VARSIZE(result, nbytes + VARHDRSZ);
413  pq_copymsgbytes(buf, VARDATA(result), nbytes);
414  PG_RETURN_BYTEA_P(result);
415 }
416 
417 /*
418  * byteasend - converts bytea to binary format
419  *
420  * This is a special case: just copy the input...
421  */
422 Datum
424 {
425  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
426 
427  PG_RETURN_BYTEA_P(vlena);
428 }
429 
430 Datum
432 {
434 
435  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
436 
437  /* Append the value unless null. */
438  if (!PG_ARGISNULL(1))
439  {
441 
442  /* On the first time through, we ignore the delimiter. */
443  if (state == NULL)
444  state = makeStringAggState(fcinfo);
445  else if (!PG_ARGISNULL(2))
446  {
447  bytea *delim = PG_GETARG_BYTEA_PP(2);
448 
450  }
451 
453  }
454 
455  /*
456  * The transition type for string_agg() is declared to be "internal",
457  * which is a pass-by-value type the same size as a pointer.
458  */
459  PG_RETURN_POINTER(state);
460 }
461 
462 Datum
464 {
466 
467  /* cannot be called directly because of internal-type argument */
468  Assert(AggCheckCallContext(fcinfo, NULL));
469 
470  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
471 
472  if (state != NULL)
473  {
474  bytea *result;
475 
476  result = (bytea *) palloc(state->len + VARHDRSZ);
477  SET_VARSIZE(result, state->len + VARHDRSZ);
478  memcpy(VARDATA(result), state->data, state->len);
479  PG_RETURN_BYTEA_P(result);
480  }
481  else
482  PG_RETURN_NULL();
483 }
484 
485 /*
486  * textin - converts "..." to internal representation
487  */
488 Datum
490 {
491  char *inputText = PG_GETARG_CSTRING(0);
492 
493  PG_RETURN_TEXT_P(cstring_to_text(inputText));
494 }
495 
496 /*
497  * textout - converts internal representation to "..."
498  */
499 Datum
501 {
502  Datum txt = PG_GETARG_DATUM(0);
503 
505 }
506 
507 /*
508  * textrecv - converts external binary format to text
509  */
510 Datum
512 {
514  text *result;
515  char *str;
516  int nbytes;
517 
518  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
519 
520  result = cstring_to_text_with_len(str, nbytes);
521  pfree(str);
522  PG_RETURN_TEXT_P(result);
523 }
524 
525 /*
526  * textsend - converts text to binary format
527  */
528 Datum
530 {
531  text *t = PG_GETARG_TEXT_PP(0);
533 
534  pq_begintypsend(&buf);
537 }
538 
539 
540 /*
541  * unknownin - converts "..." to internal representation
542  */
543 Datum
545 {
546  char *str = PG_GETARG_CSTRING(0);
547 
548  /* representation is same as cstring */
550 }
551 
552 /*
553  * unknownout - converts internal representation to "..."
554  */
555 Datum
557 {
558  /* representation is same as cstring */
559  char *str = PG_GETARG_CSTRING(0);
560 
562 }
563 
564 /*
565  * unknownrecv - converts external binary format to unknown
566  */
567 Datum
569 {
571  char *str;
572  int nbytes;
573 
574  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
575  /* representation is same as cstring */
576  PG_RETURN_CSTRING(str);
577 }
578 
579 /*
580  * unknownsend - converts unknown to binary format
581  */
582 Datum
584 {
585  /* representation is same as cstring */
586  char *str = PG_GETARG_CSTRING(0);
588 
589  pq_begintypsend(&buf);
590  pq_sendtext(&buf, str, strlen(str));
592 }
593 
594 
595 /* ========== PUBLIC ROUTINES ========== */
596 
597 /*
598  * textlen -
599  * returns the logical length of a text*
600  * (which is less than the VARSIZE of the text*)
601  */
602 Datum
604 {
605  Datum str = PG_GETARG_DATUM(0);
606 
607  /* try to avoid decompressing argument */
609 }
610 
611 /*
612  * text_length -
613  * Does the real work for textlen()
614  *
615  * This is broken out so it can be called directly by other string processing
616  * functions. Note that the argument is passed as a Datum, to indicate that
617  * it may still be in compressed form. We can avoid decompressing it at all
618  * in some cases.
619  */
620 static int32
622 {
623  /* fastpath when max encoding length is one */
626  else
627  {
628  text *t = DatumGetTextPP(str);
629 
631  VARSIZE_ANY_EXHDR(t)));
632  }
633 }
634 
635 /*
636  * textoctetlen -
637  * returns the physical length of a text*
638  * (which is less than the VARSIZE of the text*)
639  */
640 Datum
642 {
643  Datum str = PG_GETARG_DATUM(0);
644 
645  /* We need not detoast the input at all */
647 }
648 
649 /*
650  * textcat -
651  * takes two text* and returns a text* that is the concatenation of
652  * the two.
653  *
654  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
655  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
656  * Allocate space for output in all cases.
657  * XXX - thomas 1997-07-10
658  */
659 Datum
661 {
662  text *t1 = PG_GETARG_TEXT_PP(0);
663  text *t2 = PG_GETARG_TEXT_PP(1);
664 
666 }
667 
668 /*
669  * text_catenate
670  * Guts of textcat(), broken out so it can be used by other functions
671  *
672  * Arguments can be in short-header form, but not compressed or out-of-line
673  */
674 static text *
676 {
677  text *result;
678  int len1,
679  len2,
680  len;
681  char *ptr;
682 
683  len1 = VARSIZE_ANY_EXHDR(t1);
684  len2 = VARSIZE_ANY_EXHDR(t2);
685 
686  /* paranoia ... probably should throw error instead? */
687  if (len1 < 0)
688  len1 = 0;
689  if (len2 < 0)
690  len2 = 0;
691 
692  len = len1 + len2 + VARHDRSZ;
693  result = (text *) palloc(len);
694 
695  /* Set size of result string... */
696  SET_VARSIZE(result, len);
697 
698  /* Fill data field of result string... */
699  ptr = VARDATA(result);
700  if (len1 > 0)
701  memcpy(ptr, VARDATA_ANY(t1), len1);
702  if (len2 > 0)
703  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
704 
705  return result;
706 }
707 
708 /*
709  * charlen_to_bytelen()
710  * Compute the number of bytes occupied by n characters starting at *p
711  *
712  * It is caller's responsibility that there actually are n characters;
713  * the string need not be null-terminated.
714  */
715 static int
716 charlen_to_bytelen(const char *p, int n)
717 {
719  {
720  /* Optimization for single-byte encodings */
721  return n;
722  }
723  else
724  {
725  const char *s;
726 
727  for (s = p; n > 0; n--)
728  s += pg_mblen(s);
729 
730  return s - p;
731  }
732 }
733 
734 /*
735  * text_substr()
736  * Return a substring starting at the specified position.
737  * - thomas 1997-12-31
738  *
739  * Input:
740  * - string
741  * - starting position (is one-based)
742  * - string length
743  *
744  * If the starting position is zero or less, then return from the start of the string
745  * adjusting the length to be consistent with the "negative start" per SQL.
746  * If the length is less than zero, an error is raised; omit the length to get the remaining string.
747  *
748  * Added multibyte support.
749  * - Tatsuo Ishii 1998-4-21
750  * Changed behavior if starting position is less than one to conform to SQL behavior.
751  * Formerly returned the entire string; now returns a portion.
752  * - Thomas Lockhart 1998-12-10
753  * Now uses faster TOAST-slicing interface
754  * - John Gray 2002-02-22
755  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
756  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
757  * error; if E < 1, return '', not entire string). Fixed MB related bug when
758  * S > LC and < LC + 4 sometimes garbage characters are returned.
759  * - Joe Conway 2002-08-10
760  */
761 Datum
763 {
765  PG_GETARG_INT32(1),
766  PG_GETARG_INT32(2),
767  false));
768 }
769 
770 /*
771  * text_substr_no_len -
772  * Wrapper to avoid opr_sanity failure due to
773  * one function accepting a different number of args.
774  */
775 Datum
777 {
779  PG_GETARG_INT32(1),
780  -1, true));
781 }
782 
783 /*
784  * text_substring -
785  * Does the real work for text_substr() and text_substr_no_len()
786  *
787  * This is broken out so it can be called directly by other string processing
788  * functions. Note that the argument is passed as a Datum, to indicate that
789  * it may still be in compressed/toasted form. We can avoid detoasting all
790  * of it in some cases.
791  *
792  * The result is always a freshly palloc'd datum.
793  */
794 static text *
795 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
796 {
798  int32 S = start; /* start position */
799  int32 S1; /* adjusted start position */
800  int32 L1; /* adjusted substring length */
801 
802  /* life is easy if the encoding max length is 1 */
803  if (eml == 1)
804  {
805  S1 = Max(S, 1);
806 
807  if (length_not_specified) /* special case - get length to end of
808  * string */
809  L1 = -1;
810  else
811  {
812  /* end position */
813  int E = S + length;
814 
815  /*
816  * A negative value for L is the only way for the end position to
817  * be before the start. SQL99 says to throw an error.
818  */
819  if (E < S)
820  ereport(ERROR,
821  (errcode(ERRCODE_SUBSTRING_ERROR),
822  errmsg("negative substring length not allowed")));
823 
824  /*
825  * A zero or negative value for the end position can happen if the
826  * start was negative or one. SQL99 says to return a zero-length
827  * string.
828  */
829  if (E < 1)
830  return cstring_to_text("");
831 
832  L1 = E - S1;
833  }
834 
835  /*
836  * If the start position is past the end of the string, SQL99 says to
837  * return a zero-length string -- DatumGetTextPSlice() will do
838  * that for us. Convert to zero-based starting position
839  */
840  return DatumGetTextPSlice(str, S1 - 1, L1);
841  }
842  else if (eml > 1)
843  {
844  /*
845  * When encoding max length is > 1, we can't get LC without
846  * detoasting, so we'll grab a conservatively large slice now and go
847  * back later to do the right thing
848  */
849  int32 slice_start;
850  int32 slice_size;
851  int32 slice_strlen;
852  text *slice;
853  int32 E1;
854  int32 i;
855  char *p;
856  char *s;
857  text *ret;
858 
859  /*
860  * if S is past the end of the string, the tuple toaster will return a
861  * zero-length string to us
862  */
863  S1 = Max(S, 1);
864 
865  /*
866  * We need to start at position zero because there is no way to know
867  * in advance which byte offset corresponds to the supplied start
868  * position.
869  */
870  slice_start = 0;
871 
872  if (length_not_specified) /* special case - get length to end of
873  * string */
874  slice_size = L1 = -1;
875  else
876  {
877  int E = S + length;
878 
879  /*
880  * A negative value for L is the only way for the end position to
881  * be before the start. SQL99 says to throw an error.
882  */
883  if (E < S)
884  ereport(ERROR,
885  (errcode(ERRCODE_SUBSTRING_ERROR),
886  errmsg("negative substring length not allowed")));
887 
888  /*
889  * A zero or negative value for the end position can happen if the
890  * start was negative or one. SQL99 says to return a zero-length
891  * string.
892  */
893  if (E < 1)
894  return cstring_to_text("");
895 
896  /*
897  * if E is past the end of the string, the tuple toaster will
898  * truncate the length for us
899  */
900  L1 = E - S1;
901 
902  /*
903  * Total slice size in bytes can't be any longer than the start
904  * position plus substring length times the encoding max length.
905  */
906  slice_size = (S1 + L1) * eml;
907  }
908 
909  /*
910  * If we're working with an untoasted source, no need to do an extra
911  * copying step.
912  */
915  slice = DatumGetTextPSlice(str, slice_start, slice_size);
916  else
917  slice = (text *) DatumGetPointer(str);
918 
919  /* see if we got back an empty string */
920  if (VARSIZE_ANY_EXHDR(slice) == 0)
921  {
922  if (slice != (text *) DatumGetPointer(str))
923  pfree(slice);
924  return cstring_to_text("");
925  }
926 
927  /* Now we can get the actual length of the slice in MB characters */
928  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
929  VARSIZE_ANY_EXHDR(slice));
930 
931  /*
932  * Check that the start position wasn't > slice_strlen. If so, SQL99
933  * says to return a zero-length string.
934  */
935  if (S1 > slice_strlen)
936  {
937  if (slice != (text *) DatumGetPointer(str))
938  pfree(slice);
939  return cstring_to_text("");
940  }
941 
942  /*
943  * Adjust L1 and E1 now that we know the slice string length. Again
944  * remember that S1 is one based, and slice_start is zero based.
945  */
946  if (L1 > -1)
947  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
948  else
949  E1 = slice_start + 1 + slice_strlen;
950 
951  /*
952  * Find the start position in the slice; remember S1 is not zero based
953  */
954  p = VARDATA_ANY(slice);
955  for (i = 0; i < S1 - 1; i++)
956  p += pg_mblen(p);
957 
958  /* hang onto a pointer to our start position */
959  s = p;
960 
961  /*
962  * Count the actual bytes used by the substring of the requested
963  * length.
964  */
965  for (i = S1; i < E1; i++)
966  p += pg_mblen(p);
967 
968  ret = (text *) palloc(VARHDRSZ + (p - s));
969  SET_VARSIZE(ret, VARHDRSZ + (p - s));
970  memcpy(VARDATA(ret), s, (p - s));
971 
972  if (slice != (text *) DatumGetPointer(str))
973  pfree(slice);
974 
975  return ret;
976  }
977  else
978  elog(ERROR, "invalid backend encoding: encoding max length < 1");
979 
980  /* not reached: suppress compiler warning */
981  return NULL;
982 }
983 
984 /*
985  * textoverlay
986  * Replace specified substring of first string with second
987  *
988  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
989  * This code is a direct implementation of what the standard says.
990  */
991 Datum
993 {
994  text *t1 = PG_GETARG_TEXT_PP(0);
995  text *t2 = PG_GETARG_TEXT_PP(1);
996  int sp = PG_GETARG_INT32(2); /* substring start position */
997  int sl = PG_GETARG_INT32(3); /* substring length */
998 
999  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1000 }
1001 
1002 Datum
1004 {
1005  text *t1 = PG_GETARG_TEXT_PP(0);
1006  text *t2 = PG_GETARG_TEXT_PP(1);
1007  int sp = PG_GETARG_INT32(2); /* substring start position */
1008  int sl;
1009 
1010  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1011  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1012 }
1013 
1014 static text *
1015 text_overlay(text *t1, text *t2, int sp, int sl)
1016 {
1017  text *result;
1018  text *s1;
1019  text *s2;
1020  int sp_pl_sl;
1021 
1022  /*
1023  * Check for possible integer-overflow cases. For negative sp, throw a
1024  * "substring length" error because that's what should be expected
1025  * according to the spec's definition of OVERLAY().
1026  */
1027  if (sp <= 0)
1028  ereport(ERROR,
1029  (errcode(ERRCODE_SUBSTRING_ERROR),
1030  errmsg("negative substring length not allowed")));
1031  sp_pl_sl = sp + sl;
1032  if (sp_pl_sl <= sl)
1033  ereport(ERROR,
1034  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1035  errmsg("integer out of range")));
1036 
1037  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1038  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1039  result = text_catenate(s1, t2);
1040  result = text_catenate(result, s2);
1041 
1042  return result;
1043 }
1044 
1045 /*
1046  * textpos -
1047  * Return the position of the specified substring.
1048  * Implements the SQL POSITION() function.
1049  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1050  * - thomas 1997-07-27
1051  */
1052 Datum
1054 {
1055  text *str = PG_GETARG_TEXT_PP(0);
1056  text *search_str = PG_GETARG_TEXT_PP(1);
1057 
1058  PG_RETURN_INT32((int32) text_position(str, search_str));
1059 }
1060 
1061 /*
1062  * text_position -
1063  * Does the real work for textpos()
1064  *
1065  * Inputs:
1066  * t1 - string to be searched
1067  * t2 - pattern to match within t1
1068  * Result:
1069  * Character index of the first matched char, starting from 1,
1070  * or 0 if no match.
1071  *
1072  * This is broken out so it can be called directly by other string processing
1073  * functions.
1074  */
1075 static int
1077 {
1079  int result;
1080 
1081  text_position_setup(t1, t2, &state);
1082  result = text_position_next(1, &state);
1083  text_position_cleanup(&state);
1084  return result;
1085 }
1086 
1087 
1088 /*
1089  * text_position_setup, text_position_next, text_position_cleanup -
1090  * Component steps of text_position()
1091  *
1092  * These are broken out so that a string can be efficiently searched for
1093  * multiple occurrences of the same pattern. text_position_next may be
1094  * called multiple times with increasing values of start_pos, which is
1095  * the 1-based character position to start the search from. The "state"
1096  * variable is normally just a local variable in the caller.
1097  */
1098 
1099 static void
1101 {
1102  int len1 = VARSIZE_ANY_EXHDR(t1);
1103  int len2 = VARSIZE_ANY_EXHDR(t2);
1104 
1106  {
1107  /* simple case - single byte encoding */
1108  state->use_wchar = false;
1109  state->str1 = VARDATA_ANY(t1);
1110  state->str2 = VARDATA_ANY(t2);
1111  state->len1 = len1;
1112  state->len2 = len2;
1113  }
1114  else
1115  {
1116  /* not as simple - multibyte encoding */
1117  pg_wchar *p1,
1118  *p2;
1119 
1120  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1121  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1122  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1123  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1124 
1125  state->use_wchar = true;
1126  state->wstr1 = p1;
1127  state->wstr2 = p2;
1128  state->len1 = len1;
1129  state->len2 = len2;
1130  }
1131 
1132  /*
1133  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1134  * notes we use the terminology that the "haystack" is the string to be
1135  * searched (t1) and the "needle" is the pattern being sought (t2).
1136  *
1137  * If the needle is empty or bigger than the haystack then there is no
1138  * point in wasting cycles initializing the table. We also choose not to
1139  * use B-M-H for needles of length 1, since the skip table can't possibly
1140  * save anything in that case.
1141  */
1142  if (len1 >= len2 && len2 > 1)
1143  {
1144  int searchlength = len1 - len2;
1145  int skiptablemask;
1146  int last;
1147  int i;
1148 
1149  /*
1150  * First we must determine how much of the skip table to use. The
1151  * declaration of TextPositionState allows up to 256 elements, but for
1152  * short search problems we don't really want to have to initialize so
1153  * many elements --- it would take too long in comparison to the
1154  * actual search time. So we choose a useful skip table size based on
1155  * the haystack length minus the needle length. The closer the needle
1156  * length is to the haystack length the less useful skipping becomes.
1157  *
1158  * Note: since we use bit-masking to select table elements, the skip
1159  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1160  */
1161  if (searchlength < 16)
1162  skiptablemask = 3;
1163  else if (searchlength < 64)
1164  skiptablemask = 7;
1165  else if (searchlength < 128)
1166  skiptablemask = 15;
1167  else if (searchlength < 512)
1168  skiptablemask = 31;
1169  else if (searchlength < 2048)
1170  skiptablemask = 63;
1171  else if (searchlength < 4096)
1172  skiptablemask = 127;
1173  else
1174  skiptablemask = 255;
1175  state->skiptablemask = skiptablemask;
1176 
1177  /*
1178  * Initialize the skip table. We set all elements to the needle
1179  * length, since this is the correct skip distance for any character
1180  * not found in the needle.
1181  */
1182  for (i = 0; i <= skiptablemask; i++)
1183  state->skiptable[i] = len2;
1184 
1185  /*
1186  * Now examine the needle. For each character except the last one,
1187  * set the corresponding table element to the appropriate skip
1188  * distance. Note that when two characters share the same skip table
1189  * entry, the one later in the needle must determine the skip
1190  * distance.
1191  */
1192  last = len2 - 1;
1193 
1194  if (!state->use_wchar)
1195  {
1196  const char *str2 = state->str2;
1197 
1198  for (i = 0; i < last; i++)
1199  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1200  }
1201  else
1202  {
1203  const pg_wchar *wstr2 = state->wstr2;
1204 
1205  for (i = 0; i < last; i++)
1206  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1207  }
1208  }
1209 }
1210 
/*
 * Search the haystack for the next occurrence of the needle, beginning at
 * the 1-based character position start_pos.
 *
 * Returns the 1-based position of the match, or 0 if there is none.  An
 * empty needle matches immediately and yields start_pos itself.
 *
 * Needles longer than one character are located with the skip table kept
 * in *state: the needle is compared right-to-left against the haystack,
 * and on a mismatch the haystack character aligned with the needle's last
 * position selects how far to slide forward.  The same code is written out
 * twice, once for char strings and once for pg_wchar strings.
 */
1211 static int
1213 {
1214  int haystack_len = state->len1;
1215  int needle_len = state->len2;
1216  int skiptablemask = state->skiptablemask;
1217 
1218  Assert(start_pos > 0); /* else caller error */
1219 
1220  if (needle_len <= 0)
1221  return start_pos; /* result for empty pattern */
1222 
1223  start_pos--; /* adjust for zero based arrays */
1224 
1225  /* Done if the needle can't possibly fit */
1226  if (haystack_len < start_pos + needle_len)
1227  return 0;
1228 
1229  if (!state->use_wchar)
1230  {
1231  /* simple case - single byte encoding */
1232  const char *haystack = state->str1;
1233  const char *needle = state->str2;
1234  const char *haystack_end = &haystack[haystack_len];
1235  const char *hptr;
1236 
1237  if (needle_len == 1)
1238  {
1239  /* No point in using B-M-H for a one-character needle */
1240  char nchar = *needle;
1241 
1242  hptr = &haystack[start_pos];
1243  while (hptr < haystack_end)
1244  {
1245  if (*hptr == nchar)
1246  return hptr - haystack + 1;
1247  hptr++;
1248  }
1249  }
1250  else
1251  {
1252  const char *needle_last = &needle[needle_len - 1];
1253 
1254  /* Start at startpos plus the length of the needle */
1255  hptr = &haystack[start_pos + needle_len - 1];
1256  while (hptr < haystack_end)
1257  {
1258  /* Match the needle scanning *backward* */
1259  const char *nptr;
1260  const char *p;
1261 
1262  nptr = needle_last;
1263  p = hptr;
  /*
   * This loop ends either on a mismatch or through the return
   * below.  p cannot step in front of the haystack: hptr is always
   * at least needle_len - 1 characters in, and p is decremented at
   * most needle_len - 1 times before nptr reaches needle.
   */
1264  while (*nptr == *p)
1265  {
1266  /* Matched it all? If so, return 1-based position */
1267  if (nptr == needle)
1268  return p - haystack + 1;
1269  nptr--, p--;
1270  }
1271 
1272  /*
1273  * No match, so use the haystack char at hptr to decide how
1274  * far to advance. If the needle had any occurrence of that
1275  * character (or more precisely, one sharing the same
1276  * skiptable entry) before its last character, then we advance
1277  * far enough to align the last such needle character with
1278  * that haystack position. Otherwise we can advance by the
1279  * whole needle length.
1280  */
1281  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1282  }
1283  }
1284  }
1285  else
1286  {
1287  /* The multibyte char version. This works exactly the same way. */
1288  const pg_wchar *haystack = state->wstr1;
1289  const pg_wchar *needle = state->wstr2;
1290  const pg_wchar *haystack_end = &haystack[haystack_len];
1291  const pg_wchar *hptr;
1292 
1293  if (needle_len == 1)
1294  {
1295  /* No point in using B-M-H for a one-character needle */
1296  pg_wchar nchar = *needle;
1297 
1298  hptr = &haystack[start_pos];
1299  while (hptr < haystack_end)
1300  {
1301  if (*hptr == nchar)
1302  return hptr - haystack + 1;
1303  hptr++;
1304  }
1305  }
1306  else
1307  {
1308  const pg_wchar *needle_last = &needle[needle_len - 1];
1309 
1310  /* Start at startpos plus the length of the needle */
1311  hptr = &haystack[start_pos + needle_len - 1];
1312  while (hptr < haystack_end)
1313  {
1314  /* Match the needle scanning *backward* */
1315  const pg_wchar *nptr;
1316  const pg_wchar *p;
1317 
1318  nptr = needle_last;
1319  p = hptr;
1320  while (*nptr == *p)
1321  {
1322  /* Matched it all? If so, return 1-based position */
1323  if (nptr == needle)
1324  return p - haystack + 1;
1325  nptr--, p--;
1326  }
1327 
1328  /*
1329  * No match, so use the haystack char at hptr to decide how
1330  * far to advance. If the needle had any occurrence of that
1331  * character (or more precisely, one sharing the same
1332  * skiptable entry) before its last character, then we advance
1333  * far enough to align the last such needle character with
1334  * that haystack position. Otherwise we can advance by the
1335  * whole needle length.
1336  */
1337  hptr += state->skiptable[*hptr & skiptablemask];
1338  }
1339  }
1340  }
1341 
1342  return 0; /* not found */
1343 }
1344 
/*
 * Release the working storage held in a search state.
 *
 * Only the wide-character copies (wstr1/wstr2) are freed, and only when
 * use_wchar is set; nothing is freed in the single-byte case.
 */
1345 static void
1347 {
1348  if (state->use_wchar)
1349  {
1350  pfree(state->wstr1);
1351  pfree(state->wstr2);
1352  }
1353 }
1354 
1355 /* varstr_cmp()
1356  * Comparison function for text strings with given lengths.
1357  * Includes locale support, but must copy strings to temporary memory
1358  * to allow null-termination for inputs to strcoll().
1359  * Returns an integer less than, equal to, or greater than zero, indicating
1360  * whether arg1 is less than, equal to, or greater than arg2.
 *
 * arg1/arg2 are treated as len1/len2 bytes of data and need not be
 * NUL-terminated.  The result is not normalized to -1/0/+1 on every path:
 * the memcmp(), strcoll() and strcmp() results are returned as-is.
 *
 * Paths taken, in order:
 *	- collation is C: plain memcmp(), shorter string sorts first on a tie;
 *	- inputs are bytewise identical: return 0 without calling strcoll();
 *	- WIN32 with a UTF-8 database: convert both inputs to UTF-16 and use
 *	  wcscoll()/wcscoll_l();
 *	- otherwise: NUL-terminated copies compared with strcoll()/strcoll_l().
 * In the last two cases a collation result of "equal" is broken by a
 * bytewise comparison, so only identical inputs compare equal.
 *
 * An invalid (zero) collid raises ERRCODE_INDETERMINATE_COLLATION.
1361  */
1362 int
1363 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1364 {
1365  int result;
1366 
1367  /*
1368  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1369  * have to do some memory copying. This turns out to be significantly
1370  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1371  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1372  */
1373  if (lc_collate_is_c(collid))
1374  {
1375  result = memcmp(arg1, arg2, Min(len1, len2));
1376  if ((result == 0) && (len1 != len2))
1377  result = (len1 < len2) ? -1 : 1;
1378  }
1379  else
1380  {
1381  char a1buf[TEXTBUFLEN];
1382  char a2buf[TEXTBUFLEN];
1383  char *a1p,
1384  *a2p;
1385 
1386 #ifdef HAVE_LOCALE_T
1387  pg_locale_t mylocale = 0;
1388 #endif
1389 
1390  if (collid != DEFAULT_COLLATION_OID)
1391  {
1392  if (!OidIsValid(collid))
1393  {
1394  /*
1395  * This typically means that the parser could not resolve a
1396  * conflict of implicit collations, so report it that way.
1397  */
1398  ereport(ERROR,
1399  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1400  errmsg("could not determine which collation to use for string comparison"),
1401  errhint("Use the COLLATE clause to set the collation explicitly.")));
1402  }
1403 #ifdef HAVE_LOCALE_T
1404  mylocale = pg_newlocale_from_collation(collid);
1405 #endif
1406  }
1407 
1408  /*
1409  * memcmp() can't tell us which of two unequal strings sorts first, but
1410  * it's a cheap way to tell if they're equal. Testing shows that
1411  * memcmp() followed by strcoll() is only trivially slower than
1412  * strcoll() by itself, so we don't lose much if this doesn't work out
1413  * very often, and if it does - for example, because there are many
1414  * equal strings in the input - then we win big by avoiding expensive
1415  * collation-aware comparisons.
1416  */
1417  if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1418  return 0;
1419 
1420 #ifdef WIN32
1421  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1422  if (GetDatabaseEncoding() == PG_UTF8)
1423  {
1424  int a1len;
1425  int a2len;
1426  int r;
1427 
  /*
   * a1len/a2len are buffer sizes in bytes.  Each buffer has room for
   * one 2-byte unit per input byte plus a terminating unit: the stack
   * buffer is used only while len < TEXTBUFLEN / 2, otherwise
   * len * 2 + 2 bytes are palloc'd.
   */
1428  if (len1 >= TEXTBUFLEN / 2)
1429  {
1430  a1len = len1 * 2 + 2;
1431  a1p = palloc(a1len);
1432  }
1433  else
1434  {
1435  a1len = TEXTBUFLEN;
1436  a1p = a1buf;
1437  }
1438  if (len2 >= TEXTBUFLEN / 2)
1439  {
1440  a2len = len2 * 2 + 2;
1441  a2p = palloc(a2len);
1442  }
1443  else
1444  {
1445  a2len = TEXTBUFLEN;
1446  a2p = a2buf;
1447  }
1448 
1449  /* MultiByteToWideChar() does not work for zero-length input, so skip the call */
1450  if (len1 == 0)
1451  r = 0;
1452  else
1453  {
1454  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1455  (LPWSTR) a1p, a1len / 2);
1456  if (!r)
1457  ereport(ERROR,
1458  (errmsg("could not convert string to UTF-16: error code %lu",
1459  GetLastError())));
1460  }
1461  ((LPWSTR) a1p)[r] = 0;
1462 
1463  if (len2 == 0)
1464  r = 0;
1465  else
1466  {
1467  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1468  (LPWSTR) a2p, a2len / 2);
1469  if (!r)
1470  ereport(ERROR,
1471  (errmsg("could not convert string to UTF-16: error code %lu",
1472  GetLastError())));
1473  }
1474  ((LPWSTR) a2p)[r] = 0;
1475 
  /* cleared so that the %m in the error report below reflects wcscoll() */
1476  errno = 0;
1477 #ifdef HAVE_LOCALE_T
1478  if (mylocale)
1479  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
1480  else
1481 #endif
1482  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1483  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1484  * headers */
1485  ereport(ERROR,
1486  (errmsg("could not compare Unicode strings: %m")));
1487 
1488  /*
1489  * In some locales wcscoll() can claim that nonidentical strings
1490  * are equal. Believing that would be bad news for a number of
1491  * reasons, so we follow Perl's lead and sort "equal" strings
1492  * according to strcmp (on the UTF-8 representation).
1493  */
1494  if (result == 0)
1495  {
1496  result = memcmp(arg1, arg2, Min(len1, len2));
1497  if ((result == 0) && (len1 != len2))
1498  result = (len1 < len2) ? -1 : 1;
1499  }
1500 
1501  if (a1p != a1buf)
1502  pfree(a1p);
1503  if (a2p != a2buf)
1504  pfree(a2p);
1505 
1506  return result;
1507  }
1508 #endif /* WIN32 */
1509 
  /* Use the stack buffers when the string plus its NUL terminator fits */
1510  if (len1 >= TEXTBUFLEN)
1511  a1p = (char *) palloc(len1 + 1);
1512  else
1513  a1p = a1buf;
1514  if (len2 >= TEXTBUFLEN)
1515  a2p = (char *) palloc(len2 + 1);
1516  else
1517  a2p = a2buf;
1518 
1519  memcpy(a1p, arg1, len1);
1520  a1p[len1] = '\0';
1521  memcpy(a2p, arg2, len2);
1522  a2p[len2] = '\0';
1523 
1524 #ifdef HAVE_LOCALE_T
1525  if (mylocale)
1526  result = strcoll_l(a1p, a2p, mylocale);
1527  else
1528 #endif
1529  result = strcoll(a1p, a2p);
1530 
1531  /*
1532  * In some locales strcoll() can claim that nonidentical strings are
1533  * equal. Believing that would be bad news for a number of reasons,
1534  * so we follow Perl's lead and sort "equal" strings according to
1535  * strcmp().
1536  */
1537  if (result == 0)
1538  result = strcmp(a1p, a2p);
1539 
1540  if (a1p != a1buf)
1541  pfree(a1p);
1542  if (a2p != a2buf)
1543  pfree(a2p);
1544  }
1545 
1546  return result;
1547 }
1548 
1549 
1550 /* text_cmp()
1551  * Internal comparison function for text strings.
1552  * Returns -1, 0 or 1
1553  */
1554 static int
1555 text_cmp(text *arg1, text *arg2, Oid collid)
1556 {
1557  char *a1p,
1558  *a2p;
1559  int len1,
1560  len2;
1561 
1562  a1p = VARDATA_ANY(arg1);
1563  a2p = VARDATA_ANY(arg2);
1564 
1565  len1 = VARSIZE_ANY_EXHDR(arg1);
1566  len2 = VARSIZE_ANY_EXHDR(arg2);
1567 
1568  return varstr_cmp(a1p, len1, a2p, len2, collid);
1569 }
1570 
1571 /*
1572  * Comparison functions for text strings.
1573  *
1574  * Note: btree indexes need these routines not to leak memory; therefore,
1575  * be careful to free working copies of toasted datums. Most places don't
1576  * need to be so careful.
1577  */
1578 
/*
 * Equality test for two text datums (SQL-callable; arguments 0 and 1).
 *
 * Returns true only when both values have the same size and the same bytes.
 * The sizes come from toast_raw_datum_size() and include the VARHDRSZ
 * header, which is why VARHDRSZ is subtracted before the memcmp().
 */
1579 Datum
1581 {
1582  Datum arg1 = PG_GETARG_DATUM(0);
1583  Datum arg2 = PG_GETARG_DATUM(1);
1584  bool result;
1585  Size len1,
1586  len2;
1587 
1588  /*
1589  * Since we only care about equality or not-equality, we can avoid all the
1590  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1591  * don't even have to do a bitwise comparison if we can show the lengths
1592  * of the strings are unequal; which might save us from having to detoast
1593  * one or both values.
1594  */
1595  len1 = toast_raw_datum_size(arg1);
1596  len2 = toast_raw_datum_size(arg2);
1597  if (len1 != len2)
1598  result = false;
1599  else
1600  {
1601  text *targ1 = DatumGetTextPP(arg1);
1602  text *targ2 = DatumGetTextPP(arg2);
1603 
1604  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1605  len1 - VARHDRSZ) == 0);
1606 
1607  PG_FREE_IF_COPY(targ1, 0);
1608  PG_FREE_IF_COPY(targ2, 1);
1609  }
1610 
1611  PG_RETURN_BOOL(result);
1612 }
1613 
/*
 * Inequality test for two text datums (SQL-callable; arguments 0 and 1).
 *
 * Returns true when the sizes differ or, for equal sizes, when the bytes
 * differ.  Values of different size are never detoasted.
 */
1614 Datum
1616 {
1617  Datum arg1 = PG_GETARG_DATUM(0);
1618  Datum arg2 = PG_GETARG_DATUM(1);
1619  bool result;
1620  Size len1,
1621  len2;
1622 
1623  /* See comment in texteq() */
1624  len1 = toast_raw_datum_size(arg1);
1625  len2 = toast_raw_datum_size(arg2);
1626  if (len1 != len2)
1627  result = true;
1628  else
1629  {
1630  text *targ1 = DatumGetTextPP(arg1);
1631  text *targ2 = DatumGetTextPP(arg2);
1632 
1633  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1634  len1 - VARHDRSZ) != 0);
1635 
1636  PG_FREE_IF_COPY(targ1, 0);
1637  PG_FREE_IF_COPY(targ2, 1);
1638  }
1639 
1640  PG_RETURN_BOOL(result);
1641 }
1642 
/*
 * "Less than" for text: true when text_cmp() under the call's collation
 * is negative.  Detoasted argument copies are freed before returning.
 */
1643 Datum
1645 {
1646  text *arg1 = PG_GETARG_TEXT_PP(0);
1647  text *arg2 = PG_GETARG_TEXT_PP(1);
1648  bool result;
1649 
1650  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1651 
1652  PG_FREE_IF_COPY(arg1, 0);
1653  PG_FREE_IF_COPY(arg2, 1);
1654 
1655  PG_RETURN_BOOL(result);
1656 }
1657 
/*
 * "Less than or equal" for text: true when text_cmp() under the call's
 * collation is zero or negative.
 */
1658 Datum
1660 {
1661  text *arg1 = PG_GETARG_TEXT_PP(0);
1662  text *arg2 = PG_GETARG_TEXT_PP(1);
1663  bool result;
1664 
1665  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1666 
1667  PG_FREE_IF_COPY(arg1, 0);
1668  PG_FREE_IF_COPY(arg2, 1);
1669 
1670  PG_RETURN_BOOL(result);
1671 }
1672 
/*
 * "Greater than" for text: true when text_cmp() under the call's collation
 * is positive.
 */
1673 Datum
1675 {
1676  text *arg1 = PG_GETARG_TEXT_PP(0);
1677  text *arg2 = PG_GETARG_TEXT_PP(1);
1678  bool result;
1679 
1680  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1681 
1682  PG_FREE_IF_COPY(arg1, 0);
1683  PG_FREE_IF_COPY(arg2, 1);
1684 
1685  PG_RETURN_BOOL(result);
1686 }
1687 
/*
 * "Greater than or equal" for text: true when text_cmp() under the call's
 * collation is zero or positive.
 */
1688 Datum
1690 {
1691  text *arg1 = PG_GETARG_TEXT_PP(0);
1692  text *arg2 = PG_GETARG_TEXT_PP(1);
1693  bool result;
1694 
1695  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1696 
1697  PG_FREE_IF_COPY(arg1, 0);
1698  PG_FREE_IF_COPY(arg2, 1);
1699 
1700  PG_RETURN_BOOL(result);
1701 }
1702 
/*
 * Three-way comparison of two text values under the call's collation.
 *
 * Returns text_cmp()'s result as an int32: negative, zero or positive.
 * Detoasted argument copies are freed before returning.
 */
1703 Datum
1705 {
1706  text *arg1 = PG_GETARG_TEXT_PP(0);
1707  text *arg2 = PG_GETARG_TEXT_PP(1);
1708  int32 result;
1709 
1710  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1711 
1712  PG_FREE_IF_COPY(arg1, 0);
1713  PG_FREE_IF_COPY(arg2, 1);
1714 
1715  PG_RETURN_INT32(result);
1716 }
1717 
/*
 * Sort-support setup entry point for text.
 *
 * Runs btsortsupport_worker() with ssup->ssup_cxt as the current memory
 * context, so whatever the worker allocates is created in that context,
 * then restores the previous context.  Returns void.
 *
 * NOTE(review): the statement that declares and initializes "ssup" is not
 * visible in this listing.
 */
1718 Datum
1720 {
1722  Oid collid = ssup->ssup_collation;
1723  MemoryContext oldcontext;
1724 
1725  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1726 
1727  btsortsupport_worker(ssup, collid);
1728 
1729  MemoryContextSwitchTo(oldcontext);
1730 
1731  PG_RETURN_VOID();
1732 }
1733 
/*
 * Choose the comparator and set up per-sort state for a text sort.
 *
 *	- C collation: install bttextfastcmp_c and return; no extra state.
 *	- WIN32 with a UTF-8 database: return without changing *ssup.
 *	- otherwise: allocate a TextSortSupport holding the locale (when
 *	  HAVE_LOCALE_T is defined) and two TEXTBUFLEN-byte scratch buffers,
 *	  and store it in ssup->ssup_extra.
 *
 * An invalid (zero) collid raises ERRCODE_INDETERMINATE_COLLATION.
 * All allocations are made with palloc(), i.e. in the caller's current
 * memory context.
 */
1734 static void
1736 {
1737  TextSortSupport *tss;
1738 
1739  /*
1740  * If LC_COLLATE = C, we can make things quite a bit faster by using
1741  * memcmp() rather than strcoll(). To minimize the per-comparison
1742  * overhead, we make this decision just once for the whole sort.
1743  */
1744  if (lc_collate_is_c(collid))
1745  {
1746  ssup->comparator = bttextfastcmp_c;
1747  return;
1748  }
1749 
1750  /*
1751  * WIN32 requires complex hacks when the database encoding is UTF-8 (except
1752  * when using the "C" collation). For now, we don't optimize that case.
1753  */
1754 #ifdef WIN32
1755  if (GetDatabaseEncoding() == PG_UTF8)
1756  return;
1757 #endif
1758 
1759  /*
1760  * We may need a collation-sensitive comparison. To make things faster,
1761  * we'll figure out the collation based on the locale id and cache the
1762  * result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
1763  * prepare one or two palloc'd buffers to use as temporary workspace. In
1764  * the ad-hoc comparison case we only use palloc'd buffers when we need
1765  * more space than we're comfortable allocating on the stack, but here we
1766  * can keep the buffers around for the whole sort, so it makes sense to
1767  * allocate them once and use them unconditionally.
1768  */
1769  tss = palloc(sizeof(TextSortSupport));
1770 #ifdef HAVE_LOCALE_T
1771  tss->locale = 0;
1772 #endif
1773 
1774  if (collid != DEFAULT_COLLATION_OID)
1775  {
1776  if (!OidIsValid(collid))
1777  {
1778  /*
1779  * This typically means that the parser could not resolve a
1780  * conflict of implicit collations, so report it that way.
1781  */
1782  ereport(ERROR,
1783  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1784  errmsg("could not determine which collation to use for string comparison"),
1785  errhint("Use the COLLATE clause to set the collation explicitly.")));
1786  }
1787 #ifdef HAVE_LOCALE_T
1788  tss->locale = pg_newlocale_from_collation(collid);
1789 #endif
1790  }
1791 
1792  tss->buf1 = palloc(TEXTBUFLEN);
1793  tss->buflen1 = TEXTBUFLEN;
1794  tss->buf2 = palloc(TEXTBUFLEN);
1795  tss->buflen2 = TEXTBUFLEN;
1796 
1797  ssup->ssup_extra = tss;
  /*
   * NOTE(review): one statement is missing from this listing at this
   * point; presumably it installs the locale-aware comparator -- confirm
   * against the original file.
   */
1799 }
1800 
1801 /*
1802  * sortsupport comparison func (for C locale case)
 *
 * Compares the two text datums x and y bytewise with memcmp() over the
 * shorter length; if that prefix is equal, the shorter value sorts first.
 * Returns a negative, zero or positive int.  Any detoasted copy made by
 * DatumGetTextPP() is freed before returning.
1803  */
1804 static int
1806 {
1807  text *arg1 = DatumGetTextPP(x);
1808  text *arg2 = DatumGetTextPP(y);
1809  char *a1p,
1810  *a2p;
1811  int len1,
1812  len2,
1813  result;
1814 
1815  a1p = VARDATA_ANY(arg1);
1816  a2p = VARDATA_ANY(arg2);
1817 
1818  len1 = VARSIZE_ANY_EXHDR(arg1);
1819  len2 = VARSIZE_ANY_EXHDR(arg2);
1820 
1821  result = memcmp(a1p, a2p, Min(len1, len2));
1822  if ((result == 0) && (len1 != len2))
1823  result = (len1 < len2) ? -1 : 1;
1824 
1825  /* We can't afford to leak memory here. */
  /* a pointer different from the input datum means a copy was made */
1826  if (PointerGetDatum(arg1) != x)
1827  pfree(arg1);
1828  if (PointerGetDatum(arg2) != y)
1829  pfree(arg2);
1830 
1831  return result;
1832 }
1833 
1834 /*
1835  * sortsupport comparison func (for locale case)
 *
 * Compares the text datums x and y with strcoll(), or strcoll_l() when the
 * TextSortSupport in ssup->ssup_extra carries a locale.  The inputs are
 * first copied, NUL-terminated, into the scratch buffers kept in that
 * struct; a buffer that is too small is replaced by a larger one allocated
 * in ssup->ssup_cxt.
 *
 * Returns a negative, zero or positive int.  Bytewise-identical inputs
 * return 0 without calling strcoll(); a strcoll() result of 0 for
 * non-identical inputs is broken with strcmp().
1836  */
1837 static int
1839 {
1840  text *arg1 = DatumGetTextPP(x);
1841  text *arg2 = DatumGetTextPP(y);
1842  TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
1843 
1844  /* working state */
1845  char *a1p,
1846  *a2p;
1847  int len1,
1848  len2,
1849  result;
1850 
1851  a1p = VARDATA_ANY(arg1);
1852  a2p = VARDATA_ANY(arg2);
1853 
1854  len1 = VARSIZE_ANY_EXHDR(arg1);
1855  len2 = VARSIZE_ANY_EXHDR(arg2);
1856 
1857  /* Fast pre-check for equality, as discussed in varstr_cmp() */
1858  if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
1859  {
1860  result = 0;
1861  goto done;
1862  }
1863 
  /*
   * Grow a scratch buffer when the string plus its NUL terminator does
   * not fit.  The new size is double the old one, capped at MaxAllocSize,
   * but never less than len + 1.  The old contents are not needed, so
   * the buffer is freed and reallocated rather than repalloc'd.
   */
1864  if (len1 >= tss->buflen1)
1865  {
1866  pfree(tss->buf1);
1867  tss->buflen1 = Max(len1 + 1, Min(tss->buflen1 * 2, MaxAllocSize));
1868  tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
1869  }
1870  if (len2 >= tss->buflen2)
1871  {
1872  pfree(tss->buf2);
1873  tss->buflen2 = Max(len2 + 1, Min(tss->buflen2 * 2, MaxAllocSize));
1874  tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
1875  }
1876 
1877  memcpy(tss->buf1, a1p, len1);
1878  tss->buf1[len1] = '\0';
1879  memcpy(tss->buf2, a2p, len2);
1880  tss->buf2[len2] = '\0';
1881 
1882 #ifdef HAVE_LOCALE_T
1883  if (tss->locale)
1884  result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
1885  else
1886 #endif
1887  result = strcoll(tss->buf1, tss->buf2);
1888 
1889  /*
1890  * In some locales strcoll() can claim that nonidentical strings are equal.
1891  * Believing that would be bad news for a number of reasons, so we follow
1892  * Perl's lead and sort "equal" strings according to strcmp().
1893  */
1894  if (result == 0)
1895  result = strcmp(tss->buf1, tss->buf2);
1896 
1897 done:
1898  /* We can't afford to leak memory here. */
1899  if (PointerGetDatum(arg1) != x)
1900  pfree(arg1);
1901  if (PointerGetDatum(arg2) != y)
1902  pfree(arg2);
1903 
1904  return result;
1905 }
1906 
/*
 * Return whichever of two text values compares greater under the call's
 * collation.  When they compare equal, the second argument is returned.
 * The result is one of the input pointers, so the arguments are not freed.
 */
1907 Datum
1909 {
1910  text *arg1 = PG_GETARG_TEXT_PP(0);
1911  text *arg2 = PG_GETARG_TEXT_PP(1);
1912  text *result;
1913 
1914  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
1915 
1916  PG_RETURN_TEXT_P(result);
1917 }
1918 
/*
 * Return whichever of two text values compares smaller under the call's
 * collation.  When they compare equal, the second argument is returned.
 * The result is one of the input pointers, so the arguments are not freed.
 */
1919 Datum
1921 {
1922  text *arg1 = PG_GETARG_TEXT_PP(0);
1923  text *arg2 = PG_GETARG_TEXT_PP(1);
1924  text *result;
1925 
1926  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
1927 
1928  PG_RETURN_TEXT_P(result);
1929 }
1930 
1931 
1932 /*
1933  * The following operators support character-by-character comparison
1934  * of text datums, to allow building indexes suitable for LIKE clauses.
1935  * Note that the regular texteq/textne comparison operators are assumed
1936  * to be compatible with these!
1937  */
1938 
/*
 * Bytewise, collation-independent three-way comparison of two text values.
 *
 * The common prefix (the shorter of the two lengths) is compared with
 * memcmp(); a nonzero memcmp() result is returned as-is.  If the prefixes
 * match, the shorter value sorts first (-1 or 1), and equal lengths give 0.
 */
1939 static int
1941 {
1942  int result;
1943  int len1,
1944  len2;
1945 
1946  len1 = VARSIZE_ANY_EXHDR(arg1);
1947  len2 = VARSIZE_ANY_EXHDR(arg2);
1948 
1949  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1950  if (result != 0)
1951  return result;
1952  else if (len1 < len2)
1953  return -1;
1954  else if (len1 > len2)
1955  return 1;
1956  else
1957  return 0;
1958 }
1959 
1960 
/*
 * Bytewise "less than": true when internal_text_pattern_compare() is
 * negative.  Detoasted argument copies are freed before returning.
 */
1961 Datum
1963 {
1964  text *arg1 = PG_GETARG_TEXT_PP(0);
1965  text *arg2 = PG_GETARG_TEXT_PP(1);
1966  int result;
1967 
1968  result = internal_text_pattern_compare(arg1, arg2);
1969 
1970  PG_FREE_IF_COPY(arg1, 0);
1971  PG_FREE_IF_COPY(arg2, 1);
1972 
1973  PG_RETURN_BOOL(result < 0);
1974 }
1975 
1976 
/*
 * Bytewise "less than or equal": true when
 * internal_text_pattern_compare() is zero or negative.
 */
1977 Datum
1979 {
1980  text *arg1 = PG_GETARG_TEXT_PP(0);
1981  text *arg2 = PG_GETARG_TEXT_PP(1);
1982  int result;
1983 
1984  result = internal_text_pattern_compare(arg1, arg2);
1985 
1986  PG_FREE_IF_COPY(arg1, 0);
1987  PG_FREE_IF_COPY(arg2, 1);
1988 
1989  PG_RETURN_BOOL(result <= 0);
1990 }
1991 
1992 
/*
 * Bytewise "greater than or equal": true when
 * internal_text_pattern_compare() is zero or positive.
 */
1993 Datum
1995 {
1996  text *arg1 = PG_GETARG_TEXT_PP(0);
1997  text *arg2 = PG_GETARG_TEXT_PP(1);
1998  int result;
1999 
2000  result = internal_text_pattern_compare(arg1, arg2);
2001 
2002  PG_FREE_IF_COPY(arg1, 0);
2003  PG_FREE_IF_COPY(arg2, 1);
2004 
2005  PG_RETURN_BOOL(result >= 0);
2006 }
2007 
2008 
/*
 * Bytewise "greater than": true when internal_text_pattern_compare() is
 * positive.
 */
2009 Datum
2011 {
2012  text *arg1 = PG_GETARG_TEXT_PP(0);
2013  text *arg2 = PG_GETARG_TEXT_PP(1);
2014  int result;
2015 
2016  result = internal_text_pattern_compare(arg1, arg2);
2017 
2018  PG_FREE_IF_COPY(arg1, 0);
2019  PG_FREE_IF_COPY(arg2, 1);
2020 
2021  PG_RETURN_BOOL(result > 0);
2022 }
2023 
2024 
/*
 * Bytewise three-way comparison: returns the result of
 * internal_text_pattern_compare() as an int32 (negative, zero or positive).
 */
2025 Datum
2027 {
2028  text *arg1 = PG_GETARG_TEXT_PP(0);
2029  text *arg2 = PG_GETARG_TEXT_PP(1);
2030  int result;
2031 
2032  result = internal_text_pattern_compare(arg1, arg2);
2033 
2034  PG_FREE_IF_COPY(arg1, 0);
2035  PG_FREE_IF_COPY(arg2, 1);
2036 
2037  PG_RETURN_INT32(result);
2038 }
2039 
2040 
2041 /*-------------------------------------------------------------
2042  * byteaoctetlen
2043  *
2044  * get the number of bytes contained in an instance of type 'bytea'
 *
 * The argument is fetched as a raw Datum and is not detoasted.
 *
 * NOTE(review): the statement that computes and returns the length is not
 * visible in this listing.
2045  *-------------------------------------------------------------
2046  */
2047 Datum
2049 {
2050  Datum str = PG_GETARG_DATUM(0);
2051 
2052  /* We need not detoast the input at all */
2054 }
2055 
2056 /*
2057  * byteacat -
2058  * takes two bytea* and returns a bytea* that is the concatenation of
2059  * the two.
2060  *
2061  * Cloned from textcat and modified as required.
 *
 * Both arguments are fetched with PG_GETARG_BYTEA_PP, so they may carry
 * short headers.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably it hands t1 and t2 to bytea_catenate() -- confirm.
2062  */
2063 Datum
2065 {
2066  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2067  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2068 
2070 }
2071 
2072 /*
2073  * bytea_catenate
2074  * Guts of byteacat(), broken out so it can be used by other functions
2075  *
2076  * Arguments can be in short-header form, but not compressed or out-of-line
 *
 * Returns a freshly palloc'd bytea holding t1's bytes followed by t2's.
 * The result always has a full VARHDRSZ header; neither input is modified
 * or freed.
2077  */
2078 static bytea *
2080 {
2081  bytea *result;
2082  int len1,
2083  len2,
2084  len;
2085  char *ptr;
2086 
2087  len1 = VARSIZE_ANY_EXHDR(t1);
2088  len2 = VARSIZE_ANY_EXHDR(t2);
2089 
2090  /* paranoia ... probably should throw error instead? */
2091  if (len1 < 0)
2092  len1 = 0;
2093  if (len2 < 0)
2094  len2 = 0;
2095 
2096  len = len1 + len2 + VARHDRSZ;
2097  result = (bytea *) palloc(len);
2098 
2099  /* Set size of result string... */
2100  SET_VARSIZE(result, len);
2101 
2102  /* Fill data field of result string... */
2103  ptr = VARDATA(result);
2104  if (len1 > 0)
2105  memcpy(ptr, VARDATA_ANY(t1), len1);
2106  if (len2 > 0)
2107  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2108 
2109  return result;
2110 }
2111 
/*
 * Convert a C string to a bytea by passing it through byteain.
 * Used below to build the empty result "".
 */
2112 #define PG_STR_GET_BYTEA(str_) \
2113  DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2114 
2115 /*
2116  * bytea_substr()
2117  * Return a substring starting at the specified position.
2118  * Cloned from text_substr and modified as required.
2119  *
2120  * Input:
2121  * - string
2122  * - starting position (is one-based)
2123  * - string length (optional)
2124  *
2125  * If the starting position is zero or less, then return from the start of the string
2126  * adjusting the length to be consistent with the "negative start" per SQL.
2127  * If the length is less than zero, an ERROR is thrown. If no third argument
2128  * (length) is provided, the length to the end of the string is assumed.
 *
 * This is the three-argument form: it passes int32 arguments 1 and 2 as
 * start and length, with length_not_specified = false.
 *
 * NOTE(review): the first line of the call (callee and first argument) is
 * not visible in this listing.
2129  */
2130 Datum
2132 {
2134  PG_GETARG_INT32(1),
2135  PG_GETARG_INT32(2),
2136  false));
2137 }
2138 
2139 /*
2140  * bytea_substr_no_len -
2141  * Wrapper to avoid opr_sanity failure due to
2142  * one function accepting a different number of args.
 *
 * Two-argument form: passes int32 argument 1 as the start, -1 as a
 * placeholder length, and length_not_specified = true.
 *
 * NOTE(review): the first line of the call (callee and first argument) is
 * not visible in this listing.
2143  */
2144 Datum
2146 {
2148  PG_GETARG_INT32(1),
2149  -1,
2150  true));
2151 }
2152 
2153 static bytea *
2155  int S,
2156  int L,
2157  bool length_not_specified)
2158 {
2159  int S1; /* adjusted start position */
2160  int L1; /* adjusted substring length */
2161 
2162  S1 = Max(S, 1);
2163 
2164  if (length_not_specified)
2165  {
2166  /*
2167  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2168  * end of the string if we pass it a negative value for length.
2169  */
2170  L1 = -1;
2171  }
2172  else
2173  {
2174  /* end position */
2175  int E = S + L;
2176 
2177  /*
2178  * A negative value for L is the only way for the end position to be
2179  * before the start. SQL99 says to throw an error.
2180  */
2181  if (E < S)
2182  ereport(ERROR,
2183  (errcode(ERRCODE_SUBSTRING_ERROR),
2184  errmsg("negative substring length not allowed")));
2185 
2186  /*
2187  * A zero or negative value for the end position can happen if the
2188  * start was negative or one. SQL99 says to return a zero-length
2189  * string.
2190  */
2191  if (E < 1)
2192  return PG_STR_GET_BYTEA("");
2193 
2194  L1 = E - S1;
2195  }
2196 
2197  /*
2198  * If the start position is past the end of the string, SQL99 says to
2199  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2200  * us. Convert to zero-based starting position
2201  */
2202  return DatumGetByteaPSlice(str, S1 - 1, L1);
2203 }
2204 
2205 /*
2206  * byteaoverlay
2207  * Replace specified substring of first string with second
2208  *
2209  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2210  * This code is a direct implementation of what the standard says.
 *
 * Four-argument form: arguments 2 and 3 are the one-based start position
 * and the length of the span of t1 to be replaced.  The work is done by
 * bytea_overlay().
2211  */
2212 Datum
2214 {
2215  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2216  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2217  int sp = PG_GETARG_INT32(2); /* substring start position */
2218  int sl = PG_GETARG_INT32(3); /* substring length */
2219 
2220  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2221 }
2222 
/*
 * Three-argument form of the bytea overlay function: the length of the
 * span to replace is not given and defaults to the length of t2, so t2
 * replaces exactly as many bytes as it contains.
 */
2223 Datum
2225 {
2226  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2227  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2228  int sp = PG_GETARG_INT32(2); /* substring start position */
2229  int sl;
2230 
2231  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2232  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2233 }
2234 
2235 static bytea *
2236 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2237 {
2238  bytea *result;
2239  bytea *s1;
2240  bytea *s2;
2241  int sp_pl_sl;
2242 
2243  /*
2244  * Check for possible integer-overflow cases. For negative sp, throw a
2245  * "substring length" error because that's what should be expected
2246  * according to the spec's definition of OVERLAY().
2247  */
2248  if (sp <= 0)
2249  ereport(ERROR,
2250  (errcode(ERRCODE_SUBSTRING_ERROR),
2251  errmsg("negative substring length not allowed")));
2252  sp_pl_sl = sp + sl;
2253  if (sp_pl_sl <= sl)
2254  ereport(ERROR,
2255  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2256  errmsg("integer out of range")));
2257 
2258  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2259  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2260  result = bytea_catenate(s1, t2);
2261  result = bytea_catenate(result, s2);
2262 
2263  return result;
2264 }
2265 
2266 /*
2267  * byteapos -
2268  * Return the position of the specified substring.
2269  * Implements the SQL POSITION() function.
2270  * Cloned from textpos and modified as required.
 *
 * t1 is the value searched, t2 the pattern.  Returns the one-based offset
 * of the first occurrence of t2 in t1, 0 if there is none, and 1 for an
 * empty pattern.  The search is a plain left-to-right scan that tries a
 * memcmp() at every offset where the first byte matches.
2271  */
2272 Datum
2274 {
2275  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2276  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2277  int pos;
2278  int px,
2279  p;
2280  int len1,
2281  len2;
2282  char *p1,
2283  *p2;
2284 
2285  len1 = VARSIZE_ANY_EXHDR(t1);
2286  len2 = VARSIZE_ANY_EXHDR(t2);
2287 
2288  if (len2 <= 0)
2289  PG_RETURN_INT32(1); /* result for empty pattern */
2290 
2291  p1 = VARDATA_ANY(t1);
2292  p2 = VARDATA_ANY(t2);
2293 
2294  pos = 0;
  /* last offset at which the pattern still fits; negative if it never does */
2295  px = (len1 - len2);
2296  for (p = 0; p <= px; p++)
2297  {
2298  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2299  {
2300  pos = p + 1;
2301  break;
2302  };
2303  p1++;
2304  };
2305 
2306  PG_RETURN_INT32(pos);
2307 }
2308 
2309 /*-------------------------------------------------------------
2310  * byteaGetByte
2311  *
2312  * this routine treats "bytea" as an array of bytes.
2313  * It returns the Nth byte (a number between 0 and 255).
 *
 * N is zero-based.  An index outside 0..len-1 raises
 * ERRCODE_ARRAY_SUBSCRIPT_ERROR.
2314  *-------------------------------------------------------------
2315  */
2316 Datum
2318 {
2319  bytea *v = PG_GETARG_BYTEA_PP(0);
2320  int32 n = PG_GETARG_INT32(1);
2321  int len;
2322  int byte;
2323 
2324  len = VARSIZE_ANY_EXHDR(v);
2325 
2326  if (n < 0 || n >= len)
2327  ereport(ERROR,
2328  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2329  errmsg("index %d out of valid range, 0..%d",
2330  n, len - 1)));
2331 
  /* read through unsigned char so the result is 0..255, never negative */
2332  byte = ((unsigned char *) VARDATA_ANY(v))[n];
2333 
2334  PG_RETURN_INT32(byte);
2335 }
2336 
2337 /*-------------------------------------------------------------
2338  * byteaGetBit
2339  *
2340  * This routine treats a "bytea" type like an array of bits.
2341  * It returns the value of the Nth bit (0 or 1).
2342  *
2343  *-------------------------------------------------------------
2344  */
2345 Datum
2347 {
2348  bytea *v = PG_GETARG_BYTEA_PP(0);
2349  int32 n = PG_GETARG_INT32(1);
2350  int byteNo,
2351  bitNo;
2352  int len;
2353  int byte;
2354 
2355  len = VARSIZE_ANY_EXHDR(v);
2356 
2357  if (n < 0 || n >= len * 8)
2358  ereport(ERROR,
2359  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2360  errmsg("index %d out of valid range, 0..%d",
2361  n, len * 8 - 1)));
2362 
2363  byteNo = n / 8;
2364  bitNo = n % 8;
2365 
2366  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
2367 
2368  if (byte & (1 << bitNo))
2369  PG_RETURN_INT32(1);
2370  else
2371  PG_RETURN_INT32(0);
2372 }
2373 
2374 /*-------------------------------------------------------------
2375  * byteaSetByte
2376  *
2377  * Given an instance of type 'bytea' creates a new one with
2378  * the Nth byte set to the given value.
2379  *
 * N is zero-based.  An index outside 0..len-1 raises
 * ERRCODE_ARRAY_SUBSCRIPT_ERROR.  The new value is stored into an
 * unsigned char, so only its low-order 8 bits are kept; it is not
 * range-checked.  The input is copied; the copy is what gets
 * modified and returned.
 *
2380  *-------------------------------------------------------------
2381  */
2382 Datum
2384 {
2385  bytea *v = PG_GETARG_BYTEA_P(0);
2386  int32 n = PG_GETARG_INT32(1);
2387  int32 newByte = PG_GETARG_INT32(2);
2388  int len;
2389  bytea *res;
2390 
2391  len = VARSIZE(v) - VARHDRSZ;
2392 
2393  if (n < 0 || n >= len)
2394  ereport(ERROR,
2395  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2396  errmsg("index %d out of valid range, 0..%d",
2397  n, len - 1)));
2398 
2399  /*
2400  * Make a copy of the original varlena.
2401  */
2402  res = (bytea *) palloc(VARSIZE(v));
2403  memcpy((char *) res, (char *) v, VARSIZE(v));
2404 
2405  /*
2406  * Now set the byte.
2407  */
2408  ((unsigned char *) VARDATA(res))[n] = newByte;
2409 
2410  PG_RETURN_BYTEA_P(res);
2411 }
2412 
2413 /*-------------------------------------------------------------
2414  * byteaSetBit
2415  *
2416  * Given an instance of type 'bytea' creates a new one with
2417  * the Nth bit set to the given value.
2418  *
2419  *-------------------------------------------------------------
2420  */
2421 Datum
2423 {
2424  bytea *v = PG_GETARG_BYTEA_P(0);
2425  int32 n = PG_GETARG_INT32(1);
2426  int32 newBit = PG_GETARG_INT32(2);
2427  bytea *res;
2428  int len;
2429  int oldByte,
2430  newByte;
2431  int byteNo,
2432  bitNo;
2433 
2434  len = VARSIZE(v) - VARHDRSZ;
2435 
2436  if (n < 0 || n >= len * 8)
2437  ereport(ERROR,
2438  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2439  errmsg("index %d out of valid range, 0..%d",
2440  n, len * 8 - 1)));
2441 
2442  byteNo = n / 8;
2443  bitNo = n % 8;
2444 
2445  /*
2446  * sanity check!
2447  */
2448  if (newBit != 0 && newBit != 1)
2449  ereport(ERROR,
2450  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2451  errmsg("new bit must be 0 or 1")));
2452 
2453  /*
2454  * Make a copy of the original varlena.
2455  */
2456  res = (bytea *) palloc(VARSIZE(v));
2457  memcpy((char *) res, (char *) v, VARSIZE(v));
2458 
2459  /*
2460  * Update the byte.
2461  */
2462  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
2463 
2464  if (newBit == 0)
2465  newByte = oldByte & (~(1 << bitNo));
2466  else
2467  newByte = oldByte | (1 << bitNo);
2468 
2469  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
2470 
2471  PG_RETURN_BYTEA_P(res);
2472 }
2473 
2474 
2475 /* text_name()
2476  * Converts a text type to a Name type.
2477  */
2478 Datum
2480 {
2481  text *s = PG_GETARG_TEXT_PP(0);
2482  Name result;
2483  int len;
2484 
2485  len = VARSIZE_ANY_EXHDR(s);
2486 
2487  /* Truncate oversize input */
2488  if (len >= NAMEDATALEN)
2489  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
2490 
2491  /* We use palloc0 here to ensure result is zero-padded */
2492  result = (Name) palloc0(NAMEDATALEN);
2493  memcpy(NameStr(*result), VARDATA_ANY(s), len);
2494 
2495  PG_RETURN_NAME(result);
2496 }
2497 
2498 /* name_text()
2499  * Converts a Name type to a text type.
2500  */
2501 Datum
2503 {
2504  Name s = PG_GETARG_NAME(0);
2505 
2507 }
2508 
2509 
2510 /*
2511  * textToQualifiedNameList - convert a text object to list of names
2512  *
2513  * This implements the input parsing needed by nextval() and other
2514  * functions that take a text parameter representing a qualified name.
2515  * We split the name at dots, downcase if not double-quoted, and
2516  * truncate names if they're too long.
2517  */
2518 List *
2520 {
2521  char *rawname;
2522  List *result = NIL;
2523  List *namelist;
2524  ListCell *l;
2525 
2526  /* Convert to C string (handles possible detoasting). */
2527  /* Note we rely on being able to modify rawname below. */
2528  rawname = text_to_cstring(textval);
2529 
2530  if (!SplitIdentifierString(rawname, '.', &namelist))
2531  ereport(ERROR,
2532  (errcode(ERRCODE_INVALID_NAME),
2533  errmsg("invalid name syntax")));
2534 
2535  if (namelist == NIL)
2536  ereport(ERROR,
2537  (errcode(ERRCODE_INVALID_NAME),
2538  errmsg("invalid name syntax")));
2539 
2540  foreach(l, namelist)
2541  {
2542  char *curname = (char *) lfirst(l);
2543 
2544  result = lappend(result, makeString(pstrdup(curname)));
2545  }
2546 
2547  pfree(rawname);
2548  list_free(namelist);
2549 
2550  return result;
2551 }
2552 
2553 /*
2554  * SplitIdentifierString --- parse a string containing identifiers
2555  *
2556  * This is the guts of textToQualifiedNameList, and is exported for use in
2557  * other situations such as parsing GUC variables. In the GUC case, it's
2558  * important to avoid memory leaks, so the API is designed to minimize the
2559  * amount of stuff that needs to be allocated and freed.
2560  *
2561  * Inputs:
2562  * rawstring: the input string; must be overwritable! On return, it's
2563  * been modified to contain the separated identifiers.
2564  * separator: the separator punctuation expected between identifiers
2565  * (typically '.' or ','). Whitespace may also appear around
2566  * identifiers.
2567  * Outputs:
2568  * namelist: filled with a palloc'd list of pointers to identifiers within
2569  * rawstring. Caller should list_free() this even on error return.
2570  *
2571  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2572  *
2573  * Note that an empty string is considered okay here, though not in
2574  * textToQualifiedNameList.
2575  */
2576 bool
2577 SplitIdentifierString(char *rawstring, char separator,
2578  List **namelist)
2579 {
2580  char *nextp = rawstring;
2581  bool done = false;
2582 
2583  *namelist = NIL;
2584 
2585  while (isspace((unsigned char) *nextp))
2586  nextp++; /* skip leading whitespace */
2587 
2588  if (*nextp == '\0')
2589  return true; /* allow empty string */
2590 
2591  /* At the top of the loop, we are at start of a new identifier. */
2592  do
2593  {
2594  char *curname;
2595  char *endp;
2596 
2597  if (*nextp == '\"')
2598  {
2599  /* Quoted name --- collapse quote-quote pairs, no downcasing */
2600  curname = nextp + 1;
2601  for (;;)
2602  {
2603  endp = strchr(nextp + 1, '\"');
2604  if (endp == NULL)
2605  return false; /* mismatched quotes */
2606  if (endp[1] != '\"')
2607  break; /* found end of quoted name */
2608  /* Collapse adjacent quotes into one quote, and look again */
2609  memmove(endp, endp + 1, strlen(endp));
2610  nextp = endp;
2611  }
2612  /* endp now points at the terminating quote */
2613  nextp = endp + 1;
2614  }
2615  else
2616  {
2617  /* Unquoted name --- extends to separator or whitespace */
2618  char *downname;
2619  int len;
2620 
2621  curname = nextp;
2622  while (*nextp && *nextp != separator &&
2623  !isspace((unsigned char) *nextp))
2624  nextp++;
2625  endp = nextp;
2626  if (curname == nextp)
2627  return false; /* empty unquoted name not allowed */
2628 
2629  /*
2630  * Downcase the identifier, using same code as main lexer does.
2631  *
2632  * XXX because we want to overwrite the input in-place, we cannot
2633  * support a downcasing transformation that increases the string
2634  * length. This is not a problem given the current implementation
2635  * of downcase_truncate_identifier, but we'll probably have to do
2636  * something about this someday.
2637  */
2638  len = endp - curname;
2639  downname = downcase_truncate_identifier(curname, len, false);
2640  Assert(strlen(downname) <= len);
2641  strncpy(curname, downname, len);
2642  pfree(downname);
2643  }
2644 
2645  while (isspace((unsigned char) *nextp))
2646  nextp++; /* skip trailing whitespace */
2647 
2648  if (*nextp == separator)
2649  {
2650  nextp++;
2651  while (isspace((unsigned char) *nextp))
2652  nextp++; /* skip leading whitespace for next */
2653  /* we expect another name, so done remains false */
2654  }
2655  else if (*nextp == '\0')
2656  done = true;
2657  else
2658  return false; /* invalid syntax */
2659 
2660  /* Now safe to overwrite separator with a null */
2661  *endp = '\0';
2662 
2663  /* Truncate name if it's overlength */
2664  truncate_identifier(curname, strlen(curname), false);
2665 
2666  /*
2667  * Finished isolating current name --- add it to list
2668  */
2669  *namelist = lappend(*namelist, curname);
2670 
2671  /* Loop back if we didn't reach end of string */
2672  } while (!done);
2673 
2674  return true;
2675 }
2676 
2677 
2678 /*
2679  * SplitDirectoriesString --- parse a string containing directory names
2680  *
2681  * This is similar to SplitIdentifierString, except that the parsing
2682  * rules are meant to handle pathnames instead of identifiers: there is
2683  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
2684  * and we apply canonicalize_path() to each extracted string. Because of the
2685  * last, the returned strings are separately palloc'd rather than being
2686  * pointers into rawstring --- but we still scribble on rawstring.
2687  *
2688  * Inputs:
2689  * rawstring: the input string; must be modifiable!
2690  * separator: the separator punctuation expected between directories
2691  * (typically ',' or ';'). Whitespace may also appear around
2692  * directories.
2693  * Outputs:
2694  * namelist: filled with a palloc'd list of directory names.
2695  * Caller should list_free_deep() this even on error return.
2696  *
2697  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2698  *
2699  * Note that an empty string is considered okay here.
2700  */
2701 bool
2702 SplitDirectoriesString(char *rawstring, char separator,
2703  List **namelist)
2704 {
2705  char *nextp = rawstring;
2706  bool done = false;
2707 
2708  *namelist = NIL;
2709 
2710  while (isspace((unsigned char) *nextp))
2711  nextp++; /* skip leading whitespace */
2712 
2713  if (*nextp == '\0')
2714  return true; /* allow empty string */
2715 
2716  /* At the top of the loop, we are at start of a new directory. */
2717  do
2718  {
2719  char *curname;
2720  char *endp;
2721 
2722  if (*nextp == '\"')
2723  {
2724  /* Quoted name --- collapse quote-quote pairs */
2725  curname = nextp + 1;
2726  for (;;)
2727  {
2728  endp = strchr(nextp + 1, '\"');
2729  if (endp == NULL)
2730  return false; /* mismatched quotes */
2731  if (endp[1] != '\"')
2732  break; /* found end of quoted name */
2733  /* Collapse adjacent quotes into one quote, and look again */
2734  memmove(endp, endp + 1, strlen(endp));
2735  nextp = endp;
2736  }
2737  /* endp now points at the terminating quote */
2738  nextp = endp + 1;
2739  }
2740  else
2741  {
2742  /* Unquoted name --- extends to separator or end of string */
2743  curname = endp = nextp;
2744  while (*nextp && *nextp != separator)
2745  {
2746  /* trailing whitespace should not be included in name */
2747  if (!isspace((unsigned char) *nextp))
2748  endp = nextp + 1;
2749  nextp++;
2750  }
2751  if (curname == endp)
2752  return false; /* empty unquoted name not allowed */
2753  }
2754 
2755  while (isspace((unsigned char) *nextp))
2756  nextp++; /* skip trailing whitespace */
2757 
2758  if (*nextp == separator)
2759  {
2760  nextp++;
2761  while (isspace((unsigned char) *nextp))
2762  nextp++; /* skip leading whitespace for next */
2763  /* we expect another name, so done remains false */
2764  }
2765  else if (*nextp == '\0')
2766  done = true;
2767  else
2768  return false; /* invalid syntax */
2769 
2770  /* Now safe to overwrite separator with a null */
2771  *endp = '\0';
2772 
2773  /* Truncate path if it's overlength */
2774  if (strlen(curname) >= MAXPGPATH)
2775  curname[MAXPGPATH - 1] = '\0';
2776 
2777  /*
2778  * Finished isolating current name --- add it to list
2779  */
2780  curname = pstrdup(curname);
2781  canonicalize_path(curname);
2782  *namelist = lappend(*namelist, curname);
2783 
2784  /* Loop back if we didn't reach end of string */
2785  } while (!done);
2786 
2787  return true;
2788 }
2789 
2790 
2791 /*****************************************************************************
2792  * Comparison Functions used for bytea
2793  *
2794  * Note: btree indexes need these routines not to leak memory; therefore,
2795  * be careful to free working copies of toasted datums. Most places don't
2796  * need to be so careful.
2797  *****************************************************************************/
2798 
2799 Datum
2801 {
2802  Datum arg1 = PG_GETARG_DATUM(0);
2803  Datum arg2 = PG_GETARG_DATUM(1);
2804  bool result;
2805  Size len1,
2806  len2;
2807 
2808  /*
2809  * We can use a fast path for unequal lengths, which might save us from
2810  * having to detoast one or both values.
2811  */
2812  len1 = toast_raw_datum_size(arg1);
2813  len2 = toast_raw_datum_size(arg2);
2814  if (len1 != len2)
2815  result = false;
2816  else
2817  {
2818  bytea *barg1 = DatumGetByteaPP(arg1);
2819  bytea *barg2 = DatumGetByteaPP(arg2);
2820 
2821  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
2822  len1 - VARHDRSZ) == 0);
2823 
2824  PG_FREE_IF_COPY(barg1, 0);
2825  PG_FREE_IF_COPY(barg2, 1);
2826  }
2827 
2828  PG_RETURN_BOOL(result);
2829 }
2830 
2831 Datum
2833 {
2834  Datum arg1 = PG_GETARG_DATUM(0);
2835  Datum arg2 = PG_GETARG_DATUM(1);
2836  bool result;
2837  Size len1,
2838  len2;
2839 
2840  /*
2841  * We can use a fast path for unequal lengths, which might save us from
2842  * having to detoast one or both values.
2843  */
2844  len1 = toast_raw_datum_size(arg1);
2845  len2 = toast_raw_datum_size(arg2);
2846  if (len1 != len2)
2847  result = true;
2848  else
2849  {
2850  bytea *barg1 = DatumGetByteaPP(arg1);
2851  bytea *barg2 = DatumGetByteaPP(arg2);
2852 
2853  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
2854  len1 - VARHDRSZ) != 0);
2855 
2856  PG_FREE_IF_COPY(barg1, 0);
2857  PG_FREE_IF_COPY(barg2, 1);
2858  }
2859 
2860  PG_RETURN_BOOL(result);
2861 }
2862 
2863 Datum
2865 {
2866  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2867  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2868  int len1,
2869  len2;
2870  int cmp;
2871 
2872  len1 = VARSIZE_ANY_EXHDR(arg1);
2873  len2 = VARSIZE_ANY_EXHDR(arg2);
2874 
2875  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2876 
2877  PG_FREE_IF_COPY(arg1, 0);
2878  PG_FREE_IF_COPY(arg2, 1);
2879 
2880  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
2881 }
2882 
2883 Datum
2885 {
2886  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2887  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2888  int len1,
2889  len2;
2890  int cmp;
2891 
2892  len1 = VARSIZE_ANY_EXHDR(arg1);
2893  len2 = VARSIZE_ANY_EXHDR(arg2);
2894 
2895  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2896 
2897  PG_FREE_IF_COPY(arg1, 0);
2898  PG_FREE_IF_COPY(arg2, 1);
2899 
2900  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
2901 }
2902 
2903 Datum
2905 {
2906  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2907  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2908  int len1,
2909  len2;
2910  int cmp;
2911 
2912  len1 = VARSIZE_ANY_EXHDR(arg1);
2913  len2 = VARSIZE_ANY_EXHDR(arg2);
2914 
2915  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2916 
2917  PG_FREE_IF_COPY(arg1, 0);
2918  PG_FREE_IF_COPY(arg2, 1);
2919 
2920  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
2921 }
2922 
2923 Datum
2925 {
2926  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2927  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2928  int len1,
2929  len2;
2930  int cmp;
2931 
2932  len1 = VARSIZE_ANY_EXHDR(arg1);
2933  len2 = VARSIZE_ANY_EXHDR(arg2);
2934 
2935  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2936 
2937  PG_FREE_IF_COPY(arg1, 0);
2938  PG_FREE_IF_COPY(arg2, 1);
2939 
2940  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
2941 }
2942 
2943 Datum
2945 {
2946  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2947  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2948  int len1,
2949  len2;
2950  int cmp;
2951 
2952  len1 = VARSIZE_ANY_EXHDR(arg1);
2953  len2 = VARSIZE_ANY_EXHDR(arg2);
2954 
2955  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2956  if ((cmp == 0) && (len1 != len2))
2957  cmp = (len1 < len2) ? -1 : 1;
2958 
2959  PG_FREE_IF_COPY(arg1, 0);
2960  PG_FREE_IF_COPY(arg2, 1);
2961 
2962  PG_RETURN_INT32(cmp);
2963 }
2964 
2965 /*
2966  * appendStringInfoText
2967  *
2968  * Append a text to str.
2969  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2970  */
2971 static void
2973 {
2975 }
2976 
2977 /*
2978  * replace_text
2979  * replace all occurrences of 'old_sub_str' in 'orig_str'
2980  * with 'new_sub_str' to form 'new_str'
2981  *
2982  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2983  * otherwise returns 'new_str'
2984  */
2985 Datum
2987 {
2988  text *src_text = PG_GETARG_TEXT_PP(0);
2989  text *from_sub_text = PG_GETARG_TEXT_PP(1);
2990  text *to_sub_text = PG_GETARG_TEXT_PP(2);
2991  int src_text_len;
2992  int from_sub_text_len;
2994  text *ret_text;
2995  int start_posn;
2996  int curr_posn;
2997  int chunk_len;
2998  char *start_ptr;
2999  StringInfoData str;
3000 
3001  text_position_setup(src_text, from_sub_text, &state);
3002 
3003  /*
3004  * Note: we check the converted string length, not the original, because
3005  * they could be different if the input contained invalid encoding.
3006  */
3007  src_text_len = state.len1;
3008  from_sub_text_len = state.len2;
3009 
3010  /* Return unmodified source string if empty source or pattern */
3011  if (src_text_len < 1 || from_sub_text_len < 1)
3012  {
3013  text_position_cleanup(&state);
3014  PG_RETURN_TEXT_P(src_text);
3015  }
3016 
3017  start_posn = 1;
3018  curr_posn = text_position_next(1, &state);
3019 
3020  /* When the from_sub_text is not found, there is nothing to do. */
3021  if (curr_posn == 0)
3022  {
3023  text_position_cleanup(&state);
3024  PG_RETURN_TEXT_P(src_text);
3025  }
3026 
3027  /* start_ptr points to the start_posn'th character of src_text */
3028  start_ptr = VARDATA_ANY(src_text);
3029 
3030  initStringInfo(&str);
3031 
3032  do
3033  {
3035 
3036  /* copy the data skipped over by last text_position_next() */
3037  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3038  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3039 
3040  appendStringInfoText(&str, to_sub_text);
3041 
3042  start_posn = curr_posn;
3043  start_ptr += chunk_len;
3044  start_posn += from_sub_text_len;
3045  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3046 
3047  curr_posn = text_position_next(start_posn, &state);
3048  }
3049  while (curr_posn > 0);
3050 
3051  /* copy trailing data */
3052  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3053  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3054 
3055  text_position_cleanup(&state);
3056 
3057  ret_text = cstring_to_text_with_len(str.data, str.len);
3058  pfree(str.data);
3059 
3060  PG_RETURN_TEXT_P(ret_text);
3061 }
3062 
3063 /*
3064  * check_replace_text_has_escape_char
3065  *
3066  * check whether replace_text contains escape char.
3067  */
3068 static bool
3070 {
3071  const char *p = VARDATA_ANY(replace_text);
3072  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3073 
3075  {
3076  for (; p < p_end; p++)
3077  {
3078  if (*p == '\\')
3079  return true;
3080  }
3081  }
3082  else
3083  {
3084  for (; p < p_end; p += pg_mblen(p))
3085  {
3086  if (*p == '\\')
3087  return true;
3088  }
3089  }
3090 
3091  return false;
3092 }
3093 
3094 /*
3095  * appendStringInfoRegexpSubstr
3096  *
3097  * Append replace_text to str, substituting regexp back references for
3098  * \n escapes. start_ptr is the start of the match in the source string,
3099  * at logical character position data_pos.
3100  */
3101 static void
3103  regmatch_t *pmatch,
3104  char *start_ptr, int data_pos)
3105 {
3106  const char *p = VARDATA_ANY(replace_text);
3107  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3108  int eml = pg_database_encoding_max_length();
3109 
3110  for (;;)
3111  {
3112  const char *chunk_start = p;
3113  int so;
3114  int eo;
3115 
3116  /* Find next escape char. */
3117  if (eml == 1)
3118  {
3119  for (; p < p_end && *p != '\\'; p++)
3120  /* nothing */ ;
3121  }
3122  else
3123  {
3124  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3125  /* nothing */ ;
3126  }
3127 
3128  /* Copy the text we just scanned over, if any. */
3129  if (p > chunk_start)
3130  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3131 
3132  /* Done if at end of string, else advance over escape char. */
3133  if (p >= p_end)
3134  break;
3135  p++;
3136 
3137  if (p >= p_end)
3138  {
3139  /* Escape at very end of input. Treat same as unexpected char */
3140  appendStringInfoChar(str, '\\');
3141  break;
3142  }
3143 
3144  if (*p >= '1' && *p <= '9')
3145  {
3146  /* Use the back reference of regexp. */
3147  int idx = *p - '0';
3148 
3149  so = pmatch[idx].rm_so;
3150  eo = pmatch[idx].rm_eo;
3151  p++;
3152  }
3153  else if (*p == '&')
3154  {
3155  /* Use the entire matched string. */
3156  so = pmatch[0].rm_so;
3157  eo = pmatch[0].rm_eo;
3158  p++;
3159  }
3160  else if (*p == '\\')
3161  {
3162  /* \\ means transfer one \ to output. */
3163  appendStringInfoChar(str, '\\');
3164  p++;
3165  continue;
3166  }
3167  else
3168  {
3169  /*
3170  * If escape char is not followed by any expected char, just treat
3171  * it as ordinary data to copy. (XXX would it be better to throw
3172  * an error?)
3173  */
3174  appendStringInfoChar(str, '\\');
3175  continue;
3176  }
3177 
3178  if (so != -1 && eo != -1)
3179  {
3180  /*
3181  * Copy the text that is back reference of regexp. Note so and eo
3182  * are counted in characters not bytes.
3183  */
3184  char *chunk_start;
3185  int chunk_len;
3186 
3187  Assert(so >= data_pos);
3188  chunk_start = start_ptr;
3189  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3190  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3191  appendBinaryStringInfo(str, chunk_start, chunk_len);
3192  }
3193  }
3194 }
3195 
3196 #define REGEXP_REPLACE_BACKREF_CNT 10
3197 
3198 /*
3199  * replace_text_regexp
3200  *
3201  * replace text that matches to regexp in src_text to replace_text.
3202  *
3203  * Note: to avoid having to include regex.h in builtins.h, we declare
3204  * the regexp argument as void *, but really it's regex_t *.
3205  */
3206 text *
3207 replace_text_regexp(text *src_text, void *regexp,
3208  text *replace_text, bool glob)
3209 {
3210  text *ret_text;
3211  regex_t *re = (regex_t *) regexp;
3212  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3215  pg_wchar *data;
3216  size_t data_len;
3217  int search_start;
3218  int data_pos;
3219  char *start_ptr;
3220  bool have_escape;
3221 
3222  initStringInfo(&buf);
3223 
3224  /* Convert data string to wide characters. */
3225  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3226  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3227 
3228  /* Check whether replace_text has escape char. */
3229  have_escape = check_replace_text_has_escape_char(replace_text);
3230 
3231  /* start_ptr points to the data_pos'th character of src_text */
3232  start_ptr = (char *) VARDATA_ANY(src_text);
3233  data_pos = 0;
3234 
3235  search_start = 0;
3236  while (search_start <= data_len)
3237  {
3238  int regexec_result;
3239 
3241 
3242  regexec_result = pg_regexec(re,
3243  data,
3244  data_len,
3245  search_start,
3246  NULL, /* no details */
3248  pmatch,
3249  0);
3250 
3251  if (regexec_result == REG_NOMATCH)
3252  break;
3253 
3254  if (regexec_result != REG_OKAY)
3255  {
3256  char errMsg[100];
3257 
3259  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3260  ereport(ERROR,
3261  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3262  errmsg("regular expression failed: %s", errMsg)));
3263  }
3264 
3265  /*
3266  * Copy the text to the left of the match position. Note we are given
3267  * character not byte indexes.
3268  */
3269  if (pmatch[0].rm_so - data_pos > 0)
3270  {
3271  int chunk_len;
3272 
3273  chunk_len = charlen_to_bytelen(start_ptr,
3274  pmatch[0].rm_so - data_pos);
3275  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3276 
3277  /*
3278  * Advance start_ptr over that text, to avoid multiple rescans of
3279  * it if the replace_text contains multiple back-references.
3280  */
3281  start_ptr += chunk_len;
3282  data_pos = pmatch[0].rm_so;
3283  }
3284 
3285  /*
3286  * Copy the replace_text. Process back references when the
3287  * replace_text has escape characters.
3288  */
3289  if (have_escape)
3290  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3291  start_ptr, data_pos);
3292  else
3293  appendStringInfoText(&buf, replace_text);
3294 
3295  /* Advance start_ptr and data_pos over the matched text. */
3296  start_ptr += charlen_to_bytelen(start_ptr,
3297  pmatch[0].rm_eo - data_pos);
3298  data_pos = pmatch[0].rm_eo;
3299 
3300  /*
3301  * When global option is off, replace the first instance only.
3302  */
3303  if (!glob)
3304  break;
3305 
3306  /*
3307  * Advance search position. Normally we start the next search at the
3308  * end of the previous match; but if the match was of zero length, we
3309  * have to advance by one character, or we'd just find the same match
3310  * again.
3311  */
3312  search_start = data_pos;
3313  if (pmatch[0].rm_so == pmatch[0].rm_eo)
3314  search_start++;
3315  }
3316 
3317  /*
3318  * Copy the text to the right of the last match.
3319  */
3320  if (data_pos < data_len)
3321  {
3322  int chunk_len;
3323 
3324  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3325  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3326  }
3327 
3328  ret_text = cstring_to_text_with_len(buf.data, buf.len);
3329  pfree(buf.data);
3330  pfree(data);
3331 
3332  return ret_text;
3333 }
3334 
3335 /*
3336  * split_text
3337  * parse input string
3338  * return ord item (1 based)
3339  * based on provided field separator
3340  */
3341 Datum
3343 {
3344  text *inputstring = PG_GETARG_TEXT_PP(0);
3345  text *fldsep = PG_GETARG_TEXT_PP(1);
3346  int fldnum = PG_GETARG_INT32(2);
3347  int inputstring_len;
3348  int fldsep_len;
3350  int start_posn;
3351  int end_posn;
3352  text *result_text;
3353 
3354  /* field number is 1 based */
3355  if (fldnum < 1)
3356  ereport(ERROR,
3357  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3358  errmsg("field position must be greater than zero")));
3359 
3360  text_position_setup(inputstring, fldsep, &state);
3361 
3362  /*
3363  * Note: we check the converted string length, not the original, because
3364  * they could be different if the input contained invalid encoding.
3365  */
3366  inputstring_len = state.len1;
3367  fldsep_len = state.len2;
3368 
3369  /* return empty string for empty input string */
3370  if (inputstring_len < 1)
3371  {
3372  text_position_cleanup(&state);
3374  }
3375 
3376  /* empty field separator */
3377  if (fldsep_len < 1)
3378  {
3379  text_position_cleanup(&state);
3380  /* if first field, return input string, else empty string */
3381  if (fldnum == 1)
3382  PG_RETURN_TEXT_P(inputstring);
3383  else
3385  }
3386 
3387  /* identify bounds of first field */
3388  start_posn = 1;
3389  end_posn = text_position_next(1, &state);
3390 
3391  /* special case if fldsep not found at all */
3392  if (end_posn == 0)
3393  {
3394  text_position_cleanup(&state);
3395  /* if field 1 requested, return input string, else empty string */
3396  if (fldnum == 1)
3397  PG_RETURN_TEXT_P(inputstring);
3398  else
3400  }
3401 
3402  while (end_posn > 0 && --fldnum > 0)
3403  {
3404  /* identify bounds of next field */
3405  start_posn = end_posn + fldsep_len;
3406  end_posn = text_position_next(start_posn, &state);
3407  }
3408 
3409  text_position_cleanup(&state);
3410 
3411  if (fldnum > 0)
3412  {
3413  /* N'th field separator not found */
3414  /* if last field requested, return it, else empty string */
3415  if (fldnum == 1)
3416  result_text = text_substring(PointerGetDatum(inputstring),
3417  start_posn,
3418  -1,
3419  true);
3420  else
3421  result_text = cstring_to_text("");
3422  }
3423  else
3424  {
3425  /* non-last field requested */
3426  result_text = text_substring(PointerGetDatum(inputstring),
3427  start_posn,
3428  end_posn - start_posn,
3429  false);
3430  }
3431 
3432  PG_RETURN_TEXT_P(result_text);
3433 }
3434 
3435 /*
3436  * Convenience function to return true when two text params are equal.
3437  */
3438 static bool
3439 text_isequal(text *txt1, text *txt2)
3440 {
3442  PointerGetDatum(txt1),
3443  PointerGetDatum(txt2)));
3444 }
3445 
3446 /*
3447  * text_to_array
3448  * parse input string and return text array of elements,
3449  * based on provided field separator
3450  */
3451 Datum
3453 {
3454  return text_to_array_internal(fcinfo);
3455 }
3456 
3457 /*
3458  * text_to_array_null
3459  * parse input string and return text array of elements,
3460  * based on provided field separator and null string
3461  *
3462  * This is a separate entry point only to prevent the regression tests from
3463  * complaining about different argument sets for the same internal function.
3464  */
3465 Datum
3467 {
3468  return text_to_array_internal(fcinfo);
3469 }
3470 
3471 /*
3472  * common code for text_to_array and text_to_array_null functions
3473  *
3474  * These are not strict so we have to test for null inputs explicitly.
3475  */
3476 static Datum
3478 {
3479  text *inputstring;
3480  text *fldsep;
3481  text *null_string;
3482  int inputstring_len;
3483  int fldsep_len;
3484  char *start_ptr;
3485  text *result_text;
3486  bool is_null;
3487  ArrayBuildState *astate = NULL;
3488 
3489  /* when input string is NULL, then result is NULL too */
3490  if (PG_ARGISNULL(0))
3491  PG_RETURN_NULL();
3492 
3493  inputstring = PG_GETARG_TEXT_PP(0);
3494 
3495  /* fldsep can be NULL */
3496  if (!PG_ARGISNULL(1))
3497  fldsep = PG_GETARG_TEXT_PP(1);
3498  else
3499  fldsep = NULL;
3500 
3501  /* null_string can be NULL or omitted */
3502  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
3503  null_string = PG_GETARG_TEXT_PP(2);
3504  else
3505  null_string = NULL;
3506 
3507  if (fldsep != NULL)
3508  {
3509  /*
3510  * Normal case with non-null fldsep. Use the text_position machinery
3511  * to search for occurrences of fldsep.
3512  */
3514  int fldnum;
3515  int start_posn;
3516  int end_posn;
3517  int chunk_len;
3518 
3519  text_position_setup(inputstring, fldsep, &state);
3520 
3521  /*
3522  * Note: we check the converted string length, not the original,
3523  * because they could be different if the input contained invalid
3524  * encoding.
3525  */
3526  inputstring_len = state.len1;
3527  fldsep_len = state.len2;
3528 
3529  /* return empty array for empty input string */
3530  if (inputstring_len < 1)
3531  {
3532  text_position_cleanup(&state);
3534  }
3535 
3536  /*
3537  * empty field separator: return the input string as a one-element
3538  * array
3539  */
3540  if (fldsep_len < 1)
3541  {
3542  text_position_cleanup(&state);
3543  /* single element can be a NULL too */
3544  is_null = null_string ? text_isequal(inputstring, null_string) : false;
3546  PointerGetDatum(inputstring),
3547  is_null, 1));
3548  }
3549 
3550  start_posn = 1;
3551  /* start_ptr points to the start_posn'th character of inputstring */
3552  start_ptr = VARDATA_ANY(inputstring);
3553 
3554  for (fldnum = 1;; fldnum++) /* field number is 1 based */
3555  {
3557 
3558  end_posn = text_position_next(start_posn, &state);
3559 
3560  if (end_posn == 0)
3561  {
3562  /* fetch last field */
3563  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
3564  }
3565  else
3566  {
3567  /* fetch non-last field */
3568  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
3569  }
3570 
3571  /* must build a temp text datum to pass to accumArrayResult */
3572  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3573  is_null = null_string ? text_isequal(result_text, null_string) : false;
3574 
3575  /* stash away this field */
3576  astate = accumArrayResult(astate,
3577  PointerGetDatum(result_text),
3578  is_null,
3579  TEXTOID,
3581 
3582  pfree(result_text);
3583 
3584  if (end_posn == 0)
3585  break;
3586 
3587  start_posn = end_posn;
3588  start_ptr += chunk_len;
3589  start_posn += fldsep_len;
3590  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
3591  }
3592 
3593  text_position_cleanup(&state);
3594  }
3595  else
3596  {
3597  /*
3598  * When fldsep is NULL, each character in the inputstring becomes an
3599  * element in the result array. The separator is effectively the
3600  * space between characters.
3601  */
3602  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
3603 
3604  /* return empty array for empty input string */
3605  if (inputstring_len < 1)
3607 
3608  start_ptr = VARDATA_ANY(inputstring);
3609 
3610  while (inputstring_len > 0)
3611  {
3612  int chunk_len = pg_mblen(start_ptr);
3613 
3615 
3616  /* must build a temp text datum to pass to accumArrayResult */
3617  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3618  is_null = null_string ? text_isequal(result_text, null_string) : false;
3619 
3620  /* stash away this field */
3621  astate = accumArrayResult(astate,
3622  PointerGetDatum(result_text),
3623  is_null,
3624  TEXTOID,
3626 
3627  pfree(result_text);
3628 
3629  start_ptr += chunk_len;
3630  inputstring_len -= chunk_len;
3631  }
3632  }
3633 
3636 }
3637 
3638 /*
3639  * array_to_text
3640  * concatenate Cstring representation of input array elements
3641  * using provided field separator
3642  */
3643 Datum
3645 {
3647  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3648 
3649  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
3650 }
3651 
3652 /*
3653  * array_to_text_null
3654  * concatenate Cstring representation of input array elements
3655  * using provided field separator and null string
3656  *
3657  * This version is not strict so we have to test for null inputs explicitly.
3658  */
3659 Datum
3661 {
3662  ArrayType *v;
3663  char *fldsep;
3664  char *null_string;
3665 
3666  /* returns NULL when first or second parameter is NULL */
3667  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
3668  PG_RETURN_NULL();
3669 
3670  v = PG_GETARG_ARRAYTYPE_P(0);
3671  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3672 
3673  /* NULL null string is passed through as a null pointer */
3674  if (!PG_ARGISNULL(2))
3675  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
3676  else
3677  null_string = NULL;
3678 
3679  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
3680 }
3681 
3682 /*
3683  * common code for array_to_text and array_to_text_null functions
3684  */
3685 static text *
3687  const char *fldsep, const char *null_string)
3688 {
3689  text *result;
3690  int nitems,
3691  *dims,
3692  ndims;
3693  Oid element_type;
3694  int typlen;
3695  bool typbyval;
3696  char typalign;
3698  bool printed = false;
3699  char *p;
3700  bits8 *bitmap;
3701  int bitmask;
3702  int i;
3703  ArrayMetaState *my_extra;
3704 
3705  ndims = ARR_NDIM(v);
3706  dims = ARR_DIMS(v);
3707  nitems = ArrayGetNItems(ndims, dims);
3708 
3709  /* if there are no elements, return an empty string */
3710  if (nitems == 0)
3711  return cstring_to_text_with_len("", 0);
3712 
3713  element_type = ARR_ELEMTYPE(v);
3714  initStringInfo(&buf);
3715 
3716  /*
3717  * We arrange to look up info about element type, including its output
3718  * conversion proc, only once per series of calls, assuming the element
3719  * type doesn't change underneath us.
3720  */
3721  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
3722  if (my_extra == NULL)
3723  {
3724  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3725  sizeof(ArrayMetaState));
3726  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
3727  my_extra->element_type = ~element_type;
3728  }
3729 
3730  if (my_extra->element_type != element_type)
3731  {
3732  /*
3733  * Get info about element type, including its output conversion proc
3734  */
3735  get_type_io_data(element_type, IOFunc_output,
3736  &my_extra->typlen, &my_extra->typbyval,
3737  &my_extra->typalign, &my_extra->typdelim,
3738  &my_extra->typioparam, &my_extra->typiofunc);
3739  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
3740  fcinfo->flinfo->fn_mcxt);
3741  my_extra->element_type = element_type;
3742  }
3743  typlen = my_extra->typlen;
3744  typbyval = my_extra->typbyval;
3745  typalign = my_extra->typalign;
3746 
3747  p = ARR_DATA_PTR(v);
3748  bitmap = ARR_NULLBITMAP(v);
3749  bitmask = 1;
3750 
3751  for (i = 0; i < nitems; i++)
3752  {
3753  Datum itemvalue;
3754  char *value;
3755 
3756  /* Get source element, checking for NULL */
3757  if (bitmap && (*bitmap & bitmask) == 0)
3758  {
3759  /* if null_string is NULL, we just ignore null elements */
3760  if (null_string != NULL)
3761  {
3762  if (printed)
3763  appendStringInfo(&buf, "%s%s", fldsep, null_string);
3764  else
3765  appendStringInfoString(&buf, null_string);
3766  printed = true;
3767  }
3768  }
3769  else
3770  {
3771  itemvalue = fetch_att(p, typbyval, typlen);
3772 
3773  value = OutputFunctionCall(&my_extra->proc, itemvalue);
3774 
3775  if (printed)
3776  appendStringInfo(&buf, "%s%s", fldsep, value);
3777  else
3778  appendStringInfoString(&buf, value);
3779  printed = true;
3780 
3781  p = att_addlength_pointer(p, typlen, p);
3782  p = (char *) att_align_nominal(p, typalign);
3783  }
3784 
3785  /* advance bitmap pointer if any */
3786  if (bitmap)
3787  {
3788  bitmask <<= 1;
3789  if (bitmask == 0x100)
3790  {
3791  bitmap++;
3792  bitmask = 1;
3793  }
3794  }
3795  }
3796 
3797  result = cstring_to_text_with_len(buf.data, buf.len);
3798  pfree(buf.data);
3799 
3800  return result;
3801 }
3802 
3803 #define HEXBASE 16
3804 /*
3805  * Convert a int32 to a string containing a base 16 (hex) representation of
3806  * the number.
3807  */
3808 Datum
3810 {
3812  char *ptr;
3813  const char *digits = "0123456789abcdef";
3814  char buf[32]; /* bigger than needed, but reasonable */
3815 
3816  ptr = buf + sizeof(buf) - 1;
3817  *ptr = '\0';
3818 
3819  do
3820  {
3821  *--ptr = digits[value % HEXBASE];
3822  value /= HEXBASE;
3823  } while (ptr > buf && value);
3824 
3826 }
3827 
3828 /*
3829  * Convert a int64 to a string containing a base 16 (hex) representation of
3830  * the number.
3831  */
3832 Datum
3834 {
3835  uint64 value = (uint64) PG_GETARG_INT64(0);
3836  char *ptr;
3837  const char *digits = "0123456789abcdef";
3838  char buf[32]; /* bigger than needed, but reasonable */
3839 
3840  ptr = buf + sizeof(buf) - 1;
3841  *ptr = '\0';
3842 
3843  do
3844  {
3845  *--ptr = digits[value % HEXBASE];
3846  value /= HEXBASE;
3847  } while (ptr > buf && value);
3848 
3850 }
3851 
3852 /*
3853  * Create an md5 hash of a text string and return it as hex
3854  *
3855  * md5 produces a 16 byte (128 bit) hash; double it for hex
3856  */
3857 #define MD5_HASH_LEN 32
3858 
3859 Datum
3861 {
3862  text *in_text = PG_GETARG_TEXT_PP(0);
3863  size_t len;
3864  char hexsum[MD5_HASH_LEN + 1];
3865 
3866  /* Calculate the length of the buffer using varlena metadata */
3867  len = VARSIZE_ANY_EXHDR(in_text);
3868 
3869  /* get the hash result */
3870  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
3871  ereport(ERROR,
3872  (errcode(ERRCODE_OUT_OF_MEMORY),
3873  errmsg("out of memory")));
3874 
3875  /* convert to text and return it */
3877 }
3878 
3879 /*
3880  * Create an md5 hash of a bytea field and return it as a hex string:
3881  * 16-byte md5 digest is represented in 32 hex characters.
3882  */
3883 Datum
3885 {
3886  bytea *in = PG_GETARG_BYTEA_PP(0);
3887  size_t len;
3888  char hexsum[MD5_HASH_LEN + 1];
3889 
3890  len = VARSIZE_ANY_EXHDR(in);
3891  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
3892  ereport(ERROR,
3893  (errcode(ERRCODE_OUT_OF_MEMORY),
3894  errmsg("out of memory")));
3895 
3897 }
3898 
3899 /*
3900  * Return the size of a datum, possibly compressed
3901  *
3902  * Works on any data type
3903  */
3904 Datum
3906 {
3908  int32 result;
3909  int typlen;
3910 
3911  /* On first call, get the input type's typlen, and save at *fn_extra */
3912  if (fcinfo->flinfo->fn_extra == NULL)
3913  {
3914  /* Lookup the datatype of the supplied argument */
3915  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
3916 
3917  typlen = get_typlen(argtypeid);
3918  if (typlen == 0) /* should not happen */
3919  elog(ERROR, "cache lookup failed for type %u", argtypeid);
3920 
3921  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3922  sizeof(int));
3923  *((int *) fcinfo->flinfo->fn_extra) = typlen;
3924  }
3925  else
3926  typlen = *((int *) fcinfo->flinfo->fn_extra);
3927 
3928  if (typlen == -1)
3929  {
3930  /* varlena type, possibly toasted */
3931  result = toast_datum_size(value);
3932  }
3933  else if (typlen == -2)
3934  {
3935  /* cstring */
3936  result = strlen(DatumGetCString(value)) + 1;
3937  }
3938  else
3939  {
3940  /* ordinary fixed-width type */
3941  result = typlen;
3942  }
3943 
3944  PG_RETURN_INT32(result);
3945 }
3946 
3947 /*
3948  * string_agg - Concatenates values and returns string.
3949  *
3950  * Syntax: string_agg(value text, delimiter text) RETURNS text
3951  *
3952  * Note: Any NULL values are ignored. The first-call delimiter isn't
3953  * actually used at all, and on subsequent calls the delimiter precedes
3954  * the associated value.
3955  */
3956 
3957 /* subroutine to initialize state */
3958 static StringInfo
3960 {
3961  StringInfo state;
3962  MemoryContext aggcontext;
3963  MemoryContext oldcontext;
3964 
3965  if (!AggCheckCallContext(fcinfo, &aggcontext))
3966  {
3967  /* cannot be called directly because of internal-type argument */
3968  elog(ERROR, "string_agg_transfn called in non-aggregate context");
3969  }
3970 
3971  /*
3972  * Create state in aggregate context. It'll stay there across subsequent
3973  * calls.
3974  */
3975  oldcontext = MemoryContextSwitchTo(aggcontext);
3976  state = makeStringInfo();
3977  MemoryContextSwitchTo(oldcontext);
3978 
3979  return state;
3980 }
3981 
3982 Datum
3984 {
3985  StringInfo state;
3986 
3987  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
3988 
3989  /* Append the value unless null. */
3990  if (!PG_ARGISNULL(1))
3991  {
3992  /* On the first time through, we ignore the delimiter. */
3993  if (state == NULL)
3994  state = makeStringAggState(fcinfo);
3995  else if (!PG_ARGISNULL(2))
3996  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
3997 
3998  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
3999  }
4000 
4001  /*
4002  * The transition type for string_agg() is declared to be "internal",
4003  * which is a pass-by-value type the same size as a pointer.
4004  */
4005  PG_RETURN_POINTER(state);
4006 }
4007 
4008 Datum
4010 {
4011  StringInfo state;
4012 
4013  /* cannot be called directly because of internal-type argument */
4014  Assert(AggCheckCallContext(fcinfo, NULL));
4015 
4016  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4017 
4018  if (state != NULL)
4020  else
4021  PG_RETURN_NULL();
4022 }
4023 
4024 /*
4025  * Implementation of both concat() and concat_ws().
4026  *
4027  * sepstr is the separator string to place between values.
4028  * argidx identifies the first argument to concatenate (counting from zero).
4029  * Returns NULL if result should be NULL, else text value.
4030  */
4031 static text *
4032 concat_internal(const char *sepstr, int argidx,
4033  FunctionCallInfo fcinfo)
4034 {
4035  text *result;
4036  StringInfoData str;
4037  bool first_arg = true;
4038  int i;
4039 
4040  /*
4041  * concat(VARIADIC some-array) is essentially equivalent to
4042  * array_to_text(), ie concat the array elements with the given separator.
4043  * So we just pass the case off to that code.
4044  */
4045  if (get_fn_expr_variadic(fcinfo->flinfo))
4046  {
4047  ArrayType *arr;
4048 
4049  /* Should have just the one argument */
4050  Assert(argidx == PG_NARGS() - 1);
4051 
4052  /* concat(VARIADIC NULL) is defined as NULL */
4053  if (PG_ARGISNULL(argidx))
4054  return NULL;
4055 
4056  /*
4057  * Non-null argument had better be an array. We assume that any call
4058  * context that could let get_fn_expr_variadic return true will have
4059  * checked that a VARIADIC-labeled parameter actually is an array. So
4060  * it should be okay to just Assert that it's an array rather than
4061  * doing a full-fledged error check.
4062  */
4064 
4065  /* OK, safe to fetch the array value */
4066  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4067 
4068  /*
4069  * And serialize the array. We tell array_to_text to ignore null
4070  * elements, which matches the behavior of the loop below.
4071  */
4072  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4073  }
4074 
4075  /* Normal case without explicit VARIADIC marker */
4076  initStringInfo(&str);
4077 
4078  for (i = argidx; i < PG_NARGS(); i++)
4079  {
4080  if (!PG_ARGISNULL(i))
4081  {
4083  Oid valtype;
4084  Oid typOutput;
4085  bool typIsVarlena;
4086 
4087  /* add separator if appropriate */
4088  if (first_arg)
4089  first_arg = false;
4090  else
4091  appendStringInfoString(&str, sepstr);
4092 
4093  /* call the appropriate type output function, append the result */
4094  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4095  if (!OidIsValid(valtype))
4096  elog(ERROR, "could not determine data type of concat() input");
4097  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4099  OidOutputFunctionCall(typOutput, value));
4100  }
4101  }
4102 
4103  result = cstring_to_text_with_len(str.data, str.len);
4104  pfree(str.data);
4105 
4106  return result;
4107 }
4108 
4109 /*
4110  * Concatenate all arguments. NULL arguments are ignored.
4111  */
4112 Datum
4114 {
4115  text *result;
4116 
4117  result = concat_internal("", 0, fcinfo);
4118  if (result == NULL)
4119  PG_RETURN_NULL();
4120  PG_RETURN_TEXT_P(result);
4121 }
4122 
4123 /*
4124  * Concatenate all but first argument value with separators. The first
4125  * parameter is used as the separator. NULL arguments are ignored.
4126  */
4127 Datum
4129 {
4130  char *sep;
4131  text *result;
4132 
4133  /* return NULL when separator is NULL */
4134  if (PG_ARGISNULL(0))
4135  PG_RETURN_NULL();
4137 
4138  result = concat_internal(sep, 1, fcinfo);
4139  if (result == NULL)
4140  PG_RETURN_NULL();
4141  PG_RETURN_TEXT_P(result);
4142 }
4143 
4144 /*
4145  * Return first n characters in the string. When n is negative,
4146  * return all but last |n| characters.
4147  */
4148 Datum
4150 {
4151  text *str = PG_GETARG_TEXT_PP(0);
4152  const char *p = VARDATA_ANY(str);
4153  int len = VARSIZE_ANY_EXHDR(str);
4154  int n = PG_GETARG_INT32(1);
4155  int rlen;
4156 
4157  if (n < 0)
4158  n = pg_mbstrlen_with_len(p, len) + n;
4159  rlen = pg_mbcharcliplen(p, len, n);
4160 
4162 }
4163 
4164 /*
4165  * Return last n characters in the string. When n is negative,
4166  * return all but first |n| characters.
4167  */
4168 Datum
4170 {
4171  text *str = PG_GETARG_TEXT_PP(0);
4172  const char *p = VARDATA_ANY(str);
4173  int len = VARSIZE_ANY_EXHDR(str);
4174  int n = PG_GETARG_INT32(1);
4175  int off;
4176 
4177  if (n < 0)
4178  n = -n;
4179  else
4180  n = pg_mbstrlen_with_len(p, len) - n;
4181  off = pg_mbcharcliplen(p, len, n);
4182 
4183  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4184 }
4185 
4186 /*
4187  * Return reversed string
4188  */
4189 Datum
4191 {
4192  text *str = PG_GETARG_TEXT_PP(0);
4193  const char *p = VARDATA_ANY(str);
4194  int len = VARSIZE_ANY_EXHDR(str);
4195  const char *endp = p + len;
4196  text *result;
4197  char *dst;
4198 
4199  result = palloc(len + VARHDRSZ);
4200  dst = (char *) VARDATA(result) + len;
4201  SET_VARSIZE(result, len + VARHDRSZ);
4202 
4204  {
4205  /* multibyte version */
4206  while (p < endp)
4207  {
4208  int sz;
4209 
4210  sz = pg_mblen(p);
4211  dst -= sz;
4212  memcpy(dst, p, sz);
4213  p += sz;
4214  }
4215  }
4216  else
4217  {
4218  /* single byte version */
4219  while (p < endp)
4220  *(--dst) = *p++;
4221  }
4222 
4223  PG_RETURN_TEXT_P(result);
4224 }
4225 
4226 
/*
 * Helper definitions used while parsing format() specifiers
 */

/* flag bit: a '-' flag was given in the specifier */
#define TEXT_FORMAT_FLAG_MINUS	0x0001	/* is minus flag present? */

/*
 * Step ptr to the next byte of the format string.  If that reaches end_ptr
 * the specifier was cut short, which is reported as an error.  ptr is
 * modified in place and each macro argument is evaluated exactly once.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (++(ptr) >= (end_ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format specifier"))); \
	} while (0)
4239 
4240 /*
4241  * Returns a formatted string
4242  */
4243 Datum
4245 {
4246  text *fmt;
4247  StringInfoData str;
4248  const char *cp;
4249  const char *start_ptr;
4250  const char *end_ptr;
4251  text *result;
4252  int arg;
4253  bool funcvariadic;
4254  int nargs;
4255  Datum *elements = NULL;
4256  bool *nulls = NULL;
4257  Oid element_type = InvalidOid;
4258  Oid prev_type = InvalidOid;
4259  Oid prev_width_type = InvalidOid;
4260  FmgrInfo typoutputfinfo;
4261  FmgrInfo typoutputinfo_width;
4262 
4263  /* When format string is null, immediately return null */
4264  if (PG_ARGISNULL(0))
4265  PG_RETURN_NULL();
4266 
4267  /* If argument is marked VARIADIC, expand array into elements */
4268  if (get_fn_expr_variadic(fcinfo->flinfo))
4269  {
4270  ArrayType *arr;
4271  int16 elmlen;
4272  bool elmbyval;
4273  char elmalign;
4274  int nitems;
4275 
4276  /* Should have just the one argument */
4277  Assert(PG_NARGS() == 2);
4278 
4279  /* If argument is NULL, we treat it as zero-length array */
4280  if (PG_ARGISNULL(1))
4281  nitems = 0;
4282  else
4283  {
4284  /*
4285  * Non-null argument had better be an array. We assume that any
4286  * call context that could let get_fn_expr_variadic return true
4287  * will have checked that a VARIADIC-labeled parameter actually is
4288  * an array. So it should be okay to just Assert that it's an
4289  * array rather than doing a full-fledged error check.
4290  */
4292 
4293  /* OK, safe to fetch the array value */
4294  arr = PG_GETARG_ARRAYTYPE_P(1);
4295 
4296  /* Get info about array element type */
4297  element_type = ARR_ELEMTYPE(arr);
4298  get_typlenbyvalalign(element_type,
4299  &elmlen, &elmbyval, &elmalign);
4300 
4301  /* Extract all array elements */
4302  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
4303  &elements, &nulls, &nitems);
4304  }
4305 
4306  nargs = nitems + 1;
4307  funcvariadic = true;
4308  }
4309  else
4310  {
4311  /* Non-variadic case, we'll process the arguments individually */
4312  nargs = PG_NARGS();
4313  funcvariadic = false;
4314  }
4315 
4316  /* Setup for main loop. */
4317  fmt = PG_GETARG_TEXT_PP(0);
4318  start_ptr = VARDATA_ANY(fmt);
4319  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
4320  initStringInfo(&str);
4321  arg = 1; /* next argument position to print */
4322 
4323  /* Scan format string, looking for conversion specifiers. */
4324  for (cp = start_ptr; cp < end_ptr; cp++)
4325  {
4326  int argpos;
4327  int widthpos;
4328  int flags;
4329  int width;
4330  Datum value;
4331  bool isNull;
4332  Oid typid;
4333 
4334  /*
4335  * If it's not the start of a conversion specifier, just copy it to
4336  * the output buffer.
4337  */
4338  if (*cp != '%')
4339  {
4340  appendStringInfoCharMacro(&str, *cp);
4341  continue;
4342  }
4343 
4344  ADVANCE_PARSE_POINTER(cp, end_ptr);
4345 
4346  /* Easy case: %% outputs a single % */
4347  if (*cp == '%')
4348  {
4349  appendStringInfoCharMacro(&str, *cp);
4350  continue;
4351  }
4352 
4353  /* Parse the optional portions of the format specifier */
4354  cp = text_format_parse_format(cp, end_ptr,
4355  &argpos, &widthpos,
4356  &flags, &width);
4357 
4358  /*
4359  * Next we should see the main conversion specifier. Whether or not
4360  * an argument position was present, it's known that at least one
4361  * character remains in the string at this point. Experience suggests
4362  * that it's worth checking that that character is one of the expected
4363  * ones before we try to fetch arguments, so as to produce the least
4364  * confusing response to a mis-formatted specifier.
4365  */
4366  if (strchr("sIL", *cp) == NULL)
4367  ereport(ERROR,
4368  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4369  errmsg("unrecognized conversion type specifier \"%c\"",
4370  *cp)));
4371 
4372  /* If indirect width was specified, get its value */
4373  if (widthpos >= 0)
4374  {
4375  /* Collect the specified or next argument position */
4376  if (widthpos > 0)
4377  arg = widthpos;
4378  if (arg >= nargs)
4379  ereport(ERROR,
4380  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4381  errmsg("too few arguments for format")));
4382 
4383  /* Get the value and type of the selected argument */
4384  if (!funcvariadic)
4385  {
4386  value = PG_GETARG_DATUM(arg);
4387  isNull = PG_ARGISNULL(arg);
4388  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4389  }
4390  else
4391  {
4392  value = elements[arg - 1];
4393  isNull = nulls[arg - 1];
4394  typid = element_type;
4395  }
4396  if (!OidIsValid(typid))
4397  elog(ERROR, "could not determine data type of format() input");
4398 
4399  arg++;
4400 
4401  /* We can treat NULL width the same as zero */
4402  if (isNull)
4403  width = 0;
4404  else if (typid == INT4OID)
4405  width = DatumGetInt32(value);
4406  else if (typid == INT2OID)
4407  width = DatumGetInt16(value);
4408  else
4409  {
4410  /* For less-usual datatypes, convert to text then to int */
4411  char *str;
4412 
4413  if (typid != prev_width_type)
4414  {
4415  Oid typoutputfunc;
4416  bool typIsVarlena;
4417 
4418  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4419  fmgr_info(typoutputfunc, &typoutputinfo_width);
4420  prev_width_type = typid;
4421  }
4422 
4423  str = OutputFunctionCall(&typoutputinfo_width, value);
4424 
4425  /* pg_atoi will complain about bad data or overflow */
4426  width = pg_atoi(str, sizeof(int), '\0');
4427 
4428  pfree(str);
4429  }
4430  }
4431 
4432  /* Collect the specified or next argument position */
4433  if (argpos > 0)
4434  arg = argpos;
4435  if (arg >= nargs)
4436  ereport(ERROR,
4437  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4438  errmsg("too few arguments for format")));
4439 
4440  /* Get the value and type of the selected argument */
4441  if (!funcvariadic)
4442  {
4443  value = PG_GETARG_DATUM(arg);
4444  isNull = PG_ARGISNULL(arg);
4445  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4446  }
4447  else
4448  {
4449  value = elements[arg - 1];
4450  isNull = nulls[arg - 1];
4451  typid = element_type;
4452  }
4453  if (!OidIsValid(typid))
4454  elog(ERROR, "could not determine data type of format() input");
4455 
4456  arg++;
4457 
4458  /*
4459  * Get the appropriate typOutput function, reusing previous one if
4460  * same type as previous argument. That's particularly useful in the
4461  * variadic-array case, but often saves work even for ordinary calls.
4462  */
4463  if (typid != prev_type)
4464  {
4465  Oid typoutputfunc;
4466  bool typIsVarlena;
4467 
4468  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4469  fmgr_info(typoutputfunc, &typoutputfinfo);
4470  prev_type = typid;
4471  }
4472 
4473  /*
4474  * And now we can format the value.
4475  */
4476  switch (*cp)
4477  {
4478  case 's':
4479  case 'I':
4480  case 'L':
4481  text_format_string_conversion(&str, *cp, &typoutputfinfo,
4482  value, isNull,
4483  flags, width);
4484  break;
4485  default:
4486  /* should not get here, because of previous check */
4487  ereport(ERROR,
4488  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4489  errmsg("unrecognized conversion type specifier \"%c\"",
4490  *cp)));
4491  break;
4492  }
4493  }
4494 
4495  /* Don't need deconstruct_array results anymore. */
4496  if (elements != NULL)
4497  pfree(elements);
4498  if (nulls != NULL)
4499  pfree(nulls);
4500 
4501  /* Generate results. */
4502  result = cstring_to_text_with_len(str.data, str.len);
4503  pfree(str.data);
4504 
4505  PG_RETURN_TEXT_P(result);
4506 }
4507 
4508 /*
4509  * Parse contiguous digits as a decimal number.
4510  *
4511  * Returns true if some digits could be parsed.
4512  * The value is returned into *value, and *ptr is advanced to the next
4513  * character to be parsed.
4514  *
4515  * Note parsing invariant: at least one character is known available before
4516  * string end (end_ptr) at entry, and this is still true at exit.
4517  */
4518 static bool
4519 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
4520 {
4521  bool found = false;
4522  const char *cp = *ptr;
4523  int val = 0;
4524 
4525  while (*cp >= '0' && *cp <= '9')
4526  {
4527  int newval = val * 10 + (*cp - '0');
4528 
4529  if (newval / 10 != val) /* overflow? */
4530  ereport(ERROR,
4531  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4532  errmsg("number is out of range")));
4533  val = newval;
4534  ADVANCE_PARSE_POINTER(cp, end_ptr);
4535  found = true;
4536  }
4537 
4538  *ptr = cp;
4539  *value = val;
4540 
4541  return found;
4542 }
4543 
4544 /*
4545  * Parse a format specifier (generally following the SUS printf spec).
4546  *
4547  * We have already advanced over the initial '%', and we are looking for
4548  * [argpos][flags][width]type (but the type character is not consumed here).
4549  *
4550  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
4551  * Output parameters:
4552  * argpos: argument position for value to be printed. -1 means unspecified.
4553  * widthpos: argument position for width. Zero means the argument position
4554  * was unspecified (ie, take the next arg) and -1 means no width
4555  * argument (width was omitted or specified as a constant).
4556  * flags: bitmask of flags.
4557  * width: directly-specified width value. Zero means the width was omitted
4558  * (note it's not necessary to distinguish this case from an explicit
4559  * zero width value).
4560  *
4561  * The function result is the next character position to be parsed, ie, the
4562  * location where the type character is/should be.
4563  *
4564  * Note parsing invariant: at least one character is known available before
4565  * string end (end_ptr) at entry, and this is still true at exit.
4566  */
4567 static const char *
4568 text_format_parse_format(const char *start_ptr, const char *end_ptr,
4569  int *argpos, int *widthpos,
4570  int *flags, int *width)
4571 {
4572  const char *cp = start_ptr;
4573  int n;
4574 
4575  /* set defaults for output parameters */
4576  *argpos = -1;
4577  *widthpos = -1;
4578  *flags = 0;
4579  *width = 0;
4580 
4581  /* try to identify first number */
4582  if (text_format_parse_digits(&cp, end_ptr, &n))
4583  {
4584  if (*cp != '$')
4585  {
4586  /* Must be just a width and a type, so we're done */
4587  *width = n;
4588  return cp;
4589  }
4590  /* The number was argument position */
4591  *argpos = n;
4592  /* Explicit 0 for argument index is immediately refused */
4593  if (n == 0)
4594  ereport(ERROR,
4595  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4596  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4597  ADVANCE_PARSE_POINTER(cp, end_ptr);
4598  }
4599 
4600  /* Handle flags (only minus is supported now) */
4601  while (*cp == '-')
4602  {
4603  *flags |= TEXT_FORMAT_FLAG_MINUS;
4604  ADVANCE_PARSE_POINTER(cp, end_ptr);
4605  }
4606 
4607  if (*cp == '*')
4608  {
4609  /* Handle indirect width */
4610  ADVANCE_PARSE_POINTER(cp, end_ptr);
4611  if (text_format_parse_digits(&cp, end_ptr, &n))
4612  {
4613  /* number in this position must be closed by $ */
4614  if (*cp != '$')
4615  ereport(ERROR,
4616  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4617  errmsg("width argument position must be ended by \"$\"")));
4618  /* The number was width argument position */
4619  *widthpos = n;
4620  /* Explicit 0 for argument index is immediately refused */
4621  if (n == 0)
4622  ereport(ERROR,
4623  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4624  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4625  ADVANCE_PARSE_POINTER(cp, end_ptr);
4626  }
4627  else
4628  *widthpos = 0; /* width's argument position is unspecified */
4629  }
4630  else
4631  {
4632  /* Check for direct width specification */
4633  if (text_format_parse_digits(&cp, end_ptr, &n))
4634  *width = n;
4635  }
4636 
4637  /* cp should now be pointing at type character */
4638  return cp;
4639 }
4640 
4641 /*
4642  * Format a %s, %I, or %L conversion
4643  */
4644 static void
4646  FmgrInfo *typOutputInfo,
4647  Datum value, bool isNull,
4648  int flags, int width)
4649 {
4650  char *str;
4651 
4652  /* Handle NULL arguments before trying to stringify the value. */
4653  if (isNull)
4654  {
4655  if (conversion == 's')
4656  text_format_append_string(buf, "", flags, width);
4657  else if (conversion == 'L')
4658  text_format_append_string(buf, "NULL", flags, width);
4659  else if (conversion == 'I')
4660  ereport(ERROR,
4661  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4662  errmsg("null values cannot be formatted as an SQL identifier")));
4663  return;
4664  }
4665 
4666  /* Stringify. */
4667  str = OutputFunctionCall(typOutputInfo, value);
4668 
4669  /* Escape. */
4670  if (conversion == 'I')
4671  {
4672  /* quote_identifier may or may not allocate a new string. */
4673  text_format_append_string(buf, quote_identifier(str), flags, width);
4674  }
4675  else if (conversion == 'L')
4676  {
4677  char *qstr = quote_literal_cstr(str);
4678 
4679  text_format_append_string(buf, qstr, flags, width);
4680  /* quote_literal_cstr() always allocates a new string */
4681  pfree(qstr);
4682  }
4683  else
4684  text_format_append_string(buf, str, flags, width);
4685 
4686  /* Cleanup. */
4687  pfree(str);
4688 }
4689 
4690 /*
4691  * Append str to buf, padding as directed by flags/width
4692  */
4693 static void
4695  int flags, int width)
4696 {
4697  bool align_to_left = false;
4698  int len;
4699 
4700  /* fast path for typical easy case */
4701  if (width == 0)
4702  {
4703  appendStringInfoString(buf, str);
4704  return;
4705  }
4706 
4707  if (width < 0)
4708  {
4709  /* Negative width: implicit '-' flag, then take absolute value */
4710  align_to_left = true;
4711  /* -INT_MIN is undefined */
4712  if (width <= INT_MIN)
4713  ereport(ERROR,
4714  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4715  errmsg("number is out of range")));
4716  width = -width;
4717  }
4718  else if (flags & TEXT_FORMAT_FLAG_MINUS)
4719  align_to_left = true;
4720 
4721  len = pg_mbstrlen(str);
4722  if (align_to_left)
4723  {
4724  /* left justify */
4725  appendStringInfoString(buf, str);
4726  if (len < width)
4727  appendStringInfoSpaces(buf, width - len);
4728  }
4729  else
4730  {
4731  /* right justify */
4732  if (len < width)
4733  appendStringInfoSpaces(buf, width - len);
4734  appendStringInfoString(buf, str);
4735  }
4736 }
4737 
4738 /*
4739  * text_format_nv - nonvariadic wrapper for text_format function.
4740  *
4741  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
4742  * which checks that all built-in functions that share the implementing C
4743  * function take the same number of arguments.
4744  */
4745 Datum
4747 {
4748  return text_format(fcinfo);
4749 }