PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/tuptoaster.h"
21 #include "catalog/pg_collation.h"
22 #include "catalog/pg_type.h"
23 #include "libpq/md5.h"
24 #include "libpq/pqformat.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "regex/regex.h"
28 #include "utils/builtins.h"
29 #include "utils/bytea.h"
30 #include "utils/lsyscache.h"
31 #include "utils/memutils.h"
32 #include "utils/pg_locale.h"
33 #include "utils/sortsupport.h"
34 
35 
36 /* GUC variable */
38 
39 typedef struct varlena unknown;
40 
/*
 * Working state for a substring search.  Either the byte-string pair
 * (str1/str2) or the wide-character pair (wstr1/wstr2) is in use, selected
 * by use_wchar; the skip table supports Boyer-Moore-Horspool matching.
 *
 * NOTE(review): the closing "} <name>;" line is absent from this listing
 * (embedded number 53).  The prototypes further down take a
 * "TextPositionState *", so that is presumably the typedef name -- confirm
 * against the upstream file.
 */
41 typedef struct
42 {
43  bool use_wchar; /* T if multibyte encoding */
44  char *str1; /* use these if not use_wchar */
45  char *str2; /* note: these point to original texts */
46  pg_wchar *wstr1; /* use these if use_wchar */
47  pg_wchar *wstr2; /* note: these are palloc'd */
48  int len1; /* string lengths in logical characters */
49  int len2;
50  /* Skip table for Boyer-Moore-Horspool search algorithm: */
51  int skiptablemask; /* mask for ANDing with skiptable subscripts */
52  int skiptable[256]; /* skip distance for given mismatched char */
54 
/*
 * Pair of scratch string buffers together with their allocated sizes.
 *
 * NOTE(review): the member guarded by HAVE_LOCALE_T and the closing
 * "} <name>;" line are absent from this listing (embedded numbers 62 and
 * 64), so the typedef name cannot be read here; presumably this is the
 * per-sort state used by the bttextfastcmp_* comparators declared below --
 * confirm against the upstream file.
 */
55 typedef struct
56 {
57  char *buf1; /* 1st string */
58  char *buf2; /* 2nd string */
59  int buflen1;
60  int buflen2;
61 #ifdef HAVE_LOCALE_T
63 #endif
65 
66 /*
67  * This should be large enough that most strings will fit, but small enough
68  * that we feel comfortable putting it on the stack
69  */
70 #define TEXTBUFLEN 1024
71 
/*
 * Accessors for the "unknown" type (a plain varlena, see the typedef above):
 * detoast a Datum and view it as "unknown *" (the ...Copy form goes through
 * PG_DETOAST_DATUM_COPY instead), fetch function argument n the same way,
 * and return an "unknown *" result as a pointer Datum.
 */
72 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
73 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
74 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
75 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
76 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
77 
/*
 * Forward declarations for helpers that are private to this file.
 *
 * NOTE(review): some declaration lines are absent from this listing (the
 * embedded numbering skips 91, 100-101 and 103).  In particular the fragment
 * ending in "null_string);" below has lost its opening line(s) -- restore
 * them from the upstream file.
 */
78 static void btsortsupport_worker(SortSupport ssup, Oid collid);
79 static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
80 static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
81 static int32 text_length(Datum str);
82 static text *text_catenate(text *t1, text *t2);
83 static text *text_substring(Datum str,
84  int32 start,
85  int32 length,
86  bool length_not_specified);
87 static text *text_overlay(text *t1, text *t2, int sp, int sl);
88 static int text_position(text *t1, text *t2);
89 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
90 static int text_position_next(int start_pos, TextPositionState *state);
92 static int text_cmp(text *arg1, text *arg2, Oid collid);
93 static bytea *bytea_catenate(bytea *t1, bytea *t2);
94 static bytea *bytea_substring(Datum str,
95  int S,
96  int L,
97  bool length_not_specified);
98 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
99 static void appendStringInfoText(StringInfo str, const text *t);
102  const char *fldsep, const char *null_string);
104 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
105  int *value);
106 static const char *text_format_parse_format(const char *start_ptr,
107  const char *end_ptr,
108  int *argpos, int *widthpos,
109  int *flags, int *width);
110 static void text_format_string_conversion(StringInfo buf, char conversion,
111  FmgrInfo *typOutputInfo,
112  Datum value, bool isNull,
113  int flags, int width);
114 static void text_format_append_string(StringInfo buf, const char *str,
115  int flags, int width);
116 
117 
118 /*****************************************************************************
119  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
120  *****************************************************************************/
121 
122 /*
123  * cstring_to_text
124  *
125  * Create a text value from a null-terminated C string.
126  *
127  * The new text value is freshly palloc'd with a full-size VARHDR.
128  */
129 text *
130 cstring_to_text(const char *s)
131 {
132  return cstring_to_text_with_len(s, strlen(s));
133 }
134 
135 /*
136  * cstring_to_text_with_len
137  *
138  * Same as cstring_to_text except the caller specifies the string length;
139  * the string need not be null_terminated.
140  */
141 text *
142 cstring_to_text_with_len(const char *s, int len)
143 {
144  text *result = (text *) palloc(len + VARHDRSZ);
145 
146  SET_VARSIZE(result, len + VARHDRSZ);
147  memcpy(VARDATA(result), s, len);
148 
149  return result;
150 }
151 
152 /*
153  * text_to_cstring
154  *
155  * Create a palloc'd, null-terminated C string from a text value.
156  *
157  * We support being passed a compressed or toasted text value.
158  * This is a bit bogus since such values shouldn't really be referred to as
159  * "text *", but it seems useful for robustness. If we didn't handle that
160  * case here, we'd need another routine that did, anyway.
161  */
162 char *
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (embedded number 163).  Going by the header
 * comment and the use of "t" below it is presumably
 * text_to_cstring(const text *t) -- confirm against the upstream file.
 */
164 {
165  /* must cast away the const, unfortunately */
166  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
167  int len = VARSIZE_ANY_EXHDR(tunpacked);
168  char *result;
169 
 /* one extra byte for the terminating NUL */
170  result = (char *) palloc(len + 1);
171  memcpy(result, VARDATA_ANY(tunpacked), len);
172  result[len] = '\0';
173 
 /* a different pointer means detoasting made a copy, which we own */
174  if (tunpacked != t)
175  pfree(tunpacked);
176 
177  return result;
178 }
179 
180 /*
181  * text_to_cstring_buffer
182  *
183  * Copy a text value into a caller-supplied buffer of size dst_len.
184  *
185  * The text string is truncated if necessary to fit. The result is
186  * guaranteed null-terminated (unless dst_len == 0).
187  *
188  * We support being passed a compressed or toasted text value.
189  * This is a bit bogus since such values shouldn't really be referred to as
190  * "text *", but it seems useful for robustness. If we didn't handle that
191  * case here, we'd need another routine that did, anyway.
192  */
193 void
194 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
195 {
196  /* must cast away the const, unfortunately */
197  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
198  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
199 
200  if (dst_len > 0)
201  {
202  dst_len--;
203  if (dst_len >= src_len)
204  dst_len = src_len;
205  else /* ensure truncation is encoding-safe */
206  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
207  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
208  dst[dst_len] = '\0';
209  }
210 
211  if (srcunpacked != src)
212  pfree(srcunpacked);
213 }
214 
215 
216 /*****************************************************************************
217  * USER I/O ROUTINES *
218  *****************************************************************************/
219 
220 
/* Convert between a digit character and its numeric value */
#define VAL(ch)		((ch) - '0')
#define DIG(num)	((num) + '0')
223 
224 /*
225  * byteain - converts from printable representation of byte array
226  *
227  * Non-printable characters must be passed as '\nnn' (octal) and are
228  * converted to internal form. '\' must be passed as '\\'.
229  * ereport(ERROR, ...) if bad form.
230  *
231  * BUGS:
232  * The input is scanned twice.
233  * The error checking of input is minimal.
234  */
235 Datum
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (embedded number 236); per the header comment
 * this is byteain, presumably declared with PG_FUNCTION_ARGS -- confirm
 * against the upstream file.
 */
237 {
238  char *inputText = PG_GETARG_CSTRING(0);
239  char *tp;
240  char *rp;
241  int bc;
242  bytea *result;
243 
244  /* Recognize hex input */
245  if (inputText[0] == '\\' && inputText[1] == 'x')
246  {
247  size_t len = strlen(inputText);
248 
 /*
  * Everything after the two-character "\x" prefix is handed to
  * hex_decode(); two hex digits make one byte, which bounds the
  * allocation.  The size is fixed up afterwards from the count that
  * hex_decode() returns.
  */
249  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
250  result = palloc(bc);
251  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
252  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
253 
254  PG_RETURN_BYTEA_P(result);
255  }
256 
257  /* Else, it's the traditional escaped style */
 /*
  * First pass: validate the input and count output bytes.  Each loop
  * iteration consumes one input unit (plain char, "\nnn" or "\\") and
  * accounts for exactly one output byte via bc++.
  */
258  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
259  {
260  if (tp[0] != '\\')
261  tp++;
262  else if ((tp[0] == '\\') &&
263  (tp[1] >= '0' && tp[1] <= '3') &&
264  (tp[2] >= '0' && tp[2] <= '7') &&
265  (tp[3] >= '0' && tp[3] <= '7'))
266  tp += 4;
267  else if ((tp[0] == '\\') &&
268  (tp[1] == '\\'))
269  tp += 2;
270  else
271  {
272  /*
273  * one backslash, not followed by another or ### valid octal
274  */
275  ereport(ERROR,
276  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
277  errmsg("invalid input syntax for type bytea")));
278  }
279  }
280 
281  bc += VARHDRSZ;
282 
283  result = (bytea *) palloc(bc);
284  SET_VARSIZE(result, bc);
285 
 /*
  * Second pass: emit the decoded bytes.  From here on "bc" is reused as
  * a scratch accumulator for the octal value.
  */
286  tp = inputText;
287  rp = VARDATA(result);
288  while (*tp != '\0')
289  {
290  if (tp[0] != '\\')
291  *rp++ = *tp++;
292  else if ((tp[0] == '\\') &&
293  (tp[1] >= '0' && tp[1] <= '3') &&
294  (tp[2] >= '0' && tp[2] <= '7') &&
295  (tp[3] >= '0' && tp[3] <= '7'))
296  {
 /* assemble three octal digits, most significant first */
297  bc = VAL(tp[1]);
298  bc <<= 3;
299  bc += VAL(tp[2]);
300  bc <<= 3;
301  *rp++ = bc + VAL(tp[3]);
302 
303  tp += 4;
304  }
305  else if ((tp[0] == '\\') &&
306  (tp[1] == '\\'))
307  {
308  *rp++ = '\\';
309  tp += 2;
310  }
311  else
312  {
313  /*
314  * We should never get here. The first pass should not allow it.
315  */
316  ereport(ERROR,
317  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
318  errmsg("invalid input syntax for type bytea")));
319  }
320  }
321 
322  PG_RETURN_BYTEA_P(result);
323 }
324 
325 /*
326  * byteaout - converts to printable representation of byte array
327  *
328  * In the traditional escaped format, non-printable characters are
329  * printed as '\nnn' (octal) and '\' as '\\'.
330  */
331 Datum
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (embedded number 332); per the header comment
 * this is byteaout -- confirm against the upstream file.
 */
333 {
334  bytea *vlena = PG_GETARG_BYTEA_PP(0);
335  char *result;
336  char *rp;
337 
 /*
  * NOTE(review): the opening "if (...)" of this chain is absent from
  * this listing (embedded number 338).  Given the "else if" on
  * BYTEA_OUTPUT_ESCAPE below it presumably tests bytea_output for the
  * hex setting -- confirm against the upstream file.
  */
339  {
340  /* Print hex format */
 /* two hex digits per byte, plus the "\x" prefix, plus the terminator */
341  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
342  *rp++ = '\\';
343  *rp++ = 'x';
344  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
345  }
346  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
347  {
348  /* Print traditional escaped format */
349  char *vp;
350  int len;
351  int i;
352 
 /*
  * First pass: compute the output size.  A backslash costs 2 chars,
  * a byte outside 0x20..0x7e costs 4 ("\nnn"), anything else 1.
  */
353  len = 1; /* empty string has 1 char */
354  vp = VARDATA_ANY(vlena);
355  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
356  {
357  if (*vp == '\\')
358  len += 2;
359  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
360  len += 4;
361  else
362  len++;
363  }
 /* Second pass: write the escaped text using the same classification */
364  rp = result = (char *) palloc(len);
365  vp = VARDATA_ANY(vlena);
366  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
367  {
368  if (*vp == '\\')
369  {
370  *rp++ = '\\';
371  *rp++ = '\\';
372  }
373  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
374  {
375  int val; /* holds unprintable chars */
376 
 /* fill the three octal digits from least to most significant */
377  val = *vp;
378  rp[0] = '\\';
379  rp[3] = DIG(val & 07);
380  val >>= 3;
381  rp[2] = DIG(val & 07);
382  val >>= 3;
383  rp[1] = DIG(val & 03);
384  rp += 4;
385  }
386  else
387  *rp++ = *vp;
388  }
389  }
390  else
391  {
392  elog(ERROR, "unrecognized bytea_output setting: %d",
393  bytea_output);
394  rp = result = NULL; /* keep compiler quiet */
395  }
 /* both formats leave rp just past the last character written */
396  *rp = '\0';
397  PG_RETURN_CSTRING(result);
398 }
399 
400 /*
401  * bytearecv - converts external binary format to bytea
402  */
403 Datum
/*
 * NOTE(review): the function name/parameter line and the declaration of
 * "buf" are absent from this listing (embedded numbers 404 and 406); per
 * the header comment this is bytearecv -- confirm against the upstream file.
 */
405 {
407  bytea *result;
408  int nbytes;
409 
 /* take everything that is left in the message, from cursor to end */
410  nbytes = buf->len - buf->cursor;
411  result = (bytea *) palloc(nbytes + VARHDRSZ);
412  SET_VARSIZE(result, nbytes + VARHDRSZ);
413  pq_copymsgbytes(buf, VARDATA(result), nbytes);
414  PG_RETURN_BYTEA_P(result);
415 }
416 
417 /*
418  * byteasend - converts bytea to binary format
419  *
420  * This is a special case: just copy the input...
421  */
422 Datum
/*
 * NOTE(review): the function name/parameter line is absent from this
 * listing (embedded number 423); per the header comment this is byteasend
 * -- confirm against the upstream file.
 */
424 {
 /* fetch a copy of the argument and hand that copy back as the result */
425  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
426 
427  PG_RETURN_BYTEA_P(vlena);
428 }
429 
/*
 * Aggregate transition step: argument 0 is the running state (a StringInfo,
 * or NULL before the first value), argument 1 the next value and argument 2
 * the delimiter.
 *
 * NOTE(review): several lines are absent from this listing (embedded numbers
 * 431, 433, 440, 449 and 452): the function name/parameter line, the
 * declaration of "state", and the statements that fetch the value and append
 * the delimiter and value.  Presumably this is the bytea string_agg
 * transition function -- confirm against the upstream file.
 */
430 Datum
432 {
434 
435  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
436 
437  /* Append the value unless null. */
438  if (!PG_ARGISNULL(1))
439  {
441 
442  /* On the first time through, we ignore the delimiter. */
443  if (state == NULL)
444  state = makeStringAggState(fcinfo);
445  else if (!PG_ARGISNULL(2))
446  {
447  bytea *delim = PG_GETARG_BYTEA_PP(2);
448 
450  }
451 
453  }
454 
455  /*
456  * The transition type for string_agg() is declared to be "internal",
457  * which is a pass-by-value type the same size as a pointer.
458  */
459  PG_RETURN_POINTER(state);
460 }
461 
/*
 * Aggregate final step: turn the accumulated StringInfo (argument 0) into a
 * bytea, or return SQL NULL when no state was ever created.
 *
 * NOTE(review): the function name/parameter line and the declaration of
 * "state" are absent from this listing (embedded numbers 463 and 465).
 * Presumably this is the bytea string_agg final function -- confirm against
 * the upstream file.
 */
462 Datum
464 {
466 
467  /* cannot be called directly because of internal-type argument */
468  Assert(AggCheckCallContext(fcinfo, NULL));
469 
470  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
471 
472  if (state != NULL)
473  {
474  bytea *result;
475 
 /* copy the buffer contents into a varlena with a full-size header */
476  result = (bytea *) palloc(state->len + VARHDRSZ);
477  SET_VARSIZE(result, state->len + VARHDRSZ);
478  memcpy(VARDATA(result), state->data, state->len);
479  PG_RETURN_BYTEA_P(result);
480  }
481  else
482  PG_RETURN_NULL();
483 }
484 
485 /*
486  * textin - converts "..." to internal representation
487  */
488 Datum
/*
 * NOTE(review): the function name/parameter line is absent from this
 * listing (embedded number 489); per the header comment this is textin --
 * confirm against the upstream file.
 */
490 {
491  char *inputText = PG_GETARG_CSTRING(0);
492 
 /* the input C string is converted as-is by cstring_to_text() */
493  PG_RETURN_TEXT_P(cstring_to_text(inputText));
494 }
495 
496 /*
497  * textout - converts internal representation to "..."
498  */
499 Datum
/*
 * NOTE(review): the function name/parameter line and the return statement
 * are absent from this listing (embedded numbers 500 and 504); per the
 * header comment this is textout -- confirm against the upstream file.
 */
501 {
502  Datum txt = PG_GETARG_DATUM(0);
503 
505 }
506 
507 /*
508  * textrecv - converts external binary format to text
509  */
510 Datum
/*
 * NOTE(review): the function name/parameter line and the declaration of
 * "buf" are absent from this listing (embedded numbers 511 and 513); per
 * the header comment this is textrecv -- confirm against the upstream file.
 */
512 {
514  text *result;
515  char *str;
516  int nbytes;
517 
 /* read the rest of the message as text; nbytes receives its length */
518  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
519 
520  result = cstring_to_text_with_len(str, nbytes);
 /* the intermediate string is no longer needed once copied */
521  pfree(str);
522  PG_RETURN_TEXT_P(result);
523 }
524 
525 /*
526  * textsend - converts text to binary format
527  */
528 Datum
/*
 * NOTE(review): several lines are absent from this listing (embedded
 * numbers 529, 532, 535 and 536): the function name/parameter line, the
 * declaration of "buf", and the statements that fill the buffer and return
 * it.  Per the header comment this is textsend -- confirm against the
 * upstream file.
 */
530 {
531  text *t = PG_GETARG_TEXT_PP(0);
533 
534  pq_begintypsend(&buf);
537 }
538 
539 
540 /*
541  * unknownin - converts "..." to internal representation
542  */
543 Datum
/*
 * NOTE(review): the function name/parameter line and the return statement
 * are absent from this listing (embedded numbers 544 and 549); per the
 * header comment this is unknownin -- confirm against the upstream file.
 */
545 {
546  char *str = PG_GETARG_CSTRING(0);
547 
548  /* representation is same as cstring */
550 }
551 
552 /*
553  * unknownout - converts internal representation to "..."
554  */
555 Datum
/*
 * NOTE(review): the function name/parameter line and the return statement
 * are absent from this listing (embedded numbers 556 and 561); per the
 * header comment this is unknownout -- confirm against the upstream file.
 */
557 {
558  /* representation is same as cstring */
559  char *str = PG_GETARG_CSTRING(0);
560 
562 }
563 
564 /*
565  * unknownrecv - converts external binary format to unknown
566  */
567 Datum
/*
 * NOTE(review): the function name/parameter line and the declaration of
 * "buf" are absent from this listing (embedded numbers 568 and 570); per
 * the header comment this is unknownrecv -- confirm against the upstream
 * file.
 */
569 {
571  char *str;
572  int nbytes;
573 
 /* nbytes is filled in by pq_getmsgtext() but not used afterwards */
574  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
575  /* representation is same as cstring */
576  PG_RETURN_CSTRING(str);
577 }
578 
579 /*
580  * unknownsend - converts unknown to binary format
581  */
582 Datum
/*
 * NOTE(review): the function name/parameter line, the declaration of "buf"
 * and the return statement are absent from this listing (embedded numbers
 * 583, 587 and 591); per the header comment this is unknownsend -- confirm
 * against the upstream file.
 */
584 {
585  /* representation is same as cstring */
586  char *str = PG_GETARG_CSTRING(0);
588 
589  pq_begintypsend(&buf);
 /* the whole NUL-terminated string is sent, without the terminator */
590  pq_sendtext(&buf, str, strlen(str));
592 }
593 
594 
595 /* ========== PUBLIC ROUTINES ========== */
596 
597 /*
598  * textlen -
599  * returns the logical length of a text*
600  * (which is less than the VARSIZE of the text*)
601  */
602 Datum
/*
 * NOTE(review): the function name/parameter line and the return statement
 * are absent from this listing (embedded numbers 603 and 608); per the
 * header comment this is textlen, and the work is presumably delegated to
 * text_length() -- confirm against the upstream file.
 */
604 {
 /* kept as a raw Datum so that detoasting can be deferred */
605  Datum str = PG_GETARG_DATUM(0);
606 
607  /* try to avoid decompressing argument */
609 }
610 
611 /*
612  * text_length -
613  * Does the real work for textlen()
614  *
615  * This is broken out so it can be called directly by other string processing
616  * functions. Note that the argument is passed as a Datum, to indicate that
617  * it may still be in compressed form. We can avoid decompressing it at all
618  * in some cases.
619  */
620 static int32
/*
 * NOTE(review): several lines are absent from this listing (embedded
 * numbers 621, 624, 625 and 630): the function name/parameter line (per the
 * prototype, text_length(Datum str)), the fast-path condition and its
 * result, and the first half of the multibyte return expression.  Restore
 * them from the upstream file.
 */
622 {
623  /* fastpath when max encoding length is one */
626  else
627  {
 /* multibyte case: the value must be detoasted to count characters */
628  text *t = DatumGetTextPP(str);
629 
631  VARSIZE_ANY_EXHDR(t)));
632  }
633 }
634 
635 /*
636  * textoctetlen -
637  * returns the physical length of a text*
638  * (which is less than the VARSIZE of the text*)
639  */
640 Datum
/*
 * NOTE(review): the function name/parameter line and the return statement
 * are absent from this listing (embedded numbers 641 and 646); per the
 * header comment this is textoctetlen -- confirm against the upstream file.
 */
642 {
643  Datum str = PG_GETARG_DATUM(0);
644 
645  /* We need not detoast the input at all */
647 }
648 
649 /*
650  * textcat -
651  * takes two text* and returns a text* that is the concatenation of
652  * the two.
653  *
654  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
655  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
656  * Allocate space for output in all cases.
657  * XXX - thomas 1997-07-10
658  */
659 Datum
/*
 * NOTE(review): the function name/parameter line and the return statement
 * are absent from this listing (embedded numbers 660 and 665); per the
 * header comment this is textcat, presumably returning
 * text_catenate(t1, t2) -- confirm against the upstream file.
 */
661 {
662  text *t1 = PG_GETARG_TEXT_PP(0);
663  text *t2 = PG_GETARG_TEXT_PP(1);
664 
666 }
667 
668 /*
669  * text_catenate
670  * Guts of textcat(), broken out so it can be used by other functions
671  *
672  * Arguments can be in short-header form, but not compressed or out-of-line
673  */
674 static text *
/*
 * NOTE(review): the function name/parameter line is absent from this
 * listing (embedded number 675); per the prototype earlier in the file it
 * is text_catenate(text *t1, text *t2).
 */
676 {
677  text *result;
678  int len1,
679  len2,
680  len;
681  char *ptr;
682 
 /* payload sizes in bytes, valid for short- and full-size headers alike */
683  len1 = VARSIZE_ANY_EXHDR(t1);
684  len2 = VARSIZE_ANY_EXHDR(t2);
685 
686  /* paranoia ... probably should throw error instead? */
687  if (len1 < 0)
688  len1 = 0;
689  if (len2 < 0)
690  len2 = 0;
691 
 /* the result always gets a full-size header */
692  len = len1 + len2 + VARHDRSZ;
693  result = (text *) palloc(len);
694 
695  /* Set size of result string... */
696  SET_VARSIZE(result, len);
697 
698  /* Fill data field of result string... */
699  ptr = VARDATA(result);
700  if (len1 > 0)
701  memcpy(ptr, VARDATA_ANY(t1), len1);
702  if (len2 > 0)
703  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
704 
705  return result;
706 }
707 
708 /*
709  * charlen_to_bytelen()
710  * Compute the number of bytes occupied by n characters starting at *p
711  *
712  * It is caller's responsibility that there actually are n characters;
713  * the string need not be null-terminated.
714  */
715 static int
716 charlen_to_bytelen(const char *p, int n)
717 {
 /*
  * NOTE(review): the "if (...)" condition opening this chain is absent
  * from this listing (embedded number 718).  Given the comment in the
  * branch it presumably tests for a single-byte database encoding --
  * confirm against the upstream file.
  */
719  {
720  /* Optimization for single-byte encodings */
721  return n;
722  }
723  else
724  {
725  const char *s;
726 
 /* step over n characters, each pg_mblen(s) bytes wide */
727  for (s = p; n > 0; n--)
728  s += pg_mblen(s);
729 
 /* distance walked is the byte length */
730  return s - p;
731  }
732 }
733 
734 /*
735  * text_substr()
736  * Return a substring starting at the specified position.
737  * - thomas 1997-12-31
738  *
739  * Input:
740  * - string
741  * - starting position (is one-based)
742  * - string length
743  *
744  * If the starting position is zero or less, then return from the start of the string
745  * adjusting the length to be consistent with the "negative start" per SQL.
746  * If the length is less than zero, return the remaining string.
747  *
748  * Added multibyte support.
749  * - Tatsuo Ishii 1998-4-21
750  * Changed behavior if starting position is less than one to conform to SQL behavior.
751  * Formerly returned the entire string; now returns a portion.
752  * - Thomas Lockhart 1998-12-10
753  * Now uses faster TOAST-slicing interface
754  * - John Gray 2002-02-22
755  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
756  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
757  * error; if E < 1, return '', not entire string). Fixed MB related bug when
758  * S > LC and < LC + 4 sometimes garbage characters are returned.
759  * - Joe Conway 2002-08-10
760  */
761 Datum
/*
 * NOTE(review): the function name/parameter line and the first line of the
 * return expression are absent from this listing (embedded numbers 762 and
 * 764); per the header comment this is text_substr, presumably forwarding
 * to text_substring() -- confirm against the upstream file.
 */
763 {
 /* arguments 1 and 2 are passed on; "false" = a length was specified */
765  PG_GETARG_INT32(1),
766  PG_GETARG_INT32(2),
767  false));
768 }
769 
770 /*
771  * text_substr_no_len -
772  * Wrapper to avoid opr_sanity failure due to
773  * one function accepting a different number of args.
774  */
775 Datum
/*
 * NOTE(review): the function name/parameter line and the first line of the
 * return expression are absent from this listing (embedded numbers 776 and
 * 778); per the header comment this is text_substr_no_len, presumably
 * forwarding to text_substring() -- confirm against the upstream file.
 */
777 {
 /* -1 is a placeholder length; "true" = no length was specified */
779  PG_GETARG_INT32(1),
780  -1, true));
781 }
782 
783 /*
784  * text_substring -
785  * Does the real work for text_substr() and text_substr_no_len()
786  *
787  * This is broken out so it can be called directly by other string processing
788  * functions. Note that the argument is passed as a Datum, to indicate that
789  * it may still be in compressed/toasted form. We can avoid detoasting all
790  * of it in some cases.
791  *
792  * The result is always a freshly palloc'd datum.
793  */
794 static text *
795 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
796 {
 /*
  * NOTE(review): the declaration of "eml" is absent from this listing
  * (embedded number 797).  The error message at the bottom calls it the
  * "encoding max length"; presumably it is initialised from the database
  * encoding -- confirm against the upstream file.
  */
798  int32 S = start; /* start position */
799  int32 S1; /* adjusted start position */
800  int32 L1; /* adjusted substring length */
801 
802  /* life is easy if the encoding max length is 1 */
803  if (eml == 1)
804  {
 /* positions below 1 are clamped to the first character */
805  S1 = Max(S, 1);
806 
807  if (length_not_specified) /* special case - get length to end of
808  * string */
809  L1 = -1;
810  else
811  {
812  /* end position */
 /*
  * NOTE(review): S + length can exceed the int range for large
  * arguments, and signed overflow is undefined in C; the E < S
  * test below relies on wraparound.  Consider an explicit
  * overflow check.
  */
813  int E = S + length;
814 
815  /*
816  * A negative value for L is the only way for the end position to
817  * be before the start. SQL99 says to throw an error.
818  */
819  if (E < S)
820  ereport(ERROR,
821  (errcode(ERRCODE_SUBSTRING_ERROR),
822  errmsg("negative substring length not allowed")));
823 
824  /*
825  * A zero or negative value for the end position can happen if the
826  * start was negative or one. SQL99 says to return a zero-length
827  * string.
828  */
829  if (E < 1)
830  return cstring_to_text("");
831 
 /* length measured from the clamped start */
832  L1 = E - S1;
833  }
834 
835  /*
836  * If the start position is past the end of the string, SQL99 says to
837  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
838  * that for us. Convert to zero-based starting position
839  */
840  return DatumGetTextPSlice(str, S1 - 1, L1);
841  }
842  else if (eml > 1)
843  {
844  /*
845  * When encoding max length is > 1, we can't get LC without
846  * detoasting, so we'll grab a conservatively large slice now and go
847  * back later to do the right thing
848  */
849  int32 slice_start;
850  int32 slice_size;
851  int32 slice_strlen;
852  text *slice;
853  int32 E1;
854  int32 i;
855  char *p;
856  char *s;
857  text *ret;
858 
859  /*
860  * if S is past the end of the string, the tuple toaster will return a
861  * zero-length string to us
862  */
863  S1 = Max(S, 1);
864 
865  /*
866  * We need to start at position zero because there is no way to know
867  * in advance which byte offset corresponds to the supplied start
868  * position.
869  */
870  slice_start = 0;
871 
872  if (length_not_specified) /* special case - get length to end of
873  * string */
874  slice_size = L1 = -1;
875  else
876  {
 /* NOTE(review): same signed-overflow concern as in the eml == 1 branch */
877  int E = S + length;
878 
879  /*
880  * A negative value for L is the only way for the end position to
881  * be before the start. SQL99 says to throw an error.
882  */
883  if (E < S)
884  ereport(ERROR,
885  (errcode(ERRCODE_SUBSTRING_ERROR),
886  errmsg("negative substring length not allowed")));
887 
888  /*
889  * A zero or negative value for the end position can happen if the
890  * start was negative or one. SQL99 says to return a zero-length
891  * string.
892  */
893  if (E < 1)
894  return cstring_to_text("");
895 
896  /*
897  * if E is past the end of the string, the tuple toaster will
898  * truncate the length for us
899  */
900  L1 = E - S1;
901 
902  /*
903  * Total slice size in bytes can't be any longer than the start
904  * position plus substring length times the encoding max length.
905  */
 /* NOTE(review): this product can also overflow int32 for large inputs */
906  slice_size = (S1 + L1) * eml;
907  }
908 
909  /*
910  * If we're working with an untoasted source, no need to do an extra
911  * copying step.
912  */
 /*
  * NOTE(review): the "if (...)" condition for this if/else is absent
  * from this listing (embedded numbers 913-914).  Given the comment
  * above it presumably tests whether the datum is compressed or stored
  * externally -- confirm against the upstream file.
  */
915  slice = DatumGetTextPSlice(str, slice_start, slice_size);
916  else
917  slice = (text *) DatumGetPointer(str);
918 
919  /* see if we got back an empty string */
920  if (VARSIZE_ANY_EXHDR(slice) == 0)
921  {
 /* free the slice only when it is a separate copy */
922  if (slice != (text *) DatumGetPointer(str))
923  pfree(slice);
924  return cstring_to_text("");
925  }
926 
927  /* Now we can get the actual length of the slice in MB characters */
928  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
929  VARSIZE_ANY_EXHDR(slice));
930 
931  /*
932  * Check that the start position wasn't > slice_strlen. If so, SQL99
933  * says to return a zero-length string.
934  */
935  if (S1 > slice_strlen)
936  {
937  if (slice != (text *) DatumGetPointer(str))
938  pfree(slice);
939  return cstring_to_text("");
940  }
941 
942  /*
943  * Adjust L1 and E1 now that we know the slice string length. Again
944  * remember that S1 is one based, and slice_start is zero based.
945  */
946  if (L1 > -1)
947  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
948  else
949  E1 = slice_start + 1 + slice_strlen;
950 
951  /*
952  * Find the start position in the slice; remember S1 is not zero based
953  */
954  p = VARDATA_ANY(slice);
955  for (i = 0; i < S1 - 1; i++)
956  p += pg_mblen(p);
957 
958  /* hang onto a pointer to our start position */
959  s = p;
960 
961  /*
962  * Count the actual bytes used by the substring of the requested
963  * length.
964  */
965  for (i = S1; i < E1; i++)
966  p += pg_mblen(p);
967 
 /* (p - s) is the substring's size in bytes */
968  ret = (text *) palloc(VARHDRSZ + (p - s));
969  SET_VARSIZE(ret, VARHDRSZ + (p - s));
970  memcpy(VARDATA(ret), s, (p - s));
971 
972  if (slice != (text *) DatumGetPointer(str))
973  pfree(slice);
974 
975  return ret;
976  }
977  else
978  elog(ERROR, "invalid backend encoding: encoding max length < 1");
979 
980  /* not reached: suppress compiler warning */
981  return NULL;
982 }
983 
984 /*
985  * textoverlay
986  * Replace specified substring of first string with second
987  *
988  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
989  * This code is a direct implementation of what the standard says.
990  */
991 Datum
/*
 * NOTE(review): the function name/parameter line is absent from this
 * listing (embedded number 992); per the header comment this is textoverlay
 * -- confirm against the upstream file.
 */
993 {
994  text *t1 = PG_GETARG_TEXT_PP(0);
995  text *t2 = PG_GETARG_TEXT_PP(1);
996  int sp = PG_GETARG_INT32(2); /* substring start position */
997  int sl = PG_GETARG_INT32(3); /* substring length */
998 
 /* all four arguments are passed straight through to text_overlay() */
999  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1000 }
1001 
/*
 * Three-argument form of the overlay entry point: no substring length is
 * passed, so the character length of the replacement string is used.
 *
 * NOTE(review): the function name/parameter line is absent from this
 * listing (embedded number 1003); presumably textoverlay_no_len -- confirm
 * against the upstream file.
 */
1002 Datum
1004 {
1005  text *t1 = PG_GETARG_TEXT_PP(0);
1006  text *t2 = PG_GETARG_TEXT_PP(1);
1007  int sp = PG_GETARG_INT32(2); /* substring start position */
1008  int sl;
1009 
1010  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1011  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1012 }
1013 
1014 static text *
1015 text_overlay(text *t1, text *t2, int sp, int sl)
1016 {
1017  text *result;
1018  text *s1;
1019  text *s2;
1020  int sp_pl_sl;
1021 
1022  /*
1023  * Check for possible integer-overflow cases. For negative sp, throw a
1024  * "substring length" error because that's what should be expected
1025  * according to the spec's definition of OVERLAY().
1026  */
1027  if (sp <= 0)
1028  ereport(ERROR,
1029  (errcode(ERRCODE_SUBSTRING_ERROR),
1030  errmsg("negative substring length not allowed")));
1031  sp_pl_sl = sp + sl;
1032  if (sp_pl_sl <= sl)
1033  ereport(ERROR,
1034  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1035  errmsg("integer out of range")));
1036 
1037  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1038  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1039  result = text_catenate(s1, t2);
1040  result = text_catenate(result, s2);
1041 
1042  return result;
1043 }
1044 
1045 /*
1046  * textpos -
1047  * Return the position of the specified substring.
1048  * Implements the SQL POSITION() function.
1049  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1050  * - thomas 1997-07-27
1051  */
1052 Datum
/*
 * NOTE(review): the function name/parameter line is absent from this
 * listing (embedded number 1053); per the header comment this is textpos --
 * confirm against the upstream file.
 */
1054 {
1055  text *str = PG_GETARG_TEXT_PP(0);
1056  text *search_str = PG_GETARG_TEXT_PP(1);
1057 
 /* argument 0 is the string searched, argument 1 the pattern */
1058  PG_RETURN_INT32((int32) text_position(str, search_str));
1059 }
1060 
1061 /*
1062  * text_position -
1063  * Does the real work for textpos()
1064  *
1065  * Inputs:
1066  * t1 - string to be searched
1067  * t2 - pattern to match within t1
1068  * Result:
1069  * Character index of the first matched char, starting from 1,
1070  * or 0 if no match.
1071  *
1072  * This is broken out so it can be called directly by other string processing
1073  * functions.
1074  */
1075 static int
/*
 * NOTE(review): the function name/parameter line and the declaration of
 * "state" are absent from this listing (embedded numbers 1076 and 1078);
 * per the prototype earlier in the file this is
 * text_position(text *t1, text *t2), and "state" is presumably a local
 * TextPositionState -- confirm against the upstream file.
 */
1077 {
1079  int result;
1080 
 /* one-shot search: set up, look for the first match from position 1, clean up */
1081  text_position_setup(t1, t2, &state);
1082  result = text_position_next(1, &state);
1083  text_position_cleanup(&state);
1084  return result;
1085 }
1086 
1087 
1088 /*
1089  * text_position_setup, text_position_next, text_position_cleanup -
1090  * Component steps of text_position()
1091  *
1092  * These are broken out so that a string can be efficiently searched for
1093  * multiple occurrences of the same pattern. text_position_next may be
1094  * called multiple times with increasing values of start_pos, which is
1095  * the 1-based character position to start the search from. The "state"
1096  * variable is normally just a local variable in the caller.
1097  */
1098 
1099 static void
/*
 * NOTE(review): the function name/parameter line is absent from this
 * listing (embedded number 1100); per the prototype earlier in the file it
 * is text_position_setup(text *t1, text *t2, TextPositionState *state).
 */
1101 {
 /* byte lengths to begin with; replaced by character counts if multibyte */
1102  int len1 = VARSIZE_ANY_EXHDR(t1);
1103  int len2 = VARSIZE_ANY_EXHDR(t2);
1104 
 /*
  * NOTE(review): the "if (...)" condition opening this if/else is absent
  * from this listing (embedded number 1105).  Given the branch comments
  * it presumably tests for a single-byte database encoding -- confirm
  * against the upstream file.
  */
1106  {
1107  /* simple case - single byte encoding */
1108  state->use_wchar = false;
1109  state->str1 = VARDATA_ANY(t1);
1110  state->str2 = VARDATA_ANY(t2);
1111  state->len1 = len1;
1112  state->len2 = len2;
1113  }
1114  else
1115  {
1116  /* not as simple - multibyte encoding */
1117  pg_wchar *p1,
1118  *p2;
1119 
 /*
  * Convert both strings to wide characters.  The buffers are sized
  * from the byte lengths plus one; the conversion returns the length
  * that is then stored in the state.
  */
1120  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1121  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1122  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1123  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1124 
1125  state->use_wchar = true;
1126  state->wstr1 = p1;
1127  state->wstr2 = p2;
1128  state->len1 = len1;
1129  state->len2 = len2;
1130  }
1131 
1132  /*
1133  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1134  * notes we use the terminology that the "haystack" is the string to be
1135  * searched (t1) and the "needle" is the pattern being sought (t2).
1136  *
1137  * If the needle is empty or bigger than the haystack then there is no
1138  * point in wasting cycles initializing the table. We also choose not to
1139  * use B-M-H for needles of length 1, since the skip table can't possibly
1140  * save anything in that case.
1141  */
1142  if (len1 >= len2 && len2 > 1)
1143  {
1144  int searchlength = len1 - len2;
1145  int skiptablemask;
1146  int last;
1147  int i;
1148 
1149  /*
1150  * First we must determine how much of the skip table to use. The
1151  * declaration of TextPositionState allows up to 256 elements, but for
1152  * short search problems we don't really want to have to initialize so
1153  * many elements --- it would take too long in comparison to the
1154  * actual search time. So we choose a useful skip table size based on
1155  * the haystack length minus the needle length. The closer the needle
1156  * length is to the haystack length the less useful skipping becomes.
1157  *
1158  * Note: since we use bit-masking to select table elements, the skip
1159  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1160  */
1161  if (searchlength < 16)
1162  skiptablemask = 3;
1163  else if (searchlength < 64)
1164  skiptablemask = 7;
1165  else if (searchlength < 128)
1166  skiptablemask = 15;
1167  else if (searchlength < 512)
1168  skiptablemask = 31;
1169  else if (searchlength < 2048)
1170  skiptablemask = 63;
1171  else if (searchlength < 4096)
1172  skiptablemask = 127;
1173  else
1174  skiptablemask = 255;
1175  state->skiptablemask = skiptablemask;
1176 
1177  /*
1178  * Initialize the skip table. We set all elements to the needle
1179  * length, since this is the correct skip distance for any character
1180  * not found in the needle.
1181  */
1182  for (i = 0; i <= skiptablemask; i++)
1183  state->skiptable[i] = len2;
1184 
1185  /*
1186  * Now examine the needle. For each character except the last one,
1187  * set the corresponding table element to the appropriate skip
1188  * distance. Note that when two characters share the same skip table
1189  * entry, the one later in the needle must determine the skip
1190  * distance.
1191  */
1192  last = len2 - 1;
1193 
1194  if (!state->use_wchar)
1195  {
1196  const char *str2 = state->str2;
1197 
 /* cast to unsigned char so the masked subscript is never negative */
1198  for (i = 0; i < last; i++)
1199  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1200  }
1201  else
1202  {
1203  const pg_wchar *wstr2 = state->wstr2;
1204 
1205  for (i = 0; i < last; i++)
1206  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1207  }
1208  }
1209 }
1210 
1211 static int
1213 {
1214  int haystack_len = state->len1;
1215  int needle_len = state->len2;
1216  int skiptablemask = state->skiptablemask;
1217 
1218  Assert(start_pos > 0); /* else caller error */
1219 
1220  if (needle_len <= 0)
1221  return start_pos; /* result for empty pattern */
1222 
1223  start_pos--; /* adjust for zero based arrays */
1224 
1225  /* Done if the needle can't possibly fit */
1226  if (haystack_len < start_pos + needle_len)
1227  return 0;
1228 
1229  if (!state->use_wchar)
1230  {
1231  /* simple case - single byte encoding */
1232  const char *haystack = state->str1;
1233  const char *needle = state->str2;
1234  const char *haystack_end = &haystack[haystack_len];
1235  const char *hptr;
1236 
1237  if (needle_len == 1)
1238  {
1239  /* No point in using B-M-H for a one-character needle */
1240  char nchar = *needle;
1241 
1242  hptr = &haystack[start_pos];
1243  while (hptr < haystack_end)
1244  {
1245  if (*hptr == nchar)
1246  return hptr - haystack + 1;
1247  hptr++;
1248  }
1249  }
1250  else
1251  {
1252  const char *needle_last = &needle[needle_len - 1];
1253 
1254  /* Start at startpos plus the length of the needle */
1255  hptr = &haystack[start_pos + needle_len - 1];
1256  while (hptr < haystack_end)
1257  {
1258  /* Match the needle scanning *backward* */
1259  const char *nptr;
1260  const char *p;
1261 
1262  nptr = needle_last;
1263  p = hptr;
1264  while (*nptr == *p)
1265  {
1266  /* Matched it all? If so, return 1-based position */
1267  if (nptr == needle)
1268  return p - haystack + 1;
1269  nptr--, p--;
1270  }
1271 
1272  /*
1273  * No match, so use the haystack char at hptr to decide how
1274  * far to advance. If the needle had any occurrence of that
1275  * character (or more precisely, one sharing the same
1276  * skiptable entry) before its last character, then we advance
1277  * far enough to align the last such needle character with
1278  * that haystack position. Otherwise we can advance by the
1279  * whole needle length.
1280  */
1281  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1282  }
1283  }
1284  }
1285  else
1286  {
1287  /* The multibyte char version. This works exactly the same way. */
1288  const pg_wchar *haystack = state->wstr1;
1289  const pg_wchar *needle = state->wstr2;
1290  const pg_wchar *haystack_end = &haystack[haystack_len];
1291  const pg_wchar *hptr;
1292 
1293  if (needle_len == 1)
1294  {
1295  /* No point in using B-M-H for a one-character needle */
1296  pg_wchar nchar = *needle;
1297 
1298  hptr = &haystack[start_pos];
1299  while (hptr < haystack_end)
1300  {
1301  if (*hptr == nchar)
1302  return hptr - haystack + 1;
1303  hptr++;
1304  }
1305  }
1306  else
1307  {
1308  const pg_wchar *needle_last = &needle[needle_len - 1];
1309 
1310  /* Start at startpos plus the length of the needle */
1311  hptr = &haystack[start_pos + needle_len - 1];
1312  while (hptr < haystack_end)
1313  {
1314  /* Match the needle scanning *backward* */
1315  const pg_wchar *nptr;
1316  const pg_wchar *p;
1317 
1318  nptr = needle_last;
1319  p = hptr;
1320  while (*nptr == *p)
1321  {
1322  /* Matched it all? If so, return 1-based position */
1323  if (nptr == needle)
1324  return p - haystack + 1;
1325  nptr--, p--;
1326  }
1327 
1328  /*
1329  * No match, so use the haystack char at hptr to decide how
1330  * far to advance. If the needle had any occurrence of that
1331  * character (or more precisely, one sharing the same
1332  * skiptable entry) before its last character, then we advance
1333  * far enough to align the last such needle character with
1334  * that haystack position. Otherwise we can advance by the
1335  * whole needle length.
1336  */
1337  hptr += state->skiptable[*hptr & skiptablemask];
1338  }
1339  }
1340  }
1341 
1342  return 0; /* not found */
1343 }
1344 
1345 static void
1347 {
1348  if (state->use_wchar)
1349  {
1350  pfree(state->wstr1);
1351  pfree(state->wstr2);
1352  }
1353 }
1354 
1355 /* varstr_cmp()
1356  * Comparison function for text strings with given lengths.
1357  * Includes locale support, but must copy strings to temporary memory
1358  * to allow null-termination for inputs to strcoll().
1359  * Returns an integer less than, equal to, or greater than zero, indicating
1360  * whether arg1 is less than, equal to, or greater than arg2.
 *
 * arg1/len1, arg2/len2: the two strings, each given as a pointer plus a
 * byte length.  They need not be NUL-terminated: the locale-aware paths
 * below build terminated copies before calling the C library.
 * collid: collation to compare under.  If lc_collate_is_c(collid) is true
 * the comparison is a plain memcmp().  DEFAULT_COLLATION_OID is compared
 * with strcoll()/wcscoll() and no explicit locale object.  An invalid OID
 * raises ERRCODE_INDETERMINATE_COLLATION.
 *
 * When the locale-aware comparison reports equality, the result is
 * re-decided with strcmp() (memcmp() plus length in the WIN32/UTF-8 path).
1361  */
1362 int
1363 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1364 {
1365  int result;
1366 
1367  /*
1368  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1369  * have to do some memory copying. This turns out to be significantly
1370  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1371  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1372  */
1373  if (lc_collate_is_c(collid))
1374  {
 /* Bytewise order; on a common prefix the shorter string sorts first */
1375  result = memcmp(arg1, arg2, Min(len1, len2));
1376  if ((result == 0) && (len1 != len2))
1377  result = (len1 < len2) ? -1 : 1;
1378  }
1379  else
1380  {
1381  char a1buf[TEXTBUFLEN];
1382  char a2buf[TEXTBUFLEN];
1383  char *a1p,
1384  *a2p;
1385 
1386 #ifdef HAVE_LOCALE_T
1387  pg_locale_t mylocale = 0;
1388 #endif
1389 
1390  if (collid != DEFAULT_COLLATION_OID)
1391  {
1392  if (!OidIsValid(collid))
1393  {
1394  /*
1395  * This typically means that the parser could not resolve a
1396  * conflict of implicit collations, so report it that way.
1397  */
1398  ereport(ERROR,
1399  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1400  errmsg("could not determine which collation to use for string comparison"),
1401  errhint("Use the COLLATE clause to set the collation explicitly.")));
1402  }
1403 #ifdef HAVE_LOCALE_T
1404  mylocale = pg_newlocale_from_collation(collid);
1405 #endif
1406  }
1407 
1408 #ifdef WIN32
1409  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1410  if (GetDatabaseEncoding() == PG_UTF8)
1411  {
1412  int a1len;
1413  int a2len;
1414  int r;
1415 
 /*
  * The conversion buffer is budgeted at two bytes per input byte plus
  * two for the terminator.  The stack buffer is used only when that
  * budget fits in TEXTBUFLEN; otherwise the buffer is palloc'd and
  * freed again before returning.
  */
1416  if (len1 >= TEXTBUFLEN / 2)
1417  {
1418  a1len = len1 * 2 + 2;
1419  a1p = palloc(a1len);
1420  }
1421  else
1422  {
1423  a1len = TEXTBUFLEN;
1424  a1p = a1buf;
1425  }
1426  if (len2 >= TEXTBUFLEN / 2)
1427  {
1428  a2len = len2 * 2 + 2;
1429  a2p = palloc(a2len);
1430  }
1431  else
1432  {
1433  a2len = TEXTBUFLEN;
1434  a2p = a2buf;
1435  }
1436 
1437  /* stupid Microsloth API does not work for zero-length input */
1438  if (len1 == 0)
1439  r = 0;
1440  else
1441  {
1442  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1443  (LPWSTR) a1p, a1len / 2);
1444  if (!r)
1445  ereport(ERROR,
1446  (errmsg("could not convert string to UTF-16: error code %lu",
1447  GetLastError())));
1448  }
 /* r is the number of wide characters written; terminate after them */
1449  ((LPWSTR) a1p)[r] = 0;
1450 
1451  if (len2 == 0)
1452  r = 0;
1453  else
1454  {
1455  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1456  (LPWSTR) a2p, a2len / 2);
1457  if (!r)
1458  ereport(ERROR,
1459  (errmsg("could not convert string to UTF-16: error code %lu",
1460  GetLastError())));
1461  }
1462  ((LPWSTR) a2p)[r] = 0;
1463 
 /* Cleared first, presumably so that %m below reflects the comparison call */
1464  errno = 0;
1465 #ifdef HAVE_LOCALE_T
1466  if (mylocale)
1467  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
1468  else
1469 #endif
1470  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1471  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1472  * headers */
1473  ereport(ERROR,
1474  (errmsg("could not compare Unicode strings: %m")));
1475 
1476  /*
1477  * In some locales wcscoll() can claim that nonidentical strings
1478  * are equal. Believing that would be bad news for a number of
1479  * reasons, so we follow Perl's lead and sort "equal" strings
1480  * according to strcmp (on the UTF-8 representation).
1481  */
1482  if (result == 0)
1483  {
1484  result = memcmp(arg1, arg2, Min(len1, len2));
1485  if ((result == 0) && (len1 != len2))
1486  result = (len1 < len2) ? -1 : 1;
1487  }
1488 
1489  if (a1p != a1buf)
1490  pfree(a1p);
1491  if (a2p != a2buf)
1492  pfree(a2p);
1493 
1494  return result;
1495  }
1496 #endif /* WIN32 */
1497 
 /*
  * Each copy needs len + 1 bytes for the terminating NUL, so the stack
  * buffer is usable only while len < TEXTBUFLEN; longer inputs get a
  * palloc'd buffer that is freed below.
  */
1498  if (len1 >= TEXTBUFLEN)
1499  a1p = (char *) palloc(len1 + 1);
1500  else
1501  a1p = a1buf;
1502  if (len2 >= TEXTBUFLEN)
1503  a2p = (char *) palloc(len2 + 1);
1504  else
1505  a2p = a2buf;
1506 
1507  memcpy(a1p, arg1, len1);
1508  a1p[len1] = '\0';
1509  memcpy(a2p, arg2, len2);
1510  a2p[len2] = '\0';
1511 
1512 #ifdef HAVE_LOCALE_T
1513  if (mylocale)
1514  result = strcoll_l(a1p, a2p, mylocale);
1515  else
1516 #endif
1517  result = strcoll(a1p, a2p);
1518 
1519  /*
1520  * In some locales strcoll() can claim that nonidentical strings are
1521  * equal. Believing that would be bad news for a number of reasons,
1522  * so we follow Perl's lead and sort "equal" strings according to
1523  * strcmp().
1524  */
1525  if (result == 0)
1526  result = strcmp(a1p, a2p);
1527 
1528  if (a1p != a1buf)
1529  pfree(a1p);
1530  if (a2p != a2buf)
1531  pfree(a2p);
1532  }
1533 
1534  return result;
1535 }
1536 
1537 
1538 /* text_cmp()
1539  * Internal comparison function for text strings.
1540  * Returns an integer less than, equal to, or greater than zero.  This is
 * varstr_cmp()'s result passed through unchanged, so callers must test
 * the sign and must not assume the value is exactly -1, 0 or 1.
 *
 * arg1, arg2: text values to compare.  They are read with VARDATA_ANY and
 * VARSIZE_ANY_EXHDR, so short-header datums are accepted; the callers in
 * this file pass values obtained with PG_GETARG_TEXT_PP.
 * collid: collation OID, handed to varstr_cmp() as is.
1541  */
1542 static int
1543 text_cmp(text *arg1, text *arg2, Oid collid)
1544 {
1545  char *a1p,
1546  *a2p;
1547  int len1,
1548  len2;
1549 
1550  a1p = VARDATA_ANY(arg1);
1551  a2p = VARDATA_ANY(arg2);
1552 
1553  len1 = VARSIZE_ANY_EXHDR(arg1);
1554  len2 = VARSIZE_ANY_EXHDR(arg2);
1555 
1556  return varstr_cmp(a1p, len1, a2p, len2, collid);
1557 }
1558 
1559 /*
1560  * Comparison functions for text strings.
1561  *
1562  * Note: btree indexes need these routines not to leak memory; therefore,
1563  * be careful to free working copies of toasted datums. Most places don't
1564  * need to be so careful.
1565  */
1566 
1567 Datum
1569 {
1570  Datum arg1 = PG_GETARG_DATUM(0);
1571  Datum arg2 = PG_GETARG_DATUM(1);
1572  bool result;
1573  Size len1,
1574  len2;
1575 
1576  /*
1577  * Since we only care about equality or not-equality, we can avoid all the
1578  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1579  * don't even have to do a bitwise comparison if we can show the lengths
1580  * of the strings are unequal; which might save us from having to detoast
1581  * one or both values.
1582  */
1583  len1 = toast_raw_datum_size(arg1);
1584  len2 = toast_raw_datum_size(arg2);
1585  if (len1 != len2)
1586  result = false;
1587  else
1588  {
1589  text *targ1 = DatumGetTextPP(arg1);
1590  text *targ2 = DatumGetTextPP(arg2);
1591 
1592  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1593  len1 - VARHDRSZ) == 0);
1594 
1595  PG_FREE_IF_COPY(targ1, 0);
1596  PG_FREE_IF_COPY(targ2, 1);
1597  }
1598 
1599  PG_RETURN_BOOL(result);
1600 }
1601 
1602 Datum
1604 {
1605  Datum arg1 = PG_GETARG_DATUM(0);
1606  Datum arg2 = PG_GETARG_DATUM(1);
1607  bool result;
1608  Size len1,
1609  len2;
1610 
1611  /* See comment in texteq() */
1612  len1 = toast_raw_datum_size(arg1);
1613  len2 = toast_raw_datum_size(arg2);
1614  if (len1 != len2)
1615  result = true;
1616  else
1617  {
1618  text *targ1 = DatumGetTextPP(arg1);
1619  text *targ2 = DatumGetTextPP(arg2);
1620 
1621  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1622  len1 - VARHDRSZ) != 0);
1623 
1624  PG_FREE_IF_COPY(targ1, 0);
1625  PG_FREE_IF_COPY(targ2, 1);
1626  }
1627 
1628  PG_RETURN_BOOL(result);
1629 }
1630 
1631 Datum
1633 {
1634  text *arg1 = PG_GETARG_TEXT_PP(0);
1635  text *arg2 = PG_GETARG_TEXT_PP(1);
1636  bool result;
1637 
1638  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1639 
1640  PG_FREE_IF_COPY(arg1, 0);
1641  PG_FREE_IF_COPY(arg2, 1);
1642 
1643  PG_RETURN_BOOL(result);
1644 }
1645 
1646 Datum
1648 {
1649  text *arg1 = PG_GETARG_TEXT_PP(0);
1650  text *arg2 = PG_GETARG_TEXT_PP(1);
1651  bool result;
1652 
1653  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1654 
1655  PG_FREE_IF_COPY(arg1, 0);
1656  PG_FREE_IF_COPY(arg2, 1);
1657 
1658  PG_RETURN_BOOL(result);
1659 }
1660 
1661 Datum
1663 {
1664  text *arg1 = PG_GETARG_TEXT_PP(0);
1665  text *arg2 = PG_GETARG_TEXT_PP(1);
1666  bool result;
1667 
1668  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1669 
1670  PG_FREE_IF_COPY(arg1, 0);
1671  PG_FREE_IF_COPY(arg2, 1);
1672 
1673  PG_RETURN_BOOL(result);
1674 }
1675 
1676 Datum
1678 {
1679  text *arg1 = PG_GETARG_TEXT_PP(0);
1680  text *arg2 = PG_GETARG_TEXT_PP(1);
1681  bool result;
1682 
1683  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1684 
1685  PG_FREE_IF_COPY(arg1, 0);
1686  PG_FREE_IF_COPY(arg2, 1);
1687 
1688  PG_RETURN_BOOL(result);
1689 }
1690 
1691 Datum
1693 {
1694  text *arg1 = PG_GETARG_TEXT_PP(0);
1695  text *arg2 = PG_GETARG_TEXT_PP(1);
1696  int32 result;
1697 
1698  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1699 
1700  PG_FREE_IF_COPY(arg1, 0);
1701  PG_FREE_IF_COPY(arg2, 1);
1702 
1703  PG_RETURN_INT32(result);
1704 }
1705 
1706 Datum
1708 {
1710  Oid collid = ssup->ssup_collation;
1711  MemoryContext oldcontext;
1712 
1713  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1714 
1715  btsortsupport_worker(ssup, collid);
1716 
1717  MemoryContextSwitchTo(oldcontext);
1718 
1719  PG_RETURN_VOID();
1720 }
1721 
1722 static void
1724 {
1725  TextSortSupport *tss;
1726 
1727  /*
1728  * If LC_COLLATE = C, we can make things quite a bit faster by using
1729  * memcmp() rather than strcoll(). To minimize the per-comparison
1730  * overhead, we make this decision just once for the whole sort.
1731  */
1732  if (lc_collate_is_c(collid))
1733  {
1734  ssup->comparator = bttextfastcmp_c;
1735  return;
1736  }
1737 
1738  /*
1739  * WIN32 requires complex hacks when the database encoding is UTF-8 (except
1740  * when using the "C" collation). For now, we don't optimize that case.
1741  */
1742 #ifdef WIN32
1743  if (GetDatabaseEncoding() == PG_UTF8)
1744  return;
1745 #endif
1746 
1747  /*
1748  * We may need a collation-sensitive comparison. To make things faster,
1749  * we'll figure out the collation based on the locale id and cache the
1750  * result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
1751  * prepare one or two palloc'd buffers to use as temporary workspace. In
1752  * the ad-hoc comparison case we only use palloc'd buffers when we need
1753  * more space than we're comfortable allocating on the stack, but here we
1754  * can keep the buffers around for the whole sort, so it makes sense to
1755  * allocate them once and use them unconditionally.
1756  */
1757  tss = palloc(sizeof(TextSortSupport));
1758 #ifdef HAVE_LOCALE_T
1759  tss->locale = 0;
1760 #endif
1761 
1762  if (collid != DEFAULT_COLLATION_OID)
1763  {
1764  if (!OidIsValid(collid))
1765  {
1766  /*
1767  * This typically means that the parser could not resolve a
1768  * conflict of implicit collations, so report it that way.
1769  */
1770  ereport(ERROR,
1771  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1772  errmsg("could not determine which collation to use for string comparison"),
1773  errhint("Use the COLLATE clause to set the collation explicitly.")));
1774  }
1775 #ifdef HAVE_LOCALE_T
1776  tss->locale = pg_newlocale_from_collation(collid);
1777 #endif
1778  }
1779 
1780  tss->buf1 = palloc(TEXTBUFLEN);
1781  tss->buflen1 = TEXTBUFLEN;
1782  tss->buf2 = palloc(TEXTBUFLEN);
1783  tss->buflen2 = TEXTBUFLEN;
1784 
1785  ssup->ssup_extra = tss;
1787 }
1788 
1789 /*
1790  * sortsupport comparison func (for C locale case)
1791  */
1792 static int
1794 {
1795  text *arg1 = DatumGetTextPP(x);
1796  text *arg2 = DatumGetTextPP(y);
1797  char *a1p,
1798  *a2p;
1799  int len1,
1800  len2,
1801  result;
1802 
1803  a1p = VARDATA_ANY(arg1);
1804  a2p = VARDATA_ANY(arg2);
1805 
1806  len1 = VARSIZE_ANY_EXHDR(arg1);
1807  len2 = VARSIZE_ANY_EXHDR(arg2);
1808 
1809  result = memcmp(a1p, a2p, Min(len1, len2));
1810  if ((result == 0) && (len1 != len2))
1811  result = (len1 < len2) ? -1 : 1;
1812 
1813  /* We can't afford to leak memory here. */
1814  if (PointerGetDatum(arg1) != x)
1815  pfree(arg1);
1816  if (PointerGetDatum(arg2) != y)
1817  pfree(arg2);
1818 
1819  return result;
1820 }
1821 
1822 /*
1823  * sortsupport comparison func (for locale case)
1824  */
1825 static int
1827 {
1828  text *arg1 = DatumGetTextPP(x);
1829  text *arg2 = DatumGetTextPP(y);
1830  TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
1831 
1832  /* working state */
1833  char *a1p,
1834  *a2p;
1835  int len1,
1836  len2,
1837  result;
1838 
1839  a1p = VARDATA_ANY(arg1);
1840  a2p = VARDATA_ANY(arg2);
1841 
1842  len1 = VARSIZE_ANY_EXHDR(arg1);
1843  len2 = VARSIZE_ANY_EXHDR(arg2);
1844 
1845  if (len1 >= tss->buflen1)
1846  {
1847  pfree(tss->buf1);
1848  tss->buflen1 = Max(len1 + 1, Min(tss->buflen1 * 2, MaxAllocSize));
1849  tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
1850  }
1851  if (len2 >= tss->buflen2)
1852  {
1853  pfree(tss->buf2);
1854  tss->buflen2 = Max(len2 + 1, Min(tss->buflen2 * 2, MaxAllocSize));
1855  tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
1856  }
1857 
1858  memcpy(tss->buf1, a1p, len1);
1859  tss->buf1[len1] = '\0';
1860  memcpy(tss->buf2, a2p, len2);
1861  tss->buf2[len2] = '\0';
1862 
1863 #ifdef HAVE_LOCALE_T
1864  if (tss->locale)
1865  result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
1866  else
1867 #endif
1868  result = strcoll(tss->buf1, tss->buf2);
1869 
1870  /*
1871  * In some locales strcoll() can claim that nonidentical strings are equal.
1872  * Believing that would be bad news for a number of reasons, so we follow
1873  * Perl's lead and sort "equal" strings according to strcmp().
1874  */
1875  if (result == 0)
1876  result = strcmp(tss->buf1, tss->buf2);
1877 
1878  /* We can't afford to leak memory here. */
1879  if (PointerGetDatum(arg1) != x)
1880  pfree(arg1);
1881  if (PointerGetDatum(arg2) != y)
1882  pfree(arg2);
1883 
1884  return result;
1885 }
1886 
1887 Datum
1889 {
1890  text *arg1 = PG_GETARG_TEXT_PP(0);
1891  text *arg2 = PG_GETARG_TEXT_PP(1);
1892  text *result;
1893 
1894  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
1895 
1896  PG_RETURN_TEXT_P(result);
1897 }
1898 
1899 Datum
1901 {
1902  text *arg1 = PG_GETARG_TEXT_PP(0);
1903  text *arg2 = PG_GETARG_TEXT_PP(1);
1904  text *result;
1905 
1906  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
1907 
1908  PG_RETURN_TEXT_P(result);
1909 }
1910 
1911 
1912 /*
1913  * The following operators support character-by-character comparison
1914  * of text datums, to allow building indexes suitable for LIKE clauses.
1915  * Note that the regular texteq/textne comparison operators are assumed
1916  * to be compatible with these!
1917  */
1918 
1919 static int
1921 {
1922  int result;
1923  int len1,
1924  len2;
1925 
1926  len1 = VARSIZE_ANY_EXHDR(arg1);
1927  len2 = VARSIZE_ANY_EXHDR(arg2);
1928 
1929  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1930  if (result != 0)
1931  return result;
1932  else if (len1 < len2)
1933  return -1;
1934  else if (len1 > len2)
1935  return 1;
1936  else
1937  return 0;
1938 }
1939 
1940 
1941 Datum
1943 {
1944  text *arg1 = PG_GETARG_TEXT_PP(0);
1945  text *arg2 = PG_GETARG_TEXT_PP(1);
1946  int result;
1947 
1948  result = internal_text_pattern_compare(arg1, arg2);
1949 
1950  PG_FREE_IF_COPY(arg1, 0);
1951  PG_FREE_IF_COPY(arg2, 1);
1952 
1953  PG_RETURN_BOOL(result < 0);
1954 }
1955 
1956 
1957 Datum
1959 {
1960  text *arg1 = PG_GETARG_TEXT_PP(0);
1961  text *arg2 = PG_GETARG_TEXT_PP(1);
1962  int result;
1963 
1964  result = internal_text_pattern_compare(arg1, arg2);
1965 
1966  PG_FREE_IF_COPY(arg1, 0);
1967  PG_FREE_IF_COPY(arg2, 1);
1968 
1969  PG_RETURN_BOOL(result <= 0);
1970 }
1971 
1972 
1973 Datum
1975 {
1976  text *arg1 = PG_GETARG_TEXT_PP(0);
1977  text *arg2 = PG_GETARG_TEXT_PP(1);
1978  int result;
1979 
1980  result = internal_text_pattern_compare(arg1, arg2);
1981 
1982  PG_FREE_IF_COPY(arg1, 0);
1983  PG_FREE_IF_COPY(arg2, 1);
1984 
1985  PG_RETURN_BOOL(result >= 0);
1986 }
1987 
1988 
1989 Datum
1991 {
1992  text *arg1 = PG_GETARG_TEXT_PP(0);
1993  text *arg2 = PG_GETARG_TEXT_PP(1);
1994  int result;
1995 
1996  result = internal_text_pattern_compare(arg1, arg2);
1997 
1998  PG_FREE_IF_COPY(arg1, 0);
1999  PG_FREE_IF_COPY(arg2, 1);
2000 
2001  PG_RETURN_BOOL(result > 0);
2002 }
2003 
2004 
2005 Datum
2007 {
2008  text *arg1 = PG_GETARG_TEXT_PP(0);
2009  text *arg2 = PG_GETARG_TEXT_PP(1);
2010  int result;
2011 
2012  result = internal_text_pattern_compare(arg1, arg2);
2013 
2014  PG_FREE_IF_COPY(arg1, 0);
2015  PG_FREE_IF_COPY(arg2, 1);
2016 
2017  PG_RETURN_INT32(result);
2018 }
2019 
2020 
2021 /*-------------------------------------------------------------
2022  * byteaoctetlen
2023  *
2024  * get the number of bytes contained in an instance of type 'bytea'
2025  *-------------------------------------------------------------
2026  */
2027 Datum
2029 {
2030  Datum str = PG_GETARG_DATUM(0);
2031 
2032  /* We need not detoast the input at all */
2034 }
2035 
2036 /*
2037  * byteacat -
2038  * takes two bytea* and returns a bytea* that is the concatenation of
2039  * the two.
2040  *
2041  * Cloned from textcat and modified as required.
2042  */
2043 Datum
2045 {
2046  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2047  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2048 
2050 }
2051 
2052 /*
2053  * bytea_catenate
2054  * Guts of byteacat(), broken out so it can be used by other functions
2055  *
2056  * Arguments can be in short-header form, but not compressed or out-of-line
2057  */
2058 static bytea *
2060 {
2061  bytea *result;
2062  int len1,
2063  len2,
2064  len;
2065  char *ptr;
2066 
2067  len1 = VARSIZE_ANY_EXHDR(t1);
2068  len2 = VARSIZE_ANY_EXHDR(t2);
2069 
2070  /* paranoia ... probably should throw error instead? */
2071  if (len1 < 0)
2072  len1 = 0;
2073  if (len2 < 0)
2074  len2 = 0;
2075 
2076  len = len1 + len2 + VARHDRSZ;
2077  result = (bytea *) palloc(len);
2078 
2079  /* Set size of result string... */
2080  SET_VARSIZE(result, len);
2081 
2082  /* Fill data field of result string... */
2083  ptr = VARDATA(result);
2084  if (len1 > 0)
2085  memcpy(ptr, VARDATA_ANY(t1), len1);
2086  if (len2 > 0)
2087  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2088 
2089  return result;
2090 }
2091 
/*
 * Build a bytea from a C string by passing it through byteain().
 * bytea_substring() uses PG_STR_GET_BYTEA("") to return an empty result.
 */
2092 #define PG_STR_GET_BYTEA(str_) \
2093  DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2094 
2095 /*
2096  * bytea_substr()
2097  * Return a substring starting at the specified position.
2098  * Cloned from text_substr and modified as required.
2099  *
2100  * Input:
2101  * - string
2102  * - starting position (is one-based)
2103  * - string length (optional)
2104  *
2105  * If the starting position is zero or less, then return from the start of the string
2106  * adjusting the length to be consistent with the "negative start" per SQL.
2107  * If the length is less than zero, an ERROR is thrown. If no third argument
2108  * (length) is provided, the length to the end of the string is assumed.
2109  */
2110 Datum
2112 {
2114  PG_GETARG_INT32(1),
2115  PG_GETARG_INT32(2),
2116  false));
2117 }
2118 
2119 /*
2120  * bytea_substr_no_len -
2121  * Wrapper to avoid opr_sanity failure due to
2122  * one function accepting a different number of args.
2123  */
2124 Datum
2126 {
2128  PG_GETARG_INT32(1),
2129  -1,
2130  true));
2131 }
2132 
/*
 * Worker that extracts a slice of a bytea.  bytea_overlay() calls it as
 * bytea_substring(datum, S, L, length_not_specified); the first parameter
 * is referred to as "str" in the body.  Presumably the bytea_substr
 * wrappers above call it as well (their call lines are not visible here).
 *
 * S: one-based start position.  The end position is computed from the
 * caller's S, and only then is the start clamped to 1 (S1).
 * L: requested length; ignored when length_not_specified is true.
 * length_not_specified: if true, return everything from the start
 * position to the end of the value.
 *
 * Returns the result of DatumGetByteaPSlice(str, S1 - 1, L1), or an empty
 * bytea when the end position is below 1.  Raises ERRCODE_SUBSTRING_ERROR
 * when the end position comes out before S.
 *
 * NOTE(review): E = S + L is plain int arithmetic with no overflow check.
 * For a positive S and a very large positive L the sum can exceed INT_MAX;
 * if it wraps negative, the E < S test reports "negative substring length"
 * even though L is positive.  Confirm whether callers can pass such values.
 */
2133 static bytea *
2135  int S,
2136  int L,
2137  bool length_not_specified)
2138 {
2139  int S1; /* adjusted start position */
2140  int L1; /* adjusted substring length */
2141 
2142  S1 = Max(S, 1);
2143 
2144  if (length_not_specified)
2145  {
2146  /*
2147  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2148  * end of the string if we pass it a negative value for length.
2149  */
2150  L1 = -1;
2151  }
2152  else
2153  {
2154  /* end position */
2155  int E = S + L;
2156 
2157  /*
2158  * A negative value for L is the only way for the end position to be
2159  * before the start. SQL99 says to throw an error.
2160  */
2161  if (E < S)
2162  ereport(ERROR,
2163  (errcode(ERRCODE_SUBSTRING_ERROR),
2164  errmsg("negative substring length not allowed")));
2165 
2166  /*
2167  * A zero or negative value for the end position can happen if the
2168  * start was negative or one. SQL99 says to return a zero-length
2169  * string.
2170  */
2171  if (E < 1)
2172  return PG_STR_GET_BYTEA("");
2173 
 /* Length is measured from the clamped start, not from the caller's S */
2174  L1 = E - S1;
2175  }
2176 
2177  /*
2178  * If the start position is past the end of the string, SQL99 says to
2179  * return a zero-length string -- DatumGetByteaPSlice() will do that for
2180  * us. Convert to zero-based starting position
2181  */
2182  return DatumGetByteaPSlice(str, S1 - 1, L1);
2183 }
2184 
2185 /*
2186  * byteaoverlay
2187  * Replace specified substring of first string with second
2188  *
2189  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2190  * This code is a direct implementation of what the standard says.
2191  */
2192 Datum
2194 {
2195  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2196  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2197  int sp = PG_GETARG_INT32(2); /* substring start position */
2198  int sl = PG_GETARG_INT32(3); /* substring length */
2199 
2200  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2201 }
2202 
2203 Datum
2205 {
2206  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2207  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2208  int sp = PG_GETARG_INT32(2); /* substring start position */
2209  int sl;
2210 
2211  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2212  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2213 }
2214 
/*
 * bytea_overlay
 *	Worker for the two overlay entry points above, which call it with
 *	either an explicit length or the length of t2.
 *
 * t1: the value being overlaid.
 * t2: the replacement bytes.
 * sp: one-based start position of the replaced span; must be positive.
 * sl: length of the replaced span.
 *
 * The result is substring(t1, 1, sp - 1) || t2 || substring(t1, sp + sl),
 * built with bytea_substring() and bytea_catenate().
 *
 * Raises ERRCODE_SUBSTRING_ERROR when sp <= 0, and
 * ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE when sp + sl would exceed INT_MAX.
 */
2215 static bytea *
2216 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2217 {
2218  bytea *result;
2219  bytea *s1;
2220  bytea *s2;
2221  int sp_pl_sl;
2222 
2223  /*
2224  * Check for possible integer-overflow cases. For non-positive sp, throw
2225  * a "substring length" error because that's what should be expected
2226  * according to the spec's definition of OVERLAY().
2227  */
2228  if (sp <= 0)
2229  ereport(ERROR,
2230  (errcode(ERRCODE_SUBSTRING_ERROR),
2231  errmsg("negative substring length not allowed")));
 /*
  * Test for overflow before adding: signed overflow is undefined in C, so
  * the sum must not be computed first and inspected afterwards.  sp is
  * known to be >= 1 here, so INT_MAX - sp cannot itself overflow, and a
  * negative sl can never trip the test.
  */
2232  if (sl > INT_MAX - sp)
2233  ereport(ERROR,
2234  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2235  errmsg("integer out of range")));
2236  sp_pl_sl = sp + sl;
2237 
 /* s1 = part of t1 before the span, s2 = part of t1 from sp + sl onward */
2238  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2239  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2240  result = bytea_catenate(s1, t2);
2241  result = bytea_catenate(result, s2);
2242 
2243  return result;
2244 }
2245 
2246 /*
2247  * byteapos -
2248  * Return the position of the specified substring.
2249  * Implements the SQL POSITION() function.
2250  * Cloned from textpos and modified as required.
2251  */
2252 Datum
2254 {
2255  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2256  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2257  int pos;
2258  int px,
2259  p;
2260  int len1,
2261  len2;
2262  char *p1,
2263  *p2;
2264 
2265  len1 = VARSIZE_ANY_EXHDR(t1);
2266  len2 = VARSIZE_ANY_EXHDR(t2);
2267 
2268  if (len2 <= 0)
2269  PG_RETURN_INT32(1); /* result for empty pattern */
2270 
2271  p1 = VARDATA_ANY(t1);
2272  p2 = VARDATA_ANY(t2);
2273 
2274  pos = 0;
2275  px = (len1 - len2);
2276  for (p = 0; p <= px; p++)
2277  {
2278  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2279  {
2280  pos = p + 1;
2281  break;
2282  };
2283  p1++;
2284  };
2285 
2286  PG_RETURN_INT32(pos);
2287 }
2288 
2289 /*-------------------------------------------------------------
2290  * byteaGetByte
2291  *
2292  * this routine treats "bytea" as an array of bytes.
2293  * It returns the Nth byte (a number between 0 and 255).
2294  *-------------------------------------------------------------
2295  */
2296 Datum
2298 {
2299  bytea *v = PG_GETARG_BYTEA_PP(0);
2300  int32 n = PG_GETARG_INT32(1);
2301  int len;
2302  int byte;
2303 
2304  len = VARSIZE_ANY_EXHDR(v);
2305 
2306  if (n < 0 || n >= len)
2307  ereport(ERROR,
2308  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2309  errmsg("index %d out of valid range, 0..%d",
2310  n, len - 1)));
2311 
2312  byte = ((unsigned char *) VARDATA_ANY(v))[n];
2313 
2314  PG_RETURN_INT32(byte);
2315 }
2316 
2317 /*-------------------------------------------------------------
2318  * byteaGetBit
2319  *
2320  * This routine treats a "bytea" type like an array of bits.
2321  * It returns the value of the Nth bit (0 or 1).
2322  *
2323  *-------------------------------------------------------------
2324  */
2325 Datum
2327 {
2328  bytea *v = PG_GETARG_BYTEA_PP(0);
2329  int32 n = PG_GETARG_INT32(1);
2330  int byteNo,
2331  bitNo;
2332  int len;
2333  int byte;
2334 
2335  len = VARSIZE_ANY_EXHDR(v);
2336 
2337  if (n < 0 || n >= len * 8)
2338  ereport(ERROR,
2339  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2340  errmsg("index %d out of valid range, 0..%d",
2341  n, len * 8 - 1)));
2342 
2343  byteNo = n / 8;
2344  bitNo = n % 8;
2345 
2346  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
2347 
2348  if (byte & (1 << bitNo))
2349  PG_RETURN_INT32(1);
2350  else
2351  PG_RETURN_INT32(0);
2352 }
2353 
2354 /*-------------------------------------------------------------
2355  * byteaSetByte
2356  *
2357  * Given an instance of type 'bytea' creates a new one with
2358  * the Nth byte set to the given value.
2359  *
2360  *-------------------------------------------------------------
2361  */
2362 Datum
2364 {
2365  bytea *v = PG_GETARG_BYTEA_P(0);
2366  int32 n = PG_GETARG_INT32(1);
2367  int32 newByte = PG_GETARG_INT32(2);
2368  int len;
2369  bytea *res;
2370 
2371  len = VARSIZE(v) - VARHDRSZ;
2372 
2373  if (n < 0 || n >= len)
2374  ereport(ERROR,
2375  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2376  errmsg("index %d out of valid range, 0..%d",
2377  n, len - 1)));
2378 
2379  /*
2380  * Make a copy of the original varlena.
2381  */
2382  res = (bytea *) palloc(VARSIZE(v));
2383  memcpy((char *) res, (char *) v, VARSIZE(v));
2384 
2385  /*
2386  * Now set the byte.
2387  */
2388  ((unsigned char *) VARDATA(res))[n] = newByte;
2389 
2390  PG_RETURN_BYTEA_P(res);
2391 }
2392 
2393 /*-------------------------------------------------------------
2394  * byteaSetBit
2395  *
2396  * Given an instance of type 'bytea' creates a new one with
2397  * the Nth bit set to the given value.
2398  *
2399  *-------------------------------------------------------------
2400  */
2401 Datum
2403 {
2404  bytea *v = PG_GETARG_BYTEA_P(0);
2405  int32 n = PG_GETARG_INT32(1);
2406  int32 newBit = PG_GETARG_INT32(2);
2407  bytea *res;
2408  int len;
2409  int oldByte,
2410  newByte;
2411  int byteNo,
2412  bitNo;
2413 
2414  len = VARSIZE(v) - VARHDRSZ;
2415 
2416  if (n < 0 || n >= len * 8)
2417  ereport(ERROR,
2418  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2419  errmsg("index %d out of valid range, 0..%d",
2420  n, len * 8 - 1)));
2421 
2422  byteNo = n / 8;
2423  bitNo = n % 8;
2424 
2425  /*
2426  * sanity check!
2427  */
2428  if (newBit != 0 && newBit != 1)
2429  ereport(ERROR,
2430  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2431  errmsg("new bit must be 0 or 1")));
2432 
2433  /*
2434  * Make a copy of the original varlena.
2435  */
2436  res = (bytea *) palloc(VARSIZE(v));
2437  memcpy((char *) res, (char *) v, VARSIZE(v));
2438 
2439  /*
2440  * Update the byte.
2441  */
2442  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
2443 
2444  if (newBit == 0)
2445  newByte = oldByte & (~(1 << bitNo));
2446  else
2447  newByte = oldByte | (1 << bitNo);
2448 
2449  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
2450 
2451  PG_RETURN_BYTEA_P(res);
2452 }
2453 
2454 
2455 /* text_name()
2456  * Converts a text type to a Name type.
2457  */
2458 Datum
2460 {
2461  text *s = PG_GETARG_TEXT_PP(0);
2462  Name result;
2463  int len;
2464 
2465  len = VARSIZE_ANY_EXHDR(s);
2466 
2467  /* Truncate oversize input */
2468  if (len >= NAMEDATALEN)
2469  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
2470 
2471  /* We use palloc0 here to ensure result is zero-padded */
2472  result = (Name) palloc0(NAMEDATALEN);
2473  memcpy(NameStr(*result), VARDATA_ANY(s), len);
2474 
2475  PG_RETURN_NAME(result);
2476 }
2477 
2478 /* name_text()
2479  * Converts a Name type to a text type.
2480  */
/*
 * NOTE(review): two lines are absent from this listing -- the one carrying
 * the function name and parameter list (the comment just above names it
 * name_text()) and the statement that builds and returns the result from s.
 * Only the fetch of the Name argument is visible here.
 */
2481 Datum
2483 {
2484  Name s = PG_GETARG_NAME(0);
2485 
2487 }
2488 
2489 
2490 /*
2491  * textToQualifiedNameList - convert a text object to list of names
2492  *
2493  * This implements the input parsing needed by nextval() and other
2494  * functions that take a text parameter representing a qualified name.
2495  * We split the name at dots, downcase if not double-quoted, and
2496  * truncate names if they're too long.
2497  */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; the body reads a variable named textval, which
 * is presumably the sole parameter -- confirm.
 *
 * Returns a new list with one makeString() node per name component, each
 * wrapping its own pstrdup'd copy.  ERROR (invalid name syntax) is raised
 * when SplitIdentifierString() rejects the input or yields no names.
 */
2498 List *
2500 {
2501  char *rawname;
2502  List *result = NIL;
2503  List *namelist;
2504  ListCell *l;
2505 
2506  /* Convert to C string (handles possible detoasting). */
2507  /* Note we rely on being able to modify rawname below. */
2508  rawname = text_to_cstring(textval);
2509 
2510  if (!SplitIdentifierString(rawname, '.', &namelist))
2511  ereport(ERROR,
2512  (errcode(ERRCODE_INVALID_NAME),
2513  errmsg("invalid name syntax")));
2514 
 /* SplitIdentifierString accepts an empty string; this caller does not */
2515  if (namelist == NIL)
2516  ereport(ERROR,
2517  (errcode(ERRCODE_INVALID_NAME),
2518  errmsg("invalid name syntax")));
2519 
 /*
  * The list cells point into rawname, so each name is copied before
  * rawname is freed below.
  */
2520  foreach(l, namelist)
2521  {
2522  char *curname = (char *) lfirst(l);
2523 
2524  result = lappend(result, makeString(pstrdup(curname)));
2525  }
2526 
2527  pfree(rawname);
2528  list_free(namelist);
2529 
2530  return result;
2531 }
2532 
2533 /*
2534  * SplitIdentifierString --- parse a string containing identifiers
2535  *
2536  * This is the guts of textToQualifiedNameList, and is exported for use in
2537  * other situations such as parsing GUC variables. In the GUC case, it's
2538  * important to avoid memory leaks, so the API is designed to minimize the
2539  * amount of stuff that needs to be allocated and freed.
2540  *
2541  * Inputs:
2542  * rawstring: the input string; must be overwritable! On return, it's
2543  * been modified to contain the separated identifiers.
2544  * separator: the separator punctuation expected between identifiers
2545  * (typically '.' or ','). Whitespace may also appear around
2546  * identifiers.
2547  * Outputs:
2548  * namelist: filled with a palloc'd list of pointers to identifiers within
2549  * rawstring. Caller should list_free() this even on error return.
2550  *
2551  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2552  *
2553  * Note that an empty string is considered okay here, though not in
2554  * textToQualifiedNameList.
2555  */
2556 bool
2557 SplitIdentifierString(char *rawstring, char separator,
2558  List **namelist)
2559 {
2560  char *nextp = rawstring;
2561  bool done = false;
2562 
2563  *namelist = NIL;
2564 
2565  while (isspace((unsigned char) *nextp))
2566  nextp++; /* skip leading whitespace */
2567 
2568  if (*nextp == '\0')
2569  return true; /* allow empty string */
2570 
2571  /* At the top of the loop, we are at start of a new identifier. */
2572  do
2573  {
2574  char *curname;
2575  char *endp;
2576 
2577  if (*nextp == '\"')
2578  {
2579  /* Quoted name --- collapse quote-quote pairs, no downcasing */
2580  curname = nextp + 1;
2581  for (;;)
2582  {
2583  endp = strchr(nextp + 1, '\"');
2584  if (endp == NULL)
2585  return false; /* mismatched quotes */
2586  if (endp[1] != '\"')
2587  break; /* found end of quoted name */
2588  /* Collapse adjacent quotes into one quote, and look again */
2589  memmove(endp, endp + 1, strlen(endp));
2590  nextp = endp;
2591  }
2592  /* endp now points at the terminating quote */
2593  nextp = endp + 1;
2594  }
2595  else
2596  {
2597  /* Unquoted name --- extends to separator or whitespace */
2598  char *downname;
2599  int len;
2600 
2601  curname = nextp;
2602  while (*nextp && *nextp != separator &&
2603  !isspace((unsigned char) *nextp))
2604  nextp++;
2605  endp = nextp;
2606  if (curname == nextp)
2607  return false; /* empty unquoted name not allowed */
2608 
2609  /*
2610  * Downcase the identifier, using same code as main lexer does.
2611  *
2612  * XXX because we want to overwrite the input in-place, we cannot
2613  * support a downcasing transformation that increases the string
2614  * length. This is not a problem given the current implementation
2615  * of downcase_truncate_identifier, but we'll probably have to do
2616  * something about this someday.
2617  */
2618  len = endp - curname;
2619  downname = downcase_truncate_identifier(curname, len, false);
2620  Assert(strlen(downname) <= len);
2621  strncpy(curname, downname, len);
2622  pfree(downname);
2623  }
2624 
2625  while (isspace((unsigned char) *nextp))
2626  nextp++; /* skip trailing whitespace */
2627 
2628  if (*nextp == separator)
2629  {
2630  nextp++;
2631  while (isspace((unsigned char) *nextp))
2632  nextp++; /* skip leading whitespace for next */
2633  /* we expect another name, so done remains false */
2634  }
2635  else if (*nextp == '\0')
2636  done = true;
2637  else
2638  return false; /* invalid syntax */
2639 
2640  /* Now safe to overwrite separator with a null */
2641  *endp = '\0';
2642 
2643  /* Truncate name if it's overlength */
2644  truncate_identifier(curname, strlen(curname), false);
2645 
2646  /*
2647  * Finished isolating current name --- add it to list
2648  */
2649  *namelist = lappend(*namelist, curname);
2650 
2651  /* Loop back if we didn't reach end of string */
2652  } while (!done);
2653 
2654  return true;
2655 }
2656 
2657 
2658 /*
2659  * SplitDirectoriesString --- parse a string containing directory names
2660  *
2661  * This is similar to SplitIdentifierString, except that the parsing
2662  * rules are meant to handle pathnames instead of identifiers: there is
2663  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
2664  * and we apply canonicalize_path() to each extracted string. Because of the
2665  * last, the returned strings are separately palloc'd rather than being
2666  * pointers into rawstring --- but we still scribble on rawstring.
2667  *
2668  * Inputs:
2669  * rawstring: the input string; must be modifiable!
2670  * separator: the separator punctuation expected between directories
2671  * (typically ',' or ';'). Whitespace may also appear around
2672  * directories.
2673  * Outputs:
2674  * namelist: filled with a palloc'd list of directory names.
2675  * Caller should list_free_deep() this even on error return.
2676  *
2677  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2678  *
2679  * Note that an empty string is considered okay here.
2680  */
2681 bool
2682 SplitDirectoriesString(char *rawstring, char separator,
2683  List **namelist)
2684 {
2685  char *nextp = rawstring;
2686  bool done = false;
2687 
2688  *namelist = NIL;
2689 
2690  while (isspace((unsigned char) *nextp))
2691  nextp++; /* skip leading whitespace */
2692 
2693  if (*nextp == '\0')
2694  return true; /* allow empty string */
2695 
2696  /* At the top of the loop, we are at start of a new directory. */
2697  do
2698  {
2699  char *curname;
2700  char *endp;
2701 
2702  if (*nextp == '\"')
2703  {
2704  /* Quoted name --- collapse quote-quote pairs */
2705  curname = nextp + 1;
2706  for (;;)
2707  {
2708  endp = strchr(nextp + 1, '\"');
2709  if (endp == NULL)
2710  return false; /* mismatched quotes */
2711  if (endp[1] != '\"')
2712  break; /* found end of quoted name */
2713  /* Collapse adjacent quotes into one quote, and look again */
2714  memmove(endp, endp + 1, strlen(endp));
2715  nextp = endp;
2716  }
2717  /* endp now points at the terminating quote */
2718  nextp = endp + 1;
2719  }
2720  else
2721  {
2722  /* Unquoted name --- extends to separator or end of string */
2723  curname = endp = nextp;
2724  while (*nextp && *nextp != separator)
2725  {
2726  /* trailing whitespace should not be included in name */
2727  if (!isspace((unsigned char) *nextp))
2728  endp = nextp + 1;
2729  nextp++;
2730  }
2731  if (curname == endp)
2732  return false; /* empty unquoted name not allowed */
2733  }
2734 
2735  while (isspace((unsigned char) *nextp))
2736  nextp++; /* skip trailing whitespace */
2737 
2738  if (*nextp == separator)
2739  {
2740  nextp++;
2741  while (isspace((unsigned char) *nextp))
2742  nextp++; /* skip leading whitespace for next */
2743  /* we expect another name, so done remains false */
2744  }
2745  else if (*nextp == '\0')
2746  done = true;
2747  else
2748  return false; /* invalid syntax */
2749 
2750  /* Now safe to overwrite separator with a null */
2751  *endp = '\0';
2752 
2753  /* Truncate path if it's overlength */
2754  if (strlen(curname) >= MAXPGPATH)
2755  curname[MAXPGPATH - 1] = '\0';
2756 
2757  /*
2758  * Finished isolating current name --- add it to list
2759  */
2760  curname = pstrdup(curname);
2761  canonicalize_path(curname);
2762  *namelist = lappend(*namelist, curname);
2763 
2764  /* Loop back if we didn't reach end of string */
2765  } while (!done);
2766 
2767  return true;
2768 }
2769 
2770 
2771 /*****************************************************************************
2772  * Comparison Functions used for bytea
2773  *
2774  * Note: btree indexes need these routines not to leak memory; therefore,
2775  * be careful to free working copies of toasted datums. Most places don't
2776  * need to be so careful.
2777  *****************************************************************************/
2778 
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing.
 *
 * Equality test on two bytea arguments: true only when both have the same
 * size and identical payload bytes.
 */
2779 Datum
2781 {
2782  Datum arg1 = PG_GETARG_DATUM(0);
2783  Datum arg2 = PG_GETARG_DATUM(1);
2784  bool result;
2785  Size len1,
2786  len2;
2787 
2788  /*
2789  * We can use a fast path for unequal lengths, which might save us from
2790  * having to detoast one or both values.
2791  */
2792  len1 = toast_raw_datum_size(arg1);
2793  len2 = toast_raw_datum_size(arg2);
2794  if (len1 != len2)
2795  result = false;
2796  else
2797  {
2798  bytea *barg1 = DatumGetByteaPP(arg1);
2799  bytea *barg2 = DatumGetByteaPP(arg2);
2800 
 /*
  * VARHDRSZ is subtracted because the sizes obtained above presumably
  * include the varlena header -- confirm against toast_raw_datum_size.
  */
2801  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
2802  len1 - VARHDRSZ) == 0);
2803 
2804  PG_FREE_IF_COPY(barg1, 0);
2805  PG_FREE_IF_COPY(barg2, 1);
2806  }
2807 
2808  PG_RETURN_BOOL(result);
2809 }
2810 
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing.
 *
 * Inequality test on two bytea arguments: true when the sizes differ or
 * when any payload byte differs.
 */
2811 Datum
2813 {
2814  Datum arg1 = PG_GETARG_DATUM(0);
2815  Datum arg2 = PG_GETARG_DATUM(1);
2816  bool result;
2817  Size len1,
2818  len2;
2819 
2820  /*
2821  * We can use a fast path for unequal lengths, which might save us from
2822  * having to detoast one or both values.
2823  */
2824  len1 = toast_raw_datum_size(arg1);
2825  len2 = toast_raw_datum_size(arg2);
2826  if (len1 != len2)
2827  result = true;
2828  else
2829  {
2830  bytea *barg1 = DatumGetByteaPP(arg1);
2831  bytea *barg2 = DatumGetByteaPP(arg2);
2832 
 /*
  * VARHDRSZ is subtracted because the sizes obtained above presumably
  * include the varlena header -- confirm against toast_raw_datum_size.
  */
2833  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
2834  len1 - VARHDRSZ) != 0);
2835 
2836  PG_FREE_IF_COPY(barg1, 0);
2837  PG_FREE_IF_COPY(barg2, 1);
2838  }
2839 
2840  PG_RETURN_BOOL(result);
2841 }
2842 
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing.
 *
 * "Less than" on two bytea arguments.  The common prefix is compared with
 * memcmp; when it is equal, the shorter value sorts first.
 */
2843 Datum
2845 {
2846  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2847  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2848  int len1,
2849  len2;
2850  int cmp;
2851 
2852  len1 = VARSIZE_ANY_EXHDR(arg1);
2853  len2 = VARSIZE_ANY_EXHDR(arg2);
2854 
2855  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2856 
 /* the arguments are no longer read after this point */
2857  PG_FREE_IF_COPY(arg1, 0);
2858  PG_FREE_IF_COPY(arg2, 1);
2859 
2860  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
2861 }
2862 
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing.
 *
 * "Less than or equal" on two bytea arguments.  The common prefix is
 * compared with memcmp; when it is equal, the lengths decide.
 */
2863 Datum
2865 {
2866  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2867  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2868  int len1,
2869  len2;
2870  int cmp;
2871 
2872  len1 = VARSIZE_ANY_EXHDR(arg1);
2873  len2 = VARSIZE_ANY_EXHDR(arg2);
2874 
2875  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2876 
 /* the arguments are no longer read after this point */
2877  PG_FREE_IF_COPY(arg1, 0);
2878  PG_FREE_IF_COPY(arg2, 1);
2879 
2880  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
2881 }
2882 
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing.
 *
 * "Greater than" on two bytea arguments.  The common prefix is compared
 * with memcmp; when it is equal, the longer value sorts last.
 */
2883 Datum
2885 {
2886  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2887  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2888  int len1,
2889  len2;
2890  int cmp;
2891 
2892  len1 = VARSIZE_ANY_EXHDR(arg1);
2893  len2 = VARSIZE_ANY_EXHDR(arg2);
2894 
2895  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2896 
 /* the arguments are no longer read after this point */
2897  PG_FREE_IF_COPY(arg1, 0);
2898  PG_FREE_IF_COPY(arg2, 1);
2899 
2900  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
2901 }
2902 
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing.
 *
 * "Greater than or equal" on two bytea arguments.  The common prefix is
 * compared with memcmp; when it is equal, the lengths decide.
 */
2903 Datum
2905 {
2906  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2907  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2908  int len1,
2909  len2;
2910  int cmp;
2911 
2912  len1 = VARSIZE_ANY_EXHDR(arg1);
2913  len2 = VARSIZE_ANY_EXHDR(arg2);
2914 
2915  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2916 
 /* the arguments are no longer read after this point */
2917  PG_FREE_IF_COPY(arg1, 0);
2918  PG_FREE_IF_COPY(arg2, 1);
2919 
2920  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
2921 }
2922 
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing.
 *
 * Three-way comparison of two bytea arguments, returned as an int32.  When
 * the common prefix differs, the result is memcmp's own value (its sign is
 * what matters; it is not normalized).  When the prefix is equal, the
 * result is -1 if the first value is shorter, 1 if it is longer, and 0 if
 * the lengths match.
 */
2923 Datum
2925 {
2926  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2927  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2928  int len1,
2929  len2;
2930  int cmp;
2931 
2932  len1 = VARSIZE_ANY_EXHDR(arg1);
2933  len2 = VARSIZE_ANY_EXHDR(arg2);
2934 
2935  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2936  if ((cmp == 0) && (len1 != len2))
2937  cmp = (len1 < len2) ? -1 : 1;
2938 
2939  PG_FREE_IF_COPY(arg1, 0);
2940  PG_FREE_IF_COPY(arg2, 1);
2941 
2942  PG_RETURN_INT32(cmp);
2943 }
2944 
2945 /*
2946  * appendStringInfoText
2947  *
2948  * Append a text to str.
2949  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2950  */
/*
 * NOTE(review): both the signature line and the single body statement are
 * absent from this listing, so only the return type and the braces remain.
 * Callers in this file pass a StringInfo and a text pointer; restore the
 * missing lines from the original file before relying on this definition.
 */
2951 static void
2953 {
2955 }
2956 
2957 /*
2958  * replace_text
2959  * replace all occurrences of 'old_sub_str' in 'orig_str'
2960  * with 'new_sub_str' to form 'new_str'
2961  *
2962  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2963  * otherwise returns 'new_str'
2964  */
/*
 * NOTE(review): three lines are absent from this listing -- the one
 * carrying the function name and parameter list (the comment just above
 * names it replace_text), the declaration of the variable "state" used
 * throughout, and the first statement inside the do-loop.
 *
 * Positions (start_posn, curr_posn) count characters and are 1-based,
 * while start_ptr is a byte pointer into src_text that is kept in step
 * with start_posn; charlen_to_bytelen() converts between the two.
 */
2965 Datum
2967 {
2968  text *src_text = PG_GETARG_TEXT_PP(0);
2969  text *from_sub_text = PG_GETARG_TEXT_PP(1);
2970  text *to_sub_text = PG_GETARG_TEXT_PP(2);
2971  int src_text_len;
2972  int from_sub_text_len;
2974  text *ret_text;
2975  int start_posn;
2976  int curr_posn;
2977  int chunk_len;
2978  char *start_ptr;
2979  StringInfoData str;
2980 
2981  text_position_setup(src_text, from_sub_text, &state);
2982 
2983  /*
2984  * Note: we check the converted string length, not the original, because
2985  * they could be different if the input contained invalid encoding.
2986  */
2987  src_text_len = state.len1;
2988  from_sub_text_len = state.len2;
2989 
2990  /* Return unmodified source string if empty source or pattern */
2991  if (src_text_len < 1 || from_sub_text_len < 1)
2992  {
2993  text_position_cleanup(&state);
2994  PG_RETURN_TEXT_P(src_text);
2995  }
2996 
2997  start_posn = 1;
2998  curr_posn = text_position_next(1, &state);
2999 
3000  /* When the from_sub_text is not found, there is nothing to do. */
3001  if (curr_posn == 0)
3002  {
3003  text_position_cleanup(&state);
3004  PG_RETURN_TEXT_P(src_text);
3005  }
3006 
3007  /* start_ptr points to the start_posn'th character of src_text */
3008  start_ptr = VARDATA_ANY(src_text);
3009 
3010  initStringInfo(&str);
3011 
 /* one iteration per occurrence of from_sub_text */
3012  do
3013  {
3015 
3016  /* copy the data skipped over by last text_position_next() */
3017  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3018  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3019 
3020  appendStringInfoText(&str, to_sub_text);
3021 
 /* step both the position and the pointer past the matched text */
3022  start_posn = curr_posn;
3023  start_ptr += chunk_len;
3024  start_posn += from_sub_text_len;
3025  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3026 
3027  curr_posn = text_position_next(start_posn, &state);
3028  }
3029  while (curr_posn > 0);
3030 
3031  /* copy trailing data */
3032  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3033  appendBinaryStringInfo(&str, start_ptr, chunk_len);
3034 
3035  text_position_cleanup(&state);
3036 
3037  ret_text = cstring_to_text_with_len(str.data, str.len);
3038  pfree(str.data);
3039 
3040  PG_RETURN_TEXT_P(ret_text);
3041 }
3042 
3043 /*
3044  * check_replace_text_has_escape_char
3045  *
3046  * check whether replace_text contains escape char.
3047  */
/*
 * NOTE(review): two lines are absent from this listing -- the one carrying
 * the function name and parameter list (the body reads a text pointer named
 * replace_text) and the "if" condition that chooses between the two loops
 * below.
 *
 * Returns true as soon as a backslash is found in the payload, otherwise
 * false.  The first loop steps one byte at a time; the second steps by
 * pg_mblen(), i.e. one whole character at a time.
 */
3048 static bool
3050 {
3051  const char *p = VARDATA_ANY(replace_text);
3052  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3053 
3055  {
3056  for (; p < p_end; p++)
3057  {
3058  if (*p == '\\')
3059  return true;
3060  }
3061  }
3062  else
3063  {
3064  for (; p < p_end; p += pg_mblen(p))
3065  {
3066  if (*p == '\\')
3067  return true;
3068  }
3069  }
3070 
3071  return false;
3072 }
3073 
3074 /*
3075  * appendStringInfoRegexpSubstr
3076  *
3077  * Append replace_text to str, substituting regexp back references for
3078  * \n escapes. start_ptr is the start of the match in the source string,
3079  * at logical character position data_pos.
3080  */
/*
 * NOTE(review): the first line of the signature (function name and leading
 * parameters) is absent from this listing; the body uses "str" as the
 * output buffer and "replace_text" as the template.
 *
 * Escapes recognized in the template:
 *   \1 .. \9  text matched by that subexpression (pmatch[1] .. pmatch[9])
 *   \&        the whole match (pmatch[0])
 *   \\        a single backslash
 * Any other character after a backslash, or a backslash at the very end,
 * leaves the backslash in the output as ordinary data.
 */
3081 static void
3083  regmatch_t *pmatch,
3084  char *start_ptr, int data_pos)
3085 {
3086  const char *p = VARDATA_ANY(replace_text);
3087  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3088  int eml = pg_database_encoding_max_length();
3089 
3090  for (;;)
3091  {
3092  const char *chunk_start = p;
3093  int so;
3094  int eo;
3095 
3096  /* Find next escape char. */
 /* with a single-byte encoding every byte is a character */
3097  if (eml == 1)
3098  {
3099  for (; p < p_end && *p != '\\'; p++)
3100  /* nothing */ ;
3101  }
3102  else
3103  {
3104  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3105  /* nothing */ ;
3106  }
3107 
3108  /* Copy the text we just scanned over, if any. */
3109  if (p > chunk_start)
3110  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3111 
3112  /* Done if at end of string, else advance over escape char. */
3113  if (p >= p_end)
3114  break;
3115  p++;
3116 
3117  if (p >= p_end)
3118  {
3119  /* Escape at very end of input. Treat same as unexpected char */
3120  appendStringInfoChar(str, '\\');
3121  break;
3122  }
3123 
3124  if (*p >= '1' && *p <= '9')
3125  {
3126  /* Use the back reference of regexp. */
 /*
  * idx can be as large as 9, so pmatch must hold at least ten entries
  * -- presumably guaranteed by the caller; confirm.
  */
3127  int idx = *p - '0';
3128 
3129  so = pmatch[idx].rm_so;
3130  eo = pmatch[idx].rm_eo;
3131  p++;
3132  }
3133  else if (*p == '&')
3134  {
3135  /* Use the entire matched string. */
3136  so = pmatch[0].rm_so;
3137  eo = pmatch[0].rm_eo;
3138  p++;
3139  }
3140  else if (*p == '\\')
3141  {
3142  /* \\ means transfer one \ to output. */
3143  appendStringInfoChar(str, '\\');
3144  p++;
3145  continue;
3146  }
3147  else
3148  {
3149  /*
3150  * If escape char is not followed by any expected char, just treat
3151  * it as ordinary data to copy. (XXX would it be better to throw
3152  * an error?)
3153  */
 /* p is not advanced: the next scan copies the following character */
3154  appendStringInfoChar(str, '\\');
3155  continue;
3156  }
3157 
 /* an offset of -1 on either side causes nothing to be appended */
3158  if (so != -1 && eo != -1)
3159  {
3160  /*
3161  * Copy the text that is back reference of regexp. Note so and eo
3162  * are counted in characters not bytes.
3163  */
3164  char *chunk_start;
3165  int chunk_len;
3166 
3167  Assert(so >= data_pos);
3168  chunk_start = start_ptr;
3169  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
3170  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
3171  appendBinaryStringInfo(str, chunk_start, chunk_len);
3172  }
3173  }
3174 }
3175 
3176 #define REGEXP_REPLACE_BACKREF_CNT 10
3177 
3178 /*
3179  * replace_text_regexp
3180  *
3181  * replace text that matches to regexp in src_text to replace_text.
3182  *
3183  * Note: to avoid having to include regex.h in builtins.h, we declare
3184  * the regexp argument as void *, but really it's regex_t *.
3185  */
/*
 * NOTE(review): several lines are absent from this listing -- two local
 * declarations after src_text_len (the body uses "buf" and "pmatch", which
 * are not declared in the visible lines), the first statement of the while
 * loop, one argument of the pg_regexec() call, and one line just before
 * pg_regerror().
 *
 * Returns a new text in which the first match (or every match when glob is
 * true) is replaced; text outside the matches is copied unchanged.
 * ERROR is raised when pg_regexec() reports anything other than success or
 * "no match".
 */
3186 text *
3187 replace_text_regexp(text *src_text, void *regexp,
3188  text *replace_text, bool glob)
3189 {
3190  text *ret_text;
3191  regex_t *re = (regex_t *) regexp;
3192  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
3195  pg_wchar *data;
3196  size_t data_len;
3197  int search_start;
3198  int data_pos;
3199  char *start_ptr;
3200  bool have_escape;
3201 
3202  initStringInfo(&buf);
3203 
3204  /* Convert data string to wide characters. */
 /* sized for one wide character per source byte, plus one extra slot */
3205  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3206  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3207 
3208  /* Check whether replace_text has escape char. */
3209  have_escape = check_replace_text_has_escape_char(replace_text);
3210 
3211  /* start_ptr points to the data_pos'th character of src_text */
3212  start_ptr = (char *) VARDATA_ANY(src_text);
3213  data_pos = 0;
3214 
3215  search_start = 0;
 /* "<=" lets one more search start exactly at the end of the data */
3216  while (search_start <= data_len)
3217  {
3218  int regexec_result;
3219 
3221 
3222  regexec_result = pg_regexec(re,
3223  data,
3224  data_len,
3225  search_start,
3226  NULL, /* no details */
3228  pmatch,
3229  0);
3230 
3231  if (regexec_result == REG_NOMATCH)
3232  break;
3233 
3234  if (regexec_result != REG_OKAY)
3235  {
3236  char errMsg[100];
3237 
3239  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3240  ereport(ERROR,
3241  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3242  errmsg("regular expression failed: %s", errMsg)));
3243  }
3244 
3245  /*
3246  * Copy the text to the left of the match position. Note we are given
3247  * character not byte indexes.
3248  */
3249  if (pmatch[0].rm_so - data_pos > 0)
3250  {
3251  int chunk_len;
3252 
3253  chunk_len = charlen_to_bytelen(start_ptr,
3254  pmatch[0].rm_so - data_pos);
3255  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3256 
3257  /*
3258  * Advance start_ptr over that text, to avoid multiple rescans of
3259  * it if the replace_text contains multiple back-references.
3260  */
3261  start_ptr += chunk_len;
3262  data_pos = pmatch[0].rm_so;
3263  }
3264 
3265  /*
3266  * Copy the replace_text. Process back references when the
3267  * replace_text has escape characters.
3268  */
3269  if (have_escape)
3270  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3271  start_ptr, data_pos);
3272  else
3273  appendStringInfoText(&buf, replace_text);
3274 
3275  /* Advance start_ptr and data_pos over the matched text. */
3276  start_ptr += charlen_to_bytelen(start_ptr,
3277  pmatch[0].rm_eo - data_pos);
3278  data_pos = pmatch[0].rm_eo;
3279 
3280  /*
3281  * When global option is off, replace the first instance only.
3282  */
3283  if (!glob)
3284  break;
3285 
3286  /*
3287  * Advance search position. Normally we start the next search at the
3288  * end of the previous match; but if the match was of zero length, we
3289  * have to advance by one character, or we'd just find the same match
3290  * again.
3291  */
3292  search_start = data_pos;
3293  if (pmatch[0].rm_so == pmatch[0].rm_eo)
3294  search_start++;
3295  }
3296 
3297  /*
3298  * Copy the text to the right of the last match.
3299  */
3300  if (data_pos < data_len)
3301  {
3302  int chunk_len;
3303 
 /* everything from start_ptr to the end of the varlena */
3304  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3305  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3306  }
3307 
3308  ret_text = cstring_to_text_with_len(buf.data, buf.len);
3309  pfree(buf.data);
3310  pfree(data);
3311 
3312  return ret_text;
3313 }
3314 
3315 /*
3316  * split_text
3317  * parse input string
3318  * return ord item (1 based)
3319  * based on provided field separator
3320  */
/*
 * NOTE(review): several lines are absent from this listing -- the one
 * carrying the function name and parameter list (the comment just above
 * names it split_text), the declaration of "state", and the return
 * statement in each of three places: the empty-input branch and the two
 * "else" arms that follow a PG_RETURN_TEXT_P(inputstring).  The nearby
 * comments say those paths return an empty string.
 *
 * Positions are 1-based character positions; text_position_next() returns
 * 0 when no further separator is found.
 */
3321 Datum
3323 {
3324  text *inputstring = PG_GETARG_TEXT_PP(0);
3325  text *fldsep = PG_GETARG_TEXT_PP(1);
3326  int fldnum = PG_GETARG_INT32(2);
3327  int inputstring_len;
3328  int fldsep_len;
3330  int start_posn;
3331  int end_posn;
3332  text *result_text;
3333 
3334  /* field number is 1 based */
3335  if (fldnum < 1)
3336  ereport(ERROR,
3337  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3338  errmsg("field position must be greater than zero")));
3339 
3340  text_position_setup(inputstring, fldsep, &state);
3341 
3342  /*
3343  * Note: we check the converted string length, not the original, because
3344  * they could be different if the input contained invalid encoding.
3345  */
3346  inputstring_len = state.len1;
3347  fldsep_len = state.len2;
3348 
3349  /* return empty string for empty input string */
3350  if (inputstring_len < 1)
3351  {
3352  text_position_cleanup(&state);
3354  }
3355 
3356  /* empty field separator */
3357  if (fldsep_len < 1)
3358  {
3359  text_position_cleanup(&state);
3360  /* if first field, return input string, else empty string */
3361  if (fldnum == 1)
3362  PG_RETURN_TEXT_P(inputstring);
3363  else
3365  }
3366 
3367  /* identify bounds of first field */
3368  start_posn = 1;
3369  end_posn = text_position_next(1, &state);
3370 
3371  /* special case if fldsep not found at all */
3372  if (end_posn == 0)
3373  {
3374  text_position_cleanup(&state);
3375  /* if field 1 requested, return input string, else empty string */
3376  if (fldnum == 1)
3377  PG_RETURN_TEXT_P(inputstring);
3378  else
3380  }
3381 
 /*
  * Step forward one field per iteration.  fldnum counts down and reaches
  * 0 exactly when the requested field is bounded by start_posn/end_posn.
  */
3382  while (end_posn > 0 && --fldnum > 0)
3383  {
3384  /* identify bounds of next field */
3385  start_posn = end_posn + fldsep_len;
3386  end_posn = text_position_next(start_posn, &state);
3387  }
3388 
3389  text_position_cleanup(&state);
3390 
3391  if (fldnum > 0)
3392  {
3393  /* N'th field separator not found */
3394  /* if last field requested, return it, else empty string */
3395  if (fldnum == 1)
3396  result_text = text_substring(PointerGetDatum(inputstring),
3397  start_posn,
3398  -1,
3399  true);
3400  else
3401  result_text = cstring_to_text("");
3402  }
3403  else
3404  {
3405  /* non-last field requested */
3406  result_text = text_substring(PointerGetDatum(inputstring),
3407  start_posn,
3408  end_posn - start_posn,
3409  false);
3410  }
3411 
3412  PG_RETURN_TEXT_P(result_text);
3413 }
3414 
3415 /*
3416  * Convenience function to return true when two text params are equal.
3417  */
/*
 * NOTE(review): the first line of the return statement is absent from this
 * listing; only its two trailing arguments (both texts wrapped with
 * PointerGetDatum) are visible.  Which comparison routine is invoked cannot
 * be told from here.
 */
3418 static bool
3419 text_isequal(text *txt1, text *txt2)
3420 {
3422  PointerGetDatum(txt1),
3423  PointerGetDatum(txt2)));
3424 }
3425 
3426 /*
3427  * text_to_array
3428  * parse input string and return text array of elements,
3429  * based on provided field separator
3430  */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; the comment just above names it text_to_array.
 * The body only forwards fcinfo to text_to_array_internal().
 */
3431 Datum
3433 {
3434  return text_to_array_internal(fcinfo);
3435 }
3436 
3437 /*
3438  * text_to_array_null
3439  * parse input string and return text array of elements,
3440  * based on provided field separator and null string
3441  *
3442  * This is a separate entry point only to prevent the regression tests from
3443  * complaining about different argument sets for the same internal function.
3444  */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; the comment just above names it
 * text_to_array_null.  The body only forwards fcinfo to
 * text_to_array_internal().
 */
3445 Datum
3447 {
3448  return text_to_array_internal(fcinfo);
3449 }
3450 
3451 /*
3452  * common code for text_to_array and text_to_array_null functions
3453  *
3454  * These are not strict so we have to test for null inputs explicitly.
3455  */
/*
 * NOTE(review): many lines are absent from this listing, among them the
 * function name/parameter line, the declaration of "state", the return
 * statements for the empty-input cases, the start of the return expression
 * in the empty-separator case, the first statement inside each loop, the
 * final argument of both accumArrayResult() calls, and the statements that
 * turn astate into the function result.  Do not treat the visible text as
 * compilable.
 *
 * Arguments as read below: 0 = input text (a NULL input yields NULL),
 * 1 = field separator (may be NULL), 2 = optional null_string; a field
 * equal to null_string is stored as a NULL element.
 */
3456 static Datum
3458 {
3459  text *inputstring;
3460  text *fldsep;
3461  text *null_string;
3462  int inputstring_len;
3463  int fldsep_len;
3464  char *start_ptr;
3465  text *result_text;
3466  bool is_null;
3467  ArrayBuildState *astate = NULL;
3468 
3469  /* when input string is NULL, then result is NULL too */
3470  if (PG_ARGISNULL(0))
3471  PG_RETURN_NULL();
3472 
3473  inputstring = PG_GETARG_TEXT_PP(0);
3474 
3475  /* fldsep can be NULL */
3476  if (!PG_ARGISNULL(1))
3477  fldsep = PG_GETARG_TEXT_PP(1);
3478  else
3479  fldsep = NULL;
3480 
3481  /* null_string can be NULL or omitted */
3482  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
3483  null_string = PG_GETARG_TEXT_PP(2);
3484  else
3485  null_string = NULL;
3486 
3487  if (fldsep != NULL)
3488  {
3489  /*
3490  * Normal case with non-null fldsep. Use the text_position machinery
3491  * to search for occurrences of fldsep.
3492  */
3494  int fldnum;
3495  int start_posn;
3496  int end_posn;
3497  int chunk_len;
3498 
3499  text_position_setup(inputstring, fldsep, &state);
3500 
3501  /*
3502  * Note: we check the converted string length, not the original,
3503  * because they could be different if the input contained invalid
3504  * encoding.
3505  */
3506  inputstring_len = state.len1;
3507  fldsep_len = state.len2;
3508 
3509  /* return empty array for empty input string */
3510  if (inputstring_len < 1)
3511  {
3512  text_position_cleanup(&state);
3514  }
3515 
3516  /*
3517  * empty field separator: return the input string as a one-element
3518  * array
3519  */
3520  if (fldsep_len < 1)
3521  {
3522  text_position_cleanup(&state);
3523  /* single element can be a NULL too */
3524  is_null = null_string ? text_isequal(inputstring, null_string) : false;
3526  PointerGetDatum(inputstring),
3527  is_null, 1));
3528  }
3529 
3530  start_posn = 1;
3531  /* start_ptr points to the start_posn'th character of inputstring */
3532  start_ptr = VARDATA_ANY(inputstring);
3533 
3534  for (fldnum = 1;; fldnum++) /* field number is 1 based */
3535  {
3537 
 /* 0 means no further separator: this is the last field */
3538  end_posn = text_position_next(start_posn, &state);
3539 
3540  if (end_posn == 0)
3541  {
3542  /* fetch last field */
3543  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
3544  }
3545  else
3546  {
3547  /* fetch non-last field */
3548  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
3549  }
3550 
3551  /* must build a temp text datum to pass to accumArrayResult */
3552  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3553  is_null = null_string ? text_isequal(result_text, null_string) : false;
3554 
3555  /* stash away this field */
3556  astate = accumArrayResult(astate,
3557  PointerGetDatum(result_text),
3558  is_null,
3559  TEXTOID,
3561 
3562  pfree(result_text);
3563 
3564  if (end_posn == 0)
3565  break;
3566 
 /* step the position and the pointer past the field and separator */
3567  start_posn = end_posn;
3568  start_ptr += chunk_len;
3569  start_posn += fldsep_len;
3570  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
3571  }
3572 
3573  text_position_cleanup(&state);
3574  }
3575  else
3576  {
3577  /*
3578  * When fldsep is NULL, each character in the inputstring becomes an
3579  * element in the result array. The separator is effectively the
3580  * space between characters.
3581  */
 /* in this branch inputstring_len counts bytes, not characters */
3582  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
3583 
3584  /* return empty array for empty input string */
3585  if (inputstring_len < 1)
3587 
3588  start_ptr = VARDATA_ANY(inputstring);
3589 
3590  while (inputstring_len > 0)
3591  {
 /* byte length of the character at start_ptr */
3592  int chunk_len = pg_mblen(start_ptr);
3593 
3595 
3596  /* must build a temp text datum to pass to accumArrayResult */
3597  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3598  is_null = null_string ? text_isequal(result_text, null_string) : false;
3599 
3600  /* stash away this field */
3601  astate = accumArrayResult(astate,
3602  PointerGetDatum(result_text),
3603  is_null,
3604  TEXTOID,
3606 
3607  pfree(result_text);
3608 
3609  start_ptr += chunk_len;
3610  inputstring_len -= chunk_len;
3611  }
3612  }
3613 
3616 }
3617 
3618 /*
3619  * array_to_text
3620  * concatenate Cstring representation of input array elements
3621  * using provided field separator
3622  */
/*
 * NOTE(review): two lines are absent from this listing -- the one carrying
 * the function name and parameter list (the comment just above names it
 * array_to_text) and the declaration/fetch of "v", the array argument that
 * is passed on below.
 *
 * The separator is converted to a C string and a NULL null_string is
 * passed; array_to_text_internal() skips null elements in that case.
 */
3623 Datum
3625 {
3627  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3628 
3629  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
3630 }
3631 
3632 /*
3633  * array_to_text_null
3634  * concatenate Cstring representation of input array elements
3635  * using provided field separator and null string
3636  *
3637  * This version is not strict so we have to test for null inputs explicitly.
3638  */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; the comment just above names it
 * array_to_text_null.
 *
 * Arguments as read below: 0 = array, 1 = field separator, 2 = null
 * string.  A NULL in argument 0 or 1 makes the result NULL; a NULL in
 * argument 2 is handed on as a null pointer.
 */
3639 Datum
3641 {
3642  ArrayType *v;
3643  char *fldsep;
3644  char *null_string;
3645 
3646  /* returns NULL when first or second parameter is NULL */
3647  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
3648  PG_RETURN_NULL();
3649 
3650  v = PG_GETARG_ARRAYTYPE_P(0);
3651  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3652 
3653  /* NULL null string is passed through as a null pointer */
3654  if (!PG_ARGISNULL(2))
3655  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
3656  else
3657  null_string = NULL;
3658 
3659  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
3660 }
3661 
3662 /*
3663  * common code for array_to_text and array_to_text_null functions
3664  */
3665 static text *
3667  const char *fldsep, const char *null_string)
3668 {
3669  text *result;
3670  int nitems,
3671  *dims,
3672  ndims;
3673  Oid element_type;
3674  int typlen;
3675  bool typbyval;
3676  char typalign;
3678  bool printed = false;
3679  char *p;
3680  bits8 *bitmap;
3681  int bitmask;
3682  int i;
3683  ArrayMetaState *my_extra;
3684 
3685  ndims = ARR_NDIM(v);
3686  dims = ARR_DIMS(v);
3687  nitems = ArrayGetNItems(ndims, dims);
3688 
3689  /* if there are no elements, return an empty string */
3690  if (nitems == 0)
3691  return cstring_to_text_with_len("", 0);
3692 
3693  element_type = ARR_ELEMTYPE(v);
3694  initStringInfo(&buf);
3695 
3696  /*
3697  * We arrange to look up info about element type, including its output
3698  * conversion proc, only once per series of calls, assuming the element
3699  * type doesn't change underneath us.
3700  */
3701  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
3702  if (my_extra == NULL)
3703  {
3704  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3705  sizeof(ArrayMetaState));
3706  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
3707  my_extra->element_type = ~element_type;
3708  }
3709 
3710  if (my_extra->element_type != element_type)
3711  {
3712  /*
3713  * Get info about element type, including its output conversion proc
3714  */
3715  get_type_io_data(element_type, IOFunc_output,
3716  &my_extra->typlen, &my_extra->typbyval,
3717  &my_extra->typalign, &my_extra->typdelim,
3718  &my_extra->typioparam, &my_extra->typiofunc);
3719  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
3720  fcinfo->flinfo->fn_mcxt);
3721  my_extra->element_type = element_type;
3722  }
3723  typlen = my_extra->typlen;
3724  typbyval = my_extra->typbyval;
3725  typalign = my_extra->typalign;
3726 
3727  p = ARR_DATA_PTR(v);
3728  bitmap = ARR_NULLBITMAP(v);
3729  bitmask = 1;
3730 
3731  for (i = 0; i < nitems; i++)
3732  {
3733  Datum itemvalue;
3734  char *value;
3735 
3736  /* Get source element, checking for NULL */
3737  if (bitmap && (*bitmap & bitmask) == 0)
3738  {
3739  /* if null_string is NULL, we just ignore null elements */
3740  if (null_string != NULL)
3741  {
3742  if (printed)
3743  appendStringInfo(&buf, "%s%s", fldsep, null_string);
3744  else
3745  appendStringInfoString(&buf, null_string);
3746  printed = true;
3747  }
3748  }
3749  else
3750  {
3751  itemvalue = fetch_att(p, typbyval, typlen);
3752 
3753  value = OutputFunctionCall(&my_extra->proc, itemvalue);
3754 
3755  if (printed)
3756  appendStringInfo(&buf, "%s%s", fldsep, value);
3757  else
3758  appendStringInfoString(&buf, value);
3759  printed = true;
3760 
3761  p = att_addlength_pointer(p, typlen, p);
3762  p = (char *) att_align_nominal(p, typalign);
3763  }
3764 
3765  /* advance bitmap pointer if any */
3766  if (bitmap)
3767  {
3768  bitmask <<= 1;
3769  if (bitmask == 0x100)
3770  {
3771  bitmap++;
3772  bitmask = 1;
3773  }
3774  }
3775  }
3776 
3777  result = cstring_to_text_with_len(buf.data, buf.len);
3778  pfree(buf.data);
3779 
3780  return result;
3781 }
3782 
3783 #define HEXBASE 16
3784 /*
3785  * Convert a int32 to a string containing a base 16 (hex) representation of
3786  * the number.
3787  */
3788 Datum
3790 {
3792  char *ptr;
3793  const char *digits = "0123456789abcdef";
3794  char buf[32]; /* bigger than needed, but reasonable */
3795 
3796  ptr = buf + sizeof(buf) - 1;
3797  *ptr = '\0';
3798 
3799  do
3800  {
3801  *--ptr = digits[value % HEXBASE];
3802  value /= HEXBASE;
3803  } while (ptr > buf && value);
3804 
3806 }
3807 
3808 /*
3809  * Convert a int64 to a string containing a base 16 (hex) representation of
3810  * the number.
3811  */
3812 Datum
3814 {
3815  uint64 value = (uint64) PG_GETARG_INT64(0);
3816  char *ptr;
3817  const char *digits = "0123456789abcdef";
3818  char buf[32]; /* bigger than needed, but reasonable */
3819 
3820  ptr = buf + sizeof(buf) - 1;
3821  *ptr = '\0';
3822 
3823  do
3824  {
3825  *--ptr = digits[value % HEXBASE];
3826  value /= HEXBASE;
3827  } while (ptr > buf && value);
3828 
3830 }
3831 
3832 /*
3833  * Create an md5 hash of a text string and return it as hex
3834  *
3835  * md5 produces a 16 byte (128 bit) hash; double it for hex
3836  */
3837 #define MD5_HASH_LEN 32
3838 
3839 Datum
3841 {
3842  text *in_text = PG_GETARG_TEXT_PP(0);
3843  size_t len;
3844  char hexsum[MD5_HASH_LEN + 1];
3845 
3846  /* Calculate the length of the buffer using varlena metadata */
3847  len = VARSIZE_ANY_EXHDR(in_text);
3848 
3849  /* get the hash result */
3850  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
3851  ereport(ERROR,
3852  (errcode(ERRCODE_OUT_OF_MEMORY),
3853  errmsg("out of memory")));
3854 
3855  /* convert to text and return it */
3857 }
3858 
3859 /*
3860  * Create an md5 hash of a bytea field and return it as a hex string:
3861  * 16-byte md5 digest is represented in 32 hex characters.
3862  */
3863 Datum
3865 {
3866  bytea *in = PG_GETARG_BYTEA_PP(0);
3867  size_t len;
3868  char hexsum[MD5_HASH_LEN + 1];
3869 
3870  len = VARSIZE_ANY_EXHDR(in);
3871  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
3872  ereport(ERROR,
3873  (errcode(ERRCODE_OUT_OF_MEMORY),
3874  errmsg("out of memory")));
3875 
3877 }
3878 
3879 /*
3880  * Return the size of a datum, possibly compressed
3881  *
3882  * Works on any data type
3883  */
3884 Datum
3886 {
3888  int32 result;
3889  int typlen;
3890 
3891  /* On first call, get the input type's typlen, and save at *fn_extra */
3892  if (fcinfo->flinfo->fn_extra == NULL)
3893  {
3894  /* Lookup the datatype of the supplied argument */
3895  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
3896 
3897  typlen = get_typlen(argtypeid);
3898  if (typlen == 0) /* should not happen */
3899  elog(ERROR, "cache lookup failed for type %u", argtypeid);
3900 
3901  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3902  sizeof(int));
3903  *((int *) fcinfo->flinfo->fn_extra) = typlen;
3904  }
3905  else
3906  typlen = *((int *) fcinfo->flinfo->fn_extra);
3907 
3908  if (typlen == -1)
3909  {
3910  /* varlena type, possibly toasted */
3911  result = toast_datum_size(value);
3912  }
3913  else if (typlen == -2)
3914  {
3915  /* cstring */
3916  result = strlen(DatumGetCString(value)) + 1;
3917  }
3918  else
3919  {
3920  /* ordinary fixed-width type */
3921  result = typlen;
3922  }
3923 
3924  PG_RETURN_INT32(result);
3925 }
3926 
3927 /*
3928  * string_agg - Concatenates values and returns string.
3929  *
3930  * Syntax: string_agg(value text, delimiter text) RETURNS text
3931  *
3932  * Note: Any NULL values are ignored. The first-call delimiter isn't
3933  * actually used at all, and on subsequent calls the delimiter precedes
3934  * the associated value.
3935  */
3936 
3937 /* subroutine to initialize state */
3938 static StringInfo
3940 {
3941  StringInfo state;
3942  MemoryContext aggcontext;
3943  MemoryContext oldcontext;
3944 
3945  if (!AggCheckCallContext(fcinfo, &aggcontext))
3946  {
3947  /* cannot be called directly because of internal-type argument */
3948  elog(ERROR, "string_agg_transfn called in non-aggregate context");
3949  }
3950 
3951  /*
3952  * Create state in aggregate context. It'll stay there across subsequent
3953  * calls.
3954  */
3955  oldcontext = MemoryContextSwitchTo(aggcontext);
3956  state = makeStringInfo();
3957  MemoryContextSwitchTo(oldcontext);
3958 
3959  return state;
3960 }
3961 
3962 Datum
3964 {
3965  StringInfo state;
3966 
3967  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
3968 
3969  /* Append the value unless null. */
3970  if (!PG_ARGISNULL(1))
3971  {
3972  /* On the first time through, we ignore the delimiter. */
3973  if (state == NULL)
3974  state = makeStringAggState(fcinfo);
3975  else if (!PG_ARGISNULL(2))
3976  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
3977 
3978  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
3979  }
3980 
3981  /*
3982  * The transition type for string_agg() is declared to be "internal",
3983  * which is a pass-by-value type the same size as a pointer.
3984  */
3985  PG_RETURN_POINTER(state);
3986 }
3987 
3988 Datum
3990 {
3991  StringInfo state;
3992 
3993  /* cannot be called directly because of internal-type argument */
3994  Assert(AggCheckCallContext(fcinfo, NULL));
3995 
3996  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
3997 
3998  if (state != NULL)
4000  else
4001  PG_RETURN_NULL();
4002 }
4003 
4004 /*
4005  * Implementation of both concat() and concat_ws().
4006  *
4007  * sepstr is the separator string to place between values.
4008  * argidx identifies the first argument to concatenate (counting from zero).
4009  * Returns NULL if result should be NULL, else text value.
4010  */
4011 static text *
4012 concat_internal(const char *sepstr, int argidx,
4013  FunctionCallInfo fcinfo)
4014 {
4015  text *result;
4016  StringInfoData str;
4017  bool first_arg = true;
4018  int i;
4019 
4020  /*
4021  * concat(VARIADIC some-array) is essentially equivalent to
4022  * array_to_text(), ie concat the array elements with the given separator.
4023  * So we just pass the case off to that code.
4024  */
4025  if (get_fn_expr_variadic(fcinfo->flinfo))
4026  {
4027  ArrayType *arr;
4028 
4029  /* Should have just the one argument */
4030  Assert(argidx == PG_NARGS() - 1);
4031 
4032  /* concat(VARIADIC NULL) is defined as NULL */
4033  if (PG_ARGISNULL(argidx))
4034  return NULL;
4035 
4036  /*
4037  * Non-null argument had better be an array. We assume that any call
4038  * context that could let get_fn_expr_variadic return true will have
4039  * checked that a VARIADIC-labeled parameter actually is an array. So
4040  * it should be okay to just Assert that it's an array rather than
4041  * doing a full-fledged error check.
4042  */
4044 
4045  /* OK, safe to fetch the array value */
4046  arr = PG_GETARG_ARRAYTYPE_P(argidx);
4047 
4048  /*
4049  * And serialize the array. We tell array_to_text to ignore null
4050  * elements, which matches the behavior of the loop below.
4051  */
4052  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4053  }
4054 
4055  /* Normal case without explicit VARIADIC marker */
4056  initStringInfo(&str);
4057 
4058  for (i = argidx; i < PG_NARGS(); i++)
4059  {
4060  if (!PG_ARGISNULL(i))
4061  {
4063  Oid valtype;
4064  Oid typOutput;
4065  bool typIsVarlena;
4066 
4067  /* add separator if appropriate */
4068  if (first_arg)
4069  first_arg = false;
4070  else
4071  appendStringInfoString(&str, sepstr);
4072 
4073  /* call the appropriate type output function, append the result */
4074  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4075  if (!OidIsValid(valtype))
4076  elog(ERROR, "could not determine data type of concat() input");
4077  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4079  OidOutputFunctionCall(typOutput, value));
4080  }
4081  }
4082 
4083  result = cstring_to_text_with_len(str.data, str.len);
4084  pfree(str.data);
4085 
4086  return result;
4087 }
4088 
4089 /*
4090  * Concatenate all arguments. NULL arguments are ignored.
4091  */
4092 Datum
4094 {
4095  text *result;
4096 
4097  result = concat_internal("", 0, fcinfo);
4098  if (result == NULL)
4099  PG_RETURN_NULL();
4100  PG_RETURN_TEXT_P(result);
4101 }
4102 
4103 /*
4104  * Concatenate all but first argument value with separators. The first
4105  * parameter is used as the separator. NULL arguments are ignored.
4106  */
4107 Datum
4109 {
4110  char *sep;
4111  text *result;
4112 
4113  /* return NULL when separator is NULL */
4114  if (PG_ARGISNULL(0))
4115  PG_RETURN_NULL();
4117 
4118  result = concat_internal(sep, 1, fcinfo);
4119  if (result == NULL)
4120  PG_RETURN_NULL();
4121  PG_RETURN_TEXT_P(result);
4122 }
4123 
4124 /*
4125  * Return first n characters in the string. When n is negative,
4126  * return all but last |n| characters.
4127  */
4128 Datum
4130 {
4131  text *str = PG_GETARG_TEXT_PP(0);
4132  const char *p = VARDATA_ANY(str);
4133  int len = VARSIZE_ANY_EXHDR(str);
4134  int n = PG_GETARG_INT32(1);
4135  int rlen;
4136 
4137  if (n < 0)
4138  n = pg_mbstrlen_with_len(p, len) + n;
4139  rlen = pg_mbcharcliplen(p, len, n);
4140 
4142 }
4143 
4144 /*
4145  * Return last n characters in the string. When n is negative,
4146  * return all but first |n| characters.
4147  */
4148 Datum
4150 {
4151  text *str = PG_GETARG_TEXT_PP(0);
4152  const char *p = VARDATA_ANY(str);
4153  int len = VARSIZE_ANY_EXHDR(str);
4154  int n = PG_GETARG_INT32(1);
4155  int off;
4156 
4157  if (n < 0)
4158  n = -n;
4159  else
4160  n = pg_mbstrlen_with_len(p, len) - n;
4161  off = pg_mbcharcliplen(p, len, n);
4162 
4163  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
4164 }
4165 
4166 /*
4167  * Return reversed string
4168  */
4169 Datum
4171 {
4172  text *str = PG_GETARG_TEXT_PP(0);
4173  const char *p = VARDATA_ANY(str);
4174  int len = VARSIZE_ANY_EXHDR(str);
4175  const char *endp = p + len;
4176  text *result;
4177  char *dst;
4178 
4179  result = palloc(len + VARHDRSZ);
4180  dst = (char *) VARDATA(result) + len;
4181  SET_VARSIZE(result, len + VARHDRSZ);
4182 
4184  {
4185  /* multibyte version */
4186  while (p < endp)
4187  {
4188  int sz;
4189 
4190  sz = pg_mblen(p);
4191  dst -= sz;
4192  memcpy(dst, p, sz);
4193  p += sz;
4194  }
4195  }
4196  else
4197  {
4198  /* single byte version */
4199  while (p < endp)
4200  *(--dst) = *p++;
4201  }
4202 
4203  PG_RETURN_TEXT_P(result);
4204 }
4205 
4206 
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS	0x0001	/* is minus flag present? */

/*
 * Step the parse pointer forward by one character, raising an
 * "unterminated format specifier" error if that reaches end_ptr.  After a
 * successful step at least one more character is therefore available.
 * Note that ptr is modified in place.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (++(ptr) >= (end_ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format specifier"))); \
	} while (0)
4219 
4220 /*
4221  * Returns a formatted string
4222  */
4223 Datum
4225 {
4226  text *fmt;
4227  StringInfoData str;
4228  const char *cp;
4229  const char *start_ptr;
4230  const char *end_ptr;
4231  text *result;
4232  int arg;
4233  bool funcvariadic;
4234  int nargs;
4235  Datum *elements = NULL;
4236  bool *nulls = NULL;
4237  Oid element_type = InvalidOid;
4238  Oid prev_type = InvalidOid;
4239  Oid prev_width_type = InvalidOid;
4240  FmgrInfo typoutputfinfo;
4241  FmgrInfo typoutputinfo_width;
4242 
4243  /* When format string is null, immediately return null */
4244  if (PG_ARGISNULL(0))
4245  PG_RETURN_NULL();
4246 
4247  /* If argument is marked VARIADIC, expand array into elements */
4248  if (get_fn_expr_variadic(fcinfo->flinfo))
4249  {
4250  ArrayType *arr;
4251  int16 elmlen;
4252  bool elmbyval;
4253  char elmalign;
4254  int nitems;
4255 
4256  /* Should have just the one argument */
4257  Assert(PG_NARGS() == 2);
4258 
4259  /* If argument is NULL, we treat it as zero-length array */
4260  if (PG_ARGISNULL(1))
4261  nitems = 0;
4262  else
4263  {
4264  /*
4265  * Non-null argument had better be an array. We assume that any
4266  * call context that could let get_fn_expr_variadic return true
4267  * will have checked that a VARIADIC-labeled parameter actually is
4268  * an array. So it should be okay to just Assert that it's an
4269  * array rather than doing a full-fledged error check.
4270  */
4272 
4273  /* OK, safe to fetch the array value */
4274  arr = PG_GETARG_ARRAYTYPE_P(1);
4275 
4276  /* Get info about array element type */
4277  element_type = ARR_ELEMTYPE(arr);
4278  get_typlenbyvalalign(element_type,
4279  &elmlen, &elmbyval, &elmalign);
4280 
4281  /* Extract all array elements */
4282  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
4283  &elements, &nulls, &nitems);
4284  }
4285 
4286  nargs = nitems + 1;
4287  funcvariadic = true;
4288  }
4289  else
4290  {
4291  /* Non-variadic case, we'll process the arguments individually */
4292  nargs = PG_NARGS();
4293  funcvariadic = false;
4294  }
4295 
4296  /* Setup for main loop. */
4297  fmt = PG_GETARG_TEXT_PP(0);
4298  start_ptr = VARDATA_ANY(fmt);
4299  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
4300  initStringInfo(&str);
4301  arg = 1; /* next argument position to print */
4302 
4303  /* Scan format string, looking for conversion specifiers. */
4304  for (cp = start_ptr; cp < end_ptr; cp++)
4305  {
4306  int argpos;
4307  int widthpos;
4308  int flags;
4309  int width;
4310  Datum value;
4311  bool isNull;
4312  Oid typid;
4313 
4314  /*
4315  * If it's not the start of a conversion specifier, just copy it to
4316  * the output buffer.
4317  */
4318  if (*cp != '%')
4319  {
4320  appendStringInfoCharMacro(&str, *cp);
4321  continue;
4322  }
4323 
4324  ADVANCE_PARSE_POINTER(cp, end_ptr);
4325 
4326  /* Easy case: %% outputs a single % */
4327  if (*cp == '%')
4328  {
4329  appendStringInfoCharMacro(&str, *cp);
4330  continue;
4331  }
4332 
4333  /* Parse the optional portions of the format specifier */
4334  cp = text_format_parse_format(cp, end_ptr,
4335  &argpos, &widthpos,
4336  &flags, &width);
4337 
4338  /*
4339  * Next we should see the main conversion specifier. Whether or not
4340  * an argument position was present, it's known that at least one
4341  * character remains in the string at this point. Experience suggests
4342  * that it's worth checking that that character is one of the expected
4343  * ones before we try to fetch arguments, so as to produce the least
4344  * confusing response to a mis-formatted specifier.
4345  */
4346  if (strchr("sIL", *cp) == NULL)
4347  ereport(ERROR,
4348  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4349  errmsg("unrecognized conversion type specifier \"%c\"",
4350  *cp)));
4351 
4352  /* If indirect width was specified, get its value */
4353  if (widthpos >= 0)
4354  {
4355  /* Collect the specified or next argument position */
4356  if (widthpos > 0)
4357  arg = widthpos;
4358  if (arg >= nargs)
4359  ereport(ERROR,
4360  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4361  errmsg("too few arguments for format")));
4362 
4363  /* Get the value and type of the selected argument */
4364  if (!funcvariadic)
4365  {
4366  value = PG_GETARG_DATUM(arg);
4367  isNull = PG_ARGISNULL(arg);
4368  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4369  }
4370  else
4371  {
4372  value = elements[arg - 1];
4373  isNull = nulls[arg - 1];
4374  typid = element_type;
4375  }
4376  if (!OidIsValid(typid))
4377  elog(ERROR, "could not determine data type of format() input");
4378 
4379  arg++;
4380 
4381  /* We can treat NULL width the same as zero */
4382  if (isNull)
4383  width = 0;
4384  else if (typid == INT4OID)
4385  width = DatumGetInt32(value);
4386  else if (typid == INT2OID)
4387  width = DatumGetInt16(value);
4388  else
4389  {
4390  /* For less-usual datatypes, convert to text then to int */
4391  char *str;
4392 
4393  if (typid != prev_width_type)
4394  {
4395  Oid typoutputfunc;
4396  bool typIsVarlena;
4397 
4398  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4399  fmgr_info(typoutputfunc, &typoutputinfo_width);
4400  prev_width_type = typid;
4401  }
4402 
4403  str = OutputFunctionCall(&typoutputinfo_width, value);
4404 
4405  /* pg_atoi will complain about bad data or overflow */
4406  width = pg_atoi(str, sizeof(int), '\0');
4407 
4408  pfree(str);
4409  }
4410  }
4411 
4412  /* Collect the specified or next argument position */
4413  if (argpos > 0)
4414  arg = argpos;
4415  if (arg >= nargs)
4416  ereport(ERROR,
4417  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4418  errmsg("too few arguments for format")));
4419 
4420  /* Get the value and type of the selected argument */
4421  if (!funcvariadic)
4422  {
4423  value = PG_GETARG_DATUM(arg);
4424  isNull = PG_ARGISNULL(arg);
4425  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4426  }
4427  else
4428  {
4429  value = elements[arg - 1];
4430  isNull = nulls[arg - 1];
4431  typid = element_type;
4432  }
4433  if (!OidIsValid(typid))
4434  elog(ERROR, "could not determine data type of format() input");
4435 
4436  arg++;
4437 
4438  /*
4439  * Get the appropriate typOutput function, reusing previous one if
4440  * same type as previous argument. That's particularly useful in the
4441  * variadic-array case, but often saves work even for ordinary calls.
4442  */
4443  if (typid != prev_type)
4444  {
4445  Oid typoutputfunc;
4446  bool typIsVarlena;
4447 
4448  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4449  fmgr_info(typoutputfunc, &typoutputfinfo);
4450  prev_type = typid;
4451  }
4452 
4453  /*
4454  * And now we can format the value.
4455  */
4456  switch (*cp)
4457  {
4458  case 's':
4459  case 'I':
4460  case 'L':
4461  text_format_string_conversion(&str, *cp, &typoutputfinfo,
4462  value, isNull,
4463  flags, width);
4464  break;
4465  default:
4466  /* should not get here, because of previous check */
4467  ereport(ERROR,
4468  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4469  errmsg("unrecognized conversion type specifier \"%c\"",
4470  *cp)));
4471  break;
4472  }
4473  }
4474 
4475  /* Don't need deconstruct_array results anymore. */
4476  if (elements != NULL)
4477  pfree(elements);
4478  if (nulls != NULL)
4479  pfree(nulls);
4480 
4481  /* Generate results. */
4482  result = cstring_to_text_with_len(str.data, str.len);
4483  pfree(str.data);
4484 
4485  PG_RETURN_TEXT_P(result);
4486 }
4487 
4488 /*
4489  * Parse contiguous digits as a decimal number.
4490  *
4491  * Returns true if some digits could be parsed.
4492  * The value is returned into *value, and *ptr is advanced to the next
4493  * character to be parsed.
4494  *
4495  * Note parsing invariant: at least one character is known available before
4496  * string end (end_ptr) at entry, and this is still true at exit.
4497  */
4498 static bool
4499 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
4500 {
4501  bool found = false;
4502  const char *cp = *ptr;
4503  int val = 0;
4504 
4505  while (*cp >= '0' && *cp <= '9')
4506  {
4507  int newval = val * 10 + (*cp - '0');
4508 
4509  if (newval / 10 != val) /* overflow? */
4510  ereport(ERROR,
4511  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4512  errmsg("number is out of range")));
4513  val = newval;
4514  ADVANCE_PARSE_POINTER(cp, end_ptr);
4515  found = true;
4516  }
4517 
4518  *ptr = cp;
4519  *value = val;
4520 
4521  return found;
4522 }
4523 
4524 /*
4525  * Parse a format specifier (generally following the SUS printf spec).
4526  *
4527  * We have already advanced over the initial '%', and we are looking for
4528  * [argpos][flags][width]type (but the type character is not consumed here).
4529  *
4530  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
4531  * Output parameters:
4532  * argpos: argument position for value to be printed. -1 means unspecified.
4533  * widthpos: argument position for width. Zero means the argument position
4534  * was unspecified (ie, take the next arg) and -1 means no width
4535  * argument (width was omitted or specified as a constant).
4536  * flags: bitmask of flags.
4537  * width: directly-specified width value. Zero means the width was omitted
4538  * (note it's not necessary to distinguish this case from an explicit
4539  * zero width value).
4540  *
4541  * The function result is the next character position to be parsed, ie, the
4542  * location where the type character is/should be.
4543  *
4544  * Note parsing invariant: at least one character is known available before
4545  * string end (end_ptr) at entry, and this is still true at exit.
4546  */
4547 static const char *
4548 text_format_parse_format(const char *start_ptr, const char *end_ptr,
4549  int *argpos, int *widthpos,
4550  int *flags, int *width)
4551 {
4552  const char *cp = start_ptr;
4553  int n;
4554 
4555  /* set defaults for output parameters */
4556  *argpos = -1;
4557  *widthpos = -1;
4558  *flags = 0;
4559  *width = 0;
4560 
4561  /* try to identify first number */
4562  if (text_format_parse_digits(&cp, end_ptr, &n))
4563  {
4564  if (*cp != '$')
4565  {
4566  /* Must be just a width and a type, so we're done */
4567  *width = n;
4568  return cp;
4569  }
4570  /* The number was argument position */
4571  *argpos = n;
4572  /* Explicit 0 for argument index is immediately refused */
4573  if (n == 0)
4574  ereport(ERROR,
4575  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4576  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4577  ADVANCE_PARSE_POINTER(cp, end_ptr);
4578  }
4579 
4580  /* Handle flags (only minus is supported now) */
4581  while (*cp == '-')
4582  {
4583  *flags |= TEXT_FORMAT_FLAG_MINUS;
4584  ADVANCE_PARSE_POINTER(cp, end_ptr);
4585  }
4586 
4587  if (*cp == '*')
4588  {
4589  /* Handle indirect width */
4590  ADVANCE_PARSE_POINTER(cp, end_ptr);
4591  if (text_format_parse_digits(&cp, end_ptr, &n))
4592  {
4593  /* number in this position must be closed by $ */
4594  if (*cp != '$')
4595  ereport(ERROR,
4596  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4597  errmsg("width argument position must be ended by \"$\"")));
4598  /* The number was width argument position */
4599  *widthpos = n;
4600  /* Explicit 0 for argument index is immediately refused */
4601  if (n == 0)
4602  ereport(ERROR,
4603  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4604  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4605  ADVANCE_PARSE_POINTER(cp, end_ptr);
4606  }
4607  else
4608  *widthpos = 0; /* width's argument position is unspecified */
4609  }
4610  else
4611  {
4612  /* Check for direct width specification */
4613  if (text_format_parse_digits(&cp, end_ptr, &n))
4614  *width = n;
4615  }
4616 
4617  /* cp should now be pointing at type character */
4618  return cp;
4619 }
4620 
4621 /*
4622  * Format a %s, %I, or %L conversion
4623  */
4624 static void
4626  FmgrInfo *typOutputInfo,
4627  Datum value, bool isNull,
4628  int flags, int width)
4629 {
4630  char *str;
4631 
4632  /* Handle NULL arguments before trying to stringify the value. */
4633  if (isNull)
4634  {
4635  if (conversion == 's')
4636  text_format_append_string(buf, "", flags, width);
4637  else if (conversion == 'L')
4638  text_format_append_string(buf, "NULL", flags, width);
4639  else if (conversion == 'I')
4640  ereport(ERROR,
4641  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4642  errmsg("null values cannot be formatted as an SQL identifier")));
4643  return;
4644  }
4645 
4646  /* Stringify. */
4647  str = OutputFunctionCall(typOutputInfo, value);
4648 
4649  /* Escape. */
4650  if (conversion == 'I')
4651  {
4652  /* quote_identifier may or may not allocate a new string. */
4653  text_format_append_string(buf, quote_identifier(str), flags, width);
4654  }
4655  else if (conversion == 'L')
4656  {
4657  char *qstr = quote_literal_cstr(str);
4658 
4659  text_format_append_string(buf, qstr, flags, width);
4660  /* quote_literal_cstr() always allocates a new string */
4661  pfree(qstr);
4662  }
4663  else
4664  text_format_append_string(buf, str, flags, width);
4665 
4666  /* Cleanup. */
4667  pfree(str);
4668 }
4669 
4670 /*
4671  * Append str to buf, padding as directed by flags/width
4672  */
4673 static void
4675  int flags, int width)
4676 {
4677  bool align_to_left = false;
4678  int len;
4679 
4680  /* fast path for typical easy case */
4681  if (width == 0)
4682  {
4683  appendStringInfoString(buf, str);
4684  return;
4685  }
4686 
4687  if (width < 0)
4688  {
4689  /* Negative width: implicit '-' flag, then take absolute value */
4690  align_to_left = true;
4691  /* -INT_MIN is undefined */
4692  if (width <= INT_MIN)
4693  ereport(ERROR,
4694  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4695  errmsg("number is out of range")));
4696  width = -width;
4697  }
4698  else if (flags & TEXT_FORMAT_FLAG_MINUS)
4699  align_to_left = true;
4700 
4701  len = pg_mbstrlen(str);
4702  if (align_to_left)
4703  {
4704  /* left justify */
4705  appendStringInfoString(buf, str);
4706  if (len < width)
4707  appendStringInfoSpaces(buf, width - len);
4708  }
4709  else
4710  {
4711  /* right justify */
4712  if (len < width)
4713  appendStringInfoSpaces(buf, width - len);
4714  appendStringInfoString(buf, str);
4715  }
4716 }
4717 
4718 /*
4719  * text_format_nv - nonvariadic wrapper for text_format function.
4720  *
4721  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
4722  * which checks that all built-in functions that share the implementing C
4723  * function take the same number of arguments.
4724  */
4725 Datum
4727 {
4728  return text_format(fcinfo);
4729 }