PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
varlena.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  * Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varlena.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <limits.h>
19 
20 #include "access/tuptoaster.h"
21 #include "catalog/pg_collation.h"
22 #include "catalog/pg_type.h"
23 #include "libpq/md5.h"
24 #include "libpq/pqformat.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "regex/regex.h"
28 #include "utils/builtins.h"
29 #include "utils/bytea.h"
30 #include "utils/lsyscache.h"
31 #include "utils/pg_locale.h"
32 
33 
34 /* GUC variable */
36 
37 typedef struct varlena unknown;
38 
39 typedef struct
40 {
41  bool use_wchar; /* T if multibyte encoding */
42  char *str1; /* use these if not use_wchar */
43  char *str2; /* note: these point to original texts */
44  pg_wchar *wstr1; /* use these if use_wchar */
45  pg_wchar *wstr2; /* note: these are palloc'd */
46  int len1; /* string lengths in logical characters */
47  int len2;
48  /* Skip table for Boyer-Moore-Horspool search algorithm: */
49  int skiptablemask; /* mask for ANDing with skiptable subscripts */
50  int skiptable[256]; /* skip distance for given mismatched char */
52 
53 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
54 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
55 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
56 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
57 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
58 
59 static int32 text_length(Datum str);
60 static text *text_catenate(text *t1, text *t2);
61 static text *text_substring(Datum str,
62  int32 start,
63  int32 length,
64  bool length_not_specified);
65 static text *text_overlay(text *t1, text *t2, int sp, int sl);
66 static int text_position(text *t1, text *t2);
67 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
68 static int text_position_next(int start_pos, TextPositionState *state);
70 static int text_cmp(text *arg1, text *arg2, Oid collid);
71 static bytea *bytea_catenate(bytea *t1, bytea *t2);
72 static bytea *bytea_substring(Datum str,
73  int S,
74  int L,
75  bool length_not_specified);
76 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
77 static void appendStringInfoText(StringInfo str, const text *t);
80  const char *fldsep, const char *null_string);
82 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
83  int *value);
84 static const char *text_format_parse_format(const char *start_ptr,
85  const char *end_ptr,
86  int *argpos, int *widthpos,
87  int *flags, int *width);
88 static void text_format_string_conversion(StringInfo buf, char conversion,
89  FmgrInfo *typOutputInfo,
90  Datum value, bool isNull,
91  int flags, int width);
92 static void text_format_append_string(StringInfo buf, const char *str,
93  int flags, int width);
94 
95 
96 /*****************************************************************************
97  * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
98  *****************************************************************************/
99 
100 /*
101  * cstring_to_text
102  *
103  * Create a text value from a null-terminated C string.
104  *
105  * The new text value is freshly palloc'd with a full-size VARHDR.
106  */
107 text *
108 cstring_to_text(const char *s)
109 {
110  return cstring_to_text_with_len(s, strlen(s));
111 }
112 
113 /*
114  * cstring_to_text_with_len
115  *
116  * Same as cstring_to_text except the caller specifies the string length;
117  * the string need not be null_terminated.
118  */
119 text *
120 cstring_to_text_with_len(const char *s, int len)
121 {
122  text *result = (text *) palloc(len + VARHDRSZ);
123 
124  SET_VARSIZE(result, len + VARHDRSZ);
125  memcpy(VARDATA(result), s, len);
126 
127  return result;
128 }
129 
130 /*
131  * text_to_cstring
132  *
133  * Create a palloc'd, null-terminated C string from a text value.
134  *
135  * We support being passed a compressed or toasted text value.
136  * This is a bit bogus since such values shouldn't really be referred to as
137  * "text *", but it seems useful for robustness. If we didn't handle that
138  * case here, we'd need another routine that did, anyway.
139  */
140 char *
142 {
143  /* must cast away the const, unfortunately */
144  text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
145  int len = VARSIZE_ANY_EXHDR(tunpacked);
146  char *result;
147 
148  result = (char *) palloc(len + 1);
149  memcpy(result, VARDATA_ANY(tunpacked), len);
150  result[len] = '\0';
151 
152  if (tunpacked != t)
153  pfree(tunpacked);
154 
155  return result;
156 }
157 
158 /*
159  * text_to_cstring_buffer
160  *
161  * Copy a text value into a caller-supplied buffer of size dst_len.
162  *
163  * The text string is truncated if necessary to fit. The result is
164  * guaranteed null-terminated (unless dst_len == 0).
165  *
166  * We support being passed a compressed or toasted text value.
167  * This is a bit bogus since such values shouldn't really be referred to as
168  * "text *", but it seems useful for robustness. If we didn't handle that
169  * case here, we'd need another routine that did, anyway.
170  */
171 void
172 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
173 {
174  /* must cast away the const, unfortunately */
175  text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
176  size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
177 
178  if (dst_len > 0)
179  {
180  dst_len--;
181  if (dst_len >= src_len)
182  dst_len = src_len;
183  else /* ensure truncation is encoding-safe */
184  dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
185  memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
186  dst[dst_len] = '\0';
187  }
188 
189  if (srcunpacked != src)
190  pfree(srcunpacked);
191 }
192 
193 
194 /*****************************************************************************
195  * USER I/O ROUTINES *
196  *****************************************************************************/
197 
198 
/* Convert between an ASCII digit character and its numeric value. */
#define VAL(c)		((c) - '0')
#define DIG(v)		((v) + '0')
201 
202 /*
203  * byteain - converts from printable representation of byte array
204  *
205  * Non-printable characters must be passed as '\nnn' (octal) and are
206  * converted to internal form. '\' must be passed as '\\'.
207  * ereport(ERROR, ...) if bad form.
208  *
209  * BUGS:
210  * The input is scanned twice.
211  * The error checking of input is minimal.
212  */
213 Datum
215 {
216  char *inputText = PG_GETARG_CSTRING(0);
217  char *tp;
218  char *rp;
219  int bc;
220  bytea *result;
221 
222  /* Recognize hex input */
223  if (inputText[0] == '\\' && inputText[1] == 'x')
224  {
225  size_t len = strlen(inputText);
226 
227  bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
228  result = palloc(bc);
229  bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
230  SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
231 
232  PG_RETURN_BYTEA_P(result);
233  }
234 
235  /* Else, it's the traditional escaped style */
236  for (bc = 0, tp = inputText; *tp != '\0'; bc++)
237  {
238  if (tp[0] != '\\')
239  tp++;
240  else if ((tp[0] == '\\') &&
241  (tp[1] >= '0' && tp[1] <= '3') &&
242  (tp[2] >= '0' && tp[2] <= '7') &&
243  (tp[3] >= '0' && tp[3] <= '7'))
244  tp += 4;
245  else if ((tp[0] == '\\') &&
246  (tp[1] == '\\'))
247  tp += 2;
248  else
249  {
250  /*
251  * one backslash, not followed by another or ### valid octal
252  */
253  ereport(ERROR,
254  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
255  errmsg("invalid input syntax for type bytea")));
256  }
257  }
258 
259  bc += VARHDRSZ;
260 
261  result = (bytea *) palloc(bc);
262  SET_VARSIZE(result, bc);
263 
264  tp = inputText;
265  rp = VARDATA(result);
266  while (*tp != '\0')
267  {
268  if (tp[0] != '\\')
269  *rp++ = *tp++;
270  else if ((tp[0] == '\\') &&
271  (tp[1] >= '0' && tp[1] <= '3') &&
272  (tp[2] >= '0' && tp[2] <= '7') &&
273  (tp[3] >= '0' && tp[3] <= '7'))
274  {
275  bc = VAL(tp[1]);
276  bc <<= 3;
277  bc += VAL(tp[2]);
278  bc <<= 3;
279  *rp++ = bc + VAL(tp[3]);
280 
281  tp += 4;
282  }
283  else if ((tp[0] == '\\') &&
284  (tp[1] == '\\'))
285  {
286  *rp++ = '\\';
287  tp += 2;
288  }
289  else
290  {
291  /*
292  * We should never get here. The first pass should not allow it.
293  */
294  ereport(ERROR,
295  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
296  errmsg("invalid input syntax for type bytea")));
297  }
298  }
299 
300  PG_RETURN_BYTEA_P(result);
301 }
302 
303 /*
304  * byteaout - converts to printable representation of byte array
305  *
306  * In the traditional escaped format, non-printable characters are
307  * printed as '\nnn' (octal) and '\' as '\\'.
308  */
309 Datum
311 {
312  bytea *vlena = PG_GETARG_BYTEA_PP(0);
313  char *result;
314  char *rp;
315 
317  {
318  /* Print hex format */
319  rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
320  *rp++ = '\\';
321  *rp++ = 'x';
322  rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
323  }
324  else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
325  {
326  /* Print traditional escaped format */
327  char *vp;
328  int len;
329  int i;
330 
331  len = 1; /* empty string has 1 char */
332  vp = VARDATA_ANY(vlena);
333  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
334  {
335  if (*vp == '\\')
336  len += 2;
337  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
338  len += 4;
339  else
340  len++;
341  }
342  rp = result = (char *) palloc(len);
343  vp = VARDATA_ANY(vlena);
344  for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
345  {
346  if (*vp == '\\')
347  {
348  *rp++ = '\\';
349  *rp++ = '\\';
350  }
351  else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
352  {
353  int val; /* holds unprintable chars */
354 
355  val = *vp;
356  rp[0] = '\\';
357  rp[3] = DIG(val & 07);
358  val >>= 3;
359  rp[2] = DIG(val & 07);
360  val >>= 3;
361  rp[1] = DIG(val & 03);
362  rp += 4;
363  }
364  else
365  *rp++ = *vp;
366  }
367  }
368  else
369  {
370  elog(ERROR, "unrecognized bytea_output setting: %d",
371  bytea_output);
372  rp = result = NULL; /* keep compiler quiet */
373  }
374  *rp = '\0';
375  PG_RETURN_CSTRING(result);
376 }
377 
378 /*
379  * bytearecv - converts external binary format to bytea
380  */
381 Datum
383 {
385  bytea *result;
386  int nbytes;
387 
388  nbytes = buf->len - buf->cursor;
389  result = (bytea *) palloc(nbytes + VARHDRSZ);
390  SET_VARSIZE(result, nbytes + VARHDRSZ);
391  pq_copymsgbytes(buf, VARDATA(result), nbytes);
392  PG_RETURN_BYTEA_P(result);
393 }
394 
395 /*
396  * byteasend - converts bytea to binary format
397  *
398  * This is a special case: just copy the input...
399  */
400 Datum
402 {
403  bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
404 
405  PG_RETURN_BYTEA_P(vlena);
406 }
407 
408 Datum
410 {
412 
413  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
414 
415  /* Append the value unless null. */
416  if (!PG_ARGISNULL(1))
417  {
419 
420  /* On the first time through, we ignore the delimiter. */
421  if (state == NULL)
422  state = makeStringAggState(fcinfo);
423  else if (!PG_ARGISNULL(2))
424  {
425  bytea *delim = PG_GETARG_BYTEA_PP(2);
426 
428  }
429 
431  }
432 
433  /*
434  * The transition type for string_agg() is declared to be "internal",
435  * which is a pass-by-value type the same size as a pointer.
436  */
437  PG_RETURN_POINTER(state);
438 }
439 
440 Datum
442 {
444 
445  /* cannot be called directly because of internal-type argument */
446  Assert(AggCheckCallContext(fcinfo, NULL));
447 
448  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
449 
450  if (state != NULL)
451  {
452  bytea *result;
453 
454  result = (bytea *) palloc(state->len + VARHDRSZ);
455  SET_VARSIZE(result, state->len + VARHDRSZ);
456  memcpy(VARDATA(result), state->data, state->len);
457  PG_RETURN_BYTEA_P(result);
458  }
459  else
460  PG_RETURN_NULL();
461 }
462 
463 /*
464  * textin - converts "..." to internal representation
465  */
466 Datum
468 {
469  char *inputText = PG_GETARG_CSTRING(0);
470 
471  PG_RETURN_TEXT_P(cstring_to_text(inputText));
472 }
473 
474 /*
475  * textout - converts internal representation to "..."
476  */
477 Datum
479 {
480  Datum txt = PG_GETARG_DATUM(0);
481 
483 }
484 
485 /*
486  * textrecv - converts external binary format to text
487  */
488 Datum
490 {
492  text *result;
493  char *str;
494  int nbytes;
495 
496  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
497 
498  result = cstring_to_text_with_len(str, nbytes);
499  pfree(str);
500  PG_RETURN_TEXT_P(result);
501 }
502 
503 /*
504  * textsend - converts text to binary format
505  */
506 Datum
508 {
509  text *t = PG_GETARG_TEXT_PP(0);
511 
512  pq_begintypsend(&buf);
515 }
516 
517 
518 /*
519  * unknownin - converts "..." to internal representation
520  */
521 Datum
523 {
524  char *str = PG_GETARG_CSTRING(0);
525 
526  /* representation is same as cstring */
528 }
529 
530 /*
531  * unknownout - converts internal representation to "..."
532  */
533 Datum
535 {
536  /* representation is same as cstring */
537  char *str = PG_GETARG_CSTRING(0);
538 
540 }
541 
542 /*
543  * unknownrecv - converts external binary format to unknown
544  */
545 Datum
547 {
549  char *str;
550  int nbytes;
551 
552  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
553  /* representation is same as cstring */
554  PG_RETURN_CSTRING(str);
555 }
556 
557 /*
558  * unknownsend - converts unknown to binary format
559  */
560 Datum
562 {
563  /* representation is same as cstring */
564  char *str = PG_GETARG_CSTRING(0);
566 
567  pq_begintypsend(&buf);
568  pq_sendtext(&buf, str, strlen(str));
570 }
571 
572 
573 /* ========== PUBLIC ROUTINES ========== */
574 
575 /*
576  * textlen -
577  * returns the logical length of a text*
578  * (which is less than the VARSIZE of the text*)
579  */
580 Datum
582 {
583  Datum str = PG_GETARG_DATUM(0);
584 
585  /* try to avoid decompressing argument */
587 }
588 
589 /*
590  * text_length -
591  * Does the real work for textlen()
592  *
593  * This is broken out so it can be called directly by other string processing
594  * functions. Note that the argument is passed as a Datum, to indicate that
595  * it may still be in compressed form. We can avoid decompressing it at all
596  * in some cases.
597  */
598 static int32
600 {
601  /* fastpath when max encoding length is one */
604  else
605  {
606  text *t = DatumGetTextPP(str);
607 
609  VARSIZE_ANY_EXHDR(t)));
610  }
611 }
612 
613 /*
614  * textoctetlen -
615  * returns the physical length of a text*
616  * (which is less than the VARSIZE of the text*)
617  */
618 Datum
620 {
621  Datum str = PG_GETARG_DATUM(0);
622 
623  /* We need not detoast the input at all */
625 }
626 
627 /*
628  * textcat -
629  * takes two text* and returns a text* that is the concatenation of
630  * the two.
631  *
632  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
633  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
634  * Allocate space for output in all cases.
635  * XXX - thomas 1997-07-10
636  */
637 Datum
639 {
640  text *t1 = PG_GETARG_TEXT_PP(0);
641  text *t2 = PG_GETARG_TEXT_PP(1);
642 
644 }
645 
646 /*
647  * text_catenate
648  * Guts of textcat(), broken out so it can be used by other functions
649  *
650  * Arguments can be in short-header form, but not compressed or out-of-line
651  */
652 static text *
654 {
655  text *result;
656  int len1,
657  len2,
658  len;
659  char *ptr;
660 
661  len1 = VARSIZE_ANY_EXHDR(t1);
662  len2 = VARSIZE_ANY_EXHDR(t2);
663 
664  /* paranoia ... probably should throw error instead? */
665  if (len1 < 0)
666  len1 = 0;
667  if (len2 < 0)
668  len2 = 0;
669 
670  len = len1 + len2 + VARHDRSZ;
671  result = (text *) palloc(len);
672 
673  /* Set size of result string... */
674  SET_VARSIZE(result, len);
675 
676  /* Fill data field of result string... */
677  ptr = VARDATA(result);
678  if (len1 > 0)
679  memcpy(ptr, VARDATA_ANY(t1), len1);
680  if (len2 > 0)
681  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
682 
683  return result;
684 }
685 
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * It is caller's responsibility that there actually are n characters;
 * the string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	if (pg_database_encoding_max_length() == 1)
	{
		/* Optimization for single-byte encodings */
		return n;
	}
	else
	{
		const char *s;

		/* step over n characters, one pg_mblen() at a time */
		for (s = p; n > 0; n--)
			s += pg_mblen(s);

		return s - p;
	}
}
711 
712 /*
713  * text_substr()
714  * Return a substring starting at the specified position.
715  * - thomas 1997-12-31
716  *
717  * Input:
718  * - string
719  * - starting position (is one-based)
720  * - string length
721  *
722  * If the starting position is zero or less, then return from the start of the string
723  * adjusting the length to be consistent with the "negative start" per SQL.
724  * If the length is less than zero, return the remaining string.
725  *
726  * Added multibyte support.
727  * - Tatsuo Ishii 1998-4-21
728  * Changed behavior if starting position is less than one to conform to SQL behavior.
729  * Formerly returned the entire string; now returns a portion.
730  * - Thomas Lockhart 1998-12-10
731  * Now uses faster TOAST-slicing interface
732  * - John Gray 2002-02-22
733  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
734  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
735  * error; if E < 1, return '', not entire string). Fixed MB related bug when
736  * S > LC and < LC + 4 sometimes garbage characters are returned.
737  * - Joe Conway 2002-08-10
738  */
739 Datum
741 {
743  PG_GETARG_INT32(1),
744  PG_GETARG_INT32(2),
745  false));
746 }
747 
748 /*
749  * text_substr_no_len -
750  * Wrapper to avoid opr_sanity failure due to
751  * one function accepting a different number of args.
752  */
753 Datum
755 {
757  PG_GETARG_INT32(1),
758  -1, true));
759 }
760 
761 /*
762  * text_substring -
763  * Does the real work for text_substr() and text_substr_no_len()
764  *
765  * This is broken out so it can be called directly by other string processing
766  * functions. Note that the argument is passed as a Datum, to indicate that
767  * it may still be in compressed/toasted form. We can avoid detoasting all
768  * of it in some cases.
769  *
770  * The result is always a freshly palloc'd datum.
771  */
772 static text *
773 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
774 {
776  int32 S = start; /* start position */
777  int32 S1; /* adjusted start position */
778  int32 L1; /* adjusted substring length */
779 
780  /* life is easy if the encoding max length is 1 */
781  if (eml == 1)
782  {
783  S1 = Max(S, 1);
784 
785  if (length_not_specified) /* special case - get length to end of
786  * string */
787  L1 = -1;
788  else
789  {
790  /* end position */
791  int E = S + length;
792 
793  /*
794  * A negative value for L is the only way for the end position to
795  * be before the start. SQL99 says to throw an error.
796  */
797  if (E < S)
798  ereport(ERROR,
799  (errcode(ERRCODE_SUBSTRING_ERROR),
800  errmsg("negative substring length not allowed")));
801 
802  /*
803  * A zero or negative value for the end position can happen if the
804  * start was negative or one. SQL99 says to return a zero-length
805  * string.
806  */
807  if (E < 1)
808  return cstring_to_text("");
809 
810  L1 = E - S1;
811  }
812 
813  /*
814  * If the start position is past the end of the string, SQL99 says to
815  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
816  * that for us. Convert to zero-based starting position
817  */
818  return DatumGetTextPSlice(str, S1 - 1, L1);
819  }
820  else if (eml > 1)
821  {
822  /*
823  * When encoding max length is > 1, we can't get LC without
824  * detoasting, so we'll grab a conservatively large slice now and go
825  * back later to do the right thing
826  */
827  int32 slice_start;
828  int32 slice_size;
829  int32 slice_strlen;
830  text *slice;
831  int32 E1;
832  int32 i;
833  char *p;
834  char *s;
835  text *ret;
836 
837  /*
838  * if S is past the end of the string, the tuple toaster will return a
839  * zero-length string to us
840  */
841  S1 = Max(S, 1);
842 
843  /*
844  * We need to start at position zero because there is no way to know
845  * in advance which byte offset corresponds to the supplied start
846  * position.
847  */
848  slice_start = 0;
849 
850  if (length_not_specified) /* special case - get length to end of
851  * string */
852  slice_size = L1 = -1;
853  else
854  {
855  int E = S + length;
856 
857  /*
858  * A negative value for L is the only way for the end position to
859  * be before the start. SQL99 says to throw an error.
860  */
861  if (E < S)
862  ereport(ERROR,
863  (errcode(ERRCODE_SUBSTRING_ERROR),
864  errmsg("negative substring length not allowed")));
865 
866  /*
867  * A zero or negative value for the end position can happen if the
868  * start was negative or one. SQL99 says to return a zero-length
869  * string.
870  */
871  if (E < 1)
872  return cstring_to_text("");
873 
874  /*
875  * if E is past the end of the string, the tuple toaster will
876  * truncate the length for us
877  */
878  L1 = E - S1;
879 
880  /*
881  * Total slice size in bytes can't be any longer than the start
882  * position plus substring length times the encoding max length.
883  */
884  slice_size = (S1 + L1) * eml;
885  }
886 
887  /*
888  * If we're working with an untoasted source, no need to do an extra
889  * copying step.
890  */
893  slice = DatumGetTextPSlice(str, slice_start, slice_size);
894  else
895  slice = (text *) DatumGetPointer(str);
896 
897  /* see if we got back an empty string */
898  if (VARSIZE_ANY_EXHDR(slice) == 0)
899  {
900  if (slice != (text *) DatumGetPointer(str))
901  pfree(slice);
902  return cstring_to_text("");
903  }
904 
905  /* Now we can get the actual length of the slice in MB characters */
906  slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
907  VARSIZE_ANY_EXHDR(slice));
908 
909  /*
910  * Check that the start position wasn't > slice_strlen. If so, SQL99
911  * says to return a zero-length string.
912  */
913  if (S1 > slice_strlen)
914  {
915  if (slice != (text *) DatumGetPointer(str))
916  pfree(slice);
917  return cstring_to_text("");
918  }
919 
920  /*
921  * Adjust L1 and E1 now that we know the slice string length. Again
922  * remember that S1 is one based, and slice_start is zero based.
923  */
924  if (L1 > -1)
925  E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
926  else
927  E1 = slice_start + 1 + slice_strlen;
928 
929  /*
930  * Find the start position in the slice; remember S1 is not zero based
931  */
932  p = VARDATA_ANY(slice);
933  for (i = 0; i < S1 - 1; i++)
934  p += pg_mblen(p);
935 
936  /* hang onto a pointer to our start position */
937  s = p;
938 
939  /*
940  * Count the actual bytes used by the substring of the requested
941  * length.
942  */
943  for (i = S1; i < E1; i++)
944  p += pg_mblen(p);
945 
946  ret = (text *) palloc(VARHDRSZ + (p - s));
947  SET_VARSIZE(ret, VARHDRSZ + (p - s));
948  memcpy(VARDATA(ret), s, (p - s));
949 
950  if (slice != (text *) DatumGetPointer(str))
951  pfree(slice);
952 
953  return ret;
954  }
955  else
956  elog(ERROR, "invalid backend encoding: encoding max length < 1");
957 
958  /* not reached: suppress compiler warning */
959  return NULL;
960 }
961 
962 /*
963  * textoverlay
964  * Replace specified substring of first string with second
965  *
966  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
967  * This code is a direct implementation of what the standard says.
968  */
969 Datum
971 {
972  text *t1 = PG_GETARG_TEXT_PP(0);
973  text *t2 = PG_GETARG_TEXT_PP(1);
974  int sp = PG_GETARG_INT32(2); /* substring start position */
975  int sl = PG_GETARG_INT32(3); /* substring length */
976 
977  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
978 }
979 
980 Datum
982 {
983  text *t1 = PG_GETARG_TEXT_PP(0);
984  text *t2 = PG_GETARG_TEXT_PP(1);
985  int sp = PG_GETARG_INT32(2); /* substring start position */
986  int sl;
987 
988  sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
989  PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
990 }
991 
992 static text *
993 text_overlay(text *t1, text *t2, int sp, int sl)
994 {
995  text *result;
996  text *s1;
997  text *s2;
998  int sp_pl_sl;
999 
1000  /*
1001  * Check for possible integer-overflow cases. For negative sp, throw a
1002  * "substring length" error because that's what should be expected
1003  * according to the spec's definition of OVERLAY().
1004  */
1005  if (sp <= 0)
1006  ereport(ERROR,
1007  (errcode(ERRCODE_SUBSTRING_ERROR),
1008  errmsg("negative substring length not allowed")));
1009  sp_pl_sl = sp + sl;
1010  if (sp_pl_sl <= sl)
1011  ereport(ERROR,
1012  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1013  errmsg("integer out of range")));
1014 
1015  s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1016  s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1017  result = text_catenate(s1, t2);
1018  result = text_catenate(result, s2);
1019 
1020  return result;
1021 }
1022 
1023 /*
1024  * textpos -
1025  * Return the position of the specified substring.
1026  * Implements the SQL POSITION() function.
1027  * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1028  * - thomas 1997-07-27
1029  */
1030 Datum
1032 {
1033  text *str = PG_GETARG_TEXT_PP(0);
1034  text *search_str = PG_GETARG_TEXT_PP(1);
1035 
1036  PG_RETURN_INT32((int32) text_position(str, search_str));
1037 }
1038 
1039 /*
1040  * text_position -
1041  * Does the real work for textpos()
1042  *
1043  * Inputs:
1044  * t1 - string to be searched
1045  * t2 - pattern to match within t1
1046  * Result:
1047  * Character index of the first matched char, starting from 1,
1048  * or 0 if no match.
1049  *
1050  * This is broken out so it can be called directly by other string processing
1051  * functions.
1052  */
1053 static int
1055 {
1057  int result;
1058 
1059  text_position_setup(t1, t2, &state);
1060  result = text_position_next(1, &state);
1061  text_position_cleanup(&state);
1062  return result;
1063 }
1064 
1065 
1066 /*
1067  * text_position_setup, text_position_next, text_position_cleanup -
1068  * Component steps of text_position()
1069  *
1070  * These are broken out so that a string can be efficiently searched for
1071  * multiple occurrences of the same pattern. text_position_next may be
1072  * called multiple times with increasing values of start_pos, which is
1073  * the 1-based character position to start the search from. The "state"
1074  * variable is normally just a local variable in the caller.
1075  */
1076 
1077 static void
1079 {
1080  int len1 = VARSIZE_ANY_EXHDR(t1);
1081  int len2 = VARSIZE_ANY_EXHDR(t2);
1082 
1084  {
1085  /* simple case - single byte encoding */
1086  state->use_wchar = false;
1087  state->str1 = VARDATA_ANY(t1);
1088  state->str2 = VARDATA_ANY(t2);
1089  state->len1 = len1;
1090  state->len2 = len2;
1091  }
1092  else
1093  {
1094  /* not as simple - multibyte encoding */
1095  pg_wchar *p1,
1096  *p2;
1097 
1098  p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1099  len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1100  p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1101  len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1102 
1103  state->use_wchar = true;
1104  state->wstr1 = p1;
1105  state->wstr2 = p2;
1106  state->len1 = len1;
1107  state->len2 = len2;
1108  }
1109 
1110  /*
1111  * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1112  * notes we use the terminology that the "haystack" is the string to be
1113  * searched (t1) and the "needle" is the pattern being sought (t2).
1114  *
1115  * If the needle is empty or bigger than the haystack then there is no
1116  * point in wasting cycles initializing the table. We also choose not to
1117  * use B-M-H for needles of length 1, since the skip table can't possibly
1118  * save anything in that case.
1119  */
1120  if (len1 >= len2 && len2 > 1)
1121  {
1122  int searchlength = len1 - len2;
1123  int skiptablemask;
1124  int last;
1125  int i;
1126 
1127  /*
1128  * First we must determine how much of the skip table to use. The
1129  * declaration of TextPositionState allows up to 256 elements, but for
1130  * short search problems we don't really want to have to initialize so
1131  * many elements --- it would take too long in comparison to the
1132  * actual search time. So we choose a useful skip table size based on
1133  * the haystack length minus the needle length. The closer the needle
1134  * length is to the haystack length the less useful skipping becomes.
1135  *
1136  * Note: since we use bit-masking to select table elements, the skip
1137  * table size MUST be a power of 2, and so the mask must be 2^N-1.
1138  */
1139  if (searchlength < 16)
1140  skiptablemask = 3;
1141  else if (searchlength < 64)
1142  skiptablemask = 7;
1143  else if (searchlength < 128)
1144  skiptablemask = 15;
1145  else if (searchlength < 512)
1146  skiptablemask = 31;
1147  else if (searchlength < 2048)
1148  skiptablemask = 63;
1149  else if (searchlength < 4096)
1150  skiptablemask = 127;
1151  else
1152  skiptablemask = 255;
1153  state->skiptablemask = skiptablemask;
1154 
1155  /*
1156  * Initialize the skip table. We set all elements to the needle
1157  * length, since this is the correct skip distance for any character
1158  * not found in the needle.
1159  */
1160  for (i = 0; i <= skiptablemask; i++)
1161  state->skiptable[i] = len2;
1162 
1163  /*
1164  * Now examine the needle. For each character except the last one,
1165  * set the corresponding table element to the appropriate skip
1166  * distance. Note that when two characters share the same skip table
1167  * entry, the one later in the needle must determine the skip
1168  * distance.
1169  */
1170  last = len2 - 1;
1171 
1172  if (!state->use_wchar)
1173  {
1174  const char *str2 = state->str2;
1175 
1176  for (i = 0; i < last; i++)
1177  state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1178  }
1179  else
1180  {
1181  const pg_wchar *wstr2 = state->wstr2;
1182 
1183  for (i = 0; i < last; i++)
1184  state->skiptable[wstr2[i] & skiptablemask] = last - i;
1185  }
1186  }
1187 }
1188 
1189 static int
1191 {
1192  int haystack_len = state->len1;
1193  int needle_len = state->len2;
1194  int skiptablemask = state->skiptablemask;
1195 
1196  Assert(start_pos > 0); /* else caller error */
1197 
1198  if (needle_len <= 0)
1199  return start_pos; /* result for empty pattern */
1200 
1201  start_pos--; /* adjust for zero based arrays */
1202 
1203  /* Done if the needle can't possibly fit */
1204  if (haystack_len < start_pos + needle_len)
1205  return 0;
1206 
1207  if (!state->use_wchar)
1208  {
1209  /* simple case - single byte encoding */
1210  const char *haystack = state->str1;
1211  const char *needle = state->str2;
1212  const char *haystack_end = &haystack[haystack_len];
1213  const char *hptr;
1214 
1215  if (needle_len == 1)
1216  {
1217  /* No point in using B-M-H for a one-character needle */
1218  char nchar = *needle;
1219 
1220  hptr = &haystack[start_pos];
1221  while (hptr < haystack_end)
1222  {
1223  if (*hptr == nchar)
1224  return hptr - haystack + 1;
1225  hptr++;
1226  }
1227  }
1228  else
1229  {
1230  const char *needle_last = &needle[needle_len - 1];
1231 
1232  /* Start at startpos plus the length of the needle */
1233  hptr = &haystack[start_pos + needle_len - 1];
1234  while (hptr < haystack_end)
1235  {
1236  /* Match the needle scanning *backward* */
1237  const char *nptr;
1238  const char *p;
1239 
1240  nptr = needle_last;
1241  p = hptr;
1242  while (*nptr == *p)
1243  {
1244  /* Matched it all? If so, return 1-based position */
1245  if (nptr == needle)
1246  return p - haystack + 1;
1247  nptr--, p--;
1248  }
1249 
1250  /*
1251  * No match, so use the haystack char at hptr to decide how
1252  * far to advance. If the needle had any occurrence of that
1253  * character (or more precisely, one sharing the same
1254  * skiptable entry) before its last character, then we advance
1255  * far enough to align the last such needle character with
1256  * that haystack position. Otherwise we can advance by the
1257  * whole needle length.
1258  */
1259  hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1260  }
1261  }
1262  }
1263  else
1264  {
1265  /* The multibyte char version. This works exactly the same way. */
1266  const pg_wchar *haystack = state->wstr1;
1267  const pg_wchar *needle = state->wstr2;
1268  const pg_wchar *haystack_end = &haystack[haystack_len];
1269  const pg_wchar *hptr;
1270 
1271  if (needle_len == 1)
1272  {
1273  /* No point in using B-M-H for a one-character needle */
1274  pg_wchar nchar = *needle;
1275 
1276  hptr = &haystack[start_pos];
1277  while (hptr < haystack_end)
1278  {
1279  if (*hptr == nchar)
1280  return hptr - haystack + 1;
1281  hptr++;
1282  }
1283  }
1284  else
1285  {
1286  const pg_wchar *needle_last = &needle[needle_len - 1];
1287 
1288  /* Start at startpos plus the length of the needle */
1289  hptr = &haystack[start_pos + needle_len - 1];
1290  while (hptr < haystack_end)
1291  {
1292  /* Match the needle scanning *backward* */
1293  const pg_wchar *nptr;
1294  const pg_wchar *p;
1295 
1296  nptr = needle_last;
1297  p = hptr;
1298  while (*nptr == *p)
1299  {
1300  /* Matched it all? If so, return 1-based position */
1301  if (nptr == needle)
1302  return p - haystack + 1;
1303  nptr--, p--;
1304  }
1305 
1306  /*
1307  * No match, so use the haystack char at hptr to decide how
1308  * far to advance. If the needle had any occurrence of that
1309  * character (or more precisely, one sharing the same
1310  * skiptable entry) before its last character, then we advance
1311  * far enough to align the last such needle character with
1312  * that haystack position. Otherwise we can advance by the
1313  * whole needle length.
1314  */
1315  hptr += state->skiptable[*hptr & skiptablemask];
1316  }
1317  }
1318  }
1319 
1320  return 0; /* not found */
1321 }
1322 
1323 static void
1325 {
1326  if (state->use_wchar)
1327  {
1328  pfree(state->wstr1);
1329  pfree(state->wstr2);
1330  }
1331 }
1332 
1333 /* varstr_cmp()
1334  * Comparison function for text strings with given lengths.
1335  * Includes locale support, but must copy strings to temporary memory
1336  * to allow null-termination for inputs to strcoll().
1337  * Returns an integer less than, equal to, or greater than zero, indicating
1338  * whether arg1 is less than, equal to, or greater than arg2.
  *
  * arg1/len1 and arg2/len2 give each string and its length in bytes; the
  * inputs themselves are only read, never modified.
  * collid selects the collation.  If it is not the default collation and
  * is not a valid OID, ERRCODE_INDETERMINATE_COLLATION is raised.
  * Strings that the locale-aware comparison reports as equal are ordered
  * by a bytewise comparison, so zero is returned only for identical input.
1339  */
1340 int
1341 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1342 {
1343  int result;
1344 
1345  /*
1346  * Unfortunately, there is no strncoll(), so in the non-C locale case we
1347  * have to do some memory copying. This turns out to be significantly
1348  * slower, so we optimize the case where LC_COLLATE is C. We also try to
1349  * optimize relatively-short strings by avoiding palloc/pfree overhead.
1350  */
1351  if (lc_collate_is_c(collid))
1352  {
1353  result = memcmp(arg1, arg2, Min(len1, len2));
1354  if ((result == 0) && (len1 != len2))
1355  result = (len1 < len2) ? -1 : 1;
1356  }
1357  else
1358  {
  /* Size in bytes of each on-stack work buffer; longer inputs use palloc */
1359 #define STACKBUFLEN 1024
1360 
1361  char a1buf[STACKBUFLEN];
1362  char a2buf[STACKBUFLEN];
1363  char *a1p,
1364  *a2p;
1365 
1366 #ifdef HAVE_LOCALE_T
1367  pg_locale_t mylocale = 0;
1368 #endif
1369 
1370  if (collid != DEFAULT_COLLATION_OID)
1371  {
1372  if (!OidIsValid(collid))
1373  {
1374  /*
1375  * This typically means that the parser could not resolve a
1376  * conflict of implicit collations, so report it that way.
1377  */
1378  ereport(ERROR,
1379  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1380  errmsg("could not determine which collation to use for string comparison"),
1381  errhint("Use the COLLATE clause to set the collation explicitly.")));
1382  }
1383 #ifdef HAVE_LOCALE_T
1384  mylocale = pg_newlocale_from_collation(collid);
1385 #endif
1386  }
1387 
1388 #ifdef WIN32
1389  /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1390  if (GetDatabaseEncoding() == PG_UTF8)
1391  {
1392  int a1len;
1393  int a2len;
1394  int r;
1395 
  /*
   * The work buffers hold wide characters here and are measured in
   * bytes (a1len / 2 is passed as the wide-character capacity).  The
   * stack buffer is therefore used only for inputs shorter than
   * STACKBUFLEN / 2 bytes; otherwise two bytes per input byte plus a
   * two-byte terminator are palloc'd.
   */
1396  if (len1 >= STACKBUFLEN / 2)
1397  {
1398  a1len = len1 * 2 + 2;
1399  a1p = palloc(a1len);
1400  }
1401  else
1402  {
1403  a1len = STACKBUFLEN;
1404  a1p = a1buf;
1405  }
1406  if (len2 >= STACKBUFLEN / 2)
1407  {
1408  a2len = len2 * 2 + 2;
1409  a2p = palloc(a2len);
1410  }
1411  else
1412  {
1413  a2len = STACKBUFLEN;
1414  a2p = a2buf;
1415  }
1416 
1417  /* stupid Microsloth API does not work for zero-length input */
1418  if (len1 == 0)
1419  r = 0;
1420  else
1421  {
1422  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1423  (LPWSTR) a1p, a1len / 2);
1424  if (!r)
1425  ereport(ERROR,
1426  (errmsg("could not convert string to UTF-16: error code %lu",
1427  GetLastError())));
1428  }
1429  ((LPWSTR) a1p)[r] = 0;
1430 
1431  if (len2 == 0)
1432  r = 0;
1433  else
1434  {
1435  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1436  (LPWSTR) a2p, a2len / 2);
1437  if (!r)
1438  ereport(ERROR,
1439  (errmsg("could not convert string to UTF-16: error code %lu",
1440  GetLastError())));
1441  }
1442  ((LPWSTR) a2p)[r] = 0;
1443 
  /*
   * NOTE(review): errno is cleared presumably so that the %m in the
   * error report below reflects only a failure of the comparison.
   */
1444  errno = 0;
1445 #ifdef HAVE_LOCALE_T
1446  if (mylocale)
1447  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
1448  else
1449 #endif
1450  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1451  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1452  * headers */
1453  ereport(ERROR,
1454  (errmsg("could not compare Unicode strings: %m")));
1455 
1456  /*
1457  * In some locales wcscoll() can claim that nonidentical strings
1458  * are equal. Believing that would be bad news for a number of
1459  * reasons, so we follow Perl's lead and sort "equal" strings
1460  * according to strcmp (on the UTF-8 representation).
1461  */
1462  if (result == 0)
1463  {
1464  result = memcmp(arg1, arg2, Min(len1, len2));
1465  if ((result == 0) && (len1 != len2))
1466  result = (len1 < len2) ? -1 : 1;
1467  }
1468 
  /* Free only the work buffers that were palloc'd, not the stack ones */
1469  if (a1p != a1buf)
1470  pfree(a1p);
1471  if (a2p != a2buf)
1472  pfree(a2p);
1473 
  /* The Win32 UTF-8 case is complete; skip the strcoll() path below */
1474  return result;
1475  }
1476 #endif /* WIN32 */
1477 
  /*
   * Each work buffer must hold the string plus a terminating NUL, so the
   * stack buffer is usable only when the length is below STACKBUFLEN.
   */
1478  if (len1 >= STACKBUFLEN)
1479  a1p = (char *) palloc(len1 + 1);
1480  else
1481  a1p = a1buf;
1482  if (len2 >= STACKBUFLEN)
1483  a2p = (char *) palloc(len2 + 1);
1484  else
1485  a2p = a2buf;
1486 
1487  memcpy(a1p, arg1, len1);
1488  a1p[len1] = '\0';
1489  memcpy(a2p, arg2, len2);
1490  a2p[len2] = '\0';
1491 
1492 #ifdef HAVE_LOCALE_T
1493  if (mylocale)
1494  result = strcoll_l(a1p, a2p, mylocale);
1495  else
1496 #endif
1497  result = strcoll(a1p, a2p);
1498 
1499  /*
1500  * In some locales strcoll() can claim that nonidentical strings are
1501  * equal. Believing that would be bad news for a number of reasons,
1502  * so we follow Perl's lead and sort "equal" strings according to
1503  * strcmp().
1504  */
1505  if (result == 0)
1506  result = strcmp(a1p, a2p);
1507 
  /* Free only the work buffers that were palloc'd, not the stack ones */
1508  if (a1p != a1buf)
1509  pfree(a1p);
1510  if (a2p != a2buf)
1511  pfree(a2p);
1512  }
1513 
1514  return result;
1515 }
1516 
1517 
1518 /* text_cmp()
1519  * Internal comparison function for text strings.
1520  * Returns -1, 0 or 1
1521  */
1522 static int
1523 text_cmp(text *arg1, text *arg2, Oid collid)
1524 {
1525  char *a1p,
1526  *a2p;
1527  int len1,
1528  len2;
1529 
1530  a1p = VARDATA_ANY(arg1);
1531  a2p = VARDATA_ANY(arg2);
1532 
1533  len1 = VARSIZE_ANY_EXHDR(arg1);
1534  len2 = VARSIZE_ANY_EXHDR(arg2);
1535 
1536  return varstr_cmp(a1p, len1, a2p, len2, collid);
1537 }
1538 
1539 /*
1540  * Comparison functions for text strings.
1541  *
1542  * Note: btree indexes need these routines not to leak memory; therefore,
1543  * be careful to free working copies of toasted datums. Most places don't
1544  * need to be so careful.
1545  */
1546 
1547 Datum
1549 {
1550  Datum arg1 = PG_GETARG_DATUM(0);
1551  Datum arg2 = PG_GETARG_DATUM(1);
1552  bool result;
1553  Size len1,
1554  len2;
1555 
1556  /*
1557  * Since we only care about equality or not-equality, we can avoid all the
1558  * expense of strcoll() here, and just do bitwise comparison. In fact, we
1559  * don't even have to do a bitwise comparison if we can show the lengths
1560  * of the strings are unequal; which might save us from having to detoast
1561  * one or both values.
1562  */
1563  len1 = toast_raw_datum_size(arg1);
1564  len2 = toast_raw_datum_size(arg2);
1565  if (len1 != len2)
1566  result = false;
1567  else
1568  {
1569  text *targ1 = DatumGetTextPP(arg1);
1570  text *targ2 = DatumGetTextPP(arg2);
1571 
1572  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1573  len1 - VARHDRSZ) == 0);
1574 
1575  PG_FREE_IF_COPY(targ1, 0);
1576  PG_FREE_IF_COPY(targ2, 1);
1577  }
1578 
1579  PG_RETURN_BOOL(result);
1580 }
1581 
1582 Datum
1584 {
1585  Datum arg1 = PG_GETARG_DATUM(0);
1586  Datum arg2 = PG_GETARG_DATUM(1);
1587  bool result;
1588  Size len1,
1589  len2;
1590 
1591  /* See comment in texteq() */
1592  len1 = toast_raw_datum_size(arg1);
1593  len2 = toast_raw_datum_size(arg2);
1594  if (len1 != len2)
1595  result = true;
1596  else
1597  {
1598  text *targ1 = DatumGetTextPP(arg1);
1599  text *targ2 = DatumGetTextPP(arg2);
1600 
1601  result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1602  len1 - VARHDRSZ) != 0);
1603 
1604  PG_FREE_IF_COPY(targ1, 0);
1605  PG_FREE_IF_COPY(targ2, 1);
1606  }
1607 
1608  PG_RETURN_BOOL(result);
1609 }
1610 
1611 Datum
1613 {
1614  text *arg1 = PG_GETARG_TEXT_PP(0);
1615  text *arg2 = PG_GETARG_TEXT_PP(1);
1616  bool result;
1617 
1618  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1619 
1620  PG_FREE_IF_COPY(arg1, 0);
1621  PG_FREE_IF_COPY(arg2, 1);
1622 
1623  PG_RETURN_BOOL(result);
1624 }
1625 
1626 Datum
1628 {
1629  text *arg1 = PG_GETARG_TEXT_PP(0);
1630  text *arg2 = PG_GETARG_TEXT_PP(1);
1631  bool result;
1632 
1633  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1634 
1635  PG_FREE_IF_COPY(arg1, 0);
1636  PG_FREE_IF_COPY(arg2, 1);
1637 
1638  PG_RETURN_BOOL(result);
1639 }
1640 
1641 Datum
1643 {
1644  text *arg1 = PG_GETARG_TEXT_PP(0);
1645  text *arg2 = PG_GETARG_TEXT_PP(1);
1646  bool result;
1647 
1648  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1649 
1650  PG_FREE_IF_COPY(arg1, 0);
1651  PG_FREE_IF_COPY(arg2, 1);
1652 
1653  PG_RETURN_BOOL(result);
1654 }
1655 
1656 Datum
1658 {
1659  text *arg1 = PG_GETARG_TEXT_PP(0);
1660  text *arg2 = PG_GETARG_TEXT_PP(1);
1661  bool result;
1662 
1663  result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1664 
1665  PG_FREE_IF_COPY(arg1, 0);
1666  PG_FREE_IF_COPY(arg2, 1);
1667 
1668  PG_RETURN_BOOL(result);
1669 }
1670 
1671 Datum
1673 {
1674  text *arg1 = PG_GETARG_TEXT_PP(0);
1675  text *arg2 = PG_GETARG_TEXT_PP(1);
1676  int32 result;
1677 
1678  result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1679 
1680  PG_FREE_IF_COPY(arg1, 0);
1681  PG_FREE_IF_COPY(arg2, 1);
1682 
1683  PG_RETURN_INT32(result);
1684 }
1685 
1686 
1687 Datum
1689 {
1690  text *arg1 = PG_GETARG_TEXT_PP(0);
1691  text *arg2 = PG_GETARG_TEXT_PP(1);
1692  text *result;
1693 
1694  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
1695 
1696  PG_RETURN_TEXT_P(result);
1697 }
1698 
1699 Datum
1701 {
1702  text *arg1 = PG_GETARG_TEXT_PP(0);
1703  text *arg2 = PG_GETARG_TEXT_PP(1);
1704  text *result;
1705 
1706  result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
1707 
1708  PG_RETURN_TEXT_P(result);
1709 }
1710 
1711 
1712 /*
1713  * The following operators support character-by-character comparison
1714  * of text datums, to allow building indexes suitable for LIKE clauses.
1715  * Note that the regular texteq/textne comparison operators are assumed
1716  * to be compatible with these!
1717  */
1718 
1719 static int
1721 {
1722  int result;
1723  int len1,
1724  len2;
1725 
1726  len1 = VARSIZE_ANY_EXHDR(arg1);
1727  len2 = VARSIZE_ANY_EXHDR(arg2);
1728 
1729  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1730  if (result != 0)
1731  return result;
1732  else if (len1 < len2)
1733  return -1;
1734  else if (len1 > len2)
1735  return 1;
1736  else
1737  return 0;
1738 }
1739 
1740 
1741 Datum
1743 {
1744  text *arg1 = PG_GETARG_TEXT_PP(0);
1745  text *arg2 = PG_GETARG_TEXT_PP(1);
1746  int result;
1747 
1748  result = internal_text_pattern_compare(arg1, arg2);
1749 
1750  PG_FREE_IF_COPY(arg1, 0);
1751  PG_FREE_IF_COPY(arg2, 1);
1752 
1753  PG_RETURN_BOOL(result < 0);
1754 }
1755 
1756 
1757 Datum
1759 {
1760  text *arg1 = PG_GETARG_TEXT_PP(0);
1761  text *arg2 = PG_GETARG_TEXT_PP(1);
1762  int result;
1763 
1764  result = internal_text_pattern_compare(arg1, arg2);
1765 
1766  PG_FREE_IF_COPY(arg1, 0);
1767  PG_FREE_IF_COPY(arg2, 1);
1768 
1769  PG_RETURN_BOOL(result <= 0);
1770 }
1771 
1772 
1773 Datum
1775 {
1776  text *arg1 = PG_GETARG_TEXT_PP(0);
1777  text *arg2 = PG_GETARG_TEXT_PP(1);
1778  int result;
1779 
1780  result = internal_text_pattern_compare(arg1, arg2);
1781 
1782  PG_FREE_IF_COPY(arg1, 0);
1783  PG_FREE_IF_COPY(arg2, 1);
1784 
1785  PG_RETURN_BOOL(result >= 0);
1786 }
1787 
1788 
1789 Datum
1791 {
1792  text *arg1 = PG_GETARG_TEXT_PP(0);
1793  text *arg2 = PG_GETARG_TEXT_PP(1);
1794  int result;
1795 
1796  result = internal_text_pattern_compare(arg1, arg2);
1797 
1798  PG_FREE_IF_COPY(arg1, 0);
1799  PG_FREE_IF_COPY(arg2, 1);
1800 
1801  PG_RETURN_BOOL(result > 0);
1802 }
1803 
1804 
1805 Datum
1807 {
1808  text *arg1 = PG_GETARG_TEXT_PP(0);
1809  text *arg2 = PG_GETARG_TEXT_PP(1);
1810  int result;
1811 
1812  result = internal_text_pattern_compare(arg1, arg2);
1813 
1814  PG_FREE_IF_COPY(arg1, 0);
1815  PG_FREE_IF_COPY(arg2, 1);
1816 
1817  PG_RETURN_INT32(result);
1818 }
1819 
1820 
1821 /*-------------------------------------------------------------
1822  * byteaoctetlen
1823  *
1824  * get the number of bytes contained in an instance of type 'bytea'
1825  *-------------------------------------------------------------
1826  */
1827 Datum
1829 {
1830  Datum str = PG_GETARG_DATUM(0);
1831 
1832  /* We need not detoast the input at all */
1834 }
1835 
1836 /*
1837  * byteacat -
1838  * takes two bytea* and returns a bytea* that is the concatenation of
1839  * the two.
1840  *
1841  * Cloned from textcat and modified as required.
1842  */
1843 Datum
1845 {
1846  bytea *t1 = PG_GETARG_BYTEA_PP(0);
1847  bytea *t2 = PG_GETARG_BYTEA_PP(1);
1848 
1850 }
1851 
1852 /*
1853  * bytea_catenate
1854  * Guts of byteacat(), broken out so it can be used by other functions
1855  *
1856  * Arguments can be in short-header form, but not compressed or out-of-line
1857  */
1858 static bytea *
1860 {
1861  bytea *result;
1862  int len1,
1863  len2,
1864  len;
1865  char *ptr;
1866 
1867  len1 = VARSIZE_ANY_EXHDR(t1);
1868  len2 = VARSIZE_ANY_EXHDR(t2);
1869 
1870  /* paranoia ... probably should throw error instead? */
1871  if (len1 < 0)
1872  len1 = 0;
1873  if (len2 < 0)
1874  len2 = 0;
1875 
1876  len = len1 + len2 + VARHDRSZ;
1877  result = (bytea *) palloc(len);
1878 
1879  /* Set size of result string... */
1880  SET_VARSIZE(result, len);
1881 
1882  /* Fill data field of result string... */
1883  ptr = VARDATA(result);
1884  if (len1 > 0)
1885  memcpy(ptr, VARDATA_ANY(t1), len1);
1886  if (len2 > 0)
1887  memcpy(ptr + len1, VARDATA_ANY(t2), len2);
1888 
1889  return result;
1890 }
1891 
1892 #define PG_STR_GET_BYTEA(str_) \
1893  DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1894 
1895 /*
1896  * bytea_substr()
1897  * Return a substring starting at the specified position.
1898  * Cloned from text_substr and modified as required.
1899  *
1900  * Input:
1901  * - string
1902  * - starting position (is one-based)
1903  * - string length (optional)
1904  *
1905  * If the starting position is zero or less, then return from the start of the string
1906  * adjusting the length to be consistent with the "negative start" per SQL.
1907  * If the length is less than zero, an ERROR is thrown. If no third argument
1908  * (length) is provided, the length to the end of the string is assumed.
1909  */
1910 Datum
1912 {
1914  PG_GETARG_INT32(1),
1915  PG_GETARG_INT32(2),
1916  false));
1917 }
1918 
1919 /*
1920  * bytea_substr_no_len -
1921  * Wrapper to avoid opr_sanity failure due to
1922  * one function accepting a different number of args.
1923  */
1924 Datum
1926 {
1928  PG_GETARG_INT32(1),
1929  -1,
1930  true));
1931 }
1932 
1933 static bytea *
1935  int S,
1936  int L,
1937  bool length_not_specified)
1938 {
1939  int S1; /* adjusted start position */
1940  int L1; /* adjusted substring length */
1941 
1942  S1 = Max(S, 1);
1943 
1944  if (length_not_specified)
1945  {
1946  /*
1947  * Not passed a length - DatumGetByteaPSlice() grabs everything to the
1948  * end of the string if we pass it a negative value for length.
1949  */
1950  L1 = -1;
1951  }
1952  else
1953  {
1954  /* end position */
1955  int E = S + L;
1956 
1957  /*
1958  * A negative value for L is the only way for the end position to be
1959  * before the start. SQL99 says to throw an error.
1960  */
1961  if (E < S)
1962  ereport(ERROR,
1963  (errcode(ERRCODE_SUBSTRING_ERROR),
1964  errmsg("negative substring length not allowed")));
1965 
1966  /*
1967  * A zero or negative value for the end position can happen if the
1968  * start was negative or one. SQL99 says to return a zero-length
1969  * string.
1970  */
1971  if (E < 1)
1972  return PG_STR_GET_BYTEA("");
1973 
1974  L1 = E - S1;
1975  }
1976 
1977  /*
1978  * If the start position is past the end of the string, SQL99 says to
1979  * return a zero-length string -- DatumGetByteaPSlice() will do that for
1980  * us. Convert to zero-based starting position
1981  */
1982  return DatumGetByteaPSlice(str, S1 - 1, L1);
1983 }
1984 
1985 /*
1986  * byteaoverlay
1987  * Replace specified substring of first string with second
1988  *
1989  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1990  * This code is a direct implementation of what the standard says.
1991  */
1992 Datum
1994 {
1995  bytea *t1 = PG_GETARG_BYTEA_PP(0);
1996  bytea *t2 = PG_GETARG_BYTEA_PP(1);
1997  int sp = PG_GETARG_INT32(2); /* substring start position */
1998  int sl = PG_GETARG_INT32(3); /* substring length */
1999 
2000  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2001 }
2002 
2003 Datum
2005 {
2006  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2007  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2008  int sp = PG_GETARG_INT32(2); /* substring start position */
2009  int sl;
2010 
2011  sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2012  PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2013 }
2014 
2015 static bytea *
2016 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2017 {
2018  bytea *result;
2019  bytea *s1;
2020  bytea *s2;
2021  int sp_pl_sl;
2022 
2023  /*
2024  * Check for possible integer-overflow cases. For negative sp, throw a
2025  * "substring length" error because that's what should be expected
2026  * according to the spec's definition of OVERLAY().
2027  */
2028  if (sp <= 0)
2029  ereport(ERROR,
2030  (errcode(ERRCODE_SUBSTRING_ERROR),
2031  errmsg("negative substring length not allowed")));
2032  sp_pl_sl = sp + sl;
2033  if (sp_pl_sl <= sl)
2034  ereport(ERROR,
2035  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2036  errmsg("integer out of range")));
2037 
2038  s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2039  s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2040  result = bytea_catenate(s1, t2);
2041  result = bytea_catenate(result, s2);
2042 
2043  return result;
2044 }
2045 
2046 /*
2047  * byteapos -
2048  * Return the position of the specified substring.
2049  * Implements the SQL POSITION() function.
2050  * Cloned from textpos and modified as required.
2051  */
2052 Datum
2054 {
2055  bytea *t1 = PG_GETARG_BYTEA_PP(0);
2056  bytea *t2 = PG_GETARG_BYTEA_PP(1);
2057  int pos;
2058  int px,
2059  p;
2060  int len1,
2061  len2;
2062  char *p1,
2063  *p2;
2064 
2065  len1 = VARSIZE_ANY_EXHDR(t1);
2066  len2 = VARSIZE_ANY_EXHDR(t2);
2067 
2068  if (len2 <= 0)
2069  PG_RETURN_INT32(1); /* result for empty pattern */
2070 
2071  p1 = VARDATA_ANY(t1);
2072  p2 = VARDATA_ANY(t2);
2073 
2074  pos = 0;
2075  px = (len1 - len2);
2076  for (p = 0; p <= px; p++)
2077  {
2078  if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
2079  {
2080  pos = p + 1;
2081  break;
2082  };
2083  p1++;
2084  };
2085 
2086  PG_RETURN_INT32(pos);
2087 }
2088 
2089 /*-------------------------------------------------------------
2090  * byteaGetByte
2091  *
2092  * this routine treats "bytea" as an array of bytes.
2093  * It returns the Nth byte (a number between 0 and 255).
2094  *-------------------------------------------------------------
2095  */
2096 Datum
2098 {
2099  bytea *v = PG_GETARG_BYTEA_PP(0);
2100  int32 n = PG_GETARG_INT32(1);
2101  int len;
2102  int byte;
2103 
2104  len = VARSIZE_ANY_EXHDR(v);
2105 
2106  if (n < 0 || n >= len)
2107  ereport(ERROR,
2108  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2109  errmsg("index %d out of valid range, 0..%d",
2110  n, len - 1)));
2111 
2112  byte = ((unsigned char *) VARDATA_ANY(v))[n];
2113 
2114  PG_RETURN_INT32(byte);
2115 }
2116 
2117 /*-------------------------------------------------------------
2118  * byteaGetBit
2119  *
2120  * This routine treats a "bytea" type like an array of bits.
2121  * It returns the value of the Nth bit (0 or 1).
2122  *
2123  *-------------------------------------------------------------
2124  */
2125 Datum
2127 {
2128  bytea *v = PG_GETARG_BYTEA_PP(0);
2129  int32 n = PG_GETARG_INT32(1);
2130  int byteNo,
2131  bitNo;
2132  int len;
2133  int byte;
2134 
2135  len = VARSIZE_ANY_EXHDR(v);
2136 
2137  if (n < 0 || n >= len * 8)
2138  ereport(ERROR,
2139  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2140  errmsg("index %d out of valid range, 0..%d",
2141  n, len * 8 - 1)));
2142 
2143  byteNo = n / 8;
2144  bitNo = n % 8;
2145 
2146  byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
2147 
2148  if (byte & (1 << bitNo))
2149  PG_RETURN_INT32(1);
2150  else
2151  PG_RETURN_INT32(0);
2152 }
2153 
2154 /*-------------------------------------------------------------
2155  * byteaSetByte
2156  *
2157  * Given an instance of type 'bytea' creates a new one with
2158  * the Nth byte set to the given value.
2159  *
2160  *-------------------------------------------------------------
2161  */
2162 Datum
2164 {
2165  bytea *v = PG_GETARG_BYTEA_P(0);
2166  int32 n = PG_GETARG_INT32(1);
2167  int32 newByte = PG_GETARG_INT32(2);
2168  int len;
2169  bytea *res;
2170 
2171  len = VARSIZE(v) - VARHDRSZ;
2172 
2173  if (n < 0 || n >= len)
2174  ereport(ERROR,
2175  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2176  errmsg("index %d out of valid range, 0..%d",
2177  n, len - 1)));
2178 
2179  /*
2180  * Make a copy of the original varlena.
2181  */
2182  res = (bytea *) palloc(VARSIZE(v));
2183  memcpy((char *) res, (char *) v, VARSIZE(v));
2184 
2185  /*
2186  * Now set the byte.
2187  */
2188  ((unsigned char *) VARDATA(res))[n] = newByte;
2189 
2190  PG_RETURN_BYTEA_P(res);
2191 }
2192 
2193 /*-------------------------------------------------------------
2194  * byteaSetBit
2195  *
2196  * Given an instance of type 'bytea' creates a new one with
2197  * the Nth bit set to the given value.
2198  *
2199  *-------------------------------------------------------------
2200  */
2201 Datum
2203 {
2204  bytea *v = PG_GETARG_BYTEA_P(0);
2205  int32 n = PG_GETARG_INT32(1);
2206  int32 newBit = PG_GETARG_INT32(2);
2207  bytea *res;
2208  int len;
2209  int oldByte,
2210  newByte;
2211  int byteNo,
2212  bitNo;
2213 
2214  len = VARSIZE(v) - VARHDRSZ;
2215 
2216  if (n < 0 || n >= len * 8)
2217  ereport(ERROR,
2218  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2219  errmsg("index %d out of valid range, 0..%d",
2220  n, len * 8 - 1)));
2221 
2222  byteNo = n / 8;
2223  bitNo = n % 8;
2224 
2225  /*
2226  * sanity check!
2227  */
2228  if (newBit != 0 && newBit != 1)
2229  ereport(ERROR,
2230  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2231  errmsg("new bit must be 0 or 1")));
2232 
2233  /*
2234  * Make a copy of the original varlena.
2235  */
2236  res = (bytea *) palloc(VARSIZE(v));
2237  memcpy((char *) res, (char *) v, VARSIZE(v));
2238 
2239  /*
2240  * Update the byte.
2241  */
2242  oldByte = ((unsigned char *) VARDATA(res))[byteNo];
2243 
2244  if (newBit == 0)
2245  newByte = oldByte & (~(1 << bitNo));
2246  else
2247  newByte = oldByte | (1 << bitNo);
2248 
2249  ((unsigned char *) VARDATA(res))[byteNo] = newByte;
2250 
2251  PG_RETURN_BYTEA_P(res);
2252 }
2253 
2254 
2255 /* text_name()
2256  * Converts a text type to a Name type.
2257  */
2258 Datum
2260 {
2261  text *s = PG_GETARG_TEXT_PP(0);
2262  Name result;
2263  int len;
2264 
2265  len = VARSIZE_ANY_EXHDR(s);
2266 
2267  /* Truncate oversize input */
2268  if (len >= NAMEDATALEN)
2269  len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
2270 
2271  /* We use palloc0 here to ensure result is zero-padded */
2272  result = (Name) palloc0(NAMEDATALEN);
2273  memcpy(NameStr(*result), VARDATA_ANY(s), len);
2274 
2275  PG_RETURN_NAME(result);
2276 }
2277 
2278 /* name_text()
2279  * Converts a Name type to a text type.
2280  */
2281 Datum
2283 {
2284  Name s = PG_GETARG_NAME(0);
2285 
2287 }
2288 
2289 
2290 /*
2291  * textToQualifiedNameList - convert a text object to list of names
2292  *
2293  * This implements the input parsing needed by nextval() and other
2294  * functions that take a text parameter representing a qualified name.
2295  * We split the name at dots, downcase if not double-quoted, and
2296  * truncate names if they're too long.
2297  */
2298 List *
2300 {
2301  char *rawname;
2302  List *result = NIL;
2303  List *namelist;
2304  ListCell *l;
2305 
2306  /* Convert to C string (handles possible detoasting). */
2307  /* Note we rely on being able to modify rawname below. */
2308  rawname = text_to_cstring(textval);
2309 
2310  if (!SplitIdentifierString(rawname, '.', &namelist))
2311  ereport(ERROR,
2312  (errcode(ERRCODE_INVALID_NAME),
2313  errmsg("invalid name syntax")));
2314 
2315  if (namelist == NIL)
2316  ereport(ERROR,
2317  (errcode(ERRCODE_INVALID_NAME),
2318  errmsg("invalid name syntax")));
2319 
2320  foreach(l, namelist)
2321  {
2322  char *curname = (char *) lfirst(l);
2323 
2324  result = lappend(result, makeString(pstrdup(curname)));
2325  }
2326 
2327  pfree(rawname);
2328  list_free(namelist);
2329 
2330  return result;
2331 }
2332 
2333 /*
2334  * SplitIdentifierString --- parse a string containing identifiers
2335  *
2336  * This is the guts of textToQualifiedNameList, and is exported for use in
2337  * other situations such as parsing GUC variables. In the GUC case, it's
2338  * important to avoid memory leaks, so the API is designed to minimize the
2339  * amount of stuff that needs to be allocated and freed.
2340  *
2341  * Inputs:
2342  * rawstring: the input string; must be overwritable! On return, it's
2343  * been modified to contain the separated identifiers.
2344  * separator: the separator punctuation expected between identifiers
2345  * (typically '.' or ','). Whitespace may also appear around
2346  * identifiers.
2347  * Outputs:
2348  * namelist: filled with a palloc'd list of pointers to identifiers within
2349  * rawstring. Caller should list_free() this even on error return.
2350  *
2351  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2352  *
2353  * Note that an empty string is considered okay here, though not in
2354  * textToQualifiedNameList.
2355  */
2356 bool
2357 SplitIdentifierString(char *rawstring, char separator,
2358  List **namelist)
2359 {
  /* nextp scans rawstring in place; done is set on reaching its final NUL */
2360  char *nextp = rawstring;
2361  bool done = false;
2362 
2363  *namelist = NIL;
2364 
2365  while (isspace((unsigned char) *nextp))
2366  nextp++; /* skip leading whitespace */
2367 
2368  if (*nextp == '\0')
2369  return true; /* allow empty string */
2370 
2371  /* At the top of the loop, we are at start of a new identifier. */
2372  do
2373  {
2374  char *curname;
2375  char *endp;
2376 
2377  if (*nextp == '\"')
2378  {
2379  /* Quoted name --- collapse quote-quote pairs, no downcasing */
2380  curname = nextp + 1;
2381  for (;;)
2382  {
2383  endp = strchr(nextp + 1, '\"');
2384  if (endp == NULL)
2385  return false; /* mismatched quotes */
2386  if (endp[1] != '\"')
2387  break; /* found end of quoted name */
2388  /* Collapse adjacent quotes into one quote, and look again */
  /* strlen(endp) bytes from endp + 1 include the terminating NUL */
2389  memmove(endp, endp + 1, strlen(endp));
  /* the next strchr() starts at endp + 1, just past the quote we kept */
2390  nextp = endp;
2391  }
2392  /* endp now points at the terminating quote */
2393  nextp = endp + 1;
2394  }
2395  else
2396  {
2397  /* Unquoted name --- extends to separator or whitespace */
2398  char *downname;
2399  int len;
2400 
2401  curname = nextp;
2402  while (*nextp && *nextp != separator &&
2403  !isspace((unsigned char) *nextp))
2404  nextp++;
2405  endp = nextp;
2406  if (curname == nextp)
2407  return false; /* empty unquoted name not allowed */
2408 
2409  /*
2410  * Downcase the identifier, using same code as main lexer does.
2411  *
2412  * XXX because we want to overwrite the input in-place, we cannot
2413  * support a downcasing transformation that increases the string
2414  * length. This is not a problem given the current implementation
2415  * of downcase_truncate_identifier, but we'll probably have to do
2416  * something about this someday.
2417  */
2418  len = endp - curname;
2419  downname = downcase_truncate_identifier(curname, len, false);
2420  Assert(strlen(downname) <= len);
  /*
   * strncpy writes exactly len bytes: no NUL is stored at *endp, which
   * must stay intact until the separator check below, and a shorter
   * downname is NUL-padded out to len.
   */
2421  strncpy(curname, downname, len);
2422  pfree(downname);
2423  }
2424 
2425  while (isspace((unsigned char) *nextp))
2426  nextp++; /* skip trailing whitespace */
2427 
2428  if (*nextp == separator)
2429  {
2430  nextp++;
2431  while (isspace((unsigned char) *nextp))
2432  nextp++; /* skip leading whitespace for next */
2433  /* we expect another name, so done remains false */
2434  }
2435  else if (*nextp == '\0')
2436  done = true;
2437  else
2438  return false; /* invalid syntax */
2439 
2440  /* Now safe to overwrite separator with a null */
2441  *endp = '\0';
2442 
2443  /* Truncate name if it's overlength */
2444  truncate_identifier(curname, strlen(curname), false);
2445 
2446  /*
2447  * Finished isolating current name --- add it to list
2448  */
2449  *namelist = lappend(*namelist, curname);
2450 
2451  /* Loop back if we didn't reach end of string */
2452  } while (!done);
2453 
2454  return true;
2455 }
2456 
2457 
2458 /*
2459  * SplitDirectoriesString --- parse a string containing directory names
2460  *
2461  * This is similar to SplitIdentifierString, except that the parsing
2462  * rules are meant to handle pathnames instead of identifiers: there is
2463  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
2464  * and we apply canonicalize_path() to each extracted string. Because of the
2465  * last, the returned strings are separately palloc'd rather than being
2466  * pointers into rawstring --- but we still scribble on rawstring.
2467  *
2468  * Inputs:
2469  * rawstring: the input string; must be modifiable!
2470  * separator: the separator punctuation expected between directories
2471  * (typically ',' or ';'). Whitespace may also appear around
2472  * directories.
2473  * Outputs:
2474  * namelist: filled with a palloc'd list of directory names.
2475  * Caller should list_free_deep() this even on error return.
2476  *
2477  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2478  *
2479  * Note that an empty string is considered okay here.
2480  */
2481 bool
2482 SplitDirectoriesString(char *rawstring, char separator,
2483  List **namelist)
2484 {
2485  char *nextp = rawstring;
2486  bool done = false;
2487 
2488  *namelist = NIL;
2489 
2490  while (isspace((unsigned char) *nextp))
2491  nextp++; /* skip leading whitespace */
2492 
2493  if (*nextp == '\0')
2494  return true; /* allow empty string */
2495 
2496  /* At the top of the loop, we are at start of a new directory. */
2497  do
2498  {
2499  char *curname;
2500  char *endp;
2501 
2502  if (*nextp == '\"')
2503  {
2504  /* Quoted name --- collapse quote-quote pairs */
2505  curname = nextp + 1;
2506  for (;;)
2507  {
2508  endp = strchr(nextp + 1, '\"');
2509  if (endp == NULL)
2510  return false; /* mismatched quotes */
2511  if (endp[1] != '\"')
2512  break; /* found end of quoted name */
2513  /* Collapse adjacent quotes into one quote, and look again */
2514  memmove(endp, endp + 1, strlen(endp));
2515  nextp = endp;
2516  }
2517  /* endp now points at the terminating quote */
2518  nextp = endp + 1;
2519  }
2520  else
2521  {
2522  /* Unquoted name --- extends to separator or end of string */
2523  curname = endp = nextp;
2524  while (*nextp && *nextp != separator)
2525  {
2526  /* trailing whitespace should not be included in name */
2527  if (!isspace((unsigned char) *nextp))
2528  endp = nextp + 1;
2529  nextp++;
2530  }
2531  if (curname == endp)
2532  return false; /* empty unquoted name not allowed */
2533  }
2534 
2535  while (isspace((unsigned char) *nextp))
2536  nextp++; /* skip trailing whitespace */
2537 
2538  if (*nextp == separator)
2539  {
2540  nextp++;
2541  while (isspace((unsigned char) *nextp))
2542  nextp++; /* skip leading whitespace for next */
2543  /* we expect another name, so done remains false */
2544  }
2545  else if (*nextp == '\0')
2546  done = true;
2547  else
2548  return false; /* invalid syntax */
2549 
2550  /* Now safe to overwrite separator with a null */
2551  *endp = '\0';
2552 
2553  /* Truncate path if it's overlength */
2554  if (strlen(curname) >= MAXPGPATH)
2555  curname[MAXPGPATH - 1] = '\0';
2556 
2557  /*
2558  * Finished isolating current name --- add it to list
2559  */
2560  curname = pstrdup(curname);
2561  canonicalize_path(curname);
2562  *namelist = lappend(*namelist, curname);
2563 
2564  /* Loop back if we didn't reach end of string */
2565  } while (!done);
2566 
2567  return true;
2568 }
2569 
2570 
2571 /*****************************************************************************
2572  * Comparison Functions used for bytea
2573  *
2574  * Note: btree indexes need these routines not to leak memory; therefore,
2575  * be careful to free working copies of toasted datums. Most places don't
2576  * need to be so careful.
2577  *****************************************************************************/
2578 
2579 Datum
2581 {
2582  Datum arg1 = PG_GETARG_DATUM(0);
2583  Datum arg2 = PG_GETARG_DATUM(1);
2584  bool result;
2585  Size len1,
2586  len2;
2587 
2588  /*
2589  * We can use a fast path for unequal lengths, which might save us from
2590  * having to detoast one or both values.
2591  */
2592  len1 = toast_raw_datum_size(arg1);
2593  len2 = toast_raw_datum_size(arg2);
2594  if (len1 != len2)
2595  result = false;
2596  else
2597  {
2598  bytea *barg1 = DatumGetByteaPP(arg1);
2599  bytea *barg2 = DatumGetByteaPP(arg2);
2600 
2601  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
2602  len1 - VARHDRSZ) == 0);
2603 
2604  PG_FREE_IF_COPY(barg1, 0);
2605  PG_FREE_IF_COPY(barg2, 1);
2606  }
2607 
2608  PG_RETURN_BOOL(result);
2609 }
2610 
2611 Datum
2613 {
2614  Datum arg1 = PG_GETARG_DATUM(0);
2615  Datum arg2 = PG_GETARG_DATUM(1);
2616  bool result;
2617  Size len1,
2618  len2;
2619 
2620  /*
2621  * We can use a fast path for unequal lengths, which might save us from
2622  * having to detoast one or both values.
2623  */
2624  len1 = toast_raw_datum_size(arg1);
2625  len2 = toast_raw_datum_size(arg2);
2626  if (len1 != len2)
2627  result = true;
2628  else
2629  {
2630  bytea *barg1 = DatumGetByteaPP(arg1);
2631  bytea *barg2 = DatumGetByteaPP(arg2);
2632 
2633  result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
2634  len1 - VARHDRSZ) != 0);
2635 
2636  PG_FREE_IF_COPY(barg1, 0);
2637  PG_FREE_IF_COPY(barg2, 1);
2638  }
2639 
2640  PG_RETURN_BOOL(result);
2641 }
2642 
2643 Datum
2645 {
2646  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2647  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2648  int len1,
2649  len2;
2650  int cmp;
2651 
2652  len1 = VARSIZE_ANY_EXHDR(arg1);
2653  len2 = VARSIZE_ANY_EXHDR(arg2);
2654 
2655  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2656 
2657  PG_FREE_IF_COPY(arg1, 0);
2658  PG_FREE_IF_COPY(arg2, 1);
2659 
2660  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
2661 }
2662 
2663 Datum
2665 {
2666  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2667  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2668  int len1,
2669  len2;
2670  int cmp;
2671 
2672  len1 = VARSIZE_ANY_EXHDR(arg1);
2673  len2 = VARSIZE_ANY_EXHDR(arg2);
2674 
2675  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2676 
2677  PG_FREE_IF_COPY(arg1, 0);
2678  PG_FREE_IF_COPY(arg2, 1);
2679 
2680  PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
2681 }
2682 
2683 Datum
2685 {
2686  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2687  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2688  int len1,
2689  len2;
2690  int cmp;
2691 
2692  len1 = VARSIZE_ANY_EXHDR(arg1);
2693  len2 = VARSIZE_ANY_EXHDR(arg2);
2694 
2695  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2696 
2697  PG_FREE_IF_COPY(arg1, 0);
2698  PG_FREE_IF_COPY(arg2, 1);
2699 
2700  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
2701 }
2702 
2703 Datum
2705 {
2706  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2707  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2708  int len1,
2709  len2;
2710  int cmp;
2711 
2712  len1 = VARSIZE_ANY_EXHDR(arg1);
2713  len2 = VARSIZE_ANY_EXHDR(arg2);
2714 
2715  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2716 
2717  PG_FREE_IF_COPY(arg1, 0);
2718  PG_FREE_IF_COPY(arg2, 1);
2719 
2720  PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
2721 }
2722 
2723 Datum
2725 {
2726  bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2727  bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2728  int len1,
2729  len2;
2730  int cmp;
2731 
2732  len1 = VARSIZE_ANY_EXHDR(arg1);
2733  len2 = VARSIZE_ANY_EXHDR(arg2);
2734 
2735  cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2736  if ((cmp == 0) && (len1 != len2))
2737  cmp = (len1 < len2) ? -1 : 1;
2738 
2739  PG_FREE_IF_COPY(arg1, 0);
2740  PG_FREE_IF_COPY(arg2, 1);
2741 
2742  PG_RETURN_INT32(cmp);
2743 }
2744 
2745 /*
2746  * appendStringInfoText
2747  *
2748  * Append a text to str.
2749  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2750  */
2751 static void
2753 {
2755 }
2756 
2757 /*
2758  * replace_text
2759  * replace all occurrences of 'old_sub_str' in 'orig_str'
2760  * with 'new_sub_str' to form 'new_str'
2761  *
2762  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2763  * otherwise returns 'new_str'
2764  */
2765 Datum
2767 {
2768  text *src_text = PG_GETARG_TEXT_PP(0);
2769  text *from_sub_text = PG_GETARG_TEXT_PP(1);
2770  text *to_sub_text = PG_GETARG_TEXT_PP(2);
2771  int src_text_len;
2772  int from_sub_text_len;
2774  text *ret_text;
2775  int start_posn;
2776  int curr_posn;
2777  int chunk_len;
2778  char *start_ptr;
2779  StringInfoData str;
2780 
2781  text_position_setup(src_text, from_sub_text, &state);
2782 
2783  /*
2784  * Note: we check the converted string length, not the original, because
2785  * they could be different if the input contained invalid encoding.
2786  */
2787  src_text_len = state.len1;
2788  from_sub_text_len = state.len2;
2789 
2790  /* Return unmodified source string if empty source or pattern */
2791  if (src_text_len < 1 || from_sub_text_len < 1)
2792  {
2793  text_position_cleanup(&state);
2794  PG_RETURN_TEXT_P(src_text);
2795  }
2796 
2797  start_posn = 1;
2798  curr_posn = text_position_next(1, &state);
2799 
2800  /* When the from_sub_text is not found, there is nothing to do. */
2801  if (curr_posn == 0)
2802  {
2803  text_position_cleanup(&state);
2804  PG_RETURN_TEXT_P(src_text);
2805  }
2806 
2807  /* start_ptr points to the start_posn'th character of src_text */
2808  start_ptr = VARDATA_ANY(src_text);
2809 
2810  initStringInfo(&str);
2811 
2812  do
2813  {
2815 
2816  /* copy the data skipped over by last text_position_next() */
2817  chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
2818  appendBinaryStringInfo(&str, start_ptr, chunk_len);
2819 
2820  appendStringInfoText(&str, to_sub_text);
2821 
2822  start_posn = curr_posn;
2823  start_ptr += chunk_len;
2824  start_posn += from_sub_text_len;
2825  start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
2826 
2827  curr_posn = text_position_next(start_posn, &state);
2828  }
2829  while (curr_posn > 0);
2830 
2831  /* copy trailing data */
2832  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2833  appendBinaryStringInfo(&str, start_ptr, chunk_len);
2834 
2835  text_position_cleanup(&state);
2836 
2837  ret_text = cstring_to_text_with_len(str.data, str.len);
2838  pfree(str.data);
2839 
2840  PG_RETURN_TEXT_P(ret_text);
2841 }
2842 
2843 /*
2844  * check_replace_text_has_escape_char
2845  *
2846  * check whether replace_text contains escape char.
2847  */
2848 static bool
2850 {
2851  const char *p = VARDATA_ANY(replace_text);
2852  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2853 
2855  {
2856  for (; p < p_end; p++)
2857  {
2858  if (*p == '\\')
2859  return true;
2860  }
2861  }
2862  else
2863  {
2864  for (; p < p_end; p += pg_mblen(p))
2865  {
2866  if (*p == '\\')
2867  return true;
2868  }
2869  }
2870 
2871  return false;
2872 }
2873 
2874 /*
2875  * appendStringInfoRegexpSubstr
2876  *
2877  * Append replace_text to str, substituting regexp back references for
2878  * \n escapes. start_ptr is the start of the match in the source string,
2879  * at logical character position data_pos.
2880  */
2881 static void
2883  regmatch_t *pmatch,
2884  char *start_ptr, int data_pos)
2885 {
2886  const char *p = VARDATA_ANY(replace_text);
2887  const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2888  int eml = pg_database_encoding_max_length();
2889 
2890  for (;;)
2891  {
2892  const char *chunk_start = p;
2893  int so;
2894  int eo;
2895 
2896  /* Find next escape char. */
2897  if (eml == 1)
2898  {
2899  for (; p < p_end && *p != '\\'; p++)
2900  /* nothing */ ;
2901  }
2902  else
2903  {
2904  for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2905  /* nothing */ ;
2906  }
2907 
2908  /* Copy the text we just scanned over, if any. */
2909  if (p > chunk_start)
2910  appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2911 
2912  /* Done if at end of string, else advance over escape char. */
2913  if (p >= p_end)
2914  break;
2915  p++;
2916 
2917  if (p >= p_end)
2918  {
2919  /* Escape at very end of input. Treat same as unexpected char */
2920  appendStringInfoChar(str, '\\');
2921  break;
2922  }
2923 
2924  if (*p >= '1' && *p <= '9')
2925  {
2926  /* Use the back reference of regexp. */
2927  int idx = *p - '0';
2928 
2929  so = pmatch[idx].rm_so;
2930  eo = pmatch[idx].rm_eo;
2931  p++;
2932  }
2933  else if (*p == '&')
2934  {
2935  /* Use the entire matched string. */
2936  so = pmatch[0].rm_so;
2937  eo = pmatch[0].rm_eo;
2938  p++;
2939  }
2940  else if (*p == '\\')
2941  {
2942  /* \\ means transfer one \ to output. */
2943  appendStringInfoChar(str, '\\');
2944  p++;
2945  continue;
2946  }
2947  else
2948  {
2949  /*
2950  * If escape char is not followed by any expected char, just treat
2951  * it as ordinary data to copy. (XXX would it be better to throw
2952  * an error?)
2953  */
2954  appendStringInfoChar(str, '\\');
2955  continue;
2956  }
2957 
2958  if (so != -1 && eo != -1)
2959  {
2960  /*
2961  * Copy the text that is back reference of regexp. Note so and eo
2962  * are counted in characters not bytes.
2963  */
2964  char *chunk_start;
2965  int chunk_len;
2966 
2967  Assert(so >= data_pos);
2968  chunk_start = start_ptr;
2969  chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
2970  chunk_len = charlen_to_bytelen(chunk_start, eo - so);
2971  appendBinaryStringInfo(str, chunk_start, chunk_len);
2972  }
2973  }
2974 }
2975 
2976 #define REGEXP_REPLACE_BACKREF_CNT 10
2977 
2978 /*
2979  * replace_text_regexp
2980  *
2981  * replace text that matches to regexp in src_text to replace_text.
2982  *
2983  * Note: to avoid having to include regex.h in builtins.h, we declare
2984  * the regexp argument as void *, but really it's regex_t *.
2985  */
2986 text *
2987 replace_text_regexp(text *src_text, void *regexp,
2988  text *replace_text, bool glob)
2989 {
2990  text *ret_text;
2991  regex_t *re = (regex_t *) regexp;
2992  int src_text_len = VARSIZE_ANY_EXHDR(src_text);
2995  pg_wchar *data;
2996  size_t data_len;
2997  int search_start;
2998  int data_pos;
2999  char *start_ptr;
3000  bool have_escape;
3001 
3002  initStringInfo(&buf);
3003 
3004  /* Convert data string to wide characters. */
3005  data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
3006  data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
3007 
3008  /* Check whether replace_text has escape char. */
3009  have_escape = check_replace_text_has_escape_char(replace_text);
3010 
3011  /* start_ptr points to the data_pos'th character of src_text */
3012  start_ptr = (char *) VARDATA_ANY(src_text);
3013  data_pos = 0;
3014 
3015  search_start = 0;
3016  while (search_start <= data_len)
3017  {
3018  int regexec_result;
3019 
3021 
3022  regexec_result = pg_regexec(re,
3023  data,
3024  data_len,
3025  search_start,
3026  NULL, /* no details */
3028  pmatch,
3029  0);
3030 
3031  if (regexec_result == REG_NOMATCH)
3032  break;
3033 
3034  if (regexec_result != REG_OKAY)
3035  {
3036  char errMsg[100];
3037 
3039  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
3040  ereport(ERROR,
3041  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
3042  errmsg("regular expression failed: %s", errMsg)));
3043  }
3044 
3045  /*
3046  * Copy the text to the left of the match position. Note we are given
3047  * character not byte indexes.
3048  */
3049  if (pmatch[0].rm_so - data_pos > 0)
3050  {
3051  int chunk_len;
3052 
3053  chunk_len = charlen_to_bytelen(start_ptr,
3054  pmatch[0].rm_so - data_pos);
3055  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3056 
3057  /*
3058  * Advance start_ptr over that text, to avoid multiple rescans of
3059  * it if the replace_text contains multiple back-references.
3060  */
3061  start_ptr += chunk_len;
3062  data_pos = pmatch[0].rm_so;
3063  }
3064 
3065  /*
3066  * Copy the replace_text. Process back references when the
3067  * replace_text has escape characters.
3068  */
3069  if (have_escape)
3070  appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
3071  start_ptr, data_pos);
3072  else
3073  appendStringInfoText(&buf, replace_text);
3074 
3075  /* Advance start_ptr and data_pos over the matched text. */
3076  start_ptr += charlen_to_bytelen(start_ptr,
3077  pmatch[0].rm_eo - data_pos);
3078  data_pos = pmatch[0].rm_eo;
3079 
3080  /*
3081  * When global option is off, replace the first instance only.
3082  */
3083  if (!glob)
3084  break;
3085 
3086  /*
3087  * Advance search position. Normally we start the next search at the
3088  * end of the previous match; but if the match was of zero length, we
3089  * have to advance by one character, or we'd just find the same match
3090  * again.
3091  */
3092  search_start = data_pos;
3093  if (pmatch[0].rm_so == pmatch[0].rm_eo)
3094  search_start++;
3095  }
3096 
3097  /*
3098  * Copy the text to the right of the last match.
3099  */
3100  if (data_pos < data_len)
3101  {
3102  int chunk_len;
3103 
3104  chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3105  appendBinaryStringInfo(&buf, start_ptr, chunk_len);
3106  }
3107 
3108  ret_text = cstring_to_text_with_len(buf.data, buf.len);
3109  pfree(buf.data);
3110  pfree(data);
3111 
3112  return ret_text;
3113 }
3114 
3115 /*
3116  * split_text
3117  * parse input string
3118  * return ord item (1 based)
3119  * based on provided field separator
3120  */
3121 Datum
3123 {
3124  text *inputstring = PG_GETARG_TEXT_PP(0);
3125  text *fldsep = PG_GETARG_TEXT_PP(1);
3126  int fldnum = PG_GETARG_INT32(2);
3127  int inputstring_len;
3128  int fldsep_len;
3130  int start_posn;
3131  int end_posn;
3132  text *result_text;
3133 
3134  /* field number is 1 based */
3135  if (fldnum < 1)
3136  ereport(ERROR,
3137  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3138  errmsg("field position must be greater than zero")));
3139 
3140  text_position_setup(inputstring, fldsep, &state);
3141 
3142  /*
3143  * Note: we check the converted string length, not the original, because
3144  * they could be different if the input contained invalid encoding.
3145  */
3146  inputstring_len = state.len1;
3147  fldsep_len = state.len2;
3148 
3149  /* return empty string for empty input string */
3150  if (inputstring_len < 1)
3151  {
3152  text_position_cleanup(&state);
3154  }
3155 
3156  /* empty field separator */
3157  if (fldsep_len < 1)
3158  {
3159  text_position_cleanup(&state);
3160  /* if first field, return input string, else empty string */
3161  if (fldnum == 1)
3162  PG_RETURN_TEXT_P(inputstring);
3163  else
3165  }
3166 
3167  /* identify bounds of first field */
3168  start_posn = 1;
3169  end_posn = text_position_next(1, &state);
3170 
3171  /* special case if fldsep not found at all */
3172  if (end_posn == 0)
3173  {
3174  text_position_cleanup(&state);
3175  /* if field 1 requested, return input string, else empty string */
3176  if (fldnum == 1)
3177  PG_RETURN_TEXT_P(inputstring);
3178  else
3180  }
3181 
3182  while (end_posn > 0 && --fldnum > 0)
3183  {
3184  /* identify bounds of next field */
3185  start_posn = end_posn + fldsep_len;
3186  end_posn = text_position_next(start_posn, &state);
3187  }
3188 
3189  text_position_cleanup(&state);
3190 
3191  if (fldnum > 0)
3192  {
3193  /* N'th field separator not found */
3194  /* if last field requested, return it, else empty string */
3195  if (fldnum == 1)
3196  result_text = text_substring(PointerGetDatum(inputstring),
3197  start_posn,
3198  -1,
3199  true);
3200  else
3201  result_text = cstring_to_text("");
3202  }
3203  else
3204  {
3205  /* non-last field requested */
3206  result_text = text_substring(PointerGetDatum(inputstring),
3207  start_posn,
3208  end_posn - start_posn,
3209  false);
3210  }
3211 
3212  PG_RETURN_TEXT_P(result_text);
3213 }
3214 
3215 /*
3216  * Convenience function to return true when two text params are equal.
3217  */
3218 static bool
3219 text_isequal(text *txt1, text *txt2)
3220 {
3222  PointerGetDatum(txt1),
3223  PointerGetDatum(txt2)));
3224 }
3225 
3226 /*
3227  * text_to_array
3228  * parse input string and return text array of elements,
3229  * based on provided field separator
3230  */
3231 Datum
3233 {
3234  return text_to_array_internal(fcinfo);
3235 }
3236 
3237 /*
3238  * text_to_array_null
3239  * parse input string and return text array of elements,
3240  * based on provided field separator and null string
3241  *
3242  * This is a separate entry point only to prevent the regression tests from
3243  * complaining about different argument sets for the same internal function.
3244  */
3245 Datum
3247 {
3248  return text_to_array_internal(fcinfo);
3249 }
3250 
3251 /*
3252  * common code for text_to_array and text_to_array_null functions
3253  *
3254  * These are not strict so we have to test for null inputs explicitly.
3255  */
3256 static Datum
3258 {
3259  text *inputstring;
3260  text *fldsep;
3261  text *null_string;
3262  int inputstring_len;
3263  int fldsep_len;
3264  char *start_ptr;
3265  text *result_text;
3266  bool is_null;
3267  ArrayBuildState *astate = NULL;
3268 
3269  /* when input string is NULL, then result is NULL too */
3270  if (PG_ARGISNULL(0))
3271  PG_RETURN_NULL();
3272 
3273  inputstring = PG_GETARG_TEXT_PP(0);
3274 
3275  /* fldsep can be NULL */
3276  if (!PG_ARGISNULL(1))
3277  fldsep = PG_GETARG_TEXT_PP(1);
3278  else
3279  fldsep = NULL;
3280 
3281  /* null_string can be NULL or omitted */
3282  if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
3283  null_string = PG_GETARG_TEXT_PP(2);
3284  else
3285  null_string = NULL;
3286 
3287  if (fldsep != NULL)
3288  {
3289  /*
3290  * Normal case with non-null fldsep. Use the text_position machinery
3291  * to search for occurrences of fldsep.
3292  */
3294  int fldnum;
3295  int start_posn;
3296  int end_posn;
3297  int chunk_len;
3298 
3299  text_position_setup(inputstring, fldsep, &state);
3300 
3301  /*
3302  * Note: we check the converted string length, not the original,
3303  * because they could be different if the input contained invalid
3304  * encoding.
3305  */
3306  inputstring_len = state.len1;
3307  fldsep_len = state.len2;
3308 
3309  /* return empty array for empty input string */
3310  if (inputstring_len < 1)
3311  {
3312  text_position_cleanup(&state);
3314  }
3315 
3316  /*
3317  * empty field separator: return the input string as a one-element
3318  * array
3319  */
3320  if (fldsep_len < 1)
3321  {
3322  text_position_cleanup(&state);
3323  /* single element can be a NULL too */
3324  is_null = null_string ? text_isequal(inputstring, null_string) : false;
3326  PointerGetDatum(inputstring),
3327  is_null, 1));
3328  }
3329 
3330  start_posn = 1;
3331  /* start_ptr points to the start_posn'th character of inputstring */
3332  start_ptr = VARDATA_ANY(inputstring);
3333 
3334  for (fldnum = 1;; fldnum++) /* field number is 1 based */
3335  {
3337 
3338  end_posn = text_position_next(start_posn, &state);
3339 
3340  if (end_posn == 0)
3341  {
3342  /* fetch last field */
3343  chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
3344  }
3345  else
3346  {
3347  /* fetch non-last field */
3348  chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
3349  }
3350 
3351  /* must build a temp text datum to pass to accumArrayResult */
3352  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3353  is_null = null_string ? text_isequal(result_text, null_string) : false;
3354 
3355  /* stash away this field */
3356  astate = accumArrayResult(astate,
3357  PointerGetDatum(result_text),
3358  is_null,
3359  TEXTOID,
3361 
3362  pfree(result_text);
3363 
3364  if (end_posn == 0)
3365  break;
3366 
3367  start_posn = end_posn;
3368  start_ptr += chunk_len;
3369  start_posn += fldsep_len;
3370  start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
3371  }
3372 
3373  text_position_cleanup(&state);
3374  }
3375  else
3376  {
3377  /*
3378  * When fldsep is NULL, each character in the inputstring becomes an
3379  * element in the result array. The separator is effectively the
3380  * space between characters.
3381  */
3382  inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
3383 
3384  /* return empty array for empty input string */
3385  if (inputstring_len < 1)
3387 
3388  start_ptr = VARDATA_ANY(inputstring);
3389 
3390  while (inputstring_len > 0)
3391  {
3392  int chunk_len = pg_mblen(start_ptr);
3393 
3395 
3396  /* must build a temp text datum to pass to accumArrayResult */
3397  result_text = cstring_to_text_with_len(start_ptr, chunk_len);
3398  is_null = null_string ? text_isequal(result_text, null_string) : false;
3399 
3400  /* stash away this field */
3401  astate = accumArrayResult(astate,
3402  PointerGetDatum(result_text),
3403  is_null,
3404  TEXTOID,
3406 
3407  pfree(result_text);
3408 
3409  start_ptr += chunk_len;
3410  inputstring_len -= chunk_len;
3411  }
3412  }
3413 
3416 }
3417 
3418 /*
3419  * array_to_text
3420  * concatenate Cstring representation of input array elements
3421  * using provided field separator
3422  */
3423 Datum
3425 {
3427  char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3428 
3429  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
3430 }
3431 
3432 /*
3433  * array_to_text_null
3434  * concatenate Cstring representation of input array elements
3435  * using provided field separator and null string
3436  *
3437  * This version is not strict so we have to test for null inputs explicitly.
3438  */
3439 Datum
3441 {
3442  ArrayType *v;
3443  char *fldsep;
3444  char *null_string;
3445 
3446  /* returns NULL when first or second parameter is NULL */
3447  if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
3448  PG_RETURN_NULL();
3449 
3450  v = PG_GETARG_ARRAYTYPE_P(0);
3451  fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
3452 
3453  /* NULL null string is passed through as a null pointer */
3454  if (!PG_ARGISNULL(2))
3455  null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
3456  else
3457  null_string = NULL;
3458 
3459  PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
3460 }
3461 
3462 /*
3463  * common code for array_to_text and array_to_text_null functions
3464  */
3465 static text *
3467  const char *fldsep, const char *null_string)
3468 {
3469  text *result;
3470  int nitems,
3471  *dims,
3472  ndims;
3473  Oid element_type;
3474  int typlen;
3475  bool typbyval;
3476  char typalign;
3478  bool printed = false;
3479  char *p;
3480  bits8 *bitmap;
3481  int bitmask;
3482  int i;
3483  ArrayMetaState *my_extra;
3484 
3485  ndims = ARR_NDIM(v);
3486  dims = ARR_DIMS(v);
3487  nitems = ArrayGetNItems(ndims, dims);
3488 
3489  /* if there are no elements, return an empty string */
3490  if (nitems == 0)
3491  return cstring_to_text_with_len("", 0);
3492 
3493  element_type = ARR_ELEMTYPE(v);
3494  initStringInfo(&buf);
3495 
3496  /*
3497  * We arrange to look up info about element type, including its output
3498  * conversion proc, only once per series of calls, assuming the element
3499  * type doesn't change underneath us.
3500  */
3501  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
3502  if (my_extra == NULL)
3503  {
3504  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3505  sizeof(ArrayMetaState));
3506  my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
3507  my_extra->element_type = ~element_type;
3508  }
3509 
3510  if (my_extra->element_type != element_type)
3511  {
3512  /*
3513  * Get info about element type, including its output conversion proc
3514  */
3515  get_type_io_data(element_type, IOFunc_output,
3516  &my_extra->typlen, &my_extra->typbyval,
3517  &my_extra->typalign, &my_extra->typdelim,
3518  &my_extra->typioparam, &my_extra->typiofunc);
3519  fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
3520  fcinfo->flinfo->fn_mcxt);
3521  my_extra->element_type = element_type;
3522  }
3523  typlen = my_extra->typlen;
3524  typbyval = my_extra->typbyval;
3525  typalign = my_extra->typalign;
3526 
3527  p = ARR_DATA_PTR(v);
3528  bitmap = ARR_NULLBITMAP(v);
3529  bitmask = 1;
3530 
3531  for (i = 0; i < nitems; i++)
3532  {
3533  Datum itemvalue;
3534  char *value;
3535 
3536  /* Get source element, checking for NULL */
3537  if (bitmap && (*bitmap & bitmask) == 0)
3538  {
3539  /* if null_string is NULL, we just ignore null elements */
3540  if (null_string != NULL)
3541  {
3542  if (printed)
3543  appendStringInfo(&buf, "%s%s", fldsep, null_string);
3544  else
3545  appendStringInfoString(&buf, null_string);
3546  printed = true;
3547  }
3548  }
3549  else
3550  {
3551  itemvalue = fetch_att(p, typbyval, typlen);
3552 
3553  value = OutputFunctionCall(&my_extra->proc, itemvalue);
3554 
3555  if (printed)
3556  appendStringInfo(&buf, "%s%s", fldsep, value);
3557  else
3558  appendStringInfoString(&buf, value);
3559  printed = true;
3560 
3561  p = att_addlength_pointer(p, typlen, p);
3562  p = (char *) att_align_nominal(p, typalign);
3563  }
3564 
3565  /* advance bitmap pointer if any */
3566  if (bitmap)
3567  {
3568  bitmask <<= 1;
3569  if (bitmask == 0x100)
3570  {
3571  bitmap++;
3572  bitmask = 1;
3573  }
3574  }
3575  }
3576 
3577  result = cstring_to_text_with_len(buf.data, buf.len);
3578  pfree(buf.data);
3579 
3580  return result;
3581 }
3582 
3583 #define HEXBASE 16
3584 /*
3585  * Convert a int32 to a string containing a base 16 (hex) representation of
3586  * the number.
3587  */
3588 Datum
3590 {
3592  char *ptr;
3593  const char *digits = "0123456789abcdef";
3594  char buf[32]; /* bigger than needed, but reasonable */
3595 
3596  ptr = buf + sizeof(buf) - 1;
3597  *ptr = '\0';
3598 
3599  do
3600  {
3601  *--ptr = digits[value % HEXBASE];
3602  value /= HEXBASE;
3603  } while (ptr > buf && value);
3604 
3606 }
3607 
3608 /*
3609  * Convert a int64 to a string containing a base 16 (hex) representation of
3610  * the number.
3611  */
3612 Datum
3614 {
3615  uint64 value = (uint64) PG_GETARG_INT64(0);
3616  char *ptr;
3617  const char *digits = "0123456789abcdef";
3618  char buf[32]; /* bigger than needed, but reasonable */
3619 
3620  ptr = buf + sizeof(buf) - 1;
3621  *ptr = '\0';
3622 
3623  do
3624  {
3625  *--ptr = digits[value % HEXBASE];
3626  value /= HEXBASE;
3627  } while (ptr > buf && value);
3628 
3630 }
3631 
3632 /*
3633  * Create an md5 hash of a text string and return it as hex
3634  *
3635  * md5 produces a 16 byte (128 bit) hash; double it for hex
3636  */
3637 #define MD5_HASH_LEN 32
3638 
3639 Datum
3641 {
3642  text *in_text = PG_GETARG_TEXT_PP(0);
3643  size_t len;
3644  char hexsum[MD5_HASH_LEN + 1];
3645 
3646  /* Calculate the length of the buffer using varlena metadata */
3647  len = VARSIZE_ANY_EXHDR(in_text);
3648 
3649  /* get the hash result */
3650  if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
3651  ereport(ERROR,
3652  (errcode(ERRCODE_OUT_OF_MEMORY),
3653  errmsg("out of memory")));
3654 
3655  /* convert to text and return it */
3657 }
3658 
3659 /*
3660  * Create an md5 hash of a bytea field and return it as a hex string:
3661  * 16-byte md5 digest is represented in 32 hex characters.
3662  */
3663 Datum
3665 {
3666  bytea *in = PG_GETARG_BYTEA_PP(0);
3667  size_t len;
3668  char hexsum[MD5_HASH_LEN + 1];
3669 
3670  len = VARSIZE_ANY_EXHDR(in);
3671  if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
3672  ereport(ERROR,
3673  (errcode(ERRCODE_OUT_OF_MEMORY),
3674  errmsg("out of memory")));
3675 
3677 }
3678 
3679 /*
3680  * Return the size of a datum, possibly compressed
3681  *
3682  * Works on any data type
3683  */
/*
 * Returns an int32 size for the first argument, chosen by the argument
 * type's typlen: toast_datum_size() for typlen -1, strlen()+1 for typlen -2,
 * and typlen itself otherwise.  The typlen is looked up once and cached in
 * fn_extra.
 *
 * NOTE(review): the function-name line (listing line 3685) and the first
 * declaration (listing line 3687) are absent from this extract.  The body
 * uses a variable named "value" that is not declared in the visible lines;
 * presumably line 3687 declares it from argument 0 -- confirm upstream.
 */
3684 Datum
3686 {
3688  int32 result;
3689  int typlen;
3690 
3691  /* On first call, get the input type's typlen, and save at *fn_extra */
3692  if (fcinfo->flinfo->fn_extra == NULL)
3693  {
3694  /* Lookup the datatype of the supplied argument */
3695  Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
3696 
3697  typlen = get_typlen(argtypeid);
3698  if (typlen == 0) /* should not happen */
3699  elog(ERROR, "cache lookup failed for type %u", argtypeid);
3700 
/* the cache cell is allocated in fn_mcxt, not the current context */
3701  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3702  sizeof(int));
3703  *((int *) fcinfo->flinfo->fn_extra) = typlen;
3704  }
3705  else
3706  typlen = *((int *) fcinfo->flinfo->fn_extra);
3707 
3708  if (typlen == -1)
3709  {
3710  /* varlena type, possibly toasted */
3711  result = toast_datum_size(value);
3712  }
3713  else if (typlen == -2)
3714  {
3715  /* cstring */
/* the +1 counts one byte beyond strlen() (presumably the terminator) */
3716  result = strlen(DatumGetCString(value)) + 1;
3717  }
3718  else
3719  {
3720  /* ordinary fixed-width type */
3721  result = typlen;
3722  }
3723 
3724  PG_RETURN_INT32(result);
3725 }
3726 
3727 /*
3728  * string_agg - Concatenates values and returns string.
3729  *
3730  * Syntax: string_agg(value text, delimiter text) RETURNS text
3731  *
3732  * Note: Any NULL values are ignored. The first-call delimiter isn't
3733  * actually used at all, and on subsequent calls the delimiter precedes
3734  * the associated value.
3735  */
3736 
3737 /* subroutine to initialize state */
/*
 * Allocates a fresh StringInfo inside the aggregate memory context reported
 * by AggCheckCallContext() and returns it; errors out if the call is not in
 * an aggregate context.
 *
 * NOTE(review): the function-name line (listing line 3739) is absent from
 * this extract.  The body uses "fcinfo" and a later caller in this file
 * invokes makeStringAggState(fcinfo), so the line is presumably
 * "makeStringAggState(FunctionCallInfo fcinfo)" -- confirm upstream.
 */
3738 static StringInfo
3740 {
3741  StringInfo state;
3742  MemoryContext aggcontext;
3743  MemoryContext oldcontext;
3744 
3745  if (!AggCheckCallContext(fcinfo, &aggcontext))
3746  {
3747  /* cannot be called directly because of internal-type argument */
3748  elog(ERROR, "string_agg_transfn called in non-aggregate context");
3749  }
3750 
3751  /*
3752  * Create state in aggregate context. It'll stay there across subsequent
3753  * calls.
3754  */
3755  oldcontext = MemoryContextSwitchTo(aggcontext);
3756  state = makeStringInfo();
/* restore the caller's memory context before returning */
3757  MemoryContextSwitchTo(oldcontext);
3758 
3759  return state;
3760 }
3761 
/*
 * Transition step: argument 0 is the running StringInfo state (or NULL),
 * argument 1 the value, argument 2 the delimiter.  A NULL value leaves the
 * state untouched; otherwise the state is created on first use, or the
 * non-NULL delimiter is appended, and then the value is appended.
 *
 * NOTE(review): the function-name line (listing line 3763) is absent from
 * this extract; the error text in the state-initializer subroutine suggests
 * "string_agg_transfn(PG_FUNCTION_ARGS)" -- confirm upstream.
 */
3762 Datum
3764 {
3765  StringInfo state;
3766 
3767  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
3768 
3769  /* Append the value unless null. */
3770  if (!PG_ARGISNULL(1))
3771  {
3772  /* On the first time through, we ignore the delimiter. */
3773  if (state == NULL)
3774  state = makeStringAggState(fcinfo);
3775  else if (!PG_ARGISNULL(2))
3776  appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
3777 
3778  appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
3779  }
3780 
3781  /*
3782  * The transition type for string_agg() is declared to be "internal",
3783  * which is a pass-by-value type the same size as a pointer.
3784  */
/* state is still NULL here if no non-NULL value has been seen yet */
3785  PG_RETURN_POINTER(state);
3786 }
3787 
/*
 * Final step: reads the StringInfo state from argument 0 and returns SQL
 * NULL when there is no state.
 *
 * NOTE(review): the function-name line (listing line 3789) is absent from
 * this extract; presumably "string_agg_finalfn(PG_FUNCTION_ARGS)" --
 * confirm upstream.
 */
3788 Datum
3790 {
3791  StringInfo state;
3792 
3793  /* cannot be called directly because of internal-type argument */
3794  Assert(AggCheckCallContext(fcinfo, NULL));
3795 
3796  state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
3797 
3798  if (state != NULL)
/*
 * NOTE(review): the statement for the non-NULL branch (listing line 3799)
 * is absent from this extract; presumably it returns the accumulated
 * buffer as text -- confirm upstream.
 */
3800  else
3801  PG_RETURN_NULL();
3802 }
3803 
3804 /*
3805  * Implementation of both concat() and concat_ws().
3806  *
3807  * sepstr is the separator string to place between values.
3808  * argidx identifies the first argument to concatenate (counting from zero).
3809  * Returns NULL if result should be NULL, else text value.
3810  */
/*
 * Shared worker: for a VARIADIC-array call it hands the array to
 * array_to_text_internal(); otherwise it walks arguments argidx..PG_NARGS()-1,
 * skips NULLs, and joins the remaining values with sepstr.
 *
 * NOTE(review): listing lines 3843, 3862 and 3878 are absent from this
 * extract; see the notes at each gap below.
 */
3811 static text *
3812 concat_internal(const char *sepstr, int argidx,
3813  FunctionCallInfo fcinfo)
3814 {
3815  text *result;
3816  StringInfoData str;
3817  bool first_arg = true;
3818  int i;
3819 
3820  /*
3821  * concat(VARIADIC some-array) is essentially equivalent to
3822  * array_to_text(), ie concat the array elements with the given separator.
3823  * So we just pass the case off to that code.
3824  */
3825  if (get_fn_expr_variadic(fcinfo->flinfo))
3826  {
3827  ArrayType *arr;
3828 
3829  /* Should have just the one argument */
3830  Assert(argidx == PG_NARGS() - 1);
3831 
3832  /* concat(VARIADIC NULL) is defined as NULL */
3833  if (PG_ARGISNULL(argidx))
3834  return NULL;
3835 
3836  /*
3837  * Non-null argument had better be an array. We assume that any call
3838  * context that could let get_fn_expr_variadic return true will have
3839  * checked that a VARIADIC-labeled parameter actually is an array. So
3840  * it should be okay to just Assert that it's an array rather than
3841  * doing a full-fledged error check.
3842  */
/*
 * NOTE(review): the Assert described by the comment above (listing line
 * 3843) is absent from this extract.
 */
3844 
3845  /* OK, safe to fetch the array value */
3846  arr = PG_GETARG_ARRAYTYPE_P(argidx);
3847 
3848  /*
3849  * And serialize the array. We tell array_to_text to ignore null
3850  * elements, which matches the behavior of the loop below.
3851  */
3852  return array_to_text_internal(fcinfo, arr, sepstr, NULL);
3853  }
3854 
3855  /* Normal case without explicit VARIADIC marker */
3856  initStringInfo(&str);
3857 
3858  for (i = argidx; i < PG_NARGS(); i++)
3859  {
3860  if (!PG_ARGISNULL(i))
3861  {
/*
 * NOTE(review): listing line 3862 is absent; "value" is used below but not
 * declared in the visible lines, so it is presumably declared here from
 * argument i -- confirm upstream.
 */
3863  Oid valtype;
3864  Oid typOutput;
3865  bool typIsVarlena;
3866 
3867  /* add separator if appropriate */
3868  if (first_arg)
3869  first_arg = false;
3870  else
3871  appendStringInfoString(&str, sepstr);
3872 
3873  /* call the appropriate type output function, append the result */
3874  valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
3875  if (!OidIsValid(valtype))
3876  elog(ERROR, "could not determine data type of concat() input");
3877  getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
/*
 * NOTE(review): the opening of this statement (listing line 3878) is
 * absent; the line below is only its trailing argument.  Per the comment
 * above it presumably appends the output-function result to str.
 */
3879  OidOutputFunctionCall(typOutput, value));
3880  }
3881  }
3882 
/* copy the accumulated bytes into a text value, then release the buffer */
3883  result = cstring_to_text_with_len(str.data, str.len);
3884  pfree(str.data);
3885 
3886  return result;
3887 }
3888 
3889 /*
3890  * Concatenate all arguments. NULL arguments are ignored.
3891  */
/*
 * Calls concat_internal() with an empty separator starting at argument 0,
 * and maps a NULL result to SQL NULL.
 *
 * NOTE(review): the function-name line (listing line 3893) is absent from
 * this extract; presumably "text_concat(PG_FUNCTION_ARGS)" -- confirm
 * upstream.
 */
3892 Datum
3894 {
3895  text *result;
3896 
3897  result = concat_internal("", 0, fcinfo);
3898  if (result == NULL)
3899  PG_RETURN_NULL();
3900  PG_RETURN_TEXT_P(result);
3901 }
3902 
3903 /*
3904  * Concatenate all but first argument value with separators. The first
3905  * parameter is used as the separator. NULL arguments are ignored.
3906  */
/*
 * Returns SQL NULL if argument 0 (the separator) is NULL; otherwise calls
 * concat_internal() with "sep" starting at argument 1 and maps a NULL result
 * to SQL NULL.
 *
 * NOTE(review): the function-name line (listing line 3908) is absent from
 * this extract; presumably "text_concat_ws(PG_FUNCTION_ARGS)" -- confirm
 * upstream.
 */
3907 Datum
3909 {
3910  char *sep;
3911  text *result;
3912 
3913  /* return NULL when separator is NULL */
3914  if (PG_ARGISNULL(0))
3915  PG_RETURN_NULL();
/*
 * NOTE(review): listing line 3916 is absent; "sep" is never assigned in the
 * visible lines, so it is presumably set here from argument 0 as a C
 * string -- confirm upstream.
 */
3917 
3918  result = concat_internal(sep, 1, fcinfo);
3919  if (result == NULL)
3920  PG_RETURN_NULL();
3921  PG_RETURN_TEXT_P(result);
3922 }
3923 
3924 /*
3925  * Return first n characters in the string. When n is negative,
3926  * return all but last |n| characters.
3927  */
/*
 * Computes rlen, the byte length of the leading n characters of the text
 * argument; a negative n is first rebased against the character count.
 *
 * NOTE(review): the function-name line (listing line 3929) is absent from
 * this extract; presumably "text_left(PG_FUNCTION_ARGS)" -- confirm
 * upstream.
 */
3928 Datum
3930 {
3931  text *str = PG_GETARG_TEXT_PP(0);
3932  const char *p = VARDATA_ANY(str);
3933  int len = VARSIZE_ANY_EXHDR(str);
3934  int n = PG_GETARG_INT32(1);
3935  int rlen;
3936 
/* negative n: keep (character count + n) characters */
3937  if (n < 0)
3938  n = pg_mbstrlen_with_len(p, len) + n;
3939  rlen = pg_mbcharcliplen(p, len, n);
3940 
/*
 * NOTE(review): the return statement (listing line 3941) is absent from
 * this extract; presumably it returns the first rlen bytes of p as text.
 */
3942 }
3943 
3944 /*
3945  * Return last n characters in the string. When n is negative,
3946  * return all but first |n| characters.
3947  */
/*
 * Computes "off", the byte offset reached after skipping n characters, and
 * returns the remainder of the string from that offset.
 *
 * NOTE(review): the function-name line (listing line 3949) is absent from
 * this extract; presumably "text_right(PG_FUNCTION_ARGS)" -- confirm
 * upstream.
 */
3948 Datum
3950 {
3951  text *str = PG_GETARG_TEXT_PP(0);
3952  const char *p = VARDATA_ANY(str);
3953  int len = VARSIZE_ANY_EXHDR(str);
3954  int n = PG_GETARG_INT32(1);
3955  int off;
3956 
/*
 * From here on n is the number of leading characters to skip.
 * NOTE(review): "-n" overflows when n is the most negative int, which is
 * undefined behavior for signed int in C; a guard may be warranted.
 */
3957  if (n < 0)
3958  n = -n;
3959  else
3960  n = pg_mbstrlen_with_len(p, len) - n;
3961  off = pg_mbcharcliplen(p, len, n);
3962 
3963  PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
3964 }
3965 
3966 /*
3967  * Return reversed string
3968  */
/*
 * Builds a new text value of the same byte length as the input, filling it
 * from the end toward the start while reading the input front to back.
 *
 * NOTE(review): the function-name line (listing line 3970) is absent from
 * this extract; presumably "text_reverse(PG_FUNCTION_ARGS)" -- confirm
 * upstream.
 */
3969 Datum
3971 {
3972  text *str = PG_GETARG_TEXT_PP(0);
3973  const char *p = VARDATA_ANY(str);
3974  int len = VARSIZE_ANY_EXHDR(str);
3975  const char *endp = p + len;
3976  text *result;
3977  char *dst;
3978 
3979  result = palloc(len + VARHDRSZ);
/* dst starts one past the last payload byte and is moved backwards */
3980  dst = (char *) VARDATA(result) + len;
3981  SET_VARSIZE(result, len + VARHDRSZ);
3982 
/*
 * NOTE(review): the "if" condition selecting the multibyte branch (listing
 * line 3983) is absent from this extract; presumably a test of the database
 * encoding's maximum character length -- confirm upstream.
 */
3984  {
3985  /* multibyte version */
3986  while (p < endp)
3987  {
3988  int sz;
3989 
/* copy each character as a unit so its bytes keep their order */
3990  sz = pg_mblen(p);
3991  dst -= sz;
3992  memcpy(dst, p, sz);
3993  p += sz;
3994  }
3995  }
3996  else
3997  {
3998  /* single byte version */
3999  while (p < endp)
4000  *(--dst) = *p++;
4001  }
4002 
4003  PG_RETURN_TEXT_P(result);
4004 }
4005 
4006 
4007 /*
4008  * Support macros for text_format()
4009  */
4010 #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus flag present? */
4011 
/*
 * Pre-increments ptr and raises ERRCODE_INVALID_PARAMETER_VALUE
 * ("unterminated format specifier") if it has reached or passed end_ptr.
 * ptr is modified in place, so callers must pass a plain lvalue.
 */
4012 #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
4013  do { \
4014  if (++(ptr) >= (end_ptr)) \
4015  ereport(ERROR, \
4016  (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
4017  errmsg("unterminated format specifier"))); \
4018  } while (0)
4019 
4020 /*
4021  * Returns a formatted string
4022  */
/*
 * Scans the format string in argument 0, copies ordinary characters to the
 * output, and for each "%" specifier fetches the selected argument (from the
 * call arguments, or from the deconstructed VARIADIC array) and formats it
 * through text_format_string_conversion().  Returns SQL NULL when the format
 * string is NULL.
 *
 * NOTE(review): the function-name line (listing line 4024) is absent from
 * this extract; a wrapper later in this file calls text_format(fcinfo), so
 * it is presumably "text_format(PG_FUNCTION_ARGS)" -- confirm upstream.
 */
4023 Datum
4025 {
4026  text *fmt;
4027  StringInfoData str;
4028  const char *cp;
4029  const char *start_ptr;
4030  const char *end_ptr;
4031  text *result;
4032  int arg;
4033  bool funcvariadic;
4034  int nargs;
4035  Datum *elements = NULL;
4036  bool *nulls = NULL;
4037  Oid element_type = InvalidOid;
4038  Oid prev_type = InvalidOid;
4039  Oid prev_width_type = InvalidOid;
4040  FmgrInfo typoutputfinfo;
4041  FmgrInfo typoutputinfo_width;
4042 
4043  /* When format string is null, immediately return null */
4044  if (PG_ARGISNULL(0))
4045  PG_RETURN_NULL();
4046 
4047  /* If argument is marked VARIADIC, expand array into elements */
4048  if (get_fn_expr_variadic(fcinfo->flinfo))
4049  {
4050  ArrayType *arr;
4051  int16 elmlen;
4052  bool elmbyval;
4053  char elmalign;
4054  int nitems;
4055 
4056  /* Should have just the one argument */
4057  Assert(PG_NARGS() == 2);
4058 
4059  /* If argument is NULL, we treat it as zero-length array */
4060  if (PG_ARGISNULL(1))
4061  nitems = 0;
4062  else
4063  {
4064  /*
4065  * Non-null argument had better be an array. We assume that any
4066  * call context that could let get_fn_expr_variadic return true
4067  * will have checked that a VARIADIC-labeled parameter actually is
4068  * an array. So it should be okay to just Assert that it's an
4069  * array rather than doing a full-fledged error check.
4070  */
/*
 * NOTE(review): the Assert described by the comment above (listing line
 * 4071) is absent from this extract.
 */
4072 
4073  /* OK, safe to fetch the array value */
4074  arr = PG_GETARG_ARRAYTYPE_P(1);
4075 
4076  /* Get info about array element type */
4077  element_type = ARR_ELEMTYPE(arr);
4078  get_typlenbyvalalign(element_type,
4079  &elmlen, &elmbyval, &elmalign);
4080 
4081  /* Extract all array elements */
4082  deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
4083  &elements, &nulls, &nitems);
4084  }
4085 
/* +1 so that nargs counts the format string too, as PG_NARGS() does */
4086  nargs = nitems + 1;
4087  funcvariadic = true;
4088  }
4089  else
4090  {
4091  /* Non-variadic case, we'll process the arguments individually */
4092  nargs = PG_NARGS();
4093  funcvariadic = false;
4094  }
4095 
4096  /* Setup for main loop. */
4097  fmt = PG_GETARG_TEXT_PP(0);
4098  start_ptr = VARDATA_ANY(fmt);
4099  end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
4100  initStringInfo(&str);
4101  arg = 1; /* next argument position to print */
4102 
4103  /* Scan format string, looking for conversion specifiers. */
4104  for (cp = start_ptr; cp < end_ptr; cp++)
4105  {
4106  int argpos;
4107  int widthpos;
4108  int flags;
4109  int width;
4110  Datum value;
4111  bool isNull;
4112  Oid typid;
4113 
4114  /*
4115  * If it's not the start of a conversion specifier, just copy it to
4116  * the output buffer.
4117  */
4118  if (*cp != '%')
4119  {
4120  appendStringInfoCharMacro(&str, *cp);
4121  continue;
4122  }
4123 
/* step past the '%'; errors out if nothing follows it */
4124  ADVANCE_PARSE_POINTER(cp, end_ptr);
4125 
4126  /* Easy case: %% outputs a single % */
4127  if (*cp == '%')
4128  {
4129  appendStringInfoCharMacro(&str, *cp);
4130  continue;
4131  }
4132 
4133  /* Parse the optional portions of the format specifier */
4134  cp = text_format_parse_format(cp, end_ptr,
4135  &argpos, &widthpos,
4136  &flags, &width);
4137 
4138  /*
4139  * Next we should see the main conversion specifier. Whether or not
4140  * an argument position was present, it's known that at least one
4141  * character remains in the string at this point. Experience suggests
4142  * that it's worth checking that that character is one of the expected
4143  * ones before we try to fetch arguments, so as to produce the least
4144  * confusing response to a mis-formatted specifier.
4145  */
4146  if (strchr("sIL", *cp) == NULL)
4147  ereport(ERROR,
4148  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4149  errmsg("unrecognized conversion type specifier \"%c\"",
4150  *cp)));
4151 
4152  /* If indirect width was specified, get its value */
4153  if (widthpos >= 0)
4154  {
4155  /* Collect the specified or next argument position */
4156  if (widthpos > 0)
4157  arg = widthpos;
4158  if (arg >= nargs)
4159  ereport(ERROR,
4160  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4161  errmsg("too few arguments for format")));
4162 
4163  /* Get the value and type of the selected argument */
4164  if (!funcvariadic)
4165  {
4166  value = PG_GETARG_DATUM(arg);
4167  isNull = PG_ARGISNULL(arg);
4168  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4169  }
4170  else
4171  {
/* array elements are 0-based while arg positions start at 1 */
4172  value = elements[arg - 1];
4173  isNull = nulls[arg - 1];
4174  typid = element_type;
4175  }
4176  if (!OidIsValid(typid))
4177  elog(ERROR, "could not determine data type of format() input");
4178 
4179  arg++;
4180 
4181  /* We can treat NULL width the same as zero */
4182  if (isNull)
4183  width = 0;
4184  else if (typid == INT4OID)
4185  width = DatumGetInt32(value);
4186  else if (typid == INT2OID)
4187  width = DatumGetInt16(value);
4188  else
4189  {
4190  /* For less-usual datatypes, convert to text then to int */
/* note: this local shadows the function-level StringInfoData "str" */
4191  char *str;
4192 
4193  if (typid != prev_width_type)
4194  {
4195  Oid typoutputfunc;
4196  bool typIsVarlena;
4197 
4198  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4199  fmgr_info(typoutputfunc, &typoutputinfo_width);
4200  prev_width_type = typid;
4201  }
4202 
4203  str = OutputFunctionCall(&typoutputinfo_width, value);
4204 
4205  /* pg_atoi will complain about bad data or overflow */
4206  width = pg_atoi(str, sizeof(int), '\0');
4207 
4208  pfree(str);
4209  }
4210  }
4211 
4212  /* Collect the specified or next argument position */
4213  if (argpos > 0)
4214  arg = argpos;
4215  if (arg >= nargs)
4216  ereport(ERROR,
4217  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4218  errmsg("too few arguments for format")));
4219 
4220  /* Get the value and type of the selected argument */
4221  if (!funcvariadic)
4222  {
4223  value = PG_GETARG_DATUM(arg);
4224  isNull = PG_ARGISNULL(arg);
4225  typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
4226  }
4227  else
4228  {
/* array elements are 0-based while arg positions start at 1 */
4229  value = elements[arg - 1];
4230  isNull = nulls[arg - 1];
4231  typid = element_type;
4232  }
4233  if (!OidIsValid(typid))
4234  elog(ERROR, "could not determine data type of format() input");
4235 
4236  arg++;
4237 
4238  /*
4239  * Get the appropriate typOutput function, reusing previous one if
4240  * same type as previous argument. That's particularly useful in the
4241  * variadic-array case, but often saves work even for ordinary calls.
4242  */
4243  if (typid != prev_type)
4244  {
4245  Oid typoutputfunc;
4246  bool typIsVarlena;
4247 
4248  getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
4249  fmgr_info(typoutputfunc, &typoutputfinfo);
4250  prev_type = typid;
4251  }
4252 
4253  /*
4254  * And now we can format the value.
4255  */
4256  switch (*cp)
4257  {
4258  case 's':
4259  case 'I':
4260  case 'L':
4261  text_format_string_conversion(&str, *cp, &typoutputfinfo,
4262  value, isNull,
4263  flags, width);
4264  break;
4265  default:
4266  /* should not get here, because of previous check */
4267  ereport(ERROR,
4268  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4269  errmsg("unrecognized conversion type specifier \"%c\"",
4270  *cp)));
4271  break;
4272  }
4273  }
4274 
4275  /* Don't need deconstruct_array results anymore. */
4276  if (elements != NULL)
4277  pfree(elements);
4278  if (nulls != NULL)
4279  pfree(nulls);
4280 
4281  /* Generate results. */
4282  result = cstring_to_text_with_len(str.data, str.len);
4283  pfree(str.data);
4284 
4285  PG_RETURN_TEXT_P(result);
4286 }
4287 
4288 /*
4289  * Parse contiguous digits as a decimal number.
4290  *
4291  * Returns true if some digits could be parsed.
4292  * The value is returned into *value, and *ptr is advanced to the next
4293  * character to be parsed.
4294  *
4295  * Note parsing invariant: at least one character is known available before
4296  * string end (end_ptr) at entry, and this is still true at exit.
4297  */
4298 static bool
4299 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
4300 {
4301  bool found = false;
4302  const char *cp = *ptr;
4303  int val = 0;
4304 
4305  while (*cp >= '0' && *cp <= '9')
4306  {
4307  int newval = val * 10 + (*cp - '0');
4308 
4309  if (newval / 10 != val) /* overflow? */
4310  ereport(ERROR,
4311  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4312  errmsg("number is out of range")));
4313  val = newval;
4314  ADVANCE_PARSE_POINTER(cp, end_ptr);
4315  found = true;
4316  }
4317 
4318  *ptr = cp;
4319  *value = val;
4320 
4321  return found;
4322 }
4323 
4324 /*
4325  * Parse a format specifier (generally following the SUS printf spec).
4326  *
4327  * We have already advanced over the initial '%', and we are looking for
4328  * [argpos][flags][width]type (but the type character is not consumed here).
4329  *
4330  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
4331  * Output parameters:
4332  * argpos: argument position for value to be printed. -1 means unspecified.
4333  * widthpos: argument position for width. Zero means the argument position
4334  * was unspecified (ie, take the next arg) and -1 means no width
4335  * argument (width was omitted or specified as a constant).
4336  * flags: bitmask of flags.
4337  * width: directly-specified width value. Zero means the width was omitted
4338  * (note it's not necessary to distinguish this case from an explicit
4339  * zero width value).
4340  *
4341  * The function result is the next character position to be parsed, ie, the
4342  * location where the type character is/should be.
4343  *
4344  * Note parsing invariant: at least one character is known available before
4345  * string end (end_ptr) at entry, and this is still true at exit.
4346  */
4347 static const char *
4348 text_format_parse_format(const char *start_ptr, const char *end_ptr,
4349  int *argpos, int *widthpos,
4350  int *flags, int *width)
4351 {
4352  const char *cp = start_ptr;
4353  int n;
4354 
4355  /* set defaults for output parameters */
4356  *argpos = -1;
4357  *widthpos = -1;
4358  *flags = 0;
4359  *width = 0;
4360 
4361  /* try to identify first number */
4362  if (text_format_parse_digits(&cp, end_ptr, &n))
4363  {
4364  if (*cp != '$')
4365  {
4366  /* Must be just a width and a type, so we're done */
4367  *width = n;
4368  return cp;
4369  }
4370  /* The number was argument position */
4371  *argpos = n;
4372  /* Explicit 0 for argument index is immediately refused */
4373  if (n == 0)
4374  ereport(ERROR,
4375  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4376  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4377  ADVANCE_PARSE_POINTER(cp, end_ptr);
4378  }
4379 
4380  /* Handle flags (only minus is supported now) */
4381  while (*cp == '-')
4382  {
4383  *flags |= TEXT_FORMAT_FLAG_MINUS;
4384  ADVANCE_PARSE_POINTER(cp, end_ptr);
4385  }
4386 
4387  if (*cp == '*')
4388  {
4389  /* Handle indirect width */
4390  ADVANCE_PARSE_POINTER(cp, end_ptr);
4391  if (text_format_parse_digits(&cp, end_ptr, &n))
4392  {
4393  /* number in this position must be closed by $ */
4394  if (*cp != '$')
4395  ereport(ERROR,
4396  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4397  errmsg("width argument position must be ended by \"$\"")));
4398  /* The number was width argument position */
4399  *widthpos = n;
4400  /* Explicit 0 for argument index is immediately refused */
4401  if (n == 0)
4402  ereport(ERROR,
4403  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4404  errmsg("format specifies argument 0, but arguments are numbered from 1")));
4405  ADVANCE_PARSE_POINTER(cp, end_ptr);
4406  }
4407  else
4408  *widthpos = 0; /* width's argument position is unspecified */
4409  }
4410  else
4411  {
4412  /* Check for direct width specification */
4413  if (text_format_parse_digits(&cp, end_ptr, &n))
4414  *width = n;
4415  }
4416 
4417  /* cp should now be pointing at type character */
4418  return cp;
4419 }
4420 
4421 /*
4422  * Format a %s, %I, or %L conversion
4423  */
/*
 * Appends one converted argument to buf.  A NULL argument becomes "" for
 * 's', "NULL" for 'L', and an error for 'I'.  A non-NULL argument is turned
 * into a C string with the given output function, then appended as-is
 * ('s'), through quote_identifier() ('I'), or through quote_literal_cstr()
 * ('L').
 *
 * NOTE(review): the first line of the parameter list (listing line 4425,
 * carrying the function name) is absent from this extract.  The body uses
 * "buf" and "conversion", and the caller in this file passes
 * (&str, *cp, ...), so it presumably declares those two parameters --
 * confirm upstream.
 */
4424 static void
4426  FmgrInfo *typOutputInfo,
4427  Datum value, bool isNull,
4428  int flags, int width)
4429 {
4430  char *str;
4431 
4432  /* Handle NULL arguments before trying to stringify the value. */
4433  if (isNull)
4434  {
4435  if (conversion == 's')
4436  text_format_append_string(buf, "", flags, width);
4437  else if (conversion == 'L')
4438  text_format_append_string(buf, "NULL", flags, width);
4439  else if (conversion == 'I')
4440  ereport(ERROR,
4441  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4442  errmsg("null values cannot be formatted as an SQL identifier")));
4443  return;
4444  }
4445 
4446  /* Stringify. */
4447  str = OutputFunctionCall(typOutputInfo, value);
4448 
4449  /* Escape. */
4450  if (conversion == 'I')
4451  {
4452  /* quote_identifier may or may not allocate a new string. */
/* so its result is not pfree'd here; only str is released below */
4453  text_format_append_string(buf, quote_identifier(str), flags, width);
4454  }
4455  else if (conversion == 'L')
4456  {
4457  char *qstr = quote_literal_cstr(str);
4458 
4459  text_format_append_string(buf, qstr, flags, width);
4460  /* quote_literal_cstr() always allocates a new string */
4461  pfree(qstr);
4462  }
4463  else
4464  text_format_append_string(buf, str, flags, width);
4465 
4466  /* Cleanup. */
4467  pfree(str);
4468 }
4469 
4470 /*
4471  * Append str to buf, padding as directed by flags/width
4472  */
/*
 * Appends str to buf.  With width 0 the string is appended unpadded.
 * Otherwise it is padded with spaces up to |width| characters, on the right
 * when width is negative or TEXT_FORMAT_FLAG_MINUS is set, and on the left
 * otherwise.  A string already at least that long is appended unpadded.
 *
 * NOTE(review): the first line of the parameter list (listing line 4474,
 * carrying the function name) is absent from this extract.  The body uses
 * "buf" and "str", and callers in this file pass (buf, <C string>, ...), so
 * it presumably declares those two parameters -- confirm upstream.
 */
4473 static void
4475  int flags, int width)
4476 {
4477  bool align_to_left = false;
4478  int len;
4479 
4480  /* fast path for typical easy case */
4481  if (width == 0)
4482  {
4483  appendStringInfoString(buf, str);
4484  return;
4485  }
4486 
4487  if (width < 0)
4488  {
4489  /* Negative width: implicit '-' flag, then take absolute value */
4490  align_to_left = true;
4491  /* -INT_MIN is undefined */
4492  if (width <= INT_MIN)
4493  ereport(ERROR,
4494  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4495  errmsg("number is out of range")));
4496  width = -width;
4497  }
4498  else if (flags & TEXT_FORMAT_FLAG_MINUS)
4499  align_to_left = true;
4500 
/* padding is computed from pg_mbstrlen(), not from the byte length */
4501  len = pg_mbstrlen(str);
4502  if (align_to_left)
4503  {
4504  /* left justify */
4505  appendStringInfoString(buf, str);
4506  if (len < width)
4507  appendStringInfoSpaces(buf, width - len);
4508  }
4509  else
4510  {
4511  /* right justify */
4512  if (len < width)
4513  appendStringInfoSpaces(buf, width - len);
4514  appendStringInfoString(buf, str);
4515  }
4516 }
4517 
4518 /*
4519  * text_format_nv - nonvariadic wrapper for text_format function.
4520  *
4521  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
4522  * which checks that all built-in functions that share the implementing C
4523  * function take the same number of arguments.
4524  */
/*
 * Forwards the call unchanged to text_format().
 *
 * NOTE(review): the function-name line (listing line 4526) is absent from
 * this extract; per the header comment this is presumably
 * "text_format_nv(PG_FUNCTION_ARGS)" -- confirm upstream.
 */
4525 Datum
4527 {
4528  return text_format(fcinfo);
4529 }