PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
varlena.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * varlena.c
4 * Functions for the variable-length built-in types.
5 *
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/adt/varlena.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include <ctype.h>
18#include <limits.h>
19
20#include "access/detoast.h"
23#include "catalog/pg_type.h"
24#include "common/hashfn.h"
25#include "common/int.h"
27#include "common/unicode_norm.h"
29#include "funcapi.h"
30#include "lib/hyperloglog.h"
31#include "libpq/pqformat.h"
32#include "miscadmin.h"
33#include "nodes/execnodes.h"
34#include "parser/scansup.h"
35#include "port/pg_bswap.h"
36#include "regex/regex.h"
37#include "utils/builtins.h"
38#include "utils/bytea.h"
39#include "utils/guc.h"
40#include "utils/lsyscache.h"
41#include "utils/memutils.h"
42#include "utils/pg_locale.h"
43#include "utils/sortsupport.h"
44#include "utils/varlena.h"
45
46
47/* GUC variable */
49
50typedef struct varlena VarString;
51
/*
 * State for text_position_* functions.
 */
typedef struct
{
	bool		is_multibyte_char_in_char;	/* need to check char boundaries? */

	char	   *str1;			/* haystack string */
	char	   *str2;			/* needle string */
	int			len1;			/* string lengths in bytes */
	int			len2;

	/* Skip table for Boyer-Moore-Horspool search algorithm: */
	int			skiptablemask;	/* mask for ANDing with skiptable subscripts */
	int			skiptable[256]; /* skip distance for given mismatched char */

	char	   *last_match;		/* pointer to last match in 'str1' */

	/*
	 * Sometimes we need to convert the byte position of a match to a
	 * character position.  These store the last position that was converted,
	 * so that on the next call, we can continue from that point, rather than
	 * count characters from the very beginning.
	 */
	char	   *refpoint;		/* pointer within original haystack string */
	int			refpos;			/* 0-based character offset of the same point */
} TextPositionState;
79
80typedef struct
81{
82 char *buf1; /* 1st string, or abbreviation original string
83 * buf */
84 char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
85 int buflen1; /* Allocated length of buf1 */
86 int buflen2; /* Allocated length of buf2 */
87 int last_len1; /* Length of last buf1 string/strxfrm() input */
88 int last_len2; /* Length of last buf2 string/strxfrm() blob */
89 int last_returned; /* Last comparison result (cache) */
90 bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
92 Oid typid; /* Actual datatype (text/bpchar/bytea/name) */
93 hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
94 hyperLogLogState full_card; /* Full key cardinality state */
95 double prop_card; /* Required cardinality proportion */
98
99/*
100 * Output data for split_text(): we output either to an array or a table.
101 * tupstore and tupdesc must be set up in advance to output to a table.
102 */
103typedef struct
104{
109
/*
 * This should be large enough that most strings will fit, but small enough
 * that we feel comfortable putting it on the stack
 */
#define TEXTBUFLEN		1024

/*
 * Fetch a Datum as a VarString pointer.  The "P" form goes through
 * PG_DETOAST_DATUM, the "PP" form through PG_DETOAST_DATUM_PACKED.
 */
#define DatumGetVarStringP(X)		((VarString *) PG_DETOAST_DATUM(X))
#define DatumGetVarStringPP(X)		((VarString *) PG_DETOAST_DATUM_PACKED(X))
118
119static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
120static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
121static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
123static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
124static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
125static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
126static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
127static int32 text_length(Datum str);
128static text *text_catenate(text *t1, text *t2);
130 int32 start,
131 int32 length,
132 bool length_not_specified);
133static text *text_overlay(text *t1, text *t2, int sp, int sl);
134static int text_position(text *t1, text *t2, Oid collid);
137static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
141static void check_collation_set(Oid collid);
142static int text_cmp(text *arg1, text *arg2, Oid collid);
143static bytea *bytea_catenate(bytea *t1, bytea *t2);
145 int S,
146 int L,
147 bool length_not_specified);
148static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
149static void appendStringInfoText(StringInfo str, const text *t);
150static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
152 text *field_value,
153 text *null_string,
154 Oid collation);
156 const char *fldsep, const char *null_string);
158static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
159 int *value);
160static const char *text_format_parse_format(const char *start_ptr,
161 const char *end_ptr,
162 int *argpos, int *widthpos,
163 int *flags, int *width);
164static void text_format_string_conversion(StringInfo buf, char conversion,
165 FmgrInfo *typOutputInfo,
166 Datum value, bool isNull,
167 int flags, int width);
168static void text_format_append_string(StringInfo buf, const char *str,
169 int flags, int width);
170
171
172/*****************************************************************************
173 * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
174 *****************************************************************************/
175
176/*
177 * cstring_to_text
178 *
179 * Create a text value from a null-terminated C string.
180 *
181 * The new text value is freshly palloc'd with a full-size VARHDR.
182 */
183text *
184cstring_to_text(const char *s)
185{
186 return cstring_to_text_with_len(s, strlen(s));
187}
188
189/*
190 * cstring_to_text_with_len
191 *
192 * Same as cstring_to_text except the caller specifies the string length;
193 * the string need not be null_terminated.
194 */
195text *
196cstring_to_text_with_len(const char *s, int len)
197{
198 text *result = (text *) palloc(len + VARHDRSZ);
199
200 SET_VARSIZE(result, len + VARHDRSZ);
201 memcpy(VARDATA(result), s, len);
202
203 return result;
204}
205
206/*
207 * text_to_cstring
208 *
209 * Create a palloc'd, null-terminated C string from a text value.
210 *
211 * We support being passed a compressed or toasted text value.
212 * This is a bit bogus since such values shouldn't really be referred to as
213 * "text *", but it seems useful for robustness. If we didn't handle that
214 * case here, we'd need another routine that did, anyway.
215 */
216char *
218{
219 /* must cast away the const, unfortunately */
220 text *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
221 int len = VARSIZE_ANY_EXHDR(tunpacked);
222 char *result;
223
224 result = (char *) palloc(len + 1);
225 memcpy(result, VARDATA_ANY(tunpacked), len);
226 result[len] = '\0';
227
228 if (tunpacked != t)
229 pfree(tunpacked);
230
231 return result;
232}
233
234/*
235 * text_to_cstring_buffer
236 *
237 * Copy a text value into a caller-supplied buffer of size dst_len.
238 *
239 * The text string is truncated if necessary to fit. The result is
240 * guaranteed null-terminated (unless dst_len == 0).
241 *
242 * We support being passed a compressed or toasted text value.
243 * This is a bit bogus since such values shouldn't really be referred to as
244 * "text *", but it seems useful for robustness. If we didn't handle that
245 * case here, we'd need another routine that did, anyway.
246 */
247void
248text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
249{
250 /* must cast away the const, unfortunately */
251 text *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
252 size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
253
254 if (dst_len > 0)
255 {
256 dst_len--;
257 if (dst_len >= src_len)
258 dst_len = src_len;
259 else /* ensure truncation is encoding-safe */
260 dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
261 memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
262 dst[dst_len] = '\0';
263 }
264
265 if (srcunpacked != src)
266 pfree(srcunpacked);
267}
268
269
270/*****************************************************************************
271 * USER I/O ROUTINES *
272 *****************************************************************************/
273
274
/* Convert between an ASCII digit character and its numeric value */
#define VAL(CH)			((CH) - '0')
#define DIG(VAL)		((VAL) + '0')
277
278/*
279 * byteain - converts from printable representation of byte array
280 *
281 * Non-printable characters must be passed as '\nnn' (octal) and are
282 * converted to internal form. '\' must be passed as '\\'.
283 * ereport(ERROR, ...) if bad form.
284 *
285 * BUGS:
286 * The input is scanned twice.
287 * The error checking of input is minimal.
288 */
289Datum
291{
292 char *inputText = PG_GETARG_CSTRING(0);
293 Node *escontext = fcinfo->context;
294 char *tp;
295 char *rp;
296 int bc;
297 bytea *result;
298
299 /* Recognize hex input */
300 if (inputText[0] == '\\' && inputText[1] == 'x')
301 {
302 size_t len = strlen(inputText);
303
304 bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
305 result = palloc(bc);
306 bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
307 escontext);
308 SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
309
310 PG_RETURN_BYTEA_P(result);
311 }
312
313 /* Else, it's the traditional escaped style */
314 for (bc = 0, tp = inputText; *tp != '\0'; bc++)
315 {
316 if (tp[0] != '\\')
317 tp++;
318 else if ((tp[0] == '\\') &&
319 (tp[1] >= '0' && tp[1] <= '3') &&
320 (tp[2] >= '0' && tp[2] <= '7') &&
321 (tp[3] >= '0' && tp[3] <= '7'))
322 tp += 4;
323 else if ((tp[0] == '\\') &&
324 (tp[1] == '\\'))
325 tp += 2;
326 else
327 {
328 /*
329 * one backslash, not followed by another or ### valid octal
330 */
331 ereturn(escontext, (Datum) 0,
332 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
333 errmsg("invalid input syntax for type %s", "bytea")));
334 }
335 }
336
337 bc += VARHDRSZ;
338
339 result = (bytea *) palloc(bc);
340 SET_VARSIZE(result, bc);
341
342 tp = inputText;
343 rp = VARDATA(result);
344 while (*tp != '\0')
345 {
346 if (tp[0] != '\\')
347 *rp++ = *tp++;
348 else if ((tp[0] == '\\') &&
349 (tp[1] >= '0' && tp[1] <= '3') &&
350 (tp[2] >= '0' && tp[2] <= '7') &&
351 (tp[3] >= '0' && tp[3] <= '7'))
352 {
353 bc = VAL(tp[1]);
354 bc <<= 3;
355 bc += VAL(tp[2]);
356 bc <<= 3;
357 *rp++ = bc + VAL(tp[3]);
358
359 tp += 4;
360 }
361 else if ((tp[0] == '\\') &&
362 (tp[1] == '\\'))
363 {
364 *rp++ = '\\';
365 tp += 2;
366 }
367 else
368 {
369 /*
370 * We should never get here. The first pass should not allow it.
371 */
372 ereturn(escontext, (Datum) 0,
373 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
374 errmsg("invalid input syntax for type %s", "bytea")));
375 }
376 }
377
378 PG_RETURN_BYTEA_P(result);
379}
380
381/*
382 * byteaout - converts to printable representation of byte array
383 *
384 * In the traditional escaped format, non-printable characters are
385 * printed as '\nnn' (octal) and '\' as '\\'.
386 */
387Datum
389{
390 bytea *vlena = PG_GETARG_BYTEA_PP(0);
391 char *result;
392 char *rp;
393
395 {
396 /* Print hex format */
397 rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
398 *rp++ = '\\';
399 *rp++ = 'x';
400 rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
401 }
403 {
404 /* Print traditional escaped format */
405 char *vp;
406 uint64 len;
407 int i;
408
409 len = 1; /* empty string has 1 char */
410 vp = VARDATA_ANY(vlena);
411 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
412 {
413 if (*vp == '\\')
414 len += 2;
415 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
416 len += 4;
417 else
418 len++;
419 }
420
421 /*
422 * In principle len can't overflow uint32 if the input fit in 1GB, but
423 * for safety let's check rather than relying on palloc's internal
424 * check.
425 */
426 if (len > MaxAllocSize)
428 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
429 errmsg_internal("result of bytea output conversion is too large")));
430 rp = result = (char *) palloc(len);
431
432 vp = VARDATA_ANY(vlena);
433 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
434 {
435 if (*vp == '\\')
436 {
437 *rp++ = '\\';
438 *rp++ = '\\';
439 }
440 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
441 {
442 int val; /* holds unprintable chars */
443
444 val = *vp;
445 rp[0] = '\\';
446 rp[3] = DIG(val & 07);
447 val >>= 3;
448 rp[2] = DIG(val & 07);
449 val >>= 3;
450 rp[1] = DIG(val & 03);
451 rp += 4;
452 }
453 else
454 *rp++ = *vp;
455 }
456 }
457 else
458 {
459 elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
461 rp = result = NULL; /* keep compiler quiet */
462 }
463 *rp = '\0';
464 PG_RETURN_CSTRING(result);
465}
466
467/*
468 * bytearecv - converts external binary format to bytea
469 */
470Datum
472{
474 bytea *result;
475 int nbytes;
476
477 nbytes = buf->len - buf->cursor;
478 result = (bytea *) palloc(nbytes + VARHDRSZ);
479 SET_VARSIZE(result, nbytes + VARHDRSZ);
480 pq_copymsgbytes(buf, VARDATA(result), nbytes);
481 PG_RETURN_BYTEA_P(result);
482}
483
484/*
485 * byteasend - converts bytea to binary format
486 *
487 * This is a special case: just copy the input...
488 */
489Datum
491{
492 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
493
494 PG_RETURN_BYTEA_P(vlena);
495}
496
497Datum
499{
501
503
504 /* Append the value unless null, preceding it with the delimiter. */
505 if (!PG_ARGISNULL(1))
506 {
508 bool isfirst = false;
509
510 /*
511 * You might think we can just throw away the first delimiter, however
512 * we must keep it as we may be a parallel worker doing partial
513 * aggregation building a state to send to the main process. We need
514 * to keep the delimiter of every aggregation so that the combine
515 * function can properly join up the strings of two separately
516 * partially aggregated results. The first delimiter is only stripped
517 * off in the final function. To know how much to strip off the front
518 * of the string, we store the length of the first delimiter in the
519 * StringInfo's cursor field, which we don't otherwise need here.
520 */
521 if (state == NULL)
522 {
523 state = makeStringAggState(fcinfo);
524 isfirst = true;
525 }
526
527 if (!PG_ARGISNULL(2))
528 {
529 bytea *delim = PG_GETARG_BYTEA_PP(2);
530
532 VARSIZE_ANY_EXHDR(delim));
533 if (isfirst)
534 state->cursor = VARSIZE_ANY_EXHDR(delim);
535 }
536
539 }
540
541 /*
542 * The transition type for string_agg() is declared to be "internal",
543 * which is a pass-by-value type the same size as a pointer.
544 */
545 if (state)
548}
549
550Datum
552{
554
555 /* cannot be called directly because of internal-type argument */
556 Assert(AggCheckCallContext(fcinfo, NULL));
557
559
560 if (state != NULL)
561 {
562 /* As per comment in transfn, strip data before the cursor position */
563 bytea *result;
564 int strippedlen = state->len - state->cursor;
565
566 result = (bytea *) palloc(strippedlen + VARHDRSZ);
567 SET_VARSIZE(result, strippedlen + VARHDRSZ);
568 memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
569 PG_RETURN_BYTEA_P(result);
570 }
571 else
573}
574
575/*
576 * textin - converts cstring to internal representation
577 */
578Datum
580{
581 char *inputText = PG_GETARG_CSTRING(0);
582
584}
585
586/*
587 * textout - converts internal representation to cstring
588 */
589Datum
591{
592 Datum txt = PG_GETARG_DATUM(0);
593
595}
596
597/*
598 * textrecv - converts external binary format to text
599 */
600Datum
602{
604 text *result;
605 char *str;
606 int nbytes;
607
608 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
609
610 result = cstring_to_text_with_len(str, nbytes);
611 pfree(str);
612 PG_RETURN_TEXT_P(result);
613}
614
615/*
616 * textsend - converts text to binary format
617 */
618Datum
620{
621 text *t = PG_GETARG_TEXT_PP(0);
623
627}
628
629
630/*
631 * unknownin - converts cstring to internal representation
632 */
633Datum
635{
636 char *str = PG_GETARG_CSTRING(0);
637
638 /* representation is same as cstring */
640}
641
642/*
643 * unknownout - converts internal representation to cstring
644 */
645Datum
647{
648 /* representation is same as cstring */
649 char *str = PG_GETARG_CSTRING(0);
650
652}
653
654/*
655 * unknownrecv - converts external binary format to unknown
656 */
657Datum
659{
661 char *str;
662 int nbytes;
663
664 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
665 /* representation is same as cstring */
667}
668
669/*
670 * unknownsend - converts unknown to binary format
671 */
672Datum
674{
675 /* representation is same as cstring */
676 char *str = PG_GETARG_CSTRING(0);
678
680 pq_sendtext(&buf, str, strlen(str));
682}
683
684
685/* ========== PUBLIC ROUTINES ========== */
686
687/*
688 * textlen -
689 * returns the logical length of a text*
690 * (which is less than the VARSIZE of the text*)
691 */
692Datum
694{
696
697 /* try to avoid decompressing argument */
699}
700
701/*
702 * text_length -
703 * Does the real work for textlen()
704 *
705 * This is broken out so it can be called directly by other string processing
706 * functions. Note that the argument is passed as a Datum, to indicate that
707 * it may still be in compressed form. We can avoid decompressing it at all
708 * in some cases.
709 */
710static int32
712{
713 /* fastpath when max encoding length is one */
716 else
717 {
718 text *t = DatumGetTextPP(str);
719
722 }
723}
724
725/*
726 * textoctetlen -
727 * returns the physical length of a text*
728 * (which is less than the VARSIZE of the text*)
729 */
730Datum
732{
734
735 /* We need not detoast the input at all */
737}
738
739/*
740 * textcat -
741 * takes two text* and returns a text* that is the concatenation of
742 * the two.
743 *
744 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
745 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
746 * Allocate space for output in all cases.
747 * XXX - thomas 1997-07-10
748 */
749Datum
751{
752 text *t1 = PG_GETARG_TEXT_PP(0);
753 text *t2 = PG_GETARG_TEXT_PP(1);
754
756}
757
758/*
759 * text_catenate
760 * Guts of textcat(), broken out so it can be used by other functions
761 *
762 * Arguments can be in short-header form, but not compressed or out-of-line
763 */
764static text *
766{
767 text *result;
768 int len1,
769 len2,
770 len;
771 char *ptr;
772
773 len1 = VARSIZE_ANY_EXHDR(t1);
774 len2 = VARSIZE_ANY_EXHDR(t2);
775
776 /* paranoia ... probably should throw error instead? */
777 if (len1 < 0)
778 len1 = 0;
779 if (len2 < 0)
780 len2 = 0;
781
782 len = len1 + len2 + VARHDRSZ;
783 result = (text *) palloc(len);
784
785 /* Set size of result string... */
786 SET_VARSIZE(result, len);
787
788 /* Fill data field of result string... */
789 ptr = VARDATA(result);
790 if (len1 > 0)
791 memcpy(ptr, VARDATA_ANY(t1), len1);
792 if (len2 > 0)
793 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
794
795 return result;
796}
797
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * It is caller's responsibility that there actually are n characters;
 * the string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	if (pg_database_encoding_max_length() == 1)
	{
		/* Optimization for single-byte encodings */
		return n;
	}
	else
	{
		const char *s;

		/* step over n characters, one pg_mblen() at a time */
		for (s = p; n > 0; n--)
			s += pg_mblen(s);

		return s - p;
	}
}
823
824/*
825 * text_substr()
826 * Return a substring starting at the specified position.
827 * - thomas 1997-12-31
828 *
829 * Input:
830 * - string
831 * - starting position (is one-based)
832 * - string length
833 *
834 * If the starting position is zero or less, then return from the start of the string
835 * adjusting the length to be consistent with the "negative start" per SQL.
836 * If the length is less than zero, return the remaining string.
837 *
838 * Added multibyte support.
839 * - Tatsuo Ishii 1998-4-21
840 * Changed behavior if starting position is less than one to conform to SQL behavior.
841 * Formerly returned the entire string; now returns a portion.
842 * - Thomas Lockhart 1998-12-10
843 * Now uses faster TOAST-slicing interface
844 * - John Gray 2002-02-22
845 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
846 * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
847 * error; if E < 1, return '', not entire string). Fixed MB related bug when
848 * S > LC and < LC + 4 sometimes garbage characters are returned.
849 * - Joe Conway 2002-08-10
850 */
851Datum
853{
857 false));
858}
859
860/*
861 * text_substr_no_len -
862 * Wrapper to avoid opr_sanity failure due to
863 * one function accepting a different number of args.
864 */
865Datum
867{
870 -1, true));
871}
872
873/*
874 * text_substring -
875 * Does the real work for text_substr() and text_substr_no_len()
876 *
877 * This is broken out so it can be called directly by other string processing
878 * functions. Note that the argument is passed as a Datum, to indicate that
879 * it may still be in compressed/toasted form. We can avoid detoasting all
880 * of it in some cases.
881 *
882 * The result is always a freshly palloc'd datum.
883 */
884static text *
885text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
886{
888 int32 S = start; /* start position */
889 int32 S1; /* adjusted start position */
890 int32 L1; /* adjusted substring length */
891 int32 E; /* end position */
892
893 /*
894 * SQL99 says S can be zero or negative (which we don't document), but we
895 * still must fetch from the start of the string.
896 * https://www.postgresql.org/message-id/170905442373.643.11536838320909376197%40wrigleys.postgresql.org
897 */
898 S1 = Max(S, 1);
899
900 /* life is easy if the encoding max length is 1 */
901 if (eml == 1)
902 {
903 if (length_not_specified) /* special case - get length to end of
904 * string */
905 L1 = -1;
906 else if (length < 0)
907 {
908 /* SQL99 says to throw an error for E < S, i.e., negative length */
910 (errcode(ERRCODE_SUBSTRING_ERROR),
911 errmsg("negative substring length not allowed")));
912 L1 = -1; /* silence stupider compilers */
913 }
914 else if (pg_add_s32_overflow(S, length, &E))
915 {
916 /*
917 * L could be large enough for S + L to overflow, in which case
918 * the substring must run to end of string.
919 */
920 L1 = -1;
921 }
922 else
923 {
924 /*
925 * A zero or negative value for the end position can happen if the
926 * start was negative or one. SQL99 says to return a zero-length
927 * string.
928 */
929 if (E < 1)
930 return cstring_to_text("");
931
932 L1 = E - S1;
933 }
934
935 /*
936 * If the start position is past the end of the string, SQL99 says to
937 * return a zero-length string -- DatumGetTextPSlice() will do that
938 * for us. We need only convert S1 to zero-based starting position.
939 */
940 return DatumGetTextPSlice(str, S1 - 1, L1);
941 }
942 else if (eml > 1)
943 {
944 /*
945 * When encoding max length is > 1, we can't get LC without
946 * detoasting, so we'll grab a conservatively large slice now and go
947 * back later to do the right thing
948 */
949 int32 slice_start;
950 int32 slice_size;
951 int32 slice_strlen;
952 text *slice;
953 int32 E1;
954 int32 i;
955 char *p;
956 char *s;
957 text *ret;
958
959 /*
960 * We need to start at position zero because there is no way to know
961 * in advance which byte offset corresponds to the supplied start
962 * position.
963 */
964 slice_start = 0;
965
966 if (length_not_specified) /* special case - get length to end of
967 * string */
968 slice_size = L1 = -1;
969 else if (length < 0)
970 {
971 /* SQL99 says to throw an error for E < S, i.e., negative length */
973 (errcode(ERRCODE_SUBSTRING_ERROR),
974 errmsg("negative substring length not allowed")));
975 slice_size = L1 = -1; /* silence stupider compilers */
976 }
977 else if (pg_add_s32_overflow(S, length, &E))
978 {
979 /*
980 * L could be large enough for S + L to overflow, in which case
981 * the substring must run to end of string.
982 */
983 slice_size = L1 = -1;
984 }
985 else
986 {
987 /*
988 * A zero or negative value for the end position can happen if the
989 * start was negative or one. SQL99 says to return a zero-length
990 * string.
991 */
992 if (E < 1)
993 return cstring_to_text("");
994
995 /*
996 * if E is past the end of the string, the tuple toaster will
997 * truncate the length for us
998 */
999 L1 = E - S1;
1000
1001 /*
1002 * Total slice size in bytes can't be any longer than the start
1003 * position plus substring length times the encoding max length.
1004 * If that overflows, we can just use -1.
1005 */
1006 if (pg_mul_s32_overflow(E, eml, &slice_size))
1007 slice_size = -1;
1008 }
1009
1010 /*
1011 * If we're working with an untoasted source, no need to do an extra
1012 * copying step.
1013 */
1016 slice = DatumGetTextPSlice(str, slice_start, slice_size);
1017 else
1018 slice = (text *) DatumGetPointer(str);
1019
1020 /* see if we got back an empty string */
1021 if (VARSIZE_ANY_EXHDR(slice) == 0)
1022 {
1023 if (slice != (text *) DatumGetPointer(str))
1024 pfree(slice);
1025 return cstring_to_text("");
1026 }
1027
1028 /* Now we can get the actual length of the slice in MB characters */
1029 slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
1030 VARSIZE_ANY_EXHDR(slice));
1031
1032 /*
1033 * Check that the start position wasn't > slice_strlen. If so, SQL99
1034 * says to return a zero-length string.
1035 */
1036 if (S1 > slice_strlen)
1037 {
1038 if (slice != (text *) DatumGetPointer(str))
1039 pfree(slice);
1040 return cstring_to_text("");
1041 }
1042
1043 /*
1044 * Adjust L1 and E1 now that we know the slice string length. Again
1045 * remember that S1 is one based, and slice_start is zero based.
1046 */
1047 if (L1 > -1)
1048 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
1049 else
1050 E1 = slice_start + 1 + slice_strlen;
1051
1052 /*
1053 * Find the start position in the slice; remember S1 is not zero based
1054 */
1055 p = VARDATA_ANY(slice);
1056 for (i = 0; i < S1 - 1; i++)
1057 p += pg_mblen(p);
1058
1059 /* hang onto a pointer to our start position */
1060 s = p;
1061
1062 /*
1063 * Count the actual bytes used by the substring of the requested
1064 * length.
1065 */
1066 for (i = S1; i < E1; i++)
1067 p += pg_mblen(p);
1068
1069 ret = (text *) palloc(VARHDRSZ + (p - s));
1070 SET_VARSIZE(ret, VARHDRSZ + (p - s));
1071 memcpy(VARDATA(ret), s, (p - s));
1072
1073 if (slice != (text *) DatumGetPointer(str))
1074 pfree(slice);
1075
1076 return ret;
1077 }
1078 else
1079 elog(ERROR, "invalid backend encoding: encoding max length < 1");
1080
1081 /* not reached: suppress compiler warning */
1082 return NULL;
1083}
1084
1085/*
1086 * textoverlay
1087 * Replace specified substring of first string with second
1088 *
1089 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1090 * This code is a direct implementation of what the standard says.
1091 */
1092Datum
1094{
1095 text *t1 = PG_GETARG_TEXT_PP(0);
1096 text *t2 = PG_GETARG_TEXT_PP(1);
1097 int sp = PG_GETARG_INT32(2); /* substring start position */
1098 int sl = PG_GETARG_INT32(3); /* substring length */
1099
1100 PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1101}
1102
1103Datum
1105{
1106 text *t1 = PG_GETARG_TEXT_PP(0);
1107 text *t2 = PG_GETARG_TEXT_PP(1);
1108 int sp = PG_GETARG_INT32(2); /* substring start position */
1109 int sl;
1110
1111 sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1112 PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1113}
1114
1115static text *
1116text_overlay(text *t1, text *t2, int sp, int sl)
1117{
1118 text *result;
1119 text *s1;
1120 text *s2;
1121 int sp_pl_sl;
1122
1123 /*
1124 * Check for possible integer-overflow cases. For negative sp, throw a
1125 * "substring length" error because that's what should be expected
1126 * according to the spec's definition of OVERLAY().
1127 */
1128 if (sp <= 0)
1129 ereport(ERROR,
1130 (errcode(ERRCODE_SUBSTRING_ERROR),
1131 errmsg("negative substring length not allowed")));
1132 if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1133 ereport(ERROR,
1134 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1135 errmsg("integer out of range")));
1136
1137 s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1138 s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1139 result = text_catenate(s1, t2);
1140 result = text_catenate(result, s2);
1141
1142 return result;
1143}
1144
1145/*
1146 * textpos -
1147 * Return the position of the specified substring.
1148 * Implements the SQL POSITION() function.
1149 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1150 * - thomas 1997-07-27
1151 */
1152Datum
1154{
1156 text *search_str = PG_GETARG_TEXT_PP(1);
1157
1159}
1160
1161/*
1162 * text_position -
1163 * Does the real work for textpos()
1164 *
1165 * Inputs:
1166 * t1 - string to be searched
1167 * t2 - pattern to match within t1
1168 * Result:
1169 * Character index of the first matched char, starting from 1,
1170 * or 0 if no match.
1171 *
1172 * This is broken out so it can be called directly by other string processing
1173 * functions.
1174 */
1175static int
1177{
1179 int result;
1180
1181 /* Empty needle always matches at position 1 */
1182 if (VARSIZE_ANY_EXHDR(t2) < 1)
1183 return 1;
1184
1185 /* Otherwise, can't match if haystack is shorter than needle */
1187 return 0;
1188
1189 text_position_setup(t1, t2, collid, &state);
1191 result = 0;
1192 else
1195 return result;
1196}
1197
1198
1199/*
1200 * text_position_setup, text_position_next, text_position_cleanup -
1201 * Component steps of text_position()
1202 *
1203 * These are broken out so that a string can be efficiently searched for
1204 * multiple occurrences of the same pattern. text_position_next may be
1205 * called multiple times, and it advances to the next match on each call.
1206 * text_position_get_match_ptr() and text_position_get_match_pos() return
1207 * a pointer or 1-based character position of the last match, respectively.
1208 *
1209 * The "state" variable is normally just a local variable in the caller.
1210 *
1211 * NOTE: text_position_next skips over the matched portion. For example,
1212 * searching for "xx" in "xxx" returns only one match, not two.
1213 */
1214
1215static void
1217{
1218 int len1 = VARSIZE_ANY_EXHDR(t1);
1219 int len2 = VARSIZE_ANY_EXHDR(t2);
1220 pg_locale_t mylocale;
1221
1223
1225
1226 if (!mylocale->deterministic)
1227 ereport(ERROR,
1228 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1229 errmsg("nondeterministic collations are not supported for substring searches")));
1230
1231 Assert(len1 > 0);
1232 Assert(len2 > 0);
1233
1234 /*
1235 * Even with a multi-byte encoding, we perform the search using the raw
1236 * byte sequence, ignoring multibyte issues. For UTF-8, that works fine,
1237 * because in UTF-8 the byte sequence of one character cannot contain
1238 * another character. For other multi-byte encodings, we do the search
1239 * initially as a simple byte search, ignoring multibyte issues, but
1240 * verify afterwards that the match we found is at a character boundary,
1241 * and continue the search if it was a false match.
1242 */
1244 state->is_multibyte_char_in_char = false;
1245 else if (GetDatabaseEncoding() == PG_UTF8)
1246 state->is_multibyte_char_in_char = false;
1247 else
1248 state->is_multibyte_char_in_char = true;
1249
1250 state->str1 = VARDATA_ANY(t1);
1251 state->str2 = VARDATA_ANY(t2);
1252 state->len1 = len1;
1253 state->len2 = len2;
1254 state->last_match = NULL;
1255 state->refpoint = state->str1;
1256 state->refpos = 0;
1257
1258 /*
1259 * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1260 * notes we use the terminology that the "haystack" is the string to be
1261 * searched (t1) and the "needle" is the pattern being sought (t2).
1262 *
1263 * If the needle is empty or bigger than the haystack then there is no
1264 * point in wasting cycles initializing the table. We also choose not to
1265 * use B-M-H for needles of length 1, since the skip table can't possibly
1266 * save anything in that case.
1267 */
1268 if (len1 >= len2 && len2 > 1)
1269 {
1270 int searchlength = len1 - len2;
1271 int skiptablemask;
1272 int last;
1273 int i;
1274 const char *str2 = state->str2;
1275
1276 /*
1277 * First we must determine how much of the skip table to use. The
1278 * declaration of TextPositionState allows up to 256 elements, but for
1279 * short search problems we don't really want to have to initialize so
1280 * many elements --- it would take too long in comparison to the
1281 * actual search time. So we choose a useful skip table size based on
1282 * the haystack length minus the needle length. The closer the needle
1283 * length is to the haystack length the less useful skipping becomes.
1284 *
1285 * Note: since we use bit-masking to select table elements, the skip
1286 * table size MUST be a power of 2, and so the mask must be 2^N-1.
1287 */
1288 if (searchlength < 16)
1289 skiptablemask = 3;
1290 else if (searchlength < 64)
1291 skiptablemask = 7;
1292 else if (searchlength < 128)
1293 skiptablemask = 15;
1294 else if (searchlength < 512)
1295 skiptablemask = 31;
1296 else if (searchlength < 2048)
1297 skiptablemask = 63;
1298 else if (searchlength < 4096)
1299 skiptablemask = 127;
1300 else
1301 skiptablemask = 255;
1302 state->skiptablemask = skiptablemask;
1303
1304 /*
1305 * Initialize the skip table. We set all elements to the needle
1306 * length, since this is the correct skip distance for any character
1307 * not found in the needle.
1308 */
1309 for (i = 0; i <= skiptablemask; i++)
1310 state->skiptable[i] = len2;
1311
1312 /*
1313 * Now examine the needle. For each character except the last one,
1314 * set the corresponding table element to the appropriate skip
1315 * distance. Note that when two characters share the same skip table
1316 * entry, the one later in the needle must determine the skip
1317 * distance.
1318 */
1319 last = len2 - 1;
1320
1321 for (i = 0; i < last; i++)
1322 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1323 }
1324}
1325
1326/*
1327 * Advance to the next match, starting from the end of the previous match
1328 * (or the beginning of the string, on first call). Returns true if a match
1329 * is found.
1330 *
1331 * Note that this refuses to match an empty-string needle. Most callers
1332 * will have handled that case specially and we'll never see it here.
1333 */
1334static bool
1336{
1337 int needle_len = state->len2;
1338 char *start_ptr;
1339 char *matchptr;
1340
1341 if (needle_len <= 0)
1342 return false; /* result for empty pattern */
1343
1344 /* Start from the point right after the previous match. */
1345 if (state->last_match)
1346 start_ptr = state->last_match + needle_len;
1347 else
1348 start_ptr = state->str1;
1349
1350retry:
1351 matchptr = text_position_next_internal(start_ptr, state);
1352
1353 if (!matchptr)
1354 return false;
1355
1356 /*
1357 * Found a match for the byte sequence. If this is a multibyte encoding,
1358 * where one character's byte sequence can appear inside a longer
1359 * multi-byte character, we need to verify that the match was at a
1360 * character boundary, not in the middle of a multi-byte character.
1361 */
1362 if (state->is_multibyte_char_in_char)
1363 {
1364 /* Walk one character at a time, until we reach the match. */
1365
1366 /* the search should never move backwards. */
1367 Assert(state->refpoint <= matchptr);
1368
1369 while (state->refpoint < matchptr)
1370 {
1371 /* step to next character. */
1372 state->refpoint += pg_mblen(state->refpoint);
1373 state->refpos++;
1374
1375 /*
1376 * If we stepped over the match's start position, then it was a
1377 * false positive, where the byte sequence appeared in the middle
1378 * of a multi-byte character. Skip it, and continue the search at
1379 * the next character boundary.
1380 */
1381 if (state->refpoint > matchptr)
1382 {
1383 start_ptr = state->refpoint;
1384 goto retry;
1385 }
1386 }
1387 }
1388
1389 state->last_match = matchptr;
1390 return true;
1391}
1392
1393/*
1394 * Subroutine of text_position_next(). This searches for the raw byte
1395 * sequence, ignoring any multi-byte encoding issues. Returns the first
1396 * match starting at 'start_ptr', or NULL if no match is found.
1397 */
1398static char *
1400{
1401 int haystack_len = state->len1;
1402 int needle_len = state->len2;
1403 int skiptablemask = state->skiptablemask;
1404 const char *haystack = state->str1;
1405 const char *needle = state->str2;
1406 const char *haystack_end = &haystack[haystack_len];
1407 const char *hptr;
1408
1409 Assert(start_ptr >= haystack && start_ptr <= haystack_end);
1410
1411 if (needle_len == 1)
1412 {
1413 /* No point in using B-M-H for a one-character needle */
1414 char nchar = *needle;
1415
1416 hptr = start_ptr;
1417 while (hptr < haystack_end)
1418 {
1419 if (*hptr == nchar)
1420 return (char *) hptr;
1421 hptr++;
1422 }
1423 }
1424 else
1425 {
1426 const char *needle_last = &needle[needle_len - 1];
1427
1428 /* Start at startpos plus the length of the needle */
1429 hptr = start_ptr + needle_len - 1;
1430 while (hptr < haystack_end)
1431 {
1432 /* Match the needle scanning *backward* */
1433 const char *nptr;
1434 const char *p;
1435
1436 nptr = needle_last;
1437 p = hptr;
1438 while (*nptr == *p)
1439 {
1440 /* Matched it all? If so, return 1-based position */
1441 if (nptr == needle)
1442 return (char *) p;
1443 nptr--, p--;
1444 }
1445
1446 /*
1447 * No match, so use the haystack char at hptr to decide how far to
1448 * advance. If the needle had any occurrence of that character
1449 * (or more precisely, one sharing the same skiptable entry)
1450 * before its last character, then we advance far enough to align
1451 * the last such needle character with that haystack position.
1452 * Otherwise we can advance by the whole needle length.
1453 */
1454 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1455 }
1456 }
1457
1458 return 0; /* not found */
1459}
1460
1461/*
1462 * Return a pointer to the current match.
1463 *
1464 * The returned pointer points into the original haystack string.
1465 */
1466static char *
1468{
1469 return state->last_match;
1470}
1471
1472/*
1473 * Return the offset of the current match.
1474 *
1475 * The offset is in characters, 1-based.
1476 */
1477static int
1479{
1480 /* Convert the byte position to char position. */
1481 state->refpos += pg_mbstrlen_with_len(state->refpoint,
1482 state->last_match - state->refpoint);
1483 state->refpoint = state->last_match;
1484 return state->refpos + 1;
1485}
1486
1487/*
1488 * Reset search state to the initial state installed by text_position_setup.
1489 *
1490 * The next call to text_position_next will search from the beginning
1491 * of the string.
 *
 * Only the match bookkeeping is rewound; the haystack, the needle and the
 * skip table held in the state are not touched here.
1492 */
1493static void
1495{
 /* With no previous match, the next search starts at str1. */
1496 state->last_match = NULL;
 /* Rewind the cached reference point used for byte-to-character conversion. */
1497 state->refpoint = state->str1;
1498 state->refpos = 0;
1499}
1500
/*
 * Cleanup hook for a text-position search state.
 *
 * The body is deliberately empty: there is currently nothing to release.
 */
1501static void
1503{
1504 /* no cleanup needed */
1505}
1506
1507
/*
 * Verify that a usable collation OID was supplied.
 *
 * Does nothing when 'collid' is a valid OID.  Otherwise it reports an
 * ERRCODE_INDETERMINATE_COLLATION error, with a hint to add an explicit
 * COLLATE clause.
 */
1508static void
1510{
1511 if (!OidIsValid(collid))
1512 {
1513 /*
1514 * This typically means that the parser could not resolve a conflict
1515 * of implicit collations, so report it that way.
1516 */
1517 ereport(ERROR,
1518 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1519 errmsg("could not determine which collation to use for string comparison"),
1520 errhint("Use the COLLATE clause to set the collation explicitly.")));
1521 }
1522}
1523
1524/*
1525 * varstr_cmp()
1526 *
1527 * Comparison function for text strings with given lengths, using the
1528 * appropriate locale. Returns an integer less than, equal to, or greater than
1529 * zero, indicating whether arg1 is less than, equal to, or greater than arg2.
1530 *
1531 * Note: many functions that depend on this are marked leakproof; therefore,
1532 * avoid reporting the actual contents of the input when throwing errors.
1533 * All errors herein should be things that can't happen except on corrupt
1534 * data, anyway; otherwise we will have trouble with indexing strings that
1535 * would cause them.
1536 */
1537int
1538varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
1539{
1540 int result;
1541 pg_locale_t mylocale;
1542
1544
1546
1547 if (mylocale->collate_is_c)
1548 {
1549 result = memcmp(arg1, arg2, Min(len1, len2));
1550 if ((result == 0) && (len1 != len2))
1551 result = (len1 < len2) ? -1 : 1;
1552 }
1553 else
1554 {
1555 /*
1556 * memcmp() can't tell us which of two unequal strings sorts first,
1557 * but it's a cheap way to tell if they're equal. Testing shows that
1558 * memcmp() followed by strcoll() is only trivially slower than
1559 * strcoll() by itself, so we don't lose much if this doesn't work out
1560 * very often, and if it does - for example, because there are many
1561 * equal strings in the input - then we win big by avoiding expensive
1562 * collation-aware comparisons.
1563 */
1564 if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1565 return 0;
1566
1567 result = pg_strncoll(arg1, len1, arg2, len2, mylocale);
1568
1569 /* Break tie if necessary. */
1570 if (result == 0 && mylocale->deterministic)
1571 {
1572 result = memcmp(arg1, arg2, Min(len1, len2));
1573 if ((result == 0) && (len1 != len2))
1574 result = (len1 < len2) ? -1 : 1;
1575 }
1576 }
1577
1578 return result;
1579}
1580
1581/* text_cmp()
1582 * Internal comparison function for text strings.
1583 * Returns -1, 0 or 1
1584 */
1585static int
1587{
1588 char *a1p,
1589 *a2p;
1590 int len1,
1591 len2;
1592
1593 a1p = VARDATA_ANY(arg1);
1594 a2p = VARDATA_ANY(arg2);
1595
1596 len1 = VARSIZE_ANY_EXHDR(arg1);
1597 len2 = VARSIZE_ANY_EXHDR(arg2);
1598
1599 return varstr_cmp(a1p, len1, a2p, len2, collid);
1600}
1601
1602/*
1603 * Comparison functions for text strings.
1604 *
1605 * Note: btree indexes need these routines not to leak memory; therefore,
1606 * be careful to free working copies of toasted datums. Most places don't
1607 * need to be so careful.
1608 */
1609
1610Datum
1612{
1614 pg_locale_t mylocale = 0;
1615 bool result;
1616
1618
1620
1621 if (mylocale->deterministic)
1622 {
1623 Datum arg1 = PG_GETARG_DATUM(0);
1624 Datum arg2 = PG_GETARG_DATUM(1);
1625 Size len1,
1626 len2;
1627
1628 /*
1629 * Since we only care about equality or not-equality, we can avoid all
1630 * the expense of strcoll() here, and just do bitwise comparison. In
1631 * fact, we don't even have to do a bitwise comparison if we can show
1632 * the lengths of the strings are unequal; which might save us from
1633 * having to detoast one or both values.
1634 */
1635 len1 = toast_raw_datum_size(arg1);
1636 len2 = toast_raw_datum_size(arg2);
1637 if (len1 != len2)
1638 result = false;
1639 else
1640 {
1641 text *targ1 = DatumGetTextPP(arg1);
1642 text *targ2 = DatumGetTextPP(arg2);
1643
1644 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1645 len1 - VARHDRSZ) == 0);
1646
1647 PG_FREE_IF_COPY(targ1, 0);
1648 PG_FREE_IF_COPY(targ2, 1);
1649 }
1650 }
1651 else
1652 {
1653 text *arg1 = PG_GETARG_TEXT_PP(0);
1654 text *arg2 = PG_GETARG_TEXT_PP(1);
1655
1656 result = (text_cmp(arg1, arg2, collid) == 0);
1657
1658 PG_FREE_IF_COPY(arg1, 0);
1659 PG_FREE_IF_COPY(arg2, 1);
1660 }
1661
1662 PG_RETURN_BOOL(result);
1663}
1664
1665Datum
1667{
1669 pg_locale_t mylocale;
1670 bool result;
1671
1673
1675
1676 if (mylocale->deterministic)
1677 {
1678 Datum arg1 = PG_GETARG_DATUM(0);
1679 Datum arg2 = PG_GETARG_DATUM(1);
1680 Size len1,
1681 len2;
1682
1683 /* See comment in texteq() */
1684 len1 = toast_raw_datum_size(arg1);
1685 len2 = toast_raw_datum_size(arg2);
1686 if (len1 != len2)
1687 result = true;
1688 else
1689 {
1690 text *targ1 = DatumGetTextPP(arg1);
1691 text *targ2 = DatumGetTextPP(arg2);
1692
1693 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1694 len1 - VARHDRSZ) != 0);
1695
1696 PG_FREE_IF_COPY(targ1, 0);
1697 PG_FREE_IF_COPY(targ2, 1);
1698 }
1699 }
1700 else
1701 {
1702 text *arg1 = PG_GETARG_TEXT_PP(0);
1703 text *arg2 = PG_GETARG_TEXT_PP(1);
1704
1705 result = (text_cmp(arg1, arg2, collid) != 0);
1706
1707 PG_FREE_IF_COPY(arg1, 0);
1708 PG_FREE_IF_COPY(arg2, 1);
1709 }
1710
1711 PG_RETURN_BOOL(result);
1712}
1713
/*
 * "Less than" test for two text values.
 *
 * Compares argument 0 with argument 1 through text_cmp(), using the
 * collation attached to the call, and returns true when the first argument
 * sorts strictly before the second.
 */
1714Datum
1716{
1717 text *arg1 = PG_GETARG_TEXT_PP(0);
1718 text *arg2 = PG_GETARG_TEXT_PP(1);
1719 bool result;
1720
1721 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1722
 /* Release any working copies of the arguments so repeated calls don't leak. */
1723 PG_FREE_IF_COPY(arg1, 0);
1724 PG_FREE_IF_COPY(arg2, 1);
1725
1726 PG_RETURN_BOOL(result);
1727}
1728
/*
 * "Less than or equal" test for two text values.
 *
 * Compares argument 0 with argument 1 through text_cmp(), using the
 * collation attached to the call, and returns true when the first argument
 * does not sort after the second.
 */
1729Datum
1731{
1732 text *arg1 = PG_GETARG_TEXT_PP(0);
1733 text *arg2 = PG_GETARG_TEXT_PP(1);
1734 bool result;
1735
1736 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1737
 /* Release any working copies of the arguments so repeated calls don't leak. */
1738 PG_FREE_IF_COPY(arg1, 0);
1739 PG_FREE_IF_COPY(arg2, 1);
1740
1741 PG_RETURN_BOOL(result);
1742}
1743
/*
 * "Greater than" test for two text values.
 *
 * Compares argument 0 with argument 1 through text_cmp(), using the
 * collation attached to the call, and returns true when the first argument
 * sorts strictly after the second.
 */
1744Datum
1746{
1747 text *arg1 = PG_GETARG_TEXT_PP(0);
1748 text *arg2 = PG_GETARG_TEXT_PP(1);
1749 bool result;
1750
1751 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1752
 /* Release any working copies of the arguments so repeated calls don't leak. */
1753 PG_FREE_IF_COPY(arg1, 0);
1754 PG_FREE_IF_COPY(arg2, 1);
1755
1756 PG_RETURN_BOOL(result);
1757}
1758
/*
 * "Greater than or equal" test for two text values.
 *
 * Compares argument 0 with argument 1 through text_cmp(), using the
 * collation attached to the call, and returns true when the first argument
 * does not sort before the second.
 */
1759Datum
1761{
1762 text *arg1 = PG_GETARG_TEXT_PP(0);
1763 text *arg2 = PG_GETARG_TEXT_PP(1);
1764 bool result;
1765
1766 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1767
 /* Release any working copies of the arguments so repeated calls don't leak. */
1768 PG_FREE_IF_COPY(arg1, 0);
1769 PG_FREE_IF_COPY(arg2, 1);
1770
1771 PG_RETURN_BOOL(result);
1772}
1773
1774Datum
1776{
1777 Datum arg1 = PG_GETARG_DATUM(0);
1778 Datum arg2 = PG_GETARG_DATUM(1);
1780 pg_locale_t mylocale;
1781 bool result;
1782 Size len1,
1783 len2;
1784
1786
1788
1789 if (!mylocale->deterministic)
1790 ereport(ERROR,
1791 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1792 errmsg("nondeterministic collations are not supported for substring searches")));
1793
1794 len1 = toast_raw_datum_size(arg1);
1795 len2 = toast_raw_datum_size(arg2);
1796 if (len2 > len1)
1797 result = false;
1798 else
1799 {
1800 text *targ1 = text_substring(arg1, 1, len2, false);
1801 text *targ2 = DatumGetTextPP(arg2);
1802
1803 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1804 VARSIZE_ANY_EXHDR(targ2)) == 0);
1805
1806 PG_FREE_IF_COPY(targ1, 0);
1807 PG_FREE_IF_COPY(targ2, 1);
1808 }
1809
1810 PG_RETURN_BOOL(result);
1811}
1812
/*
 * Three-way comparison of two text values.
 *
 * Returns, as an int32, the result of text_cmp() on arguments 0 and 1 under
 * the collation attached to the call: negative, zero, or positive according
 * to whether the first argument sorts before, equal to, or after the second.
 */
1813Datum
1815{
1816 text *arg1 = PG_GETARG_TEXT_PP(0);
1817 text *arg2 = PG_GETARG_TEXT_PP(1);
1818 int32 result;
1819
1820 result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1821
 /* Release any working copies of the arguments so repeated calls don't leak. */
1822 PG_FREE_IF_COPY(arg1, 0);
1823 PG_FREE_IF_COPY(arg2, 1);
1824
1825 PG_RETURN_INT32(result);
1826}
1827
1828Datum
1830{
1832 Oid collid = ssup->ssup_collation;
1833 MemoryContext oldcontext;
1834
1835 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1836
1837 /* Use generic string SortSupport */
1838 varstr_sortsupport(ssup, TEXTOID, collid);
1839
1840 MemoryContextSwitchTo(oldcontext);
1841
1843}
1844
1845/*
1846 * Generic sortsupport interface for character type's operator classes.
1847 * Includes locale support, and support for BpChar semantics (i.e. removing
1848 * trailing spaces before comparison).
1849 *
1850 * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1851 * same representation. Callers that always use the C collation (e.g.
1852 * non-collatable type callers like bytea) may have NUL bytes in their strings;
1853 * this will not work with any other collation, though.
1854 */
1855void
1857{
1858 bool abbreviate = ssup->abbreviate;
1859 bool collate_c = false;
1862
1864
1866
1867 /*
1868 * If possible, set ssup->comparator to a function which can be used to
1869 * directly compare two datums. If we can do this, we'll avoid the
1870 * overhead of a trip through the fmgr layer for every comparison, which
1871 * can be substantial.
1872 *
1873 * Most typically, we'll set the comparator to varlenafastcmp_locale,
1874 * which uses strcoll() to perform comparisons. We use that for the
1875 * BpChar case too, but type NAME uses namefastcmp_locale. However, if
1876 * LC_COLLATE = C, we can make things quite a bit faster with
1877 * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
1878 * memcmp() rather than strcoll().
1879 */
1880 if (locale->collate_is_c)
1881 {
1882 if (typid == BPCHAROID)
1884 else if (typid == NAMEOID)
1885 {
1886 ssup->comparator = namefastcmp_c;
1887 /* Not supporting abbreviation with type NAME, for now */
1888 abbreviate = false;
1889 }
1890 else
1892
1893 collate_c = true;
1894 }
1895 else
1896 {
1897 /*
1898 * We use varlenafastcmp_locale except for type NAME.
1899 */
1900 if (typid == NAMEOID)
1901 {
1903 /* Not supporting abbreviation with type NAME, for now */
1904 abbreviate = false;
1905 }
1906 else
1908
1909 /*
1910 * Unfortunately, it seems that abbreviation for non-C collations is
1911 * broken on many common platforms; see pg_strxfrm_enabled().
1912 *
1913 * Even apart from the risk of broken locales, it's possible that
1914 * there are platforms where the use of abbreviated keys should be
1915 * disabled at compile time. Having only 4 byte datums could make
1916 * worst-case performance drastically more likely, for example.
1917 * Moreover, macOS's strxfrm() implementation is known to not
1918 * effectively concentrate a significant amount of entropy from the
1919 * original string in earlier transformed blobs. It's possible that
1920 * other supported platforms are similarly encumbered. So, if we ever
1921 * get past disabling this categorically, we may still want or need to
1922 * disable it for particular platforms.
1923 */
1925 abbreviate = false;
1926 }
1927
1928 /*
1929 * If we're using abbreviated keys, or if we're using a locale-aware
1930 * comparison, we need to initialize a VarStringSortSupport object. Both
1931 * cases will make use of the temporary buffers we initialize here for
1932 * scratch space (and to detect requirement for BpChar semantics from
1933 * caller), and the abbreviation case requires additional state.
1934 */
1935 if (abbreviate || !collate_c)
1936 {
1937 sss = palloc(sizeof(VarStringSortSupport));
1938 sss->buf1 = palloc(TEXTBUFLEN);
1939 sss->buflen1 = TEXTBUFLEN;
1940 sss->buf2 = palloc(TEXTBUFLEN);
1941 sss->buflen2 = TEXTBUFLEN;
1942 /* Start with invalid values */
1943 sss->last_len1 = -1;
1944 sss->last_len2 = -1;
1945 /* Initialize */
1946 sss->last_returned = 0;
1947 if (collate_c)
1948 sss->locale = NULL;
1949 else
1950 sss->locale = locale;
1951
1952 /*
1953 * To avoid somehow confusing a strxfrm() blob and an original string,
1954 * constantly keep track of the variety of data that buf1 and buf2
1955 * currently contain.
1956 *
1957 * Comparisons may be interleaved with conversion calls. Frequently,
1958 * conversions and comparisons are batched into two distinct phases,
1959 * but the correctness of caching cannot hinge upon this. For
1960 * comparison caching, buffer state is only trusted if cache_blob is
1961 * found set to false, whereas strxfrm() caching only trusts the state
1962 * when cache_blob is found set to true.
1963 *
1964 * Arbitrarily initialize cache_blob to true.
1965 */
1966 sss->cache_blob = true;
1967 sss->collate_c = collate_c;
1968 sss->typid = typid;
1969 ssup->ssup_extra = sss;
1970
1971 /*
1972 * If possible, plan to use the abbreviated keys optimization. The
1973 * core code may switch back to authoritative comparator should
1974 * abbreviation be aborted.
1975 */
1976 if (abbreviate)
1977 {
1978 sss->prop_card = 0.20;
1979 initHyperLogLog(&sss->abbr_card, 10);
1980 initHyperLogLog(&sss->full_card, 10);
1981 ssup->abbrev_full_comparator = ssup->comparator;
1985 }
1986 }
1987}
1988
1989/*
1990 * sortsupport comparison func (for C locale case)
1991 */
1992static int
1994{
1997 char *a1p,
1998 *a2p;
1999 int len1,
2000 len2,
2001 result;
2002
2003 a1p = VARDATA_ANY(arg1);
2004 a2p = VARDATA_ANY(arg2);
2005
2006 len1 = VARSIZE_ANY_EXHDR(arg1);
2007 len2 = VARSIZE_ANY_EXHDR(arg2);
2008
2009 result = memcmp(a1p, a2p, Min(len1, len2));
2010 if ((result == 0) && (len1 != len2))
2011 result = (len1 < len2) ? -1 : 1;
2012
2013 /* We can't afford to leak memory here. */
2014 if (PointerGetDatum(arg1) != x)
2015 pfree(arg1);
2016 if (PointerGetDatum(arg2) != y)
2017 pfree(arg2);
2018
2019 return result;
2020}
2021
2022/*
2023 * sortsupport comparison func (for BpChar C locale case)
2024 *
2025 * BpChar outsources its sortsupport to this module. Specialization for the
2026 * varstr_sortsupport BpChar case, modeled on
2027 * internal_bpchar_pattern_compare().
2028 */
2029static int
2031{
2032 BpChar *arg1 = DatumGetBpCharPP(x);
2033 BpChar *arg2 = DatumGetBpCharPP(y);
2034 char *a1p,
2035 *a2p;
2036 int len1,
2037 len2,
2038 result;
2039
2040 a1p = VARDATA_ANY(arg1);
2041 a2p = VARDATA_ANY(arg2);
2042
2043 len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2044 len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2045
2046 result = memcmp(a1p, a2p, Min(len1, len2));
2047 if ((result == 0) && (len1 != len2))
2048 result = (len1 < len2) ? -1 : 1;
2049
2050 /* We can't afford to leak memory here. */
2051 if (PointerGetDatum(arg1) != x)
2052 pfree(arg1);
2053 if (PointerGetDatum(arg2) != y)
2054 pfree(arg2);
2055
2056 return result;
2057}
2058
2059/*
2060 * sortsupport comparison func (for NAME C locale case)
2061 */
2062static int
2064{
2065 Name arg1 = DatumGetName(x);
2066 Name arg2 = DatumGetName(y);
2067
2068 return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
2069}
2070
2071/*
2072 * sortsupport comparison func (for locale case with all varlena types)
2073 */
2074static int
2076{
2079 char *a1p,
2080 *a2p;
2081 int len1,
2082 len2,
2083 result;
2084
2085 a1p = VARDATA_ANY(arg1);
2086 a2p = VARDATA_ANY(arg2);
2087
2088 len1 = VARSIZE_ANY_EXHDR(arg1);
2089 len2 = VARSIZE_ANY_EXHDR(arg2);
2090
2091 result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
2092
2093 /* We can't afford to leak memory here. */
2094 if (PointerGetDatum(arg1) != x)
2095 pfree(arg1);
2096 if (PointerGetDatum(arg2) != y)
2097 pfree(arg2);
2098
2099 return result;
2100}
2101
2102/*
2103 * sortsupport comparison func (for locale case with NAME type)
2104 */
2105static int
2107{
2108 Name arg1 = DatumGetName(x);
2109 Name arg2 = DatumGetName(y);
2110
2111 return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
2112 NameStr(*arg2), strlen(NameStr(*arg2)),
2113 ssup);
2114}
2115
2116/*
2117 * sortsupport comparison func for locale cases
2118 */
2119static int
2120varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
2121{
2123 int result;
2124 bool arg1_match;
2125
2126 /* Fast pre-check for equality, as discussed in varstr_cmp() */
2127 if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2128 {
2129 /*
2130 * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2131 * last_len2. Existing contents of buffers might still be used by
2132 * next call.
2133 *
2134 * It's fine to allow the comparison of BpChar padding bytes here,
2135 * even though that implies that the memcmp() will usually be
2136 * performed for BpChar callers (though multibyte characters could
2137 * still prevent that from occurring). The memcmp() is still very
2138 * cheap, and BpChar's funny semantics have us remove trailing spaces
2139 * (not limited to padding), so we need make no distinction between
2140 * padding space characters and "real" space characters.
2141 */
2142 return 0;
2143 }
2144
2145 if (sss->typid == BPCHAROID)
2146 {
2147 /* Get true number of bytes, ignoring trailing spaces */
2148 len1 = bpchartruelen(a1p, len1);
2149 len2 = bpchartruelen(a2p, len2);
2150 }
2151
2152 if (len1 >= sss->buflen1)
2153 {
2154 sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2155 sss->buf1 = repalloc(sss->buf1, sss->buflen1);
2156 }
2157 if (len2 >= sss->buflen2)
2158 {
2159 sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2160 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
2161 }
2162
2163 /*
2164 * We're likely to be asked to compare the same strings repeatedly, and
2165 * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2166 * comparisons, even though in general there is no reason to think that
2167 * that will work out (every string datum may be unique). Caching does
2168 * not slow things down measurably when it doesn't work out, and can speed
2169 * things up by rather a lot when it does. In part, this is because the
2170 * memcmp() compares data from cachelines that are needed in L1 cache even
2171 * when the last comparison's result cannot be reused.
2172 */
2173 arg1_match = true;
2174 if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2175 {
2176 arg1_match = false;
2177 memcpy(sss->buf1, a1p, len1);
2178 sss->buf1[len1] = '\0';
2179 sss->last_len1 = len1;
2180 }
2181
2182 /*
2183 * If we're comparing the same two strings as last time, we can return the
2184 * same answer without calling strcoll() again. This is more likely than
2185 * it seems (at least with moderate to low cardinality sets), because
2186 * quicksort compares the same pivot against many values.
2187 */
2188 if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2189 {
2190 memcpy(sss->buf2, a2p, len2);
2191 sss->buf2[len2] = '\0';
2192 sss->last_len2 = len2;
2193 }
2194 else if (arg1_match && !sss->cache_blob)
2195 {
2196 /* Use result cached following last actual strcoll() call */
2197 return sss->last_returned;
2198 }
2199
2200 result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
2201
2202 /* Break tie if necessary. */
2203 if (result == 0 && sss->locale->deterministic)
2204 result = strcmp(sss->buf1, sss->buf2);
2205
2206 /* Cache result, perhaps saving an expensive strcoll() call next time */
2207 sss->cache_blob = false;
2208 sss->last_returned = result;
2209 return result;
2210}
2211
2212/*
2213 * Conversion routine for sortsupport. Converts original to abbreviated key
2214 * representation. Our encoding strategy is simple -- pack the first 8 bytes
2215 * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2216 * stored in reverse order), and treat it as an unsigned integer. When the "C"
2217 * locale is used, or in case of bytea, just memcpy() from original instead.
2218 */
2219static Datum
2221{
2222 const size_t max_prefix_bytes = sizeof(Datum);
2224 VarString *authoritative = DatumGetVarStringPP(original);
2225 char *authoritative_data = VARDATA_ANY(authoritative);
2226
2227 /* working state */
2228 Datum res;
2229 char *pres;
2230 int len;
2231 uint32 hash;
2232
2233 pres = (char *) &res;
2234 /* memset(), so any non-overwritten bytes are NUL */
2235 memset(pres, 0, max_prefix_bytes);
2236 len = VARSIZE_ANY_EXHDR(authoritative);
2237
2238 /* Get number of bytes, ignoring trailing spaces */
2239 if (sss->typid == BPCHAROID)
2240 len = bpchartruelen(authoritative_data, len);
2241
2242 /*
2243 * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2244 * abbreviate keys. The full comparator for the C locale is always
2245 * memcmp(). It would be incorrect to allow bytea callers (callers that
2246 * always force the C collation -- bytea isn't a collatable type, but this
2247 * approach is convenient) to use strxfrm(). This is because bytea
2248 * strings may contain NUL bytes. Besides, this should be faster, too.
2249 *
2250 * More generally, it's okay that bytea callers can have NUL bytes in
2251 * strings because abbreviated cmp need not make a distinction between
2252 * terminating NUL bytes, and NUL bytes representing actual NULs in the
2253 * authoritative representation. Hopefully a comparison at or past one
2254 * abbreviated key's terminating NUL byte will resolve the comparison
2255 * without consulting the authoritative representation; specifically, some
2256 * later non-NUL byte in the longer string can resolve the comparison
2257 * against a subsequent terminating NUL in the shorter string. There will
2258 * usually be what is effectively a "length-wise" resolution there and
2259 * then.
2260 *
2261 * If that doesn't work out -- if all bytes in the longer string
2262 * positioned at or past the offset of the smaller string's (first)
2263 * terminating NUL are actually representative of NUL bytes in the
2264 * authoritative binary string (perhaps with some *terminating* NUL bytes
2265 * towards the end of the longer string iff it happens to still be small)
2266 * -- then an authoritative tie-breaker will happen, and do the right
2267 * thing: explicitly consider string length.
2268 */
2269 if (sss->collate_c)
2270 memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
2271 else
2272 {
2273 Size bsize;
2274
2275 /*
2276 * We're not using the C collation, so fall back on strxfrm or ICU
2277 * analogs.
2278 */
2279
2280 /* By convention, we use buffer 1 to store and NUL-terminate */
2281 if (len >= sss->buflen1)
2282 {
2283 sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2284 sss->buf1 = repalloc(sss->buf1, sss->buflen1);
2285 }
2286
2287 /* Might be able to reuse strxfrm() blob from last call */
2288 if (sss->last_len1 == len && sss->cache_blob &&
2289 memcmp(sss->buf1, authoritative_data, len) == 0)
2290 {
2291 memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2));
2292 /* No change affecting cardinality, so no hashing required */
2293 goto done;
2294 }
2295
2296 memcpy(sss->buf1, authoritative_data, len);
2297
2298 /*
2299 * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated strings.
2300 */
2301 sss->buf1[len] = '\0';
2302 sss->last_len1 = len;
2303
2305 {
2306 if (sss->buflen2 < max_prefix_bytes)
2307 {
2308 sss->buflen2 = Max(max_prefix_bytes,
2309 Min(sss->buflen2 * 2, MaxAllocSize));
2310 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
2311 }
2312
2313 bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1,
2314 max_prefix_bytes, sss->locale);
2315 sss->last_len2 = bsize;
2316 }
2317 else
2318 {
2319 /*
2320 * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try
2321 * again. The pg_strxfrm() function leaves the result buffer
2322 * content undefined if the result did not fit, so we need to
2323 * retry until everything fits, even though we only need the first
2324 * few bytes in the end.
2325 */
2326 for (;;)
2327 {
2328 bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2,
2329 sss->locale);
2330
2331 sss->last_len2 = bsize;
2332 if (bsize < sss->buflen2)
2333 break;
2334
2335 /*
2336 * Grow buffer and retry.
2337 */
2338 sss->buflen2 = Max(bsize + 1,
2339 Min(sss->buflen2 * 2, MaxAllocSize));
2340 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
2341 }
2342 }
2343
2344 /*
2345 * Every Datum byte is always compared. This is safe because the
2346 * strxfrm() blob is itself NUL terminated, leaving no danger of
2347 * misinterpreting any NUL bytes not intended to be interpreted as
2348 * logically representing termination.
2349 *
2350 * (Actually, even if there were NUL bytes in the blob it would be
2351 * okay. See remarks on bytea case above.)
2352 */
2353 memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize));
2354 }
2355
2356 /*
2357 * Maintain approximate cardinality of both abbreviated keys and original,
2358 * authoritative keys using HyperLogLog. Used as cheap insurance against
2359 * the worst case, where we do many string transformations for no saving
2360 * in full strcoll()-based comparisons. These statistics are used by
2361 * varstr_abbrev_abort().
2362 *
2363 * First, Hash key proper, or a significant fraction of it. Mix in length
2364 * in order to compensate for cases where differences are past
2365 * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2366 */
2367 hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2369
2370 if (len > PG_CACHE_LINE_SIZE)
2372
2374
2375 /* Hash abbreviated key */
2376#if SIZEOF_DATUM == 8
2377 {
2378 uint32 lohalf,
2379 hihalf;
2380
2381 lohalf = (uint32) res;
2382 hihalf = (uint32) (res >> 32);
2383 hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2384 }
2385#else /* SIZEOF_DATUM != 8 */
2387#endif
2388
2390
2391 /* Cache result, perhaps saving an expensive strxfrm() call next time */
2392 sss->cache_blob = true;
2393done:
2394
2395 /*
2396 * Byteswap on little-endian machines.
2397 *
2398 * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
2399 * 3-way comparator) works correctly on all platforms. If we didn't do
2400 * this, the comparator would have to call memcmp() with a pair of
2401 * pointers to the first byte of each abbreviated key, which is slower.
2402 */
2403 res = DatumBigEndianToNative(res);
2404
2405 /* Don't leak memory here */
2406 if (PointerGetDatum(authoritative) != original)
2407 pfree(authoritative);
2408
2409 return res;
2410}
2411
2412/*
2413 * Callback for estimating effectiveness of abbreviated key optimization, using
2414 * heuristic rules. Returns value indicating if the abbreviation optimization
2415 * should be aborted, based on its projected effectiveness.
2416 */
2417static bool
2418varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2419{
2421 double abbrev_distinct,
2422 key_distinct;
2423
2424 Assert(ssup->abbreviate);
2425
2426 /* Have a little patience */
2427 if (memtupcount < 100)
2428 return false;
2429
2430 abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2431 key_distinct = estimateHyperLogLog(&sss->full_card);
2432
2433 /*
2434 * Clamp cardinality estimates to at least one distinct value. While
2435 * NULLs are generally disregarded, if only NULL values were seen so far,
2436 * that might misrepresent costs if we failed to clamp.
2437 */
2438 if (abbrev_distinct <= 1.0)
2439 abbrev_distinct = 1.0;
2440
2441 if (key_distinct <= 1.0)
2442 key_distinct = 1.0;
2443
2444 /*
2445 * In the worst case all abbreviated keys are identical, while at the same
2446 * time there are differences within full key strings not captured in
2447 * abbreviations.
2448 */
2449 if (trace_sort)
2450 {
2451 double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2452
2453 elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2454 "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2455 memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2456 sss->prop_card);
2457 }
2458
2459 /*
2460 * If the number of distinct abbreviated keys approximately matches the
2461 * number of distinct authoritative original keys, that's reason enough to
2462 * proceed. We can win even with a very low cardinality set if most
2463 * tie-breakers only memcmp(). This is by far the most important
2464 * consideration.
2465 *
2466 * While comparisons that are resolved at the abbreviated key level are
2467 * considerably cheaper than tie-breakers resolved with memcmp(), both of
2468 * those two outcomes are so much cheaper than a full strcoll() once
2469 * sorting is underway that it doesn't seem worth it to weigh abbreviated
2470 * cardinality against the overall size of the set in order to more
2471 * accurately model costs. Assume that an abbreviated comparison, and an
2472 * abbreviated comparison with a cheap memcmp()-based authoritative
2473 * resolution are equivalent.
2474 */
2475 if (abbrev_distinct > key_distinct * sss->prop_card)
2476 {
2477 /*
2478 * When we have exceeded 10,000 tuples, decay required cardinality
2479 * aggressively for next call.
2480 *
2481 * This is useful because the number of comparisons required on
2482 * average increases at a linearithmic rate, and at roughly 10,000
2483 * tuples that factor will start to dominate over the linear costs of
2484 * string transformation (this is a conservative estimate). The decay
2485 * rate is chosen to be a little less aggressive than halving -- which
2486 * (since we're called at points at which memtupcount has doubled)
2487 * would never see the cost model actually abort past the first call
2488 * following a decay. This decay rate is mostly a precaution against
2489 * a sudden, violent swing in how well abbreviated cardinality tracks
2490 * full key cardinality. The decay also serves to prevent a marginal
2491 * case from being aborted too late, when too much has already been
2492 * invested in string transformation.
2493 *
2494 * It's possible for sets of several million distinct strings with
2495 * mere tens of thousands of distinct abbreviated keys to still
2496 * benefit very significantly. This will generally occur provided
2497 * each abbreviated key is a proxy for a roughly uniform number of the
2498 * set's full keys. If it isn't so, we hope to catch that early and
2499 * abort. If it isn't caught early, by the time the problem is
2500 * apparent it's probably not worth aborting.
2501 */
2502 if (memtupcount > 10000)
2503 sss->prop_card *= 0.65;
2504
2505 return false;
2506 }
2507
2508 /*
2509 * Abort abbreviation strategy.
2510 *
2511 * The worst case, where all abbreviated keys are identical while all
2512 * original strings differ will typically only see a regression of about
2513 * 10% in execution time for small to medium sized lists of strings.
2514 * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2515 * often expect very large improvements, particularly with sets of strings
2516 * of moderately high to high abbreviated cardinality. There is little to
2517 * lose but much to gain, which our strategy reflects.
2518 */
2519 if (trace_sort)
2520 elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2521 "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2522 memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2523
2524 return true;
2525}
2526
2527/*
2528 * Generic equalimage support function for character type's operator classes.
2529 * Disables the use of deduplication with nondeterministic collations.
2530 */
2531Datum
2533{
2534 /* Oid opcintype = PG_GETARG_OID(0); */
2537
2539
2541
2542 PG_RETURN_BOOL(locale->deterministic);
2543}
2544
2545Datum
2547{
2548 text *arg1 = PG_GETARG_TEXT_PP(0);
2549 text *arg2 = PG_GETARG_TEXT_PP(1);
2550 text *result;
2551
2552 result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2553
2554 PG_RETURN_TEXT_P(result);
2555}
2556
2557Datum
2559{
2560 text *arg1 = PG_GETARG_TEXT_PP(0);
2561 text *arg2 = PG_GETARG_TEXT_PP(1);
2562 text *result;
2563
2564 result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2565
2566 PG_RETURN_TEXT_P(result);
2567}
2568
2569
2570/*
2571 * Cross-type comparison functions for types text and name.
2572 */
2573
2574Datum
2576{
2577 Name arg1 = PG_GETARG_NAME(0);
2578 text *arg2 = PG_GETARG_TEXT_PP(1);
2579 size_t len1 = strlen(NameStr(*arg1));
2580 size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2582 bool result;
2583
2585
2586 if (collid == C_COLLATION_OID)
2587 result = (len1 == len2 &&
2588 memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2589 else
2590 result = (varstr_cmp(NameStr(*arg1), len1,
2591 VARDATA_ANY(arg2), len2,
2592 collid) == 0);
2593
2594 PG_FREE_IF_COPY(arg2, 1);
2595
2596 PG_RETURN_BOOL(result);
2597}
2598
2599Datum
2601{
2602 text *arg1 = PG_GETARG_TEXT_PP(0);
2603 Name arg2 = PG_GETARG_NAME(1);
2604 size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2605 size_t len2 = strlen(NameStr(*arg2));
2607 bool result;
2608
2610
2611 if (collid == C_COLLATION_OID)
2612 result = (len1 == len2 &&
2613 memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2614 else
2615 result = (varstr_cmp(VARDATA_ANY(arg1), len1,
2616 NameStr(*arg2), len2,
2617 collid) == 0);
2618
2619 PG_FREE_IF_COPY(arg1, 0);
2620
2621 PG_RETURN_BOOL(result);
2622}
2623
2624Datum
2626{
2627 Name arg1 = PG_GETARG_NAME(0);
2628 text *arg2 = PG_GETARG_TEXT_PP(1);
2629 size_t len1 = strlen(NameStr(*arg1));
2630 size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2632 bool result;
2633
2635
2636 if (collid == C_COLLATION_OID)
2637 result = !(len1 == len2 &&
2638 memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2639 else
2640 result = !(varstr_cmp(NameStr(*arg1), len1,
2641 VARDATA_ANY(arg2), len2,
2642 collid) == 0);
2643
2644 PG_FREE_IF_COPY(arg2, 1);
2645
2646 PG_RETURN_BOOL(result);
2647}
2648
2649Datum
2651{
2652 text *arg1 = PG_GETARG_TEXT_PP(0);
2653 Name arg2 = PG_GETARG_NAME(1);
2654 size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2655 size_t len2 = strlen(NameStr(*arg2));
2657 bool result;
2658
2660
2661 if (collid == C_COLLATION_OID)
2662 result = !(len1 == len2 &&
2663 memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2664 else
2665 result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
2666 NameStr(*arg2), len2,
2667 collid) == 0);
2668
2669 PG_FREE_IF_COPY(arg1, 0);
2670
2671 PG_RETURN_BOOL(result);
2672}
2673
2674Datum
2676{
2677 Name arg1 = PG_GETARG_NAME(0);
2678 text *arg2 = PG_GETARG_TEXT_PP(1);
2679 int32 result;
2680
2681 result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
2682 VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
2684
2685 PG_FREE_IF_COPY(arg2, 1);
2686
2687 PG_RETURN_INT32(result);
2688}
2689
2690Datum
2692{
2693 text *arg1 = PG_GETARG_TEXT_PP(0);
2694 Name arg2 = PG_GETARG_NAME(1);
2695 int32 result;
2696
2697 result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
2698 NameStr(*arg2), strlen(NameStr(*arg2)),
2700
2701 PG_FREE_IF_COPY(arg1, 0);
2702
2703 PG_RETURN_INT32(result);
2704}
2705
2706#define CmpCall(cmpfunc) \
2707 DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
2708 PG_GET_COLLATION(), \
2709 PG_GETARG_DATUM(0), \
2710 PG_GETARG_DATUM(1)))
2711
2712Datum
2714{
2716}
2717
2718Datum
2720{
2722}
2723
2724Datum
2726{
2728}
2729
2730Datum
2732{
2734}
2735
2736Datum
2738{
2740}
2741
2742Datum
2744{
2746}
2747
2748Datum
2750{
2752}
2753
2754Datum
2756{
2758}
2759
2760#undef CmpCall
2761
2762
2763/*
2764 * The following operators support character-by-character comparison
2765 * of text datums, to allow building indexes suitable for LIKE clauses.
2766 * Note that the regular texteq/textne comparison operators, and regular
2767 * support functions 1 and 2 with "C" collation are assumed to be
2768 * compatible with these!
2769 */
2770
2771static int
2773{
2774 int result;
2775 int len1,
2776 len2;
2777
2778 len1 = VARSIZE_ANY_EXHDR(arg1);
2779 len2 = VARSIZE_ANY_EXHDR(arg2);
2780
2781 result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2782 if (result != 0)
2783 return result;
2784 else if (len1 < len2)
2785 return -1;
2786 else if (len1 > len2)
2787 return 1;
2788 else
2789 return 0;
2790}
2791
2792
2793Datum
2795{
2796 text *arg1 = PG_GETARG_TEXT_PP(0);
2797 text *arg2 = PG_GETARG_TEXT_PP(1);
2798 int result;
2799
2800 result = internal_text_pattern_compare(arg1, arg2);
2801
2802 PG_FREE_IF_COPY(arg1, 0);
2803 PG_FREE_IF_COPY(arg2, 1);
2804
2805 PG_RETURN_BOOL(result < 0);
2806}
2807
2808
2809Datum
2811{
2812 text *arg1 = PG_GETARG_TEXT_PP(0);
2813 text *arg2 = PG_GETARG_TEXT_PP(1);
2814 int result;
2815
2816 result = internal_text_pattern_compare(arg1, arg2);
2817
2818 PG_FREE_IF_COPY(arg1, 0);
2819 PG_FREE_IF_COPY(arg2, 1);
2820
2821 PG_RETURN_BOOL(result <= 0);
2822}
2823
2824
2825Datum
2827{
2828 text *arg1 = PG_GETARG_TEXT_PP(0);
2829 text *arg2 = PG_GETARG_TEXT_PP(1);
2830 int result;
2831
2832 result = internal_text_pattern_compare(arg1, arg2);
2833
2834 PG_FREE_IF_COPY(arg1, 0);
2835 PG_FREE_IF_COPY(arg2, 1);
2836
2837 PG_RETURN_BOOL(result >= 0);
2838}
2839
2840
2841Datum
2843{
2844 text *arg1 = PG_GETARG_TEXT_PP(0);
2845 text *arg2 = PG_GETARG_TEXT_PP(1);
2846 int result;
2847
2848 result = internal_text_pattern_compare(arg1, arg2);
2849
2850 PG_FREE_IF_COPY(arg1, 0);
2851 PG_FREE_IF_COPY(arg2, 1);
2852
2853 PG_RETURN_BOOL(result > 0);
2854}
2855
2856
2857Datum
2859{
2860 text *arg1 = PG_GETARG_TEXT_PP(0);
2861 text *arg2 = PG_GETARG_TEXT_PP(1);
2862 int result;
2863
2864 result = internal_text_pattern_compare(arg1, arg2);
2865
2866 PG_FREE_IF_COPY(arg1, 0);
2867 PG_FREE_IF_COPY(arg2, 1);
2868
2869 PG_RETURN_INT32(result);
2870}
2871
2872
2873Datum
2875{
2877 MemoryContext oldcontext;
2878
2879 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2880
2881 /* Use generic string SortSupport, forcing "C" collation */
2882 varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
2883
2884 MemoryContextSwitchTo(oldcontext);
2885
2887}
2888
2889
2890/*-------------------------------------------------------------
2891 * byteaoctetlen
2892 *
2893 * get the number of bytes contained in an instance of type 'bytea'
2894 *-------------------------------------------------------------
2895 */
2896Datum
2898{
2900
2901 /* We need not detoast the input at all */
2903}
2904
2905/*
2906 * byteacat -
2907 * takes two bytea* and returns a bytea* that is the concatenation of
2908 * the two.
2909 *
2910 * Cloned from textcat and modified as required.
2911 */
2912Datum
2914{
2915 bytea *t1 = PG_GETARG_BYTEA_PP(0);
2916 bytea *t2 = PG_GETARG_BYTEA_PP(1);
2917
2919}
2920
2921/*
2922 * bytea_catenate
2923 * Guts of byteacat(), broken out so it can be used by other functions
2924 *
2925 * Arguments can be in short-header form, but not compressed or out-of-line
2926 */
2927static bytea *
2929{
2930 bytea *result;
2931 int len1,
2932 len2,
2933 len;
2934 char *ptr;
2935
2936 len1 = VARSIZE_ANY_EXHDR(t1);
2937 len2 = VARSIZE_ANY_EXHDR(t2);
2938
2939 /* paranoia ... probably should throw error instead? */
2940 if (len1 < 0)
2941 len1 = 0;
2942 if (len2 < 0)
2943 len2 = 0;
2944
2945 len = len1 + len2 + VARHDRSZ;
2946 result = (bytea *) palloc(len);
2947
2948 /* Set size of result string... */
2949 SET_VARSIZE(result, len);
2950
2951 /* Fill data field of result string... */
2952 ptr = VARDATA(result);
2953 if (len1 > 0)
2954 memcpy(ptr, VARDATA_ANY(t1), len1);
2955 if (len2 > 0)
2956 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2957
2958 return result;
2959}
2960
2961#define PG_STR_GET_BYTEA(str_) \
2962 DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2963
2964/*
2965 * bytea_substr()
2966 * Return a substring starting at the specified position.
2967 * Cloned from text_substr and modified as required.
2968 *
2969 * Input:
2970 * - string
2971 * - starting position (is one-based)
2972 * - string length (optional)
2973 *
2974 * If the starting position is zero or less, then return from the start of the string
2975 * adjusting the length to be consistent with the "negative start" per SQL.
2976 * If the length is less than zero, an ERROR is thrown. If no third argument
2977 * (length) is provided, the length to the end of the string is assumed.
2978 */
2979Datum
2981{
2983 PG_GETARG_INT32(1),
2984 PG_GETARG_INT32(2),
2985 false));
2986}
2987
2988/*
2989 * bytea_substr_no_len -
2990 * Wrapper to avoid opr_sanity failure due to
2991 * one function accepting a different number of args.
2992 */
2993Datum
2995{
2997 PG_GETARG_INT32(1),
2998 -1,
2999 true));
3000}
3001
3002static bytea *
3004 int S,
3005 int L,
3006 bool length_not_specified)
3007{
3008 int32 S1; /* adjusted start position */
3009 int32 L1; /* adjusted substring length */
3010 int32 E; /* end position */
3011
3012 /*
3013 * The logic here should generally match text_substring().
3014 */
3015 S1 = Max(S, 1);
3016
3017 if (length_not_specified)
3018 {
3019 /*
3020 * Not passed a length - DatumGetByteaPSlice() grabs everything to the
3021 * end of the string if we pass it a negative value for length.
3022 */
3023 L1 = -1;
3024 }
3025 else if (L < 0)
3026 {
3027 /* SQL99 says to throw an error for E < S, i.e., negative length */
3028 ereport(ERROR,
3029 (errcode(ERRCODE_SUBSTRING_ERROR),
3030 errmsg("negative substring length not allowed")));
3031 L1 = -1; /* silence stupider compilers */
3032 }
3033 else if (pg_add_s32_overflow(S, L, &E))
3034 {
3035 /*
3036 * L could be large enough for S + L to overflow, in which case the
3037 * substring must run to end of string.
3038 */
3039 L1 = -1;
3040 }
3041 else
3042 {
3043 /*
3044 * A zero or negative value for the end position can happen if the
3045 * start was negative or one. SQL99 says to return a zero-length
3046 * string.
3047 */
3048 if (E < 1)
3049 return PG_STR_GET_BYTEA("");
3050
3051 L1 = E - S1;
3052 }
3053
3054 /*
3055 * If the start position is past the end of the string, SQL99 says to
3056 * return a zero-length string -- DatumGetByteaPSlice() will do that for
3057 * us. We need only convert S1 to zero-based starting position.
3058 */
3059 return DatumGetByteaPSlice(str, S1 - 1, L1);
3060}
3061
3062/*
3063 * byteaoverlay
3064 * Replace specified substring of first string with second
3065 *
3066 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
3067 * This code is a direct implementation of what the standard says.
3068 */
3069Datum
3071{
3072 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3073 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3074 int sp = PG_GETARG_INT32(2); /* substring start position */
3075 int sl = PG_GETARG_INT32(3); /* substring length */
3076
3077 PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3078}
3079
3080Datum
3082{
3083 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3084 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3085 int sp = PG_GETARG_INT32(2); /* substring start position */
3086 int sl;
3087
3088 sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
3089 PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3090}
3091
3092static bytea *
3093bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
3094{
3095 bytea *result;
3096 bytea *s1;
3097 bytea *s2;
3098 int sp_pl_sl;
3099
3100 /*
3101 * Check for possible integer-overflow cases. For negative sp, throw a
3102 * "substring length" error because that's what should be expected
3103 * according to the spec's definition of OVERLAY().
3104 */
3105 if (sp <= 0)
3106 ereport(ERROR,
3107 (errcode(ERRCODE_SUBSTRING_ERROR),
3108 errmsg("negative substring length not allowed")));
3109 if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
3110 ereport(ERROR,
3111 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
3112 errmsg("integer out of range")));
3113
3114 s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
3115 s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
3116 result = bytea_catenate(s1, t2);
3117 result = bytea_catenate(result, s2);
3118
3119 return result;
3120}
3121
3122/*
3123 * bit_count
3124 */
3125Datum
3127{
3128 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3129
3131}
3132
3133/*
3134 * byteapos -
3135 * Return the position of the specified substring.
3136 * Implements the SQL POSITION() function.
3137 * Cloned from textpos and modified as required.
3138 */
3139Datum
3141{
3142 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3143 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3144 int pos;
3145 int px,
3146 p;
3147 int len1,
3148 len2;
3149 char *p1,
3150 *p2;
3151
3152 len1 = VARSIZE_ANY_EXHDR(t1);
3153 len2 = VARSIZE_ANY_EXHDR(t2);
3154
3155 if (len2 <= 0)
3156 PG_RETURN_INT32(1); /* result for empty pattern */
3157
3158 p1 = VARDATA_ANY(t1);
3159 p2 = VARDATA_ANY(t2);
3160
3161 pos = 0;
3162 px = (len1 - len2);
3163 for (p = 0; p <= px; p++)
3164 {
3165 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
3166 {
3167 pos = p + 1;
3168 break;
3169 };
3170 p1++;
3171 };
3172
3173 PG_RETURN_INT32(pos);
3174}
3175
3176/*-------------------------------------------------------------
3177 * byteaGetByte
3178 *
3179 * this routine treats "bytea" as an array of bytes.
3180 * It returns the Nth byte (a number between 0 and 255).
3181 *-------------------------------------------------------------
3182 */
3183Datum
3185{
3186 bytea *v = PG_GETARG_BYTEA_PP(0);
3187 int32 n = PG_GETARG_INT32(1);
3188 int len;
3189 int byte;
3190
3192
3193 if (n < 0 || n >= len)
3194 ereport(ERROR,
3195 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3196 errmsg("index %d out of valid range, 0..%d",
3197 n, len - 1)));
3198
3199 byte = ((unsigned char *) VARDATA_ANY(v))[n];
3200
3201 PG_RETURN_INT32(byte);
3202}
3203
3204/*-------------------------------------------------------------
3205 * byteaGetBit
3206 *
3207 * This routine treats a "bytea" type like an array of bits.
3208 * It returns the value of the Nth bit (0 or 1).
3209 *
3210 *-------------------------------------------------------------
3211 */
3212Datum
3214{
3215 bytea *v = PG_GETARG_BYTEA_PP(0);
3216 int64 n = PG_GETARG_INT64(1);
3217 int byteNo,
3218 bitNo;
3219 int len;
3220 int byte;
3221
3223
3224 if (n < 0 || n >= (int64) len * 8)
3225 ereport(ERROR,
3226 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3227 errmsg("index %lld out of valid range, 0..%lld",
3228 (long long) n, (long long) len * 8 - 1)));
3229
3230 /* n/8 is now known < len, so safe to cast to int */
3231 byteNo = (int) (n / 8);
3232 bitNo = (int) (n % 8);
3233
3234 byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3235
3236 if (byte & (1 << bitNo))
3237 PG_RETURN_INT32(1);
3238 else
3239 PG_RETURN_INT32(0);
3240}
3241
3242/*-------------------------------------------------------------
3243 * byteaSetByte
3244 *
3245 * Given an instance of type 'bytea' creates a new one with
3246 * the Nth byte set to the given value.
3247 *
3248 *-------------------------------------------------------------
3249 */
3250Datum
3252{
3254 int32 n = PG_GETARG_INT32(1);
3255 int32 newByte = PG_GETARG_INT32(2);
3256 int len;
3257
3258 len = VARSIZE(res) - VARHDRSZ;
3259
3260 if (n < 0 || n >= len)
3261 ereport(ERROR,
3262 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3263 errmsg("index %d out of valid range, 0..%d",
3264 n, len - 1)));
3265
3266 /*
3267 * Now set the byte.
3268 */
3269 ((unsigned char *) VARDATA(res))[n] = newByte;
3270
3272}
3273
3274/*-------------------------------------------------------------
3275 * byteaSetBit
3276 *
3277 * Given an instance of type 'bytea' creates a new one with
3278 * the Nth bit set to the given value.
3279 *
3280 *-------------------------------------------------------------
3281 */
3282Datum
3284{
3286 int64 n = PG_GETARG_INT64(1);
3287 int32 newBit = PG_GETARG_INT32(2);
3288 int len;
3289 int oldByte,
3290 newByte;
3291 int byteNo,
3292 bitNo;
3293
3294 len = VARSIZE(res) - VARHDRSZ;
3295
3296 if (n < 0 || n >= (int64) len * 8)
3297 ereport(ERROR,
3298 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3299 errmsg("index %lld out of valid range, 0..%lld",
3300 (long long) n, (long long) len * 8 - 1)));
3301
3302 /* n/8 is now known < len, so safe to cast to int */
3303 byteNo = (int) (n / 8);
3304 bitNo = (int) (n % 8);
3305
3306 /*
3307 * sanity check!
3308 */
3309 if (newBit != 0 && newBit != 1)
3310 ereport(ERROR,
3311 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3312 errmsg("new bit must be 0 or 1")));
3313
3314 /*
3315 * Update the byte.
3316 */
3317 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3318
3319 if (newBit == 0)
3320 newByte = oldByte & (~(1 << bitNo));
3321 else
3322 newByte = oldByte | (1 << bitNo);
3323
3324 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3325
3327}
3328
3329
3330/* text_name()
3331 * Converts a text type to a Name type.
3332 */
3333Datum
3335{
3336 text *s = PG_GETARG_TEXT_PP(0);
3337 Name result;
3338 int len;
3339
3341
3342 /* Truncate oversize input */
3343 if (len >= NAMEDATALEN)
3345
3346 /* We use palloc0 here to ensure result is zero-padded */
3347 result = (Name) palloc0(NAMEDATALEN);
3348 memcpy(NameStr(*result), VARDATA_ANY(s), len);
3349
3350 PG_RETURN_NAME(result);
3351}
3352
3353/* name_text()
3354 * Converts a Name type to a text type.
3355 */
3356Datum
3358{
3359 Name s = PG_GETARG_NAME(0);
3360
3362}
3363
3364
3365/*
3366 * textToQualifiedNameList - convert a text object to list of names
3367 *
3368 * This implements the input parsing needed by nextval() and other
3369 * functions that take a text parameter representing a qualified name.
3370 * We split the name at dots, downcase if not double-quoted, and
3371 * truncate names if they're too long.
3372 */
3373List *
3375{
3376 char *rawname;
3377 List *result = NIL;
3378 List *namelist;
3379 ListCell *l;
3380
3381 /* Convert to C string (handles possible detoasting). */
3382 /* Note we rely on being able to modify rawname below. */
3383 rawname = text_to_cstring(textval);
3384
3385 if (!SplitIdentifierString(rawname, '.', &namelist))
3386 ereport(ERROR,
3387 (errcode(ERRCODE_INVALID_NAME),
3388 errmsg("invalid name syntax")));
3389
3390 if (namelist == NIL)
3391 ereport(ERROR,
3392 (errcode(ERRCODE_INVALID_NAME),
3393 errmsg("invalid name syntax")));
3394
3395 foreach(l, namelist)
3396 {
3397 char *curname = (char *) lfirst(l);
3398
3399 result = lappend(result, makeString(pstrdup(curname)));
3400 }
3401
3402 pfree(rawname);
3403 list_free(namelist);
3404
3405 return result;
3406}
3407
3408/*
3409 * SplitIdentifierString --- parse a string containing identifiers
3410 *
3411 * This is the guts of textToQualifiedNameList, and is exported for use in
3412 * other situations such as parsing GUC variables. In the GUC case, it's
3413 * important to avoid memory leaks, so the API is designed to minimize the
3414 * amount of stuff that needs to be allocated and freed.
3415 *
3416 * Inputs:
3417 * rawstring: the input string; must be overwritable! On return, it's
3418 * been modified to contain the separated identifiers.
3419 * separator: the separator punctuation expected between identifiers
3420 * (typically '.' or ','). Whitespace may also appear around
3421 * identifiers.
3422 * Outputs:
3423 * namelist: filled with a palloc'd list of pointers to identifiers within
3424 * rawstring. Caller should list_free() this even on error return.
3425 *
3426 * Returns true if okay, false if there is a syntax error in the string.
3427 *
3428 * Note that an empty string is considered okay here, though not in
3429 * textToQualifiedNameList.
3430 */
3431bool
3432SplitIdentifierString(char *rawstring, char separator,
3433 List **namelist)
3434{
3435 char *nextp = rawstring;
3436 bool done = false;
3437
3438 *namelist = NIL;
3439
3440 while (scanner_isspace(*nextp))
3441 nextp++; /* skip leading whitespace */
3442
3443 if (*nextp == '\0')
3444 return true; /* allow empty string */
3445
3446 /* At the top of the loop, we are at start of a new identifier. */
3447 do
3448 {
3449 char *curname;
3450 char *endp;
3451
3452 if (*nextp == '"')
3453 {
3454 /* Quoted name --- collapse quote-quote pairs, no downcasing */
3455 curname = nextp + 1;
3456 for (;;)
3457 {
3458 endp = strchr(nextp + 1, '"');
3459 if (endp == NULL)
3460 return false; /* mismatched quotes */
3461 if (endp[1] != '"')
3462 break; /* found end of quoted name */
3463 /* Collapse adjacent quotes into one quote, and look again */
3464 memmove(endp, endp + 1, strlen(endp));
3465 nextp = endp;
3466 }
3467 /* endp now points at the terminating quote */
3468 nextp = endp + 1;
3469 }
3470 else
3471 {
3472 /* Unquoted name --- extends to separator or whitespace */
3473 char *downname;
3474 int len;
3475
3476 curname = nextp;
3477 while (*nextp && *nextp != separator &&
3478 !scanner_isspace(*nextp))
3479 nextp++;
3480 endp = nextp;
3481 if (curname == nextp)
3482 return false; /* empty unquoted name not allowed */
3483
3484 /*
3485 * Downcase the identifier, using same code as main lexer does.
3486 *
3487 * XXX because we want to overwrite the input in-place, we cannot
3488 * support a downcasing transformation that increases the string
3489 * length. This is not a problem given the current implementation
3490 * of downcase_truncate_identifier, but we'll probably have to do
3491 * something about this someday.
3492 */
3493 len = endp - curname;
3494 downname = downcase_truncate_identifier(curname, len, false);
3495 Assert(strlen(downname) <= len);
3496 strncpy(curname, downname, len); /* strncpy is required here */
3497 pfree(downname);
3498 }
3499
3500 while (scanner_isspace(*nextp))
3501 nextp++; /* skip trailing whitespace */
3502
3503 if (*nextp == separator)
3504 {
3505 nextp++;
3506 while (scanner_isspace(*nextp))
3507 nextp++; /* skip leading whitespace for next */
3508 /* we expect another name, so done remains false */
3509 }
3510 else if (*nextp == '\0')
3511 done = true;
3512 else
3513 return false; /* invalid syntax */
3514
3515 /* Now safe to overwrite separator with a null */
3516 *endp = '\0';
3517
3518 /* Truncate name if it's overlength */
3519 truncate_identifier(curname, strlen(curname), false);
3520
3521 /*
3522 * Finished isolating current name --- add it to list
3523 */
3524 *namelist = lappend(*namelist, curname);
3525
3526 /* Loop back if we didn't reach end of string */
3527 } while (!done);
3528
3529 return true;
3530}
3531
3532
3533/*
3534 * SplitDirectoriesString --- parse a string containing file/directory names
3535 *
3536 * This works fine on file names too; the function name is historical.
3537 *
3538 * This is similar to SplitIdentifierString, except that the parsing
3539 * rules are meant to handle pathnames instead of identifiers: there is
3540 * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3541 * and we apply canonicalize_path() to each extracted string. Because of the
3542 * last, the returned strings are separately palloc'd rather than being
3543 * pointers into rawstring --- but we still scribble on rawstring.
3544 *
3545 * Inputs:
3546 * rawstring: the input string; must be modifiable!
3547 * separator: the separator punctuation expected between directories
3548 * (typically ',' or ';'). Whitespace may also appear around
3549 * directories.
3550 * Outputs:
3551 * namelist: filled with a palloc'd list of directory names.
3552 * Caller should list_free_deep() this even on error return.
3553 *
3554 * Returns true if okay, false if there is a syntax error in the string.
3555 *
3556 * Note that an empty string is considered okay here.
3557 */
3558bool
3559SplitDirectoriesString(char *rawstring, char separator,
3560 List **namelist)
3561{
3562 char *nextp = rawstring;
3563 bool done = false;
3564
3565 *namelist = NIL;
3566
3567 while (scanner_isspace(*nextp))
3568 nextp++; /* skip leading whitespace */
3569
3570 if (*nextp == '\0')
3571 return true; /* allow empty string */
3572
3573 /* At the top of the loop, we are at start of a new directory. */
3574 do
3575 {
3576 char *curname;
3577 char *endp;
3578
3579 if (*nextp == '"')
3580 {
3581 /* Quoted name --- collapse quote-quote pairs */
3582 curname = nextp + 1;
3583 for (;;)
3584 {
3585 endp = strchr(nextp + 1, '"');
3586 if (endp == NULL)
3587 return false; /* mismatched quotes */
3588 if (endp[1] != '"')
3589 break; /* found end of quoted name */
3590 /* Collapse adjacent quotes into one quote, and look again */
3591 memmove(endp, endp + 1, strlen(endp));
3592 nextp = endp;
3593 }
3594 /* endp now points at the terminating quote */
3595 nextp = endp + 1;
3596 }
3597 else
3598 {
3599 /* Unquoted name --- extends to separator or end of string */
3600 curname = endp = nextp;
3601 while (*nextp && *nextp != separator)
3602 {
3603 /* trailing whitespace should not be included in name */
3604 if (!scanner_isspace(*nextp))
3605 endp = nextp + 1;
3606 nextp++;
3607 }
3608 if (curname == endp)
3609 return false; /* empty unquoted name not allowed */
3610 }
3611
3612 while (scanner_isspace(*nextp))
3613 nextp++; /* skip trailing whitespace */
3614
3615 if (*nextp == separator)
3616 {
3617 nextp++;
3618 while (scanner_isspace(*nextp))
3619 nextp++; /* skip leading whitespace for next */
3620 /* we expect another name, so done remains false */
3621 }
3622 else if (*nextp == '\0')
3623 done = true;
3624 else
3625 return false; /* invalid syntax */
3626
3627 /* Now safe to overwrite separator with a null */
3628 *endp = '\0';
3629
3630 /* Truncate path if it's overlength */
3631 if (strlen(curname) >= MAXPGPATH)
3632 curname[MAXPGPATH - 1] = '\0';
3633
3634 /*
3635 * Finished isolating current name --- add it to list
3636 */
3637 curname = pstrdup(curname);
3638 canonicalize_path(curname);
3639 *namelist = lappend(*namelist, curname);
3640
3641 /* Loop back if we didn't reach end of string */
3642 } while (!done);
3643
3644 return true;
3645}
3646
3647
3648/*
3649 * SplitGUCList --- parse a string containing identifiers or file names
3650 *
3651 * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
3652 * presuming whether the elements will be taken as identifiers or file names.
3653 * We assume the input has already been through flatten_set_variable_args(),
3654 * so that we need never downcase (if appropriate, that was done already).
3655 * Nor do we ever truncate, since we don't know the correct max length.
3656 * We disallow embedded whitespace for simplicity (it shouldn't matter,
3657 * because any embedded whitespace should have led to double-quoting).
3658 * Otherwise the API is identical to SplitIdentifierString.
3659 *
3660 * XXX it's annoying to have so many copies of this string-splitting logic.
3661 * However, it's not clear that having one function with a bunch of option
3662 * flags would be much better.
3663 *
3664 * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
3665 * Be sure to update that if you have to change this.
3666 *
3667 * Inputs:
3668 * rawstring: the input string; must be overwritable! On return, it's
3669 * been modified to contain the separated identifiers.
3670 * separator: the separator punctuation expected between identifiers
3671 * (typically '.' or ','). Whitespace may also appear around
3672 * identifiers.
3673 * Outputs:
3674 * namelist: filled with a palloc'd list of pointers to identifiers within
3675 * rawstring. Caller should list_free() this even on error return.
3676 *
3677 * Returns true if okay, false if there is a syntax error in the string.
3678 */
3679bool
3680SplitGUCList(char *rawstring, char separator,
3681 List **namelist)
3682{
3683 char *nextp = rawstring;
3684 bool done = false;
3685
3686 *namelist = NIL;
3687
3688 while (scanner_isspace(*nextp))
3689 nextp++; /* skip leading whitespace */
3690
3691 if (*nextp == '\0')
3692 return true; /* allow empty string */
3693
3694 /* At the top of the loop, we are at start of a new identifier. */
3695 do
3696 {
3697 char *curname;
3698 char *endp;
3699
3700 if (*nextp == '"')
3701 {
3702 /* Quoted name --- collapse quote-quote pairs */
3703 curname = nextp + 1;
3704 for (;;)
3705 {
3706 endp = strchr(nextp + 1, '"');
3707 if (endp == NULL)
3708 return false; /* mismatched quotes */
3709 if (endp[1] != '"')
3710 break; /* found end of quoted name */
3711 /* Collapse adjacent quotes into one quote, and look again */
3712 memmove(endp, endp + 1, strlen(endp));
3713 nextp = endp;
3714 }
3715 /* endp now points at the terminating quote */
3716 nextp = endp + 1;
3717 }
3718 else
3719 {
3720 /* Unquoted name --- extends to separator or whitespace */
3721 curname = nextp;
3722 while (*nextp && *nextp != separator &&
3723 !scanner_isspace(*nextp))
3724 nextp++;
3725 endp = nextp;
3726 if (curname == nextp)
3727 return false; /* empty unquoted name not allowed */
3728 }
3729
3730 while (scanner_isspace(*nextp))
3731 nextp++; /* skip trailing whitespace */
3732
3733 if (*nextp == separator)
3734 {
3735 nextp++;
3736 while (scanner_isspace(*nextp))
3737 nextp++; /* skip leading whitespace for next */
3738 /* we expect another name, so done remains false */
3739 }
3740 else if (*nextp == '\0')
3741 done = true;
3742 else
3743 return false; /* invalid syntax */
3744
3745 /* Now safe to overwrite separator with a null */
3746 *endp = '\0';
3747
3748 /*
3749 * Finished isolating current name --- add it to list
3750 */
3751 *namelist = lappend(*namelist, curname);
3752
3753 /* Loop back if we didn't reach end of string */
3754 } while (!done);
3755
3756 return true;
3757}
3758
3759
3760/*****************************************************************************
3761 * Comparison Functions used for bytea
3762 *
3763 * Note: btree indexes need these routines not to leak memory; therefore,
3764 * be careful to free working copies of toasted datums. Most places don't
3765 * need to be so careful.
3766 *****************************************************************************/
3767
3768Datum
3770{
3771 Datum arg1 = PG_GETARG_DATUM(0);
3772 Datum arg2 = PG_GETARG_DATUM(1);
3773 bool result;
3774 Size len1,
3775 len2;
3776
3777 /*
3778 * We can use a fast path for unequal lengths, which might save us from
3779 * having to detoast one or both values.
3780 */
3781 len1 = toast_raw_datum_size(arg1);
3782 len2 = toast_raw_datum_size(arg2);
3783 if (len1 != len2)
3784 result = false;
3785 else
3786 {
3787 bytea *barg1 = DatumGetByteaPP(arg1);
3788 bytea *barg2 = DatumGetByteaPP(arg2);
3789
3790 result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3791 len1 - VARHDRSZ) == 0);
3792
3793 PG_FREE_IF_COPY(barg1, 0);
3794 PG_FREE_IF_COPY(barg2, 1);
3795 }
3796
3797 PG_RETURN_BOOL(result);
3798}
3799
3800Datum
3802{
3803 Datum arg1 = PG_GETARG_DATUM(0);
3804 Datum arg2 = PG_GETARG_DATUM(1);
3805 bool result;
3806 Size len1,
3807 len2;
3808
3809 /*
3810 * We can use a fast path for unequal lengths, which might save us from
3811 * having to detoast one or both values.
3812 */
3813 len1 = toast_raw_datum_size(arg1);
3814 len2 = toast_raw_datum_size(arg2);
3815 if (len1 != len2)
3816 result = true;
3817 else
3818 {
3819 bytea *barg1 = DatumGetByteaPP(arg1);
3820 bytea *barg2 = DatumGetByteaPP(arg2);
3821
3822 result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3823 len1 - VARHDRSZ) != 0);
3824
3825 PG_FREE_IF_COPY(barg1, 0);
3826 PG_FREE_IF_COPY(barg2, 1);
3827 }
3828
3829 PG_RETURN_BOOL(result);
3830}
3831
3832Datum
3834{
3835 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3836 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3837 int len1,
3838 len2;
3839 int cmp;
3840
3841 len1 = VARSIZE_ANY_EXHDR(arg1);
3842 len2 = VARSIZE_ANY_EXHDR(arg2);
3843
3844 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3845
3846 PG_FREE_IF_COPY(arg1, 0);
3847 PG_FREE_IF_COPY(arg2, 1);
3848
3849 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3850}
3851
3852Datum
3854{
3855 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3856 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3857 int len1,
3858 len2;
3859 int cmp;
3860
3861 len1 = VARSIZE_ANY_EXHDR(arg1);
3862 len2 = VARSIZE_ANY_EXHDR(arg2);
3863
3864 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3865
3866 PG_FREE_IF_COPY(arg1, 0);
3867 PG_FREE_IF_COPY(arg2, 1);
3868
3869 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3870}
3871
3872Datum
3874{
3875 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3876 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3877 int len1,
3878 len2;
3879 int cmp;
3880
3881 len1 = VARSIZE_ANY_EXHDR(arg1);
3882 len2 = VARSIZE_ANY_EXHDR(arg2);
3883
3884 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3885
3886 PG_FREE_IF_COPY(arg1, 0);
3887 PG_FREE_IF_COPY(arg2, 1);
3888
3889 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3890}
3891
3892Datum
3894{
3895 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3896 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3897 int len1,
3898 len2;
3899 int cmp;
3900
3901 len1 = VARSIZE_ANY_EXHDR(arg1);
3902 len2 = VARSIZE_ANY_EXHDR(arg2);
3903
3904 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3905
3906 PG_FREE_IF_COPY(arg1, 0);
3907 PG_FREE_IF_COPY(arg2, 1);
3908
3909 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
3910}
3911
3912Datum
3914{
3915 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3916 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3917 int len1,
3918 len2;
3919 int cmp;
3920
3921 len1 = VARSIZE_ANY_EXHDR(arg1);
3922 len2 = VARSIZE_ANY_EXHDR(arg2);
3923
3924 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3925 if ((cmp == 0) && (len1 != len2))
3926 cmp = (len1 < len2) ? -1 : 1;
3927
3928 PG_FREE_IF_COPY(arg1, 0);
3929 PG_FREE_IF_COPY(arg2, 1);
3930
3932}
3933
3934Datum
3936{
3937 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3938 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3939 bytea *result;
3940 int len1,
3941 len2;
3942 int cmp;
3943
3944 len1 = VARSIZE_ANY_EXHDR(arg1);
3945 len2 = VARSIZE_ANY_EXHDR(arg2);
3946
3947 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3948 result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
3949
3950 PG_RETURN_BYTEA_P(result);
3951}
3952
3953Datum
3955{
3956 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3957 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3958 bytea *result;
3959 int len1,
3960 len2;
3961 int cmp;
3962
3963 len1 = VARSIZE_ANY_EXHDR(arg1);
3964 len2 = VARSIZE_ANY_EXHDR(arg2);
3965
3966 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3967 result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
3968
3969 PG_RETURN_BYTEA_P(result);
3970}
3971
3972Datum
3974{
3976 MemoryContext oldcontext;
3977
3978 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3979
3980 /* Use generic string SortSupport, forcing "C" collation */
3981 varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
3982
3983 MemoryContextSwitchTo(oldcontext);
3984
3986}
3987
3988/*
3989 * appendStringInfoText
3990 *
3991 * Append a text to str.
3992 * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
3993 */
3994static void
3996{
3998}
3999
4000/*
4001 * replace_text
4002 * replace all occurrences of 'old_sub_str' in 'orig_str'
4003 * with 'new_sub_str' to form 'new_str'
4004 *
4005 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
4006 * otherwise returns 'new_str'
4007 */
4008Datum
4010{
4011 text *src_text = PG_GETARG_TEXT_PP(0);
4012 text *from_sub_text = PG_GETARG_TEXT_PP(1);
4013 text *to_sub_text = PG_GETARG_TEXT_PP(2);
4014 int src_text_len;
4015 int from_sub_text_len;
4017 text *ret_text;
4018 int chunk_len;
4019 char *curr_ptr;
4020 char *start_ptr;
4022 bool found;
4023
4024 src_text_len = VARSIZE_ANY_EXHDR(src_text);
4025 from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
4026
4027 /* Return unmodified source string if empty source or pattern */
4028 if (src_text_len < 1 || from_sub_text_len < 1)
4029 {
4030 PG_RETURN_TEXT_P(src_text);
4031 }
4032
4033 text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
4034
4035 found = text_position_next(&state);
4036
4037 /* When the from_sub_text is not found, there is nothing to do. */
4038 if (!found)
4039 {
4041 PG_RETURN_TEXT_P(src_text);
4042 }
4044 start_ptr = VARDATA_ANY(src_text);
4045
4047
4048 do
4049 {
4051
4052 /* copy the data skipped over by last text_position_next() */
4053 chunk_len = curr_ptr - start_ptr;
4054 appendBinaryStringInfo(&str, start_ptr, chunk_len);
4055
4056 appendStringInfoText(&str, to_sub_text);
4057
4058 start_ptr = curr_ptr + from_sub_text_len;
4059
4060 found = text_position_next(&state);
4061 if (found)
4063 }
4064 while (found);
4065
4066 /* copy trailing data */
4067 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4068 appendBinaryStringInfo(&str, start_ptr, chunk_len);
4069
4071
4072 ret_text = cstring_to_text_with_len(str.data, str.len);
4073 pfree(str.data);
4074
4075 PG_RETURN_TEXT_P(ret_text);
4076}
4077
4078/*
4079 * check_replace_text_has_escape
4080 *
4081 * Returns 0 if text contains no backslashes that need processing.
4082 * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
4083 * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
4084 */
4085static int
4087{
4088 int result = 0;
4089 const char *p = VARDATA_ANY(replace_text);
4090 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4091
4092 while (p < p_end)
4093 {
4094 /* Find next escape char, if any. */
4095 p = memchr(p, '\\', p_end - p);
4096 if (p == NULL)
4097 break;
4098 p++;
4099 /* Note: a backslash at the end doesn't require extra processing. */
4100 if (p < p_end)
4101 {
4102 if (*p >= '1' && *p <= '9')
4103 return 2; /* Found a submatch specifier, so done */
4104 result = 1; /* Found some other sequence, keep looking */
4105 p++;
4106 }
4107 }
4108 return result;
4109}
4110
4111/*
4112 * appendStringInfoRegexpSubstr
4113 *
4114 * Append replace_text to str, substituting regexp back references for
4115 * \n escapes. start_ptr is the start of the match in the source string,
4116 * at logical character position data_pos.
4117 */
4118static void
4120 regmatch_t *pmatch,
4121 char *start_ptr, int data_pos)
4122{
4123 const char *p = VARDATA_ANY(replace_text);
4124 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4125
4126 while (p < p_end)
4127 {
4128 const char *chunk_start = p;
4129 int so;
4130 int eo;
4131
4132 /* Find next escape char, if any. */
4133 p = memchr(p, '\\', p_end - p);
4134 if (p == NULL)
4135 p = p_end;
4136
4137 /* Copy the text we just scanned over, if any. */
4138 if (p > chunk_start)
4139 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
4140
4141 /* Done if at end of string, else advance over escape char. */
4142 if (p >= p_end)
4143 break;
4144 p++;
4145
4146 if (p >= p_end)
4147 {
4148 /* Escape at very end of input. Treat same as unexpected char */
4150 break;
4151 }
4152
4153 if (*p >= '1' && *p <= '9')
4154 {
4155 /* Use the back reference of regexp. */
4156 int idx = *p - '0';
4157
4158 so = pmatch[idx].rm_so;
4159 eo = pmatch[idx].rm_eo;
4160 p++;
4161 }
4162 else if (*p == '&')
4163 {
4164 /* Use the entire matched string. */
4165 so = pmatch[0].rm_so;
4166 eo = pmatch[0].rm_eo;
4167 p++;
4168 }
4169 else if (*p == '\\')
4170 {
4171 /* \\ means transfer one \ to output. */
4173 p++;
4174 continue;
4175 }
4176 else
4177 {
4178 /*
4179 * If escape char is not followed by any expected char, just treat
4180 * it as ordinary data to copy. (XXX would it be better to throw
4181 * an error?)
4182 */
4184 continue;
4185 }
4186
4187 if (so >= 0 && eo >= 0)
4188 {
4189 /*
4190 * Copy the text that is back reference of regexp. Note so and eo
4191 * are counted in characters not bytes.
4192 */
4193 char *chunk_start;
4194 int chunk_len;
4195
4196 Assert(so >= data_pos);
4197 chunk_start = start_ptr;
4198 chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
4199 chunk_len = charlen_to_bytelen(chunk_start, eo - so);
4200 appendBinaryStringInfo(str, chunk_start, chunk_len);
4201 }
4202 }
4203}
4204
4205/*
4206 * replace_text_regexp
4207 *
4208 * replace substring(s) in src_text that match pattern with replace_text.
4209 * The replace_text can contain backslash markers to substitute
4210 * (parts of) the matched text.
4211 *
4212 * cflags: regexp compile flags.
4213 * collation: collation to use.
4214 * search_start: the character (not byte) offset in src_text at which to
4215 * begin searching.
4216 * n: if 0, replace all matches; if > 0, replace only the N'th match.
4217 */
4218text *
4219replace_text_regexp(text *src_text, text *pattern_text,
4221 int cflags, Oid collation,
4222 int search_start, int n)
4223{
4224 text *ret_text;
4225 regex_t *re;
4226 int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4227 int nmatches = 0;
4229 regmatch_t pmatch[10]; /* main match, plus \1 to \9 */
4230 int nmatch = lengthof(pmatch);
4231 pg_wchar *data;
4232 size_t data_len;
4233 int data_pos;
4234 char *start_ptr;
4235 int escape_status;
4236
4238
4239 /* Convert data string to wide characters. */
4240 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
4241 data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
4242
4243 /* Check whether replace_text has escapes, especially regexp submatches. */
4245
4246 /* If no regexp submatches, we can use REG_NOSUB. */
4247 if (escape_status < 2)
4248 {
4249 cflags |= REG_NOSUB;
4250 /* Also tell pg_regexec we only want the whole-match location. */
4251 nmatch = 1;
4252 }
4253
4254 /* Prepare the regexp. */
4255 re = RE_compile_and_cache(pattern_text, cflags, collation);
4256
4257 /* start_ptr points to the data_pos'th character of src_text */
4258 start_ptr = (char *) VARDATA_ANY(src_text);
4259 data_pos = 0;
4260
4261 while (search_start <= data_len)
4262 {
4263 int regexec_result;
4264
4266
4267 regexec_result = pg_regexec(re,
4268 data,
4269 data_len,
4270 search_start,
4271 NULL, /* no details */
4272 nmatch,
4273 pmatch,
4274 0);
4275
4276 if (regexec_result == REG_NOMATCH)
4277 break;
4278
4279 if (regexec_result != REG_OKAY)
4280 {
4281 char errMsg[100];
4282
4283 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
4284 ereport(ERROR,
4285 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
4286 errmsg("regular expression failed: %s", errMsg)));
4287 }
4288
4289 /*
4290 * Count matches, and decide whether to replace this match.
4291 */
4292 nmatches++;
4293 if (n > 0 && nmatches != n)
4294 {
4295 /*
4296 * No, so advance search_start, but not start_ptr/data_pos. (Thus,
4297 * we treat the matched text as if it weren't matched, and copy it
4298 * to the output later.)
4299 */
4300 search_start = pmatch[0].rm_eo;
4301 if (pmatch[0].rm_so == pmatch[0].rm_eo)
4302 search_start++;
4303 continue;
4304 }
4305
4306 /*
4307 * Copy the text to the left of the match position. Note we are given
4308 * character not byte indexes.
4309 */
4310 if (pmatch[0].rm_so - data_pos > 0)
4311 {
4312 int chunk_len;
4313
4314 chunk_len = charlen_to_bytelen(start_ptr,
4315 pmatch[0].rm_so - data_pos);
4316 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4317
4318 /*
4319 * Advance start_ptr over that text, to avoid multiple rescans of
4320 * it if the replace_text contains multiple back-references.
4321 */
4322 start_ptr += chunk_len;
4323 data_pos = pmatch[0].rm_so;
4324 }
4325
4326 /*
4327 * Copy the replace_text, processing escapes if any are present.
4328 */
4329 if (escape_status > 0)
4331 start_ptr, data_pos);
4332 else
4334
4335 /* Advance start_ptr and data_pos over the matched text. */
4336 start_ptr += charlen_to_bytelen(start_ptr,
4337 pmatch[0].rm_eo - data_pos);
4338 data_pos = pmatch[0].rm_eo;
4339
4340 /*
4341 * If we only want to replace one occurrence, we're done.
4342 */
4343 if (n > 0)
4344 break;
4345
4346 /*
4347 * Advance search position. Normally we start the next search at the
4348 * end of the previous match; but if the match was of zero length, we
4349 * have to advance by one character, or we'd just find the same match
4350 * again.
4351 */
4352 search_start = data_pos;
4353 if (pmatch[0].rm_so == pmatch[0].rm_eo)
4354 search_start++;
4355 }
4356
4357 /*
4358 * Copy the text to the right of the last match.
4359 */
4360 if (data_pos < data_len)
4361 {
4362 int chunk_len;
4363
4364 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4365 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4366 }
4367
4368 ret_text = cstring_to_text_with_len(buf.data, buf.len);
4369 pfree(buf.data);
4370 pfree(data);
4371
4372 return ret_text;
4373}
4374
4375/*
4376 * split_part
4377 * parse input string based on provided field separator
4378 * return N'th item (1 based, negative counts from end)
4379 */
4380Datum
4382{
4383 text *inputstring = PG_GETARG_TEXT_PP(0);
4384 text *fldsep = PG_GETARG_TEXT_PP(1);
4385 int fldnum = PG_GETARG_INT32(2);
4386 int inputstring_len;
4387 int fldsep_len;
4389 char *start_ptr;
4390 char *end_ptr;
4391 text *result_text;
4392 bool found;
4393
4394 /* field number is 1 based */
4395 if (fldnum == 0)
4396 ereport(ERROR,
4397 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4398 errmsg("field position must not be zero")));
4399
4400 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4401 fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4402
4403 /* return empty string for empty input string */
4404 if (inputstring_len < 1)
4406
4407 /* handle empty field separator */
4408 if (fldsep_len < 1)
4409 {
4410 /* if first or last field, return input string, else empty string */
4411 if (fldnum == 1 || fldnum == -1)
4412 PG_RETURN_TEXT_P(inputstring);
4413 else
4415 }
4416
4417 /* find the first field separator */
4418 text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4419
4420 found = text_position_next(&state);
4421
4422 /* special case if fldsep not found at all */
4423 if (!found)
4424 {
4426 /* if first or last field, return input string, else empty string */
4427 if (fldnum == 1 || fldnum == -1)
4428 PG_RETURN_TEXT_P(inputstring);
4429 else
4431 }
4432
4433 /*
4434 * take care of a negative field number (i.e. count from the right) by
4435 * converting to a positive field number; we need total number of fields
4436 */
4437 if (fldnum < 0)
4438 {
4439 /* we found a fldsep, so there are at least two fields */
4440 int numfields = 2;
4441
4442 while (text_position_next(&state))
4443 numfields++;
4444
4445 /* special case of last field does not require an extra pass */
4446 if (fldnum == -1)
4447 {
4448 start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
4449 end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
4452 end_ptr - start_ptr));
4453 }
4454
4455 /* else, convert fldnum to positive notation */
4456 fldnum += numfields + 1;
4457
4458 /* if nonexistent field, return empty string */
4459 if (fldnum <= 0)
4460 {
4463 }
4464
4465 /* reset to pointing at first match, but now with positive fldnum */
4467 found = text_position_next(&state);
4468 Assert(found);
4469 }
4470
4471 /* identify bounds of first field */
4472 start_ptr = VARDATA_ANY(inputstring);
4474
4475 while (found && --fldnum > 0)
4476 {
4477 /* identify bounds of next field */
4478 start_ptr = end_ptr + fldsep_len;
4479 found = text_position_next(&state);
4480 if (found)
4482 }
4483
4485
4486 if (fldnum > 0)
4487 {
4488 /* N'th field separator not found */
4489 /* if last field requested, return it, else empty string */
4490 if (fldnum == 1)
4491 {
4492 int last_len = start_ptr - VARDATA_ANY(inputstring);
4493
4494 result_text = cstring_to_text_with_len(start_ptr,
4495 inputstring_len - last_len);
4496 }
4497 else
4498 result_text = cstring_to_text("");
4499 }
4500 else
4501 {
4502 /* non-last field requested */
4503 result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
4504 }
4505
4506 PG_RETURN_TEXT_P(result_text);
4507}
4508
4509/*
4510 * Convenience function to return true when two text params are equal.
4511 */
4512static bool
4514{
4516 collid,
4517 PointerGetDatum(txt1),
4518 PointerGetDatum(txt2)));
4519}
4520
4521/*
4522 * text_to_array
4523 * parse input string and return text array of elements,
4524 * based on provided field separator
4525 */
4526Datum
4528{
4529 SplitTextOutputData tstate;
4530
4531 /* For array output, tstate should start as all zeroes */
4532 memset(&tstate, 0, sizeof(tstate));
4533
4534 if (!split_text(fcinfo, &tstate))
4536
4537 if (tstate.astate == NULL)
4539
4542}
4543
4544/*
4545 * text_to_array_null
4546 * parse input string and return text array of elements,
4547 * based on provided field separator and null string
4548 *
4549 * This is a separate entry point only to prevent the regression tests from
4550 * complaining about different argument sets for the same internal function.
4551 */
4552Datum
4554{
4555 return text_to_array(fcinfo);
4556}
4557
4558/*
4559 * text_to_table
4560 * parse input string and return table of elements,
4561 * based on provided field separator
4562 */
4563Datum
4565{
4566 ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
4567 SplitTextOutputData tstate;
4568
4569 tstate.astate = NULL;
4571 tstate.tupstore = rsi->setResult;
4572 tstate.tupdesc = rsi->setDesc;
4573
4574 (void) split_text(fcinfo, &tstate);
4575
4576 return (Datum) 0;
4577}
4578
4579/*
4580 * text_to_table_null
4581 * parse input string and return table of elements,
4582 * based on provided field separator and null string
4583 *
4584 * This is a separate entry point only to prevent the regression tests from
4585 * complaining about different argument sets for the same internal function.
4586 */
4587Datum
4589{
4590 return text_to_table(fcinfo);
4591}
4592
4593/*
4594 * Common code for text_to_array, text_to_array_null, text_to_table
4595 * and text_to_table_null functions.
4596 *
4597 * These are not strict so we have to test for null inputs explicitly.
4598 * Returns false if result is to be null, else returns true.
4599 *
4600 * Note that if the result is valid but empty (zero elements), we return
4601 * without changing *tstate --- caller must handle that case, too.
4602 */
4603static bool
4605{
4606 text *inputstring;
4607 text *fldsep;
4608 text *null_string;
4609 Oid collation = PG_GET_COLLATION();
4610 int inputstring_len;
4611 int fldsep_len;
4612 char *start_ptr;
4613 text *result_text;
4614
4615 /* when input string is NULL, then result is NULL too */
4616 if (PG_ARGISNULL(0))
4617 return false;
4618
4619 inputstring = PG_GETARG_TEXT_PP(0);
4620
4621 /* fldsep can be NULL */
4622 if (!PG_ARGISNULL(1))
4623 fldsep = PG_GETARG_TEXT_PP(1);
4624 else
4625 fldsep = NULL;
4626
4627 /* null_string can be NULL or omitted */
4628 if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4629 null_string = PG_GETARG_TEXT_PP(2);
4630 else
4631 null_string = NULL;
4632
4633 if (fldsep != NULL)
4634 {
4635 /*
4636 * Normal case with non-null fldsep. Use the text_position machinery
4637 * to search for occurrences of fldsep.
4638 */
4640
4641 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4642 fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4643
4644 /* return empty set for empty input string */
4645 if (inputstring_len < 1)
4646 return true;
4647
4648 /* empty field separator: return input string as a one-element set */
4649 if (fldsep_len < 1)
4650 {
4651 split_text_accum_result(tstate, inputstring,
4652 null_string, collation);
4653 return true;
4654 }
4655
4656 text_position_setup(inputstring, fldsep, collation, &state);
4657
4658 start_ptr = VARDATA_ANY(inputstring);
4659
4660 for (;;)
4661 {
4662 bool found;
4663 char *end_ptr;
4664 int chunk_len;
4665
4667
4668 found = text_position_next(&state);
4669 if (!found)
4670 {
4671 /* fetch last field */
4672 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4673 end_ptr = NULL; /* not used, but some compilers complain */
4674 }
4675 else
4676 {
4677 /* fetch non-last field */
4679 chunk_len = end_ptr - start_ptr;
4680 }
4681
4682 /* build a temp text datum to pass to split_text_accum_result */
4683 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4684
4685 /* stash away this field */
4686 split_text_accum_result(tstate, result_text,
4687 null_string, collation);
4688
4689 pfree(result_text);
4690
4691 if (!found)
4692 break;
4693
4694 start_ptr = end_ptr + fldsep_len;
4695 }
4696
4698 }
4699 else
4700 {
4701 /*
4702 * When fldsep is NULL, each character in the input string becomes a
4703 * separate element in the result set. The separator is effectively
4704 * the space between characters.
4705 */
4706 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4707
4708 start_ptr = VARDATA_ANY(inputstring);
4709
4710 while (inputstring_len > 0)
4711 {
4712 int chunk_len = pg_mblen(start_ptr);
4713
4715
4716 /* build a temp text datum to pass to split_text_accum_result */
4717 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4718
4719 /* stash away this field */
4720 split_text_accum_result(tstate, result_text,
4721 null_string, collation);
4722
4723 pfree(result_text);
4724
4725 start_ptr += chunk_len;
4726 inputstring_len -= chunk_len;
4727 }
4728 }
4729
4730 return true;
4731}
4732
4733/*
4734 * Add text item to result set (table or array).
4735 *
4736 * This is also responsible for checking to see if the item matches
4737 * the null_string, in which case we should emit NULL instead.
4738 */
4739static void
4741 text *field_value,
4742 text *null_string,
4743 Oid collation)
4744{
4745 bool is_null = false;
4746
4747 if (null_string && text_isequal(field_value, null_string, collation))
4748 is_null = true;
4749
4750 if (tstate->tupstore)
4751 {
4752 Datum values[1];
4753 bool nulls[1];
4754
4755 values[0] = PointerGetDatum(field_value);
4756 nulls[0] = is_null;
4757
4759 tstate->tupdesc,
4760 values,
4761 nulls);
4762 }
4763 else
4764 {
4765 tstate->astate = accumArrayResult(tstate->astate,
4766 PointerGetDatum(field_value),
4767 is_null,
4768 TEXTOID,
4770 }
4771}
4772
4773/*
4774 * array_to_text
4775 * concatenate Cstring representation of input array elements
4776 * using provided field separator
4777 */
4778Datum
4780{
4782 char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4783
4784 PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4785}
4786
4787/*
4788 * array_to_text_null
4789 * concatenate Cstring representation of input array elements
4790 * using provided field separator and null string
4791 *
4792 * This version is not strict so we have to test for null inputs explicitly.
4793 */
4794Datum
4796{
4797 ArrayType *v;
4798 char *fldsep;
4799 char *null_string;
4800
4801 /* returns NULL when first or second parameter is NULL */
4802 if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4804
4805 v = PG_GETARG_ARRAYTYPE_P(0);
4807
4808 /* NULL null string is passed through as a null pointer */
4809 if (!PG_ARGISNULL(2))
4810 null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4811 else
4812 null_string = NULL;
4813
4814 PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4815}
4816
4817/*
4818 * common code for array_to_text and array_to_text_null functions
4819 */
4820static text *
4822 const char *fldsep, const char *null_string)
4823{
4824 text *result;
4825 int nitems,
4826 *dims,
4827 ndims;
4828 Oid element_type;
4829 int typlen;
4830 bool typbyval;
4831 char typalign;
4833 bool printed = false;
4834 char *p;
4835 bits8 *bitmap;
4836 int bitmask;
4837 int i;
4838 ArrayMetaState *my_extra;
4839
4840 ndims = ARR_NDIM(v);
4841 dims = ARR_DIMS(v);
4842 nitems = ArrayGetNItems(ndims, dims);
4843
4844 /* if there are no elements, return an empty string */
4845 if (nitems == 0)
4846 return cstring_to_text_with_len("", 0);
4847
4848 element_type = ARR_ELEMTYPE(v);
4850
4851 /*
4852 * We arrange to look up info about element type, including its output
4853 * conversion proc, only once per series of calls, assuming the element
4854 * type doesn't change underneath us.
4855 */
4856 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4857 if (my_extra == NULL)
4858 {
4859 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4860 sizeof(ArrayMetaState));
4861 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4862 my_extra->element_type = ~element_type;
4863 }
4864
4865 if (my_extra->element_type != element_type)
4866 {
4867 /*
4868 * Get info about element type, including its output conversion proc
4869 */
4870 get_type_io_data(element_type, IOFunc_output,
4871 &my_extra->typlen, &my_extra->typbyval,
4872 &my_extra->typalign, &my_extra->typdelim,
4873 &my_extra->typioparam, &my_extra->typiofunc);
4874 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4875 fcinfo->flinfo->fn_mcxt);
4876 my_extra->element_type = element_type;
4877 }
4878 typlen = my_extra->typlen;
4879 typbyval = my_extra->typbyval;
4880 typalign = my_extra->typalign;
4881
4882 p = ARR_DATA_PTR(v);
4883 bitmap = ARR_NULLBITMAP(v);
4884 bitmask = 1;
4885
4886 for (i = 0; i < nitems; i++)
4887 {
4888 Datum itemvalue;
4889 char *value;
4890
4891 /* Get source element, checking for NULL */
4892 if (bitmap && (*bitmap & bitmask) == 0)
4893 {
4894 /* if null_string is NULL, we just ignore null elements */
4895 if (null_string != NULL)
4896 {
4897 if (printed)
4898 appendStringInfo(&buf, "%s%s", fldsep, null_string);
4899 else
4900 appendStringInfoString(&buf, null_string);
4901 printed = true;
4902 }
4903 }
4904 else
4905 {
4906 itemvalue = fetch_att(p, typbyval, typlen);
4907
4908 value = OutputFunctionCall(&my_extra->proc, itemvalue);
4909
4910 if (printed)
4911 appendStringInfo(&buf, "%s%s", fldsep, value);
4912 else
4914 printed = true;
4915
4916 p = att_addlength_pointer(p, typlen, p);
4917 p = (char *) att_align_nominal(p, typalign);
4918 }
4919
4920 /* advance bitmap pointer if any */
4921 if (bitmap)
4922 {
4923 bitmask <<= 1;
4924 if (bitmask == 0x100)
4925 {
4926 bitmap++;
4927 bitmask = 1;
4928 }
4929 }
4930 }
4931
4932 result = cstring_to_text_with_len(buf.data, buf.len);
4933 pfree(buf.data);
4934
4935 return result;
4936}
4937
4938/*
4939 * Workhorse for to_bin, to_oct, and to_hex. Note that base must be > 1 and <=
4940 * 16.
4941 */
4942static inline text *
4944{
4945 const char *digits = "0123456789abcdef";
4946
4947 /* We size the buffer for to_bin's longest possible return value. */
4948 char buf[sizeof(uint64) * BITS_PER_BYTE];
4949 char *const end = buf + sizeof(buf);
4950 char *ptr = end;
4951
4952 Assert(base > 1);
4953 Assert(base <= 16);
4954
4955 do
4956 {
4957 *--ptr = digits[value % base];
4958 value /= base;
4959 } while (ptr > buf && value);
4960
4961 return cstring_to_text_with_len(ptr, end - ptr);
4962}
4963
4964/*
4965 * Convert an integer to a string containing a base-2 (binary) representation
4966 * of the number.
4967 */
4968Datum
4970{
4972
4974}
4975Datum
4977{
4979
4981}
4982
4983/*
4984 * Convert an integer to a string containing a base-8 (oct) representation of
4985 * the number.
4986 */
4987Datum
4989{
4991
4993}
4994Datum
4996{
4998
5000}
5001
5002/*
5003 * Convert an integer to a string containing a base-16 (hex) representation of
5004 * the number.
5005 */
5006Datum
5008{
5010
5012}
5013Datum
5015{
5017
5019}
5020
5021/*
5022 * Return the size of a datum, possibly compressed
5023 *
5024 * Works on any data type
5025 */
5026Datum
5028{
5030 int32 result;
5031 int typlen;
5032
5033 /* On first call, get the input type's typlen, and save at *fn_extra */
5034 if (fcinfo->flinfo->fn_extra == NULL)
5035 {
5036 /* Lookup the datatype of the supplied argument */
5037 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5038
5039 typlen = get_typlen(argtypeid);
5040 if (typlen == 0) /* should not happen */
5041 elog(ERROR, "cache lookup failed for type %u", argtypeid);
5042
5043 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5044 sizeof(int));
5045 *((int *) fcinfo->flinfo->fn_extra) = typlen;
5046 }
5047 else
5048 typlen = *((int *) fcinfo->flinfo->fn_extra);
5049
5050 if (typlen == -1)
5051 {
5052 /* varlena type, possibly toasted */
5053 result = toast_datum_size(value);
5054 }
5055 else if (typlen == -2)
5056 {
5057 /* cstring */
5058 result = strlen(DatumGetCString(value)) + 1;
5059 }
5060 else
5061 {
5062 /* ordinary fixed-width type */
5063 result = typlen;
5064 }
5065
5066 PG_RETURN_INT32(result);
5067}
5068
5069/*
5070 * Return the compression method stored in the compressed attribute. Return
5071 * NULL for non varlena type or uncompressed data.
5072 */
5073Datum
5075{
5076 int typlen;
5077 char *result;
5078 ToastCompressionId cmid;
5079
5080 /* On first call, get the input type's typlen, and save at *fn_extra */
5081 if (fcinfo->flinfo->fn_extra == NULL)
5082 {
5083 /* Lookup the datatype of the supplied argument */
5084 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5085
5086 typlen = get_typlen(argtypeid);
5087 if (typlen == 0) /* should not happen */
5088 elog(ERROR, "cache lookup failed for type %u", argtypeid);
5089
5090 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5091 sizeof(int));
5092 *((int *) fcinfo->flinfo->fn_extra) = typlen;
5093 }
5094 else
5095 typlen = *((int *) fcinfo->flinfo->fn_extra);
5096
5097 if (typlen != -1)
5099
5100 /* get the compression method id stored in the compressed varlena */
5101 cmid = toast_get_compression_id((struct varlena *)
5103 if (cmid == TOAST_INVALID_COMPRESSION_ID)
5105
5106 /* convert compression method id to compression method name */
5107 switch (cmid)
5108 {
5110 result = "pglz";
5111 break;
5113 result = "lz4";
5114 break;
5115 default:
5116 elog(ERROR, "invalid compression method id %d", cmid);
5117 }
5118
5120}
5121
5122/*
5123 * Return the chunk_id of the on-disk TOASTed value. Return NULL if the value
5124 * is un-TOASTed or not on-disk.
5125 */
5126Datum
5128{
5129 int typlen;
5130 struct varlena *attr;
5131 struct varatt_external toast_pointer;
5132
5133 /* On first call, get the input type's typlen, and save at *fn_extra */
5134 if (fcinfo->flinfo->fn_extra == NULL)
5135 {
5136 /* Lookup the datatype of the supplied argument */
5137 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5138
5139 typlen = get_typlen(argtypeid);
5140 if (typlen == 0) /* should not happen */
5141 elog(ERROR, "cache lookup failed for type %u", argtypeid);
5142
5143 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5144 sizeof(int));
5145 *((int *) fcinfo->flinfo->fn_extra) = typlen;
5146 }
5147 else
5148 typlen = *((int *) fcinfo->flinfo->fn_extra);
5149
5150 if (typlen != -1)
5152
5153 attr = (struct varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
5154
5155 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
5157
5158 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
5159
5160 PG_RETURN_OID(toast_pointer.va_valueid);
5161}
5162
5163/*
5164 * string_agg - Concatenates values and returns string.
5165 *
5166 * Syntax: string_agg(value text, delimiter text) RETURNS text
5167 *
5168 * Note: Any NULL values are ignored. The first-call delimiter isn't
5169 * actually used at all, and on subsequent calls the delimiter precedes
5170 * the associated value.
5171 */
5172
5173/* subroutine to initialize state */
5174static StringInfo
5176{
5178 MemoryContext aggcontext;
5179 MemoryContext oldcontext;
5180
5181 if (!AggCheckCallContext(fcinfo, &aggcontext))
5182 {
5183 /* cannot be called directly because of internal-type argument */
5184 elog(ERROR, "string_agg_transfn called in non-aggregate context");
5185 }
5186
5187 /*
5188 * Create state in aggregate context. It'll stay there across subsequent
5189 * calls.
5190 */
5191 oldcontext = MemoryContextSwitchTo(aggcontext);
5193 MemoryContextSwitchTo(oldcontext);
5194
5195 return state;
5196}
5197
5198Datum
5200{
5202
5204
5205 /* Append the value unless null, preceding it with the delimiter. */
5206 if (!PG_ARGISNULL(1))
5207 {
5209 bool isfirst = false;
5210
5211 /*
5212 * You might think we can just throw away the first delimiter, however
5213 * we must keep it as we may be a parallel worker doing partial
5214 * aggregation building a state to send to the main process. We need
5215 * to keep the delimiter of every aggregation so that the combine
5216 * function can properly join up the strings of two separately
5217 * partially aggregated results. The first delimiter is only stripped
5218 * off in the final function. To know how much to strip off the front
5219 * of the string, we store the length of the first delimiter in the
5220 * StringInfo's cursor field, which we don't otherwise need here.
5221 */
5222 if (state == NULL)
5223 {
5224 state = makeStringAggState(fcinfo);
5225 isfirst = true;
5226 }
5227
5228 if (!PG_ARGISNULL(2))
5229 {
5230 text *delim = PG_GETARG_TEXT_PP(2);
5231
5233 if (isfirst)
5234 state->cursor = VARSIZE_ANY_EXHDR(delim);
5235 }
5236
5238 }
5239
5240 /*
5241 * The transition type for string_agg() is declared to be "internal",
5242 * which is a pass-by-value type the same size as a pointer.
5243 */
5244 if (state)
5247}
5248
5249/*
5250 * string_agg_combine
5251 * Aggregate combine function for string_agg(text) and string_agg(bytea)
5252 */
5253Datum
5255{
5256 StringInfo state1;
5257 StringInfo state2;
5258 MemoryContext agg_context;
5259
5260 if (!AggCheckCallContext(fcinfo, &agg_context))
5261 elog(ERROR, "aggregate function called in non-aggregate context");
5262
5263 state1 = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5264 state2 = PG_ARGISNULL(1) ? NULL : (StringInfo) PG_GETARG_POINTER(1);
5265
5266 if (state2 == NULL)
5267 {
5268 /*
5269 * NULL state2 is easy, just return state1, which we know is already
5270 * in the agg_context
5271 */
5272 if (state1 == NULL)
5274 PG_RETURN_POINTER(state1);
5275 }
5276
5277 if (state1 == NULL)
5278 {
5279 /* We must copy state2's data into the agg_context */
5280 MemoryContext old_context;
5281
5282 old_context = MemoryContextSwitchTo(agg_context);
5283 state1 = makeStringAggState(fcinfo);
5284 appendBinaryStringInfo(state1, state2->data, state2->len);
5285 state1->cursor = state2->cursor;
5286 MemoryContextSwitchTo(old_context);
5287 }
5288 else if (state2->len > 0)
5289 {
5290 /* Combine ... state1->cursor does not change in this case */
5291 appendBinaryStringInfo(state1, state2->data, state2->len);
5292 }
5293
5294 PG_RETURN_POINTER(state1);
5295}
5296
5297/*
5298 * string_agg_serialize
5299 * Aggregate serialize function for string_agg(text) and string_agg(bytea)
5300 *
5301 * This is strict, so we need not handle NULL input
5302 */
5303Datum
5305{
5308 bytea *result;
5309
5310 /* cannot be called directly because of internal-type argument */
5311 Assert(AggCheckCallContext(fcinfo, NULL));
5312
5314
5316
5317 /* cursor */
5318 pq_sendint(&buf, state->cursor, 4);
5319
5320 /* data */
5321 pq_sendbytes(&buf, state->data, state->len);
5322
5323 result = pq_endtypsend(&buf);
5324
5325 PG_RETURN_BYTEA_P(result);
5326}
5327
5328/*
5329 * string_agg_deserialize
5330 * Aggregate deserial function for string_agg(text) and string_agg(bytea)
5331 *
5332 * This is strict, so we need not handle NULL input
5333 */
5334Datum
5336{
5337 bytea *sstate;
5338 StringInfo result;
5340 char *data;
5341 int datalen;
5342
5343 /* cannot be called directly because of internal-type argument */
5344 Assert(AggCheckCallContext(fcinfo, NULL));
5345
5346 sstate = PG_GETARG_BYTEA_PP(0);
5347
5348 /*
5349 * Initialize a StringInfo so that we can "receive" it using the standard
5350 * recv-function infrastructure.
5351 */
5353 VARSIZE_ANY_EXHDR(sstate));
5354
5355 result = makeStringAggState(fcinfo);
5356
5357 /* cursor */
5358 result->cursor = pq_getmsgint(&buf, 4);
5359
5360 /* data */
5361 datalen = VARSIZE_ANY_EXHDR(sstate) - 4;
5362 data = (char *) pq_getmsgbytes(&buf, datalen);
5363 appendBinaryStringInfo(result, data, datalen);
5364
5365 pq_getmsgend(&buf);
5366
5367 PG_RETURN_POINTER(result);
5368}
5369
5370Datum
5372{
5374
5375 /* cannot be called directly because of internal-type argument */
5376 Assert(AggCheckCallContext(fcinfo, NULL));
5377
5379
5380 if (state != NULL)
5381 {
5382 /* As per comment in transfn, strip data before the cursor position */
5384 state->len - state->cursor));
5385 }
5386 else
5388}
5389
5390/*
5391 * Prepare cache with fmgr info for the output functions of the datatypes of
5392 * the arguments of a concat-like function, beginning with argument "argidx".
5393 * (Arguments before that will have corresponding slots in the resulting
5394 * FmgrInfo array, but we don't fill those slots.)
5395 */
5396static FmgrInfo *
5398{
5399 FmgrInfo *foutcache;
5400 int i;
5401
5402 /* We keep the info in fn_mcxt so it survives across calls */
5403 foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5404 PG_NARGS() * sizeof(FmgrInfo));
5405
5406 for (i = argidx; i < PG_NARGS(); i++)
5407 {
5408 Oid valtype;
5409 Oid typOutput;
5410 bool typIsVarlena;
5411
5412 valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
5413 if (!OidIsValid(valtype))
5414 elog(ERROR, "could not determine data type of concat() input");
5415
5416 getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
5417 fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
5418 }
5419
5420 fcinfo->flinfo->fn_extra = foutcache;
5421
5422 return foutcache;
5423}
5424
5425/*
5426 * Implementation of both concat() and concat_ws().
5427 *
5428 * sepstr is the separator string to place between values.
5429 * argidx identifies the first argument to concatenate (counting from zero);
5430 * note that this must be constant across any one series of calls.
5431 *
5432 * Returns NULL if result should be NULL, else text value.
5433 */
5434static text *
5435concat_internal(const char *sepstr, int argidx,
5436 FunctionCallInfo fcinfo)
5437{
5438 text *result;
5440 FmgrInfo *foutcache;
5441 bool first_arg = true;
5442 int i;
5443
5444 /*
5445 * concat(VARIADIC some-array) is essentially equivalent to
5446 * array_to_text(), ie concat the array elements with the given separator.
5447 * So we just pass the case off to that code.
5448 */
5449 if (get_fn_expr_variadic(fcinfo->flinfo))
5450 {
5451 ArrayType *arr;
5452
5453 /* Should have just the one argument */
5454 Assert(argidx == PG_NARGS() - 1);
5455
5456 /* concat(VARIADIC NULL) is defined as NULL */
5457 if (PG_ARGISNULL(argidx))
5458 return NULL;
5459
5460 /*
5461 * Non-null argument had better be an array. We assume that any call
5462 * context that could let get_fn_expr_variadic return true will have
5463 * checked that a VARIADIC-labeled parameter actually is an array. So
5464 * it should be okay to just Assert that it's an array rather than
5465 * doing a full-fledged error check.
5466 */
5468
5469 /* OK, safe to fetch the array value */
5470 arr = PG_GETARG_ARRAYTYPE_P(argidx);
5471
5472 /*
5473 * And serialize the array. We tell array_to_text to ignore null
5474 * elements, which matches the behavior of the loop below.
5475 */
5476 return array_to_text_internal(fcinfo, arr, sepstr, NULL);
5477 }
5478
5479 /* Normal case without explicit VARIADIC marker */
5481
5482 /* Get output function info, building it if first time through */
5483 foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
5484 if (foutcache == NULL)
5485 foutcache = build_concat_foutcache(fcinfo, argidx);
5486
5487 for (i = argidx; i < PG_NARGS(); i++)
5488 {
5489 if (!PG_ARGISNULL(i))
5490 {
5492
5493 /* add separator if appropriate */
5494 if (first_arg)
5495 first_arg = false;
5496 else
5497 appendStringInfoString(&str, sepstr);
5498
5499 /* call the appropriate type output function, append the result */
5501 OutputFunctionCall(&foutcache[i], value));
5502 }
5503 }
5504
5505 result = cstring_to_text_with_len(str.data, str.len);
5506 pfree(str.data);
5507
5508 return result;
5509}
5510
5511/*
5512 * Concatenate all arguments. NULL arguments are ignored.
5513 */
5514Datum
5516{
5517 text *result;
5518
5519 result = concat_internal("", 0, fcinfo);
5520 if (result == NULL)
5522 PG_RETURN_TEXT_P(result);
5523}
5524
5525/*
5526 * Concatenate all but first argument value with separators. The first
5527 * parameter is used as the separator. NULL arguments are ignored.
5528 */
5529Datum
5531{
5532 char *sep;
5533 text *result;
5534
5535 /* return NULL when separator is NULL */
5536 if (PG_ARGISNULL(0))
5539
5540 result = concat_internal(sep, 1, fcinfo);
5541 if (result == NULL)
5543 PG_RETURN_TEXT_P(result);
5544}
5545
5546/*
5547 * Return first n characters in the string. When n is negative,
5548 * return all but last |n| characters.
5549 */
5550Datum
5552{
5553 int n = PG_GETARG_INT32(1);
5554
5555 if (n < 0)
5556 {
5558 const char *p = VARDATA_ANY(str);
5559 int len = VARSIZE_ANY_EXHDR(str);
5560 int rlen;
5561
5562 n = pg_mbstrlen_with_len(p, len) + n;
5563 rlen = pg_mbcharcliplen(p, len, n);
5565 }
5566 else
5568}
5569
5570/*
5571 * Return last n characters in the string. When n is negative,
5572 * return all but first |n| characters.
5573 */
5574Datum
5576{
5578 const char *p = VARDATA_ANY(str);
5579 int len = VARSIZE_ANY_EXHDR(str);
5580 int n = PG_GETARG_INT32(1);
5581 int off;
5582
5583 if (n < 0)
5584 n = -n;
5585 else
5586 n = pg_mbstrlen_with_len(p, len) - n;
5587 off = pg_mbcharcliplen(p, len, n);
5588
5590}
5591
5592/*
5593 * Return reversed string
5594 */
5595Datum
5597{
5599 const char *p = VARDATA_ANY(str);
5600 int len = VARSIZE_ANY_EXHDR(str);
5601 const char *endp = p + len;
5602 text *result;
5603 char *dst;
5604
5605 result = palloc(len + VARHDRSZ);
5606 dst = (char *) VARDATA(result) + len;
5607 SET_VARSIZE(result, len + VARHDRSZ);
5608
5610 {
5611 /* multibyte version */
5612 while (p < endp)
5613 {
5614 int sz;
5615
5616 sz = pg_mblen(p);
5617 dst -= sz;
5618 memcpy(dst, p, sz);
5619 p += sz;
5620 }
5621 }
5622 else
5623 {
5624 /* single byte version */
5625 while (p < endp)
5626 *(--dst) = *p++;
5627 }
5628
5629 PG_RETURN_TEXT_P(result);
5630}
5631
5632
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS	0x0001	/* is minus flag present? */

/*
 * Step "ptr" to the next byte of the format string, raising an error if that
 * reaches "end_ptr". Note that "ptr" is modified in place.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (++(ptr) >= (end_ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format() type specifier"), \
					 errhint("For a single \"%%\" use \"%%%%\"."))); \
	} while (0)
5646
5647/*
5648 * Returns a formatted string
5649 */
5650Datum
5652{
5653 text *fmt;
5655 const char *cp;
5656 const char *start_ptr;
5657 const char *end_ptr;
5658 text *result;
5659 int arg;
5660 bool funcvariadic;
5661 int nargs;
5662 Datum *elements = NULL;
5663 bool *nulls = NULL;
5664 Oid element_type = InvalidOid;
5665 Oid prev_type = InvalidOid;
5666 Oid prev_width_type = InvalidOid;
5667 FmgrInfo typoutputfinfo;
5668 FmgrInfo typoutputinfo_width;
5669
5670 /* When format string is null, immediately return null */
5671 if (PG_ARGISNULL(0))
5673
5674 /* If argument is marked VARIADIC, expand array into elements */
5675 if (get_fn_expr_variadic(fcinfo->flinfo))
5676 {
5677 ArrayType *arr;
5678 int16 elmlen;
5679 bool elmbyval;
5680 char elmalign;
5681 int nitems;
5682
5683 /* Should have just the one argument */
5684 Assert(PG_NARGS() == 2);
5685
5686 /* If argument is NULL, we treat it as zero-length array */
5687 if (PG_ARGISNULL(1))
5688 nitems = 0;
5689 else
5690 {
5691 /*
5692 * Non-null argument had better be an array. We assume that any
5693 * call context that could let get_fn_expr_variadic return true
5694 * will have checked that a VARIADIC-labeled parameter actually is
5695 * an array. So it should be okay to just Assert that it's an
5696 * array rather than doing a full-fledged error check.
5697 */
5699
5700 /* OK, safe to fetch the array value */
5701 arr = PG_GETARG_ARRAYTYPE_P(1);
5702
5703 /* Get info about array element type */
5704 element_type = ARR_ELEMTYPE(arr);
5705 get_typlenbyvalalign(element_type,
5706 &elmlen, &elmbyval, &elmalign);
5707
5708 /* Extract all array elements */
5709 deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5710 &elements, &nulls, &nitems);
5711 }
5712
5713 nargs = nitems + 1;
5714 funcvariadic = true;
5715 }
5716 else
5717 {
5718 /* Non-variadic case, we'll process the arguments individually */
5719 nargs = PG_NARGS();
5720 funcvariadic = false;
5721 }
5722
5723 /* Setup for main loop. */
5725 start_ptr = VARDATA_ANY(fmt);
5726 end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5728 arg = 1; /* next argument position to print */
5729
5730 /* Scan format string, looking for conversion specifiers. */
5731 for (cp = start_ptr; cp < end_ptr; cp++)
5732 {
5733 int argpos;
5734 int widthpos;
5735 int flags;
5736 int width;
5737 Datum value;
5738 bool isNull;
5739 Oid typid;
5740
5741 /*
5742 * If it's not the start of a conversion specifier, just copy it to
5743 * the output buffer.
5744 */
5745 if (*cp != '%')
5746 {
5748 continue;
5749 }
5750
5751 ADVANCE_PARSE_POINTER(cp, end_ptr);
5752
5753 /* Easy case: %% outputs a single % */
5754 if (*cp == '%')
5755 {
5757 continue;
5758 }
5759
5760 /* Parse the optional portions of the format specifier */
5761 cp = text_format_parse_format(cp, end_ptr,
5762 &argpos, &widthpos,
5763 &flags, &width);
5764
5765 /*
5766 * Next we should see the main conversion specifier. Whether or not
5767 * an argument position was present, it's known that at least one
5768 * character remains in the string at this point. Experience suggests
5769 * that it's worth checking that that character is one of the expected
5770 * ones before we try to fetch arguments, so as to produce the least
5771 * confusing response to a mis-formatted specifier.
5772 */
5773 if (strchr("sIL", *cp) == NULL)
5774 ereport(ERROR,
5775 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5776 errmsg("unrecognized format() type specifier \"%.*s\"",
5777 pg_mblen(cp), cp),
5778 errhint("For a single \"%%\" use \"%%%%\".")));
5779
5780 /* If indirect width was specified, get its value */
5781 if (widthpos >= 0)
5782 {
5783 /* Collect the specified or next argument position */
5784 if (widthpos > 0)
5785 arg = widthpos;
5786 if (arg >= nargs)
5787 ereport(ERROR,
5788 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5789 errmsg("too few arguments for format()")));
5790
5791 /* Get the value and type of the selected argument */
5792 if (!funcvariadic)
5793 {
5795 isNull = PG_ARGISNULL(arg);
5796 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5797 }
5798 else
5799 {
5800 value = elements[arg - 1];
5801 isNull = nulls[arg - 1];
5802 typid = element_type;
5803 }
5804 if (!OidIsValid(typid))
5805 elog(ERROR, "could not determine data type of format() input");
5806
5807 arg++;
5808
5809 /* We can treat NULL width the same as zero */
5810 if (isNull)
5811 width = 0;
5812 else if (typid == INT4OID)
5813 width = DatumGetInt32(value);
5814 else if (typid == INT2OID)
5815 width = DatumGetInt16(value);
5816 else
5817 {
5818 /* For less-usual datatypes, convert to text then to int */
5819 char *str;
5820
5821 if (typid != prev_width_type)
5822 {
5823 Oid typoutputfunc;
5824 bool typIsVarlena;
5825
5826 getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5827 fmgr_info(typoutputfunc, &typoutputinfo_width);
5828 prev_width_type = typid;
5829 }
5830
5831 str = OutputFunctionCall(&typoutputinfo_width, value);
5832
5833 /* pg_strtoint32 will complain about bad data or overflow */
5834 width = pg_strtoint32(str);
5835
5836 pfree(str);
5837 }
5838 }
5839
5840 /* Collect the specified or next argument position */
5841 if (argpos > 0)
5842 arg = argpos;
5843 if (arg >= nargs)
5844 ereport(ERROR,
5845 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5846 errmsg("too few arguments for format()")));
5847
5848 /* Get the value and type of the selected argument */
5849 if (!funcvariadic)
5850 {
5852 isNull = PG_ARGISNULL(arg);
5853 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5854 }
5855 else
5856 {
5857 value = elements[arg - 1];
5858 isNull = nulls[arg - 1];
5859 typid = element_type;
5860 }
5861 if (!OidIsValid(typid))
5862 elog(ERROR, "could not determine data type of format() input");
5863
5864 arg++;
5865
5866 /*
5867 * Get the appropriate typOutput function, reusing previous one if
5868 * same type as previous argument. That's particularly useful in the
5869 * variadic-array case, but often saves work even for ordinary calls.
5870 */
5871 if (typid != prev_type)
5872 {
5873 Oid typoutputfunc;
5874 bool typIsVarlena;
5875
5876 getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5877 fmgr_info(typoutputfunc, &typoutputfinfo);
5878 prev_type = typid;
5879 }
5880
5881 /*
5882 * And now we can format the value.
5883 */
5884 switch (*cp)
5885 {
5886 case 's':
5887 case 'I':
5888 case 'L':
5889 text_format_string_conversion(&str, *cp, &typoutputfinfo,
5890 value, isNull,
5891 flags, width);
5892 break;
5893 default:
5894 /* should not get here, because of previous check */
5895 ereport(ERROR,
5896 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5897 errmsg("unrecognized format() type specifier \"%.*s\"",
5898 pg_mblen(cp), cp),
5899 errhint("For a single \"%%\" use \"%%%%\".")));
5900 break;
5901 }
5902 }
5903
5904 /* Don't need deconstruct_array results anymore. */
5905 if (elements != NULL)
5906 pfree(elements);
5907 if (nulls != NULL)
5908 pfree(nulls);
5909
5910 /* Generate results. */
5911 result = cstring_to_text_with_len(str.data, str.len);
5912 pfree(str.data);
5913
5914 PG_RETURN_TEXT_P(result);
5915}
5916
5917/*
5918 * Parse contiguous digits as a decimal number.
5919 *
5920 * Returns true if some digits could be parsed.
5921 * The value is returned into *value, and *ptr is advanced to the next
5922 * character to be parsed.
5923 *
5924 * Note parsing invariant: at least one character is known available before
5925 * string end (end_ptr) at entry, and this is still true at exit.
5926 */
5927static bool
5928text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5929{
5930 bool found = false;
5931 const char *cp = *ptr;
5932 int val = 0;
5933
5934 while (*cp >= '0' && *cp <= '9')
5935 {
5936 int8 digit = (*cp - '0');
5937
5938 if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
5940 ereport(ERROR,
5941 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5942 errmsg("number is out of range")));
5943 ADVANCE_PARSE_POINTER(cp, end_ptr);
5944 found = true;
5945 }
5946
5947 *ptr = cp;
5948 *value = val;
5949
5950 return found;
5951}
5952
5953/*
5954 * Parse a format specifier (generally following the SUS printf spec).
5955 *
5956 * We have already advanced over the initial '%', and we are looking for
5957 * [argpos][flags][width]type (but the type character is not consumed here).
5958 *
5959 * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5960 * Output parameters:
5961 * argpos: argument position for value to be printed. -1 means unspecified.
5962 * widthpos: argument position for width. Zero means the argument position
5963 * was unspecified (ie, take the next arg) and -1 means no width
5964 * argument (width was omitted or specified as a constant).
5965 * flags: bitmask of flags.
5966 * width: directly-specified width value. Zero means the width was omitted
5967 * (note it's not necessary to distinguish this case from an explicit
5968 * zero width value).
5969 *
5970 * The function result is the next character position to be parsed, ie, the
5971 * location where the type character is/should be.
5972 *
5973 * Note parsing invariant: at least one character is known available before
5974 * string end (end_ptr) at entry, and this is still true at exit.
5975 */
5976static const char *
5977text_format_parse_format(const char *start_ptr, const char *end_ptr,
5978 int *argpos, int *widthpos,
5979 int *flags, int *width)
5980{
5981 const char *cp = start_ptr;
5982 int n;
5983
5984 /* set defaults for output parameters */
5985 *argpos = -1;
5986 *widthpos = -1;
5987 *flags = 0;
5988 *width = 0;
5989
5990 /* try to identify first number */
5991 if (text_format_parse_digits(&cp, end_ptr, &n))
5992 {
5993 if (*cp != '$')
5994 {
5995 /* Must be just a width and a type, so we're done */
5996 *width = n;
5997 return cp;
5998 }
5999 /* The number was argument position */
6000 *argpos = n;
6001 /* Explicit 0 for argument index is immediately refused */
6002 if (n == 0)
6003 ereport(ERROR,
6004 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6005 errmsg("format specifies argument 0, but arguments are numbered from 1")));
6006 ADVANCE_PARSE_POINTER(cp, end_ptr);
6007 }
6008
6009 /* Handle flags (only minus is supported now) */
6010 while (*cp == '-')
6011 {
6012 *flags |= TEXT_FORMAT_FLAG_MINUS;
6013 ADVANCE_PARSE_POINTER(cp, end_ptr);
6014 }
6015
6016 if (*cp == '*')
6017 {
6018 /* Handle indirect width */
6019 ADVANCE_PARSE_POINTER(cp, end_ptr);
6020 if (text_format_parse_digits(&cp, end_ptr, &n))
6021 {
6022 /* number in this position must be closed by $ */
6023 if (*cp != '$')
6024 ereport(ERROR,
6025 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6026 errmsg("width argument position must be ended by \"$\"")));
6027 /* The number was width argument position */
6028 *widthpos = n;
6029 /* Explicit 0 for argument index is immediately refused */
6030 if (n == 0)
6031 ereport(ERROR,
6032 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6033 errmsg("format specifies argument 0, but arguments are numbered from 1")));
6034 ADVANCE_PARSE_POINTER(cp, end_ptr);
6035 }
6036 else
6037 *widthpos = 0; /* width's argument position is unspecified */
6038 }
6039 else
6040 {
6041 /* Check for direct width specification */
6042 if (text_format_parse_digits(&cp, end_ptr, &n))
6043 *width = n;
6044 }
6045
6046 /* cp should now be pointing at type character */
6047 return cp;
6048}
6049
6050/*
6051 * Format a %s, %I, or %L conversion
6052 */
6053static void
6055 FmgrInfo *typOutputInfo,
6056 Datum value, bool isNull,
6057 int flags, int width)
6058{
6059 char *str;
6060
6061 /* Handle NULL arguments before trying to stringify the value. */
6062 if (isNull)
6063 {
6064 if (conversion == 's')
6065 text_format_append_string(buf, "", flags, width);
6066 else if (conversion == 'L')
6067 text_format_append_string(buf, "NULL", flags, width);
6068 else if (conversion == 'I')
6069 ereport(ERROR,
6070 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
6071 errmsg("null values cannot be formatted as an SQL identifier")));
6072 return;
6073 }
6074
6075 /* Stringify. */
6076 str = OutputFunctionCall(typOutputInfo, value);
6077
6078 /* Escape. */
6079 if (conversion == 'I')
6080 {
6081 /* quote_identifier may or may not allocate a new string. */
6083 }
6084 else if (conversion == 'L')
6085 {
6086 char *qstr = quote_literal_cstr(str);
6087
6088 text_format_append_string(buf, qstr, flags, width);
6089 /* quote_literal_cstr() always allocates a new string */
6090 pfree(qstr);
6091 }
6092 else
6093 text_format_append_string(buf, str, flags, width);
6094
6095 /* Cleanup. */
6096 pfree(str);
6097}
6098
6099/*
6100 * Append str to buf, padding as directed by flags/width
6101 */
6102static void
6104 int flags, int width)
6105{
6106 bool align_to_left = false;
6107 int len;
6108
6109 /* fast path for typical easy case */
6110 if (width == 0)
6111 {
6113 return;
6114 }
6115
6116 if (width < 0)
6117 {
6118 /* Negative width: implicit '-' flag, then take absolute value */
6119 align_to_left = true;
6120 /* -INT_MIN is undefined */
6121 if (width <= INT_MIN)
6122 ereport(ERROR,
6123 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
6124 errmsg("number is out of range")));
6125 width = -width;
6126 }
6127 else if (flags & TEXT_FORMAT_FLAG_MINUS)
6128 align_to_left = true;
6129
6130 len = pg_mbstrlen(str);
6131 if (align_to_left)
6132 {
6133 /* left justify */
6135 if (len < width)
6136 appendStringInfoSpaces(buf, width - len);
6137 }
6138 else
6139 {
6140 /* right justify */
6141 if (len < width)
6142 appendStringInfoSpaces(buf, width - len);
6144 }
6145}
6146
6147/*
6148 * text_format_nv - nonvariadic wrapper for text_format function.
6149 *
6150 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
6151 * which checks that all built-in functions that share the implementing C
6152 * function take the same number of arguments.
6153 */
6154Datum
6156{
6157 return text_format(fcinfo);
6158}
6159
/*
 * Helper function for Levenshtein distance functions. Faster than memcmp(),
 * for this use case.
 *
 * Returns true if the first len bytes of s1 and s2 are equal, comparing from
 * the last byte backwards. A len of zero or less compares nothing and
 * returns true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	while (len > 0)
	{
		len--;
		if (s1[len] != s2[len])
			return false;
	}
	return true;
}
6175
6176/* Expand each Levenshtein distance variant */
6177#include "levenshtein.c"
6178#define LEVENSHTEIN_LESS_EQUAL
6179#include "levenshtein.c"
6180
6181
6182/*
6183 * The following *ClosestMatch() functions can be used to determine whether a
6184 * user-provided string resembles any known valid values, which is useful for
6185 * providing hints in log messages, among other things. Use these functions
6186 * like so:
6187 *
6188 * initClosestMatch(&state, source_string, max_distance);
6189 *
6190 * for (int i = 0; i < num_valid_strings; i++)
6191 * updateClosestMatch(&state, valid_strings[i]);
6192 *
6193 * closestMatch = getClosestMatch(&state);
6194 */
6195
6196/*
6197 * Initialize the given state with the source string and maximum Levenshtein
6198 * distance to consider.
6199 */
6200void
6202{
6203 Assert(state);
6204 Assert(max_d >= 0);
6205
6206 state->source = source;
6207 state->min_d = -1;
6208 state->max_d = max_d;
6209 state->match = NULL;
6210}
6211
6212/*
 * If the candidate string is a closer match than the current one saved (or
 * there is no match saved), save it as the closest match.
 *
 * If the source or candidate string is NULL, empty, or too long, this function
 * takes no action. Likewise, if the Levenshtein distance exceeds the maximum
 * allowed or more than half the characters are different, no action is taken.
 *
 * Notes on the visible behavior:
 *  - "too long" means strlen() exceeds MAX_LEVENSHTEIN_STRLEN;
 *  - the "half the characters" test compares the distance against
 *    strlen(state->source) / 2, i.e. a length in bytes;
 *  - a candidate replaces the saved one only when its distance is strictly
 *    smaller, so among equally distant candidates the first one seen is kept;
 *  - only the 'candidate' pointer is saved in state->match, not a copy.
 *
 * NOTE(review): the declarator line between "void" and "{" (listing line
 * 6221) is absent from this copy.  Going by the usage sketch earlier in the
 * file and the names used in the body, it presumably declares
 * updateClosestMatch(state, candidate) -- confirm against the upstream file.
 */
6220void
6222{
6223 int dist;
6224
6225 Assert(state);
6226
/* nothing to compare if either string is missing or empty */
6227 if (state->source == NULL || state->source[0] == '\0' ||
6228 candidate == NULL || candidate[0] == '\0')
6229 return;
6230
6231 /*
6232 * To avoid ERROR-ing, we check the lengths here instead of setting
6233 * 'trusted' to false in the call to varstr_levenshtein_less_equal().
6234 */
6235 if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||
6236 strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)
6237 return;
6238
/*
 * state->max_d is passed as the distance bound and the trailing 'true' is
 * the 'trusted' flag referred to above.
 * NOTE(review): the three 1 arguments are presumably the insertion,
 * deletion and substitution costs -- confirm against levenshtein.c.
 */
6239 dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),
6240 candidate, strlen(candidate), 1, 1, 1,
6241 state->max_d, true);
/* accept only if within the bound, within half the source length, and better than the saved match */
6242 if (dist <= state->max_d &&
6243 dist <= strlen(state->source) / 2 &&
6244 (state->min_d == -1 || dist < state->min_d))
6245 {
6246 state->min_d = dist;
6247 state->match = candidate;
6248 }
6249}
6250
6251/*
 * Return the closest match. If no suitable candidates were provided via
 * updateClosestMatch(), return NULL.
 *
 * The value returned is simply state->match; nothing is copied or freed here.
 *
 * NOTE(review): the declarator line between "const char *" and "{" (listing
 * line 6256) is absent from this copy.  Going by the usage sketch earlier in
 * the file, it presumably declares getClosestMatch(state) -- confirm against
 * the upstream file.
 */
6255const char *
6257{
6258 Assert(state);
6259
6260 return state->match;
6261}
6262
6263
6264/*
6265 * Unicode support
6266 */
6267
/*
 * Map a normalization form name to its UnicodeNormalizationForm value.
 *
 * 'formstr' is compared with pg_strcasecmp() against "NFC", "NFD", "NFKC"
 * and "NFKD".  Any other name raises ERROR with
 * ERRCODE_INVALID_PARAMETER_VALUE, quoting the offending string.
 *
 * NOTE(review): three lines are absent from this copy:
 *  - the two lines introducing the function (listing lines 6268-6269).  The
 *    callers further down invoke unicode_norm_form_from_string(formstr) and
 *    store the result in a UnicodeNormalizationForm, so this body presumably
 *    belongs to that function;
 *  - the condition guarding the first ereport() (listing line 6276).  Judging
 *    by the message, it is presumably a test that the server encoding is
 *    UTF8.
 * Confirm both against the upstream file.
 */
6270{
/* -1 is a placeholder; every path that reaches the return has overwritten it */
6271 UnicodeNormalizationForm form = -1;
6272
6273 /*
6274 * Might as well check this while we're here.
6275 */
6277 ereport(ERROR,
6278 (errcode(ERRCODE_SYNTAX_ERROR),
6279 errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
6280
6281 if (pg_strcasecmp(formstr, "NFC") == 0)
6282 form = UNICODE_NFC;
6283 else if (pg_strcasecmp(formstr, "NFD") == 0)
6284 form = UNICODE_NFD;
6285 else if (pg_strcasecmp(formstr, "NFKC") == 0)
6286 form = UNICODE_NFKC;
6287 else if (pg_strcasecmp(formstr, "NFKD") == 0)
6288 form = UNICODE_NFKD;
6289 else
6290 ereport(ERROR,
6291 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6292 errmsg("invalid normalization form: %s", formstr)));
6293
6294 return form;
6295}
6296
6297/*
 * Returns version of Unicode used by Postgres in "major.minor" format (the
 * same format as the Unicode version reported by ICU). The third component
 * ("update version") never involves additions to the character repertoire and
 * is unimportant for most purposes.
 *
 * See: https://unicode.org/versions/
 *
 * NOTE(review): the declarator line (listing line 6306) and the only body
 * statement (listing line 6308) are absent from this copy, so neither the
 * function's name nor the expression it returns can be read from here.
 * Restore both from the upstream file.
 */
6305Datum
6307{
6309}
6310
6311/*
 * Returns version of Unicode used by ICU, if enabled; otherwise NULL.
 *
 * When USE_ICU is defined, the result is the U_UNICODE_VERSION string
 * converted to text.
 *
 * NOTE(review): the declarator line (listing line 6315) and the statement in
 * the #else branch (listing line 6320) are absent from this copy.  Per the
 * description above, the #else branch presumably returns SQL NULL -- confirm
 * against the upstream file.
 */
6314Datum
6316{
6317#ifdef USE_ICU
6318 PG_RETURN_TEXT_P(cstring_to_text(U_UNICODE_VERSION));
6319#else
6321#endif
6322}
6323
6324/*
 * Check whether the string contains only assigned Unicode code
 * points. Requires that the database encoding is UTF-8.
 *
 * Walks the input one UTF-8 character at a time, decodes it with
 * utf8_to_unicode() and looks up its category.  Returns false at the first
 * code point whose category is PG_U_UNASSIGNED, and true if the loop finishes
 * without finding one.
 *
 * NOTE(review): four lines are absent from this copy:
 *  - the declarator (listing line 6329);
 *  - the declaration of 'input' (listing line 6331);
 *  - the condition guarding the ereport() (listing line 6335), presumably a
 *    test that the server encoding is UTF8, judging by the message;
 *  - the assignment to 'size' (listing line 6340), presumably the number of
 *    characters in the input, since the loop advances one character per
 *    iteration.
 * Confirm all four against the upstream file.
 */
6328Datum
6330{
6332 unsigned char *p;
6333 int size;
6334
6336 ereport(ERROR,
6337 (errmsg("Unicode categorization can only be performed if server encoding is UTF8")));
6338
6339 /* convert to pg_wchar */
6341 p = (unsigned char *) VARDATA_ANY(input);
6342 for (int i = 0; i < size; i++)
6343 {
6344 pg_wchar uchar = utf8_to_unicode(p);
6345 int category = unicode_category(uchar);
6346
6347 if (category == PG_U_UNASSIGNED)
6348 PG_RETURN_BOOL(false);
6349
/* step over the bytes of the character just examined */
6350 p += pg_utf_mblen(p);
6351 }
6352
6353 PG_RETURN_BOOL(true);
6354}
6355
/*
 * Normalize a text value to the Unicode normalization form named by the
 * second argument, and return the result as a new text value.
 *
 * Steps, as visible below:
 *  1. resolve the form name with unicode_norm_form_from_string();
 *  2. decode the UTF-8 input into a zero-terminated pg_wchar array;
 *  3. call unicode_normalize();
 *  4. measure the UTF-8 length of the output, allocate the result, and encode
 *     the output code points into it.
 *
 * NOTE(review): four lines are absent from this copy:
 *  - the declarator (listing line 6357);
 *  - the declaration of 'input' (listing line 6359);
 *  - the declaration of 'form' (listing line 6361);
 *  - the first assignment to 'size' (listing line 6372), presumably the
 *    number of characters in the input, since it sizes the pg_wchar array
 *    and bounds the decoding loop.
 * Confirm all four against the upstream file.
 */
6356Datum
6358{
6360 char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
6362 int size;
6363 pg_wchar *input_chars;
6364 pg_wchar *output_chars;
6365 unsigned char *p;
6366 text *result;
6367 int i;
6368
6369 form = unicode_norm_form_from_string(formstr);
6370
6371 /* convert to pg_wchar */
/* one extra element for the zero terminator stored after the loop */
6373 input_chars = palloc((size + 1) * sizeof(pg_wchar));
6374 p = (unsigned char *) VARDATA_ANY(input);
6375 for (i = 0; i < size; i++)
6376 {
6377 input_chars[i] = utf8_to_unicode(p);
6378 p += pg_utf_mblen(p);
6379 }
6380 input_chars[i] = (pg_wchar) '\0';
/* sanity check: decoding must have consumed exactly the input's payload bytes */
6381 Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
6382
6383 /* action */
6384 output_chars = unicode_normalize(form, input_chars);
6385
6386 /* convert back to UTF-8 string */
/* first pass: encode each code point into a scratch buffer just to count bytes */
6387 size = 0;
6388 for (pg_wchar *wp = output_chars; *wp; wp++)
6389 {
6390 unsigned char buf[4];
6391
6392 unicode_to_utf8(*wp, buf);
6393 size += pg_utf_mblen(buf);
6394 }
6395
6396 result = palloc(size + VARHDRSZ);
6397 SET_VARSIZE(result, size + VARHDRSZ);
6398
/* second pass: encode directly into the result's data area */
6399 p = (unsigned char *) VARDATA_ANY(result);
6400 for (pg_wchar *wp = output_chars; *wp; wp++)
6401 {
6402 unicode_to_utf8(*wp, p);
6403 p += pg_utf_mblen(p);
6404 }
/* sanity check: the second pass must have written exactly 'size' bytes */
6405 Assert((char *) p == (char *) result + size + VARHDRSZ);
6406
6407 PG_RETURN_TEXT_P(result);
6408}
6409
6410/*
 * Check whether the string is in the specified Unicode normalization form.
 *
 * This is done by converting the string to the specified normal form and then
 * comparing that to the original string. To speed that up, we also apply the
 * "quick check" algorithm specified in UAX #15, which can give a yes or no
 * answer for many strings by just scanning the string once.
 *
 * This function should generally be optimized for the case where the string
 * is in fact normalized. In that case, we'll end up looking at the entire
 * string, so it's probably not worth doing any incremental conversion etc.
 *
 * The comparison at the end is done on the decoded code points: the input is
 * considered normalized when the normalized output has the same number of
 * code points and the two pg_wchar arrays are bytewise equal.
 *
 * NOTE(review): four lines are absent from this copy:
 *  - the declarator (listing line 6423);
 *  - the declaration of 'input' (listing line 6425);
 *  - the declaration of 'form' (listing line 6427);
 *  - the assignment to 'size' (listing line 6440), presumably the number of
 *    characters in the input, since it sizes the pg_wchar array and bounds
 *    the decoding loop.
 * Confirm all four against the upstream file.
 */
6422Datum
6424{
6426 char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
6428 int size;
6429 pg_wchar *input_chars;
6430 pg_wchar *output_chars;
6431 unsigned char *p;
6432 int i;
6433 UnicodeNormalizationQC quickcheck;
6434 int output_size;
6435 bool result;
6436
6437 form = unicode_norm_form_from_string(formstr);
6438
6439 /* convert to pg_wchar */
/* one extra element for the zero terminator stored after the loop */
6441 input_chars = palloc((size + 1) * sizeof(pg_wchar));
6442 p = (unsigned char *) VARDATA_ANY(input);
6443 for (i = 0; i < size; i++)
6444 {
6445 input_chars[i] = utf8_to_unicode(p);
6446 p += pg_utf_mblen(p);
6447 }
6448 input_chars[i] = (pg_wchar) '\0';
/* sanity check: decoding must have consumed exactly the input's payload bytes */
6449 Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
6450
6451 /* quick check (see UAX #15) */
/* a definite yes or no ends here; any other outcome falls through to the full comparison */
6452 quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
6453 if (quickcheck == UNICODE_NORM_QC_YES)
6454 PG_RETURN_BOOL(true);
6455 else if (quickcheck == UNICODE_NORM_QC_NO)
6456 PG_RETURN_BOOL(false);
6457
6458 /* normalize and compare with original */
6459 output_chars = unicode_normalize(form, input_chars);
6460
/* count output code points up to the zero terminator */
6461 output_size = 0;
6462 for (pg_wchar *wp = output_chars; *wp; wp++)
6463 output_size++;
6464
6465 result = (size == output_size) &&
6466 (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
6467
6468 PG_RETURN_BOOL(result);
6469}
6470
/*
 * isxdigits_n
 *		Check if the first n chars of instr are hexadecimal digits.
 *
 * instr does not have to be NUL-terminated, but the caller must make sure
 * that at least n bytes are readable.  Returns true when every one of those
 * bytes satisfies isxdigit(), which includes the n == 0 case, and false as
 * soon as one does not.
 */
static bool
isxdigits_n(const char *instr, size_t n)
{
	for (size_t i = 0; i < n; i++)
	{
		/* cast first: handing isxdigit() a negative char value is undefined */
		if (!isxdigit((unsigned char) instr[i]))
			return false;
	}

	return true;
}
6483
6484static unsigned int
6485hexval(unsigned char c)
6486{
6487 if (c >= '0' && c <= '9')
6488 return c - '0';
6489 if (c >= 'a' && c <= 'f')
6490 return c - 'a' + 0xA;
6491 if (c >= 'A' && c <= 'F')
6492 return c - 'A' + 0xA;
6493 elog(ERROR, "invalid hexadecimal digit");
6494 return 0; /* not reached */
6495}
6496
/*
 * hexval_n
 *		Translate string with hexadecimal digits to number.
 *
 * Interprets the first n bytes of instr as a big-endian hexadecimal number
 * (most significant digit first) and returns its value.  Each byte is
 * converted with hexval(), so a non-hex byte is reported by that function.
 *
 * The value is built by shifting the running result left one nibble per
 * digit.  That keeps every shift count at 4; computing a per-digit shift of
 * 4 * (n - i - 1) would be undefined once n exceeds the number of nibbles in
 * an unsigned int.  If n is larger than that, the leading digits simply fall
 * off the top of the result.
 */
static unsigned int
hexval_n(const char *instr, size_t n)
{
	unsigned int result = 0;

	for (size_t i = 0; i < n; i++)
		result = (result << 4) | hexval((unsigned char) instr[i]);

	return result;
}
6510
6511/*
 * Replaces Unicode escape sequences by Unicode characters
 *
 * The input text is scanned left to right.  Bytes other than a backslash are
 * copied to the output unchanged.  A backslash starts one of:
 *
 *   \\            a literal backslash
 *   \XXXX         4 hex digits
 *   \uXXXX        4 hex digits
 *   \+XXXXXX      6 hex digits
 *   \UXXXXXXXX    8 hex digits
 *
 * Anything else after a backslash raises ERRCODE_SYNTAX_ERROR ("invalid
 * Unicode escape").  A decoded value rejected by is_valid_unicode_codepoint()
 * raises ERRCODE_INVALID_PARAMETER_VALUE.
 *
 * UTF-16 surrogate pairs written as two consecutive escapes are combined into
 * one code point.  'pair_first' holds a first surrogate that is still waiting
 * for its partner; while it is set, the very next item must be an escape that
 * yields a second surrogate.  A first surrogate followed by anything else
 * (including end of input), or a second surrogate with no pending first,
 * raises "invalid Unicode surrogate pair".
 *
 * NOTE(review): six lines are absent from this copy:
 *  - the declarator (listing line 6515);
 *  - a declaration (listing line 6520), presumably that of 'str', which the
 *    body uses as &str, str.data and str.len;
 *  - a statement before the loop (listing line 6528), presumably the
 *    initialization of 'str';
 *  - one statement after each pg_unicode_to_server() call (listing lines
 *    6574, 6609 and 6644), presumably appending 'cbuf' to 'str', since the
 *    converted character is otherwise never used.
 * Confirm all of them against the upstream file.
 */
6514Datum
6516{
6517 text *input_text = PG_GETARG_TEXT_PP(0);
6518 char *instr;
6519 int len;
6521 text *result;
6522 pg_wchar pair_first = 0;
6523 char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
6524
/* instr/len track the not-yet-consumed tail of the input */
6525 instr = VARDATA_ANY(input_text);
6526 len = VARSIZE_ANY_EXHDR(input_text);
6527
6529
6530 while (len > 0)
6531 {
6532 if (instr[0] == '\\')
6533 {
/* escaped backslash */
6534 if (len >= 2 &&
6535 instr[1] == '\\')
6536 {
6537 if (pair_first)
6538 goto invalid_pair;
6539 appendStringInfoChar(&str, '\\');
6540 instr += 2;
6541 len -= 2;
6542 }
/* 4-digit forms: backslash + 4 hex digits, or backslash + 'u' + 4 hex digits */
6543 else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
6544 (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
6545 {
6546 pg_wchar unicode;
/* digits start after the backslash, or after the 'u' when present */
6547 int offset = instr[1] == 'u' ? 2 : 1;
6548
6549 unicode = hexval_n(instr + offset, 4);
6550
6551 if (!is_valid_unicode_codepoint(unicode))
6552 ereport(ERROR,
6553 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6554 errmsg("invalid Unicode code point: %04X", unicode));
6555
/* a pending first surrogate must be completed by a second surrogate right now */
6556 if (pair_first)
6557 {
6558 if (is_utf16_surrogate_second(unicode))
6559 {
6560 unicode = surrogate_pair_to_codepoint(pair_first, unicode);
6561 pair_first = 0;
6562 }
6563 else
6564 goto invalid_pair;
6565 }
6566 else if (is_utf16_surrogate_second(unicode))
6567 goto invalid_pair;
6568
/* a first surrogate is held back; anything else is converted to the server encoding */
6569 if (is_utf16_surrogate_first(unicode))
6570 pair_first = unicode;
6571 else
6572 {
6573 pg_unicode_to_server(unicode, (unsigned char *) cbuf);
6575 }
6576
6577 instr += 4 + offset;
6578 len -= 4 + offset;
6579 }
/* 6-digit form: backslash + '+' + 6 hex digits; same handling as the 4-digit case */
6580 else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
6581 {
6582 pg_wchar unicode;
6583
6584 unicode = hexval_n(instr + 2, 6);
6585
6586 if (!is_valid_unicode_codepoint(unicode))
6587 ereport(ERROR,
6588 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6589 errmsg("invalid Unicode code point: %04X", unicode));
6590
6591 if (pair_first)
6592 {
6593 if (is_utf16_surrogate_second(unicode))
6594 {
6595 unicode = surrogate_pair_to_codepoint(pair_first, unicode);
6596 pair_first = 0;
6597 }
6598 else
6599 goto invalid_pair;
6600 }
6601 else if (is_utf16_surrogate_second(unicode))
6602 goto invalid_pair;
6603
6604 if (is_utf16_surrogate_first(unicode))
6605 pair_first = unicode;
6606 else
6607 {
6608 pg_unicode_to_server(unicode, (unsigned char *) cbuf);
6610 }
6611
6612 instr += 8;
6613 len -= 8;
6614 }
/* 8-digit form: backslash + 'U' + 8 hex digits; same handling as the 4-digit case */
6615 else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
6616 {
6617 pg_wchar unicode;
6618
6619 unicode = hexval_n(instr + 2, 8);
6620
6621 if (!is_valid_unicode_codepoint(unicode))
6622 ereport(ERROR,
6623 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6624 errmsg("invalid Unicode code point: %04X", unicode));
6625
6626 if (pair_first)
6627 {
6628 if (is_utf16_surrogate_second(unicode))
6629 {
6630 unicode = surrogate_pair_to_codepoint(pair_first, unicode);
6631 pair_first = 0;
6632 }
6633 else
6634 goto invalid_pair;
6635 }
6636 else if (is_utf16_surrogate_second(unicode))
6637 goto invalid_pair;
6638
6639 if (is_utf16_surrogate_first(unicode))
6640 pair_first = unicode;
6641 else
6642 {
6643 pg_unicode_to_server(unicode, (unsigned char *) cbuf);
6645 }
6646
6647 instr += 10;
6648 len -= 10;
6649 }
/* a backslash that starts none of the recognized forms */
6650 else
6651 ereport(ERROR,
6652 (errcode(ERRCODE_SYNTAX_ERROR),
6653 errmsg("invalid Unicode escape"),
6654 errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
6655 }
6656 else
6657 {
/* an ordinary byte cannot complete a surrogate pair */
6658 if (pair_first)
6659 goto invalid_pair;
6660
6661 appendStringInfoChar(&str, *instr++);
6662 len--;
6663 }
6664 }
6665
6666 /* unfinished surrogate pair? */
6667 if (pair_first)
6668 goto invalid_pair;
6669
6670 result = cstring_to_text_with_len(str.data, str.len);
6671 pfree(str.data);
6672
6673 PG_RETURN_TEXT_P(result);
6674
/* shared error exit for every surrogate-pairing violation detected above */
6675invalid_pair:
6676 ereport(ERROR,
6677 (errcode(ERRCODE_SYNTAX_ERROR),
6678 errmsg("invalid Unicode surrogate pair")));
6679 PG_RETURN_NULL(); /* keep compiler quiet */
6680}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define ARR_NDIM(a)
Definition: array.h:290
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:263
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define ARR_NULLBITMAP(a)
Definition: array.h:300
#define ARR_ELEMTYPE(a)
Definition: array.h:292
#define PG_RETURN_ARRAYTYPE_P(x)
Definition: array.h:265
#define ARR_DIMS(a)
Definition: array.h:294
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5350
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3580
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3631
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5420
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define TextDatumGetCString(d)
Definition: builtins.h:98
@ BYTEA_OUTPUT_HEX
Definition: bytea.h:22
@ BYTEA_OUTPUT_ESCAPE
Definition: bytea.h:21
#define NameStr(name)
Definition: c.h:700
#define unconstify(underlying_type, expr)
Definition: c.h:1199
NameData * Name
Definition: c.h:698
#define Min(x, y)
Definition: c.h:958
#define Max(x, y)
Definition: c.h:952
#define VARHDRSZ
Definition: c.h:646
#define Assert(condition)
Definition: c.h:812
int64_t int64
Definition: c.h:482
int16_t int16
Definition: c.h:480
int8_t int8
Definition: c.h:479
uint8 bits8
Definition: c.h:492
int32_t int32
Definition: c.h:481
uint64_t uint64
Definition: c.h:486
#define unlikely(x)
Definition: c.h:330
uint32_t uint32
Definition: c.h:485
#define lengthof(array)
Definition: c.h:742
#define OidIsValid(objectId)
Definition: c.h:729
size_t Size
Definition: c.h:559
Oid collid
Size toast_datum_size(Datum value)
Definition: detoast.c:601
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:545
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition: detoast.h:22
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define ereturn(context, dummy_value,...)
Definition: elog.h:277
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
uint64 hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
Definition: encode.c:196
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition: encode.c:162
#define MaxAllocSize
Definition: fe_memutils.h:22
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
Datum DirectFunctionCall2Coll(PGFunction func, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:812
struct varlena * pg_detoast_datum_packed(struct varlena *datum)
Definition: fmgr.c:1864
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:137
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1683
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2044
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:1910
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:303
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DatumGetBpCharPP(X)
Definition: fmgr.h:293
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define PG_ARGISNULL(n)
Definition: fmgr.h:209
#define PG_RETURN_INT64(x)
Definition: fmgr.h:368
struct FmgrInfo FmgrInfo
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_NARGS()
Definition: fmgr.h:203
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:304
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_NAME(x)
Definition: fmgr.h:363
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:314
#define PG_RETURN_OID(x)
Definition: fmgr.h:360
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
#define MAT_SRF_USE_EXPECTED_DESC
Definition: funcapi.h:296
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
return str start
for(;;)
const char * str
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:66
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:186
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:167
#define nitems(x)
Definition: indent.h:31
FILE * input
static struct @161 value
long val
Definition: informix.c:689
int digits
Definition: informix.c:691
static char * locale
Definition: initdb.c:140
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:187
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:151
int y
Definition: isn.c:71
int x
Definition: isn.c:70
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
#define MAX_LEVENSHTEIN_STRLEN
Definition: levenshtein.c:26
static void const char * fmt
List * lappend(List *list, void *datum)
Definition: list.c:339
void list_free(List *list)
Definition: list.c:1546
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2907
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2271
void get_type_io_data(Oid typid, IOFuncSelector which_func, int16 *typlen, bool *typbyval, char *typalign, char *typdelim, Oid *typioparam, Oid *func)
Definition: lsyscache.c:2325
int16 get_typlen(Oid typid)
Definition: lsyscache.c:2197
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2832
@ IOFunc_output
Definition: lsyscache.h:36
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
unsigned int pg_wchar
Definition: mbprint.c:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:1057
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1125
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:1037
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1083
void pg_unicode_to_server(pg_wchar c, unsigned char *s)
Definition: mbutils.c:864
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:986
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:4511
int32 pg_strtoint32(const char *s)
Definition: numutils.c:383
void * arg
static uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.h:339
#define BITS_PER_BYTE
#define NAMEDATALEN
#define MAXPGPATH
#define PG_CACHE_LINE_SIZE
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:172
#define NIL
Definition: pg_list.h:68
bool pg_strxfrm_enabled(pg_locale_t locale)
Definition: pg_locale.c:1620
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1341
int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:1555
bool pg_strxfrm_prefix_enabled(pg_locale_t locale)
Definition: pg_locale.c:1703
int pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
Definition: pg_locale.c:1587
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1643
size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1722
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:72
char typalign
Definition: pg_type.h:176
#define pg_utf_mblen
Definition: pg_wchar.h:633
@ PG_UTF8
Definition: pg_wchar.h:232
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
#define MAX_UNICODE_EQUIVALENT_STRING
Definition: pg_wchar.h:329
static bool is_valid_unicode_codepoint(pg_wchar c)
Definition: pg_wchar.h:519
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
Definition: pg_wchar.h:537
static bool is_utf16_surrogate_first(pg_wchar c)
Definition: pg_wchar.h:525
static bool is_utf16_surrogate_second(pg_wchar c)
Definition: pg_wchar.h:531
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
void canonicalize_path(char *path)
Definition: path.c:265
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:222
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static Name DatumGetName(Datum X)
Definition: postgres.h:360
uintptr_t Datum
Definition: postgres.h:64
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static int16 DatumGetInt16(Datum X)
Definition: postgres.h:162
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:415
void pq_sendbytes(StringInfo buf, const void *data, int datalen)
Definition: pqformat.c:126
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:172
void pq_getmsgend(StringInfo msg)
Definition: pqformat.c:635
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:528
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:546
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:326
const char * pq_getmsgbytes(StringInfo msg, int datalen)
Definition: pqformat.c:508
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:346
static void pq_sendint(StringInfo buf, uint32 i, int b)
Definition: pqformat.h:171
char * c
char * s1
char * s2
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:103
MemoryContextSwitchTo(old_ctx)
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define REG_NOMATCH
Definition: regex.h:216
#define regmatch_t
Definition: regex.h:246
#define REG_OKAY
Definition: regex.h:215
#define REG_NOSUB
Definition: regex.h:185
#define regex_t
Definition: regex.h:245
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:185
regex_t * RE_compile_and_cache(text *text_re, int cflags, Oid collation)
Definition: regexp.c:141
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:12870
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:93
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:37
bool scanner_isspace(char ch)
Definition: scansup.c:117
#define S(n, x)
Definition: sha1.c:73
static pg_noinline void Size size
Definition: slab.c:607
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
StringInfo makeStringInfo(void)
Definition: stringinfo.c:38
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:94
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:230
void appendStringInfoSpaces(StringInfo str, int count)
Definition: stringinfo.c:209
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:179
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:191
void initStringInfo(StringInfo str)
Definition: stringinfo.c:56
StringInfoData * StringInfo
Definition: stringinfo.h:54
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:204
static void initReadOnlyStringInfo(StringInfo str, char *data, int len)
Definition: stringinfo.h:130
Oid typioparam
Definition: array.h:243
char typalign
Definition: array.h:241
Oid typiofunc
Definition: array.h:244
int16 typlen
Definition: array.h:239
Oid element_type
Definition: array.h:238
FmgrInfo proc
Definition: array.h:245
char typdelim
Definition: array.h:242
bool typbyval
Definition: array.h:240
Definition: fmgr.h:57
void * fn_extra
Definition: fmgr.h:64
MemoryContext fn_mcxt
Definition: fmgr.h:65
FmgrInfo * flinfo
Definition: fmgr.h:87
Definition: pg_list.h:54
Definition: nodes.h:129
TupleDesc setDesc
Definition: execnodes.h:343
Tuplestorestate * setResult
Definition: execnodes.h:342
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:106
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:172
void * ssup_extra
Definition: sortsupport.h:87
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:191
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:182
TupleDesc tupdesc
Definition: varlena.c:107
ArrayBuildState * astate
Definition: varlena.c:105
Tuplestorestate * tupstore
Definition: varlena.c:106
bool is_multibyte_char_in_char
Definition: varlena.c:57
char * last_match
Definition: varlena.c:68
char * refpoint
Definition: varlena.c:76
pg_locale_t locale
Definition: varlena.c:96
hyperLogLogState full_card
Definition: varlena.c:94
hyperLogLogState abbr_card
Definition: varlena.c:93
Definition: c.h:695
bool deterministic
Definition: pg_locale.h:69
Definition: regguts.h:323
Oid va_valueid
Definition: varatt.h:37
Definition: c.h:641
ToastCompressionId toast_get_compression_id(struct varlena *attr)
ToastCompressionId
@ TOAST_INVALID_COMPRESSION_ID
@ TOAST_LZ4_COMPRESSION_ID
@ TOAST_PGLZ_COMPRESSION_ID
int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)
Definition: tuplesort.c:3139
bool trace_sort
Definition: tuplesort.c:124
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:150
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:185
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition: tupmacs.h:53
pg_unicode_category unicode_category(pg_wchar code)
@ PG_U_UNASSIGNED
UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)
Definition: unicode_norm.c:598
pg_wchar * unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
Definition: unicode_norm.c:402
UnicodeNormalizationForm
Definition: unicode_norm.h:20
@ UNICODE_NFKD
Definition: unicode_norm.h:24
@ UNICODE_NFD
Definition: unicode_norm.h:22
@ UNICODE_NFC
Definition: unicode_norm.h:21
@ UNICODE_NFKC
Definition: unicode_norm.h:23
UnicodeNormalizationQC
Definition: unicode_norm.h:29
@ UNICODE_NORM_QC_YES
Definition: unicode_norm.h:31
@ UNICODE_NORM_QC_NO
Definition: unicode_norm.h:30
#define PG_UNICODE_VERSION
String * makeString(char *str)
Definition: value.c:63
#define VARATT_IS_EXTERNAL_ONDISK(PTR)
Definition: varatt.h:290
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARATT_IS_COMPRESSED(PTR)
Definition: varatt.h:288
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279
#define VARATT_IS_EXTERNAL(PTR)
Definition: varatt.h:289
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
int bpchartruelen(char *s, int len)
Definition: varchar.c:676
static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
Definition: varlena.c:2120
Datum byteacat(PG_FUNCTION_ARGS)
Definition: varlena.c:2913
Datum unknownrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:658
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4779
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:3070
static int text_cmp(text *arg1, text *arg2, Oid collid)
Definition: varlena.c:1586
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:619
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1104
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:6054
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1116
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:5651
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:693
Datum pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
Definition: varlena.c:5127
static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
Definition: varlena.c:1216
int bytea_output
Definition: varlena.c:48
static int32 text_length(Datum str)
Definition: varlena.c:711
static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: varlena.c:2418
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3769
Datum text_left(PG_FUNCTION_ARGS)
Definition: varlena.c:5551
#define DIG(VAL)
Definition: varlena.c:276
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:3873
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:5199
static bool text_isequal(text *txt1, text *txt2, Oid collid)
Definition: varlena.c:4513
static void text_position_cleanup(TextPositionState *state)
Definition: varlena.c:1502
static text * text_catenate(text *t1, text *t2)
Definition: varlena.c:765
static text * concat_internal(const char *sepstr, int argidx, FunctionCallInfo fcinfo)
Definition: varlena.c:5435
static void appendStringInfoText(StringInfo str, const text *t)
Definition: varlena.c:3995
Datum textgtname(PG_FUNCTION_ARGS)
Definition: varlena.c:2749
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:590
Datum textcat(PG_FUNCTION_ARGS)
Definition: varlena.c:750
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:852
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:551
Datum text_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:2558
static text * text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
Definition: varlena.c:885
static int check_replace_text_has_escape(const text *replace_text)
Definition: varlena.c:4086
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:5530
static int internal_text_pattern_compare(text *arg1, text *arg2)
Definition: varlena.c:2772
Datum string_agg_serialize(PG_FUNCTION_ARGS)
Definition: varlena.c:5304
Datum text_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:1760
static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2075
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4795
static const char * text_format_parse_format(const char *start_ptr, const char *end_ptr, int *argpos, int *widthpos, int *flags, int *width)
Definition: varlena.c:5977
Datum text_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:2546
Datum byteapos(PG_FUNCTION_ARGS)
Definition: varlena.c:3140
Datum unicode_assigned(PG_FUNCTION_ARGS)
Definition: varlena.c:6329
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition: varlena.c:3093
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1538
static char * text_position_get_match_ptr(TextPositionState *state)
Definition: varlena.c:1467
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2030
Datum text_to_array_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4553
static unsigned int hexval_n(const char *instr, size_t n)
Definition: varlena.c:6501
Datum byteane(PG_FUNCTION_ARGS)
Definition: varlena.c:3801
static bool rest_of_char_same(const char *s1, const char *s2, int len)
Definition: varlena.c:6165
Datum byteage(PG_FUNCTION_ARGS)
Definition: varlena.c:3893
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:196
Datum byteacmp(PG_FUNCTION_ARGS)
Definition: varlena.c:3913
Datum text_to_table_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4588
Datum text_right(PG_FUNCTION_ARGS)
Definition: varlena.c:5575
Datum textne(PG_FUNCTION_ARGS)
Definition: varlena.c:1666
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:601
Datum byteaGetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3213
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:6103
static int text_position(text *t1, text *t2, Oid collid)
Definition: varlena.c:1176
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3559
Datum bytea_bit_count(PG_FUNCTION_ARGS)
Definition: varlena.c:3126
Datum unicode_normalize_func(PG_FUNCTION_ARGS)
Definition: varlena.c:6357
Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:2874
static void split_text_accum_result(SplitTextOutputData *tstate, text *field_value, text *null_string, Oid collation)
Definition: varlena.c:4740
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3283
Datum split_part(PG_FUNCTION_ARGS)
Definition: varlena.c:4381
Datum texteqname(PG_FUNCTION_ARGS)
Definition: varlena.c:2600
Datum text_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:866
Datum text_name(PG_FUNCTION_ARGS)
Definition: varlena.c:3334
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3251
Datum text_le(PG_FUNCTION_ARGS)
Definition: varlena.c:1730
const char * getClosestMatch(ClosestMatchState *state)
Definition: varlena.c:6256
static void text_position_reset(TextPositionState *state)
Definition: varlena.c:1494
Datum text_to_table(PG_FUNCTION_ARGS)
Definition: varlena.c:4564
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:5638
Datum textnename(PG_FUNCTION_ARGS)
Definition: varlena.c:2650
static char * text_position_next_internal(char *start_ptr, TextPositionState *state)
Definition: varlena.c:1399
static FmgrInfo * build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
Definition: varlena.c:5397
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:5014
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4527
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:2994
Datum unicode_is_normalized(PG_FUNCTION_ARGS)
Definition: varlena.c:6423
#define TEXT_FORMAT_FLAG_MINUS
Definition: varlena.c:5636
static void check_collation_set(Oid collid)
Definition: varlena.c:1509
bool SplitGUCList(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3680
static text * convert_to_base(uint64 value, int base)
Definition: varlena.c:4943
Datum textoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:1093
static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)
Definition: varlena.c:4119
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3432
static text * array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string)
Definition: varlena.c:4821
Datum to_hex32(PG_FUNCTION_ARGS)
Definition: varlena.c:5007
Datum text_starts_with(PG_FUNCTION_ARGS)
Definition: varlena.c:1775
Datum byteale(PG_FUNCTION_ARGS)
Definition: varlena.c:3853
Datum text_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:1745
Datum text_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:5596
Datum to_bin64(PG_FUNCTION_ARGS)
Definition: varlena.c:4976
Datum texteq(PG_FUNCTION_ARGS)
Definition: varlena.c:1611
Datum to_oct64(PG_FUNCTION_ARGS)
Definition: varlena.c:4995
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:2842
static int charlen_to_bytelen(const char *p, int n)
Definition: varlena.c:806
void varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
Definition: varlena.c:1856
static int namefastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2063
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:2961
static StringInfo makeStringAggState(FunctionCallInfo fcinfo)
Definition: varlena.c:5175
Datum textlename(PG_FUNCTION_ARGS)
Definition: varlena.c:2743
Datum icu_unicode_version(PG_FUNCTION_ARGS)
Definition: varlena.c:6315
static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2106
Datum bytearecv(PG_FUNCTION_ARGS)
Definition: varlena.c:471
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2220
text * cstring_to_text(const char *s)
Definition: varlena.c:184
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:5515
Datum text_pattern_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:2794
Datum text_pattern_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:2826
Datum btvarstrequalimage(PG_FUNCTION_ARGS)
Definition: varlena.c:2532
Datum bytea_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:3954
Datum nameletext(PG_FUNCTION_ARGS)
Definition: varlena.c:2719
#define CmpCall(cmpfunc)
Definition: varlena.c:2706
text * replace_text_regexp(text *src_text, text *pattern_text, text *replace_text, int cflags, Oid collation, int search_start, int n)
Definition: varlena.c:4219
Datum namenetext(PG_FUNCTION_ARGS)
Definition: varlena.c:2625
static int text_position_get_match_pos(TextPositionState *state)
Definition: varlena.c:1478
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:248
Datum to_bin32(PG_FUNCTION_ARGS)
Definition: varlena.c:4969
Datum bytea_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:3973
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:3003
Datum to_oct32(PG_FUNCTION_ARGS)
Definition: varlena.c:4988
Datum namegttext(PG_FUNCTION_ARGS)
Definition: varlena.c:2725
Datum unicode_version(PG_FUNCTION_ARGS)
Definition: varlena.c:6306
Datum namegetext(PG_FUNCTION_ARGS)
Definition: varlena.c:2731
static UnicodeNormalizationForm unicode_norm_form_from_string(const char *formstr)
Definition: varlena.c:6269
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:2928
static bool text_position_next(TextPositionState *state)
Definition: varlena.c:1335
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:731
Datum textltname(PG_FUNCTION_ARGS)
Definition: varlena.c:2737
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:3081
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1829
Datum bytea_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:3935
Datum text_format_nv(PG_FUNCTION_ARGS)
Definition: varlena.c:6155
Datum textpos(PG_FUNCTION_ARGS)
Definition: varlena.c:1153
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:1993
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2858
Datum string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:5371
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:2897
Datum unistr(PG_FUNCTION_ARGS)
Definition: varlena.c:6515
static unsigned int hexval(unsigned char c)
Definition: varlena.c:6485
static bool text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
Definition: varlena.c:5928
Datum unknownin(PG_FUNCTION_ARGS)
Definition: varlena.c:634
static bool isxdigits_n(const char *instr, size_t n)
Definition: varlena.c:6475
Datum string_agg_deserialize(PG_FUNCTION_ARGS)
Definition: varlena.c:5335
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:388
Datum namelttext(PG_FUNCTION_ARGS)
Definition: varlena.c:2713
Datum pg_column_size(PG_FUNCTION_ARGS)
Definition: varlena.c:5027
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:290
#define DatumGetVarStringPP(X)
Definition: varlena.c:117
Datum pg_column_compression(PG_FUNCTION_ARGS)
Definition: varlena.c:5074
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3357
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:498
Datum nameeqtext(PG_FUNCTION_ARGS)
Definition: varlena.c:2575
Datum bttextnamecmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2691
void initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
Definition: varlena.c:6201
Datum textin(PG_FUNCTION_ARGS)
Definition: varlena.c:579
Datum string_agg_combine(PG_FUNCTION_ARGS)
Definition: varlena.c:5254
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3184
Datum btnametextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2675
Datum unknownsend(PG_FUNCTION_ARGS)
Definition: varlena.c:673
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:2810
#define TEXTBUFLEN
Definition: varlena.c:114
void updateClosestMatch(ClosestMatchState *state, const char *candidate)
Definition: varlena.c:6221
#define VAL(CH)
Definition: varlena.c:275
char * text_to_cstring(const text *t)
Definition: varlena.c:217
Datum bttextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:1814
Datum unknownout(PG_FUNCTION_ARGS)
Definition: varlena.c:646
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4009
Datum textgename(PG_FUNCTION_ARGS)
Definition: varlena.c:2755
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3374
static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
Definition: varlena.c:4604
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:2980
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:3833
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:490
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1715
int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)