PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
varlena.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * varlena.c
4 * Functions for the variable-length built-in types.
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/adt/varlena.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include <ctype.h>
18#include <limits.h>
19
20#include "access/detoast.h"
23#include "catalog/pg_type.h"
24#include "common/hashfn.h"
25#include "common/int.h"
27#include "common/unicode_norm.h"
29#include "funcapi.h"
30#include "lib/hyperloglog.h"
31#include "libpq/pqformat.h"
32#include "miscadmin.h"
33#include "nodes/execnodes.h"
34#include "parser/scansup.h"
35#include "port/pg_bswap.h"
36#include "regex/regex.h"
37#include "utils/builtins.h"
38#include "utils/bytea.h"
39#include "utils/guc.h"
40#include "utils/lsyscache.h"
41#include "utils/memutils.h"
42#include "utils/pg_locale.h"
43#include "utils/sortsupport.h"
44#include "utils/varlena.h"
45
46
47/* GUC variable */
49
50typedef struct varlena VarString;
51
52/*
53 * State for text_position_* functions.
54 */
55typedef struct
56{
57 pg_locale_t locale; /* collation used for substring matching */
58 bool is_multibyte_char_in_char; /* need to check char boundaries? */
59 bool greedy; /* find longest possible substring? */
60
61 char *str1; /* haystack string */
62 char *str2; /* needle string */
63 int len1; /* string lengths in bytes */
64 int len2;
65
66 /* Skip table for Boyer-Moore-Horspool search algorithm: */
67 int skiptablemask; /* mask for ANDing with skiptable subscripts */
68 int skiptable[256]; /* skip distance for given mismatched char */
69
70 /*
71 * Note that with nondeterministic collations, the length of the last
72 * match is not necessarily equal to the length of the "needle" passed in.
73 */
74 char *last_match; /* pointer to last match in 'str1' */
75 int last_match_len; /* length of last match */
76 int last_match_len_tmp; /* same but for internal use */
77
78 /*
79 * Sometimes we need to convert the byte position of a match to a
80 * character position. These store the last position that was converted,
81 * so that on the next call, we can continue from that point, rather than
82 * count characters from the very beginning.
83 */
84 char *refpoint; /* pointer within original haystack string */
85 int refpos; /* 0-based character offset of the same point */
87
88typedef struct
89{
90 char *buf1; /* 1st string, or abbreviation original string
91 * buf */
92 char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
93 int buflen1; /* Allocated length of buf1 */
94 int buflen2; /* Allocated length of buf2 */
95 int last_len1; /* Length of last buf1 string/strxfrm() input */
96 int last_len2; /* Length of last buf2 string/strxfrm() blob */
97 int last_returned; /* Last comparison result (cache) */
98 bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
100 Oid typid; /* Actual datatype (text/bpchar/bytea/name) */
101 hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
102 hyperLogLogState full_card; /* Full key cardinality state */
103 double prop_card; /* Required cardinality proportion */
106
107/*
108 * Output data for split_text(): we output either to an array or a table.
109 * tupstore and tupdesc must be set up in advance to output to a table.
110 */
111typedef struct
112{
117
118/*
119 * This should be large enough that most strings will fit, but small enough
120 * that we feel comfortable putting it on the stack
121 */
122#define TEXTBUFLEN 1024
123
124#define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X))
125#define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X))
126
127static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
128static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
129static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
131static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
132static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
133static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
134static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
135static int32 text_length(Datum str);
136static text *text_catenate(text *t1, text *t2);
138 int32 start,
139 int32 length,
140 bool length_not_specified);
141static text *text_overlay(text *t1, text *t2, int sp, int sl);
142static int text_position(text *t1, text *t2, Oid collid);
145static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
149static void check_collation_set(Oid collid);
150static int text_cmp(text *arg1, text *arg2, Oid collid);
151static bytea *bytea_catenate(bytea *t1, bytea *t2);
153 int S,
154 int L,
155 bool length_not_specified);
156static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
157static void appendStringInfoText(StringInfo str, const text *t);
158static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
160 text *field_value,
161 text *null_string,
162 Oid collation);
164 const char *fldsep, const char *null_string);
166static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
167 int *value);
168static const char *text_format_parse_format(const char *start_ptr,
169 const char *end_ptr,
170 int *argpos, int *widthpos,
171 int *flags, int *width);
172static void text_format_string_conversion(StringInfo buf, char conversion,
173 FmgrInfo *typOutputInfo,
174 Datum value, bool isNull,
175 int flags, int width);
176static void text_format_append_string(StringInfo buf, const char *str,
177 int flags, int width);
178
179
180/*****************************************************************************
181 * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
182 *****************************************************************************/
183
184/*
185 * cstring_to_text
186 *
187 * Create a text value from a null-terminated C string.
188 *
189 * The new text value is freshly palloc'd with a full-size VARHDR.
190 */
191text *
192cstring_to_text(const char *s)
193{
194 return cstring_to_text_with_len(s, strlen(s));
195}
196
197/*
198 * cstring_to_text_with_len
199 *
200 * Same as cstring_to_text except the caller specifies the string length;
201 * the string need not be null_terminated.
202 */
203text *
204cstring_to_text_with_len(const char *s, int len)
205{
206 text *result = (text *) palloc(len + VARHDRSZ);
207
208 SET_VARSIZE(result, len + VARHDRSZ);
209 memcpy(VARDATA(result), s, len);
210
211 return result;
212}
213
214/*
215 * text_to_cstring
216 *
217 * Create a palloc'd, null-terminated C string from a text value.
218 *
219 * We support being passed a compressed or toasted text value.
220 * This is a bit bogus since such values shouldn't really be referred to as
221 * "text *", but it seems useful for robustness. If we didn't handle that
222 * case here, we'd need another routine that did, anyway.
223 */
224char *
226{
227 /* must cast away the const, unfortunately */
228 text *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
 /* payload length in bytes, excluding the varlena header */
229 int len = VARSIZE_ANY_EXHDR(tunpacked);
230 char *result;
231
 /* one extra byte for the terminating NUL stored below */
232 result = (char *) palloc(len + 1);
233 memcpy(result, VARDATA_ANY(tunpacked), len);
234 result[len] = '\0';
235
 /*
  * Release the detoasted value only when it is a different pointer from
  * the caller's input; otherwise we would be freeing the caller's datum.
  */
236 if (tunpacked != t)
237 pfree(tunpacked);
238
239 return result;
240}
241
242/*
243 * text_to_cstring_buffer
244 *
245 * Copy a text value into a caller-supplied buffer of size dst_len.
246 *
247 * The text string is truncated if necessary to fit. The result is
248 * guaranteed null-terminated (unless dst_len == 0).
249 *
250 * We support being passed a compressed or toasted text value.
251 * This is a bit bogus since such values shouldn't really be referred to as
252 * "text *", but it seems useful for robustness. If we didn't handle that
253 * case here, we'd need another routine that did, anyway.
254 */
255void
256text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
257{
258 /* must cast away the const, unfortunately */
259 text *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
260 size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
261
262 if (dst_len > 0)
263 {
264 dst_len--;
265 if (dst_len >= src_len)
266 dst_len = src_len;
267 else /* ensure truncation is encoding-safe */
268 dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
269 memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
270 dst[dst_len] = '\0';
271 }
272
273 if (srcunpacked != src)
274 pfree(srcunpacked);
275}
276
277
278/*****************************************************************************
279 * USER I/O ROUTINES *
280 *****************************************************************************/
281
282
/* Map an ASCII digit character to the number it denotes. */
283#define VAL(CH) ((CH) - '0')
/* Map a single-digit number back to its ASCII digit character. */
284#define DIG(VAL) ((VAL) + '0')
285
286/*
287 * byteain - converts from printable representation of byte array
288 *
289 * Non-printable characters must be passed as '\nnn' (octal) and are
290 * converted to internal form. '\' must be passed as '\\'.
291 * ereport(ERROR, ...) if bad form.
292 *
293 * BUGS:
294 * The input is scanned twice.
295 * The error checking of input is minimal.
296 */
297Datum
299{
300 char *inputText = PG_GETARG_CSTRING(0);
 /* errors below are reported through this context via ereturn() */
301 Node *escontext = fcinfo->context;
302 char *tp;
303 char *rp;
 /* bc serves as byte counter, total allocation size, and octal scratch */
304 int bc;
305 bytea *result;
306
307 /* Recognize hex input */
308 if (inputText[0] == '\\' && inputText[1] == 'x')
309 {
310 size_t len = strlen(inputText);
311
 /* two hex digits per output byte, after skipping the "\x" prefix */
312 bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
313 result = palloc(bc);
314 bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
315 escontext);
316 SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
317
318 PG_RETURN_BYTEA_P(result);
319 }
320
321 /* Else, it's the traditional escaped style */
 /*
  * First pass: validate the escape syntax and count output bytes.  Each
  * loop iteration consumes exactly one output byte's worth of input (a
  * plain character, a \nnn octal escape, or a doubled backslash), and the
  * for-loop header bumps bc once per iteration.
  */
322 for (bc = 0, tp = inputText; *tp != '\0'; bc++)
323 {
324 if (tp[0] != '\\')
325 tp++;
326 else if ((tp[0] == '\\') &&
327 (tp[1] >= '0' && tp[1] <= '3') &&
328 (tp[2] >= '0' && tp[2] <= '7') &&
329 (tp[3] >= '0' && tp[3] <= '7'))
330 tp += 4;
331 else if ((tp[0] == '\\') &&
332 (tp[1] == '\\'))
333 tp += 2;
334 else
335 {
336 /*
337 * one backslash, not followed by another or ### valid octal
338 */
339 ereturn(escontext, (Datum) 0,
340 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
341 errmsg("invalid input syntax for type %s", "bytea")));
342 }
343 }
344
 /* bc now becomes the full varlena size: data bytes plus header */
345 bc += VARHDRSZ;
346
347 result = (bytea *) palloc(bc);
348 SET_VARSIZE(result, bc);
349
 /* Second pass: decode the input into the result's data area */
350 tp = inputText;
351 rp = VARDATA(result);
352 while (*tp != '\0')
353 {
354 if (tp[0] != '\\')
355 *rp++ = *tp++;
356 else if ((tp[0] == '\\') &&
357 (tp[1] >= '0' && tp[1] <= '3') &&
358 (tp[2] >= '0' && tp[2] <= '7') &&
359 (tp[3] >= '0' && tp[3] <= '7'))
360 {
 /* bc is reused as scratch: assemble the three octal digits, 3 bits each */
361 bc = VAL(tp[1]);
362 bc <<= 3;
363 bc += VAL(tp[2]);
364 bc <<= 3;
365 *rp++ = bc + VAL(tp[3]);
366
367 tp += 4;
368 }
369 else if ((tp[0] == '\\') &&
370 (tp[1] == '\\'))
371 {
 /* a doubled backslash stands for one literal backslash */
372 *rp++ = '\\';
373 tp += 2;
374 }
375 else
376 {
377 /*
378 * We should never get here. The first pass should not allow it.
379 */
380 ereturn(escontext, (Datum) 0,
381 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
382 errmsg("invalid input syntax for type %s", "bytea")));
383 }
384 }
385
386 PG_RETURN_BYTEA_P(result);
387}
388
389/*
390 * byteaout - converts to printable representation of byte array
391 *
392 * In the traditional escaped format, non-printable characters are
393 * printed as '\nnn' (octal) and '\' as '\\'.
394 */
395Datum
397{
398 bytea *vlena = PG_GETARG_BYTEA_PP(0);
399 char *result;
400 char *rp;
401
403 {
404 /* Print hex format */
405 rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
406 *rp++ = '\\';
407 *rp++ = 'x';
408 rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
409 }
411 {
412 /* Print traditional escaped format */
413 char *vp;
414 uint64 len;
415 int i;
416
417 len = 1; /* empty string has 1 char */
418 vp = VARDATA_ANY(vlena);
419 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
420 {
421 if (*vp == '\\')
422 len += 2;
423 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
424 len += 4;
425 else
426 len++;
427 }
428
429 /*
430 * In principle len can't overflow uint32 if the input fit in 1GB, but
431 * for safety let's check rather than relying on palloc's internal
432 * check.
433 */
434 if (len > MaxAllocSize)
436 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
437 errmsg_internal("result of bytea output conversion is too large")));
438 rp = result = (char *) palloc(len);
439
440 vp = VARDATA_ANY(vlena);
441 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
442 {
443 if (*vp == '\\')
444 {
445 *rp++ = '\\';
446 *rp++ = '\\';
447 }
448 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
449 {
450 int val; /* holds unprintable chars */
451
452 val = *vp;
453 rp[0] = '\\';
454 rp[3] = DIG(val & 07);
455 val >>= 3;
456 rp[2] = DIG(val & 07);
457 val >>= 3;
458 rp[1] = DIG(val & 03);
459 rp += 4;
460 }
461 else
462 *rp++ = *vp;
463 }
464 }
465 else
466 {
467 elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
469 rp = result = NULL; /* keep compiler quiet */
470 }
471 *rp = '\0';
472 PG_RETURN_CSTRING(result);
473}
474
475/*
476 * bytearecv - converts external binary format to bytea
477 */
478Datum
480{
482 bytea *result;
483 int nbytes;
484
485 nbytes = buf->len - buf->cursor;
486 result = (bytea *) palloc(nbytes + VARHDRSZ);
487 SET_VARSIZE(result, nbytes + VARHDRSZ);
488 pq_copymsgbytes(buf, VARDATA(result), nbytes);
489 PG_RETURN_BYTEA_P(result);
490}
491
492/*
493 * byteasend - converts bytea to binary format
494 *
495 * This is a special case: just copy the input...
496 */
497Datum
499{
500 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
501
502 PG_RETURN_BYTEA_P(vlena);
503}
504
505Datum
507{
509
511
512 /* Append the value unless null, preceding it with the delimiter. */
513 if (!PG_ARGISNULL(1))
514 {
516 bool isfirst = false;
517
518 /*
519 * You might think we can just throw away the first delimiter, however
520 * we must keep it as we may be a parallel worker doing partial
521 * aggregation building a state to send to the main process. We need
522 * to keep the delimiter of every aggregation so that the combine
523 * function can properly join up the strings of two separately
524 * partially aggregated results. The first delimiter is only stripped
525 * off in the final function. To know how much to strip off the front
526 * of the string, we store the length of the first delimiter in the
527 * StringInfo's cursor field, which we don't otherwise need here.
528 */
529 if (state == NULL)
530 {
531 state = makeStringAggState(fcinfo);
532 isfirst = true;
533 }
534
535 if (!PG_ARGISNULL(2))
536 {
537 bytea *delim = PG_GETARG_BYTEA_PP(2);
538
540 VARSIZE_ANY_EXHDR(delim));
541 if (isfirst)
542 state->cursor = VARSIZE_ANY_EXHDR(delim);
543 }
544
547 }
548
549 /*
550 * The transition type for string_agg() is declared to be "internal",
551 * which is a pass-by-value type the same size as a pointer.
552 */
553 if (state)
556}
557
558Datum
560{
562
563 /* cannot be called directly because of internal-type argument */
564 Assert(AggCheckCallContext(fcinfo, NULL));
565
567
568 if (state != NULL)
569 {
570 /* As per comment in transfn, strip data before the cursor position */
571 bytea *result;
572 int strippedlen = state->len - state->cursor;
573
574 result = (bytea *) palloc(strippedlen + VARHDRSZ);
575 SET_VARSIZE(result, strippedlen + VARHDRSZ);
576 memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
577 PG_RETURN_BYTEA_P(result);
578 }
579 else
581}
582
583/*
584 * textin - converts cstring to internal representation
585 */
586Datum
588{
589 char *inputText = PG_GETARG_CSTRING(0);
590
592}
593
594/*
595 * textout - converts internal representation to cstring
596 */
597Datum
599{
600 Datum txt = PG_GETARG_DATUM(0);
601
603}
604
605/*
606 * textrecv - converts external binary format to text
607 */
608Datum
610{
612 text *result;
613 char *str;
614 int nbytes;
615
616 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
617
618 result = cstring_to_text_with_len(str, nbytes);
619 pfree(str);
620 PG_RETURN_TEXT_P(result);
621}
622
623/*
624 * textsend - converts text to binary format
625 */
626Datum
628{
629 text *t = PG_GETARG_TEXT_PP(0);
631
635}
636
637
638/*
639 * unknownin - converts cstring to internal representation
640 */
641Datum
643{
644 char *str = PG_GETARG_CSTRING(0);
645
646 /* representation is same as cstring */
648}
649
650/*
651 * unknownout - converts internal representation to cstring
652 */
653Datum
655{
656 /* representation is same as cstring */
657 char *str = PG_GETARG_CSTRING(0);
658
660}
661
662/*
663 * unknownrecv - converts external binary format to unknown
664 */
665Datum
667{
669 char *str;
670 int nbytes;
671
672 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
673 /* representation is same as cstring */
675}
676
677/*
678 * unknownsend - converts unknown to binary format
679 */
680Datum
682{
683 /* representation is same as cstring */
684 char *str = PG_GETARG_CSTRING(0);
686
688 pq_sendtext(&buf, str, strlen(str));
690}
691
692
693/* ========== PUBLIC ROUTINES ========== */
694
695/*
696 * textlen -
697 * returns the logical length of a text*
698 * (which is less than the VARSIZE of the text*)
699 */
700Datum
702{
704
705 /* try to avoid decompressing argument */
707}
708
709/*
710 * text_length -
711 * Does the real work for textlen()
712 *
713 * This is broken out so it can be called directly by other string processing
714 * functions. Note that the argument is passed as a Datum, to indicate that
715 * it may still be in compressed form. We can avoid decompressing it at all
716 * in some cases.
717 */
718static int32
720{
721 /* fastpath when max encoding length is one */
724 else
725 {
726 text *t = DatumGetTextPP(str);
727
730 }
731}
732
733/*
734 * textoctetlen -
735 * returns the physical length of a text*
736 * (which is less than the VARSIZE of the text*)
737 */
738Datum
740{
742
743 /* We need not detoast the input at all */
745}
746
747/*
748 * textcat -
749 * takes two text* and returns a text* that is the concatenation of
750 * the two.
751 *
752 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
753 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
754 * Allocate space for output in all cases.
755 * XXX - thomas 1997-07-10
756 */
757Datum
759{
760 text *t1 = PG_GETARG_TEXT_PP(0);
761 text *t2 = PG_GETARG_TEXT_PP(1);
762
764}
765
766/*
767 * text_catenate
768 * Guts of textcat(), broken out so it can be used by other functions
769 *
770 * Arguments can be in short-header form, but not compressed or out-of-line
771 */
772static text *
774{
775 text *result;
776 int len1,
777 len2,
778 len;
779 char *ptr;
780
 /* payload sizes in bytes, excluding each input's varlena header */
781 len1 = VARSIZE_ANY_EXHDR(t1);
782 len2 = VARSIZE_ANY_EXHDR(t2);
783
784 /* paranoia ... probably should throw error instead? */
785 if (len1 < 0)
786 len1 = 0;
787 if (len2 < 0)
788 len2 = 0;
789
 /* total result size: both payloads plus one full-size header */
790 len = len1 + len2 + VARHDRSZ;
791 result = (text *) palloc(len);
792
793 /* Set size of result string... */
794 SET_VARSIZE(result, len);
795
796 /* Fill data field of result string... */
797 ptr = VARDATA(result);
 /* t1's bytes go first; t2's bytes follow immediately at offset len1 */
798 if (len1 > 0)
799 memcpy(ptr, VARDATA_ANY(t1), len1);
800 if (len2 > 0)
801 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
802
803 return result;
804}
805
806/*
807 * charlen_to_bytelen()
808 * Compute the number of bytes occupied by n characters starting at *p
809 *
810 * It is caller's responsibility that there actually are n characters;
811 * the string need not be null-terminated.
812 */
813static int
814charlen_to_bytelen(const char *p, int n)
815{
817 {
818 /* Optimization for single-byte encodings */
819 return n;
820 }
821 else
822 {
823 const char *s;
824
825 for (s = p; n > 0; n--)
826 s += pg_mblen(s);
827
828 return s - p;
829 }
830}
831
832/*
833 * text_substr()
834 * Return a substring starting at the specified position.
835 * - thomas 1997-12-31
836 *
837 * Input:
838 * - string
839 * - starting position (is one-based)
840 * - string length
841 *
842 * If the starting position is zero or less, then return from the start of the string
843 * adjusting the length to be consistent with the "negative start" per SQL.
844 * If the length is less than zero, return the remaining string.
845 *
846 * Added multibyte support.
847 * - Tatsuo Ishii 1998-4-21
848 * Changed behavior if starting position is less than one to conform to SQL behavior.
849 * Formerly returned the entire string; now returns a portion.
850 * - Thomas Lockhart 1998-12-10
851 * Now uses faster TOAST-slicing interface
852 * - John Gray 2002-02-22
853 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
854 * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
855 * error; if E < 1, return '', not entire string). Fixed MB related bug when
856 * S > LC and < LC + 4 sometimes garbage characters are returned.
857 * - Joe Conway 2002-08-10
858 */
859Datum
861{
865 false));
866}
867
868/*
869 * text_substr_no_len -
870 * Wrapper to avoid opr_sanity failure due to
871 * one function accepting a different number of args.
872 */
873Datum
875{
878 -1, true));
879}
880
881/*
882 * text_substring -
883 * Does the real work for text_substr() and text_substr_no_len()
884 *
885 * This is broken out so it can be called directly by other string processing
886 * functions. Note that the argument is passed as a Datum, to indicate that
887 * it may still be in compressed/toasted form. We can avoid detoasting all
888 * of it in some cases.
889 *
890 * The result is always a freshly palloc'd datum.
891 */
892static text *
893text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
894{
896 int32 S = start; /* start position */
897 int32 S1; /* adjusted start position */
898 int32 L1; /* adjusted substring length */
899 int32 E; /* end position */
900
901 /*
902 * SQL99 says S can be zero or negative (which we don't document), but we
903 * still must fetch from the start of the string.
904 * https://www.postgresql.org/message-id/170905442373.643.11536838320909376197%40wrigleys.postgresql.org
905 */
906 S1 = Max(S, 1);
907
908 /* life is easy if the encoding max length is 1 */
909 if (eml == 1)
910 {
911 if (length_not_specified) /* special case - get length to end of
912 * string */
913 L1 = -1;
914 else if (length < 0)
915 {
916 /* SQL99 says to throw an error for E < S, i.e., negative length */
918 (errcode(ERRCODE_SUBSTRING_ERROR),
919 errmsg("negative substring length not allowed")));
920 L1 = -1; /* silence stupider compilers */
921 }
922 else if (pg_add_s32_overflow(S, length, &E))
923 {
924 /*
925 * L could be large enough for S + L to overflow, in which case
926 * the substring must run to end of string.
927 */
928 L1 = -1;
929 }
930 else
931 {
932 /*
933 * A zero or negative value for the end position can happen if the
934 * start was negative or one. SQL99 says to return a zero-length
935 * string.
936 */
937 if (E < 1)
938 return cstring_to_text("");
939
940 L1 = E - S1;
941 }
942
943 /*
944 * If the start position is past the end of the string, SQL99 says to
945 * return a zero-length string -- DatumGetTextPSlice() will do that
946 * for us. We need only convert S1 to zero-based starting position.
947 */
948 return DatumGetTextPSlice(str, S1 - 1, L1);
949 }
950 else if (eml > 1)
951 {
952 /*
953 * When encoding max length is > 1, we can't get LC without
954 * detoasting, so we'll grab a conservatively large slice now and go
955 * back later to do the right thing
956 */
957 int32 slice_start;
958 int32 slice_size;
959 int32 slice_strlen;
960 text *slice;
961 int32 E1;
962 int32 i;
963 char *p;
964 char *s;
965 text *ret;
966
967 /*
968 * We need to start at position zero because there is no way to know
969 * in advance which byte offset corresponds to the supplied start
970 * position.
971 */
972 slice_start = 0;
973
974 if (length_not_specified) /* special case - get length to end of
975 * string */
976 slice_size = L1 = -1;
977 else if (length < 0)
978 {
979 /* SQL99 says to throw an error for E < S, i.e., negative length */
981 (errcode(ERRCODE_SUBSTRING_ERROR),
982 errmsg("negative substring length not allowed")));
983 slice_size = L1 = -1; /* silence stupider compilers */
984 }
985 else if (pg_add_s32_overflow(S, length, &E))
986 {
987 /*
988 * L could be large enough for S + L to overflow, in which case
989 * the substring must run to end of string.
990 */
991 slice_size = L1 = -1;
992 }
993 else
994 {
995 /*
996 * A zero or negative value for the end position can happen if the
997 * start was negative or one. SQL99 says to return a zero-length
998 * string.
999 */
1000 if (E < 1)
1001 return cstring_to_text("");
1002
1003 /*
1004 * if E is past the end of the string, the tuple toaster will
1005 * truncate the length for us
1006 */
1007 L1 = E - S1;
1008
1009 /*
1010 * Total slice size in bytes can't be any longer than the start
1011 * position plus substring length times the encoding max length.
1012 * If that overflows, we can just use -1.
1013 */
1014 if (pg_mul_s32_overflow(E, eml, &slice_size))
1015 slice_size = -1;
1016 }
1017
1018 /*
1019 * If we're working with an untoasted source, no need to do an extra
1020 * copying step.
1021 */
1024 slice = DatumGetTextPSlice(str, slice_start, slice_size);
1025 else
1026 slice = (text *) DatumGetPointer(str);
1027
1028 /* see if we got back an empty string */
1029 if (VARSIZE_ANY_EXHDR(slice) == 0)
1030 {
1031 if (slice != (text *) DatumGetPointer(str))
1032 pfree(slice);
1033 return cstring_to_text("");
1034 }
1035
1036 /* Now we can get the actual length of the slice in MB characters */
1037 slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
1038 VARSIZE_ANY_EXHDR(slice));
1039
1040 /*
1041 * Check that the start position wasn't > slice_strlen. If so, SQL99
1042 * says to return a zero-length string.
1043 */
1044 if (S1 > slice_strlen)
1045 {
1046 if (slice != (text *) DatumGetPointer(str))
1047 pfree(slice);
1048 return cstring_to_text("");
1049 }
1050
1051 /*
1052 * Adjust L1 and E1 now that we know the slice string length. Again
1053 * remember that S1 is one based, and slice_start is zero based.
1054 */
1055 if (L1 > -1)
1056 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
1057 else
1058 E1 = slice_start + 1 + slice_strlen;
1059
1060 /*
1061 * Find the start position in the slice; remember S1 is not zero based
1062 */
1063 p = VARDATA_ANY(slice);
1064 for (i = 0; i < S1 - 1; i++)
1065 p += pg_mblen(p);
1066
1067 /* hang onto a pointer to our start position */
1068 s = p;
1069
1070 /*
1071 * Count the actual bytes used by the substring of the requested
1072 * length.
1073 */
1074 for (i = S1; i < E1; i++)
1075 p += pg_mblen(p);
1076
1077 ret = (text *) palloc(VARHDRSZ + (p - s));
1078 SET_VARSIZE(ret, VARHDRSZ + (p - s));
1079 memcpy(VARDATA(ret), s, (p - s));
1080
1081 if (slice != (text *) DatumGetPointer(str))
1082 pfree(slice);
1083
1084 return ret;
1085 }
1086 else
1087 elog(ERROR, "invalid backend encoding: encoding max length < 1");
1088
1089 /* not reached: suppress compiler warning */
1090 return NULL;
1091}
1092
1093/*
1094 * textoverlay
1095 * Replace specified substring of first string with second
1096 *
1097 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1098 * This code is a direct implementation of what the standard says.
1099 */
1100Datum
1102{
 /* t1 is the string being modified, t2 the replacement (see text_overlay) */
1103 text *t1 = PG_GETARG_TEXT_PP(0);
1104 text *t2 = PG_GETARG_TEXT_PP(1);
1105 int sp = PG_GETARG_INT32(2); /* substring start position */
1106 int sl = PG_GETARG_INT32(3); /* substring length */
1107
 /* all validation and the actual splice happen in text_overlay() */
1108 PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1109}
1110
/*
 * Overlay variant that takes no explicit substring length: the length of
 * the span to replace is taken from the replacement string itself, as
 * computed by text_length().
 */
1111Datum
1113{
1114 text *t1 = PG_GETARG_TEXT_PP(0);
1115 text *t2 = PG_GETARG_TEXT_PP(1);
1116 int sp = PG_GETARG_INT32(2); /* substring start position */
1117 int sl;
1118
1119 sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1120 PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1121}
1122
1123static text *
1124text_overlay(text *t1, text *t2, int sp, int sl)
1125{
1126 text *result;
1127 text *s1;
1128 text *s2;
1129 int sp_pl_sl;
1130
1131 /*
1132 * Check for possible integer-overflow cases. For negative sp, throw a
1133 * "substring length" error because that's what should be expected
1134 * according to the spec's definition of OVERLAY().
1135 */
1136 if (sp <= 0)
1137 ereport(ERROR,
1138 (errcode(ERRCODE_SUBSTRING_ERROR),
1139 errmsg("negative substring length not allowed")));
1140 if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1141 ereport(ERROR,
1142 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1143 errmsg("integer out of range")));
1144
1145 s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1146 s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1147 result = text_catenate(s1, t2);
1148 result = text_catenate(result, s2);
1149
1150 return result;
1151}
1152
1153/*
1154 * textpos -
1155 * Return the position of the specified substring.
1156 * Implements the SQL POSITION() function.
1157 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1158 * - thomas 1997-07-27
1159 */
1160Datum
1162{
1164 text *search_str = PG_GETARG_TEXT_PP(1);
1165
1167}
1168
1169/*
1170 * text_position -
1171 * Does the real work for textpos()
1172 *
1173 * Inputs:
1174 * t1 - string to be searched
1175 * t2 - pattern to match within t1
1176 * Result:
1177 * Character index of the first matched char, starting from 1,
1178 * or 0 if no match.
1179 *
1180 * This is broken out so it can be called directly by other string processing
1181 * functions.
1182 */
1183static int
1185{
1187 int result;
1188
1190
1191 /* Empty needle always matches at position 1 */
1192 if (VARSIZE_ANY_EXHDR(t2) < 1)
1193 return 1;
1194
1195 /* Otherwise, can't match if haystack is shorter than needle */
1196 if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2) &&
1197 pg_newlocale_from_collation(collid)->deterministic)
1198 return 0;
1199
1200 text_position_setup(t1, t2, collid, &state);
1201 /* don't need greedy mode here */
1202 state.greedy = false;
1203
1205 result = 0;
1206 else
1209 return result;
1210}
1211
1212
1213/*
1214 * text_position_setup, text_position_next, text_position_cleanup -
1215 * Component steps of text_position()
1216 *
1217 * These are broken out so that a string can be efficiently searched for
1218 * multiple occurrences of the same pattern. text_position_next may be
1219 * called multiple times, and it advances to the next match on each call.
1220 * text_position_get_match_ptr() and text_position_get_match_pos() return
1221 * a pointer or 1-based character position of the last match, respectively.
1222 *
1223 * The "state" variable is normally just a local variable in the caller.
1224 *
1225 * NOTE: text_position_next skips over the matched portion. For example,
1226 * searching for "xx" in "xxx" returns only one match, not two.
1227 */
1228
/*
 * Fill in *state for a search of needle t2 within haystack t1.
 *
 * Records the data pointers and byte lengths of both strings, clears the
 * last-match and reference-point bookkeeping, turns greedy mode on, decides
 * whether raw byte matches must later be re-checked against character
 * boundaries, and (for a deterministic locale, with a needle of at least two
 * bytes that is no longer than the haystack) builds the
 * Boyer-Moore-Horspool skip table.
 *
 * The needle must be non-empty; this is asserted below.
 *
 * NOTE(review): the embedded listing numbers skip 1230, 1235, 1237 and 1256,
 * so the declarator line, two statements ahead of the greedy-mode comment,
 * and the opening "if" of the is_multibyte_char_in_char chain are not
 * visible here.  state->locale is read further down but its assignment is
 * not shown -- confirm against the complete file.
 */
1229static void
1231{
1232 int len1 = VARSIZE_ANY_EXHDR(t1);
1233 int len2 = VARSIZE_ANY_EXHDR(t2);
1234
1236
1238
1239 /*
1240 * Most callers need greedy mode, but some might want to unset this to
1241 * optimize.
1242 */
1243 state->greedy = true;
1244
1245 Assert(len2 > 0);
1246
1247 /*
1248 * Even with a multi-byte encoding, we perform the search using the raw
1249 * byte sequence, ignoring multibyte issues. For UTF-8, that works fine,
1250 * because in UTF-8 the byte sequence of one character cannot contain
1251 * another character. For other multi-byte encodings, we do the search
1252 * initially as a simple byte search, ignoring multibyte issues, but
1253 * verify afterwards that the match we found is at a character boundary,
1254 * and continue the search if it was a false match.
1255 */
1257 state->is_multibyte_char_in_char = false;
1258 else if (GetDatabaseEncoding() == PG_UTF8)
1259 state->is_multibyte_char_in_char = false;
1260 else
1261 state->is_multibyte_char_in_char = true;
1262
1263 state->str1 = VARDATA_ANY(t1);
1264 state->str2 = VARDATA_ANY(t2);
1265 state->len1 = len1;
1266 state->len2 = len2;
1267 state->last_match = NULL;
1268 state->refpoint = state->str1;
1269 state->refpos = 0;
1270
1271 /*
1272 * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1273 * notes we use the terminology that the "haystack" is the string to be
1274 * searched (t1) and the "needle" is the pattern being sought (t2).
1275 *
1276 * If the needle is empty or bigger than the haystack then there is no
1277 * point in wasting cycles initializing the table. We also choose not to
1278 * use B-M-H for needles of length 1, since the skip table can't possibly
1279 * save anything in that case.
1280 *
1281 * (With nondeterministic collations, the search is already
1282 * multibyte-aware, so we don't need this.)
1283 */
1284 if (len1 >= len2 && len2 > 1 && state->locale->deterministic)
1285 {
1286 int searchlength = len1 - len2;
1287 int skiptablemask;
1288 int last;
1289 int i;
1290 const char *str2 = state->str2;
1291
1292 /*
1293 * First we must determine how much of the skip table to use. The
1294 * declaration of TextPositionState allows up to 256 elements, but for
1295 * short search problems we don't really want to have to initialize so
1296 * many elements --- it would take too long in comparison to the
1297 * actual search time. So we choose a useful skip table size based on
1298 * the haystack length minus the needle length. The closer the needle
1299 * length is to the haystack length the less useful skipping becomes.
1300 *
1301 * Note: since we use bit-masking to select table elements, the skip
1302 * table size MUST be a power of 2, and so the mask must be 2^N-1.
1303 */
1304 if (searchlength < 16)
1305 skiptablemask = 3;
1306 else if (searchlength < 64)
1307 skiptablemask = 7;
1308 else if (searchlength < 128)
1309 skiptablemask = 15;
1310 else if (searchlength < 512)
1311 skiptablemask = 31;
1312 else if (searchlength < 2048)
1313 skiptablemask = 63;
1314 else if (searchlength < 4096)
1315 skiptablemask = 127;
1316 else
1317 skiptablemask = 255;
1318 state->skiptablemask = skiptablemask;
1319
1320 /*
1321 * Initialize the skip table. We set all elements to the needle
1322 * length, since this is the correct skip distance for any character
1323 * not found in the needle.
1324 */
1325 for (i = 0; i <= skiptablemask; i++)
1326 state->skiptable[i] = len2;
1327
1328 /*
1329 * Now examine the needle. For each character except the last one,
1330 * set the corresponding table element to the appropriate skip
1331 * distance. Note that when two characters share the same skip table
1332 * entry, the one later in the needle must determine the skip
1333 * distance.
1334 */
1335 last = len2 - 1;
1336
 /* Ascending i means later needle bytes overwrite earlier ones. */
1337 for (i = 0; i < last; i++)
1338 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1339 }
1340}
1341
1342/*
1343 * Advance to the next match, starting from the end of the previous match
1344 * (or the beginning of the string, on first call). Returns true if a match
1345 * is found.
1346 *
1347 * Note that this refuses to match an empty-string needle. Most callers
1348 * will have handled that case specially and we'll never see it here.
1349 */
/*
 * Drives text_position_next_internal().  When the state is flagged
 * is_multibyte_char_in_char and the locale is deterministic, a raw byte
 * match that begins inside a multi-byte character is rejected: refpoint and
 * refpos are walked forward one character at a time, and the search is
 * retried from the next character boundary.  On success the match is
 * recorded in state->last_match and state->last_match_len.
 *
 * NOTE(review): listing number 1351 is absent, so the declarator line is not
 * visible here; the body refers to its argument as "state".
 */
1350static bool
1352{
1353 int needle_len = state->len2;
1354 char *start_ptr;
1355 char *matchptr;
1356
1357 if (needle_len <= 0)
1358 return false; /* result for empty pattern */
1359
1360 /* Start from the point right after the previous match. */
1361 if (state->last_match)
1362 start_ptr = state->last_match + state->last_match_len;
1363 else
1364 start_ptr = state->str1;
1365
1366retry:
1367 matchptr = text_position_next_internal(start_ptr, state);
1368
1369 if (!matchptr)
1370 return false;
1371
1372 /*
1373 * Found a match for the byte sequence. If this is a multibyte encoding,
1374 * where one character's byte sequence can appear inside a longer
1375 * multi-byte character, we need to verify that the match was at a
1376 * character boundary, not in the middle of a multi-byte character.
1377 */
1378 if (state->is_multibyte_char_in_char && state->locale->deterministic)
1379 {
1380 /* Walk one character at a time, until we reach the match. */
1381
1382 /* the search should never move backwards. */
1383 Assert(state->refpoint <= matchptr);
1384
1385 while (state->refpoint < matchptr)
1386 {
1387 /* step to next character. */
1388 state->refpoint += pg_mblen(state->refpoint);
1389 state->refpos++;
1390
1391 /*
1392 * If we stepped over the match's start position, then it was a
1393 * false positive, where the byte sequence appeared in the middle
1394 * of a multi-byte character. Skip it, and continue the search at
1395 * the next character boundary.
1396 */
1397 if (state->refpoint > matchptr)
1398 {
1399 start_ptr = state->refpoint;
1400 goto retry;
1401 }
1402 }
1403 }
1404
 /* Publish the length that the internal search left in last_match_len_tmp. */
1405 state->last_match = matchptr;
1406 state->last_match_len = state->last_match_len_tmp;
1407 return true;
1408}
1409
1410/*
1411 * Subroutine of text_position_next(). This searches for the raw byte
1412 * sequence, ignoring any multi-byte encoding issues. Returns the first
1413 * match starting at 'start_ptr', or NULL if no match is found.
1414 */
/*
 * Three search strategies are used, chosen in this order:
 *   - nondeterministic locale: position-by-position pg_strncoll() probing;
 *   - one-byte needle: plain linear byte scan;
 *   - otherwise: Boyer-Moore-Horspool using state->skiptable.
 *
 * state->last_match_len_tmp is set to the needle length on entry and is
 * overwritten only by the nondeterministic path, where the matched haystack
 * substring may have a different length.
 *
 * NOTE(review): listing number 1416 is absent, so the declarator line is not
 * visible here; the body refers to its arguments as "start_ptr" and "state".
 */
1415static char *
1417{
1418 int haystack_len = state->len1;
1419 int needle_len = state->len2;
1420 int skiptablemask = state->skiptablemask;
1421 const char *haystack = state->str1;
1422 const char *needle = state->str2;
1423 const char *haystack_end = &haystack[haystack_len];
1424 const char *hptr;
1425
1426 Assert(start_ptr >= haystack && start_ptr <= haystack_end);
1427
1428 state->last_match_len_tmp = needle_len;
1429
1430 if (!state->locale->deterministic)
1431 {
1432 /*
1433 * With a nondeterministic collation, we have to use an unoptimized
1434 * route. We walk through the haystack and see if at each position
1435 * there is a substring of the remaining string that is equal to the
1436 * needle under the given collation.
1437 *
1438 * Note, the found substring could have a different length than the
1439 * needle, including being empty. Callers that want to skip over the
1440 * found string need to read the length of the found substring from
1441 * last_match_len rather than just using the length of their needle.
1442 *
1443 * Most callers will require "greedy" semantics, meaning that we need
1444 * to find the longest such substring, not the shortest. For callers
1445 * that don't need greedy semantics, we can finish on the first match.
1446 */
1447 const char *result_hptr = NULL;
1448
1449 hptr = start_ptr;
1450 while (hptr < haystack_end)
1451 {
1452 /*
1453 * First check the common case that there is a match in the
1454 * haystack of exactly the length of the needle.
1455 */
1456 if (!state->greedy &&
1457 haystack_end - hptr >= needle_len &&
1458 pg_strncoll(hptr, needle_len, needle, needle_len, state->locale) == 0)
1459 return (char *) hptr;
1460
1461 /*
1462 * Else check if any of the possible substrings starting at hptr
1463 * are equal to the needle.
1464 */
1465 for (const char *test_end = hptr; test_end < haystack_end; test_end += pg_mblen(test_end))
1466 {
1467 if (pg_strncoll(hptr, (test_end - hptr), needle, needle_len, state->locale) == 0)
1468 {
 /* In greedy mode keep scanning; a later, longer match overwrites this one. */
1469 state->last_match_len_tmp = (test_end - hptr);
1470 result_hptr = hptr;
1471 if (!state->greedy)
1472 break;
1473 }
1474 }
1475 if (result_hptr)
1476 break;
1477
1478 hptr += pg_mblen(hptr);
1479 }
1480
1481 return (char *) result_hptr;
1482 }
1483 else if (needle_len == 1)
1484 {
1485 /* No point in using B-M-H for a one-character needle */
1486 char nchar = *needle;
1487
1488 hptr = start_ptr;
1489 while (hptr < haystack_end)
1490 {
1491 if (*hptr == nchar)
1492 return (char *) hptr;
1493 hptr++;
1494 }
1495 }
1496 else
1497 {
1498 const char *needle_last = &needle[needle_len - 1];
1499
1500 /* Start with hptr on the haystack byte aligned with the needle's last byte */
1501 hptr = start_ptr + needle_len - 1;
1502 while (hptr < haystack_end)
1503 {
1504 /* Match the needle scanning *backward* */
1505 const char *nptr;
1506 const char *p;
1507
1508 nptr = needle_last;
1509 p = hptr;
1510 while (*nptr == *p)
1511 {
1512 /* Matched it all? If so, return a pointer to the start of the match */
1513 if (nptr == needle)
1514 return (char *) p;
1515 nptr--, p--;
1516 }
1517
1518 /*
1519 * No match, so use the haystack char at hptr to decide how far to
1520 * advance. If the needle had any occurrence of that character
1521 * (or more precisely, one sharing the same skiptable entry)
1522 * before its last character, then we advance far enough to align
1523 * the last such needle character with that haystack position.
1524 * Otherwise we can advance by the whole needle length.
1525 */
1526 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1527 }
1528 }
1529
1530 return 0; /* not found (0 here is a null pointer) */
1531}
1532
1533/*
1534 * Return a pointer to the current match.
1535 *
1536 * The returned pointer points into the original haystack string.
1537 */
/*
 * Simply hands back state->last_match; no state is modified.
 *
 * NOTE(review): listing number 1539 is absent, so the declarator line is not
 * visible here.
 */
1538static char *
1540{
1541 return state->last_match;
1542}
1543
1544/*
1545 * Return the offset of the current match.
1546 *
1547 * The offset is in characters, 1-based.
1548 */
/*
 * Side effect: advances the cached reference point.  refpos is increased by
 * the number of characters between refpoint and last_match, and refpoint is
 * then moved to last_match, so a later call only counts characters past
 * this match.
 *
 * NOTE(review): listing number 1550 is absent, so the declarator line is not
 * visible here.
 */
1549static int
1551{
1552 /* Convert the byte position to char position. */
1553 state->refpos += pg_mbstrlen_with_len(state->refpoint,
1554 state->last_match - state->refpoint);
1555 state->refpoint = state->last_match;
1556 return state->refpos + 1;
1557}
1558
1559/*
1560 * Reset search state to the initial state installed by text_position_setup.
1561 *
1562 * The next call to text_position_next will search from the beginning
1563 * of the string.
1564 */
/*
 * Clears last_match and rewinds refpoint/refpos to the start of the
 * haystack.  The other fields of *state are left untouched.
 *
 * NOTE(review): listing number 1566 is absent, so the declarator line is not
 * visible here.
 */
1565static void
1567{
1568 state->last_match = NULL;
1569 state->refpoint = state->str1;
1570 state->refpos = 0;
1571}
1572
/*
 * Final step of the setup/next/cleanup sequence.  The body is intentionally
 * empty.
 *
 * NOTE(review): listing number 1574 is absent, so the declarator line is not
 * visible here.
 */
1573static void
1575{
1576 /* no cleanup needed */
1577}
1578
1579
/*
 * Raise ERRCODE_INDETERMINATE_COLLATION if collid is not a valid OID;
 * otherwise do nothing.
 *
 * NOTE(review): listing number 1581 is absent, so the declarator line is not
 * visible here; the body refers to its argument as "collid".
 */
1580static void
1582{
1583 if (!OidIsValid(collid))
1584 {
1585 /*
1586 * This typically means that the parser could not resolve a conflict
1587 * of implicit collations, so report it that way.
1588 */
1589 ereport(ERROR,
1590 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1591 errmsg("could not determine which collation to use for string comparison"),
1592 errhint("Use the COLLATE clause to set the collation explicitly.")));
1593 }
1594}
1595
1596/*
1597 * varstr_cmp()
1598 *
1599 * Comparison function for text strings with given lengths, using the
1600 * appropriate locale. Returns an integer less than, equal to, or greater than
1601 * zero, indicating whether arg1 is less than, equal to, or greater than arg2.
1602 *
1603 * Note: many functions that depend on this are marked leakproof; therefore,
1604 * avoid reporting the actual contents of the input when throwing errors.
1605 * All errors herein should be things that can't happen except on corrupt
1606 * data, anyway; otherwise we will have trouble with indexing strings that
1607 * would cause them.
1608 */
/*
 * Parameters: arg1/len1 and arg2/len2 are the two byte strings with their
 * lengths; collid selects the locale.
 *
 * Paths:
 *   - collate_is_c: memcmp() over the common prefix, then the shorter
 *     string sorts first.
 *   - otherwise: return 0 at once for bytewise-identical inputs, else call
 *     pg_strncoll(); if that reports a tie and the locale is deterministic,
 *     break the tie bytewise in the same way as the C path.
 *
 * NOTE(review): listing numbers 1615 and 1617 are absent, so the statements
 * between the declarations and the collate_is_c test -- including whatever
 * assigns mylocale -- are not visible here.  Confirm against the complete
 * file.
 */
1609int
1610varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
1611{
1612 int result;
1613 pg_locale_t mylocale;
1614
1616
1618
1619 if (mylocale->collate_is_c)
1620 {
1621 result = memcmp(arg1, arg2, Min(len1, len2));
1622 if ((result == 0) && (len1 != len2))
1623 result = (len1 < len2) ? -1 : 1;
1624 }
1625 else
1626 {
1627 /*
1628 * memcmp() can't tell us which of two unequal strings sorts first,
1629 * but it's a cheap way to tell if they're equal. Testing shows that
1630 * memcmp() followed by strcoll() is only trivially slower than
1631 * strcoll() by itself, so we don't lose much if this doesn't work out
1632 * very often, and if it does - for example, because there are many
1633 * equal strings in the input - then we win big by avoiding expensive
1634 * collation-aware comparisons.
1635 */
1636 if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1637 return 0;
1638
1639 result = pg_strncoll(arg1, len1, arg2, len2, mylocale);
1640
1641 /* Break tie if necessary. */
1642 if (result == 0 && mylocale->deterministic)
1643 {
1644 result = memcmp(arg1, arg2, Min(len1, len2));
1645 if ((result == 0) && (len1 != len2))
1646 result = (len1 < len2) ? -1 : 1;
1647 }
1648 }
1649
1650 return result;
1651}
1652
1653/* text_cmp()
1654 * Internal comparison function for text strings.
1655 * Returns a negative, zero, or positive integer (the varstr_cmp() result).
 *
 * Extracts the data pointer and payload length of each argument and passes
 * them, with collid, straight to varstr_cmp().
 *
 * NOTE(review): listing number 1658 is absent, so the declarator line is not
 * visible here; the body refers to its arguments as arg1, arg2 and collid.
1656 */
1657static int
1659{
1660 char *a1p,
1661 *a2p;
1662 int len1,
1663 len2;
1664
1665 a1p = VARDATA_ANY(arg1);
1666 a2p = VARDATA_ANY(arg2);
1667
1668 len1 = VARSIZE_ANY_EXHDR(arg1);
1669 len2 = VARSIZE_ANY_EXHDR(arg2);
1670
1671 return varstr_cmp(a1p, len1, a2p, len2, collid);
1672}
1673
1674/*
1675 * Comparison functions for text strings.
1676 *
1677 * Note: btree indexes need these routines not to leak memory; therefore,
1678 * be careful to free working copies of toasted datums. Most places don't
1679 * need to be so careful.
1680 */
1681
/*
 * Equality test on two text arguments; returns a bool Datum.
 *
 * With a deterministic locale, equality is decided bytewise: differing raw
 * datum sizes give false immediately, otherwise the two payloads are
 * compared with memcmp().  With a nondeterministic locale the result is
 * text_cmp() == 0.
 *
 * NOTE(review): listing numbers 1683, 1685, 1689 and 1691 are absent, so the
 * declarator, the declaration of collid, and the statements that set
 * mylocale are not visible here.  Confirm against the complete file.
 */
1682Datum
1684{
1686 pg_locale_t mylocale = 0;
1687 bool result;
1688
1690
1692
1693 if (mylocale->deterministic)
1694 {
1695 Datum arg1 = PG_GETARG_DATUM(0);
1696 Datum arg2 = PG_GETARG_DATUM(1);
1697 Size len1,
1698 len2;
1699
1700 /*
1701 * Since we only care about equality or not-equality, we can avoid all
1702 * the expense of strcoll() here, and just do bitwise comparison. In
1703 * fact, we don't even have to do a bitwise comparison if we can show
1704 * the lengths of the strings are unequal; which might save us from
1705 * having to detoast one or both values.
1706 */
1707 len1 = toast_raw_datum_size(arg1);
1708 len2 = toast_raw_datum_size(arg2);
1709 if (len1 != len2)
1710 result = false;
1711 else
1712 {
1713 text *targ1 = DatumGetTextPP(arg1);
1714 text *targ2 = DatumGetTextPP(arg2);
1715
 /* len1 == len2 here, so one length serves for both payloads. */
1716 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1717 len1 - VARHDRSZ) == 0);
1718
1719 PG_FREE_IF_COPY(targ1, 0);
1720 PG_FREE_IF_COPY(targ2, 1);
1721 }
1722 }
1723 else
1724 {
1725 text *arg1 = PG_GETARG_TEXT_PP(0);
1726 text *arg2 = PG_GETARG_TEXT_PP(1);
1727
1728 result = (text_cmp(arg1, arg2, collid) == 0);
1729
1730 PG_FREE_IF_COPY(arg1, 0);
1731 PG_FREE_IF_COPY(arg2, 1);
1732 }
1733
1734 PG_RETURN_BOOL(result);
1735}
1736
/*
 * Inequality test on two text arguments; returns a bool Datum.
 *
 * With a deterministic locale, differing raw datum sizes give true
 * immediately, otherwise the result is whether memcmp() of the payloads is
 * nonzero.  With a nondeterministic locale the result is text_cmp() != 0.
 *
 * NOTE(review): listing numbers 1738, 1740, 1744 and 1746 are absent, so the
 * declarator, the declaration of collid, and the statements that set
 * mylocale are not visible here.  Confirm against the complete file.
 */
1737Datum
1739{
1741 pg_locale_t mylocale;
1742 bool result;
1743
1745
1747
1748 if (mylocale->deterministic)
1749 {
1750 Datum arg1 = PG_GETARG_DATUM(0);
1751 Datum arg2 = PG_GETARG_DATUM(1);
1752 Size len1,
1753 len2;
1754
1755 /* See comment in texteq() */
1756 len1 = toast_raw_datum_size(arg1);
1757 len2 = toast_raw_datum_size(arg2);
1758 if (len1 != len2)
1759 result = true;
1760 else
1761 {
1762 text *targ1 = DatumGetTextPP(arg1);
1763 text *targ2 = DatumGetTextPP(arg2);
1764
1765 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1766 len1 - VARHDRSZ) != 0);
1767
1768 PG_FREE_IF_COPY(targ1, 0);
1769 PG_FREE_IF_COPY(targ2, 1);
1770 }
1771 }
1772 else
1773 {
1774 text *arg1 = PG_GETARG_TEXT_PP(0);
1775 text *arg2 = PG_GETARG_TEXT_PP(1);
1776
1777 result = (text_cmp(arg1, arg2, collid) != 0);
1778
1779 PG_FREE_IF_COPY(arg1, 0);
1780 PG_FREE_IF_COPY(arg2, 1);
1781 }
1782
1783 PG_RETURN_BOOL(result);
1784}
1785
/*
 * Returns a bool Datum: true when text_cmp() of argument 0 against
 * argument 1, under the call's collation, is negative.  Detoasted copies of
 * the arguments are freed before returning.
 *
 * NOTE(review): listing number 1787 is absent, so the declarator line is not
 * visible here.
 */
1786Datum
1788{
1789 text *arg1 = PG_GETARG_TEXT_PP(0);
1790 text *arg2 = PG_GETARG_TEXT_PP(1);
1791 bool result;
1792
1793 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1794
1795 PG_FREE_IF_COPY(arg1, 0);
1796 PG_FREE_IF_COPY(arg2, 1);
1797
1798 PG_RETURN_BOOL(result);
1799}
1800
/*
 * Returns a bool Datum: true when text_cmp() of argument 0 against
 * argument 1, under the call's collation, is negative or zero.  Detoasted
 * copies of the arguments are freed before returning.
 *
 * NOTE(review): listing number 1802 is absent, so the declarator line is not
 * visible here.
 */
1801Datum
1803{
1804 text *arg1 = PG_GETARG_TEXT_PP(0);
1805 text *arg2 = PG_GETARG_TEXT_PP(1);
1806 bool result;
1807
1808 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1809
1810 PG_FREE_IF_COPY(arg1, 0);
1811 PG_FREE_IF_COPY(arg2, 1);
1812
1813 PG_RETURN_BOOL(result);
1814}
1815
/*
 * Returns a bool Datum: true when text_cmp() of argument 0 against
 * argument 1, under the call's collation, is positive.  Detoasted copies of
 * the arguments are freed before returning.
 *
 * NOTE(review): listing number 1817 is absent, so the declarator line is not
 * visible here.
 */
1816Datum
1818{
1819 text *arg1 = PG_GETARG_TEXT_PP(0);
1820 text *arg2 = PG_GETARG_TEXT_PP(1);
1821 bool result;
1822
1823 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1824
1825 PG_FREE_IF_COPY(arg1, 0);
1826 PG_FREE_IF_COPY(arg2, 1);
1827
1828 PG_RETURN_BOOL(result);
1829}
1830
/*
 * Returns a bool Datum: true when text_cmp() of argument 0 against
 * argument 1, under the call's collation, is positive or zero.  Detoasted
 * copies of the arguments are freed before returning.
 *
 * NOTE(review): listing number 1832 is absent, so the declarator line is not
 * visible here.
 */
1831Datum
1833{
1834 text *arg1 = PG_GETARG_TEXT_PP(0);
1835 text *arg2 = PG_GETARG_TEXT_PP(1);
1836 bool result;
1837
1838 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1839
1840 PG_FREE_IF_COPY(arg1, 0);
1841 PG_FREE_IF_COPY(arg2, 1);
1842
1843 PG_RETURN_BOOL(result);
1844}
1845
/*
 * Prefix test: does argument 0 begin with argument 1?  Returns a bool Datum.
 *
 * Raises ERRCODE_FEATURE_NOT_SUPPORTED when the locale is nondeterministic.
 * Otherwise the answer is false if argument 1's raw datum size exceeds
 * argument 0's; else the result of text_substring(arg1, 1, len2, false) is
 * compared with memcmp() against all payload bytes of argument 1.
 *
 * NOTE(review): listing numbers 1847, 1851, 1857 and 1859 are absent, so the
 * declarator, one declaration, and the statements that set mylocale are not
 * visible here.  Confirm against the complete file.
 */
1846Datum
1848{
1849 Datum arg1 = PG_GETARG_DATUM(0);
1850 Datum arg2 = PG_GETARG_DATUM(1);
1852 pg_locale_t mylocale;
1853 bool result;
1854 Size len1,
1855 len2;
1856
1858
1860
1861 if (!mylocale->deterministic)
1862 ereport(ERROR,
1863 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1864 errmsg("nondeterministic collations are not supported for substring searches")));
1865
1866 len1 = toast_raw_datum_size(arg1);
1867 len2 = toast_raw_datum_size(arg2);
1868 if (len2 > len1)
1869 result = false;
1870 else
1871 {
1872 text *targ1 = text_substring(arg1, 1, len2, false);
1873 text *targ2 = DatumGetTextPP(arg2);
1874
 /* Only the needle's payload length is compared. */
1875 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1876 VARSIZE_ANY_EXHDR(targ2)) == 0);
1877
1878 PG_FREE_IF_COPY(targ1, 0);
1879 PG_FREE_IF_COPY(targ2, 1);
1880 }
1881
1882 PG_RETURN_BOOL(result);
1883}
1884
/*
 * Three-way comparison of two text arguments under the call's collation.
 * Returns the text_cmp() result as an int32 Datum, after freeing any
 * detoasted copies of the arguments.
 *
 * NOTE(review): listing number 1886 is absent, so the declarator line is not
 * visible here.
 */
1885Datum
1887{
1888 text *arg1 = PG_GETARG_TEXT_PP(0);
1889 text *arg2 = PG_GETARG_TEXT_PP(1);
1890 int32 result;
1891
1892 result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1893
1894 PG_FREE_IF_COPY(arg1, 0);
1895 PG_FREE_IF_COPY(arg2, 1);
1896
1897 PG_RETURN_INT32(result);
1898}
1899
/*
 * Sets up sort support for TEXTOID by calling varstr_sortsupport() with the
 * collation taken from ssup->ssup_collation.  The call is made with
 * ssup->ssup_cxt as the current memory context; the previous context is
 * restored afterwards.
 *
 * NOTE(review): listing numbers 1901, 1903 and 1914 are absent, so the
 * declarator, the declaration of ssup, and the final statement before the
 * closing brace are not visible here.  Confirm against the complete file.
 */
1900Datum
1902{
1904 Oid collid = ssup->ssup_collation;
1905 MemoryContext oldcontext;
1906
1907 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1908
1909 /* Use generic string SortSupport */
1910 varstr_sortsupport(ssup, TEXTOID, collid);
1911
1912 MemoryContextSwitchTo(oldcontext);
1913
1915}
1916
1917/*
1918 * Generic sortsupport interface for character type's operator classes.
1919 * Includes locale support, and support for BpChar semantics (i.e. removing
1920 * trailing spaces before comparison).
1921 *
1922 * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1923 * same representation. Callers that always use the C collation (e.g.
1924 * non-collatable type callers like bytea) may have NUL bytes in their strings;
1925 * this will not work with any other collation, though.
1926 */
/*
 * Summary of the visible logic: a comparator is chosen according to whether
 * the locale is "C" and according to typid.  If abbreviation is still wanted
 * or the collation is not "C", a VarStringSortSupport is then allocated with
 * two TEXTBUFLEN scratch buffers and stored in ssup->ssup_extra.  For NAMEOID
 * the abbreviate flag is cleared in both the C and the non-C branch.
 *
 * NOTE(review): this listing drops numbers 1928, 1932-1933, 1935, 1937,
 * 1955, 1963, 1974, 1979, 1996 and 2054-2056.  As a result the declarator,
 * the declarations of sss and locale, the statement that obtains locale,
 * the bodies of several comparator-selection branches, the condition that
 * guards the last "abbreviate = false" in the non-C branch, and the tail of
 * the "if (abbreviate)" block are not visible.  Do not treat the text below
 * as complete; confirm against the full file.
 */
1927void
1929{
1930 bool abbreviate = ssup->abbreviate;
1931 bool collate_c = false;
1934
1936
1938
1939 /*
1940 * If possible, set ssup->comparator to a function which can be used to
1941 * directly compare two datums. If we can do this, we'll avoid the
1942 * overhead of a trip through the fmgr layer for every comparison, which
1943 * can be substantial.
1944 *
1945 * Most typically, we'll set the comparator to varlenafastcmp_locale,
1946 * which uses strcoll() to perform comparisons. We use that for the
1947 * BpChar case too, but type NAME uses namefastcmp_locale. However, if
1948 * LC_COLLATE = C, we can make things quite a bit faster with
1949 * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
1950 * memcmp() rather than strcoll().
1951 */
1952 if (locale->collate_is_c)
1953 {
1954 if (typid == BPCHAROID)
1956 else if (typid == NAMEOID)
1957 {
1958 ssup->comparator = namefastcmp_c;
1959 /* Not supporting abbreviation with type NAME, for now */
1960 abbreviate = false;
1961 }
1962 else
1964
1965 collate_c = true;
1966 }
1967 else
1968 {
1969 /*
1970 * We use varlenafastcmp_locale except for type NAME.
1971 */
1972 if (typid == NAMEOID)
1973 {
1975 /* Not supporting abbreviation with type NAME, for now */
1976 abbreviate = false;
1977 }
1978 else
1980
1981 /*
1982 * Unfortunately, it seems that abbreviation for non-C collations is
1983 * broken on many common platforms; see pg_strxfrm_enabled().
1984 *
1985 * Even apart from the risk of broken locales, it's possible that
1986 * there are platforms where the use of abbreviated keys should be
1987 * disabled at compile time. Having only 4 byte datums could make
1988 * worst-case performance drastically more likely, for example.
1989 * Moreover, macOS's strxfrm() implementation is known to not
1990 * effectively concentrate a significant amount of entropy from the
1991 * original string in earlier transformed blobs. It's possible that
1992 * other supported platforms are similarly encumbered. So, if we ever
1993 * get past disabling this categorically, we may still want or need to
1994 * disable it for particular platforms.
1995 */
1997 abbreviate = false;
1998 }
1999
2000 /*
2001 * If we're using abbreviated keys, or if we're using a locale-aware
2002 * comparison, we need to initialize a VarStringSortSupport object. Both
2003 * cases will make use of the temporary buffers we initialize here for
2004 * scratch space (and to detect requirement for BpChar semantics from
2005 * caller), and the abbreviation case requires additional state.
2006 */
2007 if (abbreviate || !collate_c)
2008 {
2009 sss = palloc(sizeof(VarStringSortSupport));
2010 sss->buf1 = palloc(TEXTBUFLEN);
2011 sss->buflen1 = TEXTBUFLEN;
2012 sss->buf2 = palloc(TEXTBUFLEN);
2013 sss->buflen2 = TEXTBUFLEN;
2014 /* Start with invalid values */
2015 sss->last_len1 = -1;
2016 sss->last_len2 = -1;
2017 /* Initialize */
2018 sss->last_returned = 0;
2019 if (collate_c)
2020 sss->locale = NULL;
2021 else
2022 sss->locale = locale;
2023
2024 /*
2025 * To avoid somehow confusing a strxfrm() blob and an original string,
2026 * constantly keep track of the variety of data that buf1 and buf2
2027 * currently contain.
2028 *
2029 * Comparisons may be interleaved with conversion calls. Frequently,
2030 * conversions and comparisons are batched into two distinct phases,
2031 * but the correctness of caching cannot hinge upon this. For
2032 * comparison caching, buffer state is only trusted if cache_blob is
2033 * found set to false, whereas strxfrm() caching only trusts the state
2034 * when cache_blob is found set to true.
2035 *
2036 * Arbitrarily initialize cache_blob to true.
2037 */
2038 sss->cache_blob = true;
2039 sss->collate_c = collate_c;
2040 sss->typid = typid;
2041 ssup->ssup_extra = sss;
2042
2043 /*
2044 * If possible, plan to use the abbreviated keys optimization. The
2045 * core code may switch back to authoritative comparator should
2046 * abbreviation be aborted.
2047 */
2048 if (abbreviate)
2049 {
2050 sss->prop_card = 0.20;
2051 initHyperLogLog(&sss->abbr_card, 10);
2052 initHyperLogLog(&sss->full_card, 10);
 /* Remember the full comparator chosen above before anything replaces it. */
2053 ssup->abbrev_full_comparator = ssup->comparator;
2057 }
2058 }
2059}
2060
2061/*
2062 * sortsupport comparison func (for C locale case)
2063 */
/*
 * Bytewise comparison: memcmp() over the common prefix of the two payloads,
 * with the shorter one sorting first on a tie.  Returns a negative, zero, or
 * positive int.  arg1/arg2 are pfree'd when they are not the same pointers
 * as the incoming Datums x/y.
 *
 * NOTE(review): listing numbers 2065, 2067 and 2068 are absent, so the
 * declarator and the declarations of arg1 and arg2 are not visible here.
 */
2064static int
2066{
2069 char *a1p,
2070 *a2p;
2071 int len1,
2072 len2,
2073 result;
2074
2075 a1p = VARDATA_ANY(arg1);
2076 a2p = VARDATA_ANY(arg2);
2077
2078 len1 = VARSIZE_ANY_EXHDR(arg1);
2079 len2 = VARSIZE_ANY_EXHDR(arg2);
2080
2081 result = memcmp(a1p, a2p, Min(len1, len2));
2082 if ((result == 0) && (len1 != len2))
2083 result = (len1 < len2) ? -1 : 1;
2084
2085 /* We can't afford to leak memory here. */
2086 if (PointerGetDatum(arg1) != x)
2087 pfree(arg1);
2088 if (PointerGetDatum(arg2) != y)
2089 pfree(arg2);
2090
2091 return result;
2092}
2093
2094/*
2095 * sortsupport comparison func (for BpChar C locale case)
2096 *
2097 * BpChar outsources its sortsupport to this module. Specialization for the
2098 * varstr_sortsupport BpChar case, modeled on
2099 * internal_bpchar_pattern_compare().
2100 */
/*
 * Same bytewise comparison as the plain C-locale case, except that each
 * length is first reduced with bpchartruelen(), so trailing spaces do not
 * take part.  Returns a negative, zero, or positive int.  arg1/arg2 are
 * pfree'd when they are not the same pointers as the incoming Datums x/y.
 *
 * NOTE(review): listing number 2102 is absent, so the declarator line is not
 * visible here; the body refers to its Datum arguments as x and y.
 */
2101static int
2103{
2104 BpChar *arg1 = DatumGetBpCharPP(x);
2105 BpChar *arg2 = DatumGetBpCharPP(y);
2106 char *a1p,
2107 *a2p;
2108 int len1,
2109 len2,
2110 result;
2111
2112 a1p = VARDATA_ANY(arg1);
2113 a2p = VARDATA_ANY(arg2);
2114
2115 len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2116 len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2117
2118 result = memcmp(a1p, a2p, Min(len1, len2));
2119 if ((result == 0) && (len1 != len2))
2120 result = (len1 < len2) ? -1 : 1;
2121
2122 /* We can't afford to leak memory here. */
2123 if (PointerGetDatum(arg1) != x)
2124 pfree(arg1);
2125 if (PointerGetDatum(arg2) != y)
2126 pfree(arg2);
2127
2128 return result;
2129}
2130
2131/*
2132 * sortsupport comparison func (for NAME C locale case)
2133 */
/*
 * Compares the two Name values with strncmp() over at most NAMEDATALEN
 * bytes and returns that result unchanged.
 *
 * NOTE(review): listing number 2135 is absent, so the declarator line is not
 * visible here; the body refers to its Datum arguments as x and y.
 */
2134static int
2136{
2137 Name arg1 = DatumGetName(x);
2138 Name arg2 = DatumGetName(y);
2139
2140 return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
2141}
2142
2143/*
2144 * sortsupport comparison func (for locale case with all varlena types)
2145 */
/*
 * Thin wrapper: extracts the data pointer and payload length of each
 * argument, delegates to varstrfastcmp_locale(), and then pfree's arg1/arg2
 * when they are not the same pointers as the incoming Datums x/y.
 *
 * NOTE(review): listing numbers 2147, 2149 and 2150 are absent, so the
 * declarator and the declarations of arg1 and arg2 are not visible here.
 */
2146static int
2148{
2151 char *a1p,
2152 *a2p;
2153 int len1,
2154 len2,
2155 result;
2156
2157 a1p = VARDATA_ANY(arg1);
2158 a2p = VARDATA_ANY(arg2);
2159
2160 len1 = VARSIZE_ANY_EXHDR(arg1);
2161 len2 = VARSIZE_ANY_EXHDR(arg2);
2162
2163 result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
2164
2165 /* We can't afford to leak memory here. */
2166 if (PointerGetDatum(arg1) != x)
2167 pfree(arg1);
2168 if (PointerGetDatum(arg2) != y)
2169 pfree(arg2);
2170
2171 return result;
2172}
2173
2174/*
2175 * sortsupport comparison func (for locale case with NAME type)
2176 */
/*
 * Passes each Name's string, with its strlen() as the length, to
 * varstrfastcmp_locale() and returns that result.
 *
 * NOTE(review): listing number 2178 is absent, so the declarator line is not
 * visible here; the body refers to its arguments as x, y and ssup.
 */
2177static int
2179{
2180 Name arg1 = DatumGetName(x);
2181 Name arg2 = DatumGetName(y);
2182
2183 return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
2184 NameStr(*arg2), strlen(NameStr(*arg2)),
2185 ssup);
2186}
2187
2188/*
2189 * sortsupport comparison func for locale cases
2190 */
/*
 * Parameters: a1p/len1 and a2p/len2 are the two byte strings (they need not
 * be NUL-terminated; NUL-terminated copies are made in sss->buf1/buf2);
 * ssup carries the per-sort state.
 *
 * Returns 0 at once for bytewise-identical inputs.  Otherwise, for
 * BPCHAROID, trailing spaces are trimmed from both lengths; the scratch
 * buffers are grown if needed; each input is copied into its buffer unless
 * the buffer already holds the same bytes; and if both buffers were already
 * current and cache_blob is false, the previous result is returned.
 * Failing that, pg_strcoll() decides, with a strcmp() tie-break for
 * deterministic locales, and the result is cached in sss->last_returned.
 *
 * NOTE(review): listing number 2194 is absent, so the declaration (and
 * initialisation) of sss is not visible here.  Confirm against the complete
 * file.
 */
2191static int
2192varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
2193{
2195 int result;
2196 bool arg1_match;
2197
2198 /* Fast pre-check for equality, as discussed in varstr_cmp() */
2199 if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2200 {
2201 /*
2202 * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2203 * last_len2. Existing contents of buffers might still be used by
2204 * next call.
2205 *
2206 * It's fine to allow the comparison of BpChar padding bytes here,
2207 * even though that implies that the memcmp() will usually be
2208 * performed for BpChar callers (though multibyte characters could
2209 * still prevent that from occurring). The memcmp() is still very
2210 * cheap, and BpChar's funny semantics have us remove trailing spaces
2211 * (not limited to padding), so we need make no distinction between
2212 * padding space characters and "real" space characters.
2213 */
2214 return 0;
2215 }
2216
2217 if (sss->typid == BPCHAROID)
2218 {
2219 /* Get true number of bytes, ignoring trailing spaces */
2220 len1 = bpchartruelen(a1p, len1);
2221 len2 = bpchartruelen(a2p, len2);
2222 }
2223
 /* ">=" rather than ">" leaves room for the terminating NUL stored below. */
2224 if (len1 >= sss->buflen1)
2225 {
2226 sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2227 sss->buf1 = repalloc(sss->buf1, sss->buflen1);
2228 }
2229 if (len2 >= sss->buflen2)
2230 {
2231 sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2232 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
2233 }
2234
2235 /*
2236 * We're likely to be asked to compare the same strings repeatedly, and
2237 * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2238 * comparisons, even though in general there is no reason to think that
2239 * that will work out (every string datum may be unique). Caching does
2240 * not slow things down measurably when it doesn't work out, and can speed
2241 * things up by rather a lot when it does. In part, this is because the
2242 * memcmp() compares data from cachelines that are needed in L1 cache even
2243 * when the last comparison's result cannot be reused.
2244 */
2245 arg1_match = true;
2246 if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2247 {
2248 arg1_match = false;
2249 memcpy(sss->buf1, a1p, len1);
2250 sss->buf1[len1] = '\0';
2251 sss->last_len1 = len1;
2252 }
2253
2254 /*
2255 * If we're comparing the same two strings as last time, we can return the
2256 * same answer without calling strcoll() again. This is more likely than
2257 * it seems (at least with moderate to low cardinality sets), because
2258 * quicksort compares the same pivot against many values.
2259 */
2260 if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2261 {
2262 memcpy(sss->buf2, a2p, len2);
2263 sss->buf2[len2] = '\0';
2264 sss->last_len2 = len2;
2265 }
2266 else if (arg1_match && !sss->cache_blob)
2267 {
2268 /* Use result cached following last actual strcoll() call */
2269 return sss->last_returned;
2270 }
2271
2272 result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
2273
2274 /* Break tie if necessary. */
2275 if (result == 0 && sss->locale->deterministic)
2276 result = strcmp(sss->buf1, sss->buf2);
2277
2278 /* Cache result, perhaps saving an expensive strcoll() call next time */
2279 sss->cache_blob = false;
2280 sss->last_returned = result;
2281 return result;
2282}
2283
2284/*
2285 * Conversion routine for sortsupport. Converts original to abbreviated key
2286 * representation. Our encoding strategy is simple -- pack the first 8 bytes
2287 * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2288 * stored in reverse order), and treat it as an unsigned integer. When the "C"
2289 * locale is used, or in case of bytea, just memcpy() from original instead.
2290 */
2291static Datum
2293{
2294 const size_t max_prefix_bytes = sizeof(Datum);
2296 VarString *authoritative = DatumGetVarStringPP(original);
2297 char *authoritative_data = VARDATA_ANY(authoritative);
2298
2299 /* working state */
2300 Datum res;
2301 char *pres;
2302 int len;
2303 uint32 hash;
2304
2305 pres = (char *) &res;
2306 /* memset(), so any non-overwritten bytes are NUL */
2307 memset(pres, 0, max_prefix_bytes);
2308 len = VARSIZE_ANY_EXHDR(authoritative);
2309
2310 /* Get number of bytes, ignoring trailing spaces */
2311 if (sss->typid == BPCHAROID)
2312 len = bpchartruelen(authoritative_data, len);
2313
2314 /*
2315 * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2316 * abbreviate keys. The full comparator for the C locale is always
2317 * memcmp(). It would be incorrect to allow bytea callers (callers that
2318 * always force the C collation -- bytea isn't a collatable type, but this
2319 * approach is convenient) to use strxfrm(). This is because bytea
2320 * strings may contain NUL bytes. Besides, this should be faster, too.
2321 *
2322 * More generally, it's okay that bytea callers can have NUL bytes in
2323 * strings because abbreviated cmp need not make a distinction between
2324 * terminating NUL bytes, and NUL bytes representing actual NULs in the
2325 * authoritative representation. Hopefully a comparison at or past one
2326 * abbreviated key's terminating NUL byte will resolve the comparison
2327 * without consulting the authoritative representation; specifically, some
2328 * later non-NUL byte in the longer string can resolve the comparison
2329 * against a subsequent terminating NUL in the shorter string. There will
2330 * usually be what is effectively a "length-wise" resolution there and
2331 * then.
2332 *
2333 * If that doesn't work out -- if all bytes in the longer string
2334 * positioned at or past the offset of the smaller string's (first)
2335 * terminating NUL are actually representative of NUL bytes in the
2336 * authoritative binary string (perhaps with some *terminating* NUL bytes
2337 * towards the end of the longer string iff it happens to still be small)
2338 * -- then an authoritative tie-breaker will happen, and do the right
2339 * thing: explicitly consider string length.
2340 */
2341 if (sss->collate_c)
2342 memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
2343 else
2344 {
2345 Size bsize;
2346
2347 /*
2348 * We're not using the C collation, so fall back on strxfrm or ICU
2349 * analogs.
2350 */
2351
2352 /* By convention, we use buffer 1 to store and NUL-terminate */
2353 if (len >= sss->buflen1)
2354 {
2355 sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2356 sss->buf1 = repalloc(sss->buf1, sss->buflen1);
2357 }
2358
2359 /* Might be able to reuse strxfrm() blob from last call */
2360 if (sss->last_len1 == len && sss->cache_blob &&
2361 memcmp(sss->buf1, authoritative_data, len) == 0)
2362 {
2363 memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2));
2364 /* No change affecting cardinality, so no hashing required */
2365 goto done;
2366 }
2367
2368 memcpy(sss->buf1, authoritative_data, len);
2369
2370 /*
2371 * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated strings.
2372 */
2373 sss->buf1[len] = '\0';
2374 sss->last_len1 = len;
2375
2377 {
2378 if (sss->buflen2 < max_prefix_bytes)
2379 {
2380 sss->buflen2 = Max(max_prefix_bytes,
2381 Min(sss->buflen2 * 2, MaxAllocSize));
2382 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
2383 }
2384
2385 bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1,
2386 max_prefix_bytes, sss->locale);
2387 sss->last_len2 = bsize;
2388 }
2389 else
2390 {
2391 /*
2392 * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try
2393 * again. The pg_strxfrm() function leaves the result buffer
2394 * content undefined if the result did not fit, so we need to
2395 * retry until everything fits, even though we only need the first
2396 * few bytes in the end.
2397 */
2398 for (;;)
2399 {
2400 bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2,
2401 sss->locale);
2402
2403 sss->last_len2 = bsize;
2404 if (bsize < sss->buflen2)
2405 break;
2406
2407 /*
2408 * Grow buffer and retry.
2409 */
2410 sss->buflen2 = Max(bsize + 1,
2411 Min(sss->buflen2 * 2, MaxAllocSize));
2412 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
2413 }
2414 }
2415
2416 /*
2417 * Every Datum byte is always compared. This is safe because the
2418 * strxfrm() blob is itself NUL terminated, leaving no danger of
2419 * misinterpreting any NUL bytes not intended to be interpreted as
2420 * logically representing termination.
2421 *
2422 * (Actually, even if there were NUL bytes in the blob it would be
2423 * okay. See remarks on bytea case above.)
2424 */
2425 memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize));
2426 }
2427
2428 /*
2429 * Maintain approximate cardinality of both abbreviated keys and original,
2430 * authoritative keys using HyperLogLog. Used as cheap insurance against
2431 * the worst case, where we do many string transformations for no saving
2432 * in full strcoll()-based comparisons. These statistics are used by
2433 * varstr_abbrev_abort().
2434 *
2435 * First, Hash key proper, or a significant fraction of it. Mix in length
2436 * in order to compensate for cases where differences are past
2437 * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2438 */
2439 hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2441
2442 if (len > PG_CACHE_LINE_SIZE)
2444
2446
2447 /* Hash abbreviated key */
2448#if SIZEOF_DATUM == 8
2449 {
2450 uint32 lohalf,
2451 hihalf;
2452
2453 lohalf = (uint32) res;
2454 hihalf = (uint32) (res >> 32);
2455 hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2456 }
2457#else /* SIZEOF_DATUM != 8 */
2459#endif
2460
2462
2463 /* Cache result, perhaps saving an expensive strxfrm() call next time */
2464 sss->cache_blob = true;
2465done:
2466
2467 /*
2468 * Byteswap on little-endian machines.
2469 *
2470 * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
2471 * 3-way comparator) works correctly on all platforms. If we didn't do
2472 * this, the comparator would have to call memcmp() with a pair of
2473 * pointers to the first byte of each abbreviated key, which is slower.
2474 */
2475 res = DatumBigEndianToNative(res);
2476
2477 /* Don't leak memory here */
2478 if (PointerGetDatum(authoritative) != original)
2479 pfree(authoritative);
2480
2481 return res;
2482}
2483
2484/*
2485 * Callback for estimating effectiveness of abbreviated key optimization, using
2486 * heuristic rules. Returns value indicating if the abbreviation optimization
2487 * should be aborted, based on its projected effectiveness.
2488 */
2489static bool
2490varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2491{
2493 double abbrev_distinct,
2494 key_distinct;
2495
2496 Assert(ssup->abbreviate);
2497
2498 /* Have a little patience */
2499 if (memtupcount < 100)
2500 return false;
2501
2502 abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2503 key_distinct = estimateHyperLogLog(&sss->full_card);
2504
2505 /*
2506 * Clamp cardinality estimates to at least one distinct value. While
2507 * NULLs are generally disregarded, if only NULL values were seen so far,
2508 * that might misrepresent costs if we failed to clamp.
2509 */
2510 if (abbrev_distinct <= 1.0)
2511 abbrev_distinct = 1.0;
2512
2513 if (key_distinct <= 1.0)
2514 key_distinct = 1.0;
2515
2516 /*
2517 * In the worst case all abbreviated keys are identical, while at the same
2518 * time there are differences within full key strings not captured in
2519 * abbreviations.
2520 */
2521 if (trace_sort)
2522 {
2523 double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2524
2525 elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2526 "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2527 memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2528 sss->prop_card);
2529 }
2530
2531 /*
2532 * If the number of distinct abbreviated keys approximately matches the
2533 * number of distinct authoritative original keys, that's reason enough to
2534 * proceed. We can win even with a very low cardinality set if most
2535 * tie-breakers only memcmp(). This is by far the most important
2536 * consideration.
2537 *
2538 * While comparisons that are resolved at the abbreviated key level are
2539 * considerably cheaper than tie-breakers resolved with memcmp(), both of
2540 * those two outcomes are so much cheaper than a full strcoll() once
2541 * sorting is underway that it doesn't seem worth it to weigh abbreviated
2542 * cardinality against the overall size of the set in order to more
2543 * accurately model costs. Assume that an abbreviated comparison, and an
2544 * abbreviated comparison with a cheap memcmp()-based authoritative
2545 * resolution are equivalent.
2546 */
2547 if (abbrev_distinct > key_distinct * sss->prop_card)
2548 {
2549 /*
2550 * When we have exceeded 10,000 tuples, decay required cardinality
2551 * aggressively for next call.
2552 *
2553 * This is useful because the number of comparisons required on
2554 * average increases at a linearithmic rate, and at roughly 10,000
2555 * tuples that factor will start to dominate over the linear costs of
2556 * string transformation (this is a conservative estimate). The decay
2557 * rate is chosen to be a little less aggressive than halving -- which
2558 * (since we're called at points at which memtupcount has doubled)
2559 * would never see the cost model actually abort past the first call
2560 * following a decay. This decay rate is mostly a precaution against
2561 * a sudden, violent swing in how well abbreviated cardinality tracks
2562 * full key cardinality. The decay also serves to prevent a marginal
2563 * case from being aborted too late, when too much has already been
2564 * invested in string transformation.
2565 *
2566 * It's possible for sets of several million distinct strings with
2567 * mere tens of thousands of distinct abbreviated keys to still
2568 * benefit very significantly. This will generally occur provided
2569 * each abbreviated key is a proxy for a roughly uniform number of the
2570 * set's full keys. If it isn't so, we hope to catch that early and
2571 * abort. If it isn't caught early, by the time the problem is
2572 * apparent it's probably not worth aborting.
2573 */
2574 if (memtupcount > 10000)
2575 sss->prop_card *= 0.65;
2576
2577 return false;
2578 }
2579
2580 /*
2581 * Abort abbreviation strategy.
2582 *
2583 * The worst case, where all abbreviated keys are identical while all
2584 * original strings differ will typically only see a regression of about
2585 * 10% in execution time for small to medium sized lists of strings.
2586 * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2587 * often expect very large improvements, particularly with sets of strings
2588 * of moderately high to high abbreviated cardinality. There is little to
2589 * lose but much to gain, which our strategy reflects.
2590 */
2591 if (trace_sort)
2592 elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2593 "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2594 memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2595
2596 return true;
2597}
2598
2599/*
2600 * Generic equalimage support function for character type's operator classes.
2601 * Disables the use of deduplication with nondeterministic collations.
2602 */
2603Datum
2605{
2606 /* Oid opcintype = PG_GETARG_OID(0); */
2609
2611
2613
2614 PG_RETURN_BOOL(locale->deterministic);
2615}
2616
2617Datum
2619{
2620 text *arg1 = PG_GETARG_TEXT_PP(0);
2621 text *arg2 = PG_GETARG_TEXT_PP(1);
2622 text *result;
2623
2624 result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2625
2626 PG_RETURN_TEXT_P(result);
2627}
2628
2629Datum
2631{
2632 text *arg1 = PG_GETARG_TEXT_PP(0);
2633 text *arg2 = PG_GETARG_TEXT_PP(1);
2634 text *result;
2635
2636 result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2637
2638 PG_RETURN_TEXT_P(result);
2639}
2640
2641
2642/*
2643 * Cross-type comparison functions for types text and name.
2644 */
2645
2646Datum
2648{
2649 Name arg1 = PG_GETARG_NAME(0);
2650 text *arg2 = PG_GETARG_TEXT_PP(1);
2651 size_t len1 = strlen(NameStr(*arg1));
2652 size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2654 bool result;
2655
2657
2658 if (collid == C_COLLATION_OID)
2659 result = (len1 == len2 &&
2660 memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2661 else
2662 result = (varstr_cmp(NameStr(*arg1), len1,
2663 VARDATA_ANY(arg2), len2,
2664 collid) == 0);
2665
2666 PG_FREE_IF_COPY(arg2, 1);
2667
2668 PG_RETURN_BOOL(result);
2669}
2670
2671Datum
2673{
2674 text *arg1 = PG_GETARG_TEXT_PP(0);
2675 Name arg2 = PG_GETARG_NAME(1);
2676 size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2677 size_t len2 = strlen(NameStr(*arg2));
2679 bool result;
2680
2682
2683 if (collid == C_COLLATION_OID)
2684 result = (len1 == len2 &&
2685 memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2686 else
2687 result = (varstr_cmp(VARDATA_ANY(arg1), len1,
2688 NameStr(*arg2), len2,
2689 collid) == 0);
2690
2691 PG_FREE_IF_COPY(arg1, 0);
2692
2693 PG_RETURN_BOOL(result);
2694}
2695
2696Datum
2698{
2699 Name arg1 = PG_GETARG_NAME(0);
2700 text *arg2 = PG_GETARG_TEXT_PP(1);
2701 size_t len1 = strlen(NameStr(*arg1));
2702 size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2704 bool result;
2705
2707
2708 if (collid == C_COLLATION_OID)
2709 result = !(len1 == len2 &&
2710 memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2711 else
2712 result = !(varstr_cmp(NameStr(*arg1), len1,
2713 VARDATA_ANY(arg2), len2,
2714 collid) == 0);
2715
2716 PG_FREE_IF_COPY(arg2, 1);
2717
2718 PG_RETURN_BOOL(result);
2719}
2720
2721Datum
2723{
2724 text *arg1 = PG_GETARG_TEXT_PP(0);
2725 Name arg2 = PG_GETARG_NAME(1);
2726 size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2727 size_t len2 = strlen(NameStr(*arg2));
2729 bool result;
2730
2732
2733 if (collid == C_COLLATION_OID)
2734 result = !(len1 == len2 &&
2735 memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2736 else
2737 result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
2738 NameStr(*arg2), len2,
2739 collid) == 0);
2740
2741 PG_FREE_IF_COPY(arg1, 0);
2742
2743 PG_RETURN_BOOL(result);
2744}
2745
2746Datum
2748{
2749 Name arg1 = PG_GETARG_NAME(0);
2750 text *arg2 = PG_GETARG_TEXT_PP(1);
2751 int32 result;
2752
2753 result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
2754 VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
2756
2757 PG_FREE_IF_COPY(arg2, 1);
2758
2759 PG_RETURN_INT32(result);
2760}
2761
2762Datum
2764{
2765 text *arg1 = PG_GETARG_TEXT_PP(0);
2766 Name arg2 = PG_GETARG_NAME(1);
2767 int32 result;
2768
2769 result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
2770 NameStr(*arg2), strlen(NameStr(*arg2)),
2772
2773 PG_FREE_IF_COPY(arg1, 0);
2774
2775 PG_RETURN_INT32(result);
2776}
2777
2778#define CmpCall(cmpfunc) \
2779 DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
2780 PG_GET_COLLATION(), \
2781 PG_GETARG_DATUM(0), \
2782 PG_GETARG_DATUM(1)))
2783
2784Datum
2786{
2788}
2789
2790Datum
2792{
2794}
2795
2796Datum
2798{
2800}
2801
2802Datum
2804{
2806}
2807
2808Datum
2810{
2812}
2813
2814Datum
2816{
2818}
2819
2820Datum
2822{
2824}
2825
2826Datum
2828{
2830}
2831
2832#undef CmpCall
2833
2834
2835/*
2836 * The following operators support character-by-character comparison
2837 * of text datums, to allow building indexes suitable for LIKE clauses.
2838 * Note that the regular texteq/textne comparison operators, and regular
2839 * support functions 1 and 2 with "C" collation are assumed to be
2840 * compatible with these!
2841 */
2842
2843static int
2845{
2846 int result;
2847 int len1,
2848 len2;
2849
2850 len1 = VARSIZE_ANY_EXHDR(arg1);
2851 len2 = VARSIZE_ANY_EXHDR(arg2);
2852
2853 result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2854 if (result != 0)
2855 return result;
2856 else if (len1 < len2)
2857 return -1;
2858 else if (len1 > len2)
2859 return 1;
2860 else
2861 return 0;
2862}
2863
2864
2865Datum
2867{
2868 text *arg1 = PG_GETARG_TEXT_PP(0);
2869 text *arg2 = PG_GETARG_TEXT_PP(1);
2870 int result;
2871
2872 result = internal_text_pattern_compare(arg1, arg2);
2873
2874 PG_FREE_IF_COPY(arg1, 0);
2875 PG_FREE_IF_COPY(arg2, 1);
2876
2877 PG_RETURN_BOOL(result < 0);
2878}
2879
2880
2881Datum
2883{
2884 text *arg1 = PG_GETARG_TEXT_PP(0);
2885 text *arg2 = PG_GETARG_TEXT_PP(1);
2886 int result;
2887
2888 result = internal_text_pattern_compare(arg1, arg2);
2889
2890 PG_FREE_IF_COPY(arg1, 0);
2891 PG_FREE_IF_COPY(arg2, 1);
2892
2893 PG_RETURN_BOOL(result <= 0);
2894}
2895
2896
2897Datum
2899{
2900 text *arg1 = PG_GETARG_TEXT_PP(0);
2901 text *arg2 = PG_GETARG_TEXT_PP(1);
2902 int result;
2903
2904 result = internal_text_pattern_compare(arg1, arg2);
2905
2906 PG_FREE_IF_COPY(arg1, 0);
2907 PG_FREE_IF_COPY(arg2, 1);
2908
2909 PG_RETURN_BOOL(result >= 0);
2910}
2911
2912
2913Datum
2915{
2916 text *arg1 = PG_GETARG_TEXT_PP(0);
2917 text *arg2 = PG_GETARG_TEXT_PP(1);
2918 int result;
2919
2920 result = internal_text_pattern_compare(arg1, arg2);
2921
2922 PG_FREE_IF_COPY(arg1, 0);
2923 PG_FREE_IF_COPY(arg2, 1);
2924
2925 PG_RETURN_BOOL(result > 0);
2926}
2927
2928
2929Datum
2931{
2932 text *arg1 = PG_GETARG_TEXT_PP(0);
2933 text *arg2 = PG_GETARG_TEXT_PP(1);
2934 int result;
2935
2936 result = internal_text_pattern_compare(arg1, arg2);
2937
2938 PG_FREE_IF_COPY(arg1, 0);
2939 PG_FREE_IF_COPY(arg2, 1);
2940
2941 PG_RETURN_INT32(result);
2942}
2943
2944
2945Datum
2947{
2949 MemoryContext oldcontext;
2950
2951 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2952
2953 /* Use generic string SortSupport, forcing "C" collation */
2954 varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
2955
2956 MemoryContextSwitchTo(oldcontext);
2957
2959}
2960
2961
2962/*-------------------------------------------------------------
2963 * byteaoctetlen
2964 *
2965 * get the number of bytes contained in an instance of type 'bytea'
2966 *-------------------------------------------------------------
2967 */
2968Datum
2970{
2972
2973 /* We need not detoast the input at all */
2975}
2976
2977/*
2978 * byteacat -
2979 * takes two bytea* and returns a bytea* that is the concatenation of
2980 * the two.
2981 *
2982 * Cloned from textcat and modified as required.
2983 */
2984Datum
2986{
2987 bytea *t1 = PG_GETARG_BYTEA_PP(0);
2988 bytea *t2 = PG_GETARG_BYTEA_PP(1);
2989
2991}
2992
2993/*
2994 * bytea_catenate
2995 * Guts of byteacat(), broken out so it can be used by other functions
2996 *
2997 * Arguments can be in short-header form, but not compressed or out-of-line
2998 */
2999static bytea *
3001{
3002 bytea *result;
3003 int len1,
3004 len2,
3005 len;
3006 char *ptr;
3007
3008 len1 = VARSIZE_ANY_EXHDR(t1);
3009 len2 = VARSIZE_ANY_EXHDR(t2);
3010
3011 /* paranoia ... probably should throw error instead? */
3012 if (len1 < 0)
3013 len1 = 0;
3014 if (len2 < 0)
3015 len2 = 0;
3016
3017 len = len1 + len2 + VARHDRSZ;
3018 result = (bytea *) palloc(len);
3019
3020 /* Set size of result string... */
3021 SET_VARSIZE(result, len);
3022
3023 /* Fill data field of result string... */
3024 ptr = VARDATA(result);
3025 if (len1 > 0)
3026 memcpy(ptr, VARDATA_ANY(t1), len1);
3027 if (len2 > 0)
3028 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
3029
3030 return result;
3031}
3032
3033#define PG_STR_GET_BYTEA(str_) \
3034 DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
3035
3036/*
3037 * bytea_substr()
3038 * Return a substring starting at the specified position.
3039 * Cloned from text_substr and modified as required.
3040 *
3041 * Input:
3042 * - string
3043 * - starting position (is one-based)
3044 * - string length (optional)
3045 *
3046 * If the starting position is zero or less, then return from the start of the string
3047 * adjusting the length to be consistent with the "negative start" per SQL.
3048 * If the length is less than zero, an ERROR is thrown. If no third argument
3049 * (length) is provided, the length to the end of the string is assumed.
3050 */
3051Datum
3053{
3055 PG_GETARG_INT32(1),
3056 PG_GETARG_INT32(2),
3057 false));
3058}
3059
3060/*
3061 * bytea_substr_no_len -
3062 * Wrapper to avoid opr_sanity failure due to
3063 * one function accepting a different number of args.
3064 */
3065Datum
3067{
3069 PG_GETARG_INT32(1),
3070 -1,
3071 true));
3072}
3073
3074static bytea *
3076 int S,
3077 int L,
3078 bool length_not_specified)
3079{
3080 int32 S1; /* adjusted start position */
3081 int32 L1; /* adjusted substring length */
3082 int32 E; /* end position */
3083
3084 /*
3085 * The logic here should generally match text_substring().
3086 */
3087 S1 = Max(S, 1);
3088
3089 if (length_not_specified)
3090 {
3091 /*
3092 * Not passed a length - DatumGetByteaPSlice() grabs everything to the
3093 * end of the string if we pass it a negative value for length.
3094 */
3095 L1 = -1;
3096 }
3097 else if (L < 0)
3098 {
3099 /* SQL99 says to throw an error for E < S, i.e., negative length */
3100 ereport(ERROR,
3101 (errcode(ERRCODE_SUBSTRING_ERROR),
3102 errmsg("negative substring length not allowed")));
3103 L1 = -1; /* silence stupider compilers */
3104 }
3105 else if (pg_add_s32_overflow(S, L, &E))
3106 {
3107 /*
3108 * L could be large enough for S + L to overflow, in which case the
3109 * substring must run to end of string.
3110 */
3111 L1 = -1;
3112 }
3113 else
3114 {
3115 /*
3116 * A zero or negative value for the end position can happen if the
3117 * start was negative or one. SQL99 says to return a zero-length
3118 * string.
3119 */
3120 if (E < 1)
3121 return PG_STR_GET_BYTEA("");
3122
3123 L1 = E - S1;
3124 }
3125
3126 /*
3127 * If the start position is past the end of the string, SQL99 says to
3128 * return a zero-length string -- DatumGetByteaPSlice() will do that for
3129 * us. We need only convert S1 to zero-based starting position.
3130 */
3131 return DatumGetByteaPSlice(str, S1 - 1, L1);
3132}
3133
3134/*
3135 * byteaoverlay
3136 * Replace specified substring of first string with second
3137 *
3138 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
3139 * This code is a direct implementation of what the standard says.
3140 */
3141Datum
3143{
3144 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3145 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3146 int sp = PG_GETARG_INT32(2); /* substring start position */
3147 int sl = PG_GETARG_INT32(3); /* substring length */
3148
3149 PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3150}
3151
3152Datum
3154{
3155 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3156 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3157 int sp = PG_GETARG_INT32(2); /* substring start position */
3158 int sl;
3159
3160 sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
3161 PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3162}
3163
3164static bytea *
3165bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
3166{
3167 bytea *result;
3168 bytea *s1;
3169 bytea *s2;
3170 int sp_pl_sl;
3171
3172 /*
3173 * Check for possible integer-overflow cases. For negative sp, throw a
3174 * "substring length" error because that's what should be expected
3175 * according to the spec's definition of OVERLAY().
3176 */
3177 if (sp <= 0)
3178 ereport(ERROR,
3179 (errcode(ERRCODE_SUBSTRING_ERROR),
3180 errmsg("negative substring length not allowed")));
3181 if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
3182 ereport(ERROR,
3183 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
3184 errmsg("integer out of range")));
3185
3186 s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
3187 s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
3188 result = bytea_catenate(s1, t2);
3189 result = bytea_catenate(result, s2);
3190
3191 return result;
3192}
3193
3194/*
3195 * bit_count
3196 */
3197Datum
3199{
3200 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3201
3203}
3204
3205/*
3206 * byteapos -
3207 * Return the position of the specified substring.
3208 * Implements the SQL POSITION() function.
3209 * Cloned from textpos and modified as required.
3210 */
3211Datum
3213{
3214 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3215 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3216 int pos;
3217 int px,
3218 p;
3219 int len1,
3220 len2;
3221 char *p1,
3222 *p2;
3223
3224 len1 = VARSIZE_ANY_EXHDR(t1);
3225 len2 = VARSIZE_ANY_EXHDR(t2);
3226
3227 if (len2 <= 0)
3228 PG_RETURN_INT32(1); /* result for empty pattern */
3229
3230 p1 = VARDATA_ANY(t1);
3231 p2 = VARDATA_ANY(t2);
3232
3233 pos = 0;
3234 px = (len1 - len2);
3235 for (p = 0; p <= px; p++)
3236 {
3237 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
3238 {
3239 pos = p + 1;
3240 break;
3241 };
3242 p1++;
3243 };
3244
3245 PG_RETURN_INT32(pos);
3246}
3247
3248/*-------------------------------------------------------------
3249 * byteaGetByte
3250 *
3251 * this routine treats "bytea" as an array of bytes.
3252 * It returns the Nth byte (a number between 0 and 255).
3253 *-------------------------------------------------------------
3254 */
3255Datum
3257{
3258 bytea *v = PG_GETARG_BYTEA_PP(0);
3259 int32 n = PG_GETARG_INT32(1);
3260 int len;
3261 int byte;
3262
3264
3265 if (n < 0 || n >= len)
3266 ereport(ERROR,
3267 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3268 errmsg("index %d out of valid range, 0..%d",
3269 n, len - 1)));
3270
3271 byte = ((unsigned char *) VARDATA_ANY(v))[n];
3272
3273 PG_RETURN_INT32(byte);
3274}
3275
3276/*-------------------------------------------------------------
3277 * byteaGetBit
3278 *
3279 * This routine treats a "bytea" type like an array of bits.
3280 * It returns the value of the Nth bit (0 or 1).
3281 *
3282 *-------------------------------------------------------------
3283 */
3284Datum
3286{
3287 bytea *v = PG_GETARG_BYTEA_PP(0);
3288 int64 n = PG_GETARG_INT64(1);
3289 int byteNo,
3290 bitNo;
3291 int len;
3292 int byte;
3293
3295
3296 if (n < 0 || n >= (int64) len * 8)
3297 ereport(ERROR,
3298 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3299 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
3300 n, (int64) len * 8 - 1)));
3301
3302 /* n/8 is now known < len, so safe to cast to int */
3303 byteNo = (int) (n / 8);
3304 bitNo = (int) (n % 8);
3305
3306 byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3307
3308 if (byte & (1 << bitNo))
3309 PG_RETURN_INT32(1);
3310 else
3311 PG_RETURN_INT32(0);
3312}
3313
3314/*-------------------------------------------------------------
3315 * byteaSetByte
3316 *
3317 * Given an instance of type 'bytea' creates a new one with
3318 * the Nth byte set to the given value.
3319 *
3320 *-------------------------------------------------------------
3321 */
3322Datum
3324{
3325 bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3326 int32 n = PG_GETARG_INT32(1);
3327 int32 newByte = PG_GETARG_INT32(2);
3328 int len;
3329
3330 len = VARSIZE(res) - VARHDRSZ;
3331
3332 if (n < 0 || n >= len)
3333 ereport(ERROR,
3334 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3335 errmsg("index %d out of valid range, 0..%d",
3336 n, len - 1)));
3337
3338 /*
3339 * Now set the byte.
3340 */
3341 ((unsigned char *) VARDATA(res))[n] = newByte;
3342
3343 PG_RETURN_BYTEA_P(res);
3344}
3345
3346/*-------------------------------------------------------------
3347 * byteaSetBit
3348 *
3349 * Given an instance of type 'bytea' creates a new one with
3350 * the Nth bit set to the given value.
3351 *
3352 *-------------------------------------------------------------
3353 */
3354Datum
3356{
3357 bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3358 int64 n = PG_GETARG_INT64(1);
3359 int32 newBit = PG_GETARG_INT32(2);
3360 int len;
3361 int oldByte,
3362 newByte;
3363 int byteNo,
3364 bitNo;
3365
3366 len = VARSIZE(res) - VARHDRSZ;
3367
3368 if (n < 0 || n >= (int64) len * 8)
3369 ereport(ERROR,
3370 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3371 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
3372 n, (int64) len * 8 - 1)));
3373
3374 /* n/8 is now known < len, so safe to cast to int */
3375 byteNo = (int) (n / 8);
3376 bitNo = (int) (n % 8);
3377
3378 /*
3379 * sanity check!
3380 */
3381 if (newBit != 0 && newBit != 1)
3382 ereport(ERROR,
3383 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3384 errmsg("new bit must be 0 or 1")));
3385
3386 /*
3387 * Update the byte.
3388 */
3389 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3390
3391 if (newBit == 0)
3392 newByte = oldByte & (~(1 << bitNo));
3393 else
3394 newByte = oldByte | (1 << bitNo);
3395
3396 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3397
3398 PG_RETURN_BYTEA_P(res);
3399}
3400
3401/*
3402 * Return reversed bytea
3403 */
3404Datum
3406{
3407 bytea *v = PG_GETARG_BYTEA_PP(0);
3408 const char *p = VARDATA_ANY(v);
3409 int len = VARSIZE_ANY_EXHDR(v);
3410 const char *endp = p + len;
3411 bytea *result = palloc(len + VARHDRSZ);
3412 char *dst = (char *) VARDATA(result) + len;
3413
3414 SET_VARSIZE(result, len + VARHDRSZ);
3415
3416 while (p < endp)
3417 *(--dst) = *p++;
3418
3419 PG_RETURN_BYTEA_P(result);
3420}
3421
3422
3423/* text_name()
3424 * Converts a text type to a Name type.
3425 */
3426Datum
3428{
3429 text *s = PG_GETARG_TEXT_PP(0);
3430 Name result;
3431 int len;
3432
3434
3435 /* Truncate oversize input */
3436 if (len >= NAMEDATALEN)
3438
3439 /* We use palloc0 here to ensure result is zero-padded */
3440 result = (Name) palloc0(NAMEDATALEN);
3441 memcpy(NameStr(*result), VARDATA_ANY(s), len);
3442
3443 PG_RETURN_NAME(result);
3444}
3445
3446/* name_text()
3447 * Converts a Name type to a text type.
3448 */
3449Datum
3451{
3452 Name s = PG_GETARG_NAME(0);
3453
3455}
3456
3457
3458/*
3459 * textToQualifiedNameList - convert a text object to list of names
3460 *
3461 * This implements the input parsing needed by nextval() and other
3462 * functions that take a text parameter representing a qualified name.
3463 * We split the name at dots, downcase if not double-quoted, and
3464 * truncate names if they're too long.
3465 */
3466List *
3468{
3469 char *rawname;
3470 List *result = NIL;
3471 List *namelist;
3472 ListCell *l;
3473
3474 /* Convert to C string (handles possible detoasting). */
3475 /* Note we rely on being able to modify rawname below. */
3476 rawname = text_to_cstring(textval);
3477
3478 if (!SplitIdentifierString(rawname, '.', &namelist))
3479 ereport(ERROR,
3480 (errcode(ERRCODE_INVALID_NAME),
3481 errmsg("invalid name syntax")));
3482
3483 if (namelist == NIL)
3484 ereport(ERROR,
3485 (errcode(ERRCODE_INVALID_NAME),
3486 errmsg("invalid name syntax")));
3487
3488 foreach(l, namelist)
3489 {
3490 char *curname = (char *) lfirst(l);
3491
3492 result = lappend(result, makeString(pstrdup(curname)));
3493 }
3494
3495 pfree(rawname);
3496 list_free(namelist);
3497
3498 return result;
3499}
3500
3501/*
3502 * SplitIdentifierString --- parse a string containing identifiers
3503 *
3504 * This is the guts of textToQualifiedNameList, and is exported for use in
3505 * other situations such as parsing GUC variables. In the GUC case, it's
3506 * important to avoid memory leaks, so the API is designed to minimize the
3507 * amount of stuff that needs to be allocated and freed.
3508 *
3509 * Inputs:
3510 * rawstring: the input string; must be overwritable! On return, it's
3511 * been modified to contain the separated identifiers.
3512 * separator: the separator punctuation expected between identifiers
3513 * (typically '.' or ','). Whitespace may also appear around
3514 * identifiers.
3515 * Outputs:
3516 * namelist: filled with a palloc'd list of pointers to identifiers within
3517 * rawstring. Caller should list_free() this even on error return.
3518 *
3519 * Returns true if okay, false if there is a syntax error in the string.
3520 *
3521 * Note that an empty string is considered okay here, though not in
3522 * textToQualifiedNameList.
3523 */
3524bool
3525SplitIdentifierString(char *rawstring, char separator,
3526 List **namelist)
3527{
3528 char *nextp = rawstring;
3529 bool done = false;
3530
3531 *namelist = NIL;
3532
3533 while (scanner_isspace(*nextp))
3534 nextp++; /* skip leading whitespace */
3535
3536 if (*nextp == '\0')
3537 return true; /* allow empty string */
3538
3539 /* At the top of the loop, we are at start of a new identifier. */
3540 do
3541 {
3542 char *curname;
3543 char *endp;
3544
3545 if (*nextp == '"')
3546 {
3547 /* Quoted name --- collapse quote-quote pairs, no downcasing */
3548 curname = nextp + 1;
3549 for (;;)
3550 {
3551 endp = strchr(nextp + 1, '"');
3552 if (endp == NULL)
3553 return false; /* mismatched quotes */
3554 if (endp[1] != '"')
3555 break; /* found end of quoted name */
3556 /* Collapse adjacent quotes into one quote, and look again */
3557 memmove(endp, endp + 1, strlen(endp));
3558 nextp = endp;
3559 }
3560 /* endp now points at the terminating quote */
3561 nextp = endp + 1;
3562 }
3563 else
3564 {
3565 /* Unquoted name --- extends to separator or whitespace */
3566 char *downname;
3567 int len;
3568
3569 curname = nextp;
3570 while (*nextp && *nextp != separator &&
3571 !scanner_isspace(*nextp))
3572 nextp++;
3573 endp = nextp;
3574 if (curname == nextp)
3575 return false; /* empty unquoted name not allowed */
3576
3577 /*
3578 * Downcase the identifier, using same code as main lexer does.
3579 *
3580 * XXX because we want to overwrite the input in-place, we cannot
3581 * support a downcasing transformation that increases the string
3582 * length. This is not a problem given the current implementation
3583 * of downcase_truncate_identifier, but we'll probably have to do
3584 * something about this someday.
3585 */
3586 len = endp - curname;
3587 downname = downcase_truncate_identifier(curname, len, false);
3588 Assert(strlen(downname) <= len);
3589 strncpy(curname, downname, len); /* strncpy is required here */
3590 pfree(downname);
3591 }
3592
3593 while (scanner_isspace(*nextp))
3594 nextp++; /* skip trailing whitespace */
3595
3596 if (*nextp == separator)
3597 {
3598 nextp++;
3599 while (scanner_isspace(*nextp))
3600 nextp++; /* skip leading whitespace for next */
3601 /* we expect another name, so done remains false */
3602 }
3603 else if (*nextp == '\0')
3604 done = true;
3605 else
3606 return false; /* invalid syntax */
3607
3608 /* Now safe to overwrite separator with a null */
3609 *endp = '\0';
3610
3611 /* Truncate name if it's overlength */
3612 truncate_identifier(curname, strlen(curname), false);
3613
3614 /*
3615 * Finished isolating current name --- add it to list
3616 */
3617 *namelist = lappend(*namelist, curname);
3618
3619 /* Loop back if we didn't reach end of string */
3620 } while (!done);
3621
3622 return true;
3623}
3624
3625
3626/*
3627 * SplitDirectoriesString --- parse a string containing file/directory names
3628 *
3629 * This works fine on file names too; the function name is historical.
3630 *
3631 * This is similar to SplitIdentifierString, except that the parsing
3632 * rules are meant to handle pathnames instead of identifiers: there is
3633 * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3634 * and we apply canonicalize_path() to each extracted string. Because of the
3635 * last, the returned strings are separately palloc'd rather than being
3636 * pointers into rawstring --- but we still scribble on rawstring.
3637 *
3638 * Inputs:
3639 * rawstring: the input string; must be modifiable!
3640 * separator: the separator punctuation expected between directories
3641 * (typically ',' or ';'). Whitespace may also appear around
3642 * directories.
3643 * Outputs:
3644 * namelist: filled with a palloc'd list of directory names.
3645 * Caller should list_free_deep() this even on error return.
3646 *
3647 * Returns true if okay, false if there is a syntax error in the string.
3648 *
3649 * Note that an empty string is considered okay here.
3650 */
3651bool
3652SplitDirectoriesString(char *rawstring, char separator,
3653 List **namelist)
3654{
3655 char *nextp = rawstring;
3656 bool done = false;
3657
3658 *namelist = NIL;
3659
3660 while (scanner_isspace(*nextp))
3661 nextp++; /* skip leading whitespace */
3662
3663 if (*nextp == '\0')
3664 return true; /* allow empty string */
3665
3666 /* At the top of the loop, we are at start of a new directory. */
3667 do
3668 {
3669 char *curname;
3670 char *endp;
3671
3672 if (*nextp == '"')
3673 {
3674 /* Quoted name --- collapse quote-quote pairs */
3675 curname = nextp + 1;
3676 for (;;)
3677 {
3678 endp = strchr(nextp + 1, '"');
3679 if (endp == NULL)
3680 return false; /* mismatched quotes */
3681 if (endp[1] != '"')
3682 break; /* found end of quoted name */
3683 /* Collapse adjacent quotes into one quote, and look again */
3684 memmove(endp, endp + 1, strlen(endp));
3685 nextp = endp;
3686 }
3687 /* endp now points at the terminating quote */
3688 nextp = endp + 1;
3689 }
3690 else
3691 {
3692 /* Unquoted name --- extends to separator or end of string */
3693 curname = endp = nextp;
3694 while (*nextp && *nextp != separator)
3695 {
3696 /* trailing whitespace should not be included in name */
3697 if (!scanner_isspace(*nextp))
3698 endp = nextp + 1;
3699 nextp++;
3700 }
3701 if (curname == endp)
3702 return false; /* empty unquoted name not allowed */
3703 }
3704
3705 while (scanner_isspace(*nextp))
3706 nextp++; /* skip trailing whitespace */
3707
3708 if (*nextp == separator)
3709 {
3710 nextp++;
3711 while (scanner_isspace(*nextp))
3712 nextp++; /* skip leading whitespace for next */
3713 /* we expect another name, so done remains false */
3714 }
3715 else if (*nextp == '\0')
3716 done = true;
3717 else
3718 return false; /* invalid syntax */
3719
3720 /* Now safe to overwrite separator with a null */
3721 *endp = '\0';
3722
3723 /* Truncate path if it's overlength */
3724 if (strlen(curname) >= MAXPGPATH)
3725 curname[MAXPGPATH - 1] = '\0';
3726
3727 /*
3728 * Finished isolating current name --- add it to list
3729 */
3730 curname = pstrdup(curname);
3731 canonicalize_path(curname);
3732 *namelist = lappend(*namelist, curname);
3733
3734 /* Loop back if we didn't reach end of string */
3735 } while (!done);
3736
3737 return true;
3738}
3739
3740
3741/*
3742 * SplitGUCList --- parse a string containing identifiers or file names
3743 *
3744 * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
3745 * presuming whether the elements will be taken as identifiers or file names.
3746 * We assume the input has already been through flatten_set_variable_args(),
3747 * so that we need never downcase (if appropriate, that was done already).
3748 * Nor do we ever truncate, since we don't know the correct max length.
3749 * We disallow embedded whitespace for simplicity (it shouldn't matter,
3750 * because any embedded whitespace should have led to double-quoting).
3751 * Otherwise the API is identical to SplitIdentifierString.
3752 *
3753 * XXX it's annoying to have so many copies of this string-splitting logic.
3754 * However, it's not clear that having one function with a bunch of option
3755 * flags would be much better.
3756 *
3757 * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
3758 * Be sure to update that if you have to change this.
3759 *
3760 * Inputs:
3761 * rawstring: the input string; must be overwritable! On return, it's
3762 * been modified to contain the separated identifiers.
3763 * separator: the separator punctuation expected between identifiers
3764 * (typically '.' or ','). Whitespace may also appear around
3765 * identifiers.
3766 * Outputs:
3767 * namelist: filled with a palloc'd list of pointers to identifiers within
3768 * rawstring. Caller should list_free() this even on error return.
3769 *
3770 * Returns true if okay, false if there is a syntax error in the string.
3771 */
3772bool
3773SplitGUCList(char *rawstring, char separator,
3774 List **namelist)
3775{
3776 char *nextp = rawstring;
3777 bool done = false;
3778
3779 *namelist = NIL;
3780
3781 while (scanner_isspace(*nextp))
3782 nextp++; /* skip leading whitespace */
3783
3784 if (*nextp == '\0')
3785 return true; /* allow empty string */
3786
3787 /* At the top of the loop, we are at start of a new identifier. */
3788 do
3789 {
3790 char *curname;
3791 char *endp;
3792
3793 if (*nextp == '"')
3794 {
3795 /* Quoted name --- collapse quote-quote pairs */
3796 curname = nextp + 1;
3797 for (;;)
3798 {
3799 endp = strchr(nextp + 1, '"');
3800 if (endp == NULL)
3801 return false; /* mismatched quotes */
3802 if (endp[1] != '"')
3803 break; /* found end of quoted name */
3804 /* Collapse adjacent quotes into one quote, and look again */
3805 memmove(endp, endp + 1, strlen(endp));
3806 nextp = endp;
3807 }
3808 /* endp now points at the terminating quote */
3809 nextp = endp + 1;
3810 }
3811 else
3812 {
3813 /* Unquoted name --- extends to separator or whitespace */
3814 curname = nextp;
3815 while (*nextp && *nextp != separator &&
3816 !scanner_isspace(*nextp))
3817 nextp++;
3818 endp = nextp;
3819 if (curname == nextp)
3820 return false; /* empty unquoted name not allowed */
3821 }
3822
3823 while (scanner_isspace(*nextp))
3824 nextp++; /* skip trailing whitespace */
3825
3826 if (*nextp == separator)
3827 {
3828 nextp++;
3829 while (scanner_isspace(*nextp))
3830 nextp++; /* skip leading whitespace for next */
3831 /* we expect another name, so done remains false */
3832 }
3833 else if (*nextp == '\0')
3834 done = true;
3835 else
3836 return false; /* invalid syntax */
3837
3838 /* Now safe to overwrite separator with a null */
3839 *endp = '\0';
3840
3841 /*
3842 * Finished isolating current name --- add it to list
3843 */
3844 *namelist = lappend(*namelist, curname);
3845
3846 /* Loop back if we didn't reach end of string */
3847 } while (!done);
3848
3849 return true;
3850}
3851
3852
3853/*****************************************************************************
3854 * Comparison Functions used for bytea
3855 *
3856 * Note: btree indexes need these routines not to leak memory; therefore,
3857 * be careful to free working copies of toasted datums. Most places don't
3858 * need to be so careful.
3859 *****************************************************************************/
3860
/*
 * Equality test on two bytea arguments; yields a boolean Datum.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 3862) -- restore
 * it from the upstream file before building.
 */
3861Datum
3863{
3864 Datum arg1 = PG_GETARG_DATUM(0);
3865 Datum arg2 = PG_GETARG_DATUM(1);
3866 bool result;
3867 Size len1,
3868 len2;
3869
3870 /*
3871 * We can use a fast path for unequal lengths, which might save us from
3872 * having to detoast one or both values.
3873 */
3874 len1 = toast_raw_datum_size(arg1);
3875 len2 = toast_raw_datum_size(arg2);
3876 if (len1 != len2)
3877 result = false;
3878 else
3879 {
3880 bytea *barg1 = DatumGetByteaPP(arg1);
3881 bytea *barg2 = DatumGetByteaPP(arg2);
3882
 /*
  * The lengths are equal here, so len1 serves for both operands.  The
  * VARHDRSZ subtraction suggests toast_raw_datum_size() counts the header
  * as well (assumption drawn from this usage -- confirm in detoast.h).
  */
3883 result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3884 len1 - VARHDRSZ) == 0);
3885
3886 PG_FREE_IF_COPY(barg1, 0);
3887 PG_FREE_IF_COPY(barg2, 1);
3888 }
3889
3890 PG_RETURN_BOOL(result);
3891}
3892
/*
 * Inequality test on two bytea arguments; yields a boolean Datum.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 3894) -- restore
 * it from the upstream file before building.
 */
3893Datum
3895{
3896 Datum arg1 = PG_GETARG_DATUM(0);
3897 Datum arg2 = PG_GETARG_DATUM(1);
3898 bool result;
3899 Size len1,
3900 len2;
3901
3902 /*
3903 * We can use a fast path for unequal lengths, which might save us from
3904 * having to detoast one or both values.
3905 */
3906 len1 = toast_raw_datum_size(arg1);
3907 len2 = toast_raw_datum_size(arg2);
3908 if (len1 != len2)
3909 result = true;
3910 else
3911 {
3912 bytea *barg1 = DatumGetByteaPP(arg1);
3913 bytea *barg2 = DatumGetByteaPP(arg2);
3914
 /*
  * The lengths are equal here, so len1 serves for both operands.  The
  * VARHDRSZ subtraction suggests toast_raw_datum_size() counts the header
  * as well (assumption drawn from this usage -- confirm in detoast.h).
  */
3915 result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3916 len1 - VARHDRSZ) != 0);
3917
3918 PG_FREE_IF_COPY(barg1, 0);
3919 PG_FREE_IF_COPY(barg2, 1);
3920 }
3921
3922 PG_RETURN_BOOL(result);
3923}
3924
/*
 * "Less than" test on two bytea arguments; yields a boolean Datum.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 3926) -- restore
 * it from the upstream file before building.
 */
3925Datum
3927{
3928 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3929 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3930 int len1,
3931 len2;
3932 int cmp;
3933
3934 len1 = VARSIZE_ANY_EXHDR(arg1);
3935 len2 = VARSIZE_ANY_EXHDR(arg2);
3936
 /* Compare only the bytes both values have; length breaks ties below. */
3937 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3938
3939 PG_FREE_IF_COPY(arg1, 0);
3940 PG_FREE_IF_COPY(arg2, 1);
3941
 /* With an identical common prefix, the shorter value sorts first. */
3942 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3943}
3944
/*
 * "Less than or equal" test on two bytea arguments; yields a boolean Datum.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 3946) -- restore
 * it from the upstream file before building.
 */
3945Datum
3947{
3948 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3949 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3950 int len1,
3951 len2;
3952 int cmp;
3953
3954 len1 = VARSIZE_ANY_EXHDR(arg1);
3955 len2 = VARSIZE_ANY_EXHDR(arg2);
3956
 /* Compare only the bytes both values have; length breaks ties below. */
3957 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3958
3959 PG_FREE_IF_COPY(arg1, 0);
3960 PG_FREE_IF_COPY(arg2, 1);
3961
 /* With an identical common prefix, the shorter (or equal-length) value wins. */
3962 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3963}
3964
/*
 * "Greater than" test on two bytea arguments; yields a boolean Datum.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 3966) -- restore
 * it from the upstream file before building.
 */
3965Datum
3967{
3968 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3969 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3970 int len1,
3971 len2;
3972 int cmp;
3973
3974 len1 = VARSIZE_ANY_EXHDR(arg1);
3975 len2 = VARSIZE_ANY_EXHDR(arg2);
3976
 /* Compare only the bytes both values have; length breaks ties below. */
3977 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3978
3979 PG_FREE_IF_COPY(arg1, 0);
3980 PG_FREE_IF_COPY(arg2, 1);
3981
 /* With an identical common prefix, the longer value sorts last. */
3982 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3983}
3984
/*
 * "Greater than or equal" test on two bytea arguments; yields a boolean
 * Datum.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 3986) -- restore
 * it from the upstream file before building.
 */
3985Datum
3987{
3988 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3989 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3990 int len1,
3991 len2;
3992 int cmp;
3993
3994 len1 = VARSIZE_ANY_EXHDR(arg1);
3995 len2 = VARSIZE_ANY_EXHDR(arg2);
3996
 /* Compare only the bytes both values have; length breaks ties below. */
3997 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3998
3999 PG_FREE_IF_COPY(arg1, 0);
4000 PG_FREE_IF_COPY(arg2, 1);
4001
 /* With an identical common prefix, the longer (or equal-length) value wins. */
4002 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
4003}
4004
/*
 * Three-way comparison of two bytea arguments: cmp ends up negative, zero
 * or positive according to whether arg1 sorts before, equal to or after
 * arg2.
 *
 * NOTE(review): two lines are missing from this listing -- the one with the
 * function name and parameter list (4006) and the statement that returns
 * cmp (4024).  Restore both from the upstream file before building.
 */
4005Datum
4007{
4008 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4009 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4010 int len1,
4011 len2;
4012 int cmp;
4013
4014 len1 = VARSIZE_ANY_EXHDR(arg1);
4015 len2 = VARSIZE_ANY_EXHDR(arg2);
4016
4017 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 /* Identical common prefix: the shorter value sorts first. */
4018 if ((cmp == 0) && (len1 != len2))
4019 cmp = (len1 < len2) ? -1 : 1;
4020
4021 PG_FREE_IF_COPY(arg1, 0);
4022 PG_FREE_IF_COPY(arg2, 1);
4023
4025}
4026
/*
 * Return whichever of the two bytea arguments sorts later.  arg1 is chosen
 * only when it compares strictly greater; on a full tie arg2 is returned.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4028) -- restore
 * it from the upstream file before building.
 */
4027Datum
4029{
4030 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4031 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4032 bytea *result;
4033 int len1,
4034 len2;
4035 int cmp;
4036
4037 len1 = VARSIZE_ANY_EXHDR(arg1);
4038 len2 = VARSIZE_ANY_EXHDR(arg2);
4039
4040 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 /* Identical common prefix: the longer value counts as larger. */
4041 result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
4042
4043 PG_RETURN_BYTEA_P(result);
4044}
4045
/*
 * Return whichever of the two bytea arguments sorts earlier.  arg1 is chosen
 * only when it compares strictly less; on a full tie arg2 is returned.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4047) -- restore
 * it from the upstream file before building.
 */
4046Datum
4048{
4049 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4050 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4051 bytea *result;
4052 int len1,
4053 len2;
4054 int cmp;
4055
4056 len1 = VARSIZE_ANY_EXHDR(arg1);
4057 len2 = VARSIZE_ANY_EXHDR(arg2);
4058
4059 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 /* Identical common prefix: the shorter value counts as smaller. */
4060 result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
4061
4062 PG_RETURN_BYTEA_P(result);
4063}
4064
/*
 * Sort-support setup for bytea: delegates to varstr_sortsupport() with
 * BYTEAOID and the "C" collation, running that call inside ssup->ssup_cxt.
 *
 * NOTE(review): several lines are missing from this listing (embedded
 * numbers 4066, 4068 and 4078): the function name and parameter list, the
 * declaration of "ssup", and the return statement.  Restore them from the
 * upstream file before building.
 */
4065Datum
4067{
4069 MemoryContext oldcontext;
4070
 /* Run the setup in the sort-support context, restoring the old one after. */
4071 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
4072
4073 /* Use generic string SortSupport, forcing "C" collation */
4074 varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
4075
4076 MemoryContextSwitchTo(oldcontext);
4077
4079}
4080
4081/* Cast bytea -> int2 */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4083) -- restore
 * it from the upstream file before building.
 */
4082Datum
4084{
4085 bytea *v = PG_GETARG_BYTEA_PP(0);
4086 int len = VARSIZE_ANY_EXHDR(v);
4087 uint16 result;
4088
4089 /* Check that the byte array is not too long */
4090 if (len > sizeof(result))
4091 ereport(ERROR,
4092 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4093 errmsg("smallint out of range"));
4094
 /*
  * Inputs shorter than the integer width are accepted: the loop simply runs
  * fewer times, and an empty input leaves result at 0.
  */
4095 /* Convert it to an integer; most significant bytes come first */
4096 result = 0;
4097 for (int i = 0; i < len; i++)
4098 {
4099 result <<= BITS_PER_BYTE;
4100 result |= ((unsigned char *) VARDATA_ANY(v))[i];
4101 }
4102
 /* The bits were accumulated unsigned; they are handed back as an int16. */
4103 PG_RETURN_INT16(result);
4104}
4105
4106/* Cast bytea -> int4 */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4108) -- restore
 * it from the upstream file before building.
 */
4107Datum
4109{
4110 bytea *v = PG_GETARG_BYTEA_PP(0);
4111 int len = VARSIZE_ANY_EXHDR(v);
4112 uint32 result;
4113
4114 /* Check that the byte array is not too long */
4115 if (len > sizeof(result))
4116 ereport(ERROR,
4117 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4118 errmsg("integer out of range"));
4119
 /*
  * Inputs shorter than the integer width are accepted: the loop simply runs
  * fewer times, and an empty input leaves result at 0.
  */
4120 /* Convert it to an integer; most significant bytes come first */
4121 result = 0;
4122 for (int i = 0; i < len; i++)
4123 {
4124 result <<= BITS_PER_BYTE;
4125 result |= ((unsigned char *) VARDATA_ANY(v))[i];
4126 }
4127
 /* The bits were accumulated unsigned; they are handed back as an int32. */
4128 PG_RETURN_INT32(result);
4129}
4130
4131/* Cast bytea -> int8 */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4133) -- restore
 * it from the upstream file before building.
 */
4132Datum
4134{
4135 bytea *v = PG_GETARG_BYTEA_PP(0);
4136 int len = VARSIZE_ANY_EXHDR(v);
4137 uint64 result;
4138
4139 /* Check that the byte array is not too long */
4140 if (len > sizeof(result))
4141 ereport(ERROR,
4142 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
4143 errmsg("bigint out of range"));
4144
 /*
  * Inputs shorter than the integer width are accepted: the loop simply runs
  * fewer times, and an empty input leaves result at 0.
  */
4145 /* Convert it to an integer; most significant bytes come first */
4146 result = 0;
4147 for (int i = 0; i < len; i++)
4148 {
4149 result <<= BITS_PER_BYTE;
4150 result |= ((unsigned char *) VARDATA_ANY(v))[i];
4151 }
4152
 /* The bits were accumulated unsigned; they are handed back as an int64. */
4153 PG_RETURN_INT64(result);
4154}
4155
4156/* Cast int2 -> bytea; can just use int2send() */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4158).
 */
4157Datum
4159{
 /* Pure delegation: the call info is forwarded untouched. */
4160 return int2send(fcinfo);
4161}
4162
4163/* Cast int4 -> bytea; can just use int4send() */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4165).
 */
4164Datum
4166{
 /* Pure delegation: the call info is forwarded untouched. */
4167 return int4send(fcinfo);
4168}
4169
4170/* Cast int8 -> bytea; can just use int8send() */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4172).
 */
4171Datum
4173{
 /* Pure delegation: the call info is forwarded untouched. */
4174 return int8send(fcinfo);
4175}
4176
4177/*
4178 * appendStringInfoText
4179 *
4180 * Append a text to str.
4181 * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
4182 */
/*
 * NOTE(review): both the signature line (4184) and the single body line
 * (4186) are missing from this listing, so only the empty shell remains.
 * Restore them from the upstream file before building.
 */
4183static void
4185{
4187}
4188
4189/*
4190 * replace_text
4191 * replace all occurrences of 'old_sub_str' in 'orig_str'
4192 * with 'new_sub_str' to form 'new_str'
4193 *
4194 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
4195 * otherwise returns 'new_str'
4196 */
/*
 * NOTE(review): this listing has lost several lines (the embedded numbering
 * has gaps at 4198, 4205, 4210, 4229, 4232, 4235, 4239, 4251 and 4259).
 * Among them are the function name/parameter line and the declarations of
 * "state" and "str", both of which are used below.  Restore them from the
 * upstream file before building.
 */
4197Datum
4199{
4200 text *src_text = PG_GETARG_TEXT_PP(0);
4201 text *from_sub_text = PG_GETARG_TEXT_PP(1);
4202 text *to_sub_text = PG_GETARG_TEXT_PP(2);
4203 int src_text_len;
4204 int from_sub_text_len;
4206 text *ret_text;
4207 int chunk_len;
4208 char *curr_ptr;
4209 char *start_ptr;
4211 bool found;
4212
4213 src_text_len = VARSIZE_ANY_EXHDR(src_text);
4214 from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
4215
4216 /* Return unmodified source string if empty source or pattern */
4217 if (src_text_len < 1 || from_sub_text_len < 1)
4218 {
4219 PG_RETURN_TEXT_P(src_text);
4220 }
4221
 /* The search uses the collation this function was called with. */
4222 text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
4223
4224 found = text_position_next(&state);
4225
4226 /* When the from_sub_text is not found, there is nothing to do. */
4227 if (!found)
4228 {
4230 PG_RETURN_TEXT_P(src_text);
4231 }
 /* start_ptr marks the first source byte not yet copied to the output. */
4233 start_ptr = VARDATA_ANY(src_text);
4234
4236
 /*
  * One pass per match: emit the unmatched text before it, then the
  * replacement, then step start_ptr past the matched bytes.
  */
4237 do
4238 {
4240
4241 /* copy the data skipped over by last text_position_next() */
4242 chunk_len = curr_ptr - start_ptr;
4243 appendBinaryStringInfo(&str, start_ptr, chunk_len);
4244
4245 appendStringInfoText(&str, to_sub_text);
4246
4247 start_ptr = curr_ptr + state.last_match_len;
4248
4249 found = text_position_next(&state);
 /* NOTE(review): the statement governed by this "if" (4251) is missing. */
4250 if (found)
4252 }
4253 while (found);
4254
4255 /* copy trailing data */
4256 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4257 appendBinaryStringInfo(&str, start_ptr, chunk_len);
4258
4260
4261 ret_text = cstring_to_text_with_len(str.data, str.len);
4262 pfree(str.data);
4263
4264 PG_RETURN_TEXT_P(ret_text);
4265}
4266
4267/*
4268 * check_replace_text_has_escape
4269 *
4270 * Returns 0 if text contains no backslashes that need processing.
4271 * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
4272 * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
4273 */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4275); the body
 * reads a parameter named "replace_text".
 */
4274static int
4276{
4277 int result = 0;
4278 const char *p = VARDATA_ANY(replace_text);
4279 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4280
4281 while (p < p_end)
4282 {
4283 /* Find next escape char, if any. */
4284 p = memchr(p, '\\', p_end - p);
4285 if (p == NULL)
4286 break;
 /* Step over the backslash so that *p is the character it escapes. */
4287 p++;
4288 /* Note: a backslash at the end doesn't require extra processing. */
4289 if (p < p_end)
4290 {
4291 if (*p >= '1' && *p <= '9')
4292 return 2; /* Found a submatch specifier, so done */
4293 result = 1; /* Found some other sequence, keep looking */
 /*
  * Skip the escaped character too, so that the second half of a doubled
  * backslash is not taken as the start of a new escape.
  */
4294 p++;
4295 }
4296 }
4297 return result;
4298}
4299
4300/*
4301 * appendStringInfoRegexpSubstr
4302 *
4303 * Append replace_text to str, substituting regexp back references for
4304 * \n escapes. start_ptr is the start of the match in the source string,
4305 * at logical character position data_pos.
4306 */
/*
 * NOTE(review): this listing has lost several lines (embedded numbers 4308,
 * 4338, 4361 and 4372): the first line of the signature and, judging by the
 * surrounding comments, the statements that emit a literal backslash in the
 * "escape at end", "\\" and "unexpected char" branches.  Restore them from
 * the upstream file before building.
 */
4307static void
4309 regmatch_t *pmatch,
4310 char *start_ptr, int data_pos)
4311{
4312 const char *p = VARDATA_ANY(replace_text);
4313 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4314
4315 while (p < p_end)
4316 {
4317 const char *chunk_start = p;
4318 int so;
4319 int eo;
4320
4321 /* Find next escape char, if any. */
4322 p = memchr(p, '\\', p_end - p);
4323 if (p == NULL)
4324 p = p_end;
4325
4326 /* Copy the text we just scanned over, if any. */
4327 if (p > chunk_start)
4328 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
4329
4330 /* Done if at end of string, else advance over escape char. */
4331 if (p >= p_end)
4332 break;
4333 p++;
4334
4335 if (p >= p_end)
4336 {
4337 /* Escape at very end of input. Treat same as unexpected char */
4339 break;
4340 }
4341
4342 if (*p >= '1' && *p <= '9')
4343 {
4344 /* Use the back reference of regexp. */
 /* The digit selects the pmatch[] slot: \1 maps to pmatch[1], and so on. */
4345 int idx = *p - '0';
4346
4347 so = pmatch[idx].rm_so;
4348 eo = pmatch[idx].rm_eo;
4349 p++;
4350 }
4351 else if (*p == '&')
4352 {
4353 /* Use the entire matched string. */
4354 so = pmatch[0].rm_so;
4355 eo = pmatch[0].rm_eo;
4356 p++;
4357 }
4358 else if (*p == '\\')
4359 {
4360 /* \\ means transfer one \ to output. */
4362 p++;
4363 continue;
4364 }
4365 else
4366 {
4367 /*
4368 * If escape char is not followed by any expected char, just treat
4369 * it as ordinary data to copy. (XXX would it be better to throw
4370 * an error?)
4371 */
 /*
  * p is deliberately not advanced here: the character after the backslash
  * is picked up as ordinary text by the next iteration.
  */
4373 continue;
4374 }
4375
 /* Offsets that are not both non-negative are skipped: nothing is appended. */
4376 if (so >= 0 && eo >= 0)
4377 {
4378 /*
4379 * Copy the text that is back reference of regexp. Note so and eo
4380 * are counted in characters not bytes.
4381 */
4382 char *chunk_start;
4383 int chunk_len;
4384
4385 Assert(so >= data_pos);
 /*
  * start_ptr corresponds to character position data_pos, so walk forward
  * (so - data_pos) characters to reach the start of the submatch.
  */
4386 chunk_start = start_ptr;
4387 chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
4388 chunk_len = charlen_to_bytelen(chunk_start, eo - so);
4389 appendBinaryStringInfo(str, chunk_start, chunk_len);
4390 }
4391 }
4392}
4393
4394/*
4395 * replace_text_regexp
4396 *
4397 * replace substring(s) in src_text that match pattern with replace_text.
4398 * The replace_text can contain backslash markers to substitute
4399 * (parts of) the matched text.
4400 *
4401 * cflags: regexp compile flags.
4402 * collation: collation to use.
4403 * search_start: the character (not byte) offset in src_text at which to
4404 * begin searching.
4405 * n: if 0, replace all matches; if > 0, replace only the N'th match.
4406 */
/*
 * NOTE(review): this listing has lost several lines (embedded numbers 4409,
 * 4417, 4426, 4433, 4454, 4519 and 4522).  Among them are one line of the
 * parameter list, the declaration and initialization of "buf", the
 * assignment to "escape_status", and the statements governed by the
 * "if (escape_status > 0) ... else" near the end of the loop.  Restore them
 * from the upstream file before building.
 */
4407text *
4408replace_text_regexp(text *src_text, text *pattern_text,
4410 int cflags, Oid collation,
4411 int search_start, int n)
4412{
4413 text *ret_text;
4414 regex_t *re;
4415 int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4416 int nmatches = 0;
4418 regmatch_t pmatch[10]; /* main match, plus \1 to \9 */
4419 int nmatch = lengthof(pmatch);
4420 pg_wchar *data;
4421 size_t data_len;
4422 int data_pos;
4423 char *start_ptr;
4424 int escape_status;
4425
4427
4428 /* Convert data string to wide characters. */
 /* Sized for one wide char per input byte, plus one extra slot. */
4429 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
4430 data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
4431
4432 /* Check whether replace_text has escapes, especially regexp submatches. */
4434
4435 /* If no regexp submatches, we can use REG_NOSUB. */
4436 if (escape_status < 2)
4437 {
4438 cflags |= REG_NOSUB;
4439 /* Also tell pg_regexec we only want the whole-match location. */
4440 nmatch = 1;
4441 }
4442
4443 /* Prepare the regexp. */
4444 re = RE_compile_and_cache(pattern_text, cflags, collation);
4445
4446 /* start_ptr points to the data_pos'th character of src_text */
4447 start_ptr = (char *) VARDATA_ANY(src_text);
4448 data_pos = 0;
4449
 /*
  * "<=" rather than "<": a search starting exactly at the end of the data
  * is still attempted.
  */
4450 while (search_start <= data_len)
4451 {
4452 int regexec_result;
4453
4455
4456 regexec_result = pg_regexec(re,
4457 data,
4458 data_len,
4459 search_start,
4460 NULL, /* no details */
4461 nmatch,
4462 pmatch,
4463 0);
4464
4465 if (regexec_result == REG_NOMATCH)
4466 break;
4467
4468 if (regexec_result != REG_OKAY)
4469 {
4470 char errMsg[100];
4471
4472 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
4473 ereport(ERROR,
4474 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
4475 errmsg("regular expression failed: %s", errMsg)));
4476 }
4477
4478 /*
4479 * Count matches, and decide whether to replace this match.
4480 */
4481 nmatches++;
4482 if (n > 0 && nmatches != n)
4483 {
4484 /*
4485 * No, so advance search_start, but not start_ptr/data_pos. (Thus,
4486 * we treat the matched text as if it weren't matched, and copy it
4487 * to the output later.)
4488 */
4489 search_start = pmatch[0].rm_eo;
 /* A zero-length match needs an extra step to avoid finding it again. */
4490 if (pmatch[0].rm_so == pmatch[0].rm_eo)
4491 search_start++;
4492 continue;
4493 }
4494
4495 /*
4496 * Copy the text to the left of the match position. Note we are given
4497 * character not byte indexes.
4498 */
4499 if (pmatch[0].rm_so - data_pos > 0)
4500 {
4501 int chunk_len;
4502
4503 chunk_len = charlen_to_bytelen(start_ptr,
4504 pmatch[0].rm_so - data_pos);
4505 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4506
4507 /*
4508 * Advance start_ptr over that text, to avoid multiple rescans of
4509 * it if the replace_text contains multiple back-references.
4510 */
4511 start_ptr += chunk_len;
4512 data_pos = pmatch[0].rm_so;
4513 }
4514
4515 /*
4516 * Copy the replace_text, processing escapes if any are present.
4517 */
4518 if (escape_status > 0)
4520 start_ptr, data_pos);
4521 else
4523
4524 /* Advance start_ptr and data_pos over the matched text. */
4525 start_ptr += charlen_to_bytelen(start_ptr,
4526 pmatch[0].rm_eo - data_pos);
4527 data_pos = pmatch[0].rm_eo;
4528
4529 /*
4530 * If we only want to replace one occurrence, we're done.
4531 */
4532 if (n > 0)
4533 break;
4534
4535 /*
4536 * Advance search position. Normally we start the next search at the
4537 * end of the previous match; but if the match was of zero length, we
4538 * have to advance by one character, or we'd just find the same match
4539 * again.
4540 */
4541 search_start = data_pos;
4542 if (pmatch[0].rm_so == pmatch[0].rm_eo)
4543 search_start++;
4544 }
4545
4546 /*
4547 * Copy the text to the right of the last match.
4548 */
4549 if (data_pos < data_len)
4550 {
4551 int chunk_len;
4552
 /* Everything from start_ptr to the end of the datum, measured in bytes. */
4553 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4554 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4555 }
4556
4557 ret_text = cstring_to_text_with_len(buf.data, buf.len);
4558 pfree(buf.data);
4559 pfree(data);
4560
4561 return ret_text;
4562}
4563
4564/*
4565 * split_part
4566 * parse input string based on provided field separator
4567 * return N'th item (1 based, negative counts from end)
4568 */
/*
 * NOTE(review): this listing has lost many lines (the embedded numbering has
 * gaps at 4570, 4577, 4594, 4603, 4614, 4619, 4639-4640, 4650-4651, 4655,
 * 4662, 4670 and 4673).  Among them are the function name/parameter line,
 * the declaration of "state", the initial assignment of "end_ptr", and the
 * statements governed by several of the "if"/"else" lines below.  Restore
 * them from the upstream file before building.
 */
4569Datum
4571{
4572 text *inputstring = PG_GETARG_TEXT_PP(0);
4573 text *fldsep = PG_GETARG_TEXT_PP(1);
4574 int fldnum = PG_GETARG_INT32(2);
4575 int inputstring_len;
4576 int fldsep_len;
4578 char *start_ptr;
4579 char *end_ptr;
4580 text *result_text;
4581 bool found;
4582
4583 /* field number is 1 based */
4584 if (fldnum == 0)
4585 ereport(ERROR,
4586 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4587 errmsg("field position must not be zero")));
4588
4589 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4590 fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4591
4592 /* return empty string for empty input string */
4593 if (inputstring_len < 1)
4595
4596 /* handle empty field separator */
4597 if (fldsep_len < 1)
4598 {
4599 /* if first or last field, return input string, else empty string */
4600 if (fldnum == 1 || fldnum == -1)
4601 PG_RETURN_TEXT_P(inputstring);
4602 else
4604 }
4605
4606 /* find the first field separator */
4607 text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4608
4609 found = text_position_next(&state);
4610
4611 /* special case if fldsep not found at all */
4612 if (!found)
4613 {
4615 /* if first or last field, return input string, else empty string */
4616 if (fldnum == 1 || fldnum == -1)
4617 PG_RETURN_TEXT_P(inputstring);
4618 else
4620 }
4621
4622 /*
4623 * take care of a negative field number (i.e. count from the right) by
4624 * converting to a positive field number; we need total number of fields
4625 */
4626 if (fldnum < 0)
4627 {
4628 /* we found a fldsep, so there are at least two fields */
4629 int numfields = 2;
4630
 /* Each further separator found adds one more field. */
4631 while (text_position_next(&state))
4632 numfields++;
4633
4634 /* special case of last field does not require an extra pass */
4635 if (fldnum == -1)
4636 {
 /* The last field runs from just past the final separator to end of input. */
4637 start_ptr = text_position_get_match_ptr(&state) + state.last_match_len;
4638 end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
4641 end_ptr - start_ptr));
4642 }
4643
4644 /* else, convert fldnum to positive notation */
 /* e.g. with 3 fields, -2 becomes 2 and -3 becomes 1 */
4645 fldnum += numfields + 1;
4646
4647 /* if nonexistent field, return empty string */
4648 if (fldnum <= 0)
4649 {
4652 }
4653
4654 /* reset to pointing at first match, but now with positive fldnum */
4656 found = text_position_next(&state);
4657 Assert(found);
4658 }
4659
4660 /* identify bounds of first field */
4661 start_ptr = VARDATA_ANY(inputstring);
4663
 /*
  * Walk forward one field per iteration; fldnum counts down and reaches 0
  * exactly when the requested field is bounded by a following separator.
  */
4664 while (found && --fldnum > 0)
4665 {
4666 /* identify bounds of next field */
4667 start_ptr = end_ptr + state.last_match_len;
4668 found = text_position_next(&state);
4669 if (found)
4671 }
4672
4674
4675 if (fldnum > 0)
4676 {
4677 /* N'th field separator not found */
4678 /* if last field requested, return it, else empty string */
4679 if (fldnum == 1)
4680 {
 /* last_len is the number of bytes that precede the last field. */
4681 int last_len = start_ptr - VARDATA_ANY(inputstring);
4682
4683 result_text = cstring_to_text_with_len(start_ptr,
4684 inputstring_len - last_len);
4685 }
4686 else
4687 result_text = cstring_to_text("");
4688 }
4689 else
4690 {
4691 /* non-last field requested */
4692 result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
4693 }
4694
4695 PG_RETURN_TEXT_P(result_text);
4696}
4697
4698/*
4699 * Convenience function to return true when two text params are equal.
4700 */
/*
 * NOTE(review): the signature line (4702) and the first line of the return
 * expression (4704) are missing from this listing.  What remains shows the
 * collation and both text pointers (wrapped with PointerGetDatum) being
 * passed on to a call whose name is not visible here.
 */
4701static bool
4703{
4705 collid,
4706 PointerGetDatum(txt1),
4707 PointerGetDatum(txt2)));
4708}
4709
4710/*
4711 * text_to_array
4712 * parse input string and return text array of elements,
4713 * based on provided field separator
4714 */
/*
 * NOTE(review): this listing has lost several lines (embedded numbers 4716,
 * 4724, 4727 and 4729-4730): the function name/parameter line, the
 * statements governed by both "if" lines below, and the final return.
 * Restore them from the upstream file before building.
 */
4715Datum
4717{
4718 SplitTextOutputData tstate;
4719
4720 /* For array output, tstate should start as all zeroes */
4721 memset(&tstate, 0, sizeof(tstate));
4722
 /* split_text() returning false means the result is to be NULL. */
4723 if (!split_text(fcinfo, &tstate))
4725
 /* astate still NULL: split_text() succeeded without accumulating anything. */
4726 if (tstate.astate == NULL)
4728
4731}
4732
4733/*
4734 * text_to_array_null
4735 * parse input string and return text array of elements,
4736 * based on provided field separator and null string
4737 *
4738 * This is a separate entry point only to prevent the regression tests from
4739 * complaining about different argument sets for the same internal function.
4740 */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4742).
 */
4741Datum
4743{
 /* Pure delegation: the call info is forwarded untouched. */
4744 return text_to_array(fcinfo);
4745}
4746
4747/*
4748 * text_to_table
4749 * parse input string and return table of elements,
4750 * based on provided field separator
4751 */
/*
 * NOTE(review): two lines are missing from this listing (embedded numbers
 * 4753 and 4759): the function name/parameter line and one statement that
 * sat between the astate reset and the tupstore assignment.  Restore them
 * from the upstream file before building.
 */
4752Datum
4754{
4755 ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
4756 SplitTextOutputData tstate;
4757
 /*
  * Table output: astate stays NULL while tupstore and tupdesc are filled
  * from the ReturnSetInfo.
  */
4758 tstate.astate = NULL;
4760 tstate.tupstore = rsi->setResult;
4761 tstate.tupdesc = rsi->setDesc;
4762
 /* split_text()'s boolean result is deliberately ignored here. */
4763 (void) split_text(fcinfo, &tstate);
4764
4765 return (Datum) 0;
4766}
4767
4768/*
4769 * text_to_table_null
4770 * parse input string and return table of elements,
4771 * based on provided field separator and null string
4772 *
4773 * This is a separate entry point only to prevent the regression tests from
4774 * complaining about different argument sets for the same internal function.
4775 */
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (the embedded numbering skips 4777).
 */
4776Datum
4778{
 /* Pure delegation: the call info is forwarded untouched. */
4779 return text_to_table(fcinfo);
4780}
4781
4782/*
4783 * Common code for text_to_array, text_to_array_null, text_to_table
4784 * and text_to_table_null functions.
4785 *
4786 * These are not strict so we have to test for null inputs explicitly.
4787 * Returns false if result is to be null, else returns true.
4788 *
4789 * Note that if the result is valid but empty (zero elements), we return
4790 * without changing *tstate --- caller must handle that case, too.
4791 */
/*
 * NOTE(review): this listing has lost several lines (embedded numbers 4793,
 * 4828, 4855, 4867, 4886 and 4903).  Among them are the function
 * name/parameter line, the declaration of "state", and the assignment of
 * "end_ptr" in the "fetch non-last field" branch.  Restore them from the
 * upstream file before building.
 */
4792static bool
4794{
4795 text *inputstring;
4796 text *fldsep;
4797 text *null_string;
4798 Oid collation = PG_GET_COLLATION();
4799 int inputstring_len;
4800 int fldsep_len;
4801 char *start_ptr;
4802 text *result_text;
4803
4804 /* when input string is NULL, then result is NULL too */
4805 if (PG_ARGISNULL(0))
4806 return false;
4807
4808 inputstring = PG_GETARG_TEXT_PP(0);
4809
4810 /* fldsep can be NULL */
4811 if (!PG_ARGISNULL(1))
4812 fldsep = PG_GETARG_TEXT_PP(1);
4813 else
4814 fldsep = NULL;
4815
4816 /* null_string can be NULL or omitted */
 /* The PG_NARGS() test covers callers that pass only two arguments. */
4817 if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4818 null_string = PG_GETARG_TEXT_PP(2);
4819 else
4820 null_string = NULL;
4821
4822 if (fldsep != NULL)
4823 {
4824 /*
4825 * Normal case with non-null fldsep. Use the text_position machinery
4826 * to search for occurrences of fldsep.
4827 */
4829
4830 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4831 fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4832
4833 /* return empty set for empty input string */
4834 if (inputstring_len < 1)
4835 return true;
4836
4837 /* empty field separator: return input string as a one-element set */
4838 if (fldsep_len < 1)
4839 {
4840 split_text_accum_result(tstate, inputstring,
4841 null_string, collation);
4842 return true;
4843 }
4844
4845 text_position_setup(inputstring, fldsep, collation, &state);
4846
 /* start_ptr marks the first byte of the field currently being extracted. */
4847 start_ptr = VARDATA_ANY(inputstring);
4848
4849 for (;;)
4850 {
4851 bool found;
4852 char *end_ptr;
4853 int chunk_len;
4854
4856
4857 found = text_position_next(&state);
4858 if (!found)
4859 {
4860 /* fetch last field */
4861 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4862 end_ptr = NULL; /* not used, but some compilers complain */
4863 }
4864 else
4865 {
4866 /* fetch non-last field */
4868 chunk_len = end_ptr - start_ptr;
4869 }
4870
4871 /* build a temp text datum to pass to split_text_accum_result */
4872 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4873
4874 /* stash away this field */
4875 split_text_accum_result(tstate, result_text,
4876 null_string, collation);
4877
4878 pfree(result_text);
4879
4880 if (!found)
4881 break;
4882
 /* The next field begins right after the separator just matched. */
4883 start_ptr = end_ptr + state.last_match_len;
4884 }
4885
4887 }
4888 else
4889 {
4890 /*
4891 * When fldsep is NULL, each character in the input string becomes a
4892 * separate element in the result set. The separator is effectively
4893 * the space between characters.
4894 */
4895 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4896
4897 start_ptr = VARDATA_ANY(inputstring);
4898
4899 while (inputstring_len > 0)
4900 {
 /* Step by whole characters: pg_mblen() supplies the byte length of each. */
4901 int chunk_len = pg_mblen(start_ptr);
4902
4904
4905 /* build a temp text datum to pass to split_text_accum_result */
4906 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4907
4908 /* stash away this field */
4909 split_text_accum_result(tstate, result_text,
4910 null_string, collation);
4911
4912 pfree(result_text);
4913
4914 start_ptr += chunk_len;
4915 inputstring_len -= chunk_len;
4916 }
4917 }
4918
4919 return true;
4920}
4921
4922/*
4923 * Add text item to result set (table or array).
4924 *
4925 * This is also responsible for checking to see if the item matches
4926 * the null_string, in which case we should emit NULL instead.
4927 */
4928static void
4930 text *field_value,
4931 text *null_string,
4932 Oid collation)
4933{
4934 bool is_null = false;
4935
4936 if (null_string && text_isequal(field_value, null_string, collation))
4937 is_null = true;
4938
4939 if (tstate->tupstore)
4940 {
4941 Datum values[1];
4942 bool nulls[1];
4943
4944 values[0] = PointerGetDatum(field_value);
4945 nulls[0] = is_null;
4946
4948 tstate->tupdesc,
4949 values,
4950 nulls);
4951 }
4952 else
4953 {
4954 tstate->astate = accumArrayResult(tstate->astate,
4955 PointerGetDatum(field_value),
4956 is_null,
4957 TEXTOID,
4959 }
4960}
4961
4962/*
4963 * array_to_text
4964 * concatenate Cstring representation of input array elements
4965 * using provided field separator
4966 */
4967Datum
4969{
4971 char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4972
4973 PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4974}
4975
4976/*
4977 * array_to_text_null
4978 * concatenate Cstring representation of input array elements
4979 * using provided field separator and null string
4980 *
4981 * This version is not strict so we have to test for null inputs explicitly.
4982 */
4983Datum
4985{
4986 ArrayType *v;
4987 char *fldsep;
4988 char *null_string;
4989
4990 /* returns NULL when first or second parameter is NULL */
4991 if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4993
4994 v = PG_GETARG_ARRAYTYPE_P(0);
4996
4997 /* NULL null string is passed through as a null pointer */
4998 if (!PG_ARGISNULL(2))
4999 null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
5000 else
5001 null_string = NULL;
5002
5003 PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
5004}
5005
5006/*
5007 * common code for array_to_text and array_to_text_null functions
5008 */
5009static text *
5011 const char *fldsep, const char *null_string)
5012{
5013 text *result;
5014 int nitems,
5015 *dims,
5016 ndims;
5017 Oid element_type;
5018 int typlen;
5019 bool typbyval;
5020 char typalign;
5022 bool printed = false;
5023 char *p;
5024 bits8 *bitmap;
5025 int bitmask;
5026 int i;
5027 ArrayMetaState *my_extra;
5028
5029 ndims = ARR_NDIM(v);
5030 dims = ARR_DIMS(v);
5031 nitems = ArrayGetNItems(ndims, dims);
5032
5033 /* if there are no elements, return an empty string */
5034 if (nitems == 0)
5035 return cstring_to_text_with_len("", 0);
5036
5037 element_type = ARR_ELEMTYPE(v);
5039
5040 /*
5041 * We arrange to look up info about element type, including its output
5042 * conversion proc, only once per series of calls, assuming the element
5043 * type doesn't change underneath us.
5044 */
5045 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
5046 if (my_extra == NULL)
5047 {
5048 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5049 sizeof(ArrayMetaState));
5050 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
5051 my_extra->element_type = ~element_type;
5052 }
5053
5054 if (my_extra->element_type != element_type)
5055 {
5056 /*
5057 * Get info about element type, including its output conversion proc
5058 */
5059 get_type_io_data(element_type, IOFunc_output,
5060 &my_extra->typlen, &my_extra->typbyval,
5061 &my_extra->typalign, &my_extra->typdelim,
5062 &my_extra->typioparam, &my_extra->typiofunc);
5063 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
5064 fcinfo->flinfo->fn_mcxt);
5065 my_extra->element_type = element_type;
5066 }
5067 typlen = my_extra->typlen;
5068 typbyval = my_extra->typbyval;
5069 typalign = my_extra->typalign;
5070
5071 p = ARR_DATA_PTR(v);
5072 bitmap = ARR_NULLBITMAP(v);
5073 bitmask = 1;
5074
5075 for (i = 0; i < nitems; i++)
5076 {
5077 Datum itemvalue;
5078 char *value;
5079
5080 /* Get source element, checking for NULL */
5081 if (bitmap && (*bitmap & bitmask) == 0)
5082 {
5083 /* if null_string is NULL, we just ignore null elements */
5084 if (null_string != NULL)
5085 {
5086 if (printed)
5087 appendStringInfo(&buf, "%s%s", fldsep, null_string);
5088 else
5089 appendStringInfoString(&buf, null_string);
5090 printed = true;
5091 }
5092 }
5093 else
5094 {
5095 itemvalue = fetch_att(p, typbyval, typlen);
5096
5097 value = OutputFunctionCall(&my_extra->proc, itemvalue);
5098
5099 if (printed)
5100 appendStringInfo(&buf, "%s%s", fldsep, value);
5101 else
5103 printed = true;
5104
5105 p = att_addlength_pointer(p, typlen, p);
5106 p = (char *) att_align_nominal(p, typalign);
5107 }
5108
5109 /* advance bitmap pointer if any */
5110 if (bitmap)
5111 {
5112 bitmask <<= 1;
5113 if (bitmask == 0x100)
5114 {
5115 bitmap++;
5116 bitmask = 1;
5117 }
5118 }
5119 }
5120
5121 result = cstring_to_text_with_len(buf.data, buf.len);
5122 pfree(buf.data);
5123
5124 return result;
5125}
5126
5127/*
5128 * Workhorse for to_bin, to_oct, and to_hex. Note that base must be > 1 and <=
5129 * 16.
5130 */
5131static inline text *
5133{
5134 const char *digits = "0123456789abcdef";
5135
5136 /* We size the buffer for to_bin's longest possible return value. */
5137 char buf[sizeof(uint64) * BITS_PER_BYTE];
5138 char *const end = buf + sizeof(buf);
5139 char *ptr = end;
5140
5141 Assert(base > 1);
5142 Assert(base <= 16);
5143
5144 do
5145 {
5146 *--ptr = digits[value % base];
5147 value /= base;
5148 } while (ptr > buf && value);
5149
5150 return cstring_to_text_with_len(ptr, end - ptr);
5151}
5152
5153/*
5154 * Convert an integer to a string containing a base-2 (binary) representation
5155 * of the number.
5156 */
5157Datum
5159{
5161
5163}
5164Datum
5166{
5168
5170}
5171
5172/*
5173 * Convert an integer to a string containing a base-8 (oct) representation of
5174 * the number.
5175 */
5176Datum
5178{
5180
5182}
5183Datum
5185{
5187
5189}
5190
5191/*
5192 * Convert an integer to a string containing a base-16 (hex) representation of
5193 * the number.
5194 */
5195Datum
5197{
5199
5201}
5202Datum
5204{
5206
5208}
5209
5210/*
5211 * Return the size of a datum, possibly compressed
5212 *
5213 * Works on any data type
5214 */
5215Datum
5217{
5219 int32 result;
5220 int typlen;
5221
5222 /* On first call, get the input type's typlen, and save at *fn_extra */
5223 if (fcinfo->flinfo->fn_extra == NULL)
5224 {
5225 /* Lookup the datatype of the supplied argument */
5226 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5227
5228 typlen = get_typlen(argtypeid);
5229 if (typlen == 0) /* should not happen */
5230 elog(ERROR, "cache lookup failed for type %u", argtypeid);
5231
5232 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5233 sizeof(int));
5234 *((int *) fcinfo->flinfo->fn_extra) = typlen;
5235 }
5236 else
5237 typlen = *((int *) fcinfo->flinfo->fn_extra);
5238
5239 if (typlen == -1)
5240 {
5241 /* varlena type, possibly toasted */
5242 result = toast_datum_size(value);
5243 }
5244 else if (typlen == -2)
5245 {
5246 /* cstring */
5247 result = strlen(DatumGetCString(value)) + 1;
5248 }
5249 else
5250 {
5251 /* ordinary fixed-width type */
5252 result = typlen;
5253 }
5254
5255 PG_RETURN_INT32(result);
5256}
5257
5258/*
5259 * Return the compression method stored in the compressed attribute. Return
5260 * NULL for non varlena type or uncompressed data.
5261 */
5262Datum
5264{
5265 int typlen;
5266 char *result;
5267 ToastCompressionId cmid;
5268
5269 /* On first call, get the input type's typlen, and save at *fn_extra */
5270 if (fcinfo->flinfo->fn_extra == NULL)
5271 {
5272 /* Lookup the datatype of the supplied argument */
5273 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5274
5275 typlen = get_typlen(argtypeid);
5276 if (typlen == 0) /* should not happen */
5277 elog(ERROR, "cache lookup failed for type %u", argtypeid);
5278
5279 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5280 sizeof(int));
5281 *((int *) fcinfo->flinfo->fn_extra) = typlen;
5282 }
5283 else
5284 typlen = *((int *) fcinfo->flinfo->fn_extra);
5285
5286 if (typlen != -1)
5288
5289 /* get the compression method id stored in the compressed varlena */
5290 cmid = toast_get_compression_id((struct varlena *)
5292 if (cmid == TOAST_INVALID_COMPRESSION_ID)
5294
5295 /* convert compression method id to compression method name */
5296 switch (cmid)
5297 {
5299 result = "pglz";
5300 break;
5302 result = "lz4";
5303 break;
5304 default:
5305 elog(ERROR, "invalid compression method id %d", cmid);
5306 }
5307
5309}
5310
5311/*
5312 * Return the chunk_id of the on-disk TOASTed value. Return NULL if the value
5313 * is un-TOASTed or not on-disk.
5314 */
5315Datum
5317{
5318 int typlen;
5319 struct varlena *attr;
5320 struct varatt_external toast_pointer;
5321
5322 /* On first call, get the input type's typlen, and save at *fn_extra */
5323 if (fcinfo->flinfo->fn_extra == NULL)
5324 {
5325 /* Lookup the datatype of the supplied argument */
5326 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5327
5328 typlen = get_typlen(argtypeid);
5329 if (typlen == 0) /* should not happen */
5330 elog(ERROR, "cache lookup failed for type %u", argtypeid);
5331
5332 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5333 sizeof(int));
5334 *((int *) fcinfo->flinfo->fn_extra) = typlen;
5335 }
5336 else
5337 typlen = *((int *) fcinfo->flinfo->fn_extra);
5338
5339 if (typlen != -1)
5341
5342 attr = (struct varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
5343
5344 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
5346
5347 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
5348
5349 PG_RETURN_OID(toast_pointer.va_valueid);
5350}
5351
5352/*
5353 * string_agg - Concatenates values and returns string.
5354 *
5355 * Syntax: string_agg(value text, delimiter text) RETURNS text
5356 *
5357 * Note: Any NULL values are ignored. The first-call delimiter isn't
5358 * actually used at all, and on subsequent calls the delimiter precedes
5359 * the associated value.
5360 */
5361
5362/* subroutine to initialize state */
5363static StringInfo
5365{
5367 MemoryContext aggcontext;
5368 MemoryContext oldcontext;
5369
5370 if (!AggCheckCallContext(fcinfo, &aggcontext))
5371 {
5372 /* cannot be called directly because of internal-type argument */
5373 elog(ERROR, "string_agg_transfn called in non-aggregate context");
5374 }
5375
5376 /*
5377 * Create state in aggregate context. It'll stay there across subsequent
5378 * calls.
5379 */
5380 oldcontext = MemoryContextSwitchTo(aggcontext);
5382 MemoryContextSwitchTo(oldcontext);
5383
5384 return state;
5385}
5386
5387Datum
5389{
5391
5393
5394 /* Append the value unless null, preceding it with the delimiter. */
5395 if (!PG_ARGISNULL(1))
5396 {
5398 bool isfirst = false;
5399
5400 /*
5401 * You might think we can just throw away the first delimiter, however
5402 * we must keep it as we may be a parallel worker doing partial
5403 * aggregation building a state to send to the main process. We need
5404 * to keep the delimiter of every aggregation so that the combine
5405 * function can properly join up the strings of two separately
5406 * partially aggregated results. The first delimiter is only stripped
5407 * off in the final function. To know how much to strip off the front
5408 * of the string, we store the length of the first delimiter in the
5409 * StringInfo's cursor field, which we don't otherwise need here.
5410 */
5411 if (state == NULL)
5412 {
5413 state = makeStringAggState(fcinfo);
5414 isfirst = true;
5415 }
5416
5417 if (!PG_ARGISNULL(2))
5418 {
5419 text *delim = PG_GETARG_TEXT_PP(2);
5420
5422 if (isfirst)
5423 state->cursor = VARSIZE_ANY_EXHDR(delim);
5424 }
5425
5427 }
5428
5429 /*
5430 * The transition type for string_agg() is declared to be "internal",
5431 * which is a pass-by-value type the same size as a pointer.
5432 */
5433 if (state)
5436}
5437
5438/*
5439 * string_agg_combine
5440 * Aggregate combine function for string_agg(text) and string_agg(bytea)
5441 */
5442Datum
5444{
5445 StringInfo state1;
5446 StringInfo state2;
5447 MemoryContext agg_context;
5448
5449 if (!AggCheckCallContext(fcinfo, &agg_context))
5450 elog(ERROR, "aggregate function called in non-aggregate context");
5451
5452 state1 = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5453 state2 = PG_ARGISNULL(1) ? NULL : (StringInfo) PG_GETARG_POINTER(1);
5454
5455 if (state2 == NULL)
5456 {
5457 /*
5458 * NULL state2 is easy, just return state1, which we know is already
5459 * in the agg_context
5460 */
5461 if (state1 == NULL)
5463 PG_RETURN_POINTER(state1);
5464 }
5465
5466 if (state1 == NULL)
5467 {
5468 /* We must copy state2's data into the agg_context */
5469 MemoryContext old_context;
5470
5471 old_context = MemoryContextSwitchTo(agg_context);
5472 state1 = makeStringAggState(fcinfo);
5473 appendBinaryStringInfo(state1, state2->data, state2->len);
5474 state1->cursor = state2->cursor;
5475 MemoryContextSwitchTo(old_context);
5476 }
5477 else if (state2->len > 0)
5478 {
5479 /* Combine ... state1->cursor does not change in this case */
5480 appendBinaryStringInfo(state1, state2->data, state2->len);
5481 }
5482
5483 PG_RETURN_POINTER(state1);
5484}
5485
5486/*
5487 * string_agg_serialize
5488 * Aggregate serialize function for string_agg(text) and string_agg(bytea)
5489 *
5490 * This is strict, so we need not handle NULL input
5491 */
5492Datum
5494{
5497 bytea *result;
5498
5499 /* cannot be called directly because of internal-type argument */
5500 Assert(AggCheckCallContext(fcinfo, NULL));
5501
5503
5505
5506 /* cursor */
5507 pq_sendint(&buf, state->cursor, 4);
5508
5509 /* data */
5510 pq_sendbytes(&buf, state->data, state->len);
5511
5512 result = pq_endtypsend(&buf);
5513
5514 PG_RETURN_BYTEA_P(result);
5515}
5516
5517/*
5518 * string_agg_deserialize
5519 * Aggregate deserial function for string_agg(text) and string_agg(bytea)
5520 *
5521 * This is strict, so we need not handle NULL input
5522 */
5523Datum
5525{
5526 bytea *sstate;
5527 StringInfo result;
5529 char *data;
5530 int datalen;
5531
5532 /* cannot be called directly because of internal-type argument */
5533 Assert(AggCheckCallContext(fcinfo, NULL));
5534
5535 sstate = PG_GETARG_BYTEA_PP(0);
5536
5537 /*
5538 * Initialize a StringInfo so that we can "receive" it using the standard
5539 * recv-function infrastructure.
5540 */
5542 VARSIZE_ANY_EXHDR(sstate));
5543
5544 result = makeStringAggState(fcinfo);
5545
5546 /* cursor */
5547 result->cursor = pq_getmsgint(&buf, 4);
5548
5549 /* data */
5550 datalen = VARSIZE_ANY_EXHDR(sstate) - 4;
5551 data = (char *) pq_getmsgbytes(&buf, datalen);
5552 appendBinaryStringInfo(result, data, datalen);
5553
5554 pq_getmsgend(&buf);
5555
5556 PG_RETURN_POINTER(result);
5557}
5558
5559Datum
5561{
5563
5564 /* cannot be called directly because of internal-type argument */
5565 Assert(AggCheckCallContext(fcinfo, NULL));
5566
5568
5569 if (state != NULL)
5570 {
5571 /* As per comment in transfn, strip data before the cursor position */
5573 state->len - state->cursor));
5574 }
5575 else
5577}
5578
5579/*
5580 * Prepare cache with fmgr info for the output functions of the datatypes of
5581 * the arguments of a concat-like function, beginning with argument "argidx".
5582 * (Arguments before that will have corresponding slots in the resulting
5583 * FmgrInfo array, but we don't fill those slots.)
5584 */
5585static FmgrInfo *
5587{
5588 FmgrInfo *foutcache;
5589 int i;
5590
5591 /* We keep the info in fn_mcxt so it survives across calls */
5592 foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5593 PG_NARGS() * sizeof(FmgrInfo));
5594
5595 for (i = argidx; i < PG_NARGS(); i++)
5596 {
5597 Oid valtype;
5598 Oid typOutput;
5599 bool typIsVarlena;
5600
5601 valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
5602 if (!OidIsValid(valtype))
5603 elog(ERROR, "could not determine data type of concat() input");
5604
5605 getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
5606 fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
5607 }
5608
5609 fcinfo->flinfo->fn_extra = foutcache;
5610
5611 return foutcache;
5612}
5613
5614/*
5615 * Implementation of both concat() and concat_ws().
5616 *
5617 * sepstr is the separator string to place between values.
5618 * argidx identifies the first argument to concatenate (counting from zero);
5619 * note that this must be constant across any one series of calls.
5620 *
5621 * Returns NULL if result should be NULL, else text value.
5622 */
5623static text *
5624concat_internal(const char *sepstr, int argidx,
5625 FunctionCallInfo fcinfo)
5626{
5627 text *result;
5629 FmgrInfo *foutcache;
5630 bool first_arg = true;
5631 int i;
5632
5633 /*
5634 * concat(VARIADIC some-array) is essentially equivalent to
5635 * array_to_text(), ie concat the array elements with the given separator.
5636 * So we just pass the case off to that code.
5637 */
5638 if (get_fn_expr_variadic(fcinfo->flinfo))
5639 {
5640 ArrayType *arr;
5641
5642 /* Should have just the one argument */
5643 Assert(argidx == PG_NARGS() - 1);
5644
5645 /* concat(VARIADIC NULL) is defined as NULL */
5646 if (PG_ARGISNULL(argidx))
5647 return NULL;
5648
5649 /*
5650 * Non-null argument had better be an array. We assume that any call
5651 * context that could let get_fn_expr_variadic return true will have
5652 * checked that a VARIADIC-labeled parameter actually is an array. So
5653 * it should be okay to just Assert that it's an array rather than
5654 * doing a full-fledged error check.
5655 */
5657
5658 /* OK, safe to fetch the array value */
5659 arr = PG_GETARG_ARRAYTYPE_P(argidx);
5660
5661 /*
5662 * And serialize the array. We tell array_to_text to ignore null
5663 * elements, which matches the behavior of the loop below.
5664 */
5665 return array_to_text_internal(fcinfo, arr, sepstr, NULL);
5666 }
5667
5668 /* Normal case without explicit VARIADIC marker */
5670
5671 /* Get output function info, building it if first time through */
5672 foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
5673 if (foutcache == NULL)
5674 foutcache = build_concat_foutcache(fcinfo, argidx);
5675
5676 for (i = argidx; i < PG_NARGS(); i++)
5677 {
5678 if (!PG_ARGISNULL(i))
5679 {
5681
5682 /* add separator if appropriate */
5683 if (first_arg)
5684 first_arg = false;
5685 else
5686 appendStringInfoString(&str, sepstr);
5687
5688 /* call the appropriate type output function, append the result */
5690 OutputFunctionCall(&foutcache[i], value));
5691 }
5692 }
5693
5694 result = cstring_to_text_with_len(str.data, str.len);
5695 pfree(str.data);
5696
5697 return result;
5698}
5699
5700/*
5701 * Concatenate all arguments. NULL arguments are ignored.
5702 */
5703Datum
5705{
5706 text *result;
5707
5708 result = concat_internal("", 0, fcinfo);
5709 if (result == NULL)
5711 PG_RETURN_TEXT_P(result);
5712}
5713
5714/*
5715 * Concatenate all but first argument value with separators. The first
5716 * parameter is used as the separator. NULL arguments are ignored.
5717 */
5718Datum
5720{
5721 char *sep;
5722 text *result;
5723
5724 /* return NULL when separator is NULL */
5725 if (PG_ARGISNULL(0))
5728
5729 result = concat_internal(sep, 1, fcinfo);
5730 if (result == NULL)
5732 PG_RETURN_TEXT_P(result);
5733}
5734
5735/*
5736 * Return first n characters in the string. When n is negative,
5737 * return all but last |n| characters.
5738 */
5739Datum
5741{
5742 int n = PG_GETARG_INT32(1);
5743
5744 if (n < 0)
5745 {
5747 const char *p = VARDATA_ANY(str);
5748 int len = VARSIZE_ANY_EXHDR(str);
5749 int rlen;
5750
5751 n = pg_mbstrlen_with_len(p, len) + n;
5752 rlen = pg_mbcharcliplen(p, len, n);
5754 }
5755 else
5757}
5758
5759/*
5760 * Return last n characters in the string. When n is negative,
5761 * return all but first |n| characters.
5762 */
5763Datum
5765{
5767 const char *p = VARDATA_ANY(str);
5768 int len = VARSIZE_ANY_EXHDR(str);
5769 int n = PG_GETARG_INT32(1);
5770 int off;
5771
5772 if (n < 0)
5773 n = -n;
5774 else
5775 n = pg_mbstrlen_with_len(p, len) - n;
5776 off = pg_mbcharcliplen(p, len, n);
5777
5779}
5780
5781/*
5782 * Return reversed string
5783 */
5784Datum
5786{
5788 const char *p = VARDATA_ANY(str);
5789 int len = VARSIZE_ANY_EXHDR(str);
5790 const char *endp = p + len;
5791 text *result;
5792 char *dst;
5793
5794 result = palloc(len + VARHDRSZ);
5795 dst = (char *) VARDATA(result) + len;
5796 SET_VARSIZE(result, len + VARHDRSZ);
5797
5799 {
5800 /* multibyte version */
5801 while (p < endp)
5802 {
5803 int sz;
5804
5805 sz = pg_mblen(p);
5806 dst -= sz;
5807 memcpy(dst, p, sz);
5808 p += sz;
5809 }
5810 }
5811 else
5812 {
5813 /* single byte version */
5814 while (p < endp)
5815 *(--dst) = *p++;
5816 }
5817
5818 PG_RETURN_TEXT_P(result);
5819}
5820
5821
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */

/*
 * Step ptr forward by one; raise an error if that reaches end_ptr, i.e. if
 * the format specifier is cut off by the end of the string.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    do { \
        if (++(ptr) >= (end_ptr)) \
            ereport(ERROR, \
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
                     errmsg("unterminated format() type specifier"), \
                     errhint("For a single \"%%\" use \"%%%%\"."))); \
    } while (0)
5835
5836/*
5837 * Returns a formatted string
5838 */
5839Datum
5841{
5842 text *fmt;
5844 const char *cp;
5845 const char *start_ptr;
5846 const char *end_ptr;
5847 text *result;
5848 int arg;
5849 bool funcvariadic;
5850 int nargs;
5851 Datum *elements = NULL;
5852 bool *nulls = NULL;
5853 Oid element_type = InvalidOid;
5854 Oid prev_type = InvalidOid;
5855 Oid prev_width_type = InvalidOid;
5856 FmgrInfo typoutputfinfo;
5857 FmgrInfo typoutputinfo_width;
5858
5859 /* When format string is null, immediately return null */
5860 if (PG_ARGISNULL(0))
5862
5863 /* If argument is marked VARIADIC, expand array into elements */
5864 if (get_fn_expr_variadic(fcinfo->flinfo))
5865 {
5866 ArrayType *arr;
5867 int16 elmlen;
5868 bool elmbyval;
5869 char elmalign;
5870 int nitems;
5871
5872 /* Should have just the one argument */
5873 Assert(PG_NARGS() == 2);
5874
5875 /* If argument is NULL, we treat it as zero-length array */
5876 if (PG_ARGISNULL(1))
5877 nitems = 0;
5878 else
5879 {
5880 /*
5881 * Non-null argument had better be an array. We assume that any
5882 * call context that could let get_fn_expr_variadic return true
5883 * will have checked that a VARIADIC-labeled parameter actually is
5884 * an array. So it should be okay to just Assert that it's an
5885 * array rather than doing a full-fledged error check.
5886 */
5888
5889 /* OK, safe to fetch the array value */
5890 arr = PG_GETARG_ARRAYTYPE_P(1);
5891
5892 /* Get info about array element type */
5893 element_type = ARR_ELEMTYPE(arr);
5894 get_typlenbyvalalign(element_type,
5895 &elmlen, &elmbyval, &elmalign);
5896
5897 /* Extract all array elements */
5898 deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5899 &elements, &nulls, &nitems);
5900 }
5901
5902 nargs = nitems + 1;
5903 funcvariadic = true;
5904 }
5905 else
5906 {
5907 /* Non-variadic case, we'll process the arguments individually */
5908 nargs = PG_NARGS();
5909 funcvariadic = false;
5910 }
5911
5912 /* Setup for main loop. */
5913 fmt = PG_GETARG_TEXT_PP(0);
5914 start_ptr = VARDATA_ANY(fmt);
5915 end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5917 arg = 1; /* next argument position to print */
5918
5919 /* Scan format string, looking for conversion specifiers. */
5920 for (cp = start_ptr; cp < end_ptr; cp++)
5921 {
5922 int argpos;
5923 int widthpos;
5924 int flags;
5925 int width;
5926 Datum value;
5927 bool isNull;
5928 Oid typid;
5929
5930 /*
5931 * If it's not the start of a conversion specifier, just copy it to
5932 * the output buffer.
5933 */
5934 if (*cp != '%')
5935 {
5937 continue;
5938 }
5939
5940 ADVANCE_PARSE_POINTER(cp, end_ptr);
5941
5942 /* Easy case: %% outputs a single % */
5943 if (*cp == '%')
5944 {
5946 continue;
5947 }
5948
5949 /* Parse the optional portions of the format specifier */
5950 cp = text_format_parse_format(cp, end_ptr,
5951 &argpos, &widthpos,
5952 &flags, &width);
5953
5954 /*
5955 * Next we should see the main conversion specifier. Whether or not
5956 * an argument position was present, it's known that at least one
5957 * character remains in the string at this point. Experience suggests
5958 * that it's worth checking that that character is one of the expected
5959 * ones before we try to fetch arguments, so as to produce the least
5960 * confusing response to a mis-formatted specifier.
5961 */
5962 if (strchr("sIL", *cp) == NULL)
5963 ereport(ERROR,
5964 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5965 errmsg("unrecognized format() type specifier \"%.*s\"",
5966 pg_mblen(cp), cp),
5967 errhint("For a single \"%%\" use \"%%%%\".")));
5968
5969 /* If indirect width was specified, get its value */
5970 if (widthpos >= 0)
5971 {
5972 /* Collect the specified or next argument position */
5973 if (widthpos > 0)
5974 arg = widthpos;
5975 if (arg >= nargs)
5976 ereport(ERROR,
5977 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5978 errmsg("too few arguments for format()")));
5979
5980 /* Get the value and type of the selected argument */
5981 if (!funcvariadic)
5982 {
5984 isNull = PG_ARGISNULL(arg);
5985 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5986 }
5987 else
5988 {
5989 value = elements[arg - 1];
5990 isNull = nulls[arg - 1];
5991 typid = element_type;
5992 }
5993 if (!OidIsValid(typid))
5994 elog(ERROR, "could not determine data type of format() input");
5995
5996 arg++;
5997
5998 /* We can treat NULL width the same as zero */
5999 if (isNull)
6000 width = 0;
6001 else if (typid == INT4OID)
6002 width = DatumGetInt32(value);
6003 else if (typid == INT2OID)
6004 width = DatumGetInt16(value);
6005 else
6006 {
6007 /* For less-usual datatypes, convert to text then to int */
6008 char *str;
6009
6010 if (typid != prev_width_type)
6011 {
6012 Oid typoutputfunc;
6013 bool typIsVarlena;
6014
6015 getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
6016 fmgr_info(typoutputfunc, &typoutputinfo_width);
6017 prev_width_type = typid;
6018 }
6019
6020 str = OutputFunctionCall(&typoutputinfo_width, value);
6021
6022 /* pg_strtoint32 will complain about bad data or overflow */
6023 width = pg_strtoint32(str);
6024
6025 pfree(str);
6026 }
6027 }
6028
6029 /* Collect the specified or next argument position */
6030 if (argpos > 0)
6031 arg = argpos;
6032 if (arg >= nargs)
6033 ereport(ERROR,
6034 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6035 errmsg("too few arguments for format()")));
6036
6037 /* Get the value and type of the selected argument */
6038 if (!funcvariadic)
6039 {
6041 isNull = PG_ARGISNULL(arg);
6042 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
6043 }
6044 else
6045 {
6046 value = elements[arg - 1];
6047 isNull = nulls[arg - 1];
6048 typid = element_type;
6049 }
6050 if (!OidIsValid(typid))
6051 elog(ERROR, "could not determine data type of format() input");
6052
6053 arg++;
6054
6055 /*
6056 * Get the appropriate typOutput function, reusing previous one if
6057 * same type as previous argument. That's particularly useful in the
6058 * variadic-array case, but often saves work even for ordinary calls.
6059 */
6060 if (typid != prev_type)
6061 {
6062 Oid typoutputfunc;
6063 bool typIsVarlena;
6064
6065 getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
6066 fmgr_info(typoutputfunc, &typoutputfinfo);
6067 prev_type = typid;
6068 }
6069
6070 /*
6071 * And now we can format the value.
6072 */
6073 switch (*cp)
6074 {
6075 case 's':
6076 case 'I':
6077 case 'L':
6078 text_format_string_conversion(&str, *cp, &typoutputfinfo,
6079 value, isNull,
6080 flags, width);
6081 break;
6082 default:
6083 /* should not get here, because of previous check */
6084 ereport(ERROR,
6085 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6086 errmsg("unrecognized format() type specifier \"%.*s\"",
6087 pg_mblen(cp), cp),
6088 errhint("For a single \"%%\" use \"%%%%\".")));
6089 break;
6090 }
6091 }
6092
6093 /* Don't need deconstruct_array results anymore. */
6094 if (elements != NULL)
6095 pfree(elements);
6096 if (nulls != NULL)
6097 pfree(nulls);
6098
6099 /* Generate results. */
6100 result = cstring_to_text_with_len(str.data, str.len);
6101 pfree(str.data);
6102
6103 PG_RETURN_TEXT_P(result);
6104}
6105
6106/*
6107 * Parse contiguous digits as a decimal number.
6108 *
6109 * Returns true if some digits could be parsed.
6110 * The value is returned into *value, and *ptr is advanced to the next
6111 * character to be parsed.
6112 *
6113 * Note parsing invariant: at least one character is known available before
6114 * string end (end_ptr) at entry, and this is still true at exit.
6115 */
6116static bool
6117text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
6118{
6119 bool found = false;
6120 const char *cp = *ptr;
6121 int val = 0;
6122
6123 while (*cp >= '0' && *cp <= '9')
6124 {
6125 int8 digit = (*cp - '0');
6126
6127 if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
6129 ereport(ERROR,
6130 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
6131 errmsg("number is out of range")));
6132 ADVANCE_PARSE_POINTER(cp, end_ptr);
6133 found = true;
6134 }
6135
6136 *ptr = cp;
6137 *value = val;
6138
6139 return found;
6140}
6141
6142/*
6143 * Parse a format specifier (generally following the SUS printf spec).
6144 *
6145 * We have already advanced over the initial '%', and we are looking for
6146 * [argpos][flags][width]type (but the type character is not consumed here).
6147 *
6148 * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
6149 * Output parameters:
6150 * argpos: argument position for value to be printed. -1 means unspecified.
6151 * widthpos: argument position for width. Zero means the argument position
6152 * was unspecified (ie, take the next arg) and -1 means no width
6153 * argument (width was omitted or specified as a constant).
6154 * flags: bitmask of flags.
6155 * width: directly-specified width value. Zero means the width was omitted
6156 * (note it's not necessary to distinguish this case from an explicit
6157 * zero width value).
6158 *
6159 * The function result is the next character position to be parsed, ie, the
6160 * location where the type character is/should be.
6161 *
6162 * Note parsing invariant: at least one character is known available before
6163 * string end (end_ptr) at entry, and this is still true at exit.
6164 */
6165static const char *
6166text_format_parse_format(const char *start_ptr, const char *end_ptr,
6167 int *argpos, int *widthpos,
6168 int *flags, int *width)
6169{
6170 const char *cp = start_ptr;
6171 int n;
6172
6173 /* set defaults for output parameters */
6174 *argpos = -1;
6175 *widthpos = -1;
6176 *flags = 0;
6177 *width = 0;
6178
6179 /* try to identify first number */
6180 if (text_format_parse_digits(&cp, end_ptr, &n))
6181 {
6182 if (*cp != '$')
6183 {
6184 /* Must be just a width and a type, so we're done */
6185 *width = n;
6186 return cp;
6187 }
6188 /* The number was argument position */
6189 *argpos = n;
6190 /* Explicit 0 for argument index is immediately refused */
6191 if (n == 0)
6192 ereport(ERROR,
6193 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6194 errmsg("format specifies argument 0, but arguments are numbered from 1")));
6195 ADVANCE_PARSE_POINTER(cp, end_ptr);
6196 }
6197
6198 /* Handle flags (only minus is supported now) */
6199 while (*cp == '-')
6200 {
6201 *flags |= TEXT_FORMAT_FLAG_MINUS;
6202 ADVANCE_PARSE_POINTER(cp, end_ptr);
6203 }
6204
6205 if (*cp == '*')
6206 {
6207 /* Handle indirect width */
6208 ADVANCE_PARSE_POINTER(cp, end_ptr);
6209 if (text_format_parse_digits(&cp, end_ptr, &n))
6210 {
6211 /* number in this position must be closed by $ */
6212 if (*cp != '$')
6213 ereport(ERROR,
6214 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6215 errmsg("width argument position must be ended by \"$\"")));
6216 /* The number was width argument position */
6217 *widthpos = n;
6218 /* Explicit 0 for argument index is immediately refused */
6219 if (n == 0)
6220 ereport(ERROR,
6221 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6222 errmsg("format specifies argument 0, but arguments are numbered from 1")));
6223 ADVANCE_PARSE_POINTER(cp, end_ptr);
6224 }
6225 else
6226 *widthpos = 0; /* width's argument position is unspecified */
6227 }
6228 else
6229 {
6230 /* Check for direct width specification */
6231 if (text_format_parse_digits(&cp, end_ptr, &n))
6232 *width = n;
6233 }
6234
6235 /* cp should now be pointing at type character */
6236 return cp;
6237}
6238
6239/*
6240 * Format a %s, %I, or %L conversion
6241 */
6242static void
6244 FmgrInfo *typOutputInfo,
6245 Datum value, bool isNull,
6246 int flags, int width)
6247{
6248 char *str;
6249
6250 /* Handle NULL arguments before trying to stringify the value. */
6251 if (isNull)
6252 {
6253 if (conversion == 's')
6254 text_format_append_string(buf, "", flags, width);
6255 else if (conversion == 'L')
6256 text_format_append_string(buf, "NULL", flags, width);
6257 else if (conversion == 'I')
6258 ereport(ERROR,
6259 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
6260 errmsg("null values cannot be formatted as an SQL identifier")));
6261 return;
6262 }
6263
6264 /* Stringify. */
6265 str = OutputFunctionCall(typOutputInfo, value);
6266
6267 /* Escape. */
6268 if (conversion == 'I')
6269 {
6270 /* quote_identifier may or may not allocate a new string. */
6272 }
6273 else if (conversion == 'L')
6274 {
6275 char *qstr = quote_literal_cstr(str);
6276
6277 text_format_append_string(buf, qstr, flags, width);
6278 /* quote_literal_cstr() always allocates a new string */
6279 pfree(qstr);
6280 }
6281 else
6282 text_format_append_string(buf, str, flags, width);
6283
6284 /* Cleanup. */
6285 pfree(str);
6286}
6287
6288/*
6289 * Append str to buf, padding as directed by flags/width
6290 */
6291static void
6293 int flags, int width)
6294{
6295 bool align_to_left = false;
6296 int len;
6297
6298 /* fast path for typical easy case */
6299 if (width == 0)
6300 {
6302 return;
6303 }
6304
6305 if (width < 0)
6306 {
6307 /* Negative width: implicit '-' flag, then take absolute value */
6308 align_to_left = true;
6309 /* -INT_MIN is undefined */
6310 if (width <= INT_MIN)
6311 ereport(ERROR,
6312 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
6313 errmsg("number is out of range")));
6314 width = -width;
6315 }
6316 else if (flags & TEXT_FORMAT_FLAG_MINUS)
6317 align_to_left = true;
6318
6319 len = pg_mbstrlen(str);
6320 if (align_to_left)
6321 {
6322 /* left justify */
6324 if (len < width)
6325 appendStringInfoSpaces(buf, width - len);
6326 }
6327 else
6328 {
6329 /* right justify */
6330 if (len < width)
6331 appendStringInfoSpaces(buf, width - len);
6333 }
6334}
6335
6336/*
6337 * text_format_nv - nonvariadic wrapper for text_format function.
6338 *
6339 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
6340 * which checks that all built-in functions that share the implementing C
6341 * function take the same number of arguments.
6342 */
6343Datum
6345{
6346 return text_format(fcinfo);
6347}
6348
/*
 * Helper function for Levenshtein distance functions.  Faster than memcmp(),
 * for this use case.
 *
 * Returns true if the first "len" bytes of s1 and s2 are identical.  The
 * bytes are compared from the last one backwards; a len of zero (or less)
 * compares nothing and returns true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	while (len > 0)
	{
		len--;
		if (s1[len] != s2[len])
			return false;
	}
	return true;
}
6364
6365/* Expand each Levenshtein distance variant */
6366#include "levenshtein.c"
6367#define LEVENSHTEIN_LESS_EQUAL
6368#include "levenshtein.c"
6369
6370
6371/*
6372 * The following *ClosestMatch() functions can be used to determine whether a
6373 * user-provided string resembles any known valid values, which is useful for
6374 * providing hints in log messages, among other things. Use these functions
6375 * like so:
6376 *
6377 * initClosestMatch(&state, source_string, max_distance);
6378 *
6379 * for (int i = 0; i < num_valid_strings; i++)
6380 * updateClosestMatch(&state, valid_strings[i]);
6381 *
6382 * closestMatch = getClosestMatch(&state);
6383 */
6384
6385/*
6386 * Initialize the given state with the source string and maximum Levenshtein
6387 * distance to consider.
6388 */
6389void
6391{
6392 Assert(state);
6393 Assert(max_d >= 0);
6394
6395 state->source = source;
6396 state->min_d = -1;
6397 state->max_d = max_d;
6398 state->match = NULL;
6399}
6400
6401/*
6402 * If the candidate string is a closer match than the current one saved (or
6403 * there is no match saved), save it as the closest match.
6404 *
6405 * If the source or candidate string is NULL, empty, or too long, this function
6406 * takes no action. Likewise, if the Levenshtein distance exceeds the maximum
6407 * allowed or more than half the characters are different, no action is taken.
6408 */
6409void
6411{
6412 int dist;
6413
6414 Assert(state);
6415
6416 if (state->source == NULL || state->source[0] == '\0' ||
6417 candidate == NULL || candidate[0] == '\0')
6418 return;
6419
6420 /*
6421 * To avoid ERROR-ing, we check the lengths here instead of setting
6422 * 'trusted' to false in the call to varstr_levenshtein_less_equal().
6423 */
6424 if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||
6425 strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)
6426 return;
6427
6428 dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),
6429 candidate, strlen(candidate), 1, 1, 1,
6430 state->max_d, true);
6431 if (dist <= state->max_d &&
6432 dist <= strlen(state->source) / 2 &&
6433 (state->min_d == -1 || dist < state->min_d))
6434 {
6435 state->min_d = dist;
6436 state->match = candidate;
6437 }
6438}
6439
6440/*
6441 * Return the closest match. If no suitable candidates were provided via
6442 * updateClosestMatch(), return NULL.
6443 */
6444const char *
6446{
6447 Assert(state);
6448
6449 return state->match;
6450}
6451
6452
6453/*
6454 * Unicode support
6455 */
6456
6459{
6460 UnicodeNormalizationForm form = -1;
6461
6462 /*
6463 * Might as well check this while we're here.
6464 */
6466 ereport(ERROR,
6467 (errcode(ERRCODE_SYNTAX_ERROR),
6468 errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
6469
6470 if (pg_strcasecmp(formstr, "NFC") == 0)
6471 form = UNICODE_NFC;
6472 else if (pg_strcasecmp(formstr, "NFD") == 0)
6473 form = UNICODE_NFD;
6474 else if (pg_strcasecmp(formstr, "NFKC") == 0)
6475 form = UNICODE_NFKC;
6476 else if (pg_strcasecmp(formstr, "NFKD") == 0)
6477 form = UNICODE_NFKD;
6478 else
6479 ereport(ERROR,
6480 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6481 errmsg("invalid normalization form: %s", formstr)));
6482
6483 return form;
6484}
6485
6486/*
6487 * Returns version of Unicode used by Postgres in "major.minor" format (the
6488 * same format as the Unicode version reported by ICU). The third component
6489 * ("update version") never involves additions to the character repertoire and
6490 * is unimportant for most purposes.
6491 *
6492 * See: https://unicode.org/versions/
6493 */
6494Datum
6496{
6498}
6499
6500/*
6501 * Returns version of Unicode used by ICU, if enabled; otherwise NULL.
6502 */
6503Datum
6505{
6506#ifdef USE_ICU
6507 PG_RETURN_TEXT_P(cstring_to_text(U_UNICODE_VERSION));
6508#else
6510#endif
6511}
6512
6513/*
6514 * Check whether the string contains only assigned Unicode code
6515 * points. Requires that the database encoding is UTF-8.
6516 */
6517Datum
6519{
6521 unsigned char *p;
6522 int size;
6523
6525 ereport(ERROR,
6526 (errmsg("Unicode categorization can only be performed if server encoding is UTF8")));
6527
6528 /* convert to pg_wchar */
6530 p = (unsigned char *) VARDATA_ANY(input);
6531 for (int i = 0; i < size; i++)
6532 {
6533 pg_wchar uchar = utf8_to_unicode(p);
6534 int category = unicode_category(uchar);
6535
6536 if (category == PG_U_UNASSIGNED)
6537 PG_RETURN_BOOL(false);
6538
6539 p += pg_utf_mblen(p);
6540 }
6541
6542 PG_RETURN_BOOL(true);
6543}
6544
6545Datum
6547{
6549 char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
6551 int size;
6552 pg_wchar *input_chars;
6553 pg_wchar *output_chars;
6554 unsigned char *p;
6555 text *result;
6556 int i;
6557
6558 form = unicode_norm_form_from_string(formstr);
6559
6560 /* convert to pg_wchar */
6562 input_chars = palloc((size + 1) * sizeof(pg_wchar));
6563 p = (unsigned char *) VARDATA_ANY(input);
6564 for (i = 0; i < size; i++)
6565 {
6566 input_chars[i] = utf8_to_unicode(p);
6567 p += pg_utf_mblen(p);
6568 }
6569 input_chars[i] = (pg_wchar) '\0';
6570 Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
6571
6572 /* action */
6573 output_chars = unicode_normalize(form, input_chars);
6574
6575 /* convert back to UTF-8 string */
6576 size = 0;
6577 for (pg_wchar *wp = output_chars; *wp; wp++)
6578 {
6579 unsigned char buf[4];
6580
6581 unicode_to_utf8(*wp, buf);
6582 size += pg_utf_mblen(buf);
6583 }
6584
6585 result = palloc(size + VARHDRSZ);
6586 SET_VARSIZE(result, size + VARHDRSZ);
6587
6588 p = (unsigned char *) VARDATA_ANY(result);
6589 for (pg_wchar *wp = output_chars; *wp; wp++)
6590 {
6591 unicode_to_utf8(*wp, p);
6592 p += pg_utf_mblen(p);
6593 }
6594 Assert((char *) p == (char *) result + size + VARHDRSZ);
6595
6596 PG_RETURN_TEXT_P(result);
6597}
6598
6599/*
6600 * Check whether the string is in the specified Unicode normalization form.
6601 *
6602 * This is done by converting the string to the specified normal form and then
6603 * comparing that to the original string. To speed that up, we also apply the
6604 * "quick check" algorithm specified in UAX #15, which can give a yes or no
6605 * answer for many strings by just scanning the string once.
6606 *
6607 * This function should generally be optimized for the case where the string
6608 * is in fact normalized. In that case, we'll end up looking at the entire
6609 * string, so it's probably not worth doing any incremental conversion etc.
6610 */
6611Datum
6613{
6615 char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
6617 int size;
6618 pg_wchar *input_chars;
6619 pg_wchar *output_chars;
6620 unsigned char *p;
6621 int i;
6622 UnicodeNormalizationQC quickcheck;
6623 int output_size;
6624 bool result;
6625
6626 form = unicode_norm_form_from_string(formstr);
6627
6628 /* convert to pg_wchar */
6630 input_chars = palloc((size + 1) * sizeof(pg_wchar));
6631 p = (unsigned char *) VARDATA_ANY(input);
6632 for (i = 0; i < size; i++)
6633 {
6634 input_chars[i] = utf8_to_unicode(p);
6635 p += pg_utf_mblen(p);
6636 }
6637 input_chars[i] = (pg_wchar) '\0';
6638 Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
6639
6640 /* quick check (see UAX #15) */
6641 quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
6642 if (quickcheck == UNICODE_NORM_QC_YES)
6643 PG_RETURN_BOOL(true);
6644 else if (quickcheck == UNICODE_NORM_QC_NO)
6645 PG_RETURN_BOOL(false);
6646
6647 /* normalize and compare with original */
6648 output_chars = unicode_normalize(form, input_chars);
6649
6650 output_size = 0;
6651 for (pg_wchar *wp = output_chars; *wp; wp++)
6652 output_size++;
6653
6654 result = (size == output_size) &&
6655 (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
6656
6657 PG_RETURN_BOOL(result);
6658}
6659
/*
 * Check if first n chars are hexadecimal digits
 *
 * The caller must make sure at least n bytes are readable at instr; no
 * terminator is looked for.  With n == 0 the result is true.
 */
static bool
isxdigits_n(const char *instr, size_t n)
{
	for (size_t i = 0; i < n; i++)
	{
		/* cast first: isxdigit() must not be given a negative char value */
		if (!isxdigit((unsigned char) instr[i]))
			return false;
	}

	return true;
}
6672
6673static unsigned int
6674hexval(unsigned char c)
6675{
6676 if (c >= '0' && c <= '9')
6677 return c - '0';
6678 if (c >= 'a' && c <= 'f')
6679 return c - 'a' + 0xA;
6680 if (c >= 'A' && c <= 'F')
6681 return c - 'A' + 0xA;
6682 elog(ERROR, "invalid hexadecimal digit");
6683 return 0; /* not reached */
6684}
6685
/*
 * Translate string with hexadecimal digits to number
 *
 * The first n characters of instr are read as a big-endian hexadecimal
 * number.  Every character is passed to hexval(), which raises an error for
 * a non-hex character.  The callers in this file pass n of 4, 6 or 8; the
 * result only has room for 8 digits, so higher-order digits beyond that
 * would be shifted out.
 */
static unsigned int
hexval_n(const char *instr, size_t n)
{
	unsigned int result = 0;

	/*
	 * Shift the running result rather than each digit, so the shift count
	 * is always 4 whatever n is.
	 */
	for (size_t i = 0; i < n; i++)
		result = (result << 4) | hexval((unsigned char) instr[i]);

	return result;
}
6699
6700/*
6701 * Replaces Unicode escape sequences by Unicode characters
6702 */
6703Datum
6705{
6706 text *input_text = PG_GETARG_TEXT_PP(0);
6707 char *instr;
6708 int len;
6710 text *result;
6711 pg_wchar pair_first = 0;
6712 char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
6713
6714 instr = VARDATA_ANY(input_text);
6715 len = VARSIZE_ANY_EXHDR(input_text);
6716
6718
6719 while (len > 0)
6720 {
6721 if (instr[0] == '\\')
6722 {
6723 if (len >= 2 &&
6724 instr[1] == '\\')
6725 {
6726 if (pair_first)
6727 goto invalid_pair;
6728 appendStringInfoChar(&str, '\\');
6729 instr += 2;
6730 len -= 2;
6731 }
6732 else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
6733 (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
6734 {
6735 pg_wchar unicode;
6736 int offset = instr[1] == 'u' ? 2 : 1;
6737
6738 unicode = hexval_n(instr + offset, 4);
6739
6740 if (!is_valid_unicode_codepoint(unicode))
6741 ereport(ERROR,
6742 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6743 errmsg("invalid Unicode code point: %04X", unicode));
6744
6745 if (pair_first)
6746 {
6747 if (is_utf16_surrogate_second(unicode))
6748 {
6749 unicode = surrogate_pair_to_codepoint(pair_first, unicode);
6750 pair_first = 0;
6751 }
6752 else
6753 goto invalid_pair;
6754 }
6755 else if (is_utf16_surrogate_second(unicode))
6756 goto invalid_pair;
6757
6758 if (is_utf16_surrogate_first(unicode))
6759 pair_first = unicode;
6760 else
6761 {
6762 pg_unicode_to_server(unicode, (unsigned char *) cbuf);
6764 }
6765
6766 instr += 4 + offset;
6767 len -= 4 + offset;
6768 }
6769 else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
6770 {
6771 pg_wchar unicode;
6772
6773 unicode = hexval_n(instr + 2, 6);
6774
6775 if (!is_valid_unicode_codepoint(unicode))
6776 ereport(ERROR,
6777 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6778 errmsg("invalid Unicode code point: %04X", unicode));
6779
6780 if (pair_first)
6781 {
6782 if (is_utf16_surrogate_second(unicode))
6783 {
6784 unicode = surrogate_pair_to_codepoint(pair_first, unicode);
6785 pair_first = 0;
6786 }
6787 else
6788 goto invalid_pair;
6789 }
6790 else if (is_utf16_surrogate_second(unicode))
6791 goto invalid_pair;
6792
6793 if (is_utf16_surrogate_first(unicode))
6794 pair_first = unicode;
6795 else
6796 {
6797 pg_unicode_to_server(unicode, (unsigned char *) cbuf);
6799 }
6800
6801 instr += 8;
6802 len -= 8;
6803 }
6804 else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
6805 {
6806 pg_wchar unicode;
6807
6808 unicode = hexval_n(instr + 2, 8);
6809
6810 if (!is_valid_unicode_codepoint(unicode))
6811 ereport(ERROR,
6812 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6813 errmsg("invalid Unicode code point: %04X", unicode));
6814
6815 if (pair_first)
6816 {
6817 if (is_utf16_surrogate_second(unicode))
6818 {
6819 unicode = surrogate_pair_to_codepoint(pair_first, unicode);
6820 pair_first = 0;
6821 }
6822 else
6823 goto invalid_pair;
6824 }
6825 else if (is_utf16_surrogate_second(unicode))
6826 goto invalid_pair;
6827
6828 if (is_utf16_surrogate_first(unicode))
6829 pair_first = unicode;
6830 else
6831 {
6832 pg_unicode_to_server(unicode, (unsigned char *) cbuf);
6834 }
6835
6836 instr += 10;
6837 len -= 10;
6838 }
6839 else
6840 ereport(ERROR,
6841 (errcode(ERRCODE_SYNTAX_ERROR),
6842 errmsg("invalid Unicode escape"),
6843 errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
6844 }
6845 else
6846 {
6847 if (pair_first)
6848 goto invalid_pair;
6849
6850 appendStringInfoChar(&str, *instr++);
6851 len--;
6852 }
6853 }
6854
6855 /* unfinished surrogate pair? */
6856 if (pair_first)
6857 goto invalid_pair;
6858
6859 result = cstring_to_text_with_len(str.data, str.len);
6860 pfree(str.data);
6861
6862 PG_RETURN_TEXT_P(result);
6863
6864invalid_pair:
6865 ereport(ERROR,
6866 (errcode(ERRCODE_SYNTAX_ERROR),
6867 errmsg("invalid Unicode surrogate pair")));
6868 PG_RETURN_NULL(); /* keep compiler quiet */
6869}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
#define ARR_NDIM(a)
Definition: array.h:290
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:263
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define ARR_NULLBITMAP(a)
Definition: array.h:300
#define ARR_ELEMTYPE(a)
Definition: array.h:292
#define PG_RETURN_ARRAYTYPE_P(x)
Definition: array.h:265
#define ARR_DIMS(a)
Definition: array.h:294
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5350
ArrayType * construct_empty_array(Oid elmtype)
Definition: arrayfuncs.c:3580
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3631
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5420
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define TextDatumGetCString(d)
Definition: builtins.h:98
@ BYTEA_OUTPUT_HEX
Definition: bytea.h:22
@ BYTEA_OUTPUT_ESCAPE
Definition: bytea.h:21
#define NameStr(name)
Definition: c.h:717
#define unconstify(underlying_type, expr)
Definition: c.h:1216
NameData * Name
Definition: c.h:715
#define Min(x, y)
Definition: c.h:975
#define Max(x, y)
Definition: c.h:969
#define VARHDRSZ
Definition: c.h:663
int64_t int64
Definition: c.h:499
int16_t int16
Definition: c.h:497
int8_t int8
Definition: c.h:496
uint8 bits8
Definition: c.h:509
int32_t int32
Definition: c.h:498
uint64_t uint64
Definition: c.h:503
uint16_t uint16
Definition: c.h:501
#define unlikely(x)
Definition: c.h:347
uint32_t uint32
Definition: c.h:502
#define lengthof(array)
Definition: c.h:759
#define OidIsValid(objectId)
Definition: c.h:746
size_t Size
Definition: c.h:576
Oid collid
Size toast_datum_size(Datum value)
Definition: detoast.c:601
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:545
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition: detoast.h:22
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define LOG
Definition: elog.h:31
#define ereturn(context, dummy_value,...)
Definition: elog.h:278
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:149
uint64 hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
Definition: encode.c:217
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition: encode.c:181
#define MaxAllocSize
Definition: fe_memutils.h:22
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
Datum DirectFunctionCall2Coll(PGFunction func, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:812
struct varlena * pg_detoast_datum_packed(struct varlena *datum)
Definition: fmgr.c:1864
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:137
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1683
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2044
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:1910
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define DatumGetByteaPSlice(X, m, n)
Definition: fmgr.h:303
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DatumGetBpCharPP(X)
Definition: fmgr.h:293
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define PG_ARGISNULL(n)
Definition: fmgr.h:209
#define PG_RETURN_INT64(x)
Definition: fmgr.h:368
struct FmgrInfo FmgrInfo
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_NARGS()
Definition: fmgr.h:203
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_INT16(x)
Definition: fmgr.h:356
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define DatumGetTextPSlice(X, m, n)
Definition: fmgr.h:304
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_NAME(x)
Definition: fmgr.h:363
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_GETARG_BYTEA_P_COPY(n)
Definition: fmgr.h:314
#define PG_RETURN_OID(x)
Definition: fmgr.h:360
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
#define MAT_SRF_USE_EXPECTED_DESC
Definition: funcapi.h:296
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
Assert(PointerIsAligned(start, uint64))
return str start
const char * str
for(;;)
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:66
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:186
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:167
#define nitems(x)
Definition: indent.h:31
FILE * input
long val
Definition: informix.c:689
static struct @165 value
int digits
Definition: informix.c:691
static char * locale
Definition: initdb.c:140
Datum int8send(PG_FUNCTION_ARGS)
Definition: int8.c:94
Datum int2send(PG_FUNCTION_ARGS)
Definition: int.c:98
Datum int4send(PG_FUNCTION_ARGS)
Definition: int.c:322
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:187
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:151
int y
Definition: isn.c:76
int x
Definition: isn.c:75
int i
Definition: isn.c:77
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
#define MAX_LEVENSHTEIN_STRLEN
Definition: levenshtein.c:26
List * lappend(List *list, void *datum)
Definition: list.c:339
void list_free(List *list)
Definition: list.c:1546
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:3047
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2411
void get_type_io_data(Oid typid, IOFuncSelector which_func, int16 *typlen, bool *typbyval, char *typalign, char *typdelim, Oid *typioparam, Oid *func)
Definition: lsyscache.c:2465
int16 get_typlen(Oid typid)
Definition: lsyscache.c:2337
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2972
@ IOFunc_output
Definition: lsyscache.h:37
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
unsigned int pg_wchar
Definition: mbprint.c:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:1057
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1125
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:1037
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1083
void pg_unicode_to_server(pg_wchar c, unsigned char *s)
Definition: mbutils.c:864
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:986
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1256
char * pstrdup(const char *in)
Definition: mcxt.c:2322
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:2167
void pfree(void *pointer)
Definition: mcxt.c:2147
void * palloc0(Size size)
Definition: mcxt.c:1970
void * palloc(Size size)
Definition: mcxt.c:1940
MemoryContext CurrentMemoryContext
Definition: mcxt.c:159
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:4614
int32 pg_strtoint32(const char *s)
Definition: numutils.c:383
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
void * arg
static uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.h:363
#define BITS_PER_BYTE
#define NAMEDATALEN
#define MAXPGPATH
#define PG_CACHE_LINE_SIZE
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:172
#define NIL
Definition: pg_list.h:68
bool pg_strxfrm_enabled(pg_locale_t locale)
Definition: pg_locale.c:1370
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1188
int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:1336
bool pg_strxfrm_prefix_enabled(pg_locale_t locale)
Definition: pg_locale.c:1422
int pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
Definition: pg_locale.c:1356
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1386
size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1433
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:72
char typalign
Definition: pg_type.h:176
#define pg_utf_mblen
Definition: pg_wchar.h:633
@ PG_UTF8
Definition: pg_wchar.h:232
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
#define MAX_UNICODE_EQUIVALENT_STRING
Definition: pg_wchar.h:329
static bool is_valid_unicode_codepoint(pg_wchar c)
Definition: pg_wchar.h:519
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
Definition: pg_wchar.h:537
static bool is_utf16_surrogate_first(pg_wchar c)
Definition: pg_wchar.h:525
static bool is_utf16_surrogate_second(pg_wchar c)
Definition: pg_wchar.h:531
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
void canonicalize_path(char *path)
Definition: path.c:337
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:227
static bool DatumGetBool(Datum X)
Definition: postgres.h:95
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
static Name DatumGetName(Datum X)
Definition: postgres.h:365
uintptr_t Datum
Definition: postgres.h:69
static char * DatumGetCString(Datum X)
Definition: postgres.h:340
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
static int16 DatumGetInt16(Datum X)
Definition: postgres.h:167
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:207
#define InvalidOid
Definition: postgres_ext.h:35
unsigned int Oid
Definition: postgres_ext.h:30
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:415
void pq_sendbytes(StringInfo buf, const void *data, int datalen)
Definition: pqformat.c:126
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:172
void pq_getmsgend(StringInfo msg)
Definition: pqformat.c:635
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:546
void pq_copymsgbytes(StringInfo msg, void *buf, int datalen)
Definition: pqformat.c:528
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:326
const char * pq_getmsgbytes(StringInfo msg, int datalen)
Definition: pqformat.c:508
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:346
static void pq_sendint(StringInfo buf, uint32 i, int b)
Definition: pqformat.h:171
char * c
char * s1
char * s2
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:103
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define REG_NOMATCH
Definition: regex.h:216
#define regmatch_t
Definition: regex.h:246
#define REG_OKAY
Definition: regex.h:215
#define REG_NOSUB
Definition: regex.h:185
#define regex_t
Definition: regex.h:245
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:185
regex_t * RE_compile_and_cache(text *text_re, int cflags, Oid collation)
Definition: regexp.c:141
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:13019
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:93
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:37
bool scanner_isspace(char ch)
Definition: scansup.c:117
#define S(n, x)
Definition: sha1.c:73
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
StringInfo makeStringInfo(void)
Definition: stringinfo.c:72
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoSpaces(StringInfo str, int count)
Definition: stringinfo.c:260
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
StringInfoData * StringInfo
Definition: stringinfo.h:54
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:231
static void initReadOnlyStringInfo(StringInfo str, char *data, int len)
Definition: stringinfo.h:157
Oid typioparam
Definition: array.h:243
char typalign
Definition: array.h:241
Oid typiofunc
Definition: array.h:244
int16 typlen
Definition: array.h:239
Oid element_type
Definition: array.h:238
FmgrInfo proc
Definition: array.h:245
char typdelim
Definition: array.h:242
bool typbyval
Definition: array.h:240
Definition: fmgr.h:57
void * fn_extra
Definition: fmgr.h:64
MemoryContext fn_mcxt
Definition: fmgr.h:65
FmgrInfo * flinfo
Definition: fmgr.h:87
Definition: pg_list.h:54
Definition: nodes.h:135
TupleDesc setDesc
Definition: execnodes.h:359
Tuplestorestate * setResult
Definition: execnodes.h:358
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:106
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:172
void * ssup_extra
Definition: sortsupport.h:87
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:191
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:182
TupleDesc tupdesc
Definition: varlena.c:115
ArrayBuildState * astate
Definition: varlena.c:113
Tuplestorestate * tupstore
Definition: varlena.c:114
int last_match_len
Definition: varlena.c:75
bool is_multibyte_char_in_char
Definition: varlena.c:58
int last_match_len_tmp
Definition: varlena.c:76
char * last_match
Definition: varlena.c:74
char * refpoint
Definition: varlena.c:84
pg_locale_t locale
Definition: varlena.c:57
pg_locale_t locale
Definition: varlena.c:104
hyperLogLogState full_card
Definition: varlena.c:102
hyperLogLogState abbr_card
Definition: varlena.c:101
Definition: c.h:712
bool deterministic
Definition: pg_locale.h:99
Definition: regguts.h:323
Oid va_valueid
Definition: varatt.h:37
Definition: c.h:658
ToastCompressionId toast_get_compression_id(struct varlena *attr)
ToastCompressionId
@ TOAST_INVALID_COMPRESSION_ID
@ TOAST_LZ4_COMPRESSION_ID
@ TOAST_PGLZ_COMPRESSION_ID
int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)
Definition: tuplesort.c:3139
bool trace_sort
Definition: tuplesort.c:124
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:150
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:185
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition: tupmacs.h:53
pg_unicode_category unicode_category(pg_wchar code)
@ PG_U_UNASSIGNED
UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)
Definition: unicode_norm.c:598
pg_wchar * unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
Definition: unicode_norm.c:402
UnicodeNormalizationForm
Definition: unicode_norm.h:20
@ UNICODE_NFKD
Definition: unicode_norm.h:24
@ UNICODE_NFD
Definition: unicode_norm.h:22
@ UNICODE_NFC
Definition: unicode_norm.h:21
@ UNICODE_NFKC
Definition: unicode_norm.h:23
UnicodeNormalizationQC
Definition: unicode_norm.h:29
@ UNICODE_NORM_QC_YES
Definition: unicode_norm.h:31
@ UNICODE_NORM_QC_NO
Definition: unicode_norm.h:30
#define PG_UNICODE_VERSION
String * makeString(char *str)
Definition: value.c:63
#define VARATT_IS_EXTERNAL_ONDISK(PTR)
Definition: varatt.h:290
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARATT_IS_COMPRESSED(PTR)
Definition: varatt.h:288
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279
#define VARATT_IS_EXTERNAL(PTR)
Definition: varatt.h:289
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
int bpchartruelen(char *s, int len)
Definition: varchar.c:676
static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
Definition: varlena.c:2192
Datum byteacat(PG_FUNCTION_ARGS)
Definition: varlena.c:2985
Datum unknownrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:666
Datum array_to_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4968
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:3142
static int text_cmp(text *arg1, text *arg2, Oid collid)
Definition: varlena.c:1658
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:627
Datum textoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:1112
static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, Datum value, bool isNull, int flags, int width)
Definition: varlena.c:6243
static text * text_overlay(text *t1, text *t2, int sp, int sl)
Definition: varlena.c:1124
Datum text_format(PG_FUNCTION_ARGS)
Definition: varlena.c:5840
Datum textlen(PG_FUNCTION_ARGS)
Definition: varlena.c:701
Datum pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
Definition: varlena.c:5316
static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
Definition: varlena.c:1230
int bytea_output
Definition: varlena.c:48
static int32 text_length(Datum str)
Definition: varlena.c:719
static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: varlena.c:2490
Datum byteaeq(PG_FUNCTION_ARGS)
Definition: varlena.c:3862
Datum text_left(PG_FUNCTION_ARGS)
Definition: varlena.c:5740
#define DIG(VAL)
Definition: varlena.c:284
Datum byteagt(PG_FUNCTION_ARGS)
Definition: varlena.c:3966
Datum string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:5388
static bool text_isequal(text *txt1, text *txt2, Oid collid)
Definition: varlena.c:4702
static void text_position_cleanup(TextPositionState *state)
Definition: varlena.c:1574
Datum bytea_int2(PG_FUNCTION_ARGS)
Definition: varlena.c:4083
static text * text_catenate(text *t1, text *t2)
Definition: varlena.c:773
static text * concat_internal(const char *sepstr, int argidx, FunctionCallInfo fcinfo)
Definition: varlena.c:5624
static void appendStringInfoText(StringInfo str, const text *t)
Definition: varlena.c:4184
Datum textgtname(PG_FUNCTION_ARGS)
Definition: varlena.c:2821
Datum textout(PG_FUNCTION_ARGS)
Definition: varlena.c:598
Datum textcat(PG_FUNCTION_ARGS)
Definition: varlena.c:758
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:860
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:559
Datum text_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:2630
static text * text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
Definition: varlena.c:893
static int check_replace_text_has_escape(const text *replace_text)
Definition: varlena.c:4275
Datum text_concat_ws(PG_FUNCTION_ARGS)
Definition: varlena.c:5719
static int internal_text_pattern_compare(text *arg1, text *arg2)
Definition: varlena.c:2844
Datum string_agg_serialize(PG_FUNCTION_ARGS)
Definition: varlena.c:5493
Datum text_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:1832
static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2147
Datum array_to_text_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4984
static const char * text_format_parse_format(const char *start_ptr, const char *end_ptr, int *argpos, int *widthpos, int *flags, int *width)
Definition: varlena.c:6166
Datum int2_bytea(PG_FUNCTION_ARGS)
Definition: varlena.c:4158
Datum text_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:2618
Datum byteapos(PG_FUNCTION_ARGS)
Definition: varlena.c:3212
Datum unicode_assigned(PG_FUNCTION_ARGS)
Definition: varlena.c:6518
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition: varlena.c:3165
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1610
static char * text_position_get_match_ptr(TextPositionState *state)
Definition: varlena.c:1539
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2102
Datum text_to_array_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4742
static unsigned int hexval_n(const char *instr, size_t n)
Definition: varlena.c:6690
Datum byteane(PG_FUNCTION_ARGS)
Definition: varlena.c:3894
static bool rest_of_char_same(const char *s1, const char *s2, int len)
Definition: varlena.c:6354
Datum byteage(PG_FUNCTION_ARGS)
Definition: varlena.c:3986
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:204
Datum byteacmp(PG_FUNCTION_ARGS)
Definition: varlena.c:4006
Datum text_to_table_null(PG_FUNCTION_ARGS)
Definition: varlena.c:4777
Datum text_right(PG_FUNCTION_ARGS)
Definition: varlena.c:5764
Datum textne(PG_FUNCTION_ARGS)
Definition: varlena.c:1738
Datum textrecv(PG_FUNCTION_ARGS)
Definition: varlena.c:609
Datum byteaGetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3285
static void text_format_append_string(StringInfo buf, const char *str, int flags, int width)
Definition: varlena.c:6292
static int text_position(text *t1, text *t2, Oid collid)
Definition: varlena.c:1184
bool SplitDirectoriesString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3652
Datum bytea_bit_count(PG_FUNCTION_ARGS)
Definition: varlena.c:3198
Datum unicode_normalize_func(PG_FUNCTION_ARGS)
Definition: varlena.c:6546
Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:2946
static void split_text_accum_result(SplitTextOutputData *tstate, text *field_value, text *null_string, Oid collation)
Definition: varlena.c:4929
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition: varlena.c:3355
Datum split_part(PG_FUNCTION_ARGS)
Definition: varlena.c:4570
Datum texteqname(PG_FUNCTION_ARGS)
Definition: varlena.c:2672
Datum text_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:874
Datum text_name(PG_FUNCTION_ARGS)
Definition: varlena.c:3427
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3323
Datum text_le(PG_FUNCTION_ARGS)
Definition: varlena.c:1802
const char * getClosestMatch(ClosestMatchState *state)
Definition: varlena.c:6445
static void text_position_reset(TextPositionState *state)
Definition: varlena.c:1566
Datum text_to_table(PG_FUNCTION_ARGS)
Definition: varlena.c:4753
#define ADVANCE_PARSE_POINTER(ptr, end_ptr)
Definition: varlena.c:5827
Datum textnename(PG_FUNCTION_ARGS)
Definition: varlena.c:2722
static char * text_position_next_internal(char *start_ptr, TextPositionState *state)
Definition: varlena.c:1416
static FmgrInfo * build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
Definition: varlena.c:5586
Datum to_hex64(PG_FUNCTION_ARGS)
Definition: varlena.c:5203
Datum text_to_array(PG_FUNCTION_ARGS)
Definition: varlena.c:4716
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:3066
Datum unicode_is_normalized(PG_FUNCTION_ARGS)
Definition: varlena.c:6612
#define TEXT_FORMAT_FLAG_MINUS
Definition: varlena.c:5825
static void check_collation_set(Oid collid)
Definition: varlena.c:1581
bool SplitGUCList(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3773
static text * convert_to_base(uint64 value, int base)
Definition: varlena.c:5132
Datum textoverlay(PG_FUNCTION_ARGS)
Definition: varlena.c:1101
static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)
Definition: varlena.c:4308
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3525
static text * array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string)
Definition: varlena.c:5010
Datum to_hex32(PG_FUNCTION_ARGS)
Definition: varlena.c:5196
Datum text_starts_with(PG_FUNCTION_ARGS)
Definition: varlena.c:1847
Datum byteale(PG_FUNCTION_ARGS)
Definition: varlena.c:3946
Datum text_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:1817
Datum text_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:5785
Datum to_bin64(PG_FUNCTION_ARGS)
Definition: varlena.c:5165
Datum texteq(PG_FUNCTION_ARGS)
Definition: varlena.c:1683
Datum to_oct64(PG_FUNCTION_ARGS)
Definition: varlena.c:5184
Datum text_pattern_gt(PG_FUNCTION_ARGS)
Definition: varlena.c:2914
static int charlen_to_bytelen(const char *p, int n)
Definition: varlena.c:814
void varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
Definition: varlena.c:1928
static int namefastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2135
#define PG_STR_GET_BYTEA(str_)
Definition: varlena.c:3033
static StringInfo makeStringAggState(FunctionCallInfo fcinfo)
Definition: varlena.c:5364
Datum textlename(PG_FUNCTION_ARGS)
Definition: varlena.c:2815
Datum icu_unicode_version(PG_FUNCTION_ARGS)
Definition: varlena.c:6504
static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2178
Datum int8_bytea(PG_FUNCTION_ARGS)
Definition: varlena.c:4172
Datum bytea_int4(PG_FUNCTION_ARGS)
Definition: varlena.c:4108
Datum bytearecv(PG_FUNCTION_ARGS)
Definition: varlena.c:479
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)
Definition: varlena.c:2292
text * cstring_to_text(const char *s)
Definition: varlena.c:192
Datum text_concat(PG_FUNCTION_ARGS)
Definition: varlena.c:5704
Datum text_pattern_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:2866
Datum text_pattern_ge(PG_FUNCTION_ARGS)
Definition: varlena.c:2898
Datum btvarstrequalimage(PG_FUNCTION_ARGS)
Definition: varlena.c:2604
Datum bytea_smaller(PG_FUNCTION_ARGS)
Definition: varlena.c:4047
Datum nameletext(PG_FUNCTION_ARGS)
Definition: varlena.c:2791
#define CmpCall(cmpfunc)
Definition: varlena.c:2778
text * replace_text_regexp(text *src_text, text *pattern_text, text *replace_text, int cflags, Oid collation, int search_start, int n)
Definition: varlena.c:4408
Datum namenetext(PG_FUNCTION_ARGS)
Definition: varlena.c:2697
static int text_position_get_match_pos(TextPositionState *state)
Definition: varlena.c:1550
void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
Definition: varlena.c:256
Datum to_bin32(PG_FUNCTION_ARGS)
Definition: varlena.c:5158
Datum bytea_sortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:4066
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition: varlena.c:3075
Datum to_oct32(PG_FUNCTION_ARGS)
Definition: varlena.c:5177
Datum namegttext(PG_FUNCTION_ARGS)
Definition: varlena.c:2797
Datum unicode_version(PG_FUNCTION_ARGS)
Definition: varlena.c:6495
Datum namegetext(PG_FUNCTION_ARGS)
Definition: varlena.c:2803
static UnicodeNormalizationForm unicode_norm_form_from_string(const char *formstr)
Definition: varlena.c:6458
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition: varlena.c:3000
static bool text_position_next(TextPositionState *state)
Definition: varlena.c:1351
Datum textoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:739
Datum bytea_reverse(PG_FUNCTION_ARGS)
Definition: varlena.c:3405
Datum textltname(PG_FUNCTION_ARGS)
Definition: varlena.c:2809
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition: varlena.c:3153
Datum bytea_int8(PG_FUNCTION_ARGS)
Definition: varlena.c:4133
Datum bttextsortsupport(PG_FUNCTION_ARGS)
Definition: varlena.c:1901
Datum bytea_larger(PG_FUNCTION_ARGS)
Definition: varlena.c:4028
Datum text_format_nv(PG_FUNCTION_ARGS)
Definition: varlena.c:6344
Datum textpos(PG_FUNCTION_ARGS)
Definition: varlena.c:1161
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
Definition: varlena.c:2065
Datum bttext_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2930
Datum string_agg_finalfn(PG_FUNCTION_ARGS)
Definition: varlena.c:5560
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition: varlena.c:2969
Datum unistr(PG_FUNCTION_ARGS)
Definition: varlena.c:6704
static unsigned int hexval(unsigned char c)
Definition: varlena.c:6674
static bool text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
Definition: varlena.c:6117
Datum unknownin(PG_FUNCTION_ARGS)
Definition: varlena.c:642
static bool isxdigits_n(const char *instr, size_t n)
Definition: varlena.c:6664
Datum string_agg_deserialize(PG_FUNCTION_ARGS)
Definition: varlena.c:5524
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:396
Datum namelttext(PG_FUNCTION_ARGS)
Definition: varlena.c:2785
Datum pg_column_size(PG_FUNCTION_ARGS)
Definition: varlena.c:5216
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:298
#define DatumGetVarStringPP(X)
Definition: varlena.c:125
Datum pg_column_compression(PG_FUNCTION_ARGS)
Definition: varlena.c:5263
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3450
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition: varlena.c:506
Datum nameeqtext(PG_FUNCTION_ARGS)
Definition: varlena.c:2647
Datum bttextnamecmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2763
void initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
Definition: varlena.c:6390
Datum textin(PG_FUNCTION_ARGS)
Definition: varlena.c:587
Datum string_agg_combine(PG_FUNCTION_ARGS)
Definition: varlena.c:5443
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition: varlena.c:3256
Datum btnametextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:2747
Datum unknownsend(PG_FUNCTION_ARGS)
Definition: varlena.c:681
Datum text_pattern_le(PG_FUNCTION_ARGS)
Definition: varlena.c:2882
#define TEXTBUFLEN
Definition: varlena.c:122
void updateClosestMatch(ClosestMatchState *state, const char *candidate)
Definition: varlena.c:6410
Datum int4_bytea(PG_FUNCTION_ARGS)
Definition: varlena.c:4165
#define VAL(CH)
Definition: varlena.c:283
char * text_to_cstring(const text *t)
Definition: varlena.c:225
Datum bttextcmp(PG_FUNCTION_ARGS)
Definition: varlena.c:1886
Datum unknownout(PG_FUNCTION_ARGS)
Definition: varlena.c:654
Datum replace_text(PG_FUNCTION_ARGS)
Definition: varlena.c:4198
Datum textgename(PG_FUNCTION_ARGS)
Definition: varlena.c:2827
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3467
static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
Definition: varlena.c:4793
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:3052
Datum bytealt(PG_FUNCTION_ARGS)
Definition: varlena.c:3926
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:498
Datum text_lt(PG_FUNCTION_ARGS)
Definition: varlena.c:1787
int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)