PostgreSQL Source Code git master
Loading...
Searching...
No Matches
bytea.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * bytea.c
4 * Functions for the bytea type.
5 *
6 * Portions Copyright (c) 2025-2026, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/bytea.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15#include "postgres.h"
16
17#include "access/detoast.h"
18#include "common/hashfn.h"
19#include "common/int.h"
20#include "fmgr.h"
21#include "lib/hyperloglog.h"
22#include "libpq/pqformat.h"
23#include "port/pg_bitutils.h"
24#include "port/pg_bswap.h"
25#include "utils/builtins.h"
26#include "utils/bytea.h"
27#include "utils/fmgrprotos.h"
28#include "utils/guc.h"
29#include "utils/memutils.h"
30#include "utils/sortsupport.h"
31#include "utils/uuid.h"
32#include "varatt.h"
33
34/* GUC variable */
36
38static bytea *bytea_substring(Datum str, int S, int L,
40static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
41
42typedef struct
43{
44 bool abbreviate; /* Should we abbreviate keys? */
45 hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
46 hyperLogLogState full_card; /* Full key cardinality state */
47 double prop_card; /* Required cardinality proportion */
49
50/* Static function declarations for sort support */
51static int byteafastcmp(Datum x, Datum y, SortSupport ssup);
52static Datum bytea_abbrev_convert(Datum original, SortSupport ssup);
53static bool bytea_abbrev_abort(int memtupcount, SortSupport ssup);
54
55/*
56 * bytea_catenate
57 * Guts of byteacat(), broken out so it can be used by other functions
58 *
59 * Arguments can be in short-header form, but not compressed or out-of-line
60 */
61static bytea *
63{
64 bytea *result;
65 int len1,
66 len2,
67 len;
68 char *ptr;
69
70 len1 = VARSIZE_ANY_EXHDR(t1);
71 len2 = VARSIZE_ANY_EXHDR(t2);
72
73 /* paranoia ... probably should throw error instead? */
74 if (len1 < 0)
75 len1 = 0;
76 if (len2 < 0)
77 len2 = 0;
78
79 len = len1 + len2 + VARHDRSZ;
80 result = (bytea *) palloc(len);
81
82 /* Set size of result string... */
83 SET_VARSIZE(result, len);
84
85 /* Fill data field of result string... */
86 ptr = VARDATA(result);
87 if (len1 > 0)
88 memcpy(ptr, VARDATA_ANY(t1), len1);
89 if (len2 > 0)
90 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
91
92 return result;
93}
94
95#define PG_STR_GET_BYTEA(str_) \
96 DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
97
98static bytea *
100 int S,
101 int L,
103{
104 int32 S1; /* adjusted start position */
105 int32 L1; /* adjusted substring length */
106 int32 E; /* end position */
107
108 /*
109 * The logic here should generally match text_substring().
110 */
111 S1 = Max(S, 1);
112
114 {
115 /*
116 * Not passed a length - DatumGetByteaPSlice() grabs everything to the
117 * end of the string if we pass it a negative value for length.
118 */
119 L1 = -1;
120 }
121 else if (L < 0)
122 {
123 /* SQL99 says to throw an error for E < S, i.e., negative length */
126 errmsg("negative substring length not allowed")));
127 L1 = -1; /* silence stupider compilers */
128 }
129 else if (pg_add_s32_overflow(S, L, &E))
130 {
131 /*
132 * L could be large enough for S + L to overflow, in which case the
133 * substring must run to end of string.
134 */
135 L1 = -1;
136 }
137 else
138 {
139 /*
140 * A zero or negative value for the end position can happen if the
141 * start was negative or one. SQL99 says to return a zero-length
142 * string.
143 */
144 if (E < 1)
145 return PG_STR_GET_BYTEA("");
146
147 L1 = E - S1;
148 }
149
150 /*
151 * If the start position is past the end of the string, SQL99 says to
152 * return a zero-length string -- DatumGetByteaPSlice() will do that for
153 * us. We need only convert S1 to zero-based starting position.
154 */
155 return DatumGetByteaPSlice(str, S1 - 1, L1);
156}
157
158static bytea *
160{
161 bytea *result;
162 bytea *s1;
163 bytea *s2;
164 int sp_pl_sl;
165
166 /*
167 * Check for possible integer-overflow cases. For negative sp, throw a
168 * "substring length" error because that's what should be expected
169 * according to the spec's definition of OVERLAY().
170 */
171 if (sp <= 0)
174 errmsg("negative substring length not allowed")));
178 errmsg("integer out of range")));
179
180 s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
182 result = bytea_catenate(s1, t2);
183 result = bytea_catenate(result, s2);
184
185 return result;
186}
187
188/*****************************************************************************
189 * USER I/O ROUTINES *
190 *****************************************************************************/
191
192#define VAL(CH) ((CH) - '0')
193#define DIG(VAL) ((VAL) + '0')
194
195/*
196 * byteain - converts from printable representation of byte array
197 *
198 * Non-printable characters must be passed as '\nnn' (octal) and are
199 * converted to internal form. '\' must be passed as '\\'.
200 */
201Datum
/*
 * NOTE(review): the line numbered 202 (function name and parameter list) is
 * absent from this listing; the cross-reference index at the end of the
 * listing gives it as byteain(PG_FUNCTION_ARGS).
 */
203{
204 char *inputText = PG_GETARG_CSTRING(0);
205 Node *escontext = fcinfo->context;
206 size_t len = strlen(inputText);
207 size_t bc;
208 char *tp;
209 char *rp;
210 bytea *result;
211
212 /* Recognize hex input */
213 if (inputText[0] == '\\' && inputText[1] == 'x')
214 {
	/*
	 * After the two-character "\x" prefix, each output byte needs two
	 * input characters, so (len - 2) / 2 payload bytes is an upper bound.
	 * The value returned by hex_decode_safe() is then used as the real
	 * payload length.
	 */
215 bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
216 result = palloc(bc);
217 bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
218 escontext);
219 SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
220
221 PG_RETURN_BYTEA_P(result);
222 }
223
	/*
	 * Every iteration of the loop below consumes at least one input
	 * character and stores exactly one output byte, so len payload bytes
	 * can never be exceeded.
	 */
224 /* Else, it's the traditional escaped style */
225 result = (bytea *) palloc(len + VARHDRSZ); /* maximum possible length */
226
227 tp = inputText;
228 rp = VARDATA(result);
229 while (*tp != '\0')
230 {
	/* Anything other than a backslash is copied through unchanged */
231 if (tp[0] != '\\')
232 *rp++ = *tp++;
	/*
	 * Backslash followed by three octal digits.  The first digit is
	 * limited to 0..3 so the decoded value fits in one byte.  The &&
	 * chain stops at the first failing test, so a NUL terminator in
	 * tp[1] or tp[2] prevents any read beyond it.
	 */
233 else if ((tp[1] >= '0' && tp[1] <= '3') &&
234 (tp[2] >= '0' && tp[2] <= '7') &&
235 (tp[3] >= '0' && tp[3] <= '7'))
236 {
237 int v;
238
	/* Accumulate the three digits, three bits at a time */
239 v = VAL(tp[1]);
240 v <<= 3;
241 v += VAL(tp[2]);
242 v <<= 3;
243 *rp++ = v + VAL(tp[3]);
244
245 tp += 4;
246 }
	/* A doubled backslash stands for one literal backslash */
247 else if (tp[1] == '\\')
248 {
249 *rp++ = '\\';
250 tp += 2;
251 }
252 else
253 {
254 /*
255 * one backslash, not followed by another or ### valid octal
256 */
	/*
	 * NOTE(review): the line numbered 258 is absent from this
	 * listing; presumably it supplied the errcode() clause of this
	 * report -- confirm against the upstream file.
	 */
257 ereturn(escontext, (Datum) 0,
259 errmsg("invalid input syntax for type %s", "bytea")));
260 }
261 }
262
	/* rp has advanced once per stored byte; the distance is the payload size */
263 bc = rp - VARDATA(result); /* actual length */
264 SET_VARSIZE(result, bc + VARHDRSZ);
265
266 PG_RETURN_BYTEA_P(result);
267}
268
269/*
270 * byteaout - converts to printable representation of byte array
271 *
272 * In the traditional escaped format, non-printable characters are
273 * printed as '\nnn' (octal) and '\' as '\\'.
274 */
275Datum
277{
279 char *result;
280 char *rp;
281
283 {
284 /* Print hex format */
285 rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
286 *rp++ = '\\';
287 *rp++ = 'x';
289 }
291 {
292 /* Print traditional escaped format */
293 char *vp;
294 uint64 len;
295 int i;
296
297 len = 1; /* empty string has 1 char */
299 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
300 {
301 if (*vp == '\\')
302 len += 2;
303 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
304 len += 4;
305 else
306 len++;
307 }
308
309 /*
310 * In principle len can't overflow uint32 if the input fit in 1GB, but
311 * for safety let's check rather than relying on palloc's internal
312 * check.
313 */
314 if (len > MaxAllocSize)
317 errmsg_internal("result of bytea output conversion is too large")));
318 rp = result = (char *) palloc(len);
319
321 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
322 {
323 if (*vp == '\\')
324 {
325 *rp++ = '\\';
326 *rp++ = '\\';
327 }
328 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
329 {
330 int val; /* holds unprintable chars */
331
332 val = *vp;
333 rp[0] = '\\';
334 rp[3] = DIG(val & 07);
335 val >>= 3;
336 rp[2] = DIG(val & 07);
337 val >>= 3;
338 rp[1] = DIG(val & 03);
339 rp += 4;
340 }
341 else
342 *rp++ = *vp;
343 }
344 }
345 else
346 {
347 elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
349 rp = result = NULL; /* keep compiler quiet */
350 }
351 *rp = '\0';
352 PG_RETURN_CSTRING(result);
353}
354
355/*
356 * bytearecv - converts external binary format to bytea
357 */
358Datum
/*
 * NOTE(review): the lines numbered 359 and 361 are absent from this listing.
 * The cross-reference index at the end of the listing gives line 359 as
 * bytearecv(PG_FUNCTION_ARGS); line 361 presumably declares "buf", which is
 * used below without a visible declaration -- confirm against upstream.
 */
360{
362 bytea *result;
363 int nbytes;
364
	/* Take everything between the buffer's read cursor and its end */
365 nbytes = buf->len - buf->cursor;
	/* The result holds those bytes plus a VARHDRSZ header */
366 result = (bytea *) palloc(nbytes + VARHDRSZ);
367 SET_VARSIZE(result, nbytes + VARHDRSZ);
	/* Fill the payload area of the result from the message buffer */
368 pq_copymsgbytes(buf, VARDATA(result), nbytes);
369 PG_RETURN_BYTEA_P(result);
370}
371
372/*
373 * byteasend - converts bytea to binary format
374 *
375 * This is a special case: just copy the input...
376 */
377Datum
384
385Datum
387{
389
391
392 /* Append the value unless null, preceding it with the delimiter. */
393 if (!PG_ARGISNULL(1))
394 {
396 bool isfirst = false;
397
398 /*
399 * You might think we can just throw away the first delimiter, however
400 * we must keep it as we may be a parallel worker doing partial
401 * aggregation building a state to send to the main process. We need
402 * to keep the delimiter of every aggregation so that the combine
403 * function can properly join up the strings of two separately
404 * partially aggregated results. The first delimiter is only stripped
405 * off in the final function. To know how much to strip off the front
406 * of the string, we store the length of the first delimiter in the
407 * StringInfo's cursor field, which we don't otherwise need here.
408 */
409 if (state == NULL)
410 {
411 MemoryContext aggcontext;
412 MemoryContext oldcontext;
413
414 if (!AggCheckCallContext(fcinfo, &aggcontext))
415 {
416 /* cannot be called directly because of internal-type argument */
417 elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context");
418 }
419
420 /*
421 * Create state in aggregate context. It'll stay there across
422 * subsequent calls.
423 */
424 oldcontext = MemoryContextSwitchTo(aggcontext);
426 MemoryContextSwitchTo(oldcontext);
427
428 isfirst = true;
429 }
430
431 if (!PG_ARGISNULL(2))
432 {
433 bytea *delim = PG_GETARG_BYTEA_PP(2);
434
436 VARSIZE_ANY_EXHDR(delim));
437 if (isfirst)
438 state->cursor = VARSIZE_ANY_EXHDR(delim);
439 }
440
443 }
444
445 /*
446 * The transition type for string_agg() is declared to be "internal",
447 * which is a pass-by-value type the same size as a pointer.
448 */
449 if (state)
452}
453
454Datum
456{
458
459 /* cannot be called directly because of internal-type argument */
461
463
464 if (state != NULL)
465 {
466 /* As per comment in transfn, strip data before the cursor position */
467 bytea *result;
468 int strippedlen = state->len - state->cursor;
469
470 result = (bytea *) palloc(strippedlen + VARHDRSZ);
472 memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
473 PG_RETURN_BYTEA_P(result);
474 }
475 else
477}
478
479/*-------------------------------------------------------------
480 * byteaoctetlen
481 *
482 * get the number of bytes contained in an instance of type 'bytea'
483 *-------------------------------------------------------------
484 */
485Datum
487{
489
490 /* We need not detoast the input at all */
492}
493
494/*
495 * byteacat -
496 * takes two bytea* and returns a bytea* that is the concatenation of
497 * the two.
498 *
499 * Cloned from textcat and modified as required.
500 */
501Datum
509
510/*
511 * byteaoverlay
512 * Replace specified substring of first string with second
513 *
514 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
515 * This code is a direct implementation of what the standard says.
516 */
517Datum
519{
522 int sp = PG_GETARG_INT32(2); /* substring start position */
523 int sl = PG_GETARG_INT32(3); /* substring length */
524
526}
527
528Datum
530{
533 int sp = PG_GETARG_INT32(2); /* substring start position */
534 int sl;
535
536 sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
538}
539
540/*
541 * bytea_substr()
542 * Return a substring starting at the specified position.
543 * Cloned from text_substr and modified as required.
544 *
545 * Input:
546 * - string
547 * - starting position (is one-based)
548 * - string length (optional)
549 *
550 * If the starting position is zero or less, then return from the start of the string
551 * adjusting the length to be consistent with the "negative start" per SQL.
552 * If the length is less than zero, an ERROR is thrown. If no third argument
553 * (length) is provided, the length to the end of the string is assumed.
554 */
555Datum
563
564/*
565 * bytea_substr_no_len -
566 * Wrapper to avoid opr_sanity failure due to
567 * one function accepting a different number of args.
568 */
569Datum
577
578/*
579 * bit_count
580 */
581Datum
588
589/*
590 * byteapos -
591 * Return the position of the specified substring.
592 * Implements the SQL POSITION() function.
593 * Cloned from textpos and modified as required.
594 */
595Datum
/*
 * NOTE(review): the lines numbered 596, 598 and 599 are absent from this
 * listing.  The cross-reference index at the end of the listing gives line
 * 596 as byteapos(PG_FUNCTION_ARGS); lines 598-599 presumably declare t1
 * (the value searched) and t2 (the pattern) -- confirm against upstream.
 */
597{
600 int pos;
601 int px,
602 p;
603 int len1,
604 len2;
605 char *p1,
606 *p2;
607
608 len1 = VARSIZE_ANY_EXHDR(t1);
609 len2 = VARSIZE_ANY_EXHDR(t2);
610
611 if (len2 <= 0)
612 PG_RETURN_INT32(1); /* result for empty pattern */
613
614 p1 = VARDATA_ANY(t1);
615 p2 = VARDATA_ANY(t2);
616
	/*
	 * Straightforward scan: try each start offset p in t1 at which all
	 * len2 pattern bytes still fit.  px is the last such offset; when it
	 * is negative (pattern longer than t1) the loop body never runs.
	 * pos stays 0 if no match is found, otherwise it becomes the 1-based
	 * offset of the first match.
	 */
617 pos = 0;
618 px = (len1 - len2);
619 for (p = 0; p <= px; p++)
620 {
	/* Compare the first byte before paying for the full memcmp() */
621 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
622 {
623 pos = p + 1;
624 break;
625 };
	/* Slide the candidate window forward by one byte */
626 p1++;
627 };
628
629 PG_RETURN_INT32(pos);
630}
631
632/*-------------------------------------------------------------
633 * byteaGetByte
634 *
635 * this routine treats "bytea" as an array of bytes.
636 * It returns the Nth byte (a number between 0 and 255).
637 *-------------------------------------------------------------
638 */
639Datum
641{
643 int32 n = PG_GETARG_INT32(1);
644 int len;
645 int byte;
646
648
649 if (n < 0 || n >= len)
652 errmsg("index %d out of valid range, 0..%d",
653 n, len - 1)));
654
655 byte = ((unsigned char *) VARDATA_ANY(v))[n];
656
657 PG_RETURN_INT32(byte);
658}
659
660/*-------------------------------------------------------------
661 * byteaGetBit
662 *
663 * This routine treats a "bytea" type like an array of bits.
664 * It returns the value of the Nth bit (0 or 1).
665 *
666 *-------------------------------------------------------------
667 */
668Datum
670{
672 int64 n = PG_GETARG_INT64(1);
673 int byteNo,
674 bitNo;
675 int len;
676 int byte;
677
679
680 if (n < 0 || n >= (int64) len * 8)
683 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
684 n, (int64) len * 8 - 1)));
685
686 /* n/8 is now known < len, so safe to cast to int */
687 byteNo = (int) (n / 8);
688 bitNo = (int) (n % 8);
689
690 byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
691
692 if (byte & (1 << bitNo))
694 else
696}
697
698/*-------------------------------------------------------------
699 * byteaSetByte
700 *
701 * Given an instance of type 'bytea' creates a new one with
702 * the Nth byte set to the given value.
703 *
704 *-------------------------------------------------------------
705 */
706Datum
708{
710 int32 n = PG_GETARG_INT32(1);
712 int len;
713
714 len = VARSIZE(res) - VARHDRSZ;
715
716 if (n < 0 || n >= len)
719 errmsg("index %d out of valid range, 0..%d",
720 n, len - 1)));
721
722 /*
723 * Now set the byte.
724 */
725 ((unsigned char *) VARDATA(res))[n] = newByte;
726
728}
729
730/*-------------------------------------------------------------
731 * byteaSetBit
732 *
733 * Given an instance of type 'bytea' creates a new one with
734 * the Nth bit set to the given value.
735 *
736 *-------------------------------------------------------------
737 */
738Datum
740{
742 int64 n = PG_GETARG_INT64(1);
744 int len;
745 int oldByte,
746 newByte;
747 int byteNo,
748 bitNo;
749
750 len = VARSIZE(res) - VARHDRSZ;
751
752 if (n < 0 || n >= (int64) len * 8)
755 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
756 n, (int64) len * 8 - 1)));
757
758 /* n/8 is now known < len, so safe to cast to int */
759 byteNo = (int) (n / 8);
760 bitNo = (int) (n % 8);
761
762 /*
763 * sanity check!
764 */
765 if (newBit != 0 && newBit != 1)
768 errmsg("new bit must be 0 or 1")));
769
770 /*
771 * Update the byte.
772 */
773 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
774
775 if (newBit == 0)
776 newByte = oldByte & (~(1 << bitNo));
777 else
778 newByte = oldByte | (1 << bitNo);
779
780 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
781
783}
784
785/*
786 * Return reversed bytea
787 */
788Datum
/*
 * NOTE(review): the lines numbered 789 and 791 are absent from this listing.
 * The cross-reference index at the end of the listing gives line 789 as
 * bytea_reverse(PG_FUNCTION_ARGS); line 791 presumably declares "v", the
 * input value used below -- confirm against upstream.
 */
790{
792 const char *p = VARDATA_ANY(v);
793 int len = VARSIZE_ANY_EXHDR(v);
794 const char *endp = p + len;
	/* The result has the same payload size as the input */
795 bytea *result = palloc(len + VARHDRSZ);
	/* dst starts one past the last payload byte of the result */
796 char *dst = (char *) VARDATA(result) + len;
797
798 SET_VARSIZE(result, len + VARHDRSZ);
799
	/*
	 * Read the input front to back while writing the result back to
	 * front; dst is decremented before each store, so the first input
	 * byte lands in the last payload slot.
	 */
800 while (p < endp)
801 *(--dst) = *p++;
802
803 PG_RETURN_BYTEA_P(result);
804}
805
806
807/*****************************************************************************
808 * Comparison Functions used for bytea
809 *
810 * Note: btree indexes need these routines not to leak memory; therefore,
811 * be careful to free working copies of toasted datums. Most places don't
812 * need to be so careful.
813 *****************************************************************************/
814
815Datum
817{
820 bool result;
821 Size len1,
822 len2;
823
824 /*
825 * We can use a fast path for unequal lengths, which might save us from
826 * having to detoast one or both values.
827 */
830 if (len1 != len2)
831 result = false;
832 else
833 {
836
838 len1 - VARHDRSZ) == 0);
839
842 }
843
844 PG_RETURN_BOOL(result);
845}
846
847Datum
849{
852 bool result;
853 Size len1,
854 len2;
855
856 /*
857 * We can use a fast path for unequal lengths, which might save us from
858 * having to detoast one or both values.
859 */
862 if (len1 != len2)
863 result = true;
864 else
865 {
868
870 len1 - VARHDRSZ) != 0);
871
874 }
875
876 PG_RETURN_BOOL(result);
877}
878
879Datum
881{
884 int len1,
885 len2;
886 int cmp;
887
888 len1 = VARSIZE_ANY_EXHDR(arg1);
889 len2 = VARSIZE_ANY_EXHDR(arg2);
890
891 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
892
895
896 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
897}
898
899Datum
901{
904 int len1,
905 len2;
906 int cmp;
907
908 len1 = VARSIZE_ANY_EXHDR(arg1);
909 len2 = VARSIZE_ANY_EXHDR(arg2);
910
911 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
912
915
916 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
917}
918
919Datum
921{
924 int len1,
925 len2;
926 int cmp;
927
928 len1 = VARSIZE_ANY_EXHDR(arg1);
929 len2 = VARSIZE_ANY_EXHDR(arg2);
930
931 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
932
935
936 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
937}
938
939Datum
941{
944 int len1,
945 len2;
946 int cmp;
947
948 len1 = VARSIZE_ANY_EXHDR(arg1);
949 len2 = VARSIZE_ANY_EXHDR(arg2);
950
951 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
952
955
956 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
957}
958
959Datum
961{
964 int len1,
965 len2;
966 int cmp;
967
968 len1 = VARSIZE_ANY_EXHDR(arg1);
969 len2 = VARSIZE_ANY_EXHDR(arg2);
970
971 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
972 if ((cmp == 0) && (len1 != len2))
973 cmp = (len1 < len2) ? -1 : 1;
974
977
979}
980
981Datum
/*
 * NOTE(review): the lines numbered 982, 984 and 985 are absent from this
 * listing.  The cross-reference index at the end of the listing gives line
 * 982 as bytea_larger(PG_FUNCTION_ARGS); lines 984-985 presumably declare
 * arg1 and arg2 -- confirm against upstream.
 */
983{
986 bytea *result;
987 int len1,
988 len2;
989 int cmp;
990
991 len1 = VARSIZE_ANY_EXHDR(arg1);
992 len2 = VARSIZE_ANY_EXHDR(arg2);
993
	/*
	 * Compare the common prefix bytewise; if it is identical, the longer
	 * value counts as larger.  arg1 is returned only when it is strictly
	 * larger, so arg2 is returned when the two compare equal.
	 */
994 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
995 result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
996
997 PG_RETURN_BYTEA_P(result);
998}
999
1000Datum
/*
 * NOTE(review): the lines numbered 1001, 1003 and 1004 are absent from this
 * listing.  The cross-reference index at the end of the listing gives line
 * 1001 as bytea_smaller(PG_FUNCTION_ARGS); lines 1003-1004 presumably declare
 * arg1 and arg2 -- confirm against upstream.
 */
1002{
1005 bytea *result;
1006 int len1,
1007 len2;
1008 int cmp;
1009
1010 len1 = VARSIZE_ANY_EXHDR(arg1);
1011 len2 = VARSIZE_ANY_EXHDR(arg2);
1012
	/*
	 * Compare the common prefix bytewise; if it is identical, the shorter
	 * value counts as smaller.  arg1 is returned only when it is strictly
	 * smaller, so arg2 is returned when the two compare equal.
	 */
1013 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1014 result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
1015
1016 PG_RETURN_BYTEA_P(result);
1017}
1018
1019/*
1020 * sortsupport comparison func
1021 */
1022static int
/*
 * NOTE(review): the lines numbered 1023, 1025 and 1026 are absent from this
 * listing.  The cross-reference index at the end of the listing gives line
 * 1023 as byteafastcmp(Datum x, Datum y, SortSupport ssup); lines 1025-1026
 * presumably derive the bytea pointers arg1 and arg2 from x and y -- confirm
 * against upstream.
 *
 * Returns a negative, zero or positive value as the first value sorts
 * before, equal to, or after the second.
 */
1024{
1027 char *a1p,
1028 *a2p;
1029 int len1,
1030 len2,
1031 result;
1032
1033 a1p = VARDATA_ANY(arg1);
1034 a2p = VARDATA_ANY(arg2);
1035
1036 len1 = VARSIZE_ANY_EXHDR(arg1);
1037 len2 = VARSIZE_ANY_EXHDR(arg2);
1038
	/*
	 * Bytewise comparison over the common prefix; when the prefix is
	 * identical the shorter value sorts first.
	 */
1039 result = memcmp(a1p, a2p, Min(len1, len2));
1040 if ((result == 0) && (len1 != len2))
1041 result = (len1 < len2) ? -1 : 1;
1042
	/*
	 * Free arg1/arg2 only when they are not the very pointers passed in
	 * as x and y.
	 */
1043 /* We can't afford to leak memory here. */
1044 if (PointerGetDatum(arg1) != x)
1045 pfree(arg1);
1046 if (PointerGetDatum(arg2) != y)
1047 pfree(arg2);
1048
1049 return result;
1050}
1051
1052/*
1053 * Conversion routine for sortsupport. Converts original to abbreviated key
1054 * representation. Our encoding strategy is simple -- pack the first 8 bytes
1055 * of the bytea data into a Datum (on little-endian machines, the bytes are
1056 * stored in reverse order), and treat it as an unsigned integer.
1057 */
1058static Datum
1060{
1061 const size_t max_prefix_bytes = sizeof(Datum);
1063 bytea *authoritative = DatumGetByteaPP(original);
1065 Datum res;
1066 char *pres;
1067 int len;
1068 uint32 hash;
1069
1070 pres = (char *) &res;
1071
1072 /* memset(), so any non-overwritten bytes are NUL */
1075
1076 /*
1077 * Short byteas will have terminating NUL bytes in the abbreviated datum.
1078 * Abbreviated comparison need not make a distinction between these NUL
1079 * bytes, and NUL bytes representing actual NULs in the authoritative
1080 * representation.
1081 *
1082 * Hopefully a comparison at or past one abbreviated key's terminating NUL
1083 * byte will resolve the comparison without consulting the authoritative
1084 * representation; specifically, some later non-NUL byte in the longer
1085 * bytea can resolve the comparison against a subsequent terminating NUL
1086 * in the shorter bytea. There will usually be what is effectively a
1087 * "length-wise" resolution there and then.
1088 *
1089 * If that doesn't work out -- if all bytes in the longer bytea positioned
1090 * at or past the offset of the smaller bytea (first) terminating NUL are
1091 * actually representative of NUL bytes in the authoritative binary bytea
1092 * (perhaps with some *terminating* NUL bytes towards the end of the
1093 * longer bytea iff it happens to still be small) -- then an authoritative
1094 * tie-breaker will happen, and do the right thing: explicitly consider
1095 * bytea length.
1096 */
1098
1099 /*
1100 * Maintain approximate cardinality of both abbreviated keys and original,
1101 * authoritative keys using HyperLogLog. Used as cheap insurance against
1102 * the worst case, where we do many string abbreviations for no saving in
1103 * full memcmp()-based comparisons. These statistics are used by
1104 * bytea_abbrev_abort().
1105 *
1106 * First, Hash key proper, or a significant fraction of it. Mix in length
1107 * in order to compensate for cases where differences are past
1108 * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
1109 */
1110 hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
1112
1113 if (len > PG_CACHE_LINE_SIZE)
1115
1116 addHyperLogLog(&bss->full_card, hash);
1117
1118 /* Hash abbreviated key */
1119 {
1120 uint32 tmp;
1121
1122 tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
1124 }
1125
1126 addHyperLogLog(&bss->abbr_card, hash);
1127
1128 /*
1129 * Byteswap on little-endian machines.
1130 *
1131 * This is needed so that ssup_datum_unsigned_cmp() works correctly on all
1132 * platforms.
1133 */
1134 res = DatumBigEndianToNative(res);
1135
1136 /* Don't leak memory here */
1137 if (PointerGetDatum(authoritative) != original)
1139
1140 return res;
1141}
1142
1143/*
1144 * Callback for estimating effectiveness of abbreviated key optimization, using
1145 * heuristic rules. Returns value indicating if the abbreviation optimization
1146 * should be aborted, based on its projected effectiveness.
1147 *
1148 * This is based on varstr_abbrev_abort(), but some comments have been elided
1149 * for brevity. See there for more details.
1150 */
1151static bool
1152bytea_abbrev_abort(int memtupcount, SortSupport ssup)
1153{
1155 double abbrev_distinct,
1157
1158 Assert(ssup->abbreviate);
1159
1160 /* Have a little patience */
1161 if (memtupcount < 100)
1162 return false;
1163
1165 key_distinct = estimateHyperLogLog(&bss->full_card);
1166
1167 /*
1168 * Clamp cardinality estimates to at least one distinct value. While
1169 * NULLs are generally disregarded, if only NULL values were seen so far,
1170 * that might misrepresent costs if we failed to clamp.
1171 */
1172 if (abbrev_distinct < 1.0)
1173 abbrev_distinct = 1.0;
1174
1175 if (key_distinct < 1.0)
1176 key_distinct = 1.0;
1177
1178 if (trace_sort)
1179 {
1180 double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
1181
1182 elog(LOG, "bytea_abbrev: abbrev_distinct after %d: %f "
1183 "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
1185 bss->prop_card);
1186 }
1187
1188 /*
1189 * If the number of distinct abbreviated keys approximately matches the
1190 * number of distinct original keys, continue with abbreviation.
1191 */
1192 if (abbrev_distinct > key_distinct * bss->prop_card)
1193 {
1194 /*
1195 * Decay required cardinality aggressively after 10,000 tuples.
1196 */
1197 if (memtupcount > 10000)
1198 bss->prop_card *= 0.65;
1199
1200 return false;
1201 }
1202
1203 /*
1204 * Abort abbreviation strategy.
1205 */
1206 if (trace_sort)
1207 elog(LOG, "bytea_abbrev: aborted abbreviation at %d "
1208 "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
1209 memtupcount, abbrev_distinct, key_distinct, bss->prop_card);
1210
1211 return true;
1212}
1213
1214Datum
1216{
1218 MemoryContext oldcontext;
1219
1220 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1221
1222 ssup->comparator = byteafastcmp;
1223
1224 /*
1225 * Set up abbreviation support if requested.
1226 */
1227 if (ssup->abbreviate)
1228 {
1230
1232 bss->abbreviate = true;
1233 bss->prop_card = 0.20;
1234 initHyperLogLog(&bss->abbr_card, 10);
1235 initHyperLogLog(&bss->full_card, 10);
1236
1237 ssup->ssup_extra = bss;
1238 ssup->abbrev_full_comparator = ssup->comparator;
1242 }
1243
1244 MemoryContextSwitchTo(oldcontext);
1245
1247}
1248
1249/* Cast bytea -> int2 */
1250Datum
/*
 * NOTE(review): the lines numbered 1251 and 1260 are absent from this
 * listing.  The cross-reference index at the end of the listing gives line
 * 1251 as bytea_int2(PG_FUNCTION_ARGS); line 1260 presumably supplied the
 * errcode() clause of the error report -- confirm against upstream.
 */
1252{
1253 bytea *v = PG_GETARG_BYTEA_PP(0);
1254 int len = VARSIZE_ANY_EXHDR(v);
1255 uint16 result;
1256
1257 /* Check that the byte array is not too long */
1258 if (len > sizeof(result))
1259 ereport(ERROR,
1261 errmsg("smallint out of range"));
1262
	/*
	 * Each pass shifts the accumulator left by one byte and ORs in the
	 * next input byte.  An input shorter than sizeof(result) therefore
	 * yields the same value as if it had been left-padded with zero
	 * bytes, and an empty input yields 0.
	 */
1263 /* Convert it to an integer; most significant bytes come first */
1264 result = 0;
1265 for (int i = 0; i < len; i++)
1266 {
1267 result <<= BITS_PER_BYTE;
1268 result |= ((unsigned char *) VARDATA_ANY(v))[i];
1269 }
1270
1271 PG_RETURN_INT16(result);
1272}
1273
1274/* Cast bytea -> int4 */
1275Datum
/*
 * NOTE(review): the lines numbered 1276 and 1285 are absent from this
 * listing.  The cross-reference index at the end of the listing gives line
 * 1276 as bytea_int4(PG_FUNCTION_ARGS); line 1285 presumably supplied the
 * errcode() clause of the error report -- confirm against upstream.
 */
1277{
1278 bytea *v = PG_GETARG_BYTEA_PP(0);
1279 int len = VARSIZE_ANY_EXHDR(v);
1280 uint32 result;
1281
1282 /* Check that the byte array is not too long */
1283 if (len > sizeof(result))
1284 ereport(ERROR,
1286 errmsg("integer out of range"));
1287
	/*
	 * Each pass shifts the accumulator left by one byte and ORs in the
	 * next input byte.  An input shorter than sizeof(result) therefore
	 * yields the same value as if it had been left-padded with zero
	 * bytes, and an empty input yields 0.
	 */
1288 /* Convert it to an integer; most significant bytes come first */
1289 result = 0;
1290 for (int i = 0; i < len; i++)
1291 {
1292 result <<= BITS_PER_BYTE;
1293 result |= ((unsigned char *) VARDATA_ANY(v))[i];
1294 }
1295
1296 PG_RETURN_INT32(result);
1297}
1298
1299/* Cast bytea -> int8 */
1300Datum
/*
 * NOTE(review): the lines numbered 1301 and 1310 are absent from this
 * listing.  The cross-reference index at the end of the listing gives line
 * 1301 as bytea_int8(PG_FUNCTION_ARGS); line 1310 presumably supplied the
 * errcode() clause of the error report -- confirm against upstream.
 */
1302{
1303 bytea *v = PG_GETARG_BYTEA_PP(0);
1304 int len = VARSIZE_ANY_EXHDR(v);
1305 uint64 result;
1306
1307 /* Check that the byte array is not too long */
1308 if (len > sizeof(result))
1309 ereport(ERROR,
1311 errmsg("bigint out of range"));
1312
	/*
	 * Each pass shifts the accumulator left by one byte and ORs in the
	 * next input byte.  An input shorter than sizeof(result) therefore
	 * yields the same value as if it had been left-padded with zero
	 * bytes, and an empty input yields 0.
	 */
1313 /* Convert it to an integer; most significant bytes come first */
1314 result = 0;
1315 for (int i = 0; i < len; i++)
1316 {
1317 result <<= BITS_PER_BYTE;
1318 result |= ((unsigned char *) VARDATA_ANY(v))[i];
1319 }
1320
1321 PG_RETURN_INT64(result);
1322}
1323
1324/* Cast int2 -> bytea; can just use int2send() */
1325Datum
1327{
1328 return int2send(fcinfo);
1329}
1330
1331/* Cast int4 -> bytea; can just use int4send() */
1332Datum
1334{
1335 return int4send(fcinfo);
1336}
1337
1338/* Cast int8 -> bytea; can just use int8send() */
1339Datum
1341{
1342 return int8send(fcinfo);
1343}
1344
1345/* Cast bytea -> uuid */
1346Datum
1348{
1349 bytea *v = PG_GETARG_BYTEA_PP(0);
1350 int len = VARSIZE_ANY_EXHDR(v);
1351 pg_uuid_t *uuid;
1352
1353 if (len != UUID_LEN)
1354 ereport(ERROR,
1356 errmsg("invalid input length for type %s", "uuid"),
1357 errdetail("Expected %d bytes, got %d.", UUID_LEN, len)));
1358
1360 memcpy(uuid->data, VARDATA_ANY(v), UUID_LEN);
1362}
1363
1364/* Cast uuid -> bytea; can just use uuid_send() */
1365Datum
1367{
1368 return uuid_send(fcinfo);
1369}
Datum byteacat(PG_FUNCTION_ARGS)
Definition bytea.c:502
Datum uuid_bytea(PG_FUNCTION_ARGS)
Definition bytea.c:1366
Datum byteaoverlay(PG_FUNCTION_ARGS)
Definition bytea.c:518
int bytea_output
Definition bytea.c:35
Datum byteaeq(PG_FUNCTION_ARGS)
Definition bytea.c:816
#define DIG(VAL)
Definition bytea.c:193
Datum byteagt(PG_FUNCTION_ARGS)
Definition bytea.c:920
Datum bytea_int2(PG_FUNCTION_ARGS)
Definition bytea.c:1251
Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
Definition bytea.c:455
Datum int2_bytea(PG_FUNCTION_ARGS)
Definition bytea.c:1326
Datum byteapos(PG_FUNCTION_ARGS)
Definition bytea.c:596
static bytea * bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
Definition bytea.c:159
Datum byteane(PG_FUNCTION_ARGS)
Definition bytea.c:848
Datum byteage(PG_FUNCTION_ARGS)
Definition bytea.c:940
Datum byteacmp(PG_FUNCTION_ARGS)
Definition bytea.c:960
Datum byteaGetBit(PG_FUNCTION_ARGS)
Definition bytea.c:669
Datum bytea_bit_count(PG_FUNCTION_ARGS)
Definition bytea.c:582
Datum byteaSetBit(PG_FUNCTION_ARGS)
Definition bytea.c:739
Datum byteaSetByte(PG_FUNCTION_ARGS)
Definition bytea.c:707
static int byteafastcmp(Datum x, Datum y, SortSupport ssup)
Definition bytea.c:1023
Datum bytea_substr_no_len(PG_FUNCTION_ARGS)
Definition bytea.c:570
Datum byteale(PG_FUNCTION_ARGS)
Definition bytea.c:900
#define PG_STR_GET_BYTEA(str_)
Definition bytea.c:95
Datum int8_bytea(PG_FUNCTION_ARGS)
Definition bytea.c:1340
Datum bytea_int4(PG_FUNCTION_ARGS)
Definition bytea.c:1276
Datum bytearecv(PG_FUNCTION_ARGS)
Definition bytea.c:359
Datum bytea_smaller(PG_FUNCTION_ARGS)
Definition bytea.c:1001
Datum bytea_sortsupport(PG_FUNCTION_ARGS)
Definition bytea.c:1215
static bytea * bytea_substring(Datum str, int S, int L, bool length_not_specified)
Definition bytea.c:99
static Datum bytea_abbrev_convert(Datum original, SortSupport ssup)
Definition bytea.c:1059
static bool bytea_abbrev_abort(int memtupcount, SortSupport ssup)
Definition bytea.c:1152
static bytea * bytea_catenate(bytea *t1, bytea *t2)
Definition bytea.c:62
Datum bytea_uuid(PG_FUNCTION_ARGS)
Definition bytea.c:1347
Datum bytea_reverse(PG_FUNCTION_ARGS)
Definition bytea.c:789
Datum byteaoverlay_no_len(PG_FUNCTION_ARGS)
Definition bytea.c:529
Datum bytea_int8(PG_FUNCTION_ARGS)
Definition bytea.c:1301
Datum bytea_larger(PG_FUNCTION_ARGS)
Definition bytea.c:982
Datum byteaoctetlen(PG_FUNCTION_ARGS)
Definition bytea.c:486
Datum byteaout(PG_FUNCTION_ARGS)
Definition bytea.c:276
Datum byteain(PG_FUNCTION_ARGS)
Definition bytea.c:202
Datum bytea_string_agg_transfn(PG_FUNCTION_ARGS)
Definition bytea.c:386
Datum byteaGetByte(PG_FUNCTION_ARGS)
Definition bytea.c:640
Datum int4_bytea(PG_FUNCTION_ARGS)
Definition bytea.c:1333
#define VAL(CH)
Definition bytea.c:192
Datum bytea_substr(PG_FUNCTION_ARGS)
Definition bytea.c:556
Datum bytealt(PG_FUNCTION_ARGS)
Definition bytea.c:880
Datum byteasend(PG_FUNCTION_ARGS)
Definition bytea.c:378
@ BYTEA_OUTPUT_HEX
Definition bytea.h:22
@ BYTEA_OUTPUT_ESCAPE
Definition bytea.h:21
#define Min(x, y)
Definition c.h:1093
#define Max(x, y)
Definition c.h:1087
#define VARHDRSZ
Definition c.h:783
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
int32_t int32
Definition c.h:614
uint64_t uint64
Definition c.h:619
uint16_t uint16
Definition c.h:617
uint32_t uint32
Definition c.h:618
size_t Size
Definition c.h:691
Size toast_raw_datum_size(Datum value)
Definition detoast.c:545
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:31
#define ereturn(context, dummy_value,...)
Definition elog.h:278
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
uint64 hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
Definition encode.c:351
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition encode.c:202
struct SortSupportData * SortSupport
Definition execnodes.h:60
#define palloc_object(type)
Definition fe_memutils.h:74
#define MaxAllocSize
Definition fe_memutils.h:22
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_FREE_IF_COPY(ptr, n)
Definition fmgr.h:260
#define DatumGetByteaPSlice(X, m, n)
Definition fmgr.h:304
#define PG_GETARG_BYTEA_PP(n)
Definition fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition fmgr.h:373
#define DatumGetByteaPP(X)
Definition fmgr.h:292
#define PG_GETARG_POINTER(n)
Definition fmgr.h:277
#define PG_RETURN_CSTRING(x)
Definition fmgr.h:364
#define PG_ARGISNULL(n)
Definition fmgr.h:209
#define PG_RETURN_INT64(x)
Definition fmgr.h:370
#define PG_GETARG_DATUM(n)
Definition fmgr.h:268
#define PG_GETARG_CSTRING(n)
Definition fmgr.h:278
#define PG_RETURN_NULL()
Definition fmgr.h:346
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define PG_RETURN_INT16(x)
Definition fmgr.h:357
#define PG_RETURN_INT32(x)
Definition fmgr.h:355
#define PG_GETARG_INT32(n)
Definition fmgr.h:269
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
#define PG_GETARG_BYTEA_P_COPY(n)
Definition fmgr.h:315
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
static Datum hash_uint32(uint32 k)
Definition hashfn.h:43
static Datum hash_any(const unsigned char *k, int keylen)
Definition hashfn.h:31
const char * str
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition hyperloglog.c:66
double estimateHyperLogLog(hyperLogLogState *cState)
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
long val
Definition informix.c:689
static struct @174 value
Datum int8send(PG_FUNCTION_ARGS)
Definition int8.c:94
Datum int2send(PG_FUNCTION_ARGS)
Definition int.c:98
Datum int4send(PG_FUNCTION_ARGS)
Definition int.c:351
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition int.h:151
int y
Definition isn.c:76
int x
Definition isn.c:75
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition nodeAgg.c:4609
static char * errmsg
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
static uint64 pg_popcount(const char *buf, int bytes)
#define DatumBigEndianToNative(x)
Definition pg_bswap.h:145
#define BITS_PER_BYTE
#define PG_CACHE_LINE_SIZE
const void size_t len
static char buf[DEFAULT_XLOG_SEG_SIZE]
static uint32 DatumGetUInt32(Datum X)
Definition postgres.h:222
static uint64 DatumGetUInt64(Datum X)
Definition postgres.h:423
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
uint64_t Datum
Definition postgres.h:70
void pq_copymsgbytes(StringInfo msg, void *buf, int datalen)
Definition pqformat.c:527
static int fb(int x)
char * s1
char * s2
static int cmp(const chr *x, const chr *y, size_t len)
static unsigned hash(unsigned *uv, int n)
Definition rege_dfa.c:715
#define S(n, x)
Definition sha1.c:73
struct StringInfoData * StringInfo
Definition string.h:15
StringInfo makeStringInfo(void)
Definition stringinfo.c:72
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition stringinfo.c:281
double prop_card
Definition bytea.c:47
hyperLogLogState full_card
Definition bytea.c:46
hyperLogLogState abbr_card
Definition bytea.c:45
bool abbreviate
Definition bytea.c:44
Definition nodes.h:135
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
MemoryContext ssup_cxt
Definition sortsupport.h:66
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition c.h:778
int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)
Definition tuplesort.c:3411
bool trace_sort
Definition tuplesort.c:122
Datum uuid_send(PG_FUNCTION_ARGS)
Definition uuid.c:192
#define PG_RETURN_UUID_P(X)
Definition uuid.h:32
#define UUID_LEN
Definition uuid.h:18
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static Size VARSIZE(const void *PTR)
Definition varatt.h:298
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432