PostgreSQL Source Code git master
oracle_compat.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 * oracle_compat.c
3 * Oracle compatible functions.
4 *
5 * Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 *
7 * Author: Edmund Mergl <E.Mergl@bawue.de>
8 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9 *
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/oracle_compat.c
13 *
14 *-------------------------------------------------------------------------
15 */
16#include "postgres.h"
17
18#include "common/int.h"
19#include "mb/pg_wchar.h"
20#include "miscadmin.h"
21#include "utils/builtins.h"
22#include "utils/formatting.h"
23#include "utils/memutils.h"
24#include "varatt.h"
25
26
27static text *dotrim(const char *string, int stringlen,
28 const char *set, int setlen,
29 bool doltrim, bool dortrim);
30static bytea *dobyteatrim(bytea *string, bytea *set,
31 bool doltrim, bool dortrim);
32
33
34/********************************************************************
35 *
36 * lower
37 *
38 * Syntax:
39 *
40 * text lower(text string)
41 *
42 * Purpose:
43 *
44 * Returns string, with all letters forced to lowercase.
45 *
46 ********************************************************************/
47
50{
51 text *in_string = PG_GETARG_TEXT_PP(0);
52 char *out_string;
53 text *result;
54
55 out_string = str_tolower(VARDATA_ANY(in_string),
56 VARSIZE_ANY_EXHDR(in_string),
58 result = cstring_to_text(out_string);
59 pfree(out_string);
60
61 PG_RETURN_TEXT_P(result);
62}
63
64
65/********************************************************************
66 *
67 * upper
68 *
69 * Syntax:
70 *
71 * text upper(text string)
72 *
73 * Purpose:
74 *
75 * Returns string, with all letters forced to uppercase.
76 *
77 ********************************************************************/
78
81{
82 text *in_string = PG_GETARG_TEXT_PP(0);
83 char *out_string;
84 text *result;
85
86 out_string = str_toupper(VARDATA_ANY(in_string),
87 VARSIZE_ANY_EXHDR(in_string),
89 result = cstring_to_text(out_string);
90 pfree(out_string);
91
92 PG_RETURN_TEXT_P(result);
93}
94
95
96/********************************************************************
97 *
98 * initcap
99 *
100 * Syntax:
101 *
102 * text initcap(text string)
103 *
104 * Purpose:
105 *
106 * Returns string, with first letter of each word in uppercase, all
107 * other letters in lowercase. A word is defined as a sequence of
108 * alphanumeric characters, delimited by non-alphanumeric
109 * characters.
110 *
111 ********************************************************************/
112
113Datum
115{
116 text *in_string = PG_GETARG_TEXT_PP(0);
117 char *out_string;
118 text *result;
119
120 out_string = str_initcap(VARDATA_ANY(in_string),
121 VARSIZE_ANY_EXHDR(in_string),
123 result = cstring_to_text(out_string);
124 pfree(out_string);
125
126 PG_RETURN_TEXT_P(result);
127}
128
129Datum
131{
132 text *in_string = PG_GETARG_TEXT_PP(0);
133 char *out_string;
134 text *result;
135
136 out_string = str_casefold(VARDATA_ANY(in_string),
137 VARSIZE_ANY_EXHDR(in_string),
139 result = cstring_to_text(out_string);
140 pfree(out_string);
141
142 PG_RETURN_TEXT_P(result);
143}
144
145
146/********************************************************************
147 *
148 * lpad
149 *
150 * Syntax:
151 *
152 * text lpad(text string1, int4 len, text string2)
153 *
154 * Purpose:
155 *
156 * Returns string1, left-padded to length len with the sequence of
157 * characters in string2. If len is less than the length of string1,
158 * instead truncate (on the right) to len.
159 *
160 ********************************************************************/
161
162Datum
164{
165 text *string1 = PG_GETARG_TEXT_PP(0);
167 text *string2 = PG_GETARG_TEXT_PP(2);
168 text *ret;
169 char *ptr1,
170 *ptr2,
171 *ptr2start,
172 *ptr2end,
173 *ptr_ret;
174 int m,
175 s1len,
176 s2len;
177 int bytelen;
178
179 /* Negative len is silently taken as zero */
180 if (len < 0)
181 len = 0;
182
183 s1len = VARSIZE_ANY_EXHDR(string1);
184 if (s1len < 0)
185 s1len = 0; /* shouldn't happen */
186
187 s2len = VARSIZE_ANY_EXHDR(string2);
188 if (s2len < 0)
189 s2len = 0; /* shouldn't happen */
190
191 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
192
193 if (s1len > len)
194 s1len = len; /* truncate string1 to len chars */
195
196 if (s2len <= 0)
197 len = s1len; /* nothing to pad with, so don't pad */
198
199 /* compute worst-case output length */
201 &bytelen)) ||
202 unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
203 unlikely(!AllocSizeIsValid(bytelen)))
205 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
206 errmsg("requested length too large")));
207
208 ret = (text *) palloc(bytelen);
209
210 m = len - s1len;
211
212 ptr2 = ptr2start = VARDATA_ANY(string2);
213 ptr2end = ptr2 + s2len;
214 ptr_ret = VARDATA(ret);
215
216 while (m--)
217 {
218 int mlen = pg_mblen(ptr2);
219
220 memcpy(ptr_ret, ptr2, mlen);
221 ptr_ret += mlen;
222 ptr2 += mlen;
223 if (ptr2 == ptr2end) /* wrap around at end of s2 */
224 ptr2 = ptr2start;
225 }
226
227 ptr1 = VARDATA_ANY(string1);
228
229 while (s1len--)
230 {
231 int mlen = pg_mblen(ptr1);
232
233 memcpy(ptr_ret, ptr1, mlen);
234 ptr_ret += mlen;
235 ptr1 += mlen;
236 }
237
238 SET_VARSIZE(ret, ptr_ret - (char *) ret);
239
240 PG_RETURN_TEXT_P(ret);
241}
242
243
244/********************************************************************
245 *
246 * rpad
247 *
248 * Syntax:
249 *
250 * text rpad(text string1, int4 len, text string2)
251 *
252 * Purpose:
253 *
254 * Returns string1, right-padded to length len with the sequence of
255 * characters in string2. If len is less than the length of string1,
256 * instead truncate (on the right) to len.
257 *
258 ********************************************************************/
259
260Datum
262{
263 text *string1 = PG_GETARG_TEXT_PP(0);
265 text *string2 = PG_GETARG_TEXT_PP(2);
266 text *ret;
267 char *ptr1,
268 *ptr2,
269 *ptr2start,
270 *ptr2end,
271 *ptr_ret;
272 int m,
273 s1len,
274 s2len;
275 int bytelen;
276
277 /* Negative len is silently taken as zero */
278 if (len < 0)
279 len = 0;
280
281 s1len = VARSIZE_ANY_EXHDR(string1);
282 if (s1len < 0)
283 s1len = 0; /* shouldn't happen */
284
285 s2len = VARSIZE_ANY_EXHDR(string2);
286 if (s2len < 0)
287 s2len = 0; /* shouldn't happen */
288
289 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
290
291 if (s1len > len)
292 s1len = len; /* truncate string1 to len chars */
293
294 if (s2len <= 0)
295 len = s1len; /* nothing to pad with, so don't pad */
296
297 /* compute worst-case output length */
299 &bytelen)) ||
300 unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
301 unlikely(!AllocSizeIsValid(bytelen)))
303 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
304 errmsg("requested length too large")));
305
306 ret = (text *) palloc(bytelen);
307
308 m = len - s1len;
309
310 ptr1 = VARDATA_ANY(string1);
311 ptr_ret = VARDATA(ret);
312
313 while (s1len--)
314 {
315 int mlen = pg_mblen(ptr1);
316
317 memcpy(ptr_ret, ptr1, mlen);
318 ptr_ret += mlen;
319 ptr1 += mlen;
320 }
321
322 ptr2 = ptr2start = VARDATA_ANY(string2);
323 ptr2end = ptr2 + s2len;
324
325 while (m--)
326 {
327 int mlen = pg_mblen(ptr2);
328
329 memcpy(ptr_ret, ptr2, mlen);
330 ptr_ret += mlen;
331 ptr2 += mlen;
332 if (ptr2 == ptr2end) /* wrap around at end of s2 */
333 ptr2 = ptr2start;
334 }
335
336 SET_VARSIZE(ret, ptr_ret - (char *) ret);
337
338 PG_RETURN_TEXT_P(ret);
339}
340
341
342/********************************************************************
343 *
344 * btrim
345 *
346 * Syntax:
347 *
348 * text btrim(text string, text set)
349 *
350 * Purpose:
351 *
352 * Returns string with characters removed from the front and back
353 * up to the first character not in set.
354 *
355 ********************************************************************/
356
357Datum
359{
360 text *string = PG_GETARG_TEXT_PP(0);
361 text *set = PG_GETARG_TEXT_PP(1);
362 text *ret;
363
364 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
366 true, true);
367
368 PG_RETURN_TEXT_P(ret);
369}
370
371/********************************************************************
372 *
373 * btrim1 --- btrim with set fixed as ' '
374 *
375 ********************************************************************/
376
377Datum
379{
380 text *string = PG_GETARG_TEXT_PP(0);
381 text *ret;
382
383 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
384 " ", 1,
385 true, true);
386
387 PG_RETURN_TEXT_P(ret);
388}
389
390/*
391 * Common implementation for btrim, ltrim, rtrim
392 */
393static text *
394dotrim(const char *string, int stringlen,
395 const char *set, int setlen,
396 bool doltrim, bool dortrim)
397{
398 int i;
399
400 /* Nothing to do if either string or set is empty */
401 if (stringlen > 0 && setlen > 0)
402 {
404 {
405 /*
406 * In the multibyte-encoding case, build arrays of pointers to
407 * character starts, so that we can avoid inefficient checks in
408 * the inner loops.
409 */
410 const char **stringchars;
411 const char **setchars;
412 int *stringmblen;
413 int *setmblen;
414 int stringnchars;
415 int setnchars;
416 int resultndx;
417 int resultnchars;
418 const char *p;
419 int len;
420 int mblen;
421 const char *str_pos;
422 int str_len;
423
424 stringchars = (const char **) palloc(stringlen * sizeof(char *));
425 stringmblen = (int *) palloc(stringlen * sizeof(int));
426 stringnchars = 0;
427 p = string;
428 len = stringlen;
429 while (len > 0)
430 {
431 stringchars[stringnchars] = p;
432 stringmblen[stringnchars] = mblen = pg_mblen(p);
433 stringnchars++;
434 p += mblen;
435 len -= mblen;
436 }
437
438 setchars = (const char **) palloc(setlen * sizeof(char *));
439 setmblen = (int *) palloc(setlen * sizeof(int));
440 setnchars = 0;
441 p = set;
442 len = setlen;
443 while (len > 0)
444 {
445 setchars[setnchars] = p;
446 setmblen[setnchars] = mblen = pg_mblen(p);
447 setnchars++;
448 p += mblen;
449 len -= mblen;
450 }
451
452 resultndx = 0; /* index in stringchars[] */
453 resultnchars = stringnchars;
454
455 if (doltrim)
456 {
457 while (resultnchars > 0)
458 {
459 str_pos = stringchars[resultndx];
460 str_len = stringmblen[resultndx];
461 for (i = 0; i < setnchars; i++)
462 {
463 if (str_len == setmblen[i] &&
464 memcmp(str_pos, setchars[i], str_len) == 0)
465 break;
466 }
467 if (i >= setnchars)
468 break; /* no match here */
469 string += str_len;
470 stringlen -= str_len;
471 resultndx++;
472 resultnchars--;
473 }
474 }
475
476 if (dortrim)
477 {
478 while (resultnchars > 0)
479 {
480 str_pos = stringchars[resultndx + resultnchars - 1];
481 str_len = stringmblen[resultndx + resultnchars - 1];
482 for (i = 0; i < setnchars; i++)
483 {
484 if (str_len == setmblen[i] &&
485 memcmp(str_pos, setchars[i], str_len) == 0)
486 break;
487 }
488 if (i >= setnchars)
489 break; /* no match here */
490 stringlen -= str_len;
491 resultnchars--;
492 }
493 }
494
495 pfree(stringchars);
496 pfree(stringmblen);
497 pfree(setchars);
498 pfree(setmblen);
499 }
500 else
501 {
502 /*
503 * In the single-byte-encoding case, we don't need such overhead.
504 */
505 if (doltrim)
506 {
507 while (stringlen > 0)
508 {
509 char str_ch = *string;
510
511 for (i = 0; i < setlen; i++)
512 {
513 if (str_ch == set[i])
514 break;
515 }
516 if (i >= setlen)
517 break; /* no match here */
518 string++;
519 stringlen--;
520 }
521 }
522
523 if (dortrim)
524 {
525 while (stringlen > 0)
526 {
527 char str_ch = string[stringlen - 1];
528
529 for (i = 0; i < setlen; i++)
530 {
531 if (str_ch == set[i])
532 break;
533 }
534 if (i >= setlen)
535 break; /* no match here */
536 stringlen--;
537 }
538 }
539 }
540 }
541
542 /* Return selected portion of string */
543 return cstring_to_text_with_len(string, stringlen);
544}
545
546/*
547 * Common implementation for bytea versions of btrim, ltrim, rtrim
548 */
549bytea *
550dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
551{
552 bytea *ret;
553 char *ptr,
554 *end,
555 *ptr2,
556 *ptr2start,
557 *end2;
558 int m,
559 stringlen,
560 setlen;
561
562 stringlen = VARSIZE_ANY_EXHDR(string);
563 setlen = VARSIZE_ANY_EXHDR(set);
564
565 if (stringlen <= 0 || setlen <= 0)
566 return string;
567
568 m = stringlen;
569 ptr = VARDATA_ANY(string);
570 end = ptr + stringlen - 1;
571 ptr2start = VARDATA_ANY(set);
572 end2 = ptr2start + setlen - 1;
573
574 if (doltrim)
575 {
576 while (m > 0)
577 {
578 ptr2 = ptr2start;
579 while (ptr2 <= end2)
580 {
581 if (*ptr == *ptr2)
582 break;
583 ++ptr2;
584 }
585 if (ptr2 > end2)
586 break;
587 ptr++;
588 m--;
589 }
590 }
591
592 if (dortrim)
593 {
594 while (m > 0)
595 {
596 ptr2 = ptr2start;
597 while (ptr2 <= end2)
598 {
599 if (*end == *ptr2)
600 break;
601 ++ptr2;
602 }
603 if (ptr2 > end2)
604 break;
605 end--;
606 m--;
607 }
608 }
609
610 ret = (bytea *) palloc(VARHDRSZ + m);
611 SET_VARSIZE(ret, VARHDRSZ + m);
612 memcpy(VARDATA(ret), ptr, m);
613 return ret;
614}
615
616/********************************************************************
617 *
618 * byteatrim
619 *
620 * Syntax:
621 *
622 * bytea byteatrim(bytea string, bytea set)
623 *
624 * Purpose:
625 *
626 * Returns string with characters removed from the front and back
627 * up to the first character not in set.
628 *
629 * Cloned from btrim and modified as required.
630 ********************************************************************/
631
632Datum
634{
635 bytea *string = PG_GETARG_BYTEA_PP(0);
636 bytea *set = PG_GETARG_BYTEA_PP(1);
637 bytea *ret;
638
639 ret = dobyteatrim(string, set, true, true);
640
642}
643
644/********************************************************************
645 *
646 * bytealtrim
647 *
648 * Syntax:
649 *
650 * bytea bytealtrim(bytea string, bytea set)
651 *
652 * Purpose:
653 *
654 * Returns string with initial characters removed up to the first
655 * character not in set.
656 *
657 ********************************************************************/
658
659Datum
661{
662 bytea *string = PG_GETARG_BYTEA_PP(0);
663 bytea *set = PG_GETARG_BYTEA_PP(1);
664 bytea *ret;
665
666 ret = dobyteatrim(string, set, true, false);
667
669}
670
671/********************************************************************
672 *
673 * byteartrim
674 *
675 * Syntax:
676 *
677 * bytea byteartrim(bytea string, bytea set)
678 *
679 * Purpose:
680 *
681 * Returns string with final characters removed after the last
682 * character not in set.
683 *
684 ********************************************************************/
685
686Datum
688{
689 bytea *string = PG_GETARG_BYTEA_PP(0);
690 bytea *set = PG_GETARG_BYTEA_PP(1);
691 bytea *ret;
692
693 ret = dobyteatrim(string, set, false, true);
694
696}
697
698/********************************************************************
699 *
700 * ltrim
701 *
702 * Syntax:
703 *
704 * text ltrim(text string, text set)
705 *
706 * Purpose:
707 *
708 * Returns string with initial characters removed up to the first
709 * character not in set.
710 *
711 ********************************************************************/
712
713Datum
715{
716 text *string = PG_GETARG_TEXT_PP(0);
717 text *set = PG_GETARG_TEXT_PP(1);
718 text *ret;
719
720 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
722 true, false);
723
724 PG_RETURN_TEXT_P(ret);
725}
726
727/********************************************************************
728 *
729 * ltrim1 --- ltrim with set fixed as ' '
730 *
731 ********************************************************************/
732
733Datum
735{
736 text *string = PG_GETARG_TEXT_PP(0);
737 text *ret;
738
739 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
740 " ", 1,
741 true, false);
742
743 PG_RETURN_TEXT_P(ret);
744}
745
746/********************************************************************
747 *
748 * rtrim
749 *
750 * Syntax:
751 *
752 * text rtrim(text string, text set)
753 *
754 * Purpose:
755 *
756 * Returns string with final characters removed after the last
757 * character not in set.
758 *
759 ********************************************************************/
760
761Datum
763{
764 text *string = PG_GETARG_TEXT_PP(0);
765 text *set = PG_GETARG_TEXT_PP(1);
766 text *ret;
767
768 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
770 false, true);
771
772 PG_RETURN_TEXT_P(ret);
773}
774
775/********************************************************************
776 *
777 * rtrim1 --- rtrim with set fixed as ' '
778 *
779 ********************************************************************/
780
781Datum
783{
784 text *string = PG_GETARG_TEXT_PP(0);
785 text *ret;
786
787 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
788 " ", 1,
789 false, true);
790
791 PG_RETURN_TEXT_P(ret);
792}
793
794
795/********************************************************************
796 *
797 * translate
798 *
799 * Syntax:
800 *
801 * text translate(text string, text from, text to)
802 *
803 * Purpose:
804 *
805 * Returns string after replacing all occurrences of characters in from
806 * with the corresponding character in to. If from is longer than to,
807 * occurrences of the extra characters in from are deleted.
808 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
809 *
810 ********************************************************************/
811
812Datum
814{
815 text *string = PG_GETARG_TEXT_PP(0);
816 text *from = PG_GETARG_TEXT_PP(1);
817 text *to = PG_GETARG_TEXT_PP(2);
818 text *result;
819 char *from_ptr,
820 *to_ptr,
821 *to_end;
822 char *source,
823 *target;
824 int m,
825 fromlen,
826 tolen,
827 retlen,
828 i;
829 int bytelen;
830 int len;
831 int source_len;
832 int from_index;
833
834 m = VARSIZE_ANY_EXHDR(string);
835 if (m <= 0)
836 PG_RETURN_TEXT_P(string);
837 source = VARDATA_ANY(string);
838
839 fromlen = VARSIZE_ANY_EXHDR(from);
840 from_ptr = VARDATA_ANY(from);
841 tolen = VARSIZE_ANY_EXHDR(to);
842 to_ptr = VARDATA_ANY(to);
843 to_end = to_ptr + tolen;
844
845 /*
846 * The worst-case expansion is to substitute a max-length character for a
847 * single-byte character at each position of the string.
848 */
850 &bytelen)) ||
851 unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
852 unlikely(!AllocSizeIsValid(bytelen)))
854 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
855 errmsg("requested length too large")));
856
857 result = (text *) palloc(bytelen);
858
859 target = VARDATA(result);
860 retlen = 0;
861
862 while (m > 0)
863 {
864 source_len = pg_mblen(source);
865 from_index = 0;
866
867 for (i = 0; i < fromlen; i += len)
868 {
869 len = pg_mblen(&from_ptr[i]);
870 if (len == source_len &&
871 memcmp(source, &from_ptr[i], len) == 0)
872 break;
873
874 from_index++;
875 }
876 if (i < fromlen)
877 {
878 /* substitute, or delete if no corresponding "to" character */
879 char *p = to_ptr;
880
881 for (i = 0; i < from_index; i++)
882 {
883 if (p >= to_end)
884 break;
885 p += pg_mblen(p);
886 }
887 if (p < to_end)
888 {
889 len = pg_mblen(p);
890 memcpy(target, p, len);
891 target += len;
892 retlen += len;
893 }
894 }
895 else
896 {
897 /* no match, so copy */
898 memcpy(target, source, source_len);
899 target += source_len;
900 retlen += source_len;
901 }
902
903 source += source_len;
904 m -= source_len;
905 }
906
907 SET_VARSIZE(result, retlen + VARHDRSZ);
908
909 /*
910 * The function result is probably much bigger than needed, if we're using
911 * a multibyte encoding, but it's not worth reallocating it; the result
912 * probably won't live long anyway.
913 */
914
915 PG_RETURN_TEXT_P(result);
916}
917
918/********************************************************************
919 *
920 * ascii
921 *
922 * Syntax:
923 *
924 * int ascii(text string)
925 *
926 * Purpose:
927 *
928 * Returns the decimal representation of the first character from
929 * string.
930 * If the string is empty we return 0.
931 * If the database encoding is UTF8, we return the Unicode codepoint.
932 * If the database encoding is any other multi-byte encoding, we
933 * return the value of the first byte if it is an ASCII character
934 * (range 1 .. 127), or raise an error.
935 * For all other encodings we return the value of the first byte
936 * (range 1..255).
937 *
938 ********************************************************************/
939
940Datum
942{
943 text *string = PG_GETARG_TEXT_PP(0);
945 unsigned char *data;
946
947 if (VARSIZE_ANY_EXHDR(string) <= 0)
949
950 data = (unsigned char *) VARDATA_ANY(string);
951
952 if (encoding == PG_UTF8 && *data > 127)
953 {
954 /* return the code point for Unicode */
955
956 int result = 0,
957 tbytes = 0,
958 i;
959
960 if (*data >= 0xF0)
961 {
962 result = *data & 0x07;
963 tbytes = 3;
964 }
965 else if (*data >= 0xE0)
966 {
967 result = *data & 0x0F;
968 tbytes = 2;
969 }
970 else
971 {
972 Assert(*data > 0xC0);
973 result = *data & 0x1f;
974 tbytes = 1;
975 }
976
977 Assert(tbytes > 0);
978
979 for (i = 1; i <= tbytes; i++)
980 {
981 Assert((data[i] & 0xC0) == 0x80);
982 result = (result << 6) + (data[i] & 0x3f);
983 }
984
985 PG_RETURN_INT32(result);
986 }
987 else
988 {
989 if (pg_encoding_max_length(encoding) > 1 && *data > 127)
991 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
992 errmsg("requested character too large")));
993
994
996 }
997}
998
999/********************************************************************
1000 *
1001 * chr
1002 *
1003 * Syntax:
1004 *
1005 * text chr(int val)
1006 *
1007 * Purpose:
1008 *
1009 * Returns the character having the binary equivalent to val.
1010 *
1011 * For UTF8 we treat the argument as a Unicode code point.
1012 * For other multi-byte encodings we raise an error for arguments
1013 * outside the strict ASCII range (1..127).
1014 *
1015 * It's important that we don't ever return a value that is not valid
1016 * in the database encoding, so that this doesn't become a way for
1017 * invalid data to enter the database.
1018 *
1019 ********************************************************************/
1020
1021Datum
1023{
1025 uint32 cvalue;
1026 text *result;
1028
1029 /*
1030 * Error out on arguments that make no sense or that we can't validly
1031 * represent in the encoding.
1032 */
1033 if (arg < 0)
1034 ereport(ERROR,
1035 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1036 errmsg("character number must be positive")));
1037 else if (arg == 0)
1038 ereport(ERROR,
1039 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1040 errmsg("null character not permitted")));
1041
1042 cvalue = arg;
1043
1044 if (encoding == PG_UTF8 && cvalue > 127)
1045 {
1046 /* for Unicode we treat the argument as a code point */
1047 int bytes;
1048 unsigned char *wch;
1049
1050 /*
1051 * We only allow valid Unicode code points; per RFC3629 that stops at
1052 * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
1053 * U+1FFFFF.
1054 */
1055 if (cvalue > 0x0010ffff)
1056 ereport(ERROR,
1057 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1058 errmsg("requested character too large for encoding: %u",
1059 cvalue)));
1060
1061 if (cvalue > 0xffff)
1062 bytes = 4;
1063 else if (cvalue > 0x07ff)
1064 bytes = 3;
1065 else
1066 bytes = 2;
1067
1068 result = (text *) palloc(VARHDRSZ + bytes);
1069 SET_VARSIZE(result, VARHDRSZ + bytes);
1070 wch = (unsigned char *) VARDATA(result);
1071
1072 if (bytes == 2)
1073 {
1074 wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
1075 wch[1] = 0x80 | (cvalue & 0x3F);
1076 }
1077 else if (bytes == 3)
1078 {
1079 wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
1080 wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
1081 wch[2] = 0x80 | (cvalue & 0x3F);
1082 }
1083 else
1084 {
1085 wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
1086 wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
1087 wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
1088 wch[3] = 0x80 | (cvalue & 0x3F);
1089 }
1090
1091 /*
1092 * The preceding range check isn't sufficient, because UTF8 excludes
1093 * Unicode "surrogate pair" codes. Make sure what we created is valid
1094 * UTF8.
1095 */
1096 if (!pg_utf8_islegal(wch, bytes))
1097 ereport(ERROR,
1098 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1099 errmsg("requested character not valid for encoding: %u",
1100 cvalue)));
1101 }
1102 else
1103 {
1104 bool is_mb;
1105
1106 is_mb = pg_encoding_max_length(encoding) > 1;
1107
1108 if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1109 ereport(ERROR,
1110 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1111 errmsg("requested character too large for encoding: %u",
1112 cvalue)));
1113
1114 result = (text *) palloc(VARHDRSZ + 1);
1115 SET_VARSIZE(result, VARHDRSZ + 1);
1116 *VARDATA(result) = (char) cvalue;
1117 }
1118
1119 PG_RETURN_TEXT_P(result);
1120}
1121
1122/********************************************************************
1123 *
1124 * repeat
1125 *
1126 * Syntax:
1127 *
1128 * text repeat(text string, int val)
1129 *
1130 * Purpose:
1131 *
1132 * Repeat string by val.
1133 *
1134 ********************************************************************/
1135
1136Datum
1138{
1139 text *string = PG_GETARG_TEXT_PP(0);
1140 int32 count = PG_GETARG_INT32(1);
1141 text *result;
1142 int slen,
1143 tlen;
1144 int i;
1145 char *cp,
1146 *sp;
1147
1148 if (count < 0)
1149 count = 0;
1150
1151 slen = VARSIZE_ANY_EXHDR(string);
1152
1153 if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
1154 unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)) ||
1155 unlikely(!AllocSizeIsValid(tlen)))
1156 ereport(ERROR,
1157 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1158 errmsg("requested length too large")));
1159
1160 result = (text *) palloc(tlen);
1161
1162 SET_VARSIZE(result, tlen);
1163 cp = VARDATA(result);
1164 sp = VARDATA_ANY(string);
1165 for (i = 0; i < count; i++)
1166 {
1167 memcpy(cp, sp, slen);
1168 cp += slen;
1170 }
1171
1172 PG_RETURN_TEXT_P(result);
1173}
#define VARHDRSZ
Definition: c.h:649
#define Assert(condition)
Definition: c.h:815
int32_t int32
Definition: c.h:484
#define unlikely(x)
Definition: c.h:333
uint32_t uint32
Definition: c.h:488
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
char * str_initcap(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1765
char * str_casefold(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1829
char * str_toupper(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1701
char * str_tolower(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1637
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:187
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:151
int i
Definition: isn.c:72
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:1057
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
#define AllocSizeIsValid(size)
Definition: memutils.h:42
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static text * dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim)
Datum bytealtrim(PG_FUNCTION_ARGS)
Datum ltrim(PG_FUNCTION_ARGS)
Datum byteatrim(PG_FUNCTION_ARGS)
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum initcap(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
Datum byteartrim(PG_FUNCTION_ARGS)
Datum rtrim(PG_FUNCTION_ARGS)
Datum rpad(PG_FUNCTION_ARGS)
Datum chr(PG_FUNCTION_ARGS)
Datum ltrim1(PG_FUNCTION_ARGS)
Datum btrim1(PG_FUNCTION_ARGS)
Datum ascii(PG_FUNCTION_ARGS)
Datum rtrim1(PG_FUNCTION_ARGS)
Datum casefold(PG_FUNCTION_ARGS)
Datum translate(PG_FUNCTION_ARGS)
static bytea * dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
Datum lpad(PG_FUNCTION_ARGS)
Datum btrim(PG_FUNCTION_ARGS)
Datum repeat(PG_FUNCTION_ARGS)
void * arg
const void size_t len
const void * data
int32 encoding
Definition: pg_database.h:41
static rewind_source * source
Definition: pg_rewind.c:89
@ PG_UTF8
Definition: pg_wchar.h:232
uintptr_t Datum
Definition: postgres.h:69
char string[11]
Definition: preproc-type.c:52
Definition: c.h:644
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:196
text * cstring_to_text(const char *s)
Definition: varlena.c:184
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1987
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2174