PostgreSQL Source Code  git master
oracle_compat.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  * oracle_compat.c
3  * Oracle compatible functions.
4  *
5  * Copyright (c) 1996-2024, PostgreSQL Global Development Group
6  *
7  * Author: Edmund Mergl <E.Mergl@bawue.de>
8  * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9  *
10  *
11  * IDENTIFICATION
12  * src/backend/utils/adt/oracle_compat.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17 
18 #include "common/int.h"
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/formatting.h"
23 #include "utils/memutils.h"
24 #include "varatt.h"
25 
26 
27 static text *dotrim(const char *string, int stringlen,
28  const char *set, int setlen,
29  bool doltrim, bool dortrim);
30 static bytea *dobyteatrim(bytea *string, bytea *set,
31  bool doltrim, bool dortrim);
32 
33 
34 /********************************************************************
35  *
36  * lower
37  *
38  * Syntax:
39  *
40  * text lower(text string)
41  *
42  * Purpose:
43  *
44  * Returns string, with all letters forced to lowercase.
45  *
46  ********************************************************************/
47 
48 Datum
50 {
51  text *in_string = PG_GETARG_TEXT_PP(0);
52  char *out_string;
53  text *result;
54 
55  out_string = str_tolower(VARDATA_ANY(in_string),
56  VARSIZE_ANY_EXHDR(in_string),
58  result = cstring_to_text(out_string);
59  pfree(out_string);
60 
61  PG_RETURN_TEXT_P(result);
62 }
63 
64 
65 /********************************************************************
66  *
67  * upper
68  *
69  * Syntax:
70  *
71  * text upper(text string)
72  *
73  * Purpose:
74  *
75  * Returns string, with all letters forced to uppercase.
76  *
77  ********************************************************************/
78 
79 Datum
81 {
82  text *in_string = PG_GETARG_TEXT_PP(0);
83  char *out_string;
84  text *result;
85 
86  out_string = str_toupper(VARDATA_ANY(in_string),
87  VARSIZE_ANY_EXHDR(in_string),
89  result = cstring_to_text(out_string);
90  pfree(out_string);
91 
92  PG_RETURN_TEXT_P(result);
93 }
94 
95 
96 /********************************************************************
97  *
98  * initcap
99  *
100  * Syntax:
101  *
102  * text initcap(text string)
103  *
104  * Purpose:
105  *
106  * Returns string, with first letter of each word in uppercase, all
107  * other letters in lowercase. A word is defined as a sequence of
108  * alphanumeric characters, delimited by non-alphanumeric
109  * characters.
110  *
111  ********************************************************************/
112 
113 Datum
115 {
116  text *in_string = PG_GETARG_TEXT_PP(0);
117  char *out_string;
118  text *result;
119 
120  out_string = str_initcap(VARDATA_ANY(in_string),
121  VARSIZE_ANY_EXHDR(in_string),
122  PG_GET_COLLATION());
123  result = cstring_to_text(out_string);
124  pfree(out_string);
125 
126  PG_RETURN_TEXT_P(result);
127 }
128 
129 
130 /********************************************************************
131  *
132  * lpad
133  *
134  * Syntax:
135  *
136  * text lpad(text string1, int4 len, text string2)
137  *
138  * Purpose:
139  *
140  * Returns string1, left-padded to length len with the sequence of
141  * characters in string2. If len is less than the length of string1,
142  * instead truncate (on the right) to len.
143  *
144  ********************************************************************/
145 
146 Datum
148 {
149  text *string1 = PG_GETARG_TEXT_PP(0);
151  text *string2 = PG_GETARG_TEXT_PP(2);
152  text *ret;
153  char *ptr1,
154  *ptr2,
155  *ptr2start,
156  *ptr2end,
157  *ptr_ret;
158  int m,
159  s1len,
160  s2len;
161  int bytelen;
162 
163  /* Negative len is silently taken as zero */
164  if (len < 0)
165  len = 0;
166 
167  s1len = VARSIZE_ANY_EXHDR(string1);
168  if (s1len < 0)
169  s1len = 0; /* shouldn't happen */
170 
171  s2len = VARSIZE_ANY_EXHDR(string2);
172  if (s2len < 0)
173  s2len = 0; /* shouldn't happen */
174 
175  s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
176 
177  if (s1len > len)
178  s1len = len; /* truncate string1 to len chars */
179 
180  if (s2len <= 0)
181  len = s1len; /* nothing to pad with, so don't pad */
182 
183  /* compute worst-case output length */
185  &bytelen)) ||
186  unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
187  unlikely(!AllocSizeIsValid(bytelen)))
188  ereport(ERROR,
189  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
190  errmsg("requested length too large")));
191 
192  ret = (text *) palloc(bytelen);
193 
194  m = len - s1len;
195 
196  ptr2 = ptr2start = VARDATA_ANY(string2);
197  ptr2end = ptr2 + s2len;
198  ptr_ret = VARDATA(ret);
199 
200  while (m--)
201  {
202  int mlen = pg_mblen(ptr2);
203 
204  memcpy(ptr_ret, ptr2, mlen);
205  ptr_ret += mlen;
206  ptr2 += mlen;
207  if (ptr2 == ptr2end) /* wrap around at end of s2 */
208  ptr2 = ptr2start;
209  }
210 
211  ptr1 = VARDATA_ANY(string1);
212 
213  while (s1len--)
214  {
215  int mlen = pg_mblen(ptr1);
216 
217  memcpy(ptr_ret, ptr1, mlen);
218  ptr_ret += mlen;
219  ptr1 += mlen;
220  }
221 
222  SET_VARSIZE(ret, ptr_ret - (char *) ret);
223 
224  PG_RETURN_TEXT_P(ret);
225 }
226 
227 
228 /********************************************************************
229  *
230  * rpad
231  *
232  * Syntax:
233  *
234  * text rpad(text string1, int4 len, text string2)
235  *
236  * Purpose:
237  *
238  * Returns string1, right-padded to length len with the sequence of
239  * characters in string2. If len is less than the length of string1,
240  * instead truncate (on the right) to len.
241  *
242  ********************************************************************/
243 
244 Datum
246 {
247  text *string1 = PG_GETARG_TEXT_PP(0);
249  text *string2 = PG_GETARG_TEXT_PP(2);
250  text *ret;
251  char *ptr1,
252  *ptr2,
253  *ptr2start,
254  *ptr2end,
255  *ptr_ret;
256  int m,
257  s1len,
258  s2len;
259  int bytelen;
260 
261  /* Negative len is silently taken as zero */
262  if (len < 0)
263  len = 0;
264 
265  s1len = VARSIZE_ANY_EXHDR(string1);
266  if (s1len < 0)
267  s1len = 0; /* shouldn't happen */
268 
269  s2len = VARSIZE_ANY_EXHDR(string2);
270  if (s2len < 0)
271  s2len = 0; /* shouldn't happen */
272 
273  s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
274 
275  if (s1len > len)
276  s1len = len; /* truncate string1 to len chars */
277 
278  if (s2len <= 0)
279  len = s1len; /* nothing to pad with, so don't pad */
280 
281  /* compute worst-case output length */
283  &bytelen)) ||
284  unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
285  unlikely(!AllocSizeIsValid(bytelen)))
286  ereport(ERROR,
287  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
288  errmsg("requested length too large")));
289 
290  ret = (text *) palloc(bytelen);
291 
292  m = len - s1len;
293 
294  ptr1 = VARDATA_ANY(string1);
295  ptr_ret = VARDATA(ret);
296 
297  while (s1len--)
298  {
299  int mlen = pg_mblen(ptr1);
300 
301  memcpy(ptr_ret, ptr1, mlen);
302  ptr_ret += mlen;
303  ptr1 += mlen;
304  }
305 
306  ptr2 = ptr2start = VARDATA_ANY(string2);
307  ptr2end = ptr2 + s2len;
308 
309  while (m--)
310  {
311  int mlen = pg_mblen(ptr2);
312 
313  memcpy(ptr_ret, ptr2, mlen);
314  ptr_ret += mlen;
315  ptr2 += mlen;
316  if (ptr2 == ptr2end) /* wrap around at end of s2 */
317  ptr2 = ptr2start;
318  }
319 
320  SET_VARSIZE(ret, ptr_ret - (char *) ret);
321 
322  PG_RETURN_TEXT_P(ret);
323 }
324 
325 
326 /********************************************************************
327  *
328  * btrim
329  *
330  * Syntax:
331  *
332  * text btrim(text string, text set)
333  *
334  * Purpose:
335  *
336  * Returns string with characters removed from the front and back
337  * up to the first character not in set.
338  *
339  ********************************************************************/
340 
341 Datum
343 {
344  text *string = PG_GETARG_TEXT_PP(0);
345  text *set = PG_GETARG_TEXT_PP(1);
346  text *ret;
347 
348  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
349  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
350  true, true);
351 
352  PG_RETURN_TEXT_P(ret);
353 }
354 
355 /********************************************************************
356  *
357  * btrim1 --- btrim with set fixed as ' '
358  *
359  ********************************************************************/
360 
361 Datum
363 {
364  text *string = PG_GETARG_TEXT_PP(0);
365  text *ret;
366 
367  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
368  " ", 1,
369  true, true);
370 
371  PG_RETURN_TEXT_P(ret);
372 }
373 
374 /*
375  * Common implementation for btrim, ltrim, rtrim
376  */
377 static text *
378 dotrim(const char *string, int stringlen,
379  const char *set, int setlen,
380  bool doltrim, bool dortrim)
381 {
382  int i;
383 
384  /* Nothing to do if either string or set is empty */
385  if (stringlen > 0 && setlen > 0)
386  {
388  {
389  /*
390  * In the multibyte-encoding case, build arrays of pointers to
391  * character starts, so that we can avoid inefficient checks in
392  * the inner loops.
393  */
394  const char **stringchars;
395  const char **setchars;
396  int *stringmblen;
397  int *setmblen;
398  int stringnchars;
399  int setnchars;
400  int resultndx;
401  int resultnchars;
402  const char *p;
403  int len;
404  int mblen;
405  const char *str_pos;
406  int str_len;
407 
408  stringchars = (const char **) palloc(stringlen * sizeof(char *));
409  stringmblen = (int *) palloc(stringlen * sizeof(int));
410  stringnchars = 0;
411  p = string;
412  len = stringlen;
413  while (len > 0)
414  {
415  stringchars[stringnchars] = p;
416  stringmblen[stringnchars] = mblen = pg_mblen(p);
417  stringnchars++;
418  p += mblen;
419  len -= mblen;
420  }
421 
422  setchars = (const char **) palloc(setlen * sizeof(char *));
423  setmblen = (int *) palloc(setlen * sizeof(int));
424  setnchars = 0;
425  p = set;
426  len = setlen;
427  while (len > 0)
428  {
429  setchars[setnchars] = p;
430  setmblen[setnchars] = mblen = pg_mblen(p);
431  setnchars++;
432  p += mblen;
433  len -= mblen;
434  }
435 
436  resultndx = 0; /* index in stringchars[] */
437  resultnchars = stringnchars;
438 
439  if (doltrim)
440  {
441  while (resultnchars > 0)
442  {
443  str_pos = stringchars[resultndx];
444  str_len = stringmblen[resultndx];
445  for (i = 0; i < setnchars; i++)
446  {
447  if (str_len == setmblen[i] &&
448  memcmp(str_pos, setchars[i], str_len) == 0)
449  break;
450  }
451  if (i >= setnchars)
452  break; /* no match here */
453  string += str_len;
454  stringlen -= str_len;
455  resultndx++;
456  resultnchars--;
457  }
458  }
459 
460  if (dortrim)
461  {
462  while (resultnchars > 0)
463  {
464  str_pos = stringchars[resultndx + resultnchars - 1];
465  str_len = stringmblen[resultndx + resultnchars - 1];
466  for (i = 0; i < setnchars; i++)
467  {
468  if (str_len == setmblen[i] &&
469  memcmp(str_pos, setchars[i], str_len) == 0)
470  break;
471  }
472  if (i >= setnchars)
473  break; /* no match here */
474  stringlen -= str_len;
475  resultnchars--;
476  }
477  }
478 
479  pfree(stringchars);
480  pfree(stringmblen);
481  pfree(setchars);
482  pfree(setmblen);
483  }
484  else
485  {
486  /*
487  * In the single-byte-encoding case, we don't need such overhead.
488  */
489  if (doltrim)
490  {
491  while (stringlen > 0)
492  {
493  char str_ch = *string;
494 
495  for (i = 0; i < setlen; i++)
496  {
497  if (str_ch == set[i])
498  break;
499  }
500  if (i >= setlen)
501  break; /* no match here */
502  string++;
503  stringlen--;
504  }
505  }
506 
507  if (dortrim)
508  {
509  while (stringlen > 0)
510  {
511  char str_ch = string[stringlen - 1];
512 
513  for (i = 0; i < setlen; i++)
514  {
515  if (str_ch == set[i])
516  break;
517  }
518  if (i >= setlen)
519  break; /* no match here */
520  stringlen--;
521  }
522  }
523  }
524  }
525 
526  /* Return selected portion of string */
527  return cstring_to_text_with_len(string, stringlen);
528 }
529 
530 /*
531  * Common implementation for bytea versions of btrim, ltrim, rtrim
532  */
533 bytea *
534 dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
535 {
536  bytea *ret;
537  char *ptr,
538  *end,
539  *ptr2,
540  *ptr2start,
541  *end2;
542  int m,
543  stringlen,
544  setlen;
545 
546  stringlen = VARSIZE_ANY_EXHDR(string);
547  setlen = VARSIZE_ANY_EXHDR(set);
548 
549  if (stringlen <= 0 || setlen <= 0)
550  return string;
551 
552  m = stringlen;
553  ptr = VARDATA_ANY(string);
554  end = ptr + stringlen - 1;
555  ptr2start = VARDATA_ANY(set);
556  end2 = ptr2start + setlen - 1;
557 
558  if (doltrim)
559  {
560  while (m > 0)
561  {
562  ptr2 = ptr2start;
563  while (ptr2 <= end2)
564  {
565  if (*ptr == *ptr2)
566  break;
567  ++ptr2;
568  }
569  if (ptr2 > end2)
570  break;
571  ptr++;
572  m--;
573  }
574  }
575 
576  if (dortrim)
577  {
578  while (m > 0)
579  {
580  ptr2 = ptr2start;
581  while (ptr2 <= end2)
582  {
583  if (*end == *ptr2)
584  break;
585  ++ptr2;
586  }
587  if (ptr2 > end2)
588  break;
589  end--;
590  m--;
591  }
592  }
593 
594  ret = (bytea *) palloc(VARHDRSZ + m);
595  SET_VARSIZE(ret, VARHDRSZ + m);
596  memcpy(VARDATA(ret), ptr, m);
597  return ret;
598 }
599 
600 /********************************************************************
601  *
602  * byteatrim
603  *
604  * Syntax:
605  *
606  * bytea byteatrim(bytea string, bytea set)
607  *
608  * Purpose:
609  *
610  * Returns string with characters removed from the front and back
611  * up to the first character not in set.
612  *
613  * Cloned from btrim and modified as required.
614  ********************************************************************/
615 
616 Datum
618 {
619  bytea *string = PG_GETARG_BYTEA_PP(0);
620  bytea *set = PG_GETARG_BYTEA_PP(1);
621  bytea *ret;
622 
623  ret = dobyteatrim(string, set, true, true);
624 
625  PG_RETURN_BYTEA_P(ret);
626 }
627 
628 /********************************************************************
629  *
630  * bytealtrim
631  *
632  * Syntax:
633  *
634  * bytea bytealtrim(bytea string, bytea set)
635  *
636  * Purpose:
637  *
638  * Returns string with initial characters removed up to the first
639  * character not in set.
640  *
641  ********************************************************************/
642 
643 Datum
645 {
646  bytea *string = PG_GETARG_BYTEA_PP(0);
647  bytea *set = PG_GETARG_BYTEA_PP(1);
648  bytea *ret;
649 
650  ret = dobyteatrim(string, set, true, false);
651 
652  PG_RETURN_BYTEA_P(ret);
653 }
654 
655 /********************************************************************
656  *
657  * byteartrim
658  *
659  * Syntax:
660  *
661  * bytea byteartrim(bytea string, bytea set)
662  *
663  * Purpose:
664  *
665  * Returns string with final characters removed after the last
666  * character not in set.
667  *
668  ********************************************************************/
669 
670 Datum
672 {
673  bytea *string = PG_GETARG_BYTEA_PP(0);
674  bytea *set = PG_GETARG_BYTEA_PP(1);
675  bytea *ret;
676 
677  ret = dobyteatrim(string, set, false, true);
678 
679  PG_RETURN_BYTEA_P(ret);
680 }
681 
682 /********************************************************************
683  *
684  * ltrim
685  *
686  * Syntax:
687  *
688  * text ltrim(text string, text set)
689  *
690  * Purpose:
691  *
692  * Returns string with initial characters removed up to the first
693  * character not in set.
694  *
695  ********************************************************************/
696 
697 Datum
699 {
700  text *string = PG_GETARG_TEXT_PP(0);
701  text *set = PG_GETARG_TEXT_PP(1);
702  text *ret;
703 
704  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
705  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
706  true, false);
707 
708  PG_RETURN_TEXT_P(ret);
709 }
710 
711 /********************************************************************
712  *
713  * ltrim1 --- ltrim with set fixed as ' '
714  *
715  ********************************************************************/
716 
717 Datum
719 {
720  text *string = PG_GETARG_TEXT_PP(0);
721  text *ret;
722 
723  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
724  " ", 1,
725  true, false);
726 
727  PG_RETURN_TEXT_P(ret);
728 }
729 
730 /********************************************************************
731  *
732  * rtrim
733  *
734  * Syntax:
735  *
736  * text rtrim(text string, text set)
737  *
738  * Purpose:
739  *
740  * Returns string with final characters removed after the last
741  * character not in set.
742  *
743  ********************************************************************/
744 
745 Datum
747 {
748  text *string = PG_GETARG_TEXT_PP(0);
749  text *set = PG_GETARG_TEXT_PP(1);
750  text *ret;
751 
752  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
753  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
754  false, true);
755 
756  PG_RETURN_TEXT_P(ret);
757 }
758 
759 /********************************************************************
760  *
761  * rtrim1 --- rtrim with set fixed as ' '
762  *
763  ********************************************************************/
764 
765 Datum
767 {
768  text *string = PG_GETARG_TEXT_PP(0);
769  text *ret;
770 
771  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
772  " ", 1,
773  false, true);
774 
775  PG_RETURN_TEXT_P(ret);
776 }
777 
778 
779 /********************************************************************
780  *
781  * translate
782  *
783  * Syntax:
784  *
785  * text translate(text string, text from, text to)
786  *
787  * Purpose:
788  *
789  * Returns string after replacing all occurrences of characters in from
790  * with the corresponding character in to. If from is longer than to,
791  * occurrences of the extra characters in from are deleted.
792  * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
793  *
794  ********************************************************************/
795 
796 Datum
798 {
799  text *string = PG_GETARG_TEXT_PP(0);
800  text *from = PG_GETARG_TEXT_PP(1);
801  text *to = PG_GETARG_TEXT_PP(2);
802  text *result;
803  char *from_ptr,
804  *to_ptr,
805  *to_end;
806  char *source,
807  *target;
808  int m,
809  fromlen,
810  tolen,
811  retlen,
812  i;
813  int bytelen;
814  int len;
815  int source_len;
816  int from_index;
817 
818  m = VARSIZE_ANY_EXHDR(string);
819  if (m <= 0)
820  PG_RETURN_TEXT_P(string);
821  source = VARDATA_ANY(string);
822 
823  fromlen = VARSIZE_ANY_EXHDR(from);
824  from_ptr = VARDATA_ANY(from);
825  tolen = VARSIZE_ANY_EXHDR(to);
826  to_ptr = VARDATA_ANY(to);
827  to_end = to_ptr + tolen;
828 
829  /*
830  * The worst-case expansion is to substitute a max-length character for a
831  * single-byte character at each position of the string.
832  */
834  &bytelen)) ||
835  unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
836  unlikely(!AllocSizeIsValid(bytelen)))
837  ereport(ERROR,
838  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
839  errmsg("requested length too large")));
840 
841  result = (text *) palloc(bytelen);
842 
843  target = VARDATA(result);
844  retlen = 0;
845 
846  while (m > 0)
847  {
848  source_len = pg_mblen(source);
849  from_index = 0;
850 
851  for (i = 0; i < fromlen; i += len)
852  {
853  len = pg_mblen(&from_ptr[i]);
854  if (len == source_len &&
855  memcmp(source, &from_ptr[i], len) == 0)
856  break;
857 
858  from_index++;
859  }
860  if (i < fromlen)
861  {
862  /* substitute, or delete if no corresponding "to" character */
863  char *p = to_ptr;
864 
865  for (i = 0; i < from_index; i++)
866  {
867  if (p >= to_end)
868  break;
869  p += pg_mblen(p);
870  }
871  if (p < to_end)
872  {
873  len = pg_mblen(p);
874  memcpy(target, p, len);
875  target += len;
876  retlen += len;
877  }
878  }
879  else
880  {
881  /* no match, so copy */
882  memcpy(target, source, source_len);
883  target += source_len;
884  retlen += source_len;
885  }
886 
887  source += source_len;
888  m -= source_len;
889  }
890 
891  SET_VARSIZE(result, retlen + VARHDRSZ);
892 
893  /*
894  * The function result is probably much bigger than needed, if we're using
895  * a multibyte encoding, but it's not worth reallocating it; the result
896  * probably won't live long anyway.
897  */
898 
899  PG_RETURN_TEXT_P(result);
900 }
901 
902 /********************************************************************
903  *
904  * ascii
905  *
906  * Syntax:
907  *
908  * int ascii(text string)
909  *
910  * Purpose:
911  *
912  * Returns the decimal representation of the first character from
913  * string.
914  * If the string is empty we return 0.
915  * If the database encoding is UTF8, we return the Unicode codepoint.
916  * If the database encoding is any other multi-byte encoding, we
917  * return the value of the first byte if it is an ASCII character
918  * (range 1 .. 127), or raise an error.
919  * For all other encodings we return the value of the first byte,
920  * (range 1..255).
921  *
922  ********************************************************************/
923 
924 Datum
926 {
927  text *string = PG_GETARG_TEXT_PP(0);
929  unsigned char *data;
930 
931  if (VARSIZE_ANY_EXHDR(string) <= 0)
932  PG_RETURN_INT32(0);
933 
934  data = (unsigned char *) VARDATA_ANY(string);
935 
936  if (encoding == PG_UTF8 && *data > 127)
937  {
938  /* return the code point for Unicode */
939 
940  int result = 0,
941  tbytes = 0,
942  i;
943 
944  if (*data >= 0xF0)
945  {
946  result = *data & 0x07;
947  tbytes = 3;
948  }
949  else if (*data >= 0xE0)
950  {
951  result = *data & 0x0F;
952  tbytes = 2;
953  }
954  else
955  {
956  Assert(*data > 0xC0);
957  result = *data & 0x1f;
958  tbytes = 1;
959  }
960 
961  Assert(tbytes > 0);
962 
963  for (i = 1; i <= tbytes; i++)
964  {
965  Assert((data[i] & 0xC0) == 0x80);
966  result = (result << 6) + (data[i] & 0x3f);
967  }
968 
969  PG_RETURN_INT32(result);
970  }
971  else
972  {
973  if (pg_encoding_max_length(encoding) > 1 && *data > 127)
974  ereport(ERROR,
975  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
976  errmsg("requested character too large")));
977 
978 
980  }
981 }
982 
983 /********************************************************************
984  *
985  * chr
986  *
987  * Syntax:
988  *
989  * text chr(int val)
990  *
991  * Purpose:
992  *
993  * Returns the character having the binary equivalent to val.
994  *
995  * For UTF8 we treat the argument as a Unicode code point.
996  * For other multi-byte encodings we raise an error for arguments
997  * outside the strict ASCII range (1..127).
998  *
999  * It's important that we don't ever return a value that is not valid
1000  * in the database encoding, so that this doesn't become a way for
1001  * invalid data to enter the database.
1002  *
1003  ********************************************************************/
1004 
1005 Datum
1007 {
1008  int32 arg = PG_GETARG_INT32(0);
1009  uint32 cvalue;
1010  text *result;
1011  int encoding = GetDatabaseEncoding();
1012 
1013  /*
1014  * Error out on arguments that make no sense or that we can't validly
1015  * represent in the encoding.
1016  */
1017  if (arg < 0)
1018  ereport(ERROR,
1019  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1020  errmsg("character number must be positive")));
1021  else if (arg == 0)
1022  ereport(ERROR,
1023  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1024  errmsg("null character not permitted")));
1025 
1026  cvalue = arg;
1027 
1028  if (encoding == PG_UTF8 && cvalue > 127)
1029  {
1030  /* for Unicode we treat the argument as a code point */
1031  int bytes;
1032  unsigned char *wch;
1033 
1034  /*
1035  * We only allow valid Unicode code points; per RFC3629 that stops at
1036  * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
1037  * U+1FFFFF.
1038  */
1039  if (cvalue > 0x0010ffff)
1040  ereport(ERROR,
1041  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1042  errmsg("requested character too large for encoding: %u",
1043  cvalue)));
1044 
1045  if (cvalue > 0xffff)
1046  bytes = 4;
1047  else if (cvalue > 0x07ff)
1048  bytes = 3;
1049  else
1050  bytes = 2;
1051 
1052  result = (text *) palloc(VARHDRSZ + bytes);
1053  SET_VARSIZE(result, VARHDRSZ + bytes);
1054  wch = (unsigned char *) VARDATA(result);
1055 
1056  if (bytes == 2)
1057  {
1058  wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
1059  wch[1] = 0x80 | (cvalue & 0x3F);
1060  }
1061  else if (bytes == 3)
1062  {
1063  wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
1064  wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
1065  wch[2] = 0x80 | (cvalue & 0x3F);
1066  }
1067  else
1068  {
1069  wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
1070  wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
1071  wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
1072  wch[3] = 0x80 | (cvalue & 0x3F);
1073  }
1074 
1075  /*
1076  * The preceding range check isn't sufficient, because UTF8 excludes
1077  * Unicode "surrogate pair" codes. Make sure what we created is valid
1078  * UTF8.
1079  */
1080  if (!pg_utf8_islegal(wch, bytes))
1081  ereport(ERROR,
1082  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1083  errmsg("requested character not valid for encoding: %u",
1084  cvalue)));
1085  }
1086  else
1087  {
1088  bool is_mb;
1089 
1090  is_mb = pg_encoding_max_length(encoding) > 1;
1091 
1092  if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1093  ereport(ERROR,
1094  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1095  errmsg("requested character too large for encoding: %u",
1096  cvalue)));
1097 
1098  result = (text *) palloc(VARHDRSZ + 1);
1099  SET_VARSIZE(result, VARHDRSZ + 1);
1100  *VARDATA(result) = (char) cvalue;
1101  }
1102 
1103  PG_RETURN_TEXT_P(result);
1104 }
1105 
1106 /********************************************************************
1107  *
1108  * repeat
1109  *
1110  * Syntax:
1111  *
1112  * text repeat(text string, int val)
1113  *
1114  * Purpose:
1115  *
1116  * Repeat string by val.
1117  *
1118  ********************************************************************/
1119 
1120 Datum
1122 {
1123  text *string = PG_GETARG_TEXT_PP(0);
1124  int32 count = PG_GETARG_INT32(1);
1125  text *result;
1126  int slen,
1127  tlen;
1128  int i;
1129  char *cp,
1130  *sp;
1131 
1132  if (count < 0)
1133  count = 0;
1134 
1135  slen = VARSIZE_ANY_EXHDR(string);
1136 
1137  if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
1138  unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)) ||
1139  unlikely(!AllocSizeIsValid(tlen)))
1140  ereport(ERROR,
1141  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1142  errmsg("requested length too large")));
1143 
1144  result = (text *) palloc(tlen);
1145 
1146  SET_VARSIZE(result, tlen);
1147  cp = VARDATA(result);
1148  sp = VARDATA_ANY(string);
1149  for (i = 0; i < count; i++)
1150  {
1151  memcpy(cp, sp, slen);
1152  cp += slen;
1154  }
1155 
1156  PG_RETURN_TEXT_P(result);
1157 }
unsigned int uint32
Definition: c.h:493
signed int int32
Definition: c.h:481
#define VARHDRSZ
Definition: c.h:679
#define unlikely(x)
Definition: c.h:298
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
char * str_initcap(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1932
char * str_toupper(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1784
char * str_tolower(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1636
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:140
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:104
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:1057
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void pfree(void *pointer)
Definition: mcxt.c:1508
void * palloc(Size size)
Definition: mcxt.c:1304
#define AllocSizeIsValid(size)
Definition: memutils.h:42
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static text * dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim)
Datum bytealtrim(PG_FUNCTION_ARGS)
Datum ltrim(PG_FUNCTION_ARGS)
Datum byteatrim(PG_FUNCTION_ARGS)
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum initcap(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
Datum byteartrim(PG_FUNCTION_ARGS)
Datum rtrim(PG_FUNCTION_ARGS)
Datum rpad(PG_FUNCTION_ARGS)
Datum chr(PG_FUNCTION_ARGS)
Datum ltrim1(PG_FUNCTION_ARGS)
Datum btrim1(PG_FUNCTION_ARGS)
Datum ascii(PG_FUNCTION_ARGS)
Datum rtrim1(PG_FUNCTION_ARGS)
Datum translate(PG_FUNCTION_ARGS)
static bytea * dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
Datum lpad(PG_FUNCTION_ARGS)
Datum btrim(PG_FUNCTION_ARGS)
Datum repeat(PG_FUNCTION_ARGS)
void * arg
const void size_t len
const void * data
int32 encoding
Definition: pg_database.h:41
static rewind_source * source
Definition: pg_rewind.c:89
@ PG_UTF8
Definition: pg_wchar.h:232
uintptr_t Datum
Definition: postgres.h:64
char string[11]
Definition: preproc-type.c:52
Definition: c.h:674
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:196
text * cstring_to_text(const char *s)
Definition: varlena.c:184
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1953
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2127