PostgreSQL Source Code  git master
oracle_compat.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  * oracle_compat.c
3  * Oracle compatible functions.
4  *
5  * Copyright (c) 1996-2022, PostgreSQL Global Development Group
6  *
7  * Author: Edmund Mergl <E.Mergl@bawue.de>
8  * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9  *
10  *
11  * IDENTIFICATION
12  * src/backend/utils/adt/oracle_compat.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17 
18 #include "common/int.h"
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/formatting.h"
23 #include "utils/memutils.h"
24 
25 
26 static text *dotrim(const char *string, int stringlen,
27  const char *set, int setlen,
28  bool doltrim, bool dortrim);
29 static bytea *dobyteatrim(bytea *string, bytea *set,
30  bool doltrim, bool dortrim);
31 
32 
33 /********************************************************************
34  *
35  * lower
36  *
37  * Syntax:
38  *
39  * text lower(text string)
40  *
41  * Purpose:
42  *
43  * Returns string, with all letters forced to lowercase.
44  *
45  ********************************************************************/
46 
47 Datum
49 {
50  text *in_string = PG_GETARG_TEXT_PP(0);
51  char *out_string;
52  text *result;
53 
54  out_string = str_tolower(VARDATA_ANY(in_string),
55  VARSIZE_ANY_EXHDR(in_string),
57  result = cstring_to_text(out_string);
58  pfree(out_string);
59 
60  PG_RETURN_TEXT_P(result);
61 }
62 
63 
64 /********************************************************************
65  *
66  * upper
67  *
68  * Syntax:
69  *
70  * text upper(text string)
71  *
72  * Purpose:
73  *
74  * Returns string, with all letters forced to uppercase.
75  *
76  ********************************************************************/
77 
78 Datum
80 {
81  text *in_string = PG_GETARG_TEXT_PP(0);
82  char *out_string;
83  text *result;
84 
85  out_string = str_toupper(VARDATA_ANY(in_string),
86  VARSIZE_ANY_EXHDR(in_string),
88  result = cstring_to_text(out_string);
89  pfree(out_string);
90 
91  PG_RETURN_TEXT_P(result);
92 }
93 
94 
95 /********************************************************************
96  *
97  * initcap
98  *
99  * Syntax:
100  *
101  * text initcap(text string)
102  *
103  * Purpose:
104  *
105  * Returns string, with first letter of each word in uppercase, all
106  * other letters in lowercase. A word is defined as a sequence of
107  * alphanumeric characters, delimited by non-alphanumeric
108  * characters.
109  *
110  ********************************************************************/
111 
112 Datum
114 {
115  text *in_string = PG_GETARG_TEXT_PP(0);
116  char *out_string;
117  text *result;
118 
119  out_string = str_initcap(VARDATA_ANY(in_string),
120  VARSIZE_ANY_EXHDR(in_string),
121  PG_GET_COLLATION());
122  result = cstring_to_text(out_string);
123  pfree(out_string);
124 
125  PG_RETURN_TEXT_P(result);
126 }
127 
128 
129 /********************************************************************
130  *
131  * lpad
132  *
133  * Syntax:
134  *
135  * text lpad(text string1, int4 len, text string2)
136  *
137  * Purpose:
138  *
139  * Returns string1, left-padded to length len with the sequence of
140  * characters in string2. If len is less than the length of string1,
141  * instead truncate (on the right) to len.
142  *
143  ********************************************************************/
144 
145 Datum
147 {
148  text *string1 = PG_GETARG_TEXT_PP(0);
150  text *string2 = PG_GETARG_TEXT_PP(2);
151  text *ret;
152  char *ptr1,
153  *ptr2,
154  *ptr2start,
155  *ptr2end,
156  *ptr_ret;
157  int m,
158  s1len,
159  s2len;
160  int bytelen;
161 
162  /* Negative len is silently taken as zero */
163  if (len < 0)
164  len = 0;
165 
166  s1len = VARSIZE_ANY_EXHDR(string1);
167  if (s1len < 0)
168  s1len = 0; /* shouldn't happen */
169 
170  s2len = VARSIZE_ANY_EXHDR(string2);
171  if (s2len < 0)
172  s2len = 0; /* shouldn't happen */
173 
174  s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
175 
176  if (s1len > len)
177  s1len = len; /* truncate string1 to len chars */
178 
179  if (s2len <= 0)
180  len = s1len; /* nothing to pad with, so don't pad */
181 
182  /* compute worst-case output length */
184  &bytelen)) ||
185  unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
186  unlikely(!AllocSizeIsValid(bytelen)))
187  ereport(ERROR,
188  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
189  errmsg("requested length too large")));
190 
191  ret = (text *) palloc(bytelen);
192 
193  m = len - s1len;
194 
195  ptr2 = ptr2start = VARDATA_ANY(string2);
196  ptr2end = ptr2 + s2len;
197  ptr_ret = VARDATA(ret);
198 
199  while (m--)
200  {
201  int mlen = pg_mblen(ptr2);
202 
203  memcpy(ptr_ret, ptr2, mlen);
204  ptr_ret += mlen;
205  ptr2 += mlen;
206  if (ptr2 == ptr2end) /* wrap around at end of s2 */
207  ptr2 = ptr2start;
208  }
209 
210  ptr1 = VARDATA_ANY(string1);
211 
212  while (s1len--)
213  {
214  int mlen = pg_mblen(ptr1);
215 
216  memcpy(ptr_ret, ptr1, mlen);
217  ptr_ret += mlen;
218  ptr1 += mlen;
219  }
220 
221  SET_VARSIZE(ret, ptr_ret - (char *) ret);
222 
223  PG_RETURN_TEXT_P(ret);
224 }
225 
226 
227 /********************************************************************
228  *
229  * rpad
230  *
231  * Syntax:
232  *
233  * text rpad(text string1, int4 len, text string2)
234  *
235  * Purpose:
236  *
237  * Returns string1, right-padded to length len with the sequence of
238  * characters in string2. If len is less than the length of string1,
239  * instead truncate (on the right) to len.
240  *
241  ********************************************************************/
242 
243 Datum
245 {
246  text *string1 = PG_GETARG_TEXT_PP(0);
248  text *string2 = PG_GETARG_TEXT_PP(2);
249  text *ret;
250  char *ptr1,
251  *ptr2,
252  *ptr2start,
253  *ptr2end,
254  *ptr_ret;
255  int m,
256  s1len,
257  s2len;
258  int bytelen;
259 
260  /* Negative len is silently taken as zero */
261  if (len < 0)
262  len = 0;
263 
264  s1len = VARSIZE_ANY_EXHDR(string1);
265  if (s1len < 0)
266  s1len = 0; /* shouldn't happen */
267 
268  s2len = VARSIZE_ANY_EXHDR(string2);
269  if (s2len < 0)
270  s2len = 0; /* shouldn't happen */
271 
272  s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
273 
274  if (s1len > len)
275  s1len = len; /* truncate string1 to len chars */
276 
277  if (s2len <= 0)
278  len = s1len; /* nothing to pad with, so don't pad */
279 
280  /* compute worst-case output length */
282  &bytelen)) ||
283  unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
284  unlikely(!AllocSizeIsValid(bytelen)))
285  ereport(ERROR,
286  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
287  errmsg("requested length too large")));
288 
289  ret = (text *) palloc(bytelen);
290 
291  m = len - s1len;
292 
293  ptr1 = VARDATA_ANY(string1);
294  ptr_ret = VARDATA(ret);
295 
296  while (s1len--)
297  {
298  int mlen = pg_mblen(ptr1);
299 
300  memcpy(ptr_ret, ptr1, mlen);
301  ptr_ret += mlen;
302  ptr1 += mlen;
303  }
304 
305  ptr2 = ptr2start = VARDATA_ANY(string2);
306  ptr2end = ptr2 + s2len;
307 
308  while (m--)
309  {
310  int mlen = pg_mblen(ptr2);
311 
312  memcpy(ptr_ret, ptr2, mlen);
313  ptr_ret += mlen;
314  ptr2 += mlen;
315  if (ptr2 == ptr2end) /* wrap around at end of s2 */
316  ptr2 = ptr2start;
317  }
318 
319  SET_VARSIZE(ret, ptr_ret - (char *) ret);
320 
321  PG_RETURN_TEXT_P(ret);
322 }
323 
324 
325 /********************************************************************
326  *
327  * btrim
328  *
329  * Syntax:
330  *
331  * text btrim(text string, text set)
332  *
333  * Purpose:
334  *
335  * Returns string with characters removed from the front and back
336  * up to the first character not in set.
337  *
338  ********************************************************************/
339 
340 Datum
342 {
343  text *string = PG_GETARG_TEXT_PP(0);
344  text *set = PG_GETARG_TEXT_PP(1);
345  text *ret;
346 
347  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
348  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
349  true, true);
350 
351  PG_RETURN_TEXT_P(ret);
352 }
353 
354 /********************************************************************
355  *
356  * btrim1 --- btrim with set fixed as ' '
357  *
358  ********************************************************************/
359 
360 Datum
362 {
363  text *string = PG_GETARG_TEXT_PP(0);
364  text *ret;
365 
366  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
367  " ", 1,
368  true, true);
369 
370  PG_RETURN_TEXT_P(ret);
371 }
372 
373 /*
374  * Common implementation for btrim, ltrim, rtrim
375  */
376 static text *
377 dotrim(const char *string, int stringlen,
378  const char *set, int setlen,
379  bool doltrim, bool dortrim)
380 {
381  int i;
382 
383  /* Nothing to do if either string or set is empty */
384  if (stringlen > 0 && setlen > 0)
385  {
387  {
388  /*
389  * In the multibyte-encoding case, build arrays of pointers to
390  * character starts, so that we can avoid inefficient checks in
391  * the inner loops.
392  */
393  const char **stringchars;
394  const char **setchars;
395  int *stringmblen;
396  int *setmblen;
397  int stringnchars;
398  int setnchars;
399  int resultndx;
400  int resultnchars;
401  const char *p;
402  int len;
403  int mblen;
404  const char *str_pos;
405  int str_len;
406 
407  stringchars = (const char **) palloc(stringlen * sizeof(char *));
408  stringmblen = (int *) palloc(stringlen * sizeof(int));
409  stringnchars = 0;
410  p = string;
411  len = stringlen;
412  while (len > 0)
413  {
414  stringchars[stringnchars] = p;
415  stringmblen[stringnchars] = mblen = pg_mblen(p);
416  stringnchars++;
417  p += mblen;
418  len -= mblen;
419  }
420 
421  setchars = (const char **) palloc(setlen * sizeof(char *));
422  setmblen = (int *) palloc(setlen * sizeof(int));
423  setnchars = 0;
424  p = set;
425  len = setlen;
426  while (len > 0)
427  {
428  setchars[setnchars] = p;
429  setmblen[setnchars] = mblen = pg_mblen(p);
430  setnchars++;
431  p += mblen;
432  len -= mblen;
433  }
434 
435  resultndx = 0; /* index in stringchars[] */
436  resultnchars = stringnchars;
437 
438  if (doltrim)
439  {
440  while (resultnchars > 0)
441  {
442  str_pos = stringchars[resultndx];
443  str_len = stringmblen[resultndx];
444  for (i = 0; i < setnchars; i++)
445  {
446  if (str_len == setmblen[i] &&
447  memcmp(str_pos, setchars[i], str_len) == 0)
448  break;
449  }
450  if (i >= setnchars)
451  break; /* no match here */
452  string += str_len;
453  stringlen -= str_len;
454  resultndx++;
455  resultnchars--;
456  }
457  }
458 
459  if (dortrim)
460  {
461  while (resultnchars > 0)
462  {
463  str_pos = stringchars[resultndx + resultnchars - 1];
464  str_len = stringmblen[resultndx + resultnchars - 1];
465  for (i = 0; i < setnchars; i++)
466  {
467  if (str_len == setmblen[i] &&
468  memcmp(str_pos, setchars[i], str_len) == 0)
469  break;
470  }
471  if (i >= setnchars)
472  break; /* no match here */
473  stringlen -= str_len;
474  resultnchars--;
475  }
476  }
477 
478  pfree(stringchars);
479  pfree(stringmblen);
480  pfree(setchars);
481  pfree(setmblen);
482  }
483  else
484  {
485  /*
486  * In the single-byte-encoding case, we don't need such overhead.
487  */
488  if (doltrim)
489  {
490  while (stringlen > 0)
491  {
492  char str_ch = *string;
493 
494  for (i = 0; i < setlen; i++)
495  {
496  if (str_ch == set[i])
497  break;
498  }
499  if (i >= setlen)
500  break; /* no match here */
501  string++;
502  stringlen--;
503  }
504  }
505 
506  if (dortrim)
507  {
508  while (stringlen > 0)
509  {
510  char str_ch = string[stringlen - 1];
511 
512  for (i = 0; i < setlen; i++)
513  {
514  if (str_ch == set[i])
515  break;
516  }
517  if (i >= setlen)
518  break; /* no match here */
519  stringlen--;
520  }
521  }
522  }
523  }
524 
525  /* Return selected portion of string */
526  return cstring_to_text_with_len(string, stringlen);
527 }
528 
529 /*
530  * Common implementation for bytea versions of btrim, ltrim, rtrim
531  */
532 bytea *
533 dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
534 {
535  bytea *ret;
536  char *ptr,
537  *end,
538  *ptr2,
539  *ptr2start,
540  *end2;
541  int m,
542  stringlen,
543  setlen;
544 
545  stringlen = VARSIZE_ANY_EXHDR(string);
546  setlen = VARSIZE_ANY_EXHDR(set);
547 
548  if (stringlen <= 0 || setlen <= 0)
549  return string;
550 
551  m = stringlen;
552  ptr = VARDATA_ANY(string);
553  end = ptr + stringlen - 1;
554  ptr2start = VARDATA_ANY(set);
555  end2 = ptr2start + setlen - 1;
556 
557  if (doltrim)
558  {
559  while (m > 0)
560  {
561  ptr2 = ptr2start;
562  while (ptr2 <= end2)
563  {
564  if (*ptr == *ptr2)
565  break;
566  ++ptr2;
567  }
568  if (ptr2 > end2)
569  break;
570  ptr++;
571  m--;
572  }
573  }
574 
575  if (dortrim)
576  {
577  while (m > 0)
578  {
579  ptr2 = ptr2start;
580  while (ptr2 <= end2)
581  {
582  if (*end == *ptr2)
583  break;
584  ++ptr2;
585  }
586  if (ptr2 > end2)
587  break;
588  end--;
589  m--;
590  }
591  }
592 
593  ret = (bytea *) palloc(VARHDRSZ + m);
594  SET_VARSIZE(ret, VARHDRSZ + m);
595  memcpy(VARDATA(ret), ptr, m);
596  return ret;
597 }
598 
599 /********************************************************************
600  *
601  * byteatrim
602  *
603  * Syntax:
604  *
605  * bytea byteatrim(bytea string, bytea set)
606  *
607  * Purpose:
608  *
609  * Returns string with characters removed from the front and back
610  * up to the first character not in set.
611  *
612  * Cloned from btrim and modified as required.
613  ********************************************************************/
614 
615 Datum
617 {
618  bytea *string = PG_GETARG_BYTEA_PP(0);
619  bytea *set = PG_GETARG_BYTEA_PP(1);
620  bytea *ret;
621 
622  ret = dobyteatrim(string, set, true, true);
623 
624  PG_RETURN_BYTEA_P(ret);
625 }
626 
627 /********************************************************************
628  *
629  * bytealtrim
630  *
631  * Syntax:
632  *
633  * bytea bytealtrim(bytea string, bytea set)
634  *
635  * Purpose:
636  *
637  * Returns string with initial characters removed up to the first
638  * character not in set.
639  *
640  ********************************************************************/
641 
642 Datum
644 {
645  bytea *string = PG_GETARG_BYTEA_PP(0);
646  bytea *set = PG_GETARG_BYTEA_PP(1);
647  bytea *ret;
648 
649  ret = dobyteatrim(string, set, true, false);
650 
651  PG_RETURN_BYTEA_P(ret);
652 }
653 
654 /********************************************************************
655  *
656  * byteartrim
657  *
658  * Syntax:
659  *
660  * bytea byteartrim(bytea string, bytea set)
661  *
662  * Purpose:
663  *
664  * Returns string with final characters removed after the last
665  * character not in set.
666  *
667  ********************************************************************/
668 
669 Datum
671 {
672  bytea *string = PG_GETARG_BYTEA_PP(0);
673  bytea *set = PG_GETARG_BYTEA_PP(1);
674  bytea *ret;
675 
676  ret = dobyteatrim(string, set, false, true);
677 
678  PG_RETURN_BYTEA_P(ret);
679 }
680 
681 /********************************************************************
682  *
683  * ltrim
684  *
685  * Syntax:
686  *
687  * text ltrim(text string, text set)
688  *
689  * Purpose:
690  *
691  * Returns string with initial characters removed up to the first
692  * character not in set.
693  *
694  ********************************************************************/
695 
696 Datum
698 {
699  text *string = PG_GETARG_TEXT_PP(0);
700  text *set = PG_GETARG_TEXT_PP(1);
701  text *ret;
702 
703  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
704  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
705  true, false);
706 
707  PG_RETURN_TEXT_P(ret);
708 }
709 
710 /********************************************************************
711  *
712  * ltrim1 --- ltrim with set fixed as ' '
713  *
714  ********************************************************************/
715 
716 Datum
718 {
719  text *string = PG_GETARG_TEXT_PP(0);
720  text *ret;
721 
722  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
723  " ", 1,
724  true, false);
725 
726  PG_RETURN_TEXT_P(ret);
727 }
728 
729 /********************************************************************
730  *
731  * rtrim
732  *
733  * Syntax:
734  *
735  * text rtrim(text string, text set)
736  *
737  * Purpose:
738  *
739  * Returns string with final characters removed after the last
740  * character not in set.
741  *
742  ********************************************************************/
743 
744 Datum
746 {
747  text *string = PG_GETARG_TEXT_PP(0);
748  text *set = PG_GETARG_TEXT_PP(1);
749  text *ret;
750 
751  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
752  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
753  false, true);
754 
755  PG_RETURN_TEXT_P(ret);
756 }
757 
758 /********************************************************************
759  *
760  * rtrim1 --- rtrim with set fixed as ' '
761  *
762  ********************************************************************/
763 
764 Datum
766 {
767  text *string = PG_GETARG_TEXT_PP(0);
768  text *ret;
769 
770  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
771  " ", 1,
772  false, true);
773 
774  PG_RETURN_TEXT_P(ret);
775 }
776 
777 
778 /********************************************************************
779  *
780  * translate
781  *
782  * Syntax:
783  *
784  * text translate(text string, text from, text to)
785  *
786  * Purpose:
787  *
788  * Returns string after replacing all occurrences of characters in from
789  * with the corresponding character in to. If from is longer than to,
790  * occurrences of the extra characters in from are deleted.
791  * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
792  *
793  ********************************************************************/
794 
795 Datum
797 {
798  text *string = PG_GETARG_TEXT_PP(0);
799  text *from = PG_GETARG_TEXT_PP(1);
800  text *to = PG_GETARG_TEXT_PP(2);
801  text *result;
802  char *from_ptr,
803  *to_ptr;
804  char *source,
805  *target;
806  int m,
807  fromlen,
808  tolen,
809  retlen,
810  i;
811  int bytelen;
812  int len;
813  int source_len;
814  int from_index;
815 
816  m = VARSIZE_ANY_EXHDR(string);
817  if (m <= 0)
818  PG_RETURN_TEXT_P(string);
819  source = VARDATA_ANY(string);
820 
821  fromlen = VARSIZE_ANY_EXHDR(from);
822  from_ptr = VARDATA_ANY(from);
823  tolen = VARSIZE_ANY_EXHDR(to);
824  to_ptr = VARDATA_ANY(to);
825 
826  /*
827  * The worst-case expansion is to substitute a max-length character for a
828  * single-byte character at each position of the string.
829  */
831  &bytelen)) ||
832  unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
833  unlikely(!AllocSizeIsValid(bytelen)))
834  ereport(ERROR,
835  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
836  errmsg("requested length too large")));
837 
838  result = (text *) palloc(bytelen);
839 
840  target = VARDATA(result);
841  retlen = 0;
842 
843  while (m > 0)
844  {
845  source_len = pg_mblen(source);
846  from_index = 0;
847 
848  for (i = 0; i < fromlen; i += len)
849  {
850  len = pg_mblen(&from_ptr[i]);
851  if (len == source_len &&
852  memcmp(source, &from_ptr[i], len) == 0)
853  break;
854 
855  from_index++;
856  }
857  if (i < fromlen)
858  {
859  /* substitute */
860  char *p = to_ptr;
861 
862  for (i = 0; i < from_index; i++)
863  {
864  p += pg_mblen(p);
865  if (p >= (to_ptr + tolen))
866  break;
867  }
868  if (p < (to_ptr + tolen))
869  {
870  len = pg_mblen(p);
871  memcpy(target, p, len);
872  target += len;
873  retlen += len;
874  }
875  }
876  else
877  {
878  /* no match, so copy */
879  memcpy(target, source, source_len);
880  target += source_len;
881  retlen += source_len;
882  }
883 
884  source += source_len;
885  m -= source_len;
886  }
887 
888  SET_VARSIZE(result, retlen + VARHDRSZ);
889 
890  /*
891  * The function result is probably much bigger than needed, if we're using
892  * a multibyte encoding, but it's not worth reallocating it; the result
893  * probably won't live long anyway.
894  */
895 
896  PG_RETURN_TEXT_P(result);
897 }
898 
899 /********************************************************************
900  *
901  * ascii
902  *
903  * Syntax:
904  *
905  * int ascii(text string)
906  *
907  * Purpose:
908  *
909  * Returns the decimal representation of the first character from
910  * string.
911  * If the string is empty we return 0.
912  * If the database encoding is UTF8, we return the Unicode codepoint.
913  * If the database encoding is any other multi-byte encoding, we
914  * return the value of the first byte if it is an ASCII character
915  * (range 1 .. 127), or raise an error.
916  * For all other encodings we return the value of the first byte,
917  * (range 1..255).
918  *
919  ********************************************************************/
920 
921 Datum
923 {
924  text *string = PG_GETARG_TEXT_PP(0);
926  unsigned char *data;
927 
928  if (VARSIZE_ANY_EXHDR(string) <= 0)
929  PG_RETURN_INT32(0);
930 
931  data = (unsigned char *) VARDATA_ANY(string);
932 
933  if (encoding == PG_UTF8 && *data > 127)
934  {
935  /* return the code point for Unicode */
936 
937  int result = 0,
938  tbytes = 0,
939  i;
940 
941  if (*data >= 0xF0)
942  {
943  result = *data & 0x07;
944  tbytes = 3;
945  }
946  else if (*data >= 0xE0)
947  {
948  result = *data & 0x0F;
949  tbytes = 2;
950  }
951  else
952  {
953  Assert(*data > 0xC0);
954  result = *data & 0x1f;
955  tbytes = 1;
956  }
957 
958  Assert(tbytes > 0);
959 
960  for (i = 1; i <= tbytes; i++)
961  {
962  Assert((data[i] & 0xC0) == 0x80);
963  result = (result << 6) + (data[i] & 0x3f);
964  }
965 
966  PG_RETURN_INT32(result);
967  }
968  else
969  {
970  if (pg_encoding_max_length(encoding) > 1 && *data > 127)
971  ereport(ERROR,
972  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
973  errmsg("requested character too large")));
974 
975 
977  }
978 }
979 
980 /********************************************************************
981  *
982  * chr
983  *
984  * Syntax:
985  *
986  * text chr(int val)
987  *
988  * Purpose:
989  *
990  * Returns the character having the binary equivalent to val.
991  *
992  * For UTF8 we treat the argument as a Unicode code point.
993  * For other multi-byte encodings we raise an error for arguments
994  * outside the strict ASCII range (1..127).
995  *
996  * It's important that we don't ever return a value that is not valid
997  * in the database encoding, so that this doesn't become a way for
998  * invalid data to enter the database.
999  *
1000  ********************************************************************/
1001 
1002 Datum
1004 {
1005  int32 arg = PG_GETARG_INT32(0);
1006  uint32 cvalue;
1007  text *result;
1008  int encoding = GetDatabaseEncoding();
1009 
1010  /*
1011  * Error out on arguments that make no sense or that we can't validly
1012  * represent in the encoding.
1013  */
1014  if (arg < 0)
1015  ereport(ERROR,
1016  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1017  errmsg("character number must be positive")));
1018  else if (arg == 0)
1019  ereport(ERROR,
1020  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1021  errmsg("null character not permitted")));
1022 
1023  cvalue = arg;
1024 
1025  if (encoding == PG_UTF8 && cvalue > 127)
1026  {
1027  /* for Unicode we treat the argument as a code point */
1028  int bytes;
1029  unsigned char *wch;
1030 
1031  /*
1032  * We only allow valid Unicode code points; per RFC3629 that stops at
1033  * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
1034  * U+1FFFFF.
1035  */
1036  if (cvalue > 0x0010ffff)
1037  ereport(ERROR,
1038  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1039  errmsg("requested character too large for encoding: %u",
1040  cvalue)));
1041 
1042  if (cvalue > 0xffff)
1043  bytes = 4;
1044  else if (cvalue > 0x07ff)
1045  bytes = 3;
1046  else
1047  bytes = 2;
1048 
1049  result = (text *) palloc(VARHDRSZ + bytes);
1050  SET_VARSIZE(result, VARHDRSZ + bytes);
1051  wch = (unsigned char *) VARDATA(result);
1052 
1053  if (bytes == 2)
1054  {
1055  wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
1056  wch[1] = 0x80 | (cvalue & 0x3F);
1057  }
1058  else if (bytes == 3)
1059  {
1060  wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
1061  wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
1062  wch[2] = 0x80 | (cvalue & 0x3F);
1063  }
1064  else
1065  {
1066  wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
1067  wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
1068  wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
1069  wch[3] = 0x80 | (cvalue & 0x3F);
1070  }
1071 
1072  /*
1073  * The preceding range check isn't sufficient, because UTF8 excludes
1074  * Unicode "surrogate pair" codes. Make sure what we created is valid
1075  * UTF8.
1076  */
1077  if (!pg_utf8_islegal(wch, bytes))
1078  ereport(ERROR,
1079  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1080  errmsg("requested character not valid for encoding: %u",
1081  cvalue)));
1082  }
1083  else
1084  {
1085  bool is_mb;
1086 
1087  is_mb = pg_encoding_max_length(encoding) > 1;
1088 
1089  if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1090  ereport(ERROR,
1091  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1092  errmsg("requested character too large for encoding: %u",
1093  cvalue)));
1094 
1095  result = (text *) palloc(VARHDRSZ + 1);
1096  SET_VARSIZE(result, VARHDRSZ + 1);
1097  *VARDATA(result) = (char) cvalue;
1098  }
1099 
1100  PG_RETURN_TEXT_P(result);
1101 }
1102 
1103 /********************************************************************
1104  *
1105  * repeat
1106  *
1107  * Syntax:
1108  *
1109  * text repeat(text string, int val)
1110  *
1111  * Purpose:
1112  *
1113  * Repeat string by val.
1114  *
1115  ********************************************************************/
1116 
1117 Datum
1119 {
1120  text *string = PG_GETARG_TEXT_PP(0);
1121  int32 count = PG_GETARG_INT32(1);
1122  text *result;
1123  int slen,
1124  tlen;
1125  int i;
1126  char *cp,
1127  *sp;
1128 
1129  if (count < 0)
1130  count = 0;
1131 
1132  slen = VARSIZE_ANY_EXHDR(string);
1133 
1134  if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
1135  unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)) ||
1136  unlikely(!AllocSizeIsValid(tlen)))
1137  ereport(ERROR,
1138  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1139  errmsg("requested length too large")));
1140 
1141  result = (text *) palloc(tlen);
1142 
1143  SET_VARSIZE(result, tlen);
1144  cp = VARDATA(result);
1145  sp = VARDATA_ANY(string);
1146  for (i = 0; i < count; i++)
1147  {
1148  memcpy(cp, sp, slen);
1149  cp += slen;
1151  }
1152 
1153  PG_RETURN_TEXT_P(result);
1154 }
unsigned int uint32
Definition: c.h:441
signed int int32
Definition: c.h:429
#define VARHDRSZ
Definition: c.h:627
#define unlikely(x)
Definition: c.h:273
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ERROR
Definition: elog.h:33
#define ereport(elevel,...)
Definition: elog.h:143
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
char * str_initcap(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1912
char * str_toupper(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1790
char * str_tolower(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1668
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:140
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:104
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:1000
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
void pfree(void *pointer)
Definition: mcxt.c:1175
void * palloc(Size size)
Definition: mcxt.c:1068
#define AllocSizeIsValid(size)
Definition: memutils.h:42
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
static text * dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim)
Datum bytealtrim(PG_FUNCTION_ARGS)
Datum ltrim(PG_FUNCTION_ARGS)
Datum byteatrim(PG_FUNCTION_ARGS)
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:48
Datum initcap(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:79
Datum byteartrim(PG_FUNCTION_ARGS)
Datum rtrim(PG_FUNCTION_ARGS)
Datum rpad(PG_FUNCTION_ARGS)
Datum chr(PG_FUNCTION_ARGS)
Datum ltrim1(PG_FUNCTION_ARGS)
Datum btrim1(PG_FUNCTION_ARGS)
Datum ascii(PG_FUNCTION_ARGS)
Datum rtrim1(PG_FUNCTION_ARGS)
Datum translate(PG_FUNCTION_ARGS)
static bytea * dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
Datum lpad(PG_FUNCTION_ARGS)
Datum btrim(PG_FUNCTION_ARGS)
Datum repeat(PG_FUNCTION_ARGS)
void * arg
const void size_t len
const void * data
int32 encoding
Definition: pg_database.h:41
static rewind_source * source
Definition: pg_rewind.c:81
@ PG_UTF8
Definition: pg_wchar.h:230
uintptr_t Datum
Definition: postgres.h:411
#define VARDATA(PTR)
Definition: postgres.h:315
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
char string[11]
Definition: preproc-type.c:46
Definition: c.h:622
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:200
text * cstring_to_text(const char *s)
Definition: varlena.c:188
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:2012
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2187