PostgreSQL Source Code  git master
oracle_compat.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  * oracle_compat.c
3  * Oracle compatible functions.
4  *
5  * Copyright (c) 1996-2021, PostgreSQL Global Development Group
6  *
7  * Author: Edmund Mergl <E.Mergl@bawue.de>
8  * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9  *
10  *
11  * IDENTIFICATION
12  * src/backend/utils/adt/oracle_compat.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17 
18 #include "common/int.h"
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/formatting.h"
23 
24 static text *dotrim(const char *string, int stringlen,
25  const char *set, int setlen,
26  bool doltrim, bool dortrim);
27 static bytea *dobyteatrim(bytea *string, bytea *set,
28  bool doltrim, bool dortrim);
29 
30 
31 /********************************************************************
32  *
33  * lower
34  *
35  * Syntax:
36  *
37  * text lower(text string)
38  *
39  * Purpose:
40  *
41  * Returns string, with all letters forced to lowercase.
42  *
43  ********************************************************************/
44 
45 Datum
47 {
48  text *in_string = PG_GETARG_TEXT_PP(0);
49  char *out_string;
50  text *result;
51 
52  out_string = str_tolower(VARDATA_ANY(in_string),
53  VARSIZE_ANY_EXHDR(in_string),
55  result = cstring_to_text(out_string);
56  pfree(out_string);
57 
58  PG_RETURN_TEXT_P(result);
59 }
60 
61 
62 /********************************************************************
63  *
64  * upper
65  *
66  * Syntax:
67  *
68  * text upper(text string)
69  *
70  * Purpose:
71  *
72  * Returns string, with all letters forced to uppercase.
73  *
74  ********************************************************************/
75 
76 Datum
78 {
79  text *in_string = PG_GETARG_TEXT_PP(0);
80  char *out_string;
81  text *result;
82 
83  out_string = str_toupper(VARDATA_ANY(in_string),
84  VARSIZE_ANY_EXHDR(in_string),
86  result = cstring_to_text(out_string);
87  pfree(out_string);
88 
89  PG_RETURN_TEXT_P(result);
90 }
91 
92 
93 /********************************************************************
94  *
95  * initcap
96  *
97  * Syntax:
98  *
99  * text initcap(text string)
100  *
101  * Purpose:
102  *
103  * Returns string, with first letter of each word in uppercase, all
104  * other letters in lowercase. A word is defined as a sequence of
105  * alphanumeric characters, delimited by non-alphanumeric
106  * characters.
107  *
108  ********************************************************************/
109 
110 Datum
112 {
113  text *in_string = PG_GETARG_TEXT_PP(0);
114  char *out_string;
115  text *result;
116 
117  out_string = str_initcap(VARDATA_ANY(in_string),
118  VARSIZE_ANY_EXHDR(in_string),
119  PG_GET_COLLATION());
120  result = cstring_to_text(out_string);
121  pfree(out_string);
122 
123  PG_RETURN_TEXT_P(result);
124 }
125 
126 
127 /********************************************************************
128  *
129  * lpad
130  *
131  * Syntax:
132  *
133  * text lpad(text string1, int4 len, text string2)
134  *
135  * Purpose:
136  *
137  * Returns string1, left-padded to length len with the sequence of
138  * characters in string2. If len is less than the length of string1,
139  * instead truncate (on the right) to len.
140  *
141  ********************************************************************/
142 
143 Datum
145 {
146  text *string1 = PG_GETARG_TEXT_PP(0);
147  int32 len = PG_GETARG_INT32(1);
148  text *string2 = PG_GETARG_TEXT_PP(2);
149  text *ret;
150  char *ptr1,
151  *ptr2,
152  *ptr2start,
153  *ptr2end,
154  *ptr_ret;
155  int m,
156  s1len,
157  s2len;
158 
159  int bytelen;
160 
161  /* Negative len is silently taken as zero */
162  if (len < 0)
163  len = 0;
164 
165  s1len = VARSIZE_ANY_EXHDR(string1);
166  if (s1len < 0)
167  s1len = 0; /* shouldn't happen */
168 
169  s2len = VARSIZE_ANY_EXHDR(string2);
170  if (s2len < 0)
171  s2len = 0; /* shouldn't happen */
172 
173  s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
174 
175  if (s1len > len)
176  s1len = len; /* truncate string1 to len chars */
177 
178  if (s2len <= 0)
179  len = s1len; /* nothing to pad with, so don't pad */
180 
181  bytelen = pg_database_encoding_max_length() * len;
182 
183  /* check for integer overflow */
184  if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
185  ereport(ERROR,
186  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
187  errmsg("requested length too large")));
188 
189  ret = (text *) palloc(VARHDRSZ + bytelen);
190 
191  m = len - s1len;
192 
193  ptr2 = ptr2start = VARDATA_ANY(string2);
194  ptr2end = ptr2 + s2len;
195  ptr_ret = VARDATA(ret);
196 
197  while (m--)
198  {
199  int mlen = pg_mblen(ptr2);
200 
201  memcpy(ptr_ret, ptr2, mlen);
202  ptr_ret += mlen;
203  ptr2 += mlen;
204  if (ptr2 == ptr2end) /* wrap around at end of s2 */
205  ptr2 = ptr2start;
206  }
207 
208  ptr1 = VARDATA_ANY(string1);
209 
210  while (s1len--)
211  {
212  int mlen = pg_mblen(ptr1);
213 
214  memcpy(ptr_ret, ptr1, mlen);
215  ptr_ret += mlen;
216  ptr1 += mlen;
217  }
218 
219  SET_VARSIZE(ret, ptr_ret - (char *) ret);
220 
221  PG_RETURN_TEXT_P(ret);
222 }
223 
224 
225 /********************************************************************
226  *
227  * rpad
228  *
229  * Syntax:
230  *
231  * text rpad(text string1, int4 len, text string2)
232  *
233  * Purpose:
234  *
235  * Returns string1, right-padded to length len with the sequence of
236  * characters in string2. If len is less than the length of string1,
237  * instead truncate (on the right) to len.
238  *
239  ********************************************************************/
240 
241 Datum
243 {
244  text *string1 = PG_GETARG_TEXT_PP(0);
245  int32 len = PG_GETARG_INT32(1);
246  text *string2 = PG_GETARG_TEXT_PP(2);
247  text *ret;
248  char *ptr1,
249  *ptr2,
250  *ptr2start,
251  *ptr2end,
252  *ptr_ret;
253  int m,
254  s1len,
255  s2len;
256 
257  int bytelen;
258 
259  /* Negative len is silently taken as zero */
260  if (len < 0)
261  len = 0;
262 
263  s1len = VARSIZE_ANY_EXHDR(string1);
264  if (s1len < 0)
265  s1len = 0; /* shouldn't happen */
266 
267  s2len = VARSIZE_ANY_EXHDR(string2);
268  if (s2len < 0)
269  s2len = 0; /* shouldn't happen */
270 
271  s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
272 
273  if (s1len > len)
274  s1len = len; /* truncate string1 to len chars */
275 
276  if (s2len <= 0)
277  len = s1len; /* nothing to pad with, so don't pad */
278 
279  bytelen = pg_database_encoding_max_length() * len;
280 
281  /* Check for integer overflow */
282  if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
283  ereport(ERROR,
284  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
285  errmsg("requested length too large")));
286 
287  ret = (text *) palloc(VARHDRSZ + bytelen);
288  m = len - s1len;
289 
290  ptr1 = VARDATA_ANY(string1);
291  ptr_ret = VARDATA(ret);
292 
293  while (s1len--)
294  {
295  int mlen = pg_mblen(ptr1);
296 
297  memcpy(ptr_ret, ptr1, mlen);
298  ptr_ret += mlen;
299  ptr1 += mlen;
300  }
301 
302  ptr2 = ptr2start = VARDATA_ANY(string2);
303  ptr2end = ptr2 + s2len;
304 
305  while (m--)
306  {
307  int mlen = pg_mblen(ptr2);
308 
309  memcpy(ptr_ret, ptr2, mlen);
310  ptr_ret += mlen;
311  ptr2 += mlen;
312  if (ptr2 == ptr2end) /* wrap around at end of s2 */
313  ptr2 = ptr2start;
314  }
315 
316  SET_VARSIZE(ret, ptr_ret - (char *) ret);
317 
318  PG_RETURN_TEXT_P(ret);
319 }
320 
321 
322 /********************************************************************
323  *
324  * btrim
325  *
326  * Syntax:
327  *
328  * text btrim(text string, text set)
329  *
330  * Purpose:
331  *
332  * Returns string with characters removed from the front and back
333  * up to the first character not in set.
334  *
335  ********************************************************************/
336 
337 Datum
339 {
340  text *string = PG_GETARG_TEXT_PP(0);
341  text *set = PG_GETARG_TEXT_PP(1);
342  text *ret;
343 
344  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
345  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
346  true, true);
347 
348  PG_RETURN_TEXT_P(ret);
349 }
350 
351 /********************************************************************
352  *
353  * btrim1 --- btrim with set fixed as ' '
354  *
355  ********************************************************************/
356 
357 Datum
359 {
360  text *string = PG_GETARG_TEXT_PP(0);
361  text *ret;
362 
363  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
364  " ", 1,
365  true, true);
366 
367  PG_RETURN_TEXT_P(ret);
368 }
369 
370 /*
371  * Common implementation for btrim, ltrim, rtrim
372  */
373 static text *
374 dotrim(const char *string, int stringlen,
375  const char *set, int setlen,
376  bool doltrim, bool dortrim)
377 {
378  int i;
379 
380  /* Nothing to do if either string or set is empty */
381  if (stringlen > 0 && setlen > 0)
382  {
384  {
385  /*
386  * In the multibyte-encoding case, build arrays of pointers to
387  * character starts, so that we can avoid inefficient checks in
388  * the inner loops.
389  */
390  const char **stringchars;
391  const char **setchars;
392  int *stringmblen;
393  int *setmblen;
394  int stringnchars;
395  int setnchars;
396  int resultndx;
397  int resultnchars;
398  const char *p;
399  int len;
400  int mblen;
401  const char *str_pos;
402  int str_len;
403 
404  stringchars = (const char **) palloc(stringlen * sizeof(char *));
405  stringmblen = (int *) palloc(stringlen * sizeof(int));
406  stringnchars = 0;
407  p = string;
408  len = stringlen;
409  while (len > 0)
410  {
411  stringchars[stringnchars] = p;
412  stringmblen[stringnchars] = mblen = pg_mblen(p);
413  stringnchars++;
414  p += mblen;
415  len -= mblen;
416  }
417 
418  setchars = (const char **) palloc(setlen * sizeof(char *));
419  setmblen = (int *) palloc(setlen * sizeof(int));
420  setnchars = 0;
421  p = set;
422  len = setlen;
423  while (len > 0)
424  {
425  setchars[setnchars] = p;
426  setmblen[setnchars] = mblen = pg_mblen(p);
427  setnchars++;
428  p += mblen;
429  len -= mblen;
430  }
431 
432  resultndx = 0; /* index in stringchars[] */
433  resultnchars = stringnchars;
434 
435  if (doltrim)
436  {
437  while (resultnchars > 0)
438  {
439  str_pos = stringchars[resultndx];
440  str_len = stringmblen[resultndx];
441  for (i = 0; i < setnchars; i++)
442  {
443  if (str_len == setmblen[i] &&
444  memcmp(str_pos, setchars[i], str_len) == 0)
445  break;
446  }
447  if (i >= setnchars)
448  break; /* no match here */
449  string += str_len;
450  stringlen -= str_len;
451  resultndx++;
452  resultnchars--;
453  }
454  }
455 
456  if (dortrim)
457  {
458  while (resultnchars > 0)
459  {
460  str_pos = stringchars[resultndx + resultnchars - 1];
461  str_len = stringmblen[resultndx + resultnchars - 1];
462  for (i = 0; i < setnchars; i++)
463  {
464  if (str_len == setmblen[i] &&
465  memcmp(str_pos, setchars[i], str_len) == 0)
466  break;
467  }
468  if (i >= setnchars)
469  break; /* no match here */
470  stringlen -= str_len;
471  resultnchars--;
472  }
473  }
474 
475  pfree(stringchars);
476  pfree(stringmblen);
477  pfree(setchars);
478  pfree(setmblen);
479  }
480  else
481  {
482  /*
483  * In the single-byte-encoding case, we don't need such overhead.
484  */
485  if (doltrim)
486  {
487  while (stringlen > 0)
488  {
489  char str_ch = *string;
490 
491  for (i = 0; i < setlen; i++)
492  {
493  if (str_ch == set[i])
494  break;
495  }
496  if (i >= setlen)
497  break; /* no match here */
498  string++;
499  stringlen--;
500  }
501  }
502 
503  if (dortrim)
504  {
505  while (stringlen > 0)
506  {
507  char str_ch = string[stringlen - 1];
508 
509  for (i = 0; i < setlen; i++)
510  {
511  if (str_ch == set[i])
512  break;
513  }
514  if (i >= setlen)
515  break; /* no match here */
516  stringlen--;
517  }
518  }
519  }
520  }
521 
522  /* Return selected portion of string */
523  return cstring_to_text_with_len(string, stringlen);
524 }
525 
526 /*
527  * Common implementation for bytea versions of btrim, ltrim, rtrim
528  */
529 bytea *
530 dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
531 {
532  bytea *ret;
533  char *ptr,
534  *end,
535  *ptr2,
536  *ptr2start,
537  *end2;
538  int m,
539  stringlen,
540  setlen;
541 
542  stringlen = VARSIZE_ANY_EXHDR(string);
543  setlen = VARSIZE_ANY_EXHDR(set);
544 
545  if (stringlen <= 0 || setlen <= 0)
546  return string;
547 
548  m = stringlen;
549  ptr = VARDATA_ANY(string);
550  end = ptr + stringlen - 1;
551  ptr2start = VARDATA_ANY(set);
552  end2 = ptr2start + setlen - 1;
553 
554  if (doltrim)
555  {
556  while (m > 0)
557  {
558  ptr2 = ptr2start;
559  while (ptr2 <= end2)
560  {
561  if (*ptr == *ptr2)
562  break;
563  ++ptr2;
564  }
565  if (ptr2 > end2)
566  break;
567  ptr++;
568  m--;
569  }
570  }
571 
572  if (dortrim)
573  {
574  while (m > 0)
575  {
576  ptr2 = ptr2start;
577  while (ptr2 <= end2)
578  {
579  if (*end == *ptr2)
580  break;
581  ++ptr2;
582  }
583  if (ptr2 > end2)
584  break;
585  end--;
586  m--;
587  }
588  }
589 
590  ret = (bytea *) palloc(VARHDRSZ + m);
591  SET_VARSIZE(ret, VARHDRSZ + m);
592  memcpy(VARDATA(ret), ptr, m);
593  return ret;
594 }
595 
596 /********************************************************************
597  *
598  * byteatrim
599  *
600  * Syntax:
601  *
602  * bytea byteatrim(bytea string, bytea set)
603  *
604  * Purpose:
605  *
606  * Returns string with characters removed from the front and back
607  * up to the first character not in set.
608  *
609  * Cloned from btrim and modified as required.
610  ********************************************************************/
611 
612 Datum
614 {
615  bytea *string = PG_GETARG_BYTEA_PP(0);
616  bytea *set = PG_GETARG_BYTEA_PP(1);
617  bytea *ret;
618 
619  ret = dobyteatrim(string, set, true, true);
620 
621  PG_RETURN_BYTEA_P(ret);
622 }
623 
624 /********************************************************************
625  *
626  * bytealtrim
627  *
628  * Syntax:
629  *
630  * bytea bytealtrim(bytea string, bytea set)
631  *
632  * Purpose:
633  *
634  * Returns string with initial characters removed up to the first
635  * character not in set.
636  *
637  ********************************************************************/
638 
639 Datum
641 {
642  bytea *string = PG_GETARG_BYTEA_PP(0);
643  bytea *set = PG_GETARG_BYTEA_PP(1);
644  bytea *ret;
645 
646  ret = dobyteatrim(string, set, true, false);
647 
648  PG_RETURN_BYTEA_P(ret);
649 }
650 
651 /********************************************************************
652  *
653  * byteartrim
654  *
655  * Syntax:
656  *
657  * bytea byteartrim(bytea string, bytea set)
658  *
659  * Purpose:
660  *
661  * Returns string with final characters removed after the last
662  * character not in set.
663  *
664  ********************************************************************/
665 
666 Datum
668 {
669  bytea *string = PG_GETARG_BYTEA_PP(0);
670  bytea *set = PG_GETARG_BYTEA_PP(1);
671  bytea *ret;
672 
673  ret = dobyteatrim(string, set, false, true);
674 
675  PG_RETURN_BYTEA_P(ret);
676 }
677 
678 /********************************************************************
679  *
680  * ltrim
681  *
682  * Syntax:
683  *
684  * text ltrim(text string, text set)
685  *
686  * Purpose:
687  *
688  * Returns string with initial characters removed up to the first
689  * character not in set.
690  *
691  ********************************************************************/
692 
693 Datum
695 {
696  text *string = PG_GETARG_TEXT_PP(0);
697  text *set = PG_GETARG_TEXT_PP(1);
698  text *ret;
699 
700  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
701  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
702  true, false);
703 
704  PG_RETURN_TEXT_P(ret);
705 }
706 
707 /********************************************************************
708  *
709  * ltrim1 --- ltrim with set fixed as ' '
710  *
711  ********************************************************************/
712 
713 Datum
715 {
716  text *string = PG_GETARG_TEXT_PP(0);
717  text *ret;
718 
719  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
720  " ", 1,
721  true, false);
722 
723  PG_RETURN_TEXT_P(ret);
724 }
725 
726 /********************************************************************
727  *
728  * rtrim
729  *
730  * Syntax:
731  *
732  * text rtrim(text string, text set)
733  *
734  * Purpose:
735  *
736  * Returns string with final characters removed after the last
737  * character not in set.
738  *
739  ********************************************************************/
740 
741 Datum
743 {
744  text *string = PG_GETARG_TEXT_PP(0);
745  text *set = PG_GETARG_TEXT_PP(1);
746  text *ret;
747 
748  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
749  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
750  false, true);
751 
752  PG_RETURN_TEXT_P(ret);
753 }
754 
755 /********************************************************************
756  *
757  * rtrim1 --- rtrim with set fixed as ' '
758  *
759  ********************************************************************/
760 
761 Datum
763 {
764  text *string = PG_GETARG_TEXT_PP(0);
765  text *ret;
766 
767  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
768  " ", 1,
769  false, true);
770 
771  PG_RETURN_TEXT_P(ret);
772 }
773 
774 
775 /********************************************************************
776  *
777  * translate
778  *
779  * Syntax:
780  *
781  * text translate(text string, text from, text to)
782  *
783  * Purpose:
784  *
785  * Returns string after replacing all occurrences of characters in from
786  * with the corresponding character in to. If from is longer than to,
787  * occurrences of the extra characters in from are deleted.
788  * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
789  *
790  ********************************************************************/
791 
792 Datum
794 {
795  text *string = PG_GETARG_TEXT_PP(0);
796  text *from = PG_GETARG_TEXT_PP(1);
797  text *to = PG_GETARG_TEXT_PP(2);
798  text *result;
799  char *from_ptr,
800  *to_ptr;
801  char *source,
802  *target;
803  int m,
804  fromlen,
805  tolen,
806  retlen,
807  i;
808  int worst_len;
809  int len;
810  int source_len;
811  int from_index;
812 
813  m = VARSIZE_ANY_EXHDR(string);
814  if (m <= 0)
815  PG_RETURN_TEXT_P(string);
816  source = VARDATA_ANY(string);
817 
818  fromlen = VARSIZE_ANY_EXHDR(from);
819  from_ptr = VARDATA_ANY(from);
820  tolen = VARSIZE_ANY_EXHDR(to);
821  to_ptr = VARDATA_ANY(to);
822 
823  /*
824  * The worst-case expansion is to substitute a max-length character for a
825  * single-byte character at each position of the string.
826  */
827  worst_len = pg_database_encoding_max_length() * m;
828 
829  /* check for integer overflow */
830  if (worst_len / pg_database_encoding_max_length() != m)
831  ereport(ERROR,
832  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
833  errmsg("requested length too large")));
834 
835  result = (text *) palloc(worst_len + VARHDRSZ);
836  target = VARDATA(result);
837  retlen = 0;
838 
839  while (m > 0)
840  {
841  source_len = pg_mblen(source);
842  from_index = 0;
843 
844  for (i = 0; i < fromlen; i += len)
845  {
846  len = pg_mblen(&from_ptr[i]);
847  if (len == source_len &&
848  memcmp(source, &from_ptr[i], len) == 0)
849  break;
850 
851  from_index++;
852  }
853  if (i < fromlen)
854  {
855  /* substitute */
856  char *p = to_ptr;
857 
858  for (i = 0; i < from_index; i++)
859  {
860  p += pg_mblen(p);
861  if (p >= (to_ptr + tolen))
862  break;
863  }
864  if (p < (to_ptr + tolen))
865  {
866  len = pg_mblen(p);
867  memcpy(target, p, len);
868  target += len;
869  retlen += len;
870  }
871 
872  }
873  else
874  {
875  /* no match, so copy */
876  memcpy(target, source, source_len);
877  target += source_len;
878  retlen += source_len;
879  }
880 
881  source += source_len;
882  m -= source_len;
883  }
884 
885  SET_VARSIZE(result, retlen + VARHDRSZ);
886 
887  /*
888  * The function result is probably much bigger than needed, if we're using
889  * a multibyte encoding, but it's not worth reallocating it; the result
890  * probably won't live long anyway.
891  */
892 
893  PG_RETURN_TEXT_P(result);
894 }
895 
896 /********************************************************************
897  *
898  * ascii
899  *
900  * Syntax:
901  *
902  * int ascii(text string)
903  *
904  * Purpose:
905  *
906  * Returns the decimal representation of the first character from
907  * string.
908  * If the string is empty we return 0.
909  * If the database encoding is UTF8, we return the Unicode codepoint.
910  * If the database encoding is any other multi-byte encoding, we
911  * return the value of the first byte if it is an ASCII character
912  * (range 1 .. 127), or raise an error.
913  * For all other encodings we return the value of the first byte,
914  * (range 1..255).
915  *
916  ********************************************************************/
917 
918 Datum
920 {
921  text *string = PG_GETARG_TEXT_PP(0);
923  unsigned char *data;
924 
925  if (VARSIZE_ANY_EXHDR(string) <= 0)
926  PG_RETURN_INT32(0);
927 
928  data = (unsigned char *) VARDATA_ANY(string);
929 
930  if (encoding == PG_UTF8 && *data > 127)
931  {
932  /* return the code point for Unicode */
933 
934  int result = 0,
935  tbytes = 0,
936  i;
937 
938  if (*data >= 0xF0)
939  {
940  result = *data & 0x07;
941  tbytes = 3;
942  }
943  else if (*data >= 0xE0)
944  {
945  result = *data & 0x0F;
946  tbytes = 2;
947  }
948  else
949  {
950  Assert(*data > 0xC0);
951  result = *data & 0x1f;
952  tbytes = 1;
953  }
954 
955  Assert(tbytes > 0);
956 
957  for (i = 1; i <= tbytes; i++)
958  {
959  Assert((data[i] & 0xC0) == 0x80);
960  result = (result << 6) + (data[i] & 0x3f);
961  }
962 
963  PG_RETURN_INT32(result);
964  }
965  else
966  {
967  if (pg_encoding_max_length(encoding) > 1 && *data > 127)
968  ereport(ERROR,
969  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
970  errmsg("requested character too large")));
971 
972 
973  PG_RETURN_INT32((int32) *data);
974  }
975 }
976 
977 /********************************************************************
978  *
979  * chr
980  *
981  * Syntax:
982  *
983  * text chr(int val)
984  *
985  * Purpose:
986  *
987  * Returns the character having the binary equivalent to val.
988  *
989  * For UTF8 we treat the argument as a Unicode code point.
990  * For other multi-byte encodings we raise an error for arguments
991  * outside the strict ASCII range (1..127).
992  *
993  * It's important that we don't ever return a value that is not valid
994  * in the database encoding, so that this doesn't become a way for
995  * invalid data to enter the database.
996  *
997  ********************************************************************/
998 
999 Datum
1001 {
1002  uint32 cvalue = PG_GETARG_UINT32(0);
1003  text *result;
1004  int encoding = GetDatabaseEncoding();
1005 
1006  if (encoding == PG_UTF8 && cvalue > 127)
1007  {
1008  /* for Unicode we treat the argument as a code point */
1009  int bytes;
1010  unsigned char *wch;
1011 
1012  /*
1013  * We only allow valid Unicode code points; per RFC3629 that stops at
1014  * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
1015  * U+1FFFFF.
1016  */
1017  if (cvalue > 0x0010ffff)
1018  ereport(ERROR,
1019  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1020  errmsg("requested character too large for encoding: %d",
1021  cvalue)));
1022 
1023  if (cvalue > 0xffff)
1024  bytes = 4;
1025  else if (cvalue > 0x07ff)
1026  bytes = 3;
1027  else
1028  bytes = 2;
1029 
1030  result = (text *) palloc(VARHDRSZ + bytes);
1031  SET_VARSIZE(result, VARHDRSZ + bytes);
1032  wch = (unsigned char *) VARDATA(result);
1033 
1034  if (bytes == 2)
1035  {
1036  wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
1037  wch[1] = 0x80 | (cvalue & 0x3F);
1038  }
1039  else if (bytes == 3)
1040  {
1041  wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
1042  wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
1043  wch[2] = 0x80 | (cvalue & 0x3F);
1044  }
1045  else
1046  {
1047  wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
1048  wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
1049  wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
1050  wch[3] = 0x80 | (cvalue & 0x3F);
1051  }
1052 
1053  /*
1054  * The preceding range check isn't sufficient, because UTF8 excludes
1055  * Unicode "surrogate pair" codes. Make sure what we created is valid
1056  * UTF8.
1057  */
1058  if (!pg_utf8_islegal(wch, bytes))
1059  ereport(ERROR,
1060  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1061  errmsg("requested character not valid for encoding: %d",
1062  cvalue)));
1063  }
1064  else
1065  {
1066  bool is_mb;
1067 
1068  /*
1069  * Error out on arguments that make no sense or that we can't validly
1070  * represent in the encoding.
1071  */
1072  if (cvalue == 0)
1073  ereport(ERROR,
1074  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1075  errmsg("null character not permitted")));
1076 
1077  is_mb = pg_encoding_max_length(encoding) > 1;
1078 
1079  if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1080  ereport(ERROR,
1081  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1082  errmsg("requested character too large for encoding: %d",
1083  cvalue)));
1084 
1085  result = (text *) palloc(VARHDRSZ + 1);
1086  SET_VARSIZE(result, VARHDRSZ + 1);
1087  *VARDATA(result) = (char) cvalue;
1088  }
1089 
1090  PG_RETURN_TEXT_P(result);
1091 }
1092 
1093 /********************************************************************
1094  *
1095  * repeat
1096  *
1097  * Syntax:
1098  *
1099  * text repeat(text string, int val)
1100  *
1101  * Purpose:
1102  *
1103  * Repeat string by val.
1104  *
1105  ********************************************************************/
1106 
1107 Datum
1109 {
1110  text *string = PG_GETARG_TEXT_PP(0);
1111  int32 count = PG_GETARG_INT32(1);
1112  text *result;
1113  int slen,
1114  tlen;
1115  int i;
1116  char *cp,
1117  *sp;
1118 
1119  if (count < 0)
1120  count = 0;
1121 
1122  slen = VARSIZE_ANY_EXHDR(string);
1123 
1124  if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
1125  unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)))
1126  ereport(ERROR,
1127  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1128  errmsg("requested length too large")));
1129 
1130  result = (text *) palloc(tlen);
1131 
1132  SET_VARSIZE(result, tlen);
1133  cp = VARDATA(result);
1134  sp = VARDATA_ANY(string);
1135  for (i = 0; i < count; i++)
1136  {
1137  memcpy(cp, sp, slen);
1138  cp += slen;
1140  }
1141 
1142  PG_RETURN_TEXT_P(result);
1143 }
#define PG_GETARG_UINT32(n)
Definition: fmgr.h:270
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define VARDATA(PTR)
Definition: postgres.h:315
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1804
char * str_tolower(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1638
static bytea * dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
static bool pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:140
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:46
Datum chr(PG_FUNCTION_ARGS)
char * str_initcap(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1886
#define VARHDRSZ
Definition: c.h:627
def bytes(source, encoding='ascii', errors='strict')
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
Datum ascii(PG_FUNCTION_ARGS)
int errcode(int sqlerrcode)
Definition: elog.c:698
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:77
#define PG_GET_COLLATION()
Definition: fmgr.h:198
Datum translate(PG_FUNCTION_ARGS)
signed int int32
Definition: c.h:429
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:1000
Datum bytealtrim(PG_FUNCTION_ARGS)
Datum btrim(PG_FUNCTION_ARGS)
void pfree(void *pointer)
Definition: mcxt.c:1169
Datum ltrim(PG_FUNCTION_ARGS)
Datum ltrim1(PG_FUNCTION_ARGS)
#define ERROR
Definition: elog.h:46
Datum repeat(PG_FUNCTION_ARGS)
Datum rtrim1(PG_FUNCTION_ARGS)
Datum initcap(PG_FUNCTION_ARGS)
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1979
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:202
Datum rpad(PG_FUNCTION_ARGS)
char string[11]
Definition: preproc-type.c:46
unsigned int uint32
Definition: c.h:441
Datum byteatrim(PG_FUNCTION_ARGS)
uintptr_t Datum
Definition: postgres.h:411
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
Datum btrim1(PG_FUNCTION_ARGS)
#define ereport(elevel,...)
Definition: elog.h:157
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
text * cstring_to_text(const char *s)
Definition: varlena.c:190
Datum byteartrim(PG_FUNCTION_ARGS)
#define Assert(condition)
Definition: c.h:804
static bool pg_add_s32_overflow(int32 a, int32 b, int32 *result)
Definition: int.h:104
static rewind_source * source
Definition: pg_rewind.c:79
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
Datum lpad(PG_FUNCTION_ARGS)
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
int32 encoding
Definition: pg_database.h:41
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
Datum rtrim(PG_FUNCTION_ARGS)
int i
static text * dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim)
char * str_toupper(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1762
#define unlikely(x)
Definition: c.h:273
Definition: c.h:621
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342