PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
oracle_compat.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 * oracle_compat.c
 *	Oracle compatible functions.
 *
 * Copyright (c) 1996-2017, PostgreSQL Global Development Group
 *
 *	Author: Edmund Mergl <E.Mergl@bawue.de>
 *	Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
 *
 *
 * IDENTIFICATION
 *	src/backend/utils/adt/oracle_compat.c
 *
 *-------------------------------------------------------------------------
 */
16 #include "postgres.h"
17 
18 #include "utils/builtins.h"
19 #include "utils/formatting.h"
20 #include "mb/pg_wchar.h"
21 
22 
23 static text *dotrim(const char *string, int stringlen,
24  const char *set, int setlen,
25  bool doltrim, bool dortrim);
26 
27 
28 /********************************************************************
29  *
30  * lower
31  *
32  * Syntax:
33  *
34  * text lower(text string)
35  *
36  * Purpose:
37  *
38  * Returns string, with all letters forced to lowercase.
39  *
40  ********************************************************************/
41 
42 Datum
44 {
45  text *in_string = PG_GETARG_TEXT_PP(0);
46  char *out_string;
47  text *result;
48 
49  out_string = str_tolower(VARDATA_ANY(in_string),
50  VARSIZE_ANY_EXHDR(in_string),
52  result = cstring_to_text(out_string);
53  pfree(out_string);
54 
55  PG_RETURN_TEXT_P(result);
56 }
57 
58 
59 /********************************************************************
60  *
61  * upper
62  *
63  * Syntax:
64  *
65  * text upper(text string)
66  *
67  * Purpose:
68  *
69  * Returns string, with all letters forced to uppercase.
70  *
71  ********************************************************************/
72 
73 Datum
75 {
76  text *in_string = PG_GETARG_TEXT_PP(0);
77  char *out_string;
78  text *result;
79 
80  out_string = str_toupper(VARDATA_ANY(in_string),
81  VARSIZE_ANY_EXHDR(in_string),
83  result = cstring_to_text(out_string);
84  pfree(out_string);
85 
86  PG_RETURN_TEXT_P(result);
87 }
88 
89 
90 /********************************************************************
91  *
92  * initcap
93  *
94  * Syntax:
95  *
96  * text initcap(text string)
97  *
98  * Purpose:
99  *
100  * Returns string, with first letter of each word in uppercase, all
101  * other letters in lowercase. A word is defined as a sequence of
102  * alphanumeric characters, delimited by non-alphanumeric
103  * characters.
104  *
105  ********************************************************************/
106 
107 Datum
109 {
110  text *in_string = PG_GETARG_TEXT_PP(0);
111  char *out_string;
112  text *result;
113 
114  out_string = str_initcap(VARDATA_ANY(in_string),
115  VARSIZE_ANY_EXHDR(in_string),
116  PG_GET_COLLATION());
117  result = cstring_to_text(out_string);
118  pfree(out_string);
119 
120  PG_RETURN_TEXT_P(result);
121 }
122 
123 
124 /********************************************************************
125  *
126  * lpad
127  *
128  * Syntax:
129  *
130  * text lpad(text string1, int4 len, text string2)
131  *
132  * Purpose:
133  *
134  * Returns string1, left-padded to length len with the sequence of
135  * characters in string2. If len is less than the length of string1,
136  * instead truncate (on the right) to len.
137  *
138  ********************************************************************/
139 
140 Datum
142 {
143  text *string1 = PG_GETARG_TEXT_PP(0);
144  int32 len = PG_GETARG_INT32(1);
145  text *string2 = PG_GETARG_TEXT_PP(2);
146  text *ret;
147  char *ptr1,
148  *ptr2,
149  *ptr2start,
150  *ptr2end,
151  *ptr_ret;
152  int m,
153  s1len,
154  s2len;
155 
156  int bytelen;
157 
158  /* Negative len is silently taken as zero */
159  if (len < 0)
160  len = 0;
161 
162  s1len = VARSIZE_ANY_EXHDR(string1);
163  if (s1len < 0)
164  s1len = 0; /* shouldn't happen */
165 
166  s2len = VARSIZE_ANY_EXHDR(string2);
167  if (s2len < 0)
168  s2len = 0; /* shouldn't happen */
169 
170  s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
171 
172  if (s1len > len)
173  s1len = len; /* truncate string1 to len chars */
174 
175  if (s2len <= 0)
176  len = s1len; /* nothing to pad with, so don't pad */
177 
178  bytelen = pg_database_encoding_max_length() * len;
179 
180  /* check for integer overflow */
181  if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
182  ereport(ERROR,
183  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
184  errmsg("requested length too large")));
185 
186  ret = (text *) palloc(VARHDRSZ + bytelen);
187 
188  m = len - s1len;
189 
190  ptr2 = ptr2start = VARDATA_ANY(string2);
191  ptr2end = ptr2 + s2len;
192  ptr_ret = VARDATA(ret);
193 
194  while (m--)
195  {
196  int mlen = pg_mblen(ptr2);
197 
198  memcpy(ptr_ret, ptr2, mlen);
199  ptr_ret += mlen;
200  ptr2 += mlen;
201  if (ptr2 == ptr2end) /* wrap around at end of s2 */
202  ptr2 = ptr2start;
203  }
204 
205  ptr1 = VARDATA_ANY(string1);
206 
207  while (s1len--)
208  {
209  int mlen = pg_mblen(ptr1);
210 
211  memcpy(ptr_ret, ptr1, mlen);
212  ptr_ret += mlen;
213  ptr1 += mlen;
214  }
215 
216  SET_VARSIZE(ret, ptr_ret - (char *) ret);
217 
218  PG_RETURN_TEXT_P(ret);
219 }
220 
221 
222 /********************************************************************
223  *
224  * rpad
225  *
226  * Syntax:
227  *
228  * text rpad(text string1, int4 len, text string2)
229  *
230  * Purpose:
231  *
232  * Returns string1, right-padded to length len with the sequence of
233  * characters in string2. If len is less than the length of string1,
234  * instead truncate (on the right) to len.
235  *
236  ********************************************************************/
237 
238 Datum
240 {
241  text *string1 = PG_GETARG_TEXT_PP(0);
242  int32 len = PG_GETARG_INT32(1);
243  text *string2 = PG_GETARG_TEXT_PP(2);
244  text *ret;
245  char *ptr1,
246  *ptr2,
247  *ptr2start,
248  *ptr2end,
249  *ptr_ret;
250  int m,
251  s1len,
252  s2len;
253 
254  int bytelen;
255 
256  /* Negative len is silently taken as zero */
257  if (len < 0)
258  len = 0;
259 
260  s1len = VARSIZE_ANY_EXHDR(string1);
261  if (s1len < 0)
262  s1len = 0; /* shouldn't happen */
263 
264  s2len = VARSIZE_ANY_EXHDR(string2);
265  if (s2len < 0)
266  s2len = 0; /* shouldn't happen */
267 
268  s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
269 
270  if (s1len > len)
271  s1len = len; /* truncate string1 to len chars */
272 
273  if (s2len <= 0)
274  len = s1len; /* nothing to pad with, so don't pad */
275 
276  bytelen = pg_database_encoding_max_length() * len;
277 
278  /* Check for integer overflow */
279  if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
280  ereport(ERROR,
281  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
282  errmsg("requested length too large")));
283 
284  ret = (text *) palloc(VARHDRSZ + bytelen);
285  m = len - s1len;
286 
287  ptr1 = VARDATA_ANY(string1);
288  ptr_ret = VARDATA(ret);
289 
290  while (s1len--)
291  {
292  int mlen = pg_mblen(ptr1);
293 
294  memcpy(ptr_ret, ptr1, mlen);
295  ptr_ret += mlen;
296  ptr1 += mlen;
297  }
298 
299  ptr2 = ptr2start = VARDATA_ANY(string2);
300  ptr2end = ptr2 + s2len;
301 
302  while (m--)
303  {
304  int mlen = pg_mblen(ptr2);
305 
306  memcpy(ptr_ret, ptr2, mlen);
307  ptr_ret += mlen;
308  ptr2 += mlen;
309  if (ptr2 == ptr2end) /* wrap around at end of s2 */
310  ptr2 = ptr2start;
311  }
312 
313  SET_VARSIZE(ret, ptr_ret - (char *) ret);
314 
315  PG_RETURN_TEXT_P(ret);
316 }
317 
318 
319 /********************************************************************
320  *
321  * btrim
322  *
323  * Syntax:
324  *
325  * text btrim(text string, text set)
326  *
327  * Purpose:
328  *
329  * Returns string with characters removed from the front and back
330  * up to the first character not in set.
331  *
332  ********************************************************************/
333 
334 Datum
336 {
337  text *string = PG_GETARG_TEXT_PP(0);
338  text *set = PG_GETARG_TEXT_PP(1);
339  text *ret;
340 
341  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
342  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
343  true, true);
344 
345  PG_RETURN_TEXT_P(ret);
346 }
347 
348 /********************************************************************
349  *
350  * btrim1 --- btrim with set fixed as ' '
351  *
352  ********************************************************************/
353 
354 Datum
356 {
357  text *string = PG_GETARG_TEXT_PP(0);
358  text *ret;
359 
360  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
361  " ", 1,
362  true, true);
363 
364  PG_RETURN_TEXT_P(ret);
365 }
366 
367 /*
368  * Common implementation for btrim, ltrim, rtrim
369  */
370 static text *
371 dotrim(const char *string, int stringlen,
372  const char *set, int setlen,
373  bool doltrim, bool dortrim)
374 {
375  int i;
376 
377  /* Nothing to do if either string or set is empty */
378  if (stringlen > 0 && setlen > 0)
379  {
381  {
382  /*
383  * In the multibyte-encoding case, build arrays of pointers to
384  * character starts, so that we can avoid inefficient checks in
385  * the inner loops.
386  */
387  const char **stringchars;
388  const char **setchars;
389  int *stringmblen;
390  int *setmblen;
391  int stringnchars;
392  int setnchars;
393  int resultndx;
394  int resultnchars;
395  const char *p;
396  int len;
397  int mblen;
398  const char *str_pos;
399  int str_len;
400 
401  stringchars = (const char **) palloc(stringlen * sizeof(char *));
402  stringmblen = (int *) palloc(stringlen * sizeof(int));
403  stringnchars = 0;
404  p = string;
405  len = stringlen;
406  while (len > 0)
407  {
408  stringchars[stringnchars] = p;
409  stringmblen[stringnchars] = mblen = pg_mblen(p);
410  stringnchars++;
411  p += mblen;
412  len -= mblen;
413  }
414 
415  setchars = (const char **) palloc(setlen * sizeof(char *));
416  setmblen = (int *) palloc(setlen * sizeof(int));
417  setnchars = 0;
418  p = set;
419  len = setlen;
420  while (len > 0)
421  {
422  setchars[setnchars] = p;
423  setmblen[setnchars] = mblen = pg_mblen(p);
424  setnchars++;
425  p += mblen;
426  len -= mblen;
427  }
428 
429  resultndx = 0; /* index in stringchars[] */
430  resultnchars = stringnchars;
431 
432  if (doltrim)
433  {
434  while (resultnchars > 0)
435  {
436  str_pos = stringchars[resultndx];
437  str_len = stringmblen[resultndx];
438  for (i = 0; i < setnchars; i++)
439  {
440  if (str_len == setmblen[i] &&
441  memcmp(str_pos, setchars[i], str_len) == 0)
442  break;
443  }
444  if (i >= setnchars)
445  break; /* no match here */
446  string += str_len;
447  stringlen -= str_len;
448  resultndx++;
449  resultnchars--;
450  }
451  }
452 
453  if (dortrim)
454  {
455  while (resultnchars > 0)
456  {
457  str_pos = stringchars[resultndx + resultnchars - 1];
458  str_len = stringmblen[resultndx + resultnchars - 1];
459  for (i = 0; i < setnchars; i++)
460  {
461  if (str_len == setmblen[i] &&
462  memcmp(str_pos, setchars[i], str_len) == 0)
463  break;
464  }
465  if (i >= setnchars)
466  break; /* no match here */
467  stringlen -= str_len;
468  resultnchars--;
469  }
470  }
471 
472  pfree(stringchars);
473  pfree(stringmblen);
474  pfree(setchars);
475  pfree(setmblen);
476  }
477  else
478  {
479  /*
480  * In the single-byte-encoding case, we don't need such overhead.
481  */
482  if (doltrim)
483  {
484  while (stringlen > 0)
485  {
486  char str_ch = *string;
487 
488  for (i = 0; i < setlen; i++)
489  {
490  if (str_ch == set[i])
491  break;
492  }
493  if (i >= setlen)
494  break; /* no match here */
495  string++;
496  stringlen--;
497  }
498  }
499 
500  if (dortrim)
501  {
502  while (stringlen > 0)
503  {
504  char str_ch = string[stringlen - 1];
505 
506  for (i = 0; i < setlen; i++)
507  {
508  if (str_ch == set[i])
509  break;
510  }
511  if (i >= setlen)
512  break; /* no match here */
513  stringlen--;
514  }
515  }
516  }
517  }
518 
519  /* Return selected portion of string */
520  return cstring_to_text_with_len(string, stringlen);
521 }
522 
523 /********************************************************************
524  *
525  * byteatrim
526  *
527  * Syntax:
528  *
529  * bytea byteatrim(byta string, bytea set)
530  *
531  * Purpose:
532  *
533  * Returns string with characters removed from the front and back
534  * up to the first character not in set.
535  *
536  * Cloned from btrim and modified as required.
537  ********************************************************************/
538 
539 Datum
541 {
542  bytea *string = PG_GETARG_BYTEA_PP(0);
543  bytea *set = PG_GETARG_BYTEA_PP(1);
544  bytea *ret;
545  char *ptr,
546  *end,
547  *ptr2,
548  *ptr2start,
549  *end2;
550  int m,
551  stringlen,
552  setlen;
553 
554  stringlen = VARSIZE_ANY_EXHDR(string);
555  setlen = VARSIZE_ANY_EXHDR(set);
556 
557  if (stringlen <= 0 || setlen <= 0)
558  PG_RETURN_BYTEA_P(string);
559 
560  m = stringlen;
561  ptr = VARDATA_ANY(string);
562  end = ptr + stringlen - 1;
563  ptr2start = VARDATA_ANY(set);
564  end2 = ptr2start + setlen - 1;
565 
566  while (m > 0)
567  {
568  ptr2 = ptr2start;
569  while (ptr2 <= end2)
570  {
571  if (*ptr == *ptr2)
572  break;
573  ++ptr2;
574  }
575  if (ptr2 > end2)
576  break;
577  ptr++;
578  m--;
579  }
580 
581  while (m > 0)
582  {
583  ptr2 = ptr2start;
584  while (ptr2 <= end2)
585  {
586  if (*end == *ptr2)
587  break;
588  ++ptr2;
589  }
590  if (ptr2 > end2)
591  break;
592  end--;
593  m--;
594  }
595 
596  ret = (bytea *) palloc(VARHDRSZ + m);
597  SET_VARSIZE(ret, VARHDRSZ + m);
598  memcpy(VARDATA(ret), ptr, m);
599 
600  PG_RETURN_BYTEA_P(ret);
601 }
602 
603 /********************************************************************
604  *
605  * ltrim
606  *
607  * Syntax:
608  *
609  * text ltrim(text string, text set)
610  *
611  * Purpose:
612  *
613  * Returns string with initial characters removed up to the first
614  * character not in set.
615  *
616  ********************************************************************/
617 
618 Datum
620 {
621  text *string = PG_GETARG_TEXT_PP(0);
622  text *set = PG_GETARG_TEXT_PP(1);
623  text *ret;
624 
625  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
626  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
627  true, false);
628 
629  PG_RETURN_TEXT_P(ret);
630 }
631 
632 /********************************************************************
633  *
634  * ltrim1 --- ltrim with set fixed as ' '
635  *
636  ********************************************************************/
637 
638 Datum
640 {
641  text *string = PG_GETARG_TEXT_PP(0);
642  text *ret;
643 
644  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
645  " ", 1,
646  true, false);
647 
648  PG_RETURN_TEXT_P(ret);
649 }
650 
651 /********************************************************************
652  *
653  * rtrim
654  *
655  * Syntax:
656  *
657  * text rtrim(text string, text set)
658  *
659  * Purpose:
660  *
661  * Returns string with final characters removed after the last
662  * character not in set.
663  *
664  ********************************************************************/
665 
666 Datum
668 {
669  text *string = PG_GETARG_TEXT_PP(0);
670  text *set = PG_GETARG_TEXT_PP(1);
671  text *ret;
672 
673  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
674  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
675  false, true);
676 
677  PG_RETURN_TEXT_P(ret);
678 }
679 
680 /********************************************************************
681  *
682  * rtrim1 --- rtrim with set fixed as ' '
683  *
684  ********************************************************************/
685 
686 Datum
688 {
689  text *string = PG_GETARG_TEXT_PP(0);
690  text *ret;
691 
692  ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
693  " ", 1,
694  false, true);
695 
696  PG_RETURN_TEXT_P(ret);
697 }
698 
699 
700 /********************************************************************
701  *
702  * translate
703  *
704  * Syntax:
705  *
706  * text translate(text string, text from, text to)
707  *
708  * Purpose:
709  *
710  * Returns string after replacing all occurrences of characters in from
711  * with the corresponding character in to. If from is longer than to,
712  * occurrences of the extra characters in from are deleted.
713  * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
714  *
715  ********************************************************************/
716 
717 Datum
719 {
720  text *string = PG_GETARG_TEXT_PP(0);
721  text *from = PG_GETARG_TEXT_PP(1);
722  text *to = PG_GETARG_TEXT_PP(2);
723  text *result;
724  char *from_ptr,
725  *to_ptr;
726  char *source,
727  *target;
728  int m,
729  fromlen,
730  tolen,
731  retlen,
732  i;
733  int worst_len;
734  int len;
735  int source_len;
736  int from_index;
737 
738  m = VARSIZE_ANY_EXHDR(string);
739  if (m <= 0)
740  PG_RETURN_TEXT_P(string);
741  source = VARDATA_ANY(string);
742 
743  fromlen = VARSIZE_ANY_EXHDR(from);
744  from_ptr = VARDATA_ANY(from);
745  tolen = VARSIZE_ANY_EXHDR(to);
746  to_ptr = VARDATA_ANY(to);
747 
748  /*
749  * The worst-case expansion is to substitute a max-length character for a
750  * single-byte character at each position of the string.
751  */
752  worst_len = pg_database_encoding_max_length() * m;
753 
754  /* check for integer overflow */
755  if (worst_len / pg_database_encoding_max_length() != m)
756  ereport(ERROR,
757  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
758  errmsg("requested length too large")));
759 
760  result = (text *) palloc(worst_len + VARHDRSZ);
761  target = VARDATA(result);
762  retlen = 0;
763 
764  while (m > 0)
765  {
766  source_len = pg_mblen(source);
767  from_index = 0;
768 
769  for (i = 0; i < fromlen; i += len)
770  {
771  len = pg_mblen(&from_ptr[i]);
772  if (len == source_len &&
773  memcmp(source, &from_ptr[i], len) == 0)
774  break;
775 
776  from_index++;
777  }
778  if (i < fromlen)
779  {
780  /* substitute */
781  char *p = to_ptr;
782 
783  for (i = 0; i < from_index; i++)
784  {
785  p += pg_mblen(p);
786  if (p >= (to_ptr + tolen))
787  break;
788  }
789  if (p < (to_ptr + tolen))
790  {
791  len = pg_mblen(p);
792  memcpy(target, p, len);
793  target += len;
794  retlen += len;
795  }
796 
797  }
798  else
799  {
800  /* no match, so copy */
801  memcpy(target, source, source_len);
802  target += source_len;
803  retlen += source_len;
804  }
805 
806  source += source_len;
807  m -= source_len;
808  }
809 
810  SET_VARSIZE(result, retlen + VARHDRSZ);
811 
812  /*
813  * The function result is probably much bigger than needed, if we're using
814  * a multibyte encoding, but it's not worth reallocating it; the result
815  * probably won't live long anyway.
816  */
817 
818  PG_RETURN_TEXT_P(result);
819 }
820 
821 /********************************************************************
822  *
823  * ascii
824  *
825  * Syntax:
826  *
827  * int ascii(text string)
828  *
829  * Purpose:
830  *
831  * Returns the decimal representation of the first character from
832  * string.
833  * If the string is empty we return 0.
834  * If the database encoding is UTF8, we return the Unicode codepoint.
835  * If the database encoding is any other multi-byte encoding, we
836  * return the value of the first byte if it is an ASCII character
837  * (range 1 .. 127), or raise an error.
838  * For all other encodings we return the value of the first byte,
839  * (range 1..255).
840  *
841  ********************************************************************/
842 
843 Datum
845 {
846  text *string = PG_GETARG_TEXT_PP(0);
848  unsigned char *data;
849 
850  if (VARSIZE_ANY_EXHDR(string) <= 0)
851  PG_RETURN_INT32(0);
852 
853  data = (unsigned char *) VARDATA_ANY(string);
854 
855  if (encoding == PG_UTF8 && *data > 127)
856  {
857  /* return the code point for Unicode */
858 
859  int result = 0,
860  tbytes = 0,
861  i;
862 
863  if (*data >= 0xF0)
864  {
865  result = *data & 0x07;
866  tbytes = 3;
867  }
868  else if (*data >= 0xE0)
869  {
870  result = *data & 0x0F;
871  tbytes = 2;
872  }
873  else
874  {
875  Assert(*data > 0xC0);
876  result = *data & 0x1f;
877  tbytes = 1;
878  }
879 
880  Assert(tbytes > 0);
881 
882  for (i = 1; i <= tbytes; i++)
883  {
884  Assert((data[i] & 0xC0) == 0x80);
885  result = (result << 6) + (data[i] & 0x3f);
886  }
887 
888  PG_RETURN_INT32(result);
889  }
890  else
891  {
892  if (pg_encoding_max_length(encoding) > 1 && *data > 127)
893  ereport(ERROR,
894  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
895  errmsg("requested character too large")));
896 
897 
898  PG_RETURN_INT32((int32) *data);
899  }
900 }
901 
902 /********************************************************************
903  *
904  * chr
905  *
906  * Syntax:
907  *
908  * text chr(int val)
909  *
910  * Purpose:
911  *
912  * Returns the character having the binary equivalent to val.
913  *
914  * For UTF8 we treat the argumwent as a Unicode code point.
915  * For other multi-byte encodings we raise an error for arguments
916  * outside the strict ASCII range (1..127).
917  *
918  * It's important that we don't ever return a value that is not valid
919  * in the database encoding, so that this doesn't become a way for
920  * invalid data to enter the database.
921  *
922  ********************************************************************/
923 
924 Datum
926 {
927  uint32 cvalue = PG_GETARG_UINT32(0);
928  text *result;
930 
931  if (encoding == PG_UTF8 && cvalue > 127)
932  {
933  /* for Unicode we treat the argument as a code point */
934  int bytes;
935  unsigned char *wch;
936 
937  /*
938  * We only allow valid Unicode code points; per RFC3629 that stops at
939  * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
940  * U+1FFFFF.
941  */
942  if (cvalue > 0x0010ffff)
943  ereport(ERROR,
944  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
945  errmsg("requested character too large for encoding: %d",
946  cvalue)));
947 
948  if (cvalue > 0xffff)
949  bytes = 4;
950  else if (cvalue > 0x07ff)
951  bytes = 3;
952  else
953  bytes = 2;
954 
955  result = (text *) palloc(VARHDRSZ + bytes);
956  SET_VARSIZE(result, VARHDRSZ + bytes);
957  wch = (unsigned char *) VARDATA(result);
958 
959  if (bytes == 2)
960  {
961  wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
962  wch[1] = 0x80 | (cvalue & 0x3F);
963  }
964  else if (bytes == 3)
965  {
966  wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
967  wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
968  wch[2] = 0x80 | (cvalue & 0x3F);
969  }
970  else
971  {
972  wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
973  wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
974  wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
975  wch[3] = 0x80 | (cvalue & 0x3F);
976  }
977 
978  /*
979  * The preceding range check isn't sufficient, because UTF8 excludes
980  * Unicode "surrogate pair" codes. Make sure what we created is valid
981  * UTF8.
982  */
983  if (!pg_utf8_islegal(wch, bytes))
984  ereport(ERROR,
985  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
986  errmsg("requested character not valid for encoding: %d",
987  cvalue)));
988  }
989  else
990  {
991  bool is_mb;
992 
993  /*
994  * Error out on arguments that make no sense or that we can't validly
995  * represent in the encoding.
996  */
997  if (cvalue == 0)
998  ereport(ERROR,
999  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1000  errmsg("null character not permitted")));
1001 
1002  is_mb = pg_encoding_max_length(encoding) > 1;
1003 
1004  if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1005  ereport(ERROR,
1006  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1007  errmsg("requested character too large for encoding: %d",
1008  cvalue)));
1009 
1010  result = (text *) palloc(VARHDRSZ + 1);
1011  SET_VARSIZE(result, VARHDRSZ + 1);
1012  *VARDATA(result) = (char) cvalue;
1013  }
1014 
1015  PG_RETURN_TEXT_P(result);
1016 }
1017 
1018 /********************************************************************
1019  *
1020  * repeat
1021  *
1022  * Syntax:
1023  *
1024  * text repeat(text string, int val)
1025  *
1026  * Purpose:
1027  *
1028  * Repeat string by val.
1029  *
1030  ********************************************************************/
1031 
1032 Datum
1034 {
1035  text *string = PG_GETARG_TEXT_PP(0);
1036  int32 count = PG_GETARG_INT32(1);
1037  text *result;
1038  int slen,
1039  tlen;
1040  int i;
1041  char *cp,
1042  *sp;
1043 
1044  if (count < 0)
1045  count = 0;
1046 
1047  slen = VARSIZE_ANY_EXHDR(string);
1048  tlen = VARHDRSZ + (count * slen);
1049 
1050  /* Check for integer overflow */
1051  if (slen != 0 && count != 0)
1052  {
1053  int check = count * slen;
1054  int check2 = check + VARHDRSZ;
1055 
1056  if ((check / slen) != count || check2 <= check)
1057  ereport(ERROR,
1058  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1059  errmsg("requested length too large")));
1060  }
1061 
1062  result = (text *) palloc(tlen);
1063 
1064  SET_VARSIZE(result, tlen);
1065  cp = VARDATA(result);
1066  sp = VARDATA_ANY(string);
1067  for (i = 0; i < count; i++)
1068  {
1069  memcpy(cp, sp, slen);
1070  cp += slen;
1071  }
1072 
1073  PG_RETURN_TEXT_P(result);
1074 }
#define PG_GETARG_UINT32(n)
Definition: fmgr.h:235
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define VARDATA(PTR)
Definition: postgres.h:303
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1458
char * str_tolower(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1509
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:43
Datum chr(PG_FUNCTION_ARGS)
char * str_initcap(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1746
#define VARHDRSZ
Definition: c.h:445
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
Datum ascii(PG_FUNCTION_ARGS)
int errcode(int sqlerrcode)
Definition: elog.c:575
return result
Definition: formatting.c:1618
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:329
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:74
#define PG_GET_COLLATION()
Definition: fmgr.h:163
Datum translate(PG_FUNCTION_ARGS)
signed int int32
Definition: c.h:256
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:805
Datum btrim(PG_FUNCTION_ARGS)
void pfree(void *pointer)
Definition: mcxt.c:950
Datum ltrim(PG_FUNCTION_ARGS)
Datum ltrim1(PG_FUNCTION_ARGS)
#define ERROR
Definition: elog.h:43
Datum repeat(PG_FUNCTION_ARGS)
Datum rtrim1(PG_FUNCTION_ARGS)
Datum initcap(PG_FUNCTION_ARGS)
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1820
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:161
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
Datum rpad(PG_FUNCTION_ARGS)
char string[11]
Definition: preproc-type.c:46
unsigned int uint32
Definition: c.h:268
#define ereport(elevel, rest)
Definition: elog.h:122
Datum byteatrim(PG_FUNCTION_ARGS)
uintptr_t Datum
Definition: postgres.h:372
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
Datum btrim1(PG_FUNCTION_ARGS)
static char * encoding
Definition: initdb.c:122
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:330
text * cstring_to_text(const char *s)
Definition: varlena.c:149
#define Assert(condition)
Definition: c.h:675
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:272
Datum lpad(PG_FUNCTION_ARGS)
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
void * palloc(Size size)
Definition: mcxt.c:849
int errmsg(const char *fmt,...)
Definition: elog.c:797
Datum rtrim(PG_FUNCTION_ARGS)
int i
static text * dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim)
char * str_toupper(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1628
Definition: c.h:439
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328