PostgreSQL Source Code  git master
scansup.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * scansup.c
4  * support routines for the lex/flex scanner, used by both the normal
5  * backend as well as the bootstrap backend
6  *
7  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  *
11  * IDENTIFICATION
12  * src/backend/parser/scansup.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17 
18 #include <ctype.h>
19 
20 #include "mb/pg_wchar.h"
21 #include "parser/scansup.h"
22 
/* ----------------
 *		scanstr
 *
 * Produce a de-escaped copy of the given string literal body.
 *
 * The following input sequences are translated:
 *		''					-> a single quote
 *		\b \f \n \r \t		-> the corresponding control character
 *		\ooo				-> the byte with that octal value (1-3 digits)
 *		\<any other char>	-> that character itself
 * Everything else is copied through unchanged.
 *
 * A NULL or empty input yields a freshly pstrdup'd empty string.
 *
 * The result is palloc'd; the caller is responsible for pfree'ing it.
 * ----------------
 */
char *
scanstr(const char *s)
{
	char	   *result;
	int			srclen;
	int			in;
	int			out = 0;

	if (s == NULL || s[0] == '\0')
		return pstrdup("");

	srclen = strlen(s);

	/*
	 * Every escape sequence consumes at least as many input bytes as it
	 * emits, so the output can never be longer than the input.
	 */
	result = palloc(srclen + 1);

	for (in = 0; in < srclen; in++)
	{
		char		c = s[in];

		if (c == '\'')
		{
			/*
			 * A correctly working scanner only lets unescaped quotes through
			 * in pairs, so a second quote must follow.  The bootstrap parser
			 * is less careful, hence the assertion.
			 */
			in++;
			Assert(s[in] == '\'');
			c = s[in];
		}
		else if (c == '\\')
		{
			/* Step over the backslash and decode what follows it */
			in++;
			c = s[in];

			if (c >= '0' && c <= '7')
			{
				long		octval = 0;
				int			ndigits;

				/* Gather up to three consecutive octal digits */
				for (ndigits = 0; ndigits < 3; ndigits++)
				{
					char		digit = s[in + ndigits];

					if (digit < '0' || digit > '7')
						break;
					octval = (octval << 3) + (digit - '0');
				}

				/* Leave "in" on the last digit; the loop header advances it */
				in += ndigits - 1;

				/* Plain cast: out-of-range values such as \777 are not rejected */
				c = (char) octval;
			}
			else
			{
				switch (c)
				{
					case 'b':
						c = '\b';
						break;
					case 'f':
						c = '\f';
						break;
					case 'n':
						c = '\n';
						break;
					case 'r':
						c = '\r';
						break;
					case 't':
						c = '\t';
						break;
					default:
						/* any other escaped character stands for itself */
						break;
				}
			}
		}

		result[out++] = c;
	}

	result[out] = '\0';
	return result;
}
114 
115 
/*
 * downcase_truncate_identifier() --- convenience wrapper that downcases an
 * unquoted identifier and always requests truncation.
 *
 * This simply calls downcase_identifier() with its "truncate" argument set
 * to true; "warn" is passed through to control whether truncation is
 * reported.
 *
 * Returns a palloc'd string containing the adjusted identifier.
 *
 * Note: in some usages the passed string is not null-terminated, which is
 * why the length is given explicitly.
 *
 * Note: the API of this function is designed to allow for downcasing
 * transformations that increase the string length, but we don't yet
 * support that.  If you want to implement it, you'll need to fix
 * SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	const bool	do_truncate = true;

	return downcase_identifier(ident, len, warn, do_truncate);
}
134 
135 /*
136  * a workhorse for downcase_truncate_identifier
137  */
138 char *
139 downcase_identifier(const char *ident, int len, bool warn, bool truncate)
140 {
141  char *result;
142  int i;
143  bool enc_is_single_byte;
144 
145  result = palloc(len + 1);
146  enc_is_single_byte = pg_database_encoding_max_length() == 1;
147 
148  /*
149  * SQL99 specifies Unicode-aware case normalization, which we don't yet
150  * have the infrastructure for. Instead we use tolower() to provide a
151  * locale-aware translation. However, there are some locales where this
152  * is not right either (eg, Turkish may do strange things with 'i' and
153  * 'I'). Our current compromise is to use tolower() for characters with
154  * the high bit set, as long as they aren't part of a multi-byte
155  * character, and use an ASCII-only downcasing for 7-bit characters.
156  */
157  for (i = 0; i < len; i++)
158  {
159  unsigned char ch = (unsigned char) ident[i];
160 
161  if (ch >= 'A' && ch <= 'Z')
162  ch += 'a' - 'A';
163  else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
164  ch = tolower(ch);
165  result[i] = (char) ch;
166  }
167  result[i] = '\0';
168 
169  if (i >= NAMEDATALEN && truncate)
170  truncate_identifier(result, i, warn);
171 
172  return result;
173 }
174 
175 
176 /*
177  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
178  *
179  * The given string is modified in-place, if necessary. A warning is
180  * issued if requested.
181  *
182  * We require the caller to pass in the string length since this saves a
183  * strlen() call in some common usages.
184  */
185 void
186 truncate_identifier(char *ident, int len, bool warn)
187 {
188  if (len >= NAMEDATALEN)
189  {
190  len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);
191  if (warn)
192  {
193  /*
194  * We avoid using %.*s here because it can misbehave if the data
195  * is not valid in what libc thinks is the prevailing encoding.
196  */
197  char buf[NAMEDATALEN];
198 
199  memcpy(buf, ident, len);
200  buf[len] = '\0';
201  ereport(NOTICE,
202  (errcode(ERRCODE_NAME_TOO_LONG),
203  errmsg("identifier \"%s\" will be truncated to \"%s\"",
204  ident, buf)));
205  }
206  ident[len] = '\0';
207  }
208 }
209 
/*
 * scanner_isspace() --- return true if flex scanner considers char whitespace
 *
 * This should be used instead of the potentially locale-dependent isspace()
 * function when it's important to match the lexer's behavior.
 *
 * Exactly five characters are accepted: space, tab, newline, carriage
 * return and form feed.  Anything else, including vertical tab, yields
 * false.
 *
 * In principle we might need similar functions for isalnum etc, but for the
 * moment only isspace seems needed.
 */
bool
scanner_isspace(char ch)
{
	/* This must match scan.l's list of {space} characters */
	if (ch == ' ' ||
		ch == '\t' ||
		ch == '\n' ||
		ch == '\r' ||
		ch == '\f')
		return true;
	return false;
}
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:130
char * pstrdup(const char *in)
Definition: mcxt.c:1186
int errcode(int sqlerrcode)
Definition: elog.c:608
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:186
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1100
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:862
static char * buf
Definition: pg_test_fsync.c:67
int pg_database_encoding_max_length(void)
Definition: wchar.c:1881
#define ereport(elevel, rest)
Definition: elog.h:141
bool scanner_isspace(char ch)
Definition: scansup.c:220
char * downcase_identifier(const char *ident, int len, bool warn, bool truncate)
Definition: scansup.c:139
#define NOTICE
Definition: elog.h:37
#define Assert(condition)
Definition: c.h:733
warn
Definition: strftime.c:109
char * scanstr(const char *s)
Definition: scansup.c:35
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
int i