PostgreSQL Source Code  git master
scansup.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * scansup.c
4  * scanner support routines used by the core lexer
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/parser/scansup.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 
19 #include "mb/pg_wchar.h"
20 #include "parser/scansup.h"
21 
22 
/*
 * downcase_truncate_identifier() --- fold an unquoted identifier to lower
 * case and truncate it, optionally warning when truncation happens.
 *
 * The adjusted identifier is returned as a palloc'd string.
 *
 * Note: some callers pass a string that is not null-terminated, so only
 * the first "len" bytes of "ident" may be examined.
 *
 * Note: the API is shaped so that a downcasing transformation could make
 * the string longer, but that is not supported yet.  Implementing it would
 * also require fixing SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	char	   *adjusted;

	/* Same as downcase_identifier(), with truncation always requested. */
	adjusted = downcase_identifier(ident, len, warn, true);

	return adjusted;
}
41 
42 /*
43  * a workhorse for downcase_truncate_identifier
44  */
45 char *
46 downcase_identifier(const char *ident, int len, bool warn, bool truncate)
47 {
48  char *result;
49  int i;
50  bool enc_is_single_byte;
51 
52  result = palloc(len + 1);
53  enc_is_single_byte = pg_database_encoding_max_length() == 1;
54 
55  /*
56  * SQL99 specifies Unicode-aware case normalization, which we don't yet
57  * have the infrastructure for. Instead we use tolower() to provide a
58  * locale-aware translation. However, there are some locales where this
59  * is not right either (eg, Turkish may do strange things with 'i' and
60  * 'I'). Our current compromise is to use tolower() for characters with
61  * the high bit set, as long as they aren't part of a multi-byte
62  * character, and use an ASCII-only downcasing for 7-bit characters.
63  */
64  for (i = 0; i < len; i++)
65  {
66  unsigned char ch = (unsigned char) ident[i];
67 
68  if (ch >= 'A' && ch <= 'Z')
69  ch += 'a' - 'A';
70  else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
71  ch = tolower(ch);
72  result[i] = (char) ch;
73  }
74  result[i] = '\0';
75 
76  if (i >= NAMEDATALEN && truncate)
77  truncate_identifier(result, i, warn);
78 
79  return result;
80 }
81 
82 
83 /*
84  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
85  *
86  * The given string is modified in-place, if necessary. A warning is
87  * issued if requested.
88  *
89  * We require the caller to pass in the string length since this saves a
90  * strlen() call in some common usages.
91  */
92 void
93 truncate_identifier(char *ident, int len, bool warn)
94 {
95  if (len >= NAMEDATALEN)
96  {
98  if (warn)
100  (errcode(ERRCODE_NAME_TOO_LONG),
101  errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
102  ident, len, ident)));
103  ident[len] = '\0';
104  }
105 }
106 
/*
 * scanner_isspace() --- return true if flex scanner considers char whitespace
 *
 * This should be used instead of the potentially locale-dependent isspace()
 * function when it's important to match the lexer's behavior.
 *
 * In principle we might need similar functions for isalnum etc, but for the
 * moment only isspace seems needed.
 */
bool
scanner_isspace(char ch)
{
	/* This must match scan.l's list of {space} characters */
	if (ch == ' ' ||
		ch == '\t' ||
		ch == '\n' ||
		ch == '\r' ||
		ch == '\v' ||
		ch == '\f')
		return true;
	return false;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1109
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define NOTICE
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:149
#define ident
Definition: indent_codes.h:47
int i
Definition: isn.c:72
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1083
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
void * palloc(Size size)
Definition: mcxt.c:1317
#define NAMEDATALEN
const void size_t len
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:93
bool scanner_isspace(char ch)
Definition: scansup.c:117
char * downcase_identifier(const char *ident, int len, bool warn, bool truncate)
Definition: scansup.c:46
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:37
warn
Definition: strftime.c:110