PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ts_locale.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ts_locale.c
4  * locale compatibility layer for tsearch
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/tsearch/ts_locale.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "catalog/pg_collation.h"
17 #include "storage/fd.h"
18 #include "tsearch/ts_locale.h"
19 #include "tsearch/ts_public.h"
20 
21 static void tsearch_readline_callback(void *arg);
22 
23 
24 #ifdef USE_WIDE_UPPER_LOWER
25 
26 int
27 t_isdigit(const char *ptr)
28 {
29  int clen = pg_mblen(ptr);
30  wchar_t character[2];
31  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
32  pg_locale_t mylocale = 0; /* TODO */
33 
34  if (clen == 1 || lc_ctype_is_c(collation))
35  return isdigit(TOUCHAR(ptr));
36 
37  char2wchar(character, 2, ptr, clen, mylocale);
38 
39  return iswdigit((wint_t) character[0]);
40 }
41 
42 int
43 t_isspace(const char *ptr)
44 {
45  int clen = pg_mblen(ptr);
46  wchar_t character[2];
47  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
48  pg_locale_t mylocale = 0; /* TODO */
49 
50  if (clen == 1 || lc_ctype_is_c(collation))
51  return isspace(TOUCHAR(ptr));
52 
53  char2wchar(character, 2, ptr, clen, mylocale);
54 
55  return iswspace((wint_t) character[0]);
56 }
57 
58 int
59 t_isalpha(const char *ptr)
60 {
61  int clen = pg_mblen(ptr);
62  wchar_t character[2];
63  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
64  pg_locale_t mylocale = 0; /* TODO */
65 
66  if (clen == 1 || lc_ctype_is_c(collation))
67  return isalpha(TOUCHAR(ptr));
68 
69  char2wchar(character, 2, ptr, clen, mylocale);
70 
71  return iswalpha((wint_t) character[0]);
72 }
73 
74 int
75 t_isprint(const char *ptr)
76 {
77  int clen = pg_mblen(ptr);
78  wchar_t character[2];
79  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
80  pg_locale_t mylocale = 0; /* TODO */
81 
82  if (clen == 1 || lc_ctype_is_c(collation))
83  return isprint(TOUCHAR(ptr));
84 
85  char2wchar(character, 2, ptr, clen, mylocale);
86 
87  return iswprint((wint_t) character[0]);
88 }
89 #endif /* USE_WIDE_UPPER_LOWER */
90 
91 
92 /*
93  * Set up to read a file using tsearch_readline(). This facility is
94  * better than just reading the file directly because it provides error
95  * context pointing to the specific line where a problem is detected.
96  *
97  * Expected usage is:
98  *
99  * tsearch_readline_state trst;
100  *
101  * if (!tsearch_readline_begin(&trst, filename))
102  * ereport(ERROR,
103  * (errcode(ERRCODE_CONFIG_FILE_ERROR),
104  * errmsg("could not open stop-word file \"%s\": %m",
105  * filename)));
106  * while ((line = tsearch_readline(&trst)) != NULL)
107  * process line;
108  * tsearch_readline_end(&trst);
109  *
110  * Note that the caller supplies the ereport() for file open failure;
111  * this is so that a custom message can be provided. The filename string
112  * passed to tsearch_readline_begin() must remain valid through
113  * tsearch_readline_end().
114  */
115 bool
117  const char *filename)
118 {
119  if ((stp->fp = AllocateFile(filename, "r")) == NULL)
120  return false;
121  stp->filename = filename;
122  stp->lineno = 0;
123  stp->curline = NULL;
124  /* Setup error traceback support for ereport() */
126  stp->cb.arg = (void *) stp;
128  error_context_stack = &stp->cb;
129  return true;
130 }
131 
132 /*
133  * Read the next line from a tsearch data file (expected to be in UTF-8), and
134  * convert it to database encoding if needed. The returned string is palloc'd.
135  * NULL return means EOF.
136  */
137 char *
139 {
140  char *result;
141 
142  stp->lineno++;
143  stp->curline = NULL;
144  result = t_readline(stp->fp);
145  stp->curline = result;
146  return result;
147 }
148 
149 /*
150  * Close down after reading a file with tsearch_readline()
151  */
152 void
154 {
155  FreeFile(stp->fp);
156  /* Pop the error context stack */
158 }
159 
160 /*
161  * Error context callback for errors occurring while reading a tsearch
162  * configuration file.
163  */
164 static void
166 {
168 
169  /*
170  * We can't include the text of the config line for errors that occur
171  * during t_readline() itself. This is only partly a consequence of our
172  * arms-length use of that routine: the major cause of such errors is
173  * encoding violations, and we daren't try to print error messages
174  * containing badly-encoded data.
175  */
176  if (stp->curline)
177  errcontext("line %d of configuration file \"%s\": \"%s\"",
178  stp->lineno,
179  stp->filename,
180  stp->curline);
181  else
182  errcontext("line %d of configuration file \"%s\"",
183  stp->lineno,
184  stp->filename);
185 }
186 
187 
188 /*
189  * Read the next line from a tsearch data file (expected to be in UTF-8), and
190  * convert it to database encoding if needed. The returned string is palloc'd.
191  * NULL return means EOF.
192  *
193  * Note: direct use of this function is now deprecated. Go through
194  * tsearch_readline() to provide better error reporting.
195  */
196 char *
197 t_readline(FILE *fp)
198 {
199  int len;
200  char *recoded;
201  char buf[4096]; /* lines must not be longer than this */
202 
203  if (fgets(buf, sizeof(buf), fp) == NULL)
204  return NULL;
205 
206  len = strlen(buf);
207 
208  /* Make sure the input is valid UTF-8 */
209  (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
210 
211  /* And convert */
212  recoded = pg_any_to_server(buf, len, PG_UTF8);
213  if (recoded == buf)
214  {
215  /*
216  * conversion didn't pstrdup, so we must. We can use the length of the
217  * original string, because no conversion was done.
218  */
219  recoded = pnstrdup(recoded, len);
220  }
221 
222  return recoded;
223 }
224 
/*
 * lowerstr --- lower-case a null-terminated string
 *
 * Convenience wrapper that measures the string and hands it to
 * lowerstr_with_len().  The result is palloc'd.
 */
char *
lowerstr(const char *str)
{
	int			nbytes = strlen(str);

	return lowerstr_with_len(str, nbytes);
}
235 
/*
 * lowerstr_with_len --- lower-case the first len bytes of a string
 *
 * The input does not have to be null-terminated.
 *
 * The result is a palloc'd, null-terminated string.
 */
char *
lowerstr_with_len(const char *str, int len)
{
	char	   *out;

#ifdef USE_WIDE_UPPER_LOWER
	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
	pg_locale_t mylocale = 0;	/* TODO */
#endif

	/* Empty input: hand back a fresh empty string */
	if (len == 0)
		return pstrdup("");

#ifdef USE_WIDE_UPPER_LOWER

	/*
	 * Use wide char code only when max encoding length > 1 and ctype != C.
	 * Some operating systems fail with multi-byte encodings and a C locale.
	 * Also, for a C locale there is no need to process as multibyte. From
	 * backend/utils/adt/oracle_compat.c Teodor
	 */
	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
	{
		wchar_t    *wide;
		int			nwide;
		int			outsize;
		int			i;

		/*
		 * Worst case is one wchar_t per input byte (bytes >= characters),
		 * plus one more for the terminating zero that wchar2char expects.
		 */
		wide = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));

		nwide = char2wchar(wide, len + 1, str, len, mylocale);
		Assert(nwide <= len);

		/* Fold each wide character in place, up to the terminator */
		for (i = 0; wide[i]; i++)
			wide[i] = towlower((wint_t) wide[i]);

		/*
		 * Size the result for the worst case: maximum bytes per character
		 * times the number of characters, plus the '\0'.
		 */
		outsize = pg_database_encoding_max_length() * nwide + 1;
		out = (char *) palloc(outsize);

		nwide = wchar2char(out, wide, outsize, mylocale);

		pfree(wide);

		if (nwide < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("conversion from wchar_t to server encoding failed: %m")));
		Assert(nwide < outsize);
	}
	else
#endif							/* USE_WIDE_UPPER_LOWER */
	{
		const char *in;
		char	   *dst;

		out = (char *) palloc(sizeof(char) * (len + 1));
		dst = out;

		/* Byte-at-a-time folding; stop at len bytes or an embedded null */
		for (in = str; (in - str) < len && *in; in++)
			*dst++ = tolower(TOUCHAR(in));
		*dst = '\0';
	}

	return out;
}
static void tsearch_readline_callback(void *arg)
Definition: ts_locale.c:165
#define t_isspace(x)
Definition: ts_locale.h:58
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1088
char * pstrdup(const char *in)
Definition: mcxt.c:1077
char * lowerstr_with_len(const char *str, int len)
Definition: ts_locale.c:244
int errcode(int sqlerrcode)
Definition: elog.c:575
return result
Definition: formatting.c:1633
char * lowerstr(const char *str)
Definition: ts_locale.c:231
#define t_isdigit(x)
Definition: ts_locale.h:57
unsigned int Oid
Definition: postgres_ext.h:31
struct ErrorContextCallback * previous
Definition: elog.h:238
char * t_readline(FILE *fp)
Definition: ts_locale.c:197
ErrorContextCallback * error_context_stack
Definition: elog.c:88
void pfree(void *pointer)
Definition: mcxt.c:950
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:66
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:75
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2094
ErrorContextCallback cb
Definition: ts_locale.h:39
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1877
#define ereport(elevel, rest)
Definition: elog.h:122
#define t_isprint(x)
Definition: ts_locale.h:60
const char * filename
Definition: ts_locale.h:36
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define TOUCHAR(x)
Definition: ts_locale.h:42
#define t_isalpha(x)
Definition: ts_locale.h:59
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:153
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:138
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:116
int FreeFile(FILE *file)
Definition: fd.c:2277
static char * filename
Definition: pg_dumpall.c:89
void(* callback)(void *arg)
Definition: elog.h:239
void * palloc(Size size)
Definition: mcxt.c:849
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define errcontext
Definition: elog.h:164
void * arg
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1178
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:572