PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ts_locale.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ts_locale.c
4  * locale compatibility layer for tsearch
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/tsearch/ts_locale.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "catalog/pg_collation.h"
17 #include "storage/fd.h"
18 #include "tsearch/ts_locale.h"
19 #include "tsearch/ts_public.h"
20 
21 static void tsearch_readline_callback(void *arg);
22 
23 
24 int
25 t_isdigit(const char *ptr)
26 {
27  int clen = pg_mblen(ptr);
28  wchar_t character[2];
29  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
30  pg_locale_t mylocale = 0; /* TODO */
31 
32  if (clen == 1 || lc_ctype_is_c(collation))
33  return isdigit(TOUCHAR(ptr));
34 
35  char2wchar(character, 2, ptr, clen, mylocale);
36 
37  return iswdigit((wint_t) character[0]);
38 }
39 
40 int
41 t_isspace(const char *ptr)
42 {
43  int clen = pg_mblen(ptr);
44  wchar_t character[2];
45  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
46  pg_locale_t mylocale = 0; /* TODO */
47 
48  if (clen == 1 || lc_ctype_is_c(collation))
49  return isspace(TOUCHAR(ptr));
50 
51  char2wchar(character, 2, ptr, clen, mylocale);
52 
53  return iswspace((wint_t) character[0]);
54 }
55 
56 int
57 t_isalpha(const char *ptr)
58 {
59  int clen = pg_mblen(ptr);
60  wchar_t character[2];
61  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
62  pg_locale_t mylocale = 0; /* TODO */
63 
64  if (clen == 1 || lc_ctype_is_c(collation))
65  return isalpha(TOUCHAR(ptr));
66 
67  char2wchar(character, 2, ptr, clen, mylocale);
68 
69  return iswalpha((wint_t) character[0]);
70 }
71 
72 int
73 t_isprint(const char *ptr)
74 {
75  int clen = pg_mblen(ptr);
76  wchar_t character[2];
77  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
78  pg_locale_t mylocale = 0; /* TODO */
79 
80  if (clen == 1 || lc_ctype_is_c(collation))
81  return isprint(TOUCHAR(ptr));
82 
83  char2wchar(character, 2, ptr, clen, mylocale);
84 
85  return iswprint((wint_t) character[0]);
86 }
87 
88 
89 /*
90  * Set up to read a file using tsearch_readline(). This facility is
91  * better than just reading the file directly because it provides error
92  * context pointing to the specific line where a problem is detected.
93  *
94  * Expected usage is:
95  *
96  * tsearch_readline_state trst;
97  *
98  * if (!tsearch_readline_begin(&trst, filename))
99  * ereport(ERROR,
100  * (errcode(ERRCODE_CONFIG_FILE_ERROR),
101  * errmsg("could not open stop-word file \"%s\": %m",
102  * filename)));
103  * while ((line = tsearch_readline(&trst)) != NULL)
104  * process line;
105  * tsearch_readline_end(&trst);
106  *
107  * Note that the caller supplies the ereport() for file open failure;
108  * this is so that a custom message can be provided. The filename string
109  * passed to tsearch_readline_begin() must remain valid through
110  * tsearch_readline_end().
111  */
112 bool
114  const char *filename)
115 {
116  if ((stp->fp = AllocateFile(filename, "r")) == NULL)
117  return false;
118  stp->filename = filename;
119  stp->lineno = 0;
120  stp->curline = NULL;
121  /* Setup error traceback support for ereport() */
123  stp->cb.arg = (void *) stp;
125  error_context_stack = &stp->cb;
126  return true;
127 }
128 
129 /*
130  * Read the next line from a tsearch data file (expected to be in UTF-8), and
131  * convert it to database encoding if needed. The returned string is palloc'd.
132  * NULL return means EOF.
133  */
134 char *
136 {
137  char *result;
138 
139  stp->lineno++;
140  stp->curline = NULL;
141  result = t_readline(stp->fp);
142  stp->curline = result;
143  return result;
144 }
145 
146 /*
147  * Close down after reading a file with tsearch_readline()
148  */
149 void
151 {
152  FreeFile(stp->fp);
153  /* Pop the error context stack */
155 }
156 
157 /*
158  * Error context callback for errors occurring while reading a tsearch
159  * configuration file.
160  */
161 static void
163 {
165 
166  /*
167  * We can't include the text of the config line for errors that occur
168  * during t_readline() itself. This is only partly a consequence of our
169  * arms-length use of that routine: the major cause of such errors is
170  * encoding violations, and we daren't try to print error messages
171  * containing badly-encoded data.
172  */
173  if (stp->curline)
174  errcontext("line %d of configuration file \"%s\": \"%s\"",
175  stp->lineno,
176  stp->filename,
177  stp->curline);
178  else
179  errcontext("line %d of configuration file \"%s\"",
180  stp->lineno,
181  stp->filename);
182 }
183 
184 
185 /*
186  * Read the next line from a tsearch data file (expected to be in UTF-8), and
187  * convert it to database encoding if needed. The returned string is palloc'd.
188  * NULL return means EOF.
189  *
190  * Note: direct use of this function is now deprecated. Go through
191  * tsearch_readline() to provide better error reporting.
192  */
193 char *
194 t_readline(FILE *fp)
195 {
196  int len;
197  char *recoded;
198  char buf[4096]; /* lines must not be longer than this */
199 
200  if (fgets(buf, sizeof(buf), fp) == NULL)
201  return NULL;
202 
203  len = strlen(buf);
204 
205  /* Make sure the input is valid UTF-8 */
206  (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
207 
208  /* And convert */
209  recoded = pg_any_to_server(buf, len, PG_UTF8);
210  if (recoded == buf)
211  {
212  /*
213  * conversion didn't pstrdup, so we must. We can use the length of the
214  * original string, because no conversion was done.
215  */
216  recoded = pnstrdup(recoded, len);
217  }
218 
219  return recoded;
220 }
221 
/*
 * lowerstr --- lower-case a null-terminated string
 *
 * Convenience wrapper: measures the string and hands it to
 * lowerstr_with_len().  The result is palloc'd.
 */
char *
lowerstr(const char *str)
{
	int			nbytes = strlen(str);

	return lowerstr_with_len(str, nbytes);
}
232 
233 /*
234  * lowerstr_with_len --- fold string to lower case
235  *
236  * Input string need not be null-terminated.
237  *
238  * Returned string is palloc'd
239  */
240 char *
241 lowerstr_with_len(const char *str, int len)
242 {
243  char *out;
244  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
245  pg_locale_t mylocale = 0; /* TODO */
246 
247  if (len == 0)
248  return pstrdup("");
249 
250  /*
251  * Use wide char code only when max encoding length > 1 and ctype != C.
252  * Some operating systems fail with multi-byte encodings and a C locale.
253  * Also, for a C locale there is no need to process as multibyte. From
254  * backend/utils/adt/oracle_compat.c Teodor
255  */
256  if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
257  {
258  wchar_t *wstr,
259  *wptr;
260  int wlen;
261 
262  /*
263  * alloc number of wchar_t for worst case, len contains number of
264  * bytes >= number of characters and alloc 1 wchar_t for 0, because
265  * wchar2char wants zero-terminated string
266  */
267  wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
268 
269  wlen = char2wchar(wstr, len + 1, str, len, mylocale);
270  Assert(wlen <= len);
271 
272  while (*wptr)
273  {
274  *wptr = towlower((wint_t) *wptr);
275  wptr++;
276  }
277 
278  /*
279  * Alloc result string for worst case + '\0'
280  */
281  len = pg_database_encoding_max_length() * wlen + 1;
282  out = (char *) palloc(len);
283 
284  wlen = wchar2char(out, wstr, len, mylocale);
285 
286  pfree(wstr);
287 
288  if (wlen < 0)
289  ereport(ERROR,
290  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
291  errmsg("conversion from wchar_t to server encoding failed: %m")));
292  Assert(wlen < len);
293  }
294  else
295  {
296  const char *ptr = str;
297  char *outptr;
298 
299  outptr = out = (char *) palloc(sizeof(char) * (len + 1));
300  while ((ptr - str) < len && *ptr)
301  {
302  *outptr++ = tolower(TOUCHAR(ptr));
303  ptr++;
304  }
305  *outptr = '\0';
306  }
307 
308  return out;
309 }
static void tsearch_readline_callback(void *arg)
Definition: ts_locale.c:162
int t_isprint(const char *ptr)
Definition: ts_locale.c:73
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1087
char * pstrdup(const char *in)
Definition: mcxt.c:1076
char * lowerstr_with_len(const char *str, int len)
Definition: ts_locale.c:241
int errcode(int sqlerrcode)
Definition: elog.c:575
char * lowerstr(const char *str)
Definition: ts_locale.c:228
unsigned int Oid
Definition: postgres_ext.h:31
struct ErrorContextCallback * previous
Definition: elog.h:238
char * t_readline(FILE *fp)
Definition: ts_locale.c:194
ErrorContextCallback * error_context_stack
Definition: elog.c:88
int t_isdigit(const char *ptr)
Definition: ts_locale.c:25
void pfree(void *pointer)
Definition: mcxt.c:949
#define ERROR
Definition: elog.h:43
int t_isspace(const char *ptr)
Definition: ts_locale.c:41
static char * buf
Definition: pg_test_fsync.c:67
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:75
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2117
ErrorContextCallback cb
Definition: ts_locale.h:39
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1877
#define ereport(elevel, rest)
Definition: elog.h:122
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:1677
const char * filename
Definition: ts_locale.h:36
#define Assert(condition)
Definition: c.h:664
#define TOUCHAR(x)
Definition: ts_locale.h:42
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:150
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:135
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:113
int FreeFile(FILE *file)
Definition: fd.c:2309
static char * filename
Definition: pg_dumpall.c:90
void(* callback)(void *arg)
Definition: elog.h:239
void * palloc(Size size)
Definition: mcxt.c:848
int errmsg(const char *fmt,...)
Definition: elog.c:797
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
Definition: pg_locale.c:1606
#define errcontext
Definition: elog.h:164
void * arg
int t_isalpha(const char *ptr)
Definition: ts_locale.c:57
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1178
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:572