PostgreSQL Source Code  git master
ts_locale.h File Reference
#include <ctype.h>
#include <limits.h>
#include <wctype.h>
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "utils/pg_locale.h"
Include dependency graph for ts_locale.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  tsearch_readline_state
 

Macros

#define TOUCHAR(x)   (*((const unsigned char *) (x)))
 
#define t_iseq(x, c)   (TOUCHAR(x) == (unsigned char) (c))
 
#define COPYCHAR(d, s)   memcpy(d, s, pg_mblen(s))
 

Functions

int t_isdigit (const char *ptr)
 
int t_isspace (const char *ptr)
 
int t_isalpha (const char *ptr)
 
int t_isalnum (const char *ptr)
 
int t_isprint (const char *ptr)
 
char * lowerstr (const char *str)
 
char * lowerstr_with_len (const char *str, int len)
 
bool tsearch_readline_begin (tsearch_readline_state *stp, const char *filename)
 
char * tsearch_readline (tsearch_readline_state *stp)
 
void tsearch_readline_end (tsearch_readline_state *stp)
 

Macro Definition Documentation

◆ COPYCHAR

#define COPYCHAR (   d,
  s 
)    memcpy(d, s, pg_mblen(s))

Definition at line 40 of file ts_locale.h.

◆ t_iseq

#define t_iseq (   x,
  c 
)    (TOUCHAR(x) == (unsigned char) (c))

Definition at line 38 of file ts_locale.h.

◆ TOUCHAR

#define TOUCHAR (   x)    (*((const unsigned char *) (x)))

Definition at line 35 of file ts_locale.h.

Function Documentation

◆ lowerstr()

char* lowerstr ( const char *  str)

◆ lowerstr_with_len()

char* lowerstr_with_len ( const char *  str,
int  len 
)

Definition at line 273 of file ts_locale.c.

274 {
275  char *out;
276  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
277  pg_locale_t mylocale = 0; /* TODO */
278 
279  if (len == 0)
280  return pstrdup("");
281 
282  /*
283  * Use wide char code only when max encoding length > 1 and ctype != C.
284  * Some operating systems fail with multi-byte encodings and a C locale.
285  * Also, for a C locale there is no need to process as multibyte. From
286  * backend/utils/adt/oracle_compat.c Teodor
287  */
288  if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
289  {
290  wchar_t *wstr,
291  *wptr;
292  int wlen;
293 
294  /*
295  * alloc number of wchar_t for worst case, len contains number of
296  * bytes >= number of characters and alloc 1 wchar_t for 0, because
297  * wchar2char wants zero-terminated string
298  */
299  wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
300 
301  wlen = char2wchar(wstr, len + 1, str, len, mylocale);
302  Assert(wlen <= len);
303 
304  while (*wptr)
305  {
306  *wptr = towlower((wint_t) *wptr);
307  wptr++;
308  }
309 
310  /*
311  * Alloc result string for worst case + '\0'
312  */
313  len = pg_database_encoding_max_length() * wlen + 1;
314  out = (char *) palloc(len);
315 
316  wlen = wchar2char(out, wstr, len, mylocale);
317 
318  pfree(wstr);
319 
320  if (wlen < 0)
321  ereport(ERROR,
322  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
323  errmsg("conversion from wchar_t to server encoding failed: %m")));
324  Assert(wlen < len);
325  }
326  else
327  {
328  const char *ptr = str;
329  char *outptr;
330 
331  outptr = out = (char *) palloc(sizeof(char) * (len + 1));
332  while ((ptr - str) < len && *ptr)
333  {
334  *outptr++ = tolower(TOUCHAR(ptr));
335  ptr++;
336  }
337  *outptr = '\0';
338  }
339 
340  return out;
341 }
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
Assert(fmt[strlen(fmt) - 1] !='\n')
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495
char * pstrdup(const char *in)
Definition: mcxt.c:1483
void pfree(void *pointer)
Definition: mcxt.c:1306
void * palloc(Size size)
Definition: mcxt.c:1199
const void size_t len
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
Definition: pg_locale.c:1990
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1352
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:2061
unsigned int Oid
Definition: postgres_ext.h:31
#define TOUCHAR(x)
Definition: ts_locale.h:35

References Assert(), char2wchar(), ereport, errcode(), errmsg(), ERROR, lc_ctype_is_c(), len, palloc(), pfree(), pg_database_encoding_max_length(), pstrdup(), generate_unaccent_rules::str, TOUCHAR, and wchar2char().

Referenced by dispell_lexize(), dsimple_lexize(), dsnowball_lexize(), dsynonym_lexize(), generate_trgm_only(), generate_wildcard_trgm(), and lowerstr().

◆ t_isalnum()

int t_isalnum ( const char *  ptr)

Definition at line 85 of file ts_locale.c.

86 {
87  int clen = pg_mblen(ptr);
88  wchar_t character[WC_BUF_LEN];
89  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
90  pg_locale_t mylocale = 0; /* TODO */
91 
92  if (clen == 1 || lc_ctype_is_c(collation))
93  return isalnum(TOUCHAR(ptr));
94 
95  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
96 
97  return iswalnum((wint_t) character[0]);
98 }
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
#define WC_BUF_LEN
Definition: ts_locale.c:34

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by parse_or_operator().

◆ t_isalpha()

int t_isalpha ( const char *  ptr)

Definition at line 69 of file ts_locale.c.

70 {
71  int clen = pg_mblen(ptr);
72  wchar_t character[WC_BUF_LEN];
73  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
74  pg_locale_t mylocale = 0; /* TODO */
75 
76  if (clen == 1 || lc_ctype_is_c(collation))
77  return isalpha(TOUCHAR(ptr));
78 
79  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
80 
81  return iswalpha((wint_t) character[0]);
82 }

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by parse_affentry(), RS_compile(), and RS_isRegis().

◆ t_isdigit()

int t_isdigit ( const char *  ptr)

Definition at line 37 of file ts_locale.c.

38 {
39  int clen = pg_mblen(ptr);
40  wchar_t character[WC_BUF_LEN];
41  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
42  pg_locale_t mylocale = 0; /* TODO */
43 
44  if (clen == 1 || lc_ctype_is_c(collation))
45  return isdigit(TOUCHAR(ptr));
46 
47  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
48 
49  return iswdigit((wint_t) character[0]);
50 }

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by getNextFlagFromString(), gettoken_tsvector(), NISortDictionary(), parse_lquery(), and parse_phrase_operator().

◆ t_isprint()

int t_isprint ( const char *  ptr)

Definition at line 101 of file ts_locale.c.

102 {
103  int clen = pg_mblen(ptr);
104  wchar_t character[WC_BUF_LEN];
105  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
106  pg_locale_t mylocale = 0; /* TODO */
107 
108  if (clen == 1 || lc_ctype_is_c(collation))
109  return isprint(TOUCHAR(ptr));
110 
111  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
112 
113  return iswprint((wint_t) character[0]);
114 }

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by NIImportDictionary().

◆ t_isspace()

int t_isspace ( const char *  ptr)

Definition at line 53 of file ts_locale.c.

54 {
55  int clen = pg_mblen(ptr);
56  wchar_t character[WC_BUF_LEN];
57  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
58  pg_locale_t mylocale = 0; /* TODO */
59 
60  if (clen == 1 || lc_ctype_is_c(collation))
61  return isspace(TOUCHAR(ptr));
62 
63  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
64 
65  return iswspace((wint_t) character[0]);
66 }

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by addCompoundAffixFlagValue(), find_word(), findwrd(), get_nextfield(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), initTrie(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NISortDictionary(), parse_affentry(), parse_or_operator(), readstoplist(), and thesaurusRead().

◆ tsearch_readline()

char* tsearch_readline ( tsearch_readline_state *  stp)

Definition at line 164 of file ts_locale.c.

165 {
166  char *recoded;
167 
168  /* Advance line number to use in error reports */
169  stp->lineno++;
170 
171  /* Clear curline, it's no longer relevant */
172  if (stp->curline)
173  {
174  if (stp->curline != stp->buf.data)
175  pfree(stp->curline);
176  stp->curline = NULL;
177  }
178 
179  /* Collect next line, if there is one */
180  if (!pg_get_line_buf(stp->fp, &stp->buf))
181  return NULL;
182 
183  /* Validate the input as UTF-8, then convert to DB encoding if needed */
184  recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
185 
186  /* Save the correctly-encoded string for possible error reports */
187  stp->curline = recoded; /* might be equal to buf.data */
188 
189  /*
190  * We always return a freshly pstrdup'd string. This is clearly necessary
191  * if pg_any_to_server() returned buf.data, and we need a second copy even
192  * if encoding conversion did occur. The caller is entitled to pfree the
193  * returned string at any time, which would leave curline pointing to
194  * recycled storage, causing problems if an error occurs after that point.
195  * (It's preferable to return the result of pstrdup instead of the output
196  * of pg_any_to_server, because the conversion result tends to be
197  * over-allocated. Since callers might save the result string directly
198  * into a long-lived dictionary structure, we don't want it to be a larger
199  * palloc chunk than necessary. We'll reclaim the conversion result on
200  * the next call.)
201  */
202  return pstrdup(recoded);
203 }
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
bool pg_get_line_buf(FILE *stream, StringInfo buf)
Definition: pg_get_line.c:95
@ PG_UTF8
Definition: pg_wchar.h:232
StringInfoData buf
Definition: ts_locale.h:29

References tsearch_readline_state::buf, tsearch_readline_state::curline, StringInfoData::data, tsearch_readline_state::fp, StringInfoData::len, tsearch_readline_state::lineno, pfree(), pg_any_to_server(), pg_get_line_buf(), PG_UTF8, and pstrdup().

Referenced by dsynonym_init(), initTrie(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), read_dictionary(), readstoplist(), and thesaurusRead().

◆ tsearch_readline_begin()

bool tsearch_readline_begin ( tsearch_readline_state *  stp,
const char *  filename 
)

Definition at line 141 of file ts_locale.c.

143 {
144  if ((stp->fp = AllocateFile(filename, "r")) == NULL)
145  return false;
146  stp->filename = filename;
147  stp->lineno = 0;
148  initStringInfo(&stp->buf);
149  stp->curline = NULL;
150  /* Setup error traceback support for ereport() */
151  stp->cb.callback = tsearch_readline_callback;
152  stp->cb.arg = (void *) stp;
153  stp->cb.previous = error_context_stack;
154  error_context_stack = &stp->cb;
155  return true;
156 }
ErrorContextCallback * error_context_stack
Definition: elog.c:94
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2383
static char * filename
Definition: pg_dumpall.c:119
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
struct ErrorContextCallback * previous
Definition: elog.h:234
void(* callback)(void *arg)
Definition: elog.h:235
ErrorContextCallback cb
Definition: ts_locale.h:32
const char * filename
Definition: ts_locale.h:27
static void tsearch_readline_callback(void *arg)
Definition: ts_locale.c:232

References AllocateFile(), ErrorContextCallback::arg, tsearch_readline_state::buf, ErrorContextCallback::callback, tsearch_readline_state::cb, tsearch_readline_state::curline, error_context_stack, filename, tsearch_readline_state::filename, tsearch_readline_state::fp, initStringInfo(), tsearch_readline_state::lineno, ErrorContextCallback::previous, and tsearch_readline_callback().

Referenced by dsynonym_init(), initTrie(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), read_dictionary(), readstoplist(), and thesaurusRead().

◆ tsearch_readline_end()

void tsearch_readline_end ( tsearch_readline_state *  stp)

Definition at line 209 of file ts_locale.c.

210 {
211  /* Suppress use of curline in any error reported below */
212  if (stp->curline)
213  {
214  if (stp->curline != stp->buf.data)
215  pfree(stp->curline);
216  stp->curline = NULL;
217  }
218 
219  /* Release other resources */
220  pfree(stp->buf.data);
221  FreeFile(stp->fp);
222 
223  /* Pop the error context stack */
224  error_context_stack = stp->cb.previous;
225 }
int FreeFile(FILE *file)
Definition: fd.c:2581

References tsearch_readline_state::buf, tsearch_readline_state::cb, tsearch_readline_state::curline, StringInfoData::data, error_context_stack, tsearch_readline_state::fp, FreeFile(), pfree(), and ErrorContextCallback::previous.

Referenced by dsynonym_init(), initTrie(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), read_dictionary(), readstoplist(), and thesaurusRead().