PostgreSQL Source Code  git master
ts_locale.h File Reference
#include <ctype.h>
#include <limits.h>
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "utils/pg_locale.h"
#include <wchar.h>
Include dependency graph for ts_locale.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  tsearch_readline_state
 

Macros

#define TOUCHAR(x)   (*((const unsigned char *) (x)))
 
#define t_iseq(x, c)   (TOUCHAR(x) == (unsigned char) (c))
 
#define COPYCHAR(d, s)   memcpy(d, s, pg_mblen(s))
 

Functions

int t_isdigit (const char *ptr)
 
int t_isspace (const char *ptr)
 
int t_isalpha (const char *ptr)
 
int t_isprint (const char *ptr)
 
char * lowerstr (const char *str)
 
char * lowerstr_with_len (const char *str, int len)
 
bool tsearch_readline_begin (tsearch_readline_state *stp, const char *filename)
 
char * tsearch_readline (tsearch_readline_state *stp)
 
void tsearch_readline_end (tsearch_readline_state *stp)
 

Macro Definition Documentation

◆ COPYCHAR

#define COPYCHAR (   d,
s 
)    memcpy(d, s, pg_mblen(s))

◆ t_iseq

#define t_iseq (   x,
c 
)    (TOUCHAR(x) == (unsigned char) (c))

◆ TOUCHAR

#define TOUCHAR (   x)    (*((const unsigned char *) (x)))

Definition at line 43 of file ts_locale.h.

Referenced by lowerstr_with_len(), t_isalpha(), t_isdigit(), t_isprint(), and t_isspace().

Function Documentation

◆ lowerstr()

char* lowerstr ( const char *  str)

Definition at line 244 of file ts_locale.c.

References lowerstr_with_len().

Referenced by convertPgWchar(), dispell_init(), dsimple_init(), dsnowball_init(), dsynonym_init(), dxsyn_lexize(), lowerstr_ctx(), NIImportAffixes(), and read_dictionary().

245 {
246  return lowerstr_with_len(str, strlen(str));
247 }
char * lowerstr_with_len(const char *str, int len)
Definition: ts_locale.c:257

◆ lowerstr_with_len()

char* lowerstr_with_len ( const char *  str,
int  len 
)

Definition at line 257 of file ts_locale.c.

References Assert, char2wchar(), ereport, errcode(), errmsg(), ERROR, lc_ctype_is_c(), palloc(), pfree(), pg_database_encoding_max_length(), pstrdup(), generate_unaccent_rules::str, TOUCHAR, and wchar2char().

Referenced by dispell_lexize(), dsimple_lexize(), dsnowball_lexize(), dsynonym_lexize(), generate_trgm_only(), generate_wildcard_trgm(), and lowerstr().

258 {
259  char *out;
260  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
261  pg_locale_t mylocale = 0; /* TODO */
262 
263  if (len == 0)
264  return pstrdup("");
265 
266  /*
267  * Use wide char code only when max encoding length > 1 and ctype != C.
268  * Some operating systems fail with multi-byte encodings and a C locale.
269  * Also, for a C locale there is no need to process as multibyte. From
270  * backend/utils/adt/oracle_compat.c Teodor
271  */
272  if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
273  {
274  wchar_t *wstr,
275  *wptr;
276  int wlen;
277 
278  /*
279  * alloc number of wchar_t for worst case, len contains number of
280  * bytes >= number of characters and alloc 1 wchar_t for 0, because
281  * wchar2char wants zero-terminated string
282  */
283  wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
284 
285  wlen = char2wchar(wstr, len + 1, str, len, mylocale);
286  Assert(wlen <= len);
287 
288  while (*wptr)
289  {
290  *wptr = towlower((wint_t) *wptr);
291  wptr++;
292  }
293 
294  /*
295  * Alloc result string for worst case + '\0'
296  */
297  len = pg_database_encoding_max_length() * wlen + 1;
298  out = (char *) palloc(len);
299 
300  wlen = wchar2char(out, wstr, len, mylocale);
301 
302  pfree(wstr);
303 
304  if (wlen < 0)
305  ereport(ERROR,
306  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
307  errmsg("conversion from wchar_t to server encoding failed: %m")));
308  Assert(wlen < len);
309  }
310  else
311  {
312  const char *ptr = str;
313  char *outptr;
314 
315  outptr = out = (char *) palloc(sizeof(char) * (len + 1));
316  while ((ptr - str) < len && *ptr)
317  {
318  *outptr++ = tolower(TOUCHAR(ptr));
319  ptr++;
320  }
321  *outptr = '\0';
322  }
323 
324  return out;
325 }
char * pstrdup(const char *in)
Definition: mcxt.c:1187
int errcode(int sqlerrcode)
Definition: elog.c:610
unsigned int Oid
Definition: postgres_ext.h:31
void pfree(void *pointer)
Definition: mcxt.c:1057
#define ERROR
Definition: elog.h:43
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:2052
#define ereport(elevel,...)
Definition: elog.h:144
#define Assert(condition)
Definition: c.h:745
#define TOUCHAR(x)
Definition: ts_locale.h:43
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1436
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:824
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
Definition: pg_locale.c:1981
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1397

◆ t_isalpha()

int t_isalpha ( const char *  ptr)

Definition at line 69 of file ts_locale.c.

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by parse_affentry(), parse_or_operator(), RS_compile(), and RS_isRegis().

70 {
71  int clen = pg_mblen(ptr);
72  wchar_t character[WC_BUF_LEN];
73  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
74  pg_locale_t mylocale = 0; /* TODO */
75 
76  if (clen == 1 || lc_ctype_is_c(collation))
77  return isalpha(TOUCHAR(ptr));
78 
79  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
80 
81  return iswalpha((wint_t) character[0]);
82 }
#define WC_BUF_LEN
Definition: ts_locale.c:34
unsigned int Oid
Definition: postgres_ext.h:31
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:2052
#define TOUCHAR(x)
Definition: ts_locale.h:43
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1397

◆ t_isdigit()

int t_isdigit ( const char *  ptr)

Definition at line 37 of file ts_locale.c.

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by getNextFlagFromString(), gettoken_tsvector(), NISortDictionary(), parse_lquery(), parse_or_operator(), and parse_phrase_operator().

38 {
39  int clen = pg_mblen(ptr);
40  wchar_t character[WC_BUF_LEN];
41  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
42  pg_locale_t mylocale = 0; /* TODO */
43 
44  if (clen == 1 || lc_ctype_is_c(collation))
45  return isdigit(TOUCHAR(ptr));
46 
47  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
48 
49  return iswdigit((wint_t) character[0]);
50 }
#define WC_BUF_LEN
Definition: ts_locale.c:34
unsigned int Oid
Definition: postgres_ext.h:31
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:2052
#define TOUCHAR(x)
Definition: ts_locale.h:43
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1397

◆ t_isprint()

int t_isprint ( const char *  ptr)

Definition at line 85 of file ts_locale.c.

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by NIImportDictionary().

86 {
87  int clen = pg_mblen(ptr);
88  wchar_t character[WC_BUF_LEN];
89  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
90  pg_locale_t mylocale = 0; /* TODO */
91 
92  if (clen == 1 || lc_ctype_is_c(collation))
93  return isprint(TOUCHAR(ptr));
94 
95  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
96 
97  return iswprint((wint_t) character[0]);
98 }
#define WC_BUF_LEN
Definition: ts_locale.c:34
unsigned int Oid
Definition: postgres_ext.h:31
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:2052
#define TOUCHAR(x)
Definition: ts_locale.h:43
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1397

◆ t_isspace()

int t_isspace ( const char *  ptr)

Definition at line 53 of file ts_locale.c.

References char2wchar(), lc_ctype_is_c(), pg_mblen(), TOUCHAR, and WC_BUF_LEN.

Referenced by addCompoundAffixFlagValue(), find_word(), findwrd(), get_nextfield(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), initTrie(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NISortDictionary(), parse_affentry(), parse_or_operator(), readstoplist(), and thesaurusRead().

54 {
55  int clen = pg_mblen(ptr);
56  wchar_t character[WC_BUF_LEN];
57  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
58  pg_locale_t mylocale = 0; /* TODO */
59 
60  if (clen == 1 || lc_ctype_is_c(collation))
61  return isspace(TOUCHAR(ptr));
62 
63  char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
64 
65  return iswspace((wint_t) character[0]);
66 }
#define WC_BUF_LEN
Definition: ts_locale.c:34
unsigned int Oid
Definition: postgres_ext.h:31
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:2052
#define TOUCHAR(x)
Definition: ts_locale.h:43
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1397

◆ tsearch_readline()

char* tsearch_readline ( tsearch_readline_state *  stp)

Definition at line 148 of file ts_locale.c.

References tsearch_readline_state::buf, tsearch_readline_state::curline, StringInfoData::data, tsearch_readline_state::fp, StringInfoData::len, tsearch_readline_state::lineno, pfree(), pg_any_to_server(), pg_get_line_buf(), PG_UTF8, and pstrdup().

Referenced by dsynonym_init(), initTrie(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), read_dictionary(), readstoplist(), and thesaurusRead().

149 {
150  char *recoded;
151 
152  /* Advance line number to use in error reports */
153  stp->lineno++;
154 
155  /* Clear curline, it's no longer relevant */
156  if (stp->curline)
157  {
158  if (stp->curline != stp->buf.data)
159  pfree(stp->curline);
160  stp->curline = NULL;
161  }
162 
163  /* Collect next line, if there is one */
164  if (!pg_get_line_buf(stp->fp, &stp->buf))
165  return NULL;
166 
167  /* Validate the input as UTF-8, then convert to DB encoding if needed */
168  recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
169 
170  /* Save the correctly-encoded string for possible error reports */
171  stp->curline = recoded; /* might be equal to buf.data */
172 
173  /*
174  * We always return a freshly pstrdup'd string. This is clearly necessary
175  * if pg_any_to_server() returned buf.data, and we need a second copy even
176  * if encoding conversion did occur. The caller is entitled to pfree the
177  * returned string at any time, which would leave curline pointing to
178  * recycled storage, causing problems if an error occurs after that point.
179  * (It's preferable to return the result of pstrdup instead of the output
180  * of pg_any_to_server, because the conversion result tends to be
181  * over-allocated. Since callers might save the result string directly
182  * into a long-lived dictionary structure, we don't want it to be a larger
183  * palloc chunk than necessary. We'll reclaim the conversion result on
184  * the next call.)
185  */
186  return pstrdup(recoded);
187 }
bool pg_get_line_buf(FILE *stream, StringInfo buf)
Definition: pg_get_line.c:88
char * pstrdup(const char *in)
Definition: mcxt.c:1187
void pfree(void *pointer)
Definition: mcxt.c:1057
StringInfoData buf
Definition: ts_locale.h:37
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:619

◆ tsearch_readline_begin()

bool tsearch_readline_begin ( tsearch_readline_state *  stp,
const char *  filename 
)

Definition at line 125 of file ts_locale.c.

References AllocateFile(), ErrorContextCallback::arg, tsearch_readline_state::buf, ErrorContextCallback::callback, tsearch_readline_state::cb, tsearch_readline_state::curline, error_context_stack, tsearch_readline_state::filename, filename, tsearch_readline_state::fp, initStringInfo(), tsearch_readline_state::lineno, ErrorContextCallback::previous, and tsearch_readline_callback().

Referenced by dsynonym_init(), initTrie(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), read_dictionary(), readstoplist(), and thesaurusRead().

127 {
128  if ((stp->fp = AllocateFile(filename, "r")) == NULL)
129  return false;
130  stp->filename = filename;
131  stp->lineno = 0;
132  initStringInfo(&stp->buf);
133  stp->curline = NULL;
134  /* Setup error traceback support for ereport() */
135  stp->cb.callback = tsearch_readline_callback;
136  stp->cb.arg = (void *) stp;
137  stp->cb.previous = error_context_stack;
138  error_context_stack = &stp->cb;
139  return true;
140 }
static void tsearch_readline_callback(void *arg)
Definition: ts_locale.c:216
void(* callback)(void *arg)
Definition: elog.h:229
struct ErrorContextCallback * previous
Definition: elog.h:228
ErrorContextCallback * error_context_stack
Definition: elog.c:92
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2322
ErrorContextCallback cb
Definition: ts_locale.h:40
StringInfoData buf
Definition: ts_locale.h:37
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
const char * filename
Definition: ts_locale.h:35
static char * filename
Definition: pg_dumpall.c:91

◆ tsearch_readline_end()

void tsearch_readline_end ( tsearch_readline_state *  stp)

Definition at line 193 of file ts_locale.c.

References tsearch_readline_state::buf, tsearch_readline_state::cb, tsearch_readline_state::curline, StringInfoData::data, error_context_stack, tsearch_readline_state::fp, FreeFile(), pfree(), and ErrorContextCallback::previous.

Referenced by dsynonym_init(), initTrie(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), read_dictionary(), readstoplist(), and thesaurusRead().

194 {
195  /* Suppress use of curline in any error reported below */
196  if (stp->curline)
197  {
198  if (stp->curline != stp->buf.data)
199  pfree(stp->curline);
200  stp->curline = NULL;
201  }
202 
203  /* Release other resources */
204  pfree(stp->buf.data);
205  FreeFile(stp->fp);
206 
207  /* Pop the error context stack */
208  error_context_stack = stp->cb.previous;
209 }
struct ErrorContextCallback * previous
Definition: elog.h:228
ErrorContextCallback * error_context_stack
Definition: elog.c:92
void pfree(void *pointer)
Definition: mcxt.c:1057
ErrorContextCallback cb
Definition: ts_locale.h:40
StringInfoData buf
Definition: ts_locale.h:37
int FreeFile(FILE *file)
Definition: fd.c:2521