PostgreSQL Source Code git master
ts_locale.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * ts_locale.c
4 * locale compatibility layer for tsearch
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/ts_locale.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "common/string.h"
17#include "storage/fd.h"
18#include "tsearch/ts_locale.h"
19
20static void tsearch_readline_callback(void *arg);
21
22
/*
 * The reason these functions use a 3-wchar_t output buffer, not 2 as you
 * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
 * getting from char2wchar() is UTF16 not UTF32.  A single input character
 * may therefore produce a surrogate pair rather than just one wchar_t;
 * we also need room for a trailing null.  When we do get a surrogate pair,
 * we pass just the first code to iswalpha()/iswalnum(), so that these
 * functions will always return false for characters outside the Basic
 * Multilingual Plane.
 */
#define WC_BUF_LEN	3
33
34int
35t_isalpha(const char *ptr)
36{
37 int clen = pg_mblen(ptr);
38 wchar_t character[WC_BUF_LEN];
39 pg_locale_t mylocale = 0; /* TODO */
40
41 if (clen == 1 || database_ctype_is_c)
42 return isalpha(TOUCHAR(ptr));
43
44 char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
45
46 return iswalpha((wint_t) character[0]);
47}
48
49int
50t_isalnum(const char *ptr)
51{
52 int clen = pg_mblen(ptr);
53 wchar_t character[WC_BUF_LEN];
54 pg_locale_t mylocale = 0; /* TODO */
55
56 if (clen == 1 || database_ctype_is_c)
57 return isalnum(TOUCHAR(ptr));
58
59 char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
60
61 return iswalnum((wint_t) character[0]);
62}
63
64
65/*
66 * Set up to read a file using tsearch_readline(). This facility is
67 * better than just reading the file directly because it provides error
68 * context pointing to the specific line where a problem is detected.
69 *
70 * Expected usage is:
71 *
72 * tsearch_readline_state trst;
73 *
74 * if (!tsearch_readline_begin(&trst, filename))
75 * ereport(ERROR,
76 * (errcode(ERRCODE_CONFIG_FILE_ERROR),
77 * errmsg("could not open stop-word file \"%s\": %m",
78 * filename)));
79 * while ((line = tsearch_readline(&trst)) != NULL)
80 * process line;
81 * tsearch_readline_end(&trst);
82 *
83 * Note that the caller supplies the ereport() for file open failure;
84 * this is so that a custom message can be provided. The filename string
85 * passed to tsearch_readline_begin() must remain valid through
86 * tsearch_readline_end().
87 */
88bool
90 const char *filename)
91{
92 if ((stp->fp = AllocateFile(filename, "r")) == NULL)
93 return false;
94 stp->filename = filename;
95 stp->lineno = 0;
96 initStringInfo(&stp->buf);
97 stp->curline = NULL;
98 /* Setup error traceback support for ereport() */
100 stp->cb.arg = stp;
102 error_context_stack = &stp->cb;
103 return true;
104}
105
106/*
107 * Read the next line from a tsearch data file (expected to be in UTF-8), and
108 * convert it to database encoding if needed. The returned string is palloc'd.
109 * NULL return means EOF.
110 */
111char *
113{
114 char *recoded;
115
116 /* Advance line number to use in error reports */
117 stp->lineno++;
118
119 /* Clear curline, it's no longer relevant */
120 if (stp->curline)
121 {
122 if (stp->curline != stp->buf.data)
123 pfree(stp->curline);
124 stp->curline = NULL;
125 }
126
127 /* Collect next line, if there is one */
128 if (!pg_get_line_buf(stp->fp, &stp->buf))
129 return NULL;
130
131 /* Validate the input as UTF-8, then convert to DB encoding if needed */
132 recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
133
134 /* Save the correctly-encoded string for possible error reports */
135 stp->curline = recoded; /* might be equal to buf.data */
136
137 /*
138 * We always return a freshly pstrdup'd string. This is clearly necessary
139 * if pg_any_to_server() returned buf.data, and we need a second copy even
140 * if encoding conversion did occur. The caller is entitled to pfree the
141 * returned string at any time, which would leave curline pointing to
142 * recycled storage, causing problems if an error occurs after that point.
143 * (It's preferable to return the result of pstrdup instead of the output
144 * of pg_any_to_server, because the conversion result tends to be
145 * over-allocated. Since callers might save the result string directly
146 * into a long-lived dictionary structure, we don't want it to be a larger
147 * palloc chunk than necessary. We'll reclaim the conversion result on
148 * the next call.)
149 */
150 return pstrdup(recoded);
151}
152
153/*
154 * Close down after reading a file with tsearch_readline()
155 */
156void
158{
159 /* Suppress use of curline in any error reported below */
160 if (stp->curline)
161 {
162 if (stp->curline != stp->buf.data)
163 pfree(stp->curline);
164 stp->curline = NULL;
165 }
166
167 /* Release other resources */
168 pfree(stp->buf.data);
169 FreeFile(stp->fp);
170
171 /* Pop the error context stack */
173}
174
175/*
176 * Error context callback for errors occurring while reading a tsearch
177 * configuration file.
178 */
179static void
181{
183
184 /*
185 * We can't include the text of the config line for errors that occur
186 * during tsearch_readline() itself. The major cause of such errors is
187 * encoding violations, and we daren't try to print error messages
188 * containing badly-encoded data.
189 */
190 if (stp->curline)
191 errcontext("line %d of configuration file \"%s\": \"%s\"",
192 stp->lineno,
193 stp->filename,
194 stp->curline);
195 else
196 errcontext("line %d of configuration file \"%s\"",
197 stp->lineno,
198 stp->filename);
199}
ErrorContextCallback * error_context_stack
Definition: elog.c:94
#define errcontext
Definition: elog.h:196
int FreeFile(FILE *file)
Definition: fd.c:2803
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2605
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
void * arg
static char * filename
Definition: pg_dumpall.c:119
bool pg_get_line_buf(FILE *stream, StringInfo buf)
Definition: pg_get_line.c:95
bool database_ctype_is_c
Definition: pg_locale.c:145
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
@ PG_UTF8
Definition: pg_wchar.h:232
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
struct ErrorContextCallback * previous
Definition: elog.h:296
void(* callback)(void *arg)
Definition: elog.h:297
StringInfoData buf
Definition: ts_locale.h:29
ErrorContextCallback cb
Definition: ts_locale.h:32
const char * filename
Definition: ts_locale.h:27
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:89
int t_isalnum(const char *ptr)
Definition: ts_locale.c:50
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:112
int t_isalpha(const char *ptr)
Definition: ts_locale.c:35
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:157
static void tsearch_readline_callback(void *arg)
Definition: ts_locale.c:180
#define WC_BUF_LEN
Definition: ts_locale.c:32
#define TOUCHAR(x)
Definition: ts_locale.h:35