PostgreSQL Source Code git master
Loading...
Searching...
No Matches
ts_locale.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * ts_locale.c
4 * locale compatibility layer for tsearch
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/ts_locale.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "common/string.h"
17#include "storage/fd.h"
18#include "tsearch/ts_locale.h"
19
/*
 * Forward declaration: tsearch_readline_begin() stores this function's
 * address in its error context callback entry, ahead of the definition
 * further down in this file.
 */
static void tsearch_readline_callback(void *arg);
21
22
/* room for exactly one wide character plus its terminating NUL */
#define WC_BUF_LEN 2

/*
 * GENERATE_T_ISCLASS_DEF(character_class) expands to four functions that
 * test the first character of a multibyte string against the wide-character
 * classifier pg_isw<character_class>(), using the database locale:
 *
 *	t_is<class>_with_len(ptr, mblen)	caller supplies the character's length
 *	t_is<class>_cstr(ptr)				length taken with pg_mblen_cstr()
 *	t_is<class>_unbounded(ptr)			length taken with pg_mblen_unbounded()
 *	t_is<class>(ptr)					historical alias for _unbounded
 */
#define GENERATE_T_ISCLASS_DEF(character_class) \
/* mblen must be the byte length of the first character at ptr */ \
int \
t_is##character_class##_with_len(const char *ptr, int mblen) \
{ \
	pg_wchar	wc_buf[WC_BUF_LEN]; \
	pg_wchar	first_wc; \
	int			nconverted pg_attribute_unused(); \
\
	/* nconverted is only inspected by the Assert below */ \
	nconverted = pg_mb2wchar_with_len(ptr, wc_buf, mblen); \
	Assert(nconverted <= 1); \
	/* the single converted character, or NUL if the input was empty */ \
	first_wc = wc_buf[0]; \
	return pg_isw##character_class(first_wc, pg_database_locale()); \
} \
\
/* ptr must point to a NUL-terminated string */ \
int \
t_is##character_class##_cstr(const char *ptr) \
{ \
	int			first_len = pg_mblen_cstr(ptr); \
\
	return t_is##character_class##_with_len(ptr, first_len); \
} \
/* ptr must point to a string whose encoding was already validated */ \
int \
t_is##character_class##_unbounded(const char *ptr) \
{ \
	int			first_len = pg_mblen_unbounded(ptr); \
\
	return t_is##character_class##_with_len(ptr, first_len); \
} \
/* historical name, kept as a thin wrapper around _unbounded */ \
int \
t_is##character_class(const char *ptr) \
{ \
	return t_is##character_class##_unbounded(ptr); \
}
57
60
61/*
62 * Set up to read a file using tsearch_readline(). This facility is
63 * better than just reading the file directly because it provides error
64 * context pointing to the specific line where a problem is detected.
65 *
66 * Expected usage is:
67 *
68 * tsearch_readline_state trst;
69 *
70 * if (!tsearch_readline_begin(&trst, filename))
71 * ereport(ERROR,
72 * (errcode(ERRCODE_CONFIG_FILE_ERROR),
73 * errmsg("could not open stop-word file \"%s\": %m",
74 * filename)));
75 * while ((line = tsearch_readline(&trst)) != NULL)
76 * process line;
77 * tsearch_readline_end(&trst);
78 *
79 * Note that the caller supplies the ereport() for file open failure;
80 * this is so that a custom message can be provided. The filename string
81 * passed to tsearch_readline_begin() must remain valid through
82 * tsearch_readline_end().
83 */
84bool
86 const char *filename)
87{
88 if ((stp->fp = AllocateFile(filename, "r")) == NULL)
89 return false;
90 stp->filename = filename;
91 stp->lineno = 0;
92 initStringInfo(&stp->buf);
93 stp->curline = NULL;
94 /* Setup error traceback support for ereport() */
95 stp->cb.callback = tsearch_readline_callback;
96 stp->cb.arg = stp;
97 stp->cb.previous = error_context_stack;
99 return true;
100}
101
102/*
103 * Read the next line from a tsearch data file (expected to be in UTF-8), and
104 * convert it to database encoding if needed. The returned string is palloc'd.
105 * NULL return means EOF.
106 */
107char *
109{
110 char *recoded;
111
112 /* Advance line number to use in error reports */
113 stp->lineno++;
114
115 /* Clear curline, it's no longer relevant */
116 if (stp->curline)
117 {
118 if (stp->curline != stp->buf.data)
119 pfree(stp->curline);
120 stp->curline = NULL;
121 }
122
123 /* Collect next line, if there is one */
124 if (!pg_get_line_buf(stp->fp, &stp->buf))
125 return NULL;
126
127 /* Validate the input as UTF-8, then convert to DB encoding if needed */
128 recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
129
130 /* Save the correctly-encoded string for possible error reports */
131 stp->curline = recoded; /* might be equal to buf.data */
132
133 /*
134 * We always return a freshly pstrdup'd string. This is clearly necessary
135 * if pg_any_to_server() returned buf.data, and we need a second copy even
136 * if encoding conversion did occur. The caller is entitled to pfree the
137 * returned string at any time, which would leave curline pointing to
138 * recycled storage, causing problems if an error occurs after that point.
139 * (It's preferable to return the result of pstrdup instead of the output
140 * of pg_any_to_server, because the conversion result tends to be
141 * over-allocated. Since callers might save the result string directly
142 * into a long-lived dictionary structure, we don't want it to be a larger
143 * palloc chunk than necessary. We'll reclaim the conversion result on
144 * the next call.)
145 */
146 return pstrdup(recoded);
147}
148
149/*
150 * Close down after reading a file with tsearch_readline()
151 */
152void
154{
155 /* Suppress use of curline in any error reported below */
156 if (stp->curline)
157 {
158 if (stp->curline != stp->buf.data)
159 pfree(stp->curline);
160 stp->curline = NULL;
161 }
162
163 /* Release other resources */
164 pfree(stp->buf.data);
165 FreeFile(stp->fp);
166
167 /* Pop the error context stack */
169}
170
171/*
172 * Error context callback for errors occurring while reading a tsearch
173 * configuration file.
174 */
175static void
177{
179
180 /*
181 * We can't include the text of the config line for errors that occur
182 * during tsearch_readline() itself. The major cause of such errors is
183 * encoding violations, and we daren't try to print error messages
184 * containing badly-encoded data.
185 */
186 if (stp->curline)
187 errcontext("line %d of configuration file \"%s\": \"%s\"",
188 stp->lineno,
189 stp->filename,
190 stp->curline);
191 else
192 errcontext("line %d of configuration file \"%s\"",
193 stp->lineno,
194 stp->filename);
195}
ErrorContextCallback * error_context_stack
Definition elog.c:95
#define errcontext
Definition elog.h:198
int FreeFile(FILE *file)
Definition fd.c:2826
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2627
#define PG_UTF8
Definition mbprint.c:43
char * pg_any_to_server(const char *s, int len, int encoding)
Definition mbutils.c:687
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
void * arg
static char * filename
Definition pg_dumpall.c:120
bool pg_get_line_buf(FILE *stream, StringInfo buf)
Definition pg_get_line.c:95
static int fb(int x)
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
struct ErrorContextCallback * previous
Definition elog.h:297
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition ts_locale.c:85
char * tsearch_readline(tsearch_readline_state *stp)
Definition ts_locale.c:108
#define GENERATE_T_ISCLASS_DEF(character_class)
Definition ts_locale.c:26
void tsearch_readline_end(tsearch_readline_state *stp)
Definition ts_locale.c:153
static void tsearch_readline_callback(void *arg)
Definition ts_locale.c:176