PostgreSQL Source Code  git master
dict_synonym.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dict_synonym.c
4  * Synonym dictionary: replace word by its synonym
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/tsearch/dict_synonym.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "commands/defrem.h"
17 #include "tsearch/ts_locale.h"
18 #include "tsearch/ts_public.h"
19 #include "utils/fmgrprotos.h"
20 
21 typedef struct
22 {
23  char *in;
24  char *out;
25  int outlen;
27 } Syn;
28 
29 typedef struct
30 {
31  int len; /* length of syn array */
32  Syn *syn;
34 } DictSyn;
35 
36 /*
37  * Finds the next whitespace-delimited word within the 'in' string.
38  * Returns a pointer to the first character of the word, and a pointer
39  * to the next byte after the last character in the word (in *end).
40  * Character '*' at the end of word will not be treated as word
41  * character if flags is not null.
42  */
43 static char *
44 findwrd(char *in, char **end, uint16 *flags)
45 {
46  char *start;
47  char *lastchar;
48 
49  /* Skip leading spaces */
50  while (*in && t_isspace(in))
51  in += pg_mblen(in);
52 
53  /* Return NULL on empty lines */
54  if (*in == '\0')
55  {
56  *end = NULL;
57  return NULL;
58  }
59 
60  lastchar = start = in;
61 
62  /* Find end of word */
63  while (*in && !t_isspace(in))
64  {
65  lastchar = in;
66  in += pg_mblen(in);
67  }
68 
69  if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
70  {
71  *flags = TSL_PREFIX;
72  *end = lastchar;
73  }
74  else
75  {
76  if (flags)
77  *flags = 0;
78  *end = in;
79  }
80 
81  return start;
82 }
83 
84 static int
85 compareSyn(const void *a, const void *b)
86 {
87  return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
88 }
89 
90 
91 Datum
93 {
94  List *dictoptions = (List *) PG_GETARG_POINTER(0);
95  DictSyn *d;
96  ListCell *l;
97  char *filename = NULL;
98  bool case_sensitive = false;
100  char *starti,
101  *starto,
102  *end = NULL;
103  int cur = 0;
104  char *line = NULL;
105  uint16 flags = 0;
106 
107  foreach(l, dictoptions)
108  {
109  DefElem *defel = (DefElem *) lfirst(l);
110 
111  if (strcmp(defel->defname, "synonyms") == 0)
112  filename = defGetString(defel);
113  else if (strcmp(defel->defname, "casesensitive") == 0)
114  case_sensitive = defGetBoolean(defel);
115  else
116  ereport(ERROR,
117  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
118  errmsg("unrecognized synonym parameter: \"%s\"",
119  defel->defname)));
120  }
121 
122  if (!filename)
123  ereport(ERROR,
124  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
125  errmsg("missing Synonyms parameter")));
126 
128 
129  if (!tsearch_readline_begin(&trst, filename))
130  ereport(ERROR,
131  (errcode(ERRCODE_CONFIG_FILE_ERROR),
132  errmsg("could not open synonym file \"%s\": %m",
133  filename)));
134 
135  d = (DictSyn *) palloc0(sizeof(DictSyn));
136 
137  while ((line = tsearch_readline(&trst)) != NULL)
138  {
139  starti = findwrd(line, &end, NULL);
140  if (!starti)
141  {
142  /* Empty line */
143  goto skipline;
144  }
145  if (*end == '\0')
146  {
147  /* A line with only one word. Ignore silently. */
148  goto skipline;
149  }
150  *end = '\0';
151 
152  starto = findwrd(end + 1, &end, &flags);
153  if (!starto)
154  {
155  /* A line with only one word (+whitespace). Ignore silently. */
156  goto skipline;
157  }
158  *end = '\0';
159 
160  /*
161  * starti now points to the first word, and starto to the second word
162  * on the line, with a \0 terminator at the end of both words.
163  */
164 
165  if (cur >= d->len)
166  {
167  if (d->len == 0)
168  {
169  d->len = 64;
170  d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
171  }
172  else
173  {
174  d->len *= 2;
175  d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
176  }
177  }
178 
179  if (case_sensitive)
180  {
181  d->syn[cur].in = pstrdup(starti);
182  d->syn[cur].out = pstrdup(starto);
183  }
184  else
185  {
186  d->syn[cur].in = lowerstr(starti);
187  d->syn[cur].out = lowerstr(starto);
188  }
189 
190  d->syn[cur].outlen = strlen(starto);
191  d->syn[cur].flags = flags;
192 
193  cur++;
194 
195 skipline:
196  pfree(line);
197  }
198 
199  tsearch_readline_end(&trst);
200 
201  d->len = cur;
202  qsort(d->syn, d->len, sizeof(Syn), compareSyn);
203 
204  d->case_sensitive = case_sensitive;
205 
207 }
208 
209 Datum
211 {
212  DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
213  char *in = (char *) PG_GETARG_POINTER(1);
215  Syn key,
216  *found;
217  TSLexeme *res;
218 
219  /* note: d->len test protects against Solaris bsearch-of-no-items bug */
220  if (len <= 0 || d->len <= 0)
221  PG_RETURN_POINTER(NULL);
222 
223  if (d->case_sensitive)
224  key.in = pnstrdup(in, len);
225  else
226  key.in = lowerstr_with_len(in, len);
227 
228  key.out = NULL;
229 
230  found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
231  pfree(key.in);
232 
233  if (!found)
234  PG_RETURN_POINTER(NULL);
235 
236  res = palloc0(sizeof(TSLexeme) * 2);
237  res[0].lexeme = pnstrdup(found->out, found->outlen);
238  res[0].flags = found->flags;
239 
241 }
unsigned short uint16
Definition: c.h:505
signed int int32
Definition: c.h:494
bool defGetBoolean(DefElem *def)
Definition: define.c:107
char * defGetString(DefElem *def)
Definition: define.c:48
static int compareSyn(const void *a, const void *b)
Definition: dict_synonym.c:85
static char * findwrd(char *in, char **end, uint16 *flags)
Definition: dict_synonym.c:44
Datum dsynonym_lexize(PG_FUNCTION_ARGS)
Definition: dict_synonym.c:210
Datum dsynonym_init(PG_FUNCTION_ARGS)
Definition: dict_synonym.c:92
struct cursor * cur
Definition: ecpg.c:28
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1706
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc0(Size size)
Definition: mcxt.c:1346
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
void * palloc(Size size)
Definition: mcxt.c:1316
const void size_t len
static char * filename
Definition: pg_dumpall.c:119
#define lfirst(lc)
Definition: pg_list.h:172
#define qsort(a, b, c, d)
Definition: port.h:449
uintptr_t Datum
Definition: postgres.h:64
char * defname
Definition: parsenodes.h:815
bool case_sensitive
Definition: dict_synonym.c:33
Syn * syn
Definition: dict_xsyn.c:33
int len
Definition: dict_xsyn.c:32
Definition: pg_list.h:54
Definition: dict_xsyn.c:24
uint16 flags
Definition: dict_synonym.c:26
char * in
Definition: dict_synonym.c:23
char * out
Definition: dict_synonym.c:24
int outlen
Definition: dict_synonym.c:25
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:134
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:157
char * lowerstr_with_len(const char *str, int len)
Definition: ts_locale.c:266
int t_isspace(const char *ptr)
Definition: ts_locale.c:50
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:202
char * lowerstr(const char *str)
Definition: ts_locale.c:253
#define t_iseq(x, c)
Definition: ts_locale.h:38
#define TSL_PREFIX
Definition: ts_public.h:143
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:33