PostgreSQL Source Code  git master
dict_thesaurus.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dict_thesaurus.c
4  * Thesaurus dictionary: phrase to phrase substitution
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/tsearch/dict_thesaurus.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "catalog/namespace.h"
17 #include "commands/defrem.h"
18 #include "tsearch/ts_cache.h"
19 #include "tsearch/ts_locale.h"
20 #include "tsearch/ts_public.h"
21 #include "utils/fmgrprotos.h"
22 #include "utils/regproc.h"
23 
24 
25 /*
26  * Temporarily, we use TSLexeme.flags for internal use...
27  */
28 #define DT_USEASIS 0x1000
29 
30 typedef struct LexemeInfo
31 {
32  uint32 idsubst; /* entry's number in DictThesaurus->subst */
33  uint16 posinsubst; /* pos info in entry */
34  uint16 tnvariant; /* total num lexemes in one variant */
38 
39 typedef struct
40 {
41  char *lexeme;
43 } TheLexeme;
44 
45 typedef struct
46 {
47  uint16 lastlexeme; /* number lexemes to substitute */
49  TSLexeme *res; /* prepared substituted result */
51 
52 typedef struct
53 {
54  /* subdictionary to normalize lexemes */
57 
58  /* Array to search lexeme by exact match */
60  int nwrds; /* current number of words */
61  int ntwrds; /* allocated array length */
62 
63  /*
64  * Storage of substituted result, n-th element is for n-th expression
65  */
67  int nsubst;
69 
70 
71 static void
73 {
74  TheLexeme *ptr;
75 
76  if (d->nwrds >= d->ntwrds)
77  {
78  if (d->ntwrds == 0)
79  {
80  d->ntwrds = 16;
81  d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
82  }
83  else
84  {
85  d->ntwrds *= 2;
86  d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
87  }
88  }
89 
90  ptr = d->wrds + d->nwrds;
91  d->nwrds++;
92 
93  ptr->lexeme = palloc(e - b + 1);
94 
95  memcpy(ptr->lexeme, b, e - b);
96  ptr->lexeme[e - b] = '\0';
97 
98  ptr->entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
99 
100  ptr->entries->nextentry = NULL;
101  ptr->entries->idsubst = idsubst;
102  ptr->entries->posinsubst = posinsubst;
103 }
104 
105 static void
106 addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
107 {
108  static int nres = 0;
109  static int ntres = 0;
110  TheSubstitute *ptr;
111 
112  if (nwrd == 0)
113  {
114  nres = ntres = 0;
115 
116  if (idsubst >= d->nsubst)
117  {
118  if (d->nsubst == 0)
119  {
120  d->nsubst = 16;
121  d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
122  }
123  else
124  {
125  d->nsubst *= 2;
126  d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
127  }
128  }
129  }
130 
131  ptr = d->subst + idsubst;
132 
133  ptr->lastlexeme = posinsubst - 1;
134 
135  if (nres + 1 >= ntres)
136  {
137  if (ntres == 0)
138  {
139  ntres = 2;
140  ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres);
141  }
142  else
143  {
144  ntres *= 2;
145  ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres);
146  }
147  }
148 
149  ptr->res[nres].lexeme = palloc(e - b + 1);
150  memcpy(ptr->res[nres].lexeme, b, e - b);
151  ptr->res[nres].lexeme[e - b] = '\0';
152 
153  ptr->res[nres].nvariant = nwrd;
154  if (useasis)
155  ptr->res[nres].flags = DT_USEASIS;
156  else
157  ptr->res[nres].flags = 0;
158 
159  ptr->res[++nres].lexeme = NULL;
160 }
161 
162 #define TR_WAITLEX 1
163 #define TR_INLEX 2
164 #define TR_WAITSUBS 3
165 #define TR_INSUBS 4
166 
167 static void
169 {
171  uint32 idsubst = 0;
172  bool useasis = false;
173  char *line;
174 
176  if (!tsearch_readline_begin(&trst, filename))
177  ereport(ERROR,
178  (errcode(ERRCODE_CONFIG_FILE_ERROR),
179  errmsg("could not open thesaurus file \"%s\": %m",
180  filename)));
181 
182  while ((line = tsearch_readline(&trst)) != NULL)
183  {
184  char *ptr;
185  int state = TR_WAITLEX;
186  char *beginwrd = NULL;
187  uint32 posinsubst = 0;
188  uint32 nwrd = 0;
189 
190  ptr = line;
191 
192  /* is it a comment? */
193  while (*ptr && t_isspace(ptr))
194  ptr += pg_mblen(ptr);
195 
196  if (t_iseq(ptr, '#') || *ptr == '\0' ||
197  t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
198  {
199  pfree(line);
200  continue;
201  }
202 
203  while (*ptr)
204  {
205  if (state == TR_WAITLEX)
206  {
207  if (t_iseq(ptr, ':'))
208  {
209  if (posinsubst == 0)
210  ereport(ERROR,
211  (errcode(ERRCODE_CONFIG_FILE_ERROR),
212  errmsg("unexpected delimiter")));
213  state = TR_WAITSUBS;
214  }
215  else if (!t_isspace(ptr))
216  {
217  beginwrd = ptr;
218  state = TR_INLEX;
219  }
220  }
221  else if (state == TR_INLEX)
222  {
223  if (t_iseq(ptr, ':'))
224  {
225  newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
226  state = TR_WAITSUBS;
227  }
228  else if (t_isspace(ptr))
229  {
230  newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
231  state = TR_WAITLEX;
232  }
233  }
234  else if (state == TR_WAITSUBS)
235  {
236  if (t_iseq(ptr, '*'))
237  {
238  useasis = true;
239  state = TR_INSUBS;
240  beginwrd = ptr + pg_mblen(ptr);
241  }
242  else if (t_iseq(ptr, '\\'))
243  {
244  useasis = false;
245  state = TR_INSUBS;
246  beginwrd = ptr + pg_mblen(ptr);
247  }
248  else if (!t_isspace(ptr))
249  {
250  useasis = false;
251  beginwrd = ptr;
252  state = TR_INSUBS;
253  }
254  }
255  else if (state == TR_INSUBS)
256  {
257  if (t_isspace(ptr))
258  {
259  if (ptr == beginwrd)
260  ereport(ERROR,
261  (errcode(ERRCODE_CONFIG_FILE_ERROR),
262  errmsg("unexpected end of line or lexeme")));
263  addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
264  state = TR_WAITSUBS;
265  }
266  }
267  else
268  elog(ERROR, "unrecognized thesaurus state: %d", state);
269 
270  ptr += pg_mblen(ptr);
271  }
272 
273  if (state == TR_INSUBS)
274  {
275  if (ptr == beginwrd)
276  ereport(ERROR,
277  (errcode(ERRCODE_CONFIG_FILE_ERROR),
278  errmsg("unexpected end of line or lexeme")));
279  addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
280  }
281 
282  idsubst++;
283 
284  if (!(nwrd && posinsubst))
285  ereport(ERROR,
286  (errcode(ERRCODE_CONFIG_FILE_ERROR),
287  errmsg("unexpected end of line")));
288 
289  if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
290  ereport(ERROR,
291  (errcode(ERRCODE_CONFIG_FILE_ERROR),
292  errmsg("too many lexemes in thesaurus entry")));
293 
294  pfree(line);
295  }
296 
297  d->nsubst = idsubst;
298 
299  tsearch_readline_end(&trst);
300 }
301 
302 static TheLexeme *
303 addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
304 {
305  if (*nnw >= *tnm)
306  {
307  *tnm *= 2;
308  newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
309  }
310 
311  newwrds[*nnw].entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
312 
313  if (lexeme && lexeme->lexeme)
314  {
315  newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme);
316  newwrds[*nnw].entries->tnvariant = tnvariant;
317  }
318  else
319  {
320  newwrds[*nnw].lexeme = NULL;
321  newwrds[*nnw].entries->tnvariant = 1;
322  }
323 
324  newwrds[*nnw].entries->idsubst = src->idsubst;
325  newwrds[*nnw].entries->posinsubst = src->posinsubst;
326 
327  newwrds[*nnw].entries->nextentry = NULL;
328 
329  (*nnw)++;
330  return newwrds;
331 }
332 
333 static int
335 {
336  if (a == NULL || b == NULL)
337  return 0;
338 
339  if (a->idsubst == b->idsubst)
340  {
341  if (a->posinsubst == b->posinsubst)
342  {
343  if (a->tnvariant == b->tnvariant)
344  return 0;
345 
346  return (a->tnvariant > b->tnvariant) ? 1 : -1;
347  }
348 
349  return (a->posinsubst > b->posinsubst) ? 1 : -1;
350  }
351 
352  return (a->idsubst > b->idsubst) ? 1 : -1;
353 }
354 
355 static int
356 cmpLexeme(const TheLexeme *a, const TheLexeme *b)
357 {
358  if (a->lexeme == NULL)
359  {
360  if (b->lexeme == NULL)
361  return 0;
362  else
363  return 1;
364  }
365  else if (b->lexeme == NULL)
366  return -1;
367 
368  return strcmp(a->lexeme, b->lexeme);
369 }
370 
371 static int
372 cmpLexemeQ(const void *a, const void *b)
373 {
374  return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b);
375 }
376 
377 static int
378 cmpTheLexeme(const void *a, const void *b)
379 {
380  const TheLexeme *la = (const TheLexeme *) a;
381  const TheLexeme *lb = (const TheLexeme *) b;
382  int res;
383 
384  if ((res = cmpLexeme(la, lb)) != 0)
385  return res;
386 
387  return -cmpLexemeInfo(la->entries, lb->entries);
388 }
389 
390 static void
392 {
393  int i,
394  nnw = 0,
395  tnm = 16;
396  TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
397  *ptrwrds;
398 
399  for (i = 0; i < d->nwrds; i++)
400  {
401  TSLexeme *ptr;
402 
403  if (strcmp(d->wrds[i].lexeme, "?") == 0) /* Is stop word marker? */
404  newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
405  else
406  {
410  Int32GetDatum(strlen(d->wrds[i].lexeme)),
411  PointerGetDatum(NULL)));
412 
413  if (!ptr)
414  ereport(ERROR,
415  (errcode(ERRCODE_CONFIG_FILE_ERROR),
416  errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
417  d->wrds[i].lexeme,
418  d->wrds[i].entries->idsubst + 1)));
419  else if (!(ptr->lexeme))
420  ereport(ERROR,
421  (errcode(ERRCODE_CONFIG_FILE_ERROR),
422  errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
423  d->wrds[i].lexeme,
424  d->wrds[i].entries->idsubst + 1),
425  errhint("Use \"?\" to represent a stop word within a sample phrase.")));
426  else
427  {
428  while (ptr->lexeme)
429  {
430  TSLexeme *remptr = ptr + 1;
431  int tnvar = 1;
432  int curvar = ptr->nvariant;
433 
434  /* compute n words in one variant */
435  while (remptr->lexeme)
436  {
437  if (remptr->nvariant != (remptr - 1)->nvariant)
438  break;
439  tnvar++;
440  remptr++;
441  }
442 
443  remptr = ptr;
444  while (remptr->lexeme && remptr->nvariant == curvar)
445  {
446  newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
447  remptr++;
448  }
449 
450  ptr = remptr;
451  }
452  }
453  }
454 
455  pfree(d->wrds[i].lexeme);
456  pfree(d->wrds[i].entries);
457  }
458 
459  if (d->wrds)
460  pfree(d->wrds);
461  d->wrds = newwrds;
462  d->nwrds = nnw;
463  d->ntwrds = tnm;
464 
465  if (d->nwrds > 1)
466  {
467  qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
468 
469  /* uniq */
470  newwrds = d->wrds;
471  ptrwrds = d->wrds + 1;
472  while (ptrwrds - d->wrds < d->nwrds)
473  {
474  if (cmpLexeme(ptrwrds, newwrds) == 0)
475  {
476  if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
477  {
478  ptrwrds->entries->nextentry = newwrds->entries;
479  newwrds->entries = ptrwrds->entries;
480  }
481  else
482  pfree(ptrwrds->entries);
483 
484  if (ptrwrds->lexeme)
485  pfree(ptrwrds->lexeme);
486  }
487  else
488  {
489  newwrds++;
490  *newwrds = *ptrwrds;
491  }
492 
493  ptrwrds++;
494  }
495 
496  d->nwrds = newwrds - d->wrds + 1;
497  d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
498  }
499 }
500 
501 static void
503 {
504  int i;
505 
506  for (i = 0; i < d->nsubst; i++)
507  {
508  TSLexeme *rem = d->subst[i].res,
509  *outptr,
510  *inptr;
511  int n = 2;
512 
513  outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
514  outptr->lexeme = NULL;
515  inptr = rem;
516 
517  while (inptr && inptr->lexeme)
518  {
519  TSLexeme *lexized,
520  tmplex[2];
521 
522  if (inptr->flags & DT_USEASIS)
523  { /* do not lexize */
524  tmplex[0] = *inptr;
525  tmplex[0].flags = 0;
526  tmplex[1].lexeme = NULL;
527  lexized = tmplex;
528  }
529  else
530  {
531  lexized = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
533  PointerGetDatum(inptr->lexeme),
534  Int32GetDatum(strlen(inptr->lexeme)),
535  PointerGetDatum(NULL)));
536  }
537 
538  if (lexized && lexized->lexeme)
539  {
540  int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
541 
542  while (lexized->lexeme)
543  {
544  if (outptr - d->subst[i].res + 1 >= n)
545  {
546  int diff = outptr - d->subst[i].res;
547 
548  n *= 2;
549  d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
550  outptr = d->subst[i].res + diff;
551  }
552 
553  *outptr = *lexized;
554  outptr->lexeme = pstrdup(lexized->lexeme);
555 
556  outptr++;
557  lexized++;
558  }
559 
560  if (toset > 0)
561  d->subst[i].res[toset].flags |= TSL_ADDPOS;
562  }
563  else if (lexized)
564  {
565  ereport(ERROR,
566  (errcode(ERRCODE_CONFIG_FILE_ERROR),
567  errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
568  inptr->lexeme, i + 1)));
569  }
570  else
571  {
572  ereport(ERROR,
573  (errcode(ERRCODE_CONFIG_FILE_ERROR),
574  errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
575  inptr->lexeme, i + 1)));
576  }
577 
578  if (inptr->lexeme)
579  pfree(inptr->lexeme);
580  inptr++;
581  }
582 
583  if (outptr == d->subst[i].res)
584  ereport(ERROR,
585  (errcode(ERRCODE_CONFIG_FILE_ERROR),
586  errmsg("thesaurus substitute phrase is empty (rule %d)",
587  i + 1)));
588 
589  d->subst[i].reslen = outptr - d->subst[i].res;
590 
591  pfree(rem);
592  }
593 }
594 
595 Datum
597 {
598  List *dictoptions = (List *) PG_GETARG_POINTER(0);
599  DictThesaurus *d;
600  char *subdictname = NULL;
601  bool fileloaded = false;
602  List *namelist;
603  ListCell *l;
604 
605  d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
606 
607  foreach(l, dictoptions)
608  {
609  DefElem *defel = (DefElem *) lfirst(l);
610 
611  if (strcmp(defel->defname, "dictfile") == 0)
612  {
613  if (fileloaded)
614  ereport(ERROR,
615  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
616  errmsg("multiple DictFile parameters")));
617  thesaurusRead(defGetString(defel), d);
618  fileloaded = true;
619  }
620  else if (strcmp(defel->defname, "dictionary") == 0)
621  {
622  if (subdictname)
623  ereport(ERROR,
624  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
625  errmsg("multiple Dictionary parameters")));
626  subdictname = pstrdup(defGetString(defel));
627  }
628  else
629  {
630  ereport(ERROR,
631  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
632  errmsg("unrecognized Thesaurus parameter: \"%s\"",
633  defel->defname)));
634  }
635  }
636 
637  if (!fileloaded)
638  ereport(ERROR,
639  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
640  errmsg("missing DictFile parameter")));
641  if (!subdictname)
642  ereport(ERROR,
643  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
644  errmsg("missing Dictionary parameter")));
645 
646  namelist = stringToQualifiedNameList(subdictname, NULL);
647  d->subdictOid = get_ts_dict_oid(namelist, false);
649 
650  compileTheLexeme(d);
652 
654 }
655 
656 static LexemeInfo *
657 findTheLexeme(DictThesaurus *d, char *lexeme)
658 {
659  TheLexeme key,
660  *res;
661 
662  if (d->nwrds == 0)
663  return NULL;
664 
665  key.lexeme = lexeme;
666  key.entries = NULL;
667 
668  res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
669 
670  if (res == NULL)
671  return NULL;
672  return res->entries;
673 }
674 
675 static bool
677 {
678  bool res = true;
679 
680  if (stored)
681  {
682  res = false;
683 
684  for (; stored; stored = stored->nextvariant)
685  if (stored->idsubst == idsubst)
686  {
687  res = true;
688  break;
689  }
690  }
691 
692  return res;
693 }
694 
695 static LexemeInfo *
696 findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
697 {
698  for (;;)
699  {
700  int i;
701  LexemeInfo *ptr = newin[0];
702 
703  for (i = 0; i < newn; i++)
704  {
705  while (newin[i] && newin[i]->idsubst < ptr->idsubst)
706  newin[i] = newin[i]->nextentry;
707 
708  if (newin[i] == NULL)
709  return in;
710 
711  if (newin[i]->idsubst > ptr->idsubst)
712  {
713  ptr = newin[i];
714  i = -1;
715  continue;
716  }
717 
718  while (newin[i]->idsubst == ptr->idsubst)
719  {
720  if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
721  {
722  ptr = newin[i];
723  break;
724  }
725 
726  newin[i] = newin[i]->nextentry;
727  if (newin[i] == NULL)
728  return in;
729  }
730 
731  if (newin[i]->idsubst != ptr->idsubst)
732  {
733  ptr = newin[i];
734  i = -1;
735  continue;
736  }
737  }
738 
739  if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
740  { /* found */
741 
742  ptr->nextvariant = in;
743  in = ptr;
744  }
745 
746  /* step forward */
747  for (i = 0; i < newn; i++)
748  newin[i] = newin[i]->nextentry;
749  }
750 }
751 
752 static TSLexeme *
754 {
755  TSLexeme *res;
756  uint16 i;
757 
758  res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
759  for (i = 0; i < ts->reslen; i++)
760  {
761  res[i] = ts->res[i];
762  res[i].lexeme = pstrdup(ts->res[i].lexeme);
763  }
764 
765  res[ts->reslen].lexeme = NULL;
766 
767  return res;
768 }
769 
770 static TSLexeme *
771 checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
772 {
773  *moreres = false;
774  while (info)
775  {
776  Assert(info->idsubst < d->nsubst);
777  if (info->nextvariant)
778  *moreres = true;
779  if (d->subst[info->idsubst].lastlexeme == curpos)
780  return copyTSLexeme(d->subst + info->idsubst);
781  info = info->nextvariant;
782  }
783 
784  return NULL;
785 }
786 
787 Datum
789 {
792  TSLexeme *res = NULL;
793  LexemeInfo *stored,
794  *info = NULL;
795  uint16 curpos = 0;
796  bool moreres = false;
797 
798  if (PG_NARGS() != 4 || dstate == NULL)
799  elog(ERROR, "forbidden call of thesaurus or nested call");
800 
801  if (dstate->isend)
802  PG_RETURN_POINTER(NULL);
803  stored = (LexemeInfo *) dstate->private_state;
804 
805  if (stored)
806  curpos = stored->posinsubst + 1;
807 
808  if (!d->subdict->isvalid)
810 
813  PG_GETARG_DATUM(1),
814  PG_GETARG_DATUM(2),
815  PointerGetDatum(NULL)));
816 
817  if (res && res->lexeme)
818  {
819  TSLexeme *ptr = res,
820  *basevar;
821 
822  while (ptr->lexeme)
823  {
824  uint16 nv = ptr->nvariant;
825  uint16 i,
826  nlex = 0;
827  LexemeInfo **infos;
828 
829  basevar = ptr;
830  while (ptr->lexeme && nv == ptr->nvariant)
831  {
832  nlex++;
833  ptr++;
834  }
835 
836  infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
837  for (i = 0; i < nlex; i++)
838  if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
839  break;
840 
841  if (i < nlex)
842  {
843  /* no chance to find */
844  pfree(infos);
845  continue;
846  }
847 
848  info = findVariant(info, stored, curpos, infos, nlex);
849  }
850  }
851  else if (res)
852  { /* stop-word */
853  LexemeInfo *infos = findTheLexeme(d, NULL);
854 
855  info = findVariant(NULL, stored, curpos, &infos, 1);
856  }
857  else
858  {
859  info = NULL; /* word isn't recognized */
860  }
861 
862  dstate->private_state = (void *) info;
863 
864  if (!info)
865  {
866  dstate->getnext = false;
867  PG_RETURN_POINTER(NULL);
868  }
869 
870  if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
871  {
872  dstate->getnext = moreres;
874  }
875 
876  dstate->getnext = true;
877 
878  PG_RETURN_POINTER(NULL);
879 }
unsigned short uint16
Definition: c.h:505
unsigned int uint32
Definition: c.h:506
#define Assert(condition)
Definition: c.h:858
char * defGetString(DefElem *def)
Definition: define.c:48
Datum thesaurus_lexize(PG_FUNCTION_ARGS)
#define TR_WAITLEX
static void addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
static bool matchIdSubst(LexemeInfo *stored, uint32 idsubst)
static void compileTheSubstitute(DictThesaurus *d)
static void thesaurusRead(const char *filename, DictThesaurus *d)
static LexemeInfo * findTheLexeme(DictThesaurus *d, char *lexeme)
static TheLexeme * addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
static LexemeInfo * findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
#define TR_INLEX
static TSLexeme * copyTSLexeme(TheSubstitute *ts)
static int cmpLexeme(const TheLexeme *a, const TheLexeme *b)
static void compileTheLexeme(DictThesaurus *d)
Datum thesaurus_init(PG_FUNCTION_ARGS)
struct LexemeInfo LexemeInfo
static int cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
#define TR_WAITSUBS
#define TR_INSUBS
#define DT_USEASIS
static int cmpLexemeQ(const void *a, const void *b)
static TSLexeme * checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
static void newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
static int cmpTheLexeme(const void *a, const void *b)
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_NARGS()
Definition: fmgr.h:203
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:666
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc0(Size size)
Definition: mcxt.c:1346
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
void * palloc(Size size)
Definition: mcxt.c:1316
Oid get_ts_dict_oid(List *names, bool missing_ok)
Definition: namespace.c:2846
static char * filename
Definition: pg_dumpall.c:119
#define lfirst(lc)
Definition: pg_list.h:172
#define qsort(a, b, c, d)
Definition: port.h:449
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
unsigned int Oid
Definition: postgres_ext.h:31
e
Definition: preproc-init.c:82
List * stringToQualifiedNameList(const char *string, Node *escontext)
Definition: regproc.c:1797
char * defname
Definition: parsenodes.h:815
void * private_state
Definition: ts_public.h:155
bool getnext
Definition: ts_public.h:154
TheLexeme * wrds
TheSubstitute * subst
TSDictionaryCacheEntry * subdict
uint16 posinsubst
struct LexemeInfo * nextvariant
struct LexemeInfo * nextentry
uint32 idsubst
uint16 tnvariant
Definition: pg_list.h:54
uint16 nvariant
Definition: ts_public.h:134
char * lexeme
Definition: ts_public.h:138
uint16 flags
Definition: ts_public.h:136
char * lexeme
LexemeInfo * entries
TSLexeme * res
Definition: regguts.h:323
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:208
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:134
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:157
int t_isspace(const char *ptr)
Definition: ts_locale.c:50
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:202
#define t_iseq(x, c)
Definition: ts_locale.h:38
#define TSL_ADDPOS
Definition: ts_public.h:142
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:33