PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
dict_thesaurus.c File Reference
#include "postgres.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/regproc.h"
Include dependency graph for dict_thesaurus.c:

Go to the source code of this file.

Data Structures

struct  LexemeInfo
 
struct  TheLexeme
 
struct  TheSubstitute
 
struct  DictThesaurus
 

Macros

#define DT_USEASIS   0x1000
 
#define TR_WAITLEX   1
 
#define TR_INLEX   2
 
#define TR_WAITSUBS   3
 
#define TR_INSUBS   4
 

Typedefs

typedef struct LexemeInfo LexemeInfo
 

Functions

static void newLexeme (DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
 
static void addWrd (DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
 
static void thesaurusRead (char *filename, DictThesaurus *d)
 
static TheLexeme * addCompiledLexeme (TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
 
static int cmpLexemeInfo (LexemeInfo *a, LexemeInfo *b)
 
static int cmpLexeme (const TheLexeme *a, const TheLexeme *b)
 
static int cmpLexemeQ (const void *a, const void *b)
 
static int cmpTheLexeme (const void *a, const void *b)
 
static void compileTheLexeme (DictThesaurus *d)
 
static void compileTheSubstitute (DictThesaurus *d)
 
Datum thesaurus_init (PG_FUNCTION_ARGS)
 
static LexemeInfo * findTheLexeme (DictThesaurus *d, char *lexeme)
 
static bool matchIdSubst (LexemeInfo *stored, uint32 idsubst)
 
static LexemeInfo * findVariant (LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
 
static TSLexeme * copyTSLexeme (TheSubstitute *ts)
 
static TSLexeme * checkMatch (DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
 
Datum thesaurus_lexize (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

#define DT_USEASIS   0x1000

Definition at line 28 of file dict_thesaurus.c.

Referenced by addWrd(), and compileTheSubstitute().

#define TR_INLEX   2

Definition at line 163 of file dict_thesaurus.c.

Referenced by thesaurusRead().

#define TR_INSUBS   4

Definition at line 165 of file dict_thesaurus.c.

Referenced by thesaurusRead().

#define TR_WAITLEX   1

Definition at line 162 of file dict_thesaurus.c.

Referenced by thesaurusRead().

#define TR_WAITSUBS   3

Definition at line 164 of file dict_thesaurus.c.

Referenced by thesaurusRead().

Typedef Documentation

Function Documentation

static TheLexeme* addCompiledLexeme ( TheLexeme *  newwrds,
int *  nnw,
int *  tnm,
TSLexeme *  lexeme,
LexemeInfo *  src,
uint16  tnvariant 
)
static

Definition at line 308 of file dict_thesaurus.c.

References TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, TSLexeme::lexeme, LexemeInfo::nextentry, NULL, palloc(), LexemeInfo::posinsubst, pstrdup(), repalloc(), and LexemeInfo::tnvariant.

Referenced by compileTheLexeme().

309 {
310  if (*nnw >= *tnm)
311  {
312  *tnm *= 2;
313  newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
314  }
315 
316  newwrds[*nnw].entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
317 
318  if (lexeme && lexeme->lexeme)
319  {
320  newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme);
321  newwrds[*nnw].entries->tnvariant = tnvariant;
322  }
323  else
324  {
325  newwrds[*nnw].lexeme = NULL;
326  newwrds[*nnw].entries->tnvariant = 1;
327  }
328 
329  newwrds[*nnw].entries->idsubst = src->idsubst;
330  newwrds[*nnw].entries->posinsubst = src->posinsubst;
331 
332  newwrds[*nnw].entries->nextentry = NULL;
333 
334  (*nnw)++;
335  return newwrds;
336 }
uint32 idsubst
struct LexemeInfo * nextentry
char * pstrdup(const char *in)
Definition: mcxt.c:1165
LexemeInfo * entries
char * lexeme
char * lexeme
Definition: ts_public.h:111
uint16 posinsubst
uint16 tnvariant
#define NULL
Definition: c.h:226
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
void * palloc(Size size)
Definition: mcxt.c:891
static void addWrd ( DictThesaurus *  d,
char *  b,
char *  e,
uint32  idsubst,
uint16  nwrd,
uint16  posinsubst,
bool  useasis 
)
static

Definition at line 106 of file dict_thesaurus.c.

References DT_USEASIS, TSLexeme::flags, LexemeInfo::idsubst, TheSubstitute::lastlexeme, TSLexeme::lexeme, DictThesaurus::nsubst, NULL, TSLexeme::nvariant, palloc(), repalloc(), TheSubstitute::res, and DictThesaurus::subst.

Referenced by thesaurusRead().

107 {
108  static int nres = 0;
109  static int ntres = 0;
110  TheSubstitute *ptr;
111 
112  if (nwrd == 0)
113  {
114  nres = ntres = 0;
115 
116  if (idsubst >= d->nsubst)
117  {
118  if (d->nsubst == 0)
119  {
120  d->nsubst = 16;
121  d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
122  }
123  else
124  {
125  d->nsubst *= 2;
126  d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
127  }
128  }
129  }
130 
131  ptr = d->subst + idsubst;
132 
133  ptr->lastlexeme = posinsubst - 1;
134 
135  if (nres + 1 >= ntres)
136  {
137  if (ntres == 0)
138  {
139  ntres = 2;
140  ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres);
141  }
142  else
143  {
144  ntres *= 2;
145  ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres);
146  }
147  }
148 
149  ptr->res[nres].lexeme = palloc(e - b + 1);
150  memcpy(ptr->res[nres].lexeme, b, e - b);
151  ptr->res[nres].lexeme[e - b] = '\0';
152 
153  ptr->res[nres].nvariant = nwrd;
154  if (useasis)
155  ptr->res[nres].flags = DT_USEASIS;
156  else
157  ptr->res[nres].flags = 0;
158 
159  ptr->res[++nres].lexeme = NULL;
160 }
TheSubstitute * subst
char * lexeme
Definition: ts_public.h:111
uint16 nvariant
Definition: ts_public.h:107
uint16 flags
Definition: ts_public.h:109
#define NULL
Definition: c.h:226
#define DT_USEASIS
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
e
Definition: preproc-init.c:82
void * palloc(Size size)
Definition: mcxt.c:891
TSLexeme * res
static TSLexeme* checkMatch ( DictThesaurus *  d,
LexemeInfo *  info,
uint16  curpos,
bool *  moreres 
)
static

Definition at line 778 of file dict_thesaurus.c.

References Assert, copyTSLexeme(), LexemeInfo::idsubst, TheSubstitute::lastlexeme, LexemeInfo::nextvariant, DictThesaurus::nsubst, NULL, and DictThesaurus::subst.

Referenced by thesaurus_lexize().

779 {
780  *moreres = false;
781  while (info)
782  {
783  Assert(info->idsubst < d->nsubst);
784  if (info->nextvariant)
785  *moreres = true;
786  if (d->subst[info->idsubst].lastlexeme == curpos)
787  return copyTSLexeme(d->subst + info->idsubst);
788  info = info->nextvariant;
789  }
790 
791  return NULL;
792 }
uint32 idsubst
TheSubstitute * subst
static TSLexeme * copyTSLexeme(TheSubstitute *ts)
struct LexemeInfo * nextvariant
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
static int cmpLexeme ( const TheLexeme *  a,
const TheLexeme *  b 
)
static

Definition at line 361 of file dict_thesaurus.c.

References TheLexeme::lexeme, and NULL.

Referenced by cmpLexemeQ(), cmpTheLexeme(), and compileTheLexeme().

362 {
363  if (a->lexeme == NULL)
364  {
365  if (b->lexeme == NULL)
366  return 0;
367  else
368  return 1;
369  }
370  else if (b->lexeme == NULL)
371  return -1;
372 
373  return strcmp(a->lexeme, b->lexeme);
374 }
char * lexeme
#define NULL
Definition: c.h:226
static int cmpLexemeInfo ( LexemeInfo *  a,
LexemeInfo *  b 
)
static

Definition at line 339 of file dict_thesaurus.c.

References LexemeInfo::idsubst, NULL, LexemeInfo::posinsubst, and LexemeInfo::tnvariant.

Referenced by cmpTheLexeme(), and compileTheLexeme().

340 {
341  if (a == NULL || b == NULL)
342  return 0;
343 
344  if (a->idsubst == b->idsubst)
345  {
346  if (a->posinsubst == b->posinsubst)
347  {
348  if (a->tnvariant == b->tnvariant)
349  return 0;
350 
351  return (a->tnvariant > b->tnvariant) ? 1 : -1;
352  }
353 
354  return (a->posinsubst > b->posinsubst) ? 1 : -1;
355  }
356 
357  return (a->idsubst > b->idsubst) ? 1 : -1;
358 }
uint32 idsubst
uint16 posinsubst
uint16 tnvariant
#define NULL
Definition: c.h:226
static int cmpLexemeQ ( const void *  a,
const void *  b 
)
static

Definition at line 377 of file dict_thesaurus.c.

References cmpLexeme().

Referenced by findTheLexeme().

378 {
379  return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b);
380 }
static int cmpLexeme(const TheLexeme *a, const TheLexeme *b)
static int cmpTheLexeme ( const void *  a,
const void *  b 
)
static

Definition at line 383 of file dict_thesaurus.c.

References cmpLexeme(), cmpLexemeInfo(), and TheLexeme::entries.

Referenced by compileTheLexeme().

384 {
385  const TheLexeme *la = (const TheLexeme *) a;
386  const TheLexeme *lb = (const TheLexeme *) b;
387  int res;
388 
389  if ((res = cmpLexeme(la, lb)) != 0)
390  return res;
391 
392  return -cmpLexemeInfo(la->entries, lb->entries);
393 }
LexemeInfo * entries
static int cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
static int cmpLexeme(const TheLexeme *a, const TheLexeme *b)
static void compileTheLexeme ( DictThesaurus *  d)
static

Definition at line 396 of file dict_thesaurus.c.

References addCompiledLexeme(), cmpLexeme(), cmpLexemeInfo(), cmpTheLexeme(), DatumGetPointer, TSDictionaryCacheEntry::dictData, TheLexeme::entries, ereport, errcode(), errhint(), errmsg(), ERROR, FunctionCall4, i, LexemeInfo::idsubst, Int32GetDatum, TheLexeme::lexeme, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, LexemeInfo::nextentry, DictThesaurus::ntwrds, NULL, TSLexeme::nvariant, DictThesaurus::nwrds, palloc(), pfree(), PointerGetDatum, qsort, repalloc(), DictThesaurus::subdict, and DictThesaurus::wrds.

Referenced by thesaurus_init().

397 {
398  int i,
399  nnw = 0,
400  tnm = 16;
401  TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
402  *ptrwrds;
403 
404  for (i = 0; i < d->nwrds; i++)
405  {
406  TSLexeme *ptr;
407 
408  if (strcmp(d->wrds[i].lexeme, "?") == 0) /* Is stop word marker? */
409  newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
410  else
411  {
414  PointerGetDatum(d->wrds[i].lexeme),
415  Int32GetDatum(strlen(d->wrds[i].lexeme)),
417 
418  if (!ptr)
419  ereport(ERROR,
420  (errcode(ERRCODE_CONFIG_FILE_ERROR),
421  errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
422  d->wrds[i].lexeme,
423  d->wrds[i].entries->idsubst + 1)));
424  else if (!(ptr->lexeme))
425  ereport(ERROR,
426  (errcode(ERRCODE_CONFIG_FILE_ERROR),
427  errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
428  d->wrds[i].lexeme,
429  d->wrds[i].entries->idsubst + 1),
430  errhint("Use \"?\" to represent a stop word within a sample phrase.")));
431  else
432  {
433  while (ptr->lexeme)
434  {
435  TSLexeme *remptr = ptr + 1;
436  int tnvar = 1;
437  int curvar = ptr->nvariant;
438 
439  /* compute n words in one variant */
440  while (remptr->lexeme)
441  {
442  if (remptr->nvariant != (remptr - 1)->nvariant)
443  break;
444  tnvar++;
445  remptr++;
446  }
447 
448  remptr = ptr;
449  while (remptr->lexeme && remptr->nvariant == curvar)
450  {
451  newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
452  remptr++;
453  }
454 
455  ptr = remptr;
456  }
457  }
458  }
459 
460  pfree(d->wrds[i].lexeme);
461  pfree(d->wrds[i].entries);
462  }
463 
464  if (d->wrds)
465  pfree(d->wrds);
466  d->wrds = newwrds;
467  d->nwrds = nnw;
468  d->ntwrds = tnm;
469 
470  if (d->nwrds > 1)
471  {
472  qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
473 
474  /* uniq */
475  newwrds = d->wrds;
476  ptrwrds = d->wrds + 1;
477  while (ptrwrds - d->wrds < d->nwrds)
478  {
479  if (cmpLexeme(ptrwrds, newwrds) == 0)
480  {
481  if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
482  {
483  ptrwrds->entries->nextentry = newwrds->entries;
484  newwrds->entries = ptrwrds->entries;
485  }
486  else
487  pfree(ptrwrds->entries);
488 
489  if (ptrwrds->lexeme)
490  pfree(ptrwrds->lexeme);
491  }
492  else
493  {
494  newwrds++;
495  *newwrds = *ptrwrds;
496  }
497 
498  ptrwrds++;
499  }
500 
501  d->nwrds = newwrds - d->wrds + 1;
502  d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
503  }
504 }
int errhint(const char *fmt,...)
Definition: elog.c:987
uint32 idsubst
#define PointerGetDatum(X)
Definition: postgres.h:564
struct LexemeInfo * nextentry
static TheLexeme * addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
int errcode(int sqlerrcode)
Definition: elog.c:575
LexemeInfo * entries
char * lexeme
void pfree(void *pointer)
Definition: mcxt.c:992
#define ERROR
Definition: elog.h:43
char * lexeme
Definition: ts_public.h:111
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:579
uint16 nvariant
Definition: ts_public.h:107
#define ereport(elevel, rest)
Definition: elog.h:122
static int cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
TSDictionaryCacheEntry * subdict
TheLexeme * wrds
static int cmpLexeme(const TheLexeme *a, const TheLexeme *b)
#define NULL
Definition: c.h:226
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
#define DatumGetPointer(X)
Definition: postgres.h:557
static int cmpTheLexeme(const void *a, const void *b)
#define Int32GetDatum(X)
Definition: postgres.h:487
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
int i
#define qsort(a, b, c, d)
Definition: port.h:440
static void compileTheSubstitute ( DictThesaurus *  d)
static

Definition at line 507 of file dict_thesaurus.c.

References DatumGetPointer, TSDictionaryCacheEntry::dictData, DT_USEASIS, ereport, errcode(), errmsg(), ERROR, TSLexeme::flags, FunctionCall4, i, Int32GetDatum, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, DictThesaurus::nsubst, NULL, palloc(), pfree(), PointerGetDatum, pstrdup(), repalloc(), TheSubstitute::res, TheSubstitute::reslen, DictThesaurus::subdict, DictThesaurus::subst, and TSL_ADDPOS.

Referenced by thesaurus_init().

508 {
509  int i;
510 
511  for (i = 0; i < d->nsubst; i++)
512  {
513  TSLexeme *rem = d->subst[i].res,
514  *outptr,
515  *inptr;
516  int n = 2;
517 
518  outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
519  outptr->lexeme = NULL;
520  inptr = rem;
521 
522  while (inptr && inptr->lexeme)
523  {
524  TSLexeme *lexized,
525  tmplex[2];
526 
527  if (inptr->flags & DT_USEASIS)
528  { /* do not lexize */
529  tmplex[0] = *inptr;
530  tmplex[0].flags = 0;
531  tmplex[1].lexeme = NULL;
532  lexized = tmplex;
533  }
534  else
535  {
536  lexized = (TSLexeme *) DatumGetPointer(
538  &(d->subdict->lexize),
540  PointerGetDatum(inptr->lexeme),
541  Int32GetDatum(strlen(inptr->lexeme)),
543  )
544  );
545  }
546 
547  if (lexized && lexized->lexeme)
548  {
549  int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
550 
551  while (lexized->lexeme)
552  {
553  if (outptr - d->subst[i].res + 1 >= n)
554  {
555  int diff = outptr - d->subst[i].res;
556 
557  n *= 2;
558  d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
559  outptr = d->subst[i].res + diff;
560  }
561 
562  *outptr = *lexized;
563  outptr->lexeme = pstrdup(lexized->lexeme);
564 
565  outptr++;
566  lexized++;
567  }
568 
569  if (toset > 0)
570  d->subst[i].res[toset].flags |= TSL_ADDPOS;
571  }
572  else if (lexized)
573  {
574  ereport(ERROR,
575  (errcode(ERRCODE_CONFIG_FILE_ERROR),
576  errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
577  inptr->lexeme, i + 1)));
578  }
579  else
580  {
581  ereport(ERROR,
582  (errcode(ERRCODE_CONFIG_FILE_ERROR),
583  errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
584  inptr->lexeme, i + 1)));
585  }
586 
587  if (inptr->lexeme)
588  pfree(inptr->lexeme);
589  inptr++;
590  }
591 
592  if (outptr == d->subst[i].res)
593  ereport(ERROR,
594  (errcode(ERRCODE_CONFIG_FILE_ERROR),
595  errmsg("thesaurus substitute phrase is empty (rule %d)",
596  i + 1)));
597 
598  d->subst[i].reslen = outptr - d->subst[i].res;
599 
600  pfree(rem);
601  }
602 }
#define TSL_ADDPOS
Definition: ts_public.h:115
TheSubstitute * subst
#define PointerGetDatum(X)
Definition: postgres.h:564
char * pstrdup(const char *in)
Definition: mcxt.c:1165
int errcode(int sqlerrcode)
Definition: elog.c:575
void pfree(void *pointer)
Definition: mcxt.c:992
#define ERROR
Definition: elog.h:43
char * lexeme
Definition: ts_public.h:111
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:579
#define ereport(elevel, rest)
Definition: elog.h:122
uint16 flags
Definition: ts_public.h:109
TSDictionaryCacheEntry * subdict
#define NULL
Definition: c.h:226
#define DT_USEASIS
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
#define DatumGetPointer(X)
Definition: postgres.h:557
#define Int32GetDatum(X)
Definition: postgres.h:487
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
TSLexeme * res
int i
static TSLexeme* copyTSLexeme ( TheSubstitute *  ts)
static

Definition at line 760 of file dict_thesaurus.c.

References i, TSLexeme::lexeme, NULL, palloc(), pstrdup(), TheSubstitute::res, and TheSubstitute::reslen.

Referenced by checkMatch().

761 {
762  TSLexeme *res;
763  uint16 i;
764 
765  res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
766  for (i = 0; i < ts->reslen; i++)
767  {
768  res[i] = ts->res[i];
769  res[i].lexeme = pstrdup(ts->res[i].lexeme);
770  }
771 
772  res[ts->reslen].lexeme = NULL;
773 
774  return res;
775 }
char * pstrdup(const char *in)
Definition: mcxt.c:1165
unsigned short uint16
Definition: c.h:264
char * lexeme
Definition: ts_public.h:111
#define NULL
Definition: c.h:226
void * palloc(Size size)
Definition: mcxt.c:891
TSLexeme * res
int i
static LexemeInfo* findTheLexeme ( DictThesaurus *  d,
char *  lexeme 
)
static

Definition at line 664 of file dict_thesaurus.c.

References cmpLexemeQ(), TheLexeme::entries, TheLexeme::lexeme, NULL, DictThesaurus::nwrds, and DictThesaurus::wrds.

Referenced by thesaurus_lexize().

665 {
666  TheLexeme key,
667  *res;
668 
669  if (d->nwrds == 0)
670  return NULL;
671 
672  key.lexeme = lexeme;
673  key.entries = NULL;
674 
675  res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
676 
677  if (res == NULL)
678  return NULL;
679  return res->entries;
680 }
LexemeInfo * entries
char * lexeme
static int cmpLexemeQ(const void *a, const void *b)
TheLexeme * wrds
#define NULL
Definition: c.h:226
static LexemeInfo* findVariant ( LexemeInfo *  in,
LexemeInfo *  stored,
uint16  curpos,
LexemeInfo **  newin,
int  newn 
)
static

Definition at line 703 of file dict_thesaurus.c.

References i, LexemeInfo::idsubst, matchIdSubst(), LexemeInfo::nextentry, LexemeInfo::nextvariant, NULL, LexemeInfo::posinsubst, and LexemeInfo::tnvariant.

Referenced by thesaurus_lexize().

704 {
705  for (;;)
706  {
707  int i;
708  LexemeInfo *ptr = newin[0];
709 
710  for (i = 0; i < newn; i++)
711  {
712  while (newin[i] && newin[i]->idsubst < ptr->idsubst)
713  newin[i] = newin[i]->nextentry;
714 
715  if (newin[i] == NULL)
716  return in;
717 
718  if (newin[i]->idsubst > ptr->idsubst)
719  {
720  ptr = newin[i];
721  i = -1;
722  continue;
723  }
724 
725  while (newin[i]->idsubst == ptr->idsubst)
726  {
727  if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
728  {
729  ptr = newin[i];
730  break;
731  }
732 
733  newin[i] = newin[i]->nextentry;
734  if (newin[i] == NULL)
735  return in;
736  }
737 
738  if (newin[i]->idsubst != ptr->idsubst)
739  {
740  ptr = newin[i];
741  i = -1;
742  continue;
743  }
744  }
745 
746  if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
747  { /* found */
748 
749  ptr->nextvariant = in;
750  in = ptr;
751  }
752 
753  /* step forward */
754  for (i = 0; i < newn; i++)
755  newin[i] = newin[i]->nextentry;
756  }
757 }
uint32 idsubst
struct LexemeInfo * nextentry
static bool matchIdSubst(LexemeInfo *stored, uint32 idsubst)
struct LexemeInfo * nextvariant
#define NULL
Definition: c.h:226
int i
static bool matchIdSubst ( LexemeInfo *  stored,
uint32  idsubst 
)
static

Definition at line 683 of file dict_thesaurus.c.

References LexemeInfo::idsubst, and LexemeInfo::nextvariant.

Referenced by findVariant().

684 {
685  bool res = true;
686 
687  if (stored)
688  {
689  res = false;
690 
691  for (; stored; stored = stored->nextvariant)
692  if (stored->idsubst == idsubst)
693  {
694  res = true;
695  break;
696  }
697  }
698 
699  return res;
700 }
uint32 idsubst
struct LexemeInfo * nextvariant
static void newLexeme ( DictThesaurus *  d,
char *  b,
char *  e,
uint32  idsubst,
uint16  posinsubst 
)
static

Definition at line 72 of file dict_thesaurus.c.

References TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, LexemeInfo::nextentry, DictThesaurus::ntwrds, NULL, DictThesaurus::nwrds, palloc(), LexemeInfo::posinsubst, repalloc(), and DictThesaurus::wrds.

Referenced by thesaurusRead().

73 {
74  TheLexeme *ptr;
75 
76  if (d->nwrds >= d->ntwrds)
77  {
78  if (d->ntwrds == 0)
79  {
80  d->ntwrds = 16;
81  d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
82  }
83  else
84  {
85  d->ntwrds *= 2;
86  d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
87  }
88  }
89 
90  ptr = d->wrds + d->nwrds;
91  d->nwrds++;
92 
93  ptr->lexeme = palloc(e - b + 1);
94 
95  memcpy(ptr->lexeme, b, e - b);
96  ptr->lexeme[e - b] = '\0';
97 
98  ptr->entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
99 
100  ptr->entries->nextentry = NULL;
101  ptr->entries->idsubst = idsubst;
102  ptr->entries->posinsubst = posinsubst;
103 }
uint32 idsubst
struct LexemeInfo * nextentry
LexemeInfo * entries
char * lexeme
uint16 posinsubst
TheLexeme * wrds
#define NULL
Definition: c.h:226
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
e
Definition: preproc-init.c:82
void * palloc(Size size)
Definition: mcxt.c:891
Datum thesaurus_init ( PG_FUNCTION_ARGS  )

Definition at line 605 of file dict_thesaurus.c.

References compileTheLexeme(), compileTheSubstitute(), defGetString(), DefElem::defname, ereport, errcode(), errmsg(), ERROR, get_ts_dict_oid(), lfirst, lookup_ts_dictionary_cache(), NULL, palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, pg_strcasecmp(), pstrdup(), stringToQualifiedNameList(), DictThesaurus::subdict, DictThesaurus::subdictOid, and thesaurusRead().

606 {
607  List *dictoptions = (List *) PG_GETARG_POINTER(0);
608  DictThesaurus *d;
609  char *subdictname = NULL;
610  bool fileloaded = false;
611  ListCell *l;
612 
613  d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
614 
615  foreach(l, dictoptions)
616  {
617  DefElem *defel = (DefElem *) lfirst(l);
618 
619  if (pg_strcasecmp("DictFile", defel->defname) == 0)
620  {
621  if (fileloaded)
622  ereport(ERROR,
623  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
624  errmsg("multiple DictFile parameters")));
625  thesaurusRead(defGetString(defel), d);
626  fileloaded = true;
627  }
628  else if (pg_strcasecmp("Dictionary", defel->defname) == 0)
629  {
630  if (subdictname)
631  ereport(ERROR,
632  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
633  errmsg("multiple Dictionary parameters")));
634  subdictname = pstrdup(defGetString(defel));
635  }
636  else
637  {
638  ereport(ERROR,
639  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
640  errmsg("unrecognized Thesaurus parameter: \"%s\"",
641  defel->defname)));
642  }
643  }
644 
645  if (!fileloaded)
646  ereport(ERROR,
647  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
648  errmsg("missing DictFile parameter")));
649  if (!subdictname)
650  ereport(ERROR,
651  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
652  errmsg("missing Dictionary parameter")));
653 
654  d->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subdictname), false);
656 
657  compileTheLexeme(d);
659 
661 }
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:305
static void thesaurusRead(char *filename, DictThesaurus *d)
char * pstrdup(const char *in)
Definition: mcxt.c:1165
int errcode(int sqlerrcode)
Definition: elog.c:575
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:232
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
static void compileTheLexeme(DictThesaurus *d)
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:210
Oid get_ts_dict_oid(List *names, bool missing_ok)
Definition: namespace.c:2220
#define ERROR
Definition: elog.h:43
char * defGetString(DefElem *def)
Definition: define.c:49
#define ereport(elevel, rest)
Definition: elog.h:122
void * palloc0(Size size)
Definition: mcxt.c:920
TSDictionaryCacheEntry * subdict
#define NULL
Definition: c.h:226
#define lfirst(lc)
Definition: pg_list.h:106
static void compileTheSubstitute(DictThesaurus *d)
List * stringToQualifiedNameList(const char *string)
Definition: regproc.c:1830
int errmsg(const char *fmt,...)
Definition: elog.c:797
char * defname
Definition: parsenodes.h:675
Definition: pg_list.h:45
Datum thesaurus_lexize ( PG_FUNCTION_ARGS  )

Definition at line 795 of file dict_thesaurus.c.

References checkMatch(), DatumGetPointer, TSDictionaryCacheEntry::dictData, elog, ERROR, findTheLexeme(), findVariant(), FunctionCall4, DictSubState::getnext, i, DictSubState::isend, TSDictionaryCacheEntry::isvalid, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), NULL, TSLexeme::nvariant, palloc(), pfree(), PG_GETARG_DATUM, PG_GETARG_POINTER, PG_NARGS, PG_RETURN_POINTER, PointerGetDatum, LexemeInfo::posinsubst, DictSubState::private_state, DictThesaurus::subdict, and DictThesaurus::subdictOid.

796 {
799  TSLexeme *res = NULL;
800  LexemeInfo *stored,
801  *info = NULL;
802  uint16 curpos = 0;
803  bool moreres = false;
804 
805  if (PG_NARGS() != 4 || dstate == NULL)
806  elog(ERROR, "forbidden call of thesaurus or nested call");
807 
808  if (dstate->isend)
810  stored = (LexemeInfo *) dstate->private_state;
811 
812  if (stored)
813  curpos = stored->posinsubst + 1;
814 
815  if (!d->subdict->isvalid)
817 
820  PG_GETARG_DATUM(1),
821  PG_GETARG_DATUM(2),
823 
824  if (res && res->lexeme)
825  {
826  TSLexeme *ptr = res,
827  *basevar;
828 
829  while (ptr->lexeme)
830  {
831  uint16 nv = ptr->nvariant;
832  uint16 i,
833  nlex = 0;
834  LexemeInfo **infos;
835 
836  basevar = ptr;
837  while (ptr->lexeme && nv == ptr->nvariant)
838  {
839  nlex++;
840  ptr++;
841  }
842 
843  infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
844  for (i = 0; i < nlex; i++)
845  if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
846  break;
847 
848  if (i < nlex)
849  {
850  /* no chance to find */
851  pfree(infos);
852  continue;
853  }
854 
855  info = findVariant(info, stored, curpos, infos, nlex);
856  }
857  }
858  else if (res)
859  { /* stop-word */
860  LexemeInfo *infos = findTheLexeme(d, NULL);
861 
862  info = findVariant(NULL, stored, curpos, &infos, 1);
863  }
864  else
865  {
866  info = NULL; /* word isn't recognized */
867  }
868 
869  dstate->private_state = (void *) info;
870 
871  if (!info)
872  {
873  dstate->getnext = false;
875  }
876 
877  if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
878  {
879  dstate->getnext = moreres;
880  PG_RETURN_POINTER(res);
881  }
882 
883  dstate->getnext = true;
884 
886 }
static LexemeInfo * findTheLexeme(DictThesaurus *d, char *lexeme)
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:305
#define PointerGetDatum(X)
Definition: postgres.h:564
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:224
bool getnext
Definition: ts_public.h:127
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:232
static TSLexeme * checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:210
unsigned short uint16
Definition: c.h:264
void pfree(void *pointer)
Definition: mcxt.c:992
#define ERROR
Definition: elog.h:43
char * lexeme
Definition: ts_public.h:111
uint16 posinsubst
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:579
uint16 nvariant
Definition: ts_public.h:107
TSDictionaryCacheEntry * subdict
#define NULL
Definition: c.h:226
#define PG_NARGS()
Definition: fmgr.h:160
#define DatumGetPointer(X)
Definition: postgres.h:557
void * private_state
Definition: ts_public.h:128
void * palloc(Size size)
Definition: mcxt.c:891
int i
#define elog
Definition: elog.h:219
static LexemeInfo * findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
static void thesaurusRead ( char *  filename,
DictThesaurus *  d 
)
static

Definition at line 168 of file dict_thesaurus.c.

References addWrd(), elog, ereport, errcode(), errmsg(), ERROR, get_tsearch_config_filename(), LexemeInfo::idsubst, newLexeme(), DictThesaurus::nsubst, NULL, pfree(), pg_mblen(), LexemeInfo::posinsubst, t_iseq, t_isspace, TR_INLEX, TR_INSUBS, TR_WAITLEX, TR_WAITSUBS, tsearch_readline(), tsearch_readline_begin(), and tsearch_readline_end().

Referenced by thesaurus_init().

169 {
171  uint32 idsubst = 0;
172  bool useasis = false;
173  char *line;
174 
176  if (!tsearch_readline_begin(&trst, filename))
177  ereport(ERROR,
178  (errcode(ERRCODE_CONFIG_FILE_ERROR),
179  errmsg("could not open thesaurus file \"%s\": %m",
180  filename)));
181 
182  while ((line = tsearch_readline(&trst)) != NULL)
183  {
184  char *ptr;
185  int state = TR_WAITLEX;
186  char *beginwrd = NULL;
187  uint32 posinsubst = 0;
188  uint32 nwrd = 0;
189 
190  ptr = line;
191 
192  /* is it a comment? */
193  while (*ptr && t_isspace(ptr))
194  ptr += pg_mblen(ptr);
195 
196  if (t_iseq(ptr, '#') || *ptr == '\0' ||
197  t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
198  {
199  pfree(line);
200  continue;
201  }
202 
203  while (*ptr)
204  {
205  if (state == TR_WAITLEX)
206  {
207  if (t_iseq(ptr, ':'))
208  {
209  if (posinsubst == 0)
210  ereport(ERROR,
211  (errcode(ERRCODE_CONFIG_FILE_ERROR),
212  errmsg("unexpected delimiter")));
213  state = TR_WAITSUBS;
214  }
215  else if (!t_isspace(ptr))
216  {
217  beginwrd = ptr;
218  state = TR_INLEX;
219  }
220  }
221  else if (state == TR_INLEX)
222  {
223  if (t_iseq(ptr, ':'))
224  {
225  newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
226  state = TR_WAITSUBS;
227  }
228  else if (t_isspace(ptr))
229  {
230  newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
231  state = TR_WAITLEX;
232  }
233  }
234  else if (state == TR_WAITSUBS)
235  {
236  if (t_iseq(ptr, '*'))
237  {
238  useasis = true;
239  state = TR_INSUBS;
240  beginwrd = ptr + pg_mblen(ptr);
241  }
242  else if (t_iseq(ptr, '\\'))
243  {
244  useasis = false;
245  state = TR_INSUBS;
246  beginwrd = ptr + pg_mblen(ptr);
247  }
248  else if (!t_isspace(ptr))
249  {
250  useasis = false;
251  beginwrd = ptr;
252  state = TR_INSUBS;
253  }
254  }
255  else if (state == TR_INSUBS)
256  {
257  if (t_isspace(ptr))
258  {
259  if (ptr == beginwrd)
260  ereport(ERROR,
261  (errcode(ERRCODE_CONFIG_FILE_ERROR),
262  errmsg("unexpected end of line or lexeme")));
263  addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
264  state = TR_WAITSUBS;
265  }
266  }
267  else
268  elog(ERROR, "unrecognized thesaurus state: %d", state);
269 
270  ptr += pg_mblen(ptr);
271  }
272 
273  if (state == TR_INSUBS)
274  {
275  if (ptr == beginwrd)
276  ereport(ERROR,
277  (errcode(ERRCODE_CONFIG_FILE_ERROR),
278  errmsg("unexpected end of line or lexeme")));
279  addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
280  }
281 
282  idsubst++;
283 
284  if (!(nwrd && posinsubst))
285  ereport(ERROR,
286  (errcode(ERRCODE_CONFIG_FILE_ERROR),
287  errmsg("unexpected end of line")));
288 
289  /*
290  * Note: currently, tsearch_readline can't return lines exceeding 4KB,
291  * so overflow of the word counts is impossible. But that may not
292  * always be true, so let's check.
293  */
294  if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
295  ereport(ERROR,
296  (errcode(ERRCODE_CONFIG_FILE_ERROR),
297  errmsg("too many lexemes in thesaurus entry")));
298 
299  pfree(line);
300  }
301 
302  d->nsubst = idsubst;
303 
304  tsearch_readline_end(&trst);
305 }
#define t_isspace(x)
Definition: ts_locale.h:58
int errcode(int sqlerrcode)
Definition: elog.c:575
unsigned short uint16
Definition: c.h:264
void pfree(void *pointer)
Definition: mcxt.c:992
static void addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
#define ERROR
Definition: elog.h:43
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:33
#define t_iseq(x, c)
Definition: ts_locale.h:61
unsigned int uint32
Definition: c.h:265
#define ereport(elevel, rest)
Definition: elog.h:122
#define TR_INLEX
#define NULL
Definition: c.h:226
Definition: regguts.h:298
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:153
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:138
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:116
static char * filename
Definition: pg_dumpall.c:84
#define TR_WAITLEX
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define TR_WAITSUBS
#define TR_INSUBS
#define elog
Definition: elog.h:219
static void newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)