PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
dict_thesaurus.c File Reference
#include "postgres.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "utils/fmgrprotos.h"
#include "utils/regproc.h"
Include dependency graph for dict_thesaurus.c:

Go to the source code of this file.

Data Structures

struct  LexemeInfo
 
struct  TheLexeme
 
struct  TheSubstitute
 
struct  DictThesaurus
 

Macros

#define DT_USEASIS   0x1000
 
#define TR_WAITLEX   1
 
#define TR_INLEX   2
 
#define TR_WAITSUBS   3
 
#define TR_INSUBS   4
 

Typedefs

typedef struct LexemeInfo LexemeInfo
 

Functions

static void newLexeme (DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
 
static void addWrd (DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
 
static void thesaurusRead (const char *filename, DictThesaurus *d)
 
static TheLexeme * addCompiledLexeme (TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
 
static int cmpLexemeInfo (LexemeInfo *a, LexemeInfo *b)
 
static int cmpLexeme (const TheLexeme *a, const TheLexeme *b)
 
static int cmpLexemeQ (const void *a, const void *b)
 
static int cmpTheLexeme (const void *a, const void *b)
 
static void compileTheLexeme (DictThesaurus *d)
 
static void compileTheSubstitute (DictThesaurus *d)
 
Datum thesaurus_init (PG_FUNCTION_ARGS)
 
static LexemeInfo * findTheLexeme (DictThesaurus *d, char *lexeme)
 
static bool matchIdSubst (LexemeInfo *stored, uint32 idsubst)
 
static LexemeInfo * findVariant (LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
 
static TSLexeme * copyTSLexeme (TheSubstitute *ts)
 
static TSLexeme * checkMatch (DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
 
Datum thesaurus_lexize (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

◆ DT_USEASIS

#define DT_USEASIS   0x1000

Definition at line 28 of file dict_thesaurus.c.

◆ TR_INLEX

#define TR_INLEX   2

Definition at line 163 of file dict_thesaurus.c.

◆ TR_INSUBS

#define TR_INSUBS   4

Definition at line 165 of file dict_thesaurus.c.

◆ TR_WAITLEX

#define TR_WAITLEX   1

Definition at line 162 of file dict_thesaurus.c.

◆ TR_WAITSUBS

#define TR_WAITSUBS   3

Definition at line 164 of file dict_thesaurus.c.

Typedef Documentation

◆ LexemeInfo

typedef struct LexemeInfo LexemeInfo

Function Documentation

◆ addCompiledLexeme()

/*
 * Append one compiled sample lexeme to the growable array "newwrds".
 *
 * *nnw is the number of slots in use and *tnm the allocated capacity; the
 * array is doubled when it is full, so callers must continue with the
 * returned pointer rather than the one they passed in.
 *
 * When "lexeme" is NULL or carries no text (compileTheLexeme passes NULL for
 * the "?" stop-word marker), the new slot gets a NULL lexeme and a variant
 * size of 1; otherwise the text is duplicated and "tnvariant" is recorded.
 * The rule id and position are copied from "src".
 */
static TheLexeme *
addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
{
	TheLexeme  *slot;
	LexemeInfo *entry;
	bool		hastext;

	/* grow before taking the slot address: repalloc may move the array */
	if (*nnw >= *tnm)
	{
		*tnm *= 2;
		newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
	}

	slot = &newwrds[*nnw];
	entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
	slot->entries = entry;

	hastext = (lexeme != NULL && lexeme->lexeme != NULL);
	slot->lexeme = hastext ? pstrdup(lexeme->lexeme) : NULL;
	entry->tnvariant = hastext ? tnvariant : 1;

	entry->idsubst = src->idsubst;
	entry->posinsubst = src->posinsubst;
	entry->nextentry = NULL;

	*nnw += 1;
	return newwrds;
}
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
uint16 posinsubst
struct LexemeInfo * nextentry
uint32 idsubst
uint16 tnvariant
char * lexeme
Definition: ts_public.h:138
char * lexeme
LexemeInfo * entries

References TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, TSLexeme::lexeme, LexemeInfo::nextentry, palloc(), LexemeInfo::posinsubst, pstrdup(), repalloc(), and LexemeInfo::tnvariant.

Referenced by compileTheLexeme().

◆ addWrd()

/*
 * Append the word [b, e) to the substitute phrase of rule "idsubst".
 *
 * nwrd == 0 marks the first word of a rule: the per-rule counters are reset
 * and d->subst is created (16 entries) or doubled if "idsubst" does not fit.
 * The counters are function-static, so all words of one rule must be added
 * by consecutive calls.
 *
 * Each call also stores posinsubst - 1 as the rule's lastlexeme.  The word
 * is copied into a fresh NUL-terminated buffer, nvariant is set to "nwrd",
 * flags is DT_USEASIS or 0 according to "useasis", and the element after the
 * new word always gets a NULL lexeme as terminator.
 */
static void
addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
{
	static int	used = 0;		/* words stored for the current rule */
	static int	cap = 0;		/* allocated length of the rule's res[] */
	TheSubstitute *rule;
	TSLexeme   *word;
	Size		wordlen = e - b;

	if (nwrd == 0)
	{
		used = 0;
		cap = 0;

		if (idsubst >= d->nsubst)
		{
			if (d->nsubst != 0)
			{
				d->nsubst *= 2;
				d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
			}
			else
			{
				d->nsubst = 16;
				d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
			}
		}
	}

	rule = &d->subst[idsubst];
	rule->lastlexeme = posinsubst - 1;

	/* keep room for the new word plus the terminating element */
	if (used + 1 >= cap)
	{
		if (cap != 0)
		{
			cap *= 2;
			rule->res = (TSLexeme *) repalloc(rule->res, sizeof(TSLexeme) * cap);
		}
		else
		{
			cap = 2;
			rule->res = (TSLexeme *) palloc(sizeof(TSLexeme) * cap);
		}
	}

	word = &rule->res[used];
	word->lexeme = palloc(wordlen + 1);
	memcpy(word->lexeme, b, wordlen);
	word->lexeme[wordlen] = '\0';
	word->nvariant = nwrd;
	word->flags = useasis ? DT_USEASIS : 0;

	used++;
	rule->res[used].lexeme = NULL;
}
#define DT_USEASIS
int b
Definition: isn.c:69
e
Definition: preproc-init.c:82
TheSubstitute * subst
uint16 nvariant
Definition: ts_public.h:134
uint16 flags
Definition: ts_public.h:136
TSLexeme * res

References b, DT_USEASIS, TSLexeme::flags, LexemeInfo::idsubst, TheSubstitute::lastlexeme, TSLexeme::lexeme, DictThesaurus::nsubst, TSLexeme::nvariant, palloc(), LexemeInfo::posinsubst, repalloc(), TheSubstitute::res, and DictThesaurus::subst.

Referenced by thesaurusRead().

◆ checkMatch()

/*
 * Walk the nextvariant chain starting at "info" and return a fresh copy of
 * the substitute of the first rule whose lastlexeme equals "curpos".
 *
 * *moreres is set to true as soon as a visited node has a successor in the
 * chain, and stays false otherwise.  Returns NULL when no visited rule ends
 * at "curpos".
 */
static TSLexeme *
checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
{
	LexemeInfo *cur;

	*moreres = false;

	for (cur = info; cur != NULL; cur = cur->nextvariant)
	{
		TheSubstitute *rule;

		Assert(cur->idsubst < d->nsubst);

		if (cur->nextvariant != NULL)
			*moreres = true;

		rule = &d->subst[cur->idsubst];
		if (rule->lastlexeme == curpos)
			return copyTSLexeme(rule);
	}

	return NULL;
}
#define Assert(condition)
Definition: c.h:812
static TSLexeme * copyTSLexeme(TheSubstitute *ts)
struct LexemeInfo * nextvariant

References Assert, copyTSLexeme(), LexemeInfo::idsubst, TheSubstitute::lastlexeme, LexemeInfo::nextvariant, DictThesaurus::nsubst, and DictThesaurus::subst.

Referenced by thesaurus_lexize().

◆ cmpLexeme()

static int cmpLexeme ( const TheLexeme a,
const TheLexeme b 
)
static

Definition at line 356 of file dict_thesaurus.c.

357{
358 if (a->lexeme == NULL)
359 {
360 if (b->lexeme == NULL)
361 return 0;
362 else
363 return 1;
364 }
365 else if (b->lexeme == NULL)
366 return -1;
367
368 return strcmp(a->lexeme, b->lexeme);
369}
int a
Definition: isn.c:68

References a, and b.

Referenced by cmpLexemeQ(), cmpTheLexeme(), and compileTheLexeme().

◆ cmpLexemeInfo()

/*
 * Three-way comparison of two LexemeInfo entries by idsubst, then
 * posinsubst, then tnvariant.  Returns 1, -1 or 0.
 * If either pointer is NULL the entries are reported as equal (0).
 */
static int
cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
{
	if (a == NULL || b == NULL)
		return 0;

	if (a->idsubst != b->idsubst)
		return (a->idsubst > b->idsubst) ? 1 : -1;

	if (a->posinsubst != b->posinsubst)
		return (a->posinsubst > b->posinsubst) ? 1 : -1;

	if (a->tnvariant != b->tnvariant)
		return (a->tnvariant > b->tnvariant) ? 1 : -1;

	return 0;
}

References a, and b.

Referenced by cmpTheLexeme(), and compileTheLexeme().

◆ cmpLexemeQ()

static int cmpLexemeQ ( const void *  a,
const void *  b 
)
static

Definition at line 372 of file dict_thesaurus.c.

373{
374 return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b);
375}
static int cmpLexeme(const TheLexeme *a, const TheLexeme *b)

References a, b, and cmpLexeme().

Referenced by findTheLexeme().

◆ cmpTheLexeme()

static int cmpTheLexeme ( const void *  a,
const void *  b 
)
static

Definition at line 378 of file dict_thesaurus.c.

379{
380 const TheLexeme *la = (const TheLexeme *) a;
381 const TheLexeme *lb = (const TheLexeme *) b;
382 int res;
383
384 if ((res = cmpLexeme(la, lb)) != 0)
385 return res;
386
387 return -cmpLexemeInfo(la->entries, lb->entries);
388}
static int cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)

References a, b, cmpLexeme(), cmpLexemeInfo(), TheLexeme::entries, and res.

Referenced by compileTheLexeme().

◆ compileTheLexeme()

/*
 * Replace the raw sample words collected in d->wrds by the lexemes the
 * subdictionary produces for them, then sort the result and merge duplicates.
 *
 * For each raw word:
 *	- "?" is the stop-word marker and is stored as an entry with a NULL
 *	  lexeme (no subdictionary call);
 *	- any other word is passed to the subdictionary's lexize function.  A
 *	  NULL result (word not recognized) or an empty result (stop word) is
 *	  reported as a configuration-file error; otherwise every returned
 *	  lexeme is stored together with the number of lexemes in its variant.
 * The raw word and its entry are freed afterwards.
 *
 * The new array is sorted with cmpTheLexeme and then compacted: items with
 * equal lexeme text are folded into the first one, chaining entries that
 * differ through nextentry and freeing exact duplicates.
 */
static void
compileTheLexeme(DictThesaurus *d)
{
	int			i,
				nnw = 0,
				tnm = 16;
	TheLexeme  *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
			   *ptrwrds;

	for (i = 0; i < d->nwrds; i++)
	{
		TSLexeme   *ptr;

		if (strcmp(d->wrds[i].lexeme, "?") == 0)	/* Is stop word marker? */
			newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
		else
		{
			ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
															 PointerGetDatum(d->subdict->dictData),
															 PointerGetDatum(d->wrds[i].lexeme),
															 Int32GetDatum(strlen(d->wrds[i].lexeme)),
															 PointerGetDatum(NULL)));

			if (!ptr)
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
								d->wrds[i].lexeme,
								d->wrds[i].entries->idsubst + 1)));
			else if (!(ptr->lexeme))
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
								d->wrds[i].lexeme,
								d->wrds[i].entries->idsubst + 1),
						 errhint("Use \"?\" to represent a stop word within a sample phrase.")));
			else
			{
				/* one pass of this loop handles one variant (run of equal nvariant) */
				while (ptr->lexeme)
				{
					TSLexeme   *remptr = ptr + 1;
					int			tnvar = 1;
					int			curvar = ptr->nvariant;

					/* compute n words in one variant */
					while (remptr->lexeme)
					{
						if (remptr->nvariant != (remptr - 1)->nvariant)
							break;
						tnvar++;
						remptr++;
					}

					/* store each lexeme of the variant, tagged with the variant size */
					remptr = ptr;
					while (remptr->lexeme && remptr->nvariant == curvar)
					{
						newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
						remptr++;
					}

					ptr = remptr;
				}
			}
		}

		pfree(d->wrds[i].lexeme);
		pfree(d->wrds[i].entries);
	}

	if (d->wrds)
		pfree(d->wrds);
	d->wrds = newwrds;
	d->nwrds = nnw;
	d->ntwrds = tnm;

	if (d->nwrds > 1)
	{
		qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);

		/* uniq */
		newwrds = d->wrds;		/* last item kept so far */
		ptrwrds = d->wrds + 1;	/* item under examination */
		while (ptrwrds - d->wrds < d->nwrds)
		{
			if (cmpLexeme(ptrwrds, newwrds) == 0)
			{
				if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
				{
					/* same text, different info: push onto the kept item's list */
					ptrwrds->entries->nextentry = newwrds->entries;
					newwrds->entries = ptrwrds->entries;
				}
				else
					pfree(ptrwrds->entries);

				if (ptrwrds->lexeme)
					pfree(ptrwrds->lexeme);
			}
			else
			{
				newwrds++;
				*newwrds = *ptrwrds;
			}

			ptrwrds++;
		}

		/* shrink the array to the number of distinct lexemes */
		d->nwrds = newwrds - d->wrds + 1;
		d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
	}
}
static TheLexeme * addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
static int cmpTheLexeme(const void *a, const void *b)
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:665
int i
Definition: isn.c:72
void pfree(void *pointer)
Definition: mcxt.c:1521
#define qsort(a, b, c, d)
Definition: port.h:447
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
TheLexeme * wrds
TSDictionaryCacheEntry * subdict

References addCompiledLexeme(), cmpLexeme(), cmpLexemeInfo(), cmpTheLexeme(), DatumGetPointer(), TSDictionaryCacheEntry::dictData, TheLexeme::entries, ereport, errcode(), errhint(), errmsg(), ERROR, FunctionCall4, i, LexemeInfo::idsubst, Int32GetDatum(), TheLexeme::lexeme, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, LexemeInfo::nextentry, DictThesaurus::ntwrds, TSLexeme::nvariant, DictThesaurus::nwrds, palloc(), pfree(), PointerGetDatum(), qsort, repalloc(), DictThesaurus::subdict, and DictThesaurus::wrds.

Referenced by thesaurus_init().

◆ compileTheSubstitute()

/*
 * Rebuild the substitute phrase of every rule from its raw words.
 *
 * Words flagged DT_USEASIS are copied unchanged (with flags cleared); all
 * other words are passed to the subdictionary's lexize function.  A word the
 * subdictionary turns into an empty result (stop word) or does not recognize
 * (NULL result) is reported as a configuration-file error, as is a rule whose
 * resulting phrase is empty.
 *
 * The lexemes produced are appended to a new res[] array whose length is
 * stored in reslen.  For every word that does not start the output array,
 * the first lexeme it contributes is marked with TSL_ADDPOS.  The raw words
 * and the old array are freed.
 */
static void
compileTheSubstitute(DictThesaurus *d)
{
	int			i;

	for (i = 0; i < d->nsubst; i++)
	{
		TSLexeme   *rem = d->subst[i].res,
				   *outptr,
				   *inptr;
		int			n = 2;

		outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
		outptr->lexeme = NULL;
		inptr = rem;

		while (inptr && inptr->lexeme)
		{
			TSLexeme   *lexized,
						tmplex[2];

			if (inptr->flags & DT_USEASIS)
			{					/* do not lexize */
				tmplex[0] = *inptr;
				tmplex[0].flags = 0;
				tmplex[1].lexeme = NULL;
				lexized = tmplex;
			}
			else
			{
				lexized = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
																	 PointerGetDatum(d->subdict->dictData),
																	 PointerGetDatum(inptr->lexeme),
																	 Int32GetDatum(strlen(inptr->lexeme)),
																	 PointerGetDatum(NULL)));
			}

			if (lexized && lexized->lexeme)
			{
				/* index of this word's first output lexeme, or -1 if it starts the array */
				int			toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;

				while (lexized->lexeme)
				{
					if (outptr - d->subst[i].res + 1 >= n)
					{
						/* remember the offset: repalloc may move the array */
						int			diff = outptr - d->subst[i].res;

						n *= 2;
						d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
						outptr = d->subst[i].res + diff;
					}

					*outptr = *lexized;
					outptr->lexeme = pstrdup(lexized->lexeme);

					outptr++;
					lexized++;
				}

				if (toset > 0)
					d->subst[i].res[toset].flags |= TSL_ADDPOS;
			}
			else if (lexized)
			{
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
								inptr->lexeme, i + 1)));
			}
			else
			{
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
								inptr->lexeme, i + 1)));
			}

			if (inptr->lexeme)
				pfree(inptr->lexeme);
			inptr++;
		}

		if (outptr == d->subst[i].res)
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("thesaurus substitute phrase is empty (rule %d)",
							i + 1)));

		d->subst[i].reslen = outptr - d->subst[i].res;

		pfree(rem);
	}
}
#define TSL_ADDPOS
Definition: ts_public.h:142

References DatumGetPointer(), TSDictionaryCacheEntry::dictData, DT_USEASIS, ereport, errcode(), errmsg(), ERROR, TSLexeme::flags, FunctionCall4, i, Int32GetDatum(), TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, DictThesaurus::nsubst, palloc(), pfree(), PointerGetDatum(), pstrdup(), repalloc(), TheSubstitute::res, TheSubstitute::reslen, DictThesaurus::subdict, DictThesaurus::subst, and TSL_ADDPOS.

Referenced by thesaurus_init().

◆ copyTSLexeme()

/*
 * Return a newly allocated copy of the substitute phrase of "ts".
 *
 * The result has ts->reslen elements, each with its own duplicate of the
 * lexeme string, followed by one element whose lexeme is NULL.
 */
static TSLexeme *
copyTSLexeme(TheSubstitute *ts)
{
	TSLexeme   *copy = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
	uint16		idx = 0;

	while (idx < ts->reslen)
	{
		/* struct copy first, then give the copy its own string */
		copy[idx] = ts->res[idx];
		copy[idx].lexeme = pstrdup(ts->res[idx].lexeme);
		idx++;
	}

	copy[ts->reslen].lexeme = NULL;

	return copy;
}
uint16_t uint16
Definition: c.h:484

References i, TSLexeme::lexeme, palloc(), pstrdup(), res, TheSubstitute::res, and TheSubstitute::reslen.

Referenced by checkMatch().

◆ findTheLexeme()

/*
 * Look up "lexeme" in the array d->wrds and return its list of LexemeInfo
 * entries, or NULL if the array is empty or holds no such lexeme.
 *
 * The search is a bsearch() using cmpLexemeQ, which compares lexeme text
 * only.  "lexeme" may be NULL: thesaurus_lexize passes NULL to look up the
 * stop-word entry, and cmpLexeme handles NULL text explicitly.
 */
static LexemeInfo *
findTheLexeme(DictThesaurus *d, char *lexeme)
{
	TheLexeme	key,
			   *res;

	if (d->nwrds == 0)
		return NULL;

	key.lexeme = lexeme;
	key.entries = NULL;

	res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);

	if (res == NULL)
		return NULL;
	return res->entries;
}
static int cmpLexemeQ(const void *a, const void *b)

References cmpLexemeQ(), sort-test::key, DictThesaurus::nwrds, res, and DictThesaurus::wrds.

Referenced by thesaurus_lexize().

◆ findVariant()

/*
 * Search the "newn" entry lists in newin[] for rules in which every list has
 * an entry at position "curpos" with tnvariant equal to "newn".
 *
 * Each such rule that also passes matchIdSubst(stored, ...) and is not yet
 * present in "in" is pushed onto the front of "in" through nextvariant.
 * The function returns the resulting list head once any of the lists is
 * exhausted.
 *
 * The newin[] slots are advanced along their nextentry chains as the search
 * proceeds, so the array contents are consumed by this call.
 */
static LexemeInfo *
findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
{
	for (;;)
	{
		LexemeInfo *pivot = newin[0];	/* entry whose rule id is being matched */
		int			k = 0;

		while (k < newn)
		{
			LexemeInfo **slot = &newin[k];

			/* drop entries of rules that precede the pivot's rule */
			while (*slot != NULL && (*slot)->idsubst < pivot->idsubst)
				*slot = (*slot)->nextentry;

			if (*slot == NULL)
				return in;

			if ((*slot)->idsubst > pivot->idsubst)
			{
				/* this list is already past the pivot's rule: restart with it */
				pivot = *slot;
				k = 0;
				continue;
			}

			/* same rule: look for an entry at curpos with the right variant size */
			while ((*slot)->idsubst == pivot->idsubst)
			{
				if ((*slot)->posinsubst == curpos && (*slot)->tnvariant == newn)
				{
					pivot = *slot;
					break;
				}

				*slot = (*slot)->nextentry;
				if (*slot == NULL)
					return in;
			}

			if ((*slot)->idsubst != pivot->idsubst)
			{
				/* ran into the next rule without a hit: restart with it */
				pivot = *slot;
				k = 0;
				continue;
			}

			k++;
		}

		if (k == newn && matchIdSubst(stored, pivot->idsubst) && (in == NULL || !matchIdSubst(in, pivot->idsubst)))
		{						/* found */

			pivot->nextvariant = in;
			in = pivot;
		}

		/* step forward */
		for (k = 0; k < newn; k++)
			newin[k] = newin[k]->nextentry;
	}
}
static bool matchIdSubst(LexemeInfo *stored, uint32 idsubst)

References i, LexemeInfo::idsubst, matchIdSubst(), LexemeInfo::nextentry, LexemeInfo::nextvariant, LexemeInfo::posinsubst, and LexemeInfo::tnvariant.

Referenced by thesaurus_lexize().

◆ matchIdSubst()

/*
 * Report whether rule "idsubst" occurs in the nextvariant chain starting at
 * "stored".  An empty chain (stored == NULL) matches every rule.
 */
static bool
matchIdSubst(LexemeInfo *stored, uint32 idsubst)
{
	LexemeInfo *cur;

	if (stored == NULL)
		return true;

	for (cur = stored; cur != NULL; cur = cur->nextvariant)
	{
		if (cur->idsubst == idsubst)
			return true;
	}

	return false;
}

References LexemeInfo::idsubst, LexemeInfo::nextvariant, and res.

Referenced by findVariant().

◆ newLexeme()

/*
 * Append the sample word [b, e) to d->wrds, creating the array with 16 slots
 * on first use and doubling it whenever it is full.
 *
 * The word is copied into a fresh NUL-terminated buffer and gets a single
 * LexemeInfo entry recording the rule id and the word's position in the
 * sample phrase; the entry's nextentry link is NULL.
 */
static void
newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
{
	TheLexeme  *slot;
	LexemeInfo *entry;
	Size		wordlen = e - b;

	if (d->nwrds >= d->ntwrds)
	{
		if (d->ntwrds != 0)
		{
			d->ntwrds *= 2;
			d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
		}
		else
		{
			d->ntwrds = 16;
			d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
		}
	}

	slot = &d->wrds[d->nwrds];
	d->nwrds++;

	slot->lexeme = palloc(wordlen + 1);
	memcpy(slot->lexeme, b, wordlen);
	slot->lexeme[wordlen] = '\0';

	entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
	slot->entries = entry;

	entry->nextentry = NULL;
	entry->idsubst = idsubst;
	entry->posinsubst = posinsubst;
}

References b, TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, LexemeInfo::nextentry, DictThesaurus::ntwrds, DictThesaurus::nwrds, palloc(), LexemeInfo::posinsubst, repalloc(), and DictThesaurus::wrds.

Referenced by thesaurusRead().

◆ thesaurus_init()

Datum thesaurus_init ( PG_FUNCTION_ARGS  )

Definition at line 596 of file dict_thesaurus.c.

597{
598 List *dictoptions = (List *) PG_GETARG_POINTER(0);
599 DictThesaurus *d;
600 char *subdictname = NULL;
601 bool fileloaded = false;
602 List *namelist;
603 ListCell *l;
604
605 d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
606
607 foreach(l, dictoptions)
608 {
609 DefElem *defel = (DefElem *) lfirst(l);
610
611 if (strcmp(defel->defname, "dictfile") == 0)
612 {
613 if (fileloaded)
615 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
616 errmsg("multiple DictFile parameters")));
617 thesaurusRead(defGetString(defel), d);
618 fileloaded = true;
619 }
620 else if (strcmp(defel->defname, "dictionary") == 0)
621 {
622 if (subdictname)
624 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
625 errmsg("multiple Dictionary parameters")));
626 subdictname = pstrdup(defGetString(defel));
627 }
628 else
629 {
631 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
632 errmsg("unrecognized Thesaurus parameter: \"%s\"",
633 defel->defname)));
634 }
635 }
636
637 if (!fileloaded)
639 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
640 errmsg("missing DictFile parameter")));
641 if (!subdictname)
643 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
644 errmsg("missing Dictionary parameter")));
645
646 namelist = stringToQualifiedNameList(subdictname, NULL);
647 d->subdictOid = get_ts_dict_oid(namelist, false);
649
652
654}
char * defGetString(DefElem *def)
Definition: define.c:35
static void compileTheSubstitute(DictThesaurus *d)
static void thesaurusRead(const char *filename, DictThesaurus *d)
static void compileTheLexeme(DictThesaurus *d)
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
void * palloc0(Size size)
Definition: mcxt.c:1347
Oid get_ts_dict_oid(List *names, bool missing_ok)
Definition: namespace.c:2861
#define lfirst(lc)
Definition: pg_list.h:172
List * stringToQualifiedNameList(const char *string, Node *escontext)
Definition: regproc.c:1797
char * defname
Definition: parsenodes.h:817
Definition: pg_list.h:54
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:208

References compileTheLexeme(), compileTheSubstitute(), defGetString(), DefElem::defname, ereport, errcode(), errmsg(), ERROR, get_ts_dict_oid(), lfirst, lookup_ts_dictionary_cache(), palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, pstrdup(), stringToQualifiedNameList(), DictThesaurus::subdict, DictThesaurus::subdictOid, and thesaurusRead().

◆ thesaurus_lexize()

Datum thesaurus_lexize ( PG_FUNCTION_ARGS  )

Definition at line 788 of file dict_thesaurus.c.

789{
792 TSLexeme *res = NULL;
793 LexemeInfo *stored,
794 *info = NULL;
795 uint16 curpos = 0;
796 bool moreres = false;
797
798 if (PG_NARGS() != 4 || dstate == NULL)
799 elog(ERROR, "forbidden call of thesaurus or nested call");
800
801 if (dstate->isend)
802 PG_RETURN_POINTER(NULL);
803 stored = (LexemeInfo *) dstate->private_state;
804
805 if (stored)
806 curpos = stored->posinsubst + 1;
807
808 if (!d->subdict->isvalid)
810
815 PointerGetDatum(NULL)));
816
817 if (res && res->lexeme)
818 {
819 TSLexeme *ptr = res,
820 *basevar;
821
822 while (ptr->lexeme)
823 {
824 uint16 nv = ptr->nvariant;
825 uint16 i,
826 nlex = 0;
827 LexemeInfo **infos;
828
829 basevar = ptr;
830 while (ptr->lexeme && nv == ptr->nvariant)
831 {
832 nlex++;
833 ptr++;
834 }
835
836 infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
837 for (i = 0; i < nlex; i++)
838 if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
839 break;
840
841 if (i < nlex)
842 {
843 /* no chance to find */
844 pfree(infos);
845 continue;
846 }
847
848 info = findVariant(info, stored, curpos, infos, nlex);
849 }
850 }
851 else if (res)
852 { /* stop-word */
853 LexemeInfo *infos = findTheLexeme(d, NULL);
854
855 info = findVariant(NULL, stored, curpos, &infos, 1);
856 }
857 else
858 {
859 info = NULL; /* word isn't recognized */
860 }
861
862 dstate->private_state = info;
863
864 if (!info)
865 {
866 dstate->getnext = false;
867 PG_RETURN_POINTER(NULL);
868 }
869
870 if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
871 {
872 dstate->getnext = moreres;
874 }
875
876 dstate->getnext = true;
877
878 PG_RETURN_POINTER(NULL);
879}
static LexemeInfo * findTheLexeme(DictThesaurus *d, char *lexeme)
static TSLexeme * checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
static LexemeInfo * findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
#define elog(elevel,...)
Definition: elog.h:225
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_NARGS()
Definition: fmgr.h:203
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void * private_state
Definition: ts_public.h:155
bool getnext
Definition: ts_public.h:154

References checkMatch(), DatumGetPointer(), TSDictionaryCacheEntry::dictData, elog, ERROR, findTheLexeme(), findVariant(), FunctionCall4, DictSubState::getnext, i, if(), DictSubState::isend, TSDictionaryCacheEntry::isvalid, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), TSLexeme::nvariant, palloc(), pfree(), PG_GETARG_DATUM, PG_GETARG_POINTER, PG_NARGS, PG_RETURN_POINTER, PointerGetDatum(), LexemeInfo::posinsubst, DictSubState::private_state, res, DictThesaurus::subdict, and DictThesaurus::subdictOid.

◆ thesaurusRead()

/*
 * Read the thesaurus file named by "filename" and fill d with its raw rules.
 *
 * The name is first resolved with get_tsearch_config_filename().  Lines that
 * are empty or whose first non-space character is '#' are skipped.  Every
 * other line is one rule, parsed by a four-state machine:
 *
 *	TR_WAITLEX	- between sample words; ':' switches to the substitute part
 *				  (an error if no sample word was seen yet);
 *	TR_INLEX	- inside a sample word; whitespace or ':' ends it and the
 *				  word is stored with newLexeme();
 *	TR_WAITSUBS - between substitute words; a leading '*' starts a word that
 *				  is flagged useasis, a leading '\' starts a word right after
 *				  the backslash, any other non-space character starts a
 *				  normal word;
 *	TR_INSUBS	- inside a substitute word; whitespace (or end of line) ends
 *				  it and the word is stored with addWrd().
 *
 * A rule must contain at least one sample word and one substitute word, and
 * both counts must fit in uint16.  On return d->nsubst is the number of
 * rules read.
 */
static void
thesaurusRead(const char *filename, DictThesaurus *d)
{
	tsearch_readline_state trst;
	uint32		idsubst = 0;
	bool		useasis = false;
	char	   *line;

	filename = get_tsearch_config_filename(filename, "ths");
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open thesaurus file \"%s\": %m",
						filename)));

	while ((line = tsearch_readline(&trst)) != NULL)
	{
		char	   *ptr;
		int			state = TR_WAITLEX;
		char	   *beginwrd = NULL;
		uint32		posinsubst = 0;
		uint32		nwrd = 0;

		ptr = line;

		/* is it a comment? */
		while (*ptr && isspace((unsigned char) *ptr))
			ptr += pg_mblen(ptr);

		if (t_iseq(ptr, '#') || *ptr == '\0' ||
			t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
		{
			pfree(line);
			continue;
		}

		while (*ptr)
		{
			if (state == TR_WAITLEX)
			{
				if (t_iseq(ptr, ':'))
				{
					if (posinsubst == 0)
						ereport(ERROR,
								(errcode(ERRCODE_CONFIG_FILE_ERROR),
								 errmsg("unexpected delimiter")));
					state = TR_WAITSUBS;
				}
				else if (!isspace((unsigned char) *ptr))
				{
					beginwrd = ptr;
					state = TR_INLEX;
				}
			}
			else if (state == TR_INLEX)
			{
				if (t_iseq(ptr, ':'))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITSUBS;
				}
				else if (isspace((unsigned char) *ptr))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITLEX;
				}
			}
			else if (state == TR_WAITSUBS)
			{
				if (t_iseq(ptr, '*'))
				{
					useasis = true;
					state = TR_INSUBS;
					beginwrd = ptr + pg_mblen(ptr);
				}
				else if (t_iseq(ptr, '\\'))
				{
					useasis = false;
					state = TR_INSUBS;
					beginwrd = ptr + pg_mblen(ptr);
				}
				else if (!isspace((unsigned char) *ptr))
				{
					useasis = false;
					beginwrd = ptr;
					state = TR_INSUBS;
				}
			}
			else if (state == TR_INSUBS)
			{
				if (isspace((unsigned char) *ptr))
				{
					/* a '*' or '\' prefix with nothing after it */
					if (ptr == beginwrd)
						ereport(ERROR,
								(errcode(ERRCODE_CONFIG_FILE_ERROR),
								 errmsg("unexpected end of line or lexeme")));
					addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
					state = TR_WAITSUBS;
				}
			}
			else
				elog(ERROR, "unrecognized thesaurus state: %d", state);

			ptr += pg_mblen(ptr);
		}

		/* the line ended inside a substitute word: store that last word */
		if (state == TR_INSUBS)
		{
			if (ptr == beginwrd)
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("unexpected end of line or lexeme")));
			addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
		}

		idsubst++;

		if (!(nwrd && posinsubst))
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("unexpected end of line")));

		/* newLexeme() and addWrd() take these counters as uint16 */
		if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("too many lexemes in thesaurus entry")));

		pfree(line);
	}

	d->nsubst = idsubst;

	tsearch_readline_end(&trst);
}
uint32_t uint32
Definition: c.h:485
#define TR_WAITLEX
static void addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
#define TR_INLEX
#define TR_WAITSUBS
#define TR_INSUBS
static void newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
static char * filename
Definition: pg_dumpall.c:119
Definition: regguts.h:323
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:89
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:112
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:157
#define t_iseq(x, c)
Definition: ts_locale.h:38
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:34

References addWrd(), elog, ereport, errcode(), errmsg(), ERROR, filename, get_tsearch_config_filename(), LexemeInfo::idsubst, newLexeme(), DictThesaurus::nsubst, pfree(), pg_mblen(), LexemeInfo::posinsubst, t_iseq, TR_INLEX, TR_INSUBS, TR_WAITLEX, TR_WAITSUBS, tsearch_readline(), tsearch_readline_begin(), and tsearch_readline_end().

Referenced by thesaurus_init().