PostgreSQL Source Code git master
dict_thesaurus.c File Reference
#include "postgres.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "utils/fmgrprotos.h"
#include "utils/regproc.h"
Include dependency graph for dict_thesaurus.c:

Go to the source code of this file.

Data Structures

struct  LexemeInfo
 
struct  TheLexeme
 
struct  TheSubstitute
 
struct  DictThesaurus
 

Macros

#define DT_USEASIS   0x1000
 
#define TR_WAITLEX   1
 
#define TR_INLEX   2
 
#define TR_WAITSUBS   3
 
#define TR_INSUBS   4
 

Typedefs

typedef struct LexemeInfo LexemeInfo
 

Functions

static void newLexeme (DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
 
static void addWrd (DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
 
static void thesaurusRead (const char *filename, DictThesaurus *d)
 
static TheLexeme * addCompiledLexeme (TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
 
static int cmpLexemeInfo (LexemeInfo *a, LexemeInfo *b)
 
static int cmpLexeme (const TheLexeme *a, const TheLexeme *b)
 
static int cmpLexemeQ (const void *a, const void *b)
 
static int cmpTheLexeme (const void *a, const void *b)
 
static void compileTheLexeme (DictThesaurus *d)
 
static void compileTheSubstitute (DictThesaurus *d)
 
Datum thesaurus_init (PG_FUNCTION_ARGS)
 
static LexemeInfo * findTheLexeme (DictThesaurus *d, char *lexeme)
 
static bool matchIdSubst (LexemeInfo *stored, uint32 idsubst)
 
static LexemeInfo * findVariant (LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
 
static TSLexeme * copyTSLexeme (TheSubstitute *ts)
 
static TSLexeme * checkMatch (DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
 
Datum thesaurus_lexize (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

◆ DT_USEASIS

#define DT_USEASIS   0x1000

Definition at line 28 of file dict_thesaurus.c.

◆ TR_INLEX

#define TR_INLEX   2

Definition at line 163 of file dict_thesaurus.c.

◆ TR_INSUBS

#define TR_INSUBS   4

Definition at line 165 of file dict_thesaurus.c.

◆ TR_WAITLEX

#define TR_WAITLEX   1

Definition at line 162 of file dict_thesaurus.c.

◆ TR_WAITSUBS

#define TR_WAITSUBS   3

Definition at line 164 of file dict_thesaurus.c.

Typedef Documentation

◆ LexemeInfo

typedef struct LexemeInfo LexemeInfo

Function Documentation

◆ addCompiledLexeme()

static TheLexeme * addCompiledLexeme ( TheLexeme *  newwrds,
int *  nnw,
int *  tnm,
TSLexeme *  lexeme,
LexemeInfo *  src,
uint16  tnvariant 
)
static

Definition at line 304 of file dict_thesaurus.c.

/*
 * Append one entry to the newwrds array and return the array pointer.
 *
 * *nnw is the number of entries in use and *tnm the allocated capacity.
 * When the array is full, *tnm is doubled and the array is repalloc'd, so
 * the caller must continue with the returned pointer rather than the one it
 * passed in.
 *
 * The new entry gets a freshly palloc'd LexemeInfo whose idsubst and
 * posinsubst are copied from src and whose nextentry is NULL.  If lexeme is
 * non-NULL and carries a non-NULL string, that string is pstrdup'd and the
 * given tnvariant is stored; otherwise the entry's string is NULL and
 * tnvariant is set to 1.  *nnw is incremented before returning.
 */
305{
/* array is full: double the capacity before writing slot *nnw */
306 if (*nnw >= *tnm)
307 {
308 *tnm *= 2;
309 newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
310 }
311
312 newwrds[*nnw].entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
313
314 if (lexeme && lexeme->lexeme)
315 {
/* keep a private copy of the string; the TSLexeme itself is not retained */
316 newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme);
317 newwrds[*nnw].entries->tnvariant = tnvariant;
318 }
319 else
320 {
/* no lexeme text supplied: store a NULL string and ignore the tnvariant argument */
321 newwrds[*nnw].lexeme = NULL;
322 newwrds[*nnw].entries->tnvariant = 1;
323 }
324
/* position information always comes from the source entry */
325 newwrds[*nnw].entries->idsubst = src->idsubst;
326 newwrds[*nnw].entries->posinsubst = src->posinsubst;
327
328 newwrds[*nnw].entries->nextentry = NULL;
329
330 (*nnw)++;
331 return newwrds;
332}
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void * palloc(Size size)
Definition: mcxt.c:1365
uint16 posinsubst
struct LexemeInfo * nextentry
uint32 idsubst
uint16 tnvariant
char * lexeme
Definition: ts_public.h:138
char * lexeme
LexemeInfo * entries

References TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, TSLexeme::lexeme, LexemeInfo::nextentry, palloc(), LexemeInfo::posinsubst, pstrdup(), repalloc(), and LexemeInfo::tnvariant.

Referenced by compileTheLexeme().

◆ addWrd()

static void addWrd ( DictThesaurus *  d,
char *  b,
char *  e,
uint32  idsubst,
uint16  nwrd,
uint16  posinsubst,
bool  useasis 
)
static

Definition at line 106 of file dict_thesaurus.c.

/*
 * Append one word, the bytes [b, e), to the result list (res) of
 * substitution number idsubst in d->subst.
 *
 * nwrd == 0 marks the first word of a substitution: the function-level
 * static counters nres (entries used) and ntres (entries allocated) are
 * reset, and d->subst is allocated with 16 slots, or doubled, when idsubst
 * does not fit.
 *
 * NOTE(review): because nres/ntres are static and only reset when nwrd == 0,
 * this appears to rely on all words of one substitution being added in
 * consecutive calls starting with nwrd == 0 -- confirm against the caller.
 *
 * The stored word is a palloc'd, NUL-terminated copy.  Its nvariant is set
 * to nwrd and its flags to DT_USEASIS when useasis is true, 0 otherwise.
 * The entry after the last stored word always has lexeme == NULL.
 */
107{
108 static int nres = 0;
109 static int ntres = 0;
110 TheSubstitute *ptr;
111
112 if (nwrd == 0)
113 {
/* first word of a substitution: start a fresh result list */
114 nres = ntres = 0;
115
/* make sure d->subst has a slot for idsubst (grows by at most one doubling) */
116 if (idsubst >= d->nsubst)
117 {
118 if (d->nsubst == 0)
119 {
120 d->nsubst = 16;
121 d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
122 }
123 else
124 {
125 d->nsubst *= 2;
126 d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
127 }
128 }
129 }
130
131 ptr = d->subst + idsubst;
132
/* rewritten on every call; checkMatch() later compares this against the current position */
133 ptr->lastlexeme = posinsubst - 1;
134
/* need room for the new word plus the NULL-lexeme terminator written at the end */
135 if (nres + 1 >= ntres)
136 {
137 if (ntres == 0)
138 {
139 ntres = 2;
140 ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres);
141 }
142 else
143 {
144 ntres *= 2;
145 ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres);
146 }
147 }
148
/* copy the e - b bytes of the word and NUL-terminate the copy */
149 ptr->res[nres].lexeme = palloc(e - b + 1);
150 memcpy(ptr->res[nres].lexeme, b, e - b);
151 ptr->res[nres].lexeme[e - b] = '\0';
152
153 ptr->res[nres].nvariant = nwrd;
154 if (useasis)
155 ptr->res[nres].flags = DT_USEASIS;
156 else
157 ptr->res[nres].flags = 0;
158
/* advance nres and mark the following slot as the end of the list */
159 ptr->res[++nres].lexeme = NULL;
160}
#define DT_USEASIS
int b
Definition: isn.c:74
e
Definition: preproc-init.c:82
TheSubstitute * subst
uint16 nvariant
Definition: ts_public.h:134
uint16 flags
Definition: ts_public.h:136
TSLexeme * res

References b, DT_USEASIS, TSLexeme::flags, LexemeInfo::idsubst, TheSubstitute::lastlexeme, TSLexeme::lexeme, DictThesaurus::nsubst, TSLexeme::nvariant, palloc(), LexemeInfo::posinsubst, repalloc(), TheSubstitute::res, and DictThesaurus::subst.

Referenced by thesaurusRead().

◆ checkMatch()

static TSLexeme * checkMatch ( DictThesaurus *  d,
LexemeInfo *  info,
uint16  curpos,
bool *  moreres 
)
static

Definition at line 772 of file dict_thesaurus.c.

/*
 * Walk the nextvariant chain starting at info and look for a substitution
 * whose lastlexeme equals curpos.
 *
 * Returns a fresh copy (via copyTSLexeme) of the first such substitution's
 * result list, or NULL if no visited entry qualifies.
 *
 * *moreres is set to false on entry and to true as soon as a visited entry
 * has a non-NULL nextvariant.  Entries after the one that produced the
 * return value are not visited.
 */
773{
774 *moreres = false;
775 while (info)
776 {
777 Assert(info->idsubst < d->nsubst);
/* checked before the match test, so it is also set for the entry that matches */
778 if (info->nextvariant)
779 *moreres = true;
780 if (d->subst[info->idsubst].lastlexeme == curpos)
781 return copyTSLexeme(d->subst + info->idsubst);
782 info = info->nextvariant;
783 }
784
785 return NULL;
786}
static TSLexeme * copyTSLexeme(TheSubstitute *ts)
Assert(PointerIsAligned(start, uint64))
struct LexemeInfo * nextvariant

References Assert(), copyTSLexeme(), LexemeInfo::idsubst, TheSubstitute::lastlexeme, LexemeInfo::nextvariant, DictThesaurus::nsubst, and DictThesaurus::subst.

Referenced by thesaurus_lexize().

◆ cmpLexeme()

static int cmpLexeme ( const TheLexeme *  a,
const TheLexeme *  b 
)
static

Definition at line 357 of file dict_thesaurus.c.

/*
 * Compare two TheLexeme items by their lexeme strings.
 *
 * An item with a NULL lexeme compares greater than any item with a non-NULL
 * lexeme, and two NULL lexemes compare equal.  Otherwise the result is that
 * of strcmp() on the two strings.
 */
358{
359 if (a->lexeme == NULL)
360 {
361 if (b->lexeme == NULL)
362 return 0;
363 else
364 return 1;
365 }
366 else if (b->lexeme == NULL)
367 return -1;
368
/* both strings are non-NULL here */
369 return strcmp(a->lexeme, b->lexeme);
370}
int a
Definition: isn.c:73

References a, and b.

Referenced by cmpLexemeQ(), cmpTheLexeme(), and compileTheLexeme().

◆ cmpLexemeInfo()

static int cmpLexemeInfo ( LexemeInfo *  a,
LexemeInfo *  b 
)
static

Definition at line 335 of file dict_thesaurus.c.

/*
 * Three-way comparison of two LexemeInfo entries.
 *
 * Returns 0 if either pointer is NULL.  Otherwise the entries are ordered
 * by idsubst, then posinsubst, then tnvariant; the result is -1, 0 or 1.
 * Only the entries passed in are examined; nextentry links are not followed.
 */
336{
/* a missing entry is treated as equal to anything */
337 if (a == NULL || b == NULL)
338 return 0;
339
340 if (a->idsubst == b->idsubst)
341 {
342 if (a->posinsubst == b->posinsubst)
343 {
344 if (a->tnvariant == b->tnvariant)
345 return 0;
346
347 return (a->tnvariant > b->tnvariant) ? 1 : -1;
348 }
349
350 return (a->posinsubst > b->posinsubst) ? 1 : -1;
351 }
352
353 return (a->idsubst > b->idsubst) ? 1 : -1;
354}

References a, and b.

Referenced by cmpTheLexeme(), and compileTheLexeme().

◆ cmpLexemeQ()

static int cmpLexemeQ ( const void *  a,
const void *  b 
)
static

Definition at line 373 of file dict_thesaurus.c.

/*
 * Comparator with the (const void *, const void *) signature: casts both
 * arguments to TheLexeme and compares them with cmpLexeme(), i.e. by lexeme
 * string only.  findTheLexeme() passes it to bsearch().
 */
374{
375 return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b);
376}
static int cmpLexeme(const TheLexeme *a, const TheLexeme *b)

References a, b, and cmpLexeme().

Referenced by findTheLexeme().

◆ cmpTheLexeme()

static int cmpTheLexeme ( const void *  a,
const void *  b 
)
static

Definition at line 379 of file dict_thesaurus.c.

/*
 * Comparator with the (const void *, const void *) signature for TheLexeme
 * items; compileTheLexeme() passes it to qsort().
 *
 * Items are ordered by lexeme string via cmpLexeme().  Items with equal
 * strings are ordered by the negated result of cmpLexemeInfo() on their
 * entries, i.e. in descending idsubst / posinsubst / tnvariant order.
 */
380{
381 const TheLexeme *la = (const TheLexeme *) a;
382 const TheLexeme *lb = (const TheLexeme *) b;
383 int res;
384
385 if ((res = cmpLexeme(la, lb)) != 0)
386 return res;
387
/* same string: reverse the LexemeInfo ordering */
388 return -cmpLexemeInfo(la->entries, lb->entries);
389}
static int cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)

References a, b, cmpLexeme(), cmpLexemeInfo(), and TheLexeme::entries.

Referenced by compileTheLexeme().

◆ compileTheLexeme()

static void compileTheLexeme ( DictThesaurus *  d)
static

Definition at line 392 of file dict_thesaurus.c.

393{
394 int i,
395 nnw = 0,
396 tnm = 16;
397 TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
398 *ptrwrds;
399
400 for (i = 0; i < d->nwrds; i++)
401 {
402 TSLexeme *ptr;
403
404 if (strcmp(d->wrds[i].lexeme, "?") == 0) /* Is stop word marker? */
405 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
406 else
407 {
411 Int32GetDatum(strlen(d->wrds[i].lexeme)),
412 PointerGetDatum(NULL)));
413
414 if (!ptr)
416 (errcode(ERRCODE_CONFIG_FILE_ERROR),
417 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
418 d->wrds[i].lexeme,
419 d->wrds[i].entries->idsubst + 1)));
420 else if (!(ptr->lexeme))
422 (errcode(ERRCODE_CONFIG_FILE_ERROR),
423 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
424 d->wrds[i].lexeme,
425 d->wrds[i].entries->idsubst + 1),
426 errhint("Use \"?\" to represent a stop word within a sample phrase.")));
427 else
428 {
429 while (ptr->lexeme)
430 {
431 TSLexeme *remptr = ptr + 1;
432 int tnvar = 1;
433 int curvar = ptr->nvariant;
434
435 /* compute n words in one variant */
436 while (remptr->lexeme)
437 {
438 if (remptr->nvariant != (remptr - 1)->nvariant)
439 break;
440 tnvar++;
441 remptr++;
442 }
443
444 remptr = ptr;
445 while (remptr->lexeme && remptr->nvariant == curvar)
446 {
447 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
448 remptr++;
449 }
450
451 ptr = remptr;
452 }
453 }
454 }
455
456 pfree(d->wrds[i].lexeme);
457 pfree(d->wrds[i].entries);
458 }
459
460 if (d->wrds)
461 pfree(d->wrds);
462 d->wrds = newwrds;
463 d->nwrds = nnw;
464 d->ntwrds = tnm;
465
466 if (d->nwrds > 1)
467 {
468 qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
469
470 /* uniq */
471 newwrds = d->wrds;
472 ptrwrds = d->wrds + 1;
473 while (ptrwrds - d->wrds < d->nwrds)
474 {
475 if (cmpLexeme(ptrwrds, newwrds) == 0)
476 {
477 if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
478 {
479 ptrwrds->entries->nextentry = newwrds->entries;
480 newwrds->entries = ptrwrds->entries;
481 }
482 else
483 pfree(ptrwrds->entries);
484
485 if (ptrwrds->lexeme)
486 pfree(ptrwrds->lexeme);
487 }
488 else
489 {
490 newwrds++;
491 *newwrds = *ptrwrds;
492 }
493
494 ptrwrds++;
495 }
496
497 d->nwrds = newwrds - d->wrds + 1;
498 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
499 }
500}
static TheLexeme * addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
static int cmpTheLexeme(const void *a, const void *b)
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:150
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:706
int i
Definition: isn.c:77
void pfree(void *pointer)
Definition: mcxt.c:1594
#define qsort(a, b, c, d)
Definition: port.h:500
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:332
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:322
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:222
TheLexeme * wrds
TSDictionaryCacheEntry * subdict

References addCompiledLexeme(), cmpLexeme(), cmpLexemeInfo(), cmpTheLexeme(), DatumGetPointer(), TSDictionaryCacheEntry::dictData, TheLexeme::entries, ereport, errcode(), errhint(), errmsg(), ERROR, FunctionCall4, i, LexemeInfo::idsubst, Int32GetDatum(), TheLexeme::lexeme, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, LexemeInfo::nextentry, DictThesaurus::ntwrds, TSLexeme::nvariant, DictThesaurus::nwrds, palloc(), pfree(), PointerGetDatum(), qsort, repalloc(), DictThesaurus::subdict, and DictThesaurus::wrds.

Referenced by thesaurus_init().

◆ compileTheSubstitute()

static void compileTheSubstitute ( DictThesaurus *  d)
static

Definition at line 503 of file dict_thesaurus.c.

504{
505 int i;
506
507 for (i = 0; i < d->nsubst; i++)
508 {
509 TSLexeme *rem = d->subst[i].res,
510 *outptr,
511 *inptr;
512 int n = 2;
513
514 outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
515 outptr->lexeme = NULL;
516 inptr = rem;
517
518 while (inptr && inptr->lexeme)
519 {
520 TSLexeme *lexized,
521 tmplex[2];
522
523 if (inptr->flags & DT_USEASIS)
524 { /* do not lexize */
525 tmplex[0] = *inptr;
526 tmplex[0].flags = 0;
527 tmplex[1].lexeme = NULL;
528 lexized = tmplex;
529 }
530 else
531 {
534 PointerGetDatum(inptr->lexeme),
535 Int32GetDatum(strlen(inptr->lexeme)),
536 PointerGetDatum(NULL)));
537 }
538
539 if (lexized && lexized->lexeme)
540 {
541 int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
542
543 while (lexized->lexeme)
544 {
545 if (outptr - d->subst[i].res + 1 >= n)
546 {
547 int diff = outptr - d->subst[i].res;
548
549 n *= 2;
550 d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
551 outptr = d->subst[i].res + diff;
552 }
553
554 *outptr = *lexized;
555 outptr->lexeme = pstrdup(lexized->lexeme);
556
557 outptr++;
558 lexized++;
559 }
560
561 if (toset > 0)
562 d->subst[i].res[toset].flags |= TSL_ADDPOS;
563 }
564 else if (lexized)
565 {
567 (errcode(ERRCODE_CONFIG_FILE_ERROR),
568 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
569 inptr->lexeme, i + 1)));
570 }
571 else
572 {
574 (errcode(ERRCODE_CONFIG_FILE_ERROR),
575 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
576 inptr->lexeme, i + 1)));
577 }
578
579 if (inptr->lexeme)
580 pfree(inptr->lexeme);
581 inptr++;
582 }
583
584 if (outptr == d->subst[i].res)
586 (errcode(ERRCODE_CONFIG_FILE_ERROR),
587 errmsg("thesaurus substitute phrase is empty (rule %d)",
588 i + 1)));
589
590 d->subst[i].reslen = outptr - d->subst[i].res;
591
592 pfree(rem);
593 }
594}
#define TSL_ADDPOS
Definition: ts_public.h:142

References DatumGetPointer(), TSDictionaryCacheEntry::dictData, DT_USEASIS, ereport, errcode(), errmsg(), ERROR, TSLexeme::flags, FunctionCall4, i, Int32GetDatum(), TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, DictThesaurus::nsubst, palloc(), pfree(), PointerGetDatum(), pstrdup(), repalloc(), TheSubstitute::res, TheSubstitute::reslen, DictThesaurus::subdict, DictThesaurus::subst, and TSL_ADDPOS.

Referenced by thesaurus_init().

◆ copyTSLexeme()

static TSLexeme * copyTSLexeme ( TheSubstitute *  ts)
static

Definition at line 754 of file dict_thesaurus.c.

/*
 * Return a newly palloc'd copy of the result list of substitution ts.
 *
 * The copy has ts->reslen entries followed by one terminator entry whose
 * lexeme is NULL.  Each entry is copied as a struct and its lexeme string
 * is duplicated with pstrdup(), so the copy shares no strings with ts->res.
 * Only the lexeme field of the terminator entry is initialized.
 */
755{
756 TSLexeme *res;
757 uint16 i;
758
/* one extra slot for the NULL-lexeme terminator */
759 res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
760 for (i = 0; i < ts->reslen; i++)
761 {
762 res[i] = ts->res[i];
763 res[i].lexeme = pstrdup(ts->res[i].lexeme);
764 }
765
766 res[ts->reslen].lexeme = NULL;
767
768 return res;
769}
uint16_t uint16
Definition: c.h:540

References i, TSLexeme::lexeme, palloc(), pstrdup(), TheSubstitute::res, and TheSubstitute::reslen.

Referenced by checkMatch().

◆ findTheLexeme()

static LexemeInfo * findTheLexeme ( DictThesaurus *  d,
char *  lexeme 
)
static

Definition at line 658 of file dict_thesaurus.c.

659{
661 *res;
662
663 if (d->nwrds == 0)
664 return NULL;
665
666 key.lexeme = lexeme;
667 key.entries = NULL;
668
669 res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
670
671 if (res == NULL)
672 return NULL;
673 return res->entries;
674}
static int cmpLexemeQ(const void *a, const void *b)

References cmpLexemeQ(), TheLexeme::entries, sort-test::key, DictThesaurus::nwrds, and DictThesaurus::wrds.

Referenced by thesaurus_lexize().

◆ findVariant()

static LexemeInfo * findVariant ( LexemeInfo *  in,
LexemeInfo *  stored,
uint16  curpos,
LexemeInfo **  newin,
int  newn 
)
static

Definition at line 697 of file dict_thesaurus.c.

/*
 * Search newn entry chains for substitutions they all agree on and prepend
 * the hits to the list "in".
 *
 * newin is an array of newn LexemeInfo chains linked through nextentry; the
 * array elements are advanced in place while searching.  A candidate
 * idsubst is accepted when every chain has an entry with that idsubst,
 * posinsubst == curpos and tnvariant == newn.  An accepted candidate is
 * pushed onto the front of "in" (linked through nextvariant) only if
 * matchIdSubst(stored, idsubst) holds and "in" does not already contain
 * that idsubst.
 *
 * Returns the current head of "in" as soon as any chain is exhausted.
 *
 * NOTE(review): the skip-ahead logic only moves forward through entries
 * with a smaller idsubst, so it presumably expects each chain to be in
 * ascending idsubst order -- confirm against how the chains are built.
 */
698{
699 for (;;)
700 {
701 int i;
/* current candidate; may be NULL, in which case the i == 0 pass below returns */
702 LexemeInfo *ptr = newin[0];
703
704 for (i = 0; i < newn; i++)
705 {
/* skip entries of chain i that lie before the candidate substitution */
706 while (newin[i] && newin[i]->idsubst < ptr->idsubst)
707 newin[i] = newin[i]->nextentry;
708
709 if (newin[i] == NULL)
710 return in;
711
712 if (newin[i]->idsubst > ptr->idsubst)
713 {
/* chain i has no entry for the candidate: adopt its idsubst and rescan from chain 0 */
714 ptr = newin[i];
715 i = -1;
716 continue;
717 }
718
/* within the candidate substitution, look for the entry at curpos with tnvariant == newn */
719 while (newin[i]->idsubst == ptr->idsubst)
720 {
721 if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
722 {
723 ptr = newin[i];
724 break;
725 }
726
727 newin[i] = newin[i]->nextentry;
728 if (newin[i] == NULL)
729 return in;
730 }
731
732 if (newin[i]->idsubst != ptr->idsubst)
733 {
/* ran past the candidate without a matching entry: restart with the new idsubst */
734 ptr = newin[i];
735 i = -1;
736 continue;
737 }
738 }
739
740 if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
741 { /* found */
742
743 ptr->nextvariant = in;
744 in = ptr;
745 }
746
747 /* step forward */
748 for (i = 0; i < newn; i++)
749 newin[i] = newin[i]->nextentry;
750 }
751}
static bool matchIdSubst(LexemeInfo *stored, uint32 idsubst)

References i, LexemeInfo::idsubst, matchIdSubst(), LexemeInfo::nextentry, LexemeInfo::nextvariant, LexemeInfo::posinsubst, and LexemeInfo::tnvariant.

Referenced by thesaurus_lexize().

◆ matchIdSubst()

static bool matchIdSubst ( LexemeInfo *  stored,
uint32  idsubst 
)
static

Definition at line 677 of file dict_thesaurus.c.

/*
 * Report whether idsubst is acceptable with respect to the list "stored".
 *
 * Returns true when stored is NULL (an empty list accepts everything).
 * Otherwise returns true only if some entry reachable from stored through
 * nextvariant has the given idsubst, and false if none does.
 */
678{
679 bool res = true;
680
681 if (stored)
682 {
/* non-empty list: assume no match until one is found */
683 res = false;
684
685 for (; stored; stored = stored->nextvariant)
686 if (stored->idsubst == idsubst)
687 {
688 res = true;
689 break;
690 }
691 }
692
693 return res;
694}

References LexemeInfo::idsubst, and LexemeInfo::nextvariant.

Referenced by findVariant().

◆ newLexeme()

static void newLexeme ( DictThesaurus *  d,
char *  b,
char *  e,
uint32  idsubst,
uint16  posinsubst 
)
static

Definition at line 72 of file dict_thesaurus.c.

/*
 * Append a new word, the bytes [b, e), to d->wrds.
 *
 * d->wrds is allocated with 16 slots on first use and doubled whenever
 * d->nwrds has reached d->ntwrds.  The new item holds a palloc'd,
 * NUL-terminated copy of the word and a single freshly palloc'd LexemeInfo
 * carrying idsubst and posinsubst, with nextentry set to NULL.  The other
 * LexemeInfo fields (e.g. tnvariant) are not assigned here.
 */
73{
74 TheLexeme *ptr;
75
/* grow the array when it is full (or not yet allocated) */
76 if (d->nwrds >= d->ntwrds)
77 {
78 if (d->ntwrds == 0)
79 {
80 d->ntwrds = 16;
81 d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
82 }
83 else
84 {
85 d->ntwrds *= 2;
86 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
87 }
88 }
89
/* claim the next free slot */
90 ptr = d->wrds + d->nwrds;
91 d->nwrds++;
92
/* copy the e - b bytes of the word and NUL-terminate the copy */
93 ptr->lexeme = palloc(e - b + 1);
94
95 memcpy(ptr->lexeme, b, e - b);
96 ptr->lexeme[e - b] = '\0';
97
98 ptr->entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
99
100 ptr->entries->nextentry = NULL;
101 ptr->entries->idsubst = idsubst;
102 ptr->entries->posinsubst = posinsubst;
103}

References b, TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, LexemeInfo::nextentry, DictThesaurus::ntwrds, DictThesaurus::nwrds, palloc(), LexemeInfo::posinsubst, repalloc(), and DictThesaurus::wrds.

Referenced by thesaurusRead().

◆ thesaurus_init()

Datum thesaurus_init ( PG_FUNCTION_ARGS  )

Definition at line 597 of file dict_thesaurus.c.

598{
599 List *dictoptions = (List *) PG_GETARG_POINTER(0);
600 DictThesaurus *d;
601 char *subdictname = NULL;
602 bool fileloaded = false;
603 List *namelist;
604 ListCell *l;
605
606 d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
607
608 foreach(l, dictoptions)
609 {
610 DefElem *defel = (DefElem *) lfirst(l);
611
612 if (strcmp(defel->defname, "dictfile") == 0)
613 {
614 if (fileloaded)
616 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
617 errmsg("multiple DictFile parameters")));
618 thesaurusRead(defGetString(defel), d);
619 fileloaded = true;
620 }
621 else if (strcmp(defel->defname, "dictionary") == 0)
622 {
623 if (subdictname)
625 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
626 errmsg("multiple Dictionary parameters")));
627 subdictname = pstrdup(defGetString(defel));
628 }
629 else
630 {
632 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
633 errmsg("unrecognized Thesaurus parameter: \"%s\"",
634 defel->defname)));
635 }
636 }
637
638 if (!fileloaded)
640 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
641 errmsg("missing DictFile parameter")));
642 if (!subdictname)
644 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
645 errmsg("missing Dictionary parameter")));
646
647 namelist = stringToQualifiedNameList(subdictname, NULL);
648 d->subdictOid = get_ts_dict_oid(namelist, false);
650
653
655}
char * defGetString(DefElem *def)
Definition: define.c:35
static void compileTheSubstitute(DictThesaurus *d)
static void thesaurusRead(const char *filename, DictThesaurus *d)
static void compileTheLexeme(DictThesaurus *d)
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
void * palloc0(Size size)
Definition: mcxt.c:1395
Oid get_ts_dict_oid(List *names, bool missing_ok)
Definition: namespace.c:2931
#define lfirst(lc)
Definition: pg_list.h:172
List * stringToQualifiedNameList(const char *string, Node *escontext)
Definition: regproc.c:1922
char * defname
Definition: parsenodes.h:843
Definition: pg_list.h:54
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:208

References compileTheLexeme(), compileTheSubstitute(), defGetString(), DefElem::defname, ereport, errcode(), errmsg(), ERROR, get_ts_dict_oid(), lfirst, lookup_ts_dictionary_cache(), palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, pstrdup(), stringToQualifiedNameList(), DictThesaurus::subdict, DictThesaurus::subdictOid, and thesaurusRead().

◆ thesaurus_lexize()

Datum thesaurus_lexize ( PG_FUNCTION_ARGS  )

Definition at line 789 of file dict_thesaurus.c.

790{
793 TSLexeme *res = NULL;
794 LexemeInfo *stored,
795 *info = NULL;
796 uint16 curpos = 0;
797 bool moreres = false;
798
799 if (PG_NARGS() != 4 || dstate == NULL)
800 elog(ERROR, "forbidden call of thesaurus or nested call");
801
802 if (dstate->isend)
803 PG_RETURN_POINTER(NULL);
804 stored = (LexemeInfo *) dstate->private_state;
805
806 if (stored)
807 curpos = stored->posinsubst + 1;
808
809 if (!d->subdict->isvalid)
811
816 PointerGetDatum(NULL)));
817
818 if (res && res->lexeme)
819 {
820 TSLexeme *ptr = res,
821 *basevar;
822
823 while (ptr->lexeme)
824 {
825 uint16 nv = ptr->nvariant;
826 uint16 i,
827 nlex = 0;
828 LexemeInfo **infos;
829
830 basevar = ptr;
831 while (ptr->lexeme && nv == ptr->nvariant)
832 {
833 nlex++;
834 ptr++;
835 }
836
837 infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
838 for (i = 0; i < nlex; i++)
839 if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
840 break;
841
842 if (i < nlex)
843 {
844 /* no chance to find */
845 pfree(infos);
846 continue;
847 }
848
849 info = findVariant(info, stored, curpos, infos, nlex);
850 }
851 }
852 else if (res)
853 { /* stop-word */
854 LexemeInfo *infos = findTheLexeme(d, NULL);
855
856 info = findVariant(NULL, stored, curpos, &infos, 1);
857 }
858 else
859 {
860 info = NULL; /* word isn't recognized */
861 }
862
863 dstate->private_state = info;
864
865 if (!info)
866 {
867 dstate->getnext = false;
868 PG_RETURN_POINTER(NULL);
869 }
870
871 if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
872 {
873 dstate->getnext = moreres;
875 }
876
877 dstate->getnext = true;
878
879 PG_RETURN_POINTER(NULL);
880}
static LexemeInfo * findTheLexeme(DictThesaurus *d, char *lexeme)
static TSLexeme * checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
static LexemeInfo * findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
#define elog(elevel,...)
Definition: elog.h:226
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_NARGS()
Definition: fmgr.h:203
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
void * private_state
Definition: ts_public.h:155
bool getnext
Definition: ts_public.h:154

References checkMatch(), DatumGetPointer(), TSDictionaryCacheEntry::dictData, elog, ERROR, findTheLexeme(), findVariant(), FunctionCall4, DictSubState::getnext, i, if(), DictSubState::isend, TSDictionaryCacheEntry::isvalid, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), TSLexeme::nvariant, palloc(), pfree(), PG_GETARG_DATUM, PG_GETARG_POINTER, PG_NARGS, PG_RETURN_POINTER, PointerGetDatum(), LexemeInfo::posinsubst, DictSubState::private_state, DictThesaurus::subdict, and DictThesaurus::subdictOid.

◆ thesaurusRead()

static void thesaurusRead ( const char *  filename,
DictThesaurus *  d 
)
static

Definition at line 168 of file dict_thesaurus.c.

169{
170 char *real_filename = get_tsearch_config_filename(filename, "ths");
172 uint32 idsubst = 0;
173 bool useasis = false;
174 char *line;
175
176 if (!tsearch_readline_begin(&trst, real_filename))
178 (errcode(ERRCODE_CONFIG_FILE_ERROR),
179 errmsg("could not open thesaurus file \"%s\": %m",
180 real_filename)));
181
182 while ((line = tsearch_readline(&trst)) != NULL)
183 {
184 char *ptr;
185 int state = TR_WAITLEX;
186 char *beginwrd = NULL;
187 uint32 posinsubst = 0;
188 uint32 nwrd = 0;
189
190 ptr = line;
191
192 /* is it a comment? */
193 while (*ptr && isspace((unsigned char) *ptr))
194 ptr += pg_mblen(ptr);
195
196 if (t_iseq(ptr, '#') || *ptr == '\0' ||
197 t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
198 {
199 pfree(line);
200 continue;
201 }
202
203 while (*ptr)
204 {
205 if (state == TR_WAITLEX)
206 {
207 if (t_iseq(ptr, ':'))
208 {
209 if (posinsubst == 0)
211 (errcode(ERRCODE_CONFIG_FILE_ERROR),
212 errmsg("unexpected delimiter")));
214 }
215 else if (!isspace((unsigned char) *ptr))
216 {
217 beginwrd = ptr;
218 state = TR_INLEX;
219 }
220 }
221 else if (state == TR_INLEX)
222 {
223 if (t_iseq(ptr, ':'))
224 {
225 newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
227 }
228 else if (isspace((unsigned char) *ptr))
229 {
230 newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
232 }
233 }
234 else if (state == TR_WAITSUBS)
235 {
236 if (t_iseq(ptr, '*'))
237 {
238 useasis = true;
240 beginwrd = ptr + pg_mblen(ptr);
241 }
242 else if (t_iseq(ptr, '\\'))
243 {
244 useasis = false;
246 beginwrd = ptr + pg_mblen(ptr);
247 }
248 else if (!isspace((unsigned char) *ptr))
249 {
250 useasis = false;
251 beginwrd = ptr;
253 }
254 }
255 else if (state == TR_INSUBS)
256 {
257 if (isspace((unsigned char) *ptr))
258 {
259 if (ptr == beginwrd)
261 (errcode(ERRCODE_CONFIG_FILE_ERROR),
262 errmsg("unexpected end of line or lexeme")));
263 addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
265 }
266 }
267 else
268 elog(ERROR, "unrecognized thesaurus state: %d", state);
269
270 ptr += pg_mblen(ptr);
271 }
272
273 if (state == TR_INSUBS)
274 {
275 if (ptr == beginwrd)
277 (errcode(ERRCODE_CONFIG_FILE_ERROR),
278 errmsg("unexpected end of line or lexeme")));
279 addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
280 }
281
282 idsubst++;
283
284 if (!(nwrd && posinsubst))
286 (errcode(ERRCODE_CONFIG_FILE_ERROR),
287 errmsg("unexpected end of line")));
288
289 if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
291 (errcode(ERRCODE_CONFIG_FILE_ERROR),
292 errmsg("too many lexemes in thesaurus entry")));
293
294 pfree(line);
295 }
296
297 d->nsubst = idsubst;
298
300 pfree(real_filename);
301}
uint32_t uint32
Definition: c.h:541
#define TR_WAITLEX
static void addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
#define TR_INLEX
#define TR_WAITSUBS
#define TR_INSUBS
static void newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1024
static char * filename
Definition: pg_dumpall.c:120
Definition: regguts.h:323
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:77
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:100
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:145
#define t_iseq(x, c)
Definition: ts_locale.h:38
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:34

References addWrd(), elog, ereport, errcode(), errmsg(), ERROR, filename, get_tsearch_config_filename(), LexemeInfo::idsubst, newLexeme(), DictThesaurus::nsubst, pfree(), pg_mblen(), LexemeInfo::posinsubst, t_iseq, TR_INLEX, TR_INSUBS, TR_WAITLEX, TR_WAITSUBS, tsearch_readline(), tsearch_readline_begin(), and tsearch_readline_end().

Referenced by thesaurus_init().