PostgreSQL Source Code git master
ts_parse.c File Reference
#include "postgres.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "varatt.h"
Include dependency graph for ts_parse.c:

Go to the source code of this file.

Data Structures

struct  ParsedLex
 
struct  ListParsedLex
 
struct  LexizeData
 

Macros

#define IGNORE_LONGLEXEME   1
 

Typedefs

typedef struct ParsedLex ParsedLex
 
typedef struct ListParsedLex ListParsedLex
 

Functions

static void LexizeInit (LexizeData *ld, TSConfigCacheEntry *cfg)
 
static void LPLAddTail (ListParsedLex *list, ParsedLex *newpl)
 
static ParsedLex * LPLRemoveHead (ListParsedLex *list)
 
static void LexizeAddLemm (LexizeData *ld, int type, char *lemm, int lenlemm)
 
static void RemoveHead (LexizeData *ld)
 
static void setCorrLex (LexizeData *ld, ParsedLex **correspondLexem)
 
static void moveToWaste (LexizeData *ld, ParsedLex *stop)
 
static void setNewTmpRes (LexizeData *ld, ParsedLex *lex, TSLexeme *res)
 
static TSLexeme * LexizeExec (LexizeData *ld, ParsedLex **correspondLexem)
 
void parsetext (Oid cfgId, ParsedText *prs, char *buf, int buflen)
 
static void hladdword (HeadlineParsedText *prs, char *buf, int buflen, int type)
 
static void hlfinditem (HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
 
static void addHLParsedLex (HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
 
void hlparsetext (Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
 
text * generateHeadline (HeadlineParsedText *prs)
 

Macro Definition Documentation

◆ IGNORE_LONGLEXEME

#define IGNORE_LONGLEXEME   1

Definition at line 21 of file ts_parse.c.

Typedef Documentation

◆ ListParsedLex

typedef struct ListParsedLex ListParsedLex

◆ ParsedLex

typedef struct ParsedLex ParsedLex

Function Documentation

◆ addHLParsedLex()

static void addHLParsedLex ( HeadlineParsedText *  prs,
TSQuery  query,
ParsedLex *  lexs,
TSLexeme *  norms 
)
static

Definition at line 499 of file ts_parse.c.

500{
501 ParsedLex *tmplexs;
502 TSLexeme *ptr;
503 int32 savedpos;
504
505 while (lexs)
506 {
507 if (lexs->type > 0)
508 hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
509
510 ptr = norms;
511 savedpos = prs->vectorpos;
512 while (ptr && ptr->lexeme)
513 {
514 if (ptr->flags & TSL_ADDPOS)
515 savedpos++;
516 hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
517 ptr++;
518 }
519
520 tmplexs = lexs->next;
521 pfree(lexs);
522 lexs = tmplexs;
523 }
524
525 if (norms)
526 {
527 ptr = norms;
528 while (ptr->lexeme)
529 {
530 if (ptr->flags & TSL_ADDPOS)
531 prs->vectorpos++;
532 pfree(ptr->lexeme);
533 ptr++;
534 }
535 pfree(norms);
536 }
537}
int32_t int32
Definition: c.h:498
void pfree(void *pointer)
Definition: mcxt.c:1524
char * lemm
Definition: ts_parse.c:30
int lenlemm
Definition: ts_parse.c:31
struct ParsedLex * next
Definition: ts_parse.c:32
int type
Definition: ts_parse.c:29
char * lexeme
Definition: ts_public.h:138
uint16 flags
Definition: ts_public.h:136
static void hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
Definition: ts_parse.c:464
static void hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
Definition: ts_parse.c:440
#define TSL_ADDPOS
Definition: ts_public.h:142

References TSLexeme::flags, hladdword(), hlfinditem(), ParsedLex::lemm, ParsedLex::lenlemm, TSLexeme::lexeme, ParsedLex::next, pfree(), TSL_ADDPOS, ParsedLex::type, and HeadlineParsedText::vectorpos.

Referenced by hlparsetext().

◆ generateHeadline()

text * generateHeadline ( HeadlineParsedText *  prs)

Definition at line 607 of file ts_parse.c.

608{
609 text *out;
610 char *ptr;
611 int len = 128;
612 int numfragments = 0;
613 int16 infrag = 0;
614
615 HeadlineWordEntry *wrd = prs->words;
616
617 out = (text *) palloc(len);
618 ptr = ((char *) out) + VARHDRSZ;
619
620 while (wrd - prs->words < prs->curwords)
621 {
622 while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= len)
623 {
624 int dist = ptr - ((char *) out);
625
626 len *= 2;
627 out = (text *) repalloc(out, len);
628 ptr = ((char *) out) + dist;
629 }
630
631 if (wrd->in && !wrd->repeated)
632 {
633 if (!infrag)
634 {
635
636 /* start of a new fragment */
637 infrag = 1;
638 numfragments++;
639 /* add a fragment delimiter if this is after the first one */
640 if (numfragments > 1)
641 {
642 memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
643 ptr += prs->fragdelimlen;
644 }
645 }
646 if (wrd->replace)
647 {
648 *ptr = ' ';
649 ptr++;
650 }
651 else if (!wrd->skip)
652 {
653 if (wrd->selected)
654 {
655 memcpy(ptr, prs->startsel, prs->startsellen);
656 ptr += prs->startsellen;
657 }
658 memcpy(ptr, wrd->word, wrd->len);
659 ptr += wrd->len;
660 if (wrd->selected)
661 {
662 memcpy(ptr, prs->stopsel, prs->stopsellen);
663 ptr += prs->stopsellen;
664 }
665 }
666 }
667 else if (!wrd->repeated)
668 {
669 if (infrag)
670 infrag = 0;
671 pfree(wrd->word);
672 }
673
674 wrd++;
675 }
676
677 SET_VARSIZE(out, ptr - ((char *) out));
678 return out;
679}
#define VARHDRSZ
Definition: c.h:663
int16_t int16
Definition: c.h:497
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1544
void * palloc(Size size)
Definition: mcxt.c:1317
const void size_t len
HeadlineWordEntry * words
Definition: ts_public.h:76
Definition: c.h:658
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References HeadlineParsedText::curwords, HeadlineParsedText::fragdelim, HeadlineParsedText::fragdelimlen, HeadlineWordEntry::in, HeadlineWordEntry::len, len, palloc(), pfree(), repalloc(), HeadlineWordEntry::repeated, HeadlineWordEntry::replace, HeadlineWordEntry::selected, SET_VARSIZE, HeadlineWordEntry::skip, HeadlineParsedText::startsel, HeadlineParsedText::startsellen, HeadlineParsedText::stopsel, HeadlineParsedText::stopsellen, VARHDRSZ, HeadlineWordEntry::word, and HeadlineParsedText::words.

Referenced by headline_json_value(), and ts_headline_byid_opt().

◆ hladdword()

static void hladdword ( HeadlineParsedText *  prs,
char *  buf,
int  buflen,
int  type 
)
static

Definition at line 440 of file ts_parse.c.

441{
442 if (prs->curwords >= prs->lenwords)
443 {
444 prs->lenwords *= 2;
445 prs->words = (HeadlineWordEntry *) repalloc(prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
446 }
447 memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWordEntry));
448 prs->words[prs->curwords].type = (uint8) type;
449 prs->words[prs->curwords].len = buflen;
450 prs->words[prs->curwords].word = palloc(buflen);
451 memcpy(prs->words[prs->curwords].word, buf, buflen);
452 prs->curwords++;
453}
uint8_t uint8
Definition: c.h:500
static char * buf
Definition: pg_test_fsync.c:72
const char * type

References buf, HeadlineParsedText::curwords, HeadlineWordEntry::len, HeadlineParsedText::lenwords, palloc(), repalloc(), type, HeadlineWordEntry::type, HeadlineWordEntry::word, and HeadlineParsedText::words.

Referenced by addHLParsedLex().

◆ hlfinditem()

static void hlfinditem ( HeadlineParsedText *  prs,
TSQuery  query,
int32  pos,
char *  buf,
int  buflen 
)
static

Definition at line 464 of file ts_parse.c.

465{
466 int i;
467 QueryItem *item = GETQUERY(query);
468 HeadlineWordEntry *word;
469
470 while (prs->curwords + query->size >= prs->lenwords)
471 {
472 prs->lenwords *= 2;
473 prs->words = (HeadlineWordEntry *) repalloc(prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
474 }
475
476 word = &(prs->words[prs->curwords - 1]);
477 word->pos = LIMITPOS(pos);
478 for (i = 0; i < query->size; i++)
479 {
480 if (item->type == QI_VAL &&
481 tsCompareString(GETOPERAND(query) + item->qoperand.distance, item->qoperand.length,
482 buf, buflen, item->qoperand.prefix) == 0)
483 {
484 if (word->item)
485 {
486 memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
487 prs->words[prs->curwords].item = &item->qoperand;
488 prs->words[prs->curwords].repeated = 1;
489 prs->curwords++;
490 }
491 else
492 word->item = &item->qoperand;
493 }
494 item++;
495 }
496}
#define GETQUERY(x)
Definition: _int.h:157
int i
Definition: isn.c:74
#define GETOPERAND(x)
Definition: ltree.h:165
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
Definition: regcomp.c:1476
QueryOperand * item
Definition: ts_public.h:70
bool prefix
Definition: ts_type.h:163
uint32 distance
Definition: ts_type.h:172
uint32 length
Definition: ts_type.h:171
int32 size
Definition: ts_type.h:221
#define QI_VAL
Definition: ts_type.h:149
#define LIMITPOS(x)
Definition: ts_type.h:87
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152
QueryOperand qoperand
Definition: ts_type.h:210
QueryItemType type
Definition: ts_type.h:208

References buf, HeadlineParsedText::curwords, QueryOperand::distance, GETOPERAND, GETQUERY, i, HeadlineWordEntry::item, QueryOperand::length, HeadlineParsedText::lenwords, LIMITPOS, QueryOperand::prefix, QI_VAL, QueryItem::qoperand, repalloc(), HeadlineWordEntry::repeated, TSQueryData::size, tsCompareString(), QueryItem::type, word(), and HeadlineParsedText::words.

Referenced by addHLParsedLex().

◆ hlparsetext()

void hlparsetext ( Oid  cfgId,
HeadlineParsedText *  prs,
TSQuery  query,
char *  buf,
int  buflen 
)

Definition at line 540 of file ts_parse.c.

541{
542 int type,
543 lenlemm = 0; /* silence compiler warning */
544 char *lemm = NULL;
545 LexizeData ldata;
546 TSLexeme *norms;
547 ParsedLex *lexs;
548 TSConfigCacheEntry *cfg;
549 TSParserCacheEntry *prsobj;
550 void *prsdata;
551
552 cfg = lookup_ts_config_cache(cfgId);
553 prsobj = lookup_ts_parser_cache(cfg->prsId);
554
555 prsdata = DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
556 PointerGetDatum(buf),
557 Int32GetDatum(buflen)));
558
559 LexizeInit(&ldata, cfg);
560
561 do
562 {
563 type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
564 PointerGetDatum(prsdata),
565 PointerGetDatum(&lemm),
566 PointerGetDatum(&lenlemm)));
567
568 if (type > 0 && lenlemm >= MAXSTRLEN)
569 {
570#ifdef IGNORE_LONGLEXEME
571 ereport(NOTICE,
572 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
573 errmsg("word is too long to be indexed"),
574 errdetail("Words longer than %d characters are ignored.",
575 MAXSTRLEN)));
576 continue;
577#else
578 ereport(ERROR,
579 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
580 errmsg("word is too long to be indexed"),
581 errdetail("Words longer than %d characters are ignored.",
582 MAXSTRLEN)));
583#endif
584 }
585
586 LexizeAddLemm(&ldata, type, lemm, lenlemm);
587
588 do
589 {
590 if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
591 {
592 prs->vectorpos++;
593 addHLParsedLex(prs, query, lexs, norms);
594 }
595 else
596 addHLParsedLex(prs, query, lexs, NULL);
597 } while (norms);
598 } while (type > 0);
599
600 FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
601}
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define NOTICE
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:149
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:659
#define FunctionCall2(flinfo, arg1, arg2)
Definition: fmgr.h:661
#define FunctionCall3(flinfo, arg1, arg2, arg3)
Definition: fmgr.h:663
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:217
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:207
FmgrInfo prstoken
Definition: ts_cache.h:46
FmgrInfo prsstart
Definition: ts_cache.h:45
FmgrInfo prsend
Definition: ts_cache.h:47
TSConfigCacheEntry * lookup_ts_config_cache(Oid cfgId)
Definition: ts_cache.c:385
TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId)
Definition: ts_cache.c:113
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:173
static void addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
Definition: ts_parse.c:499
static void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
Definition: ts_parse.c:100
static void LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
Definition: ts_parse.c:61
#define MAXSTRLEN
Definition: ts_type.h:49

References addHLParsedLex(), buf, DatumGetInt32(), DatumGetPointer(), ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum(), LexizeAddLemm(), LexizeExec(), LexizeInit(), lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, PointerGetDatum(), TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, type, and HeadlineParsedText::vectorpos.

Referenced by headline_json_value(), and ts_headline_byid_opt().

◆ LexizeAddLemm()

static void LexizeAddLemm ( LexizeData *  ld,
int  type,
char *  lemm,
int  lenlemm 
)
static

Definition at line 100 of file ts_parse.c.

101{
102 ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
103
104 newpl->type = type;
105 newpl->lemm = lemm;
106 newpl->lenlemm = lenlemm;
107 LPLAddTail(&ld->towork, newpl);
108 ld->curSub = ld->towork.tail;
109}
ListParsedLex towork
Definition: ts_parse.c:48
ParsedLex * curSub
Definition: ts_parse.c:47
ParsedLex * tail
Definition: ts_parse.c:38
static void LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
Definition: ts_parse.c:73

References LexizeData::curSub, ParsedLex::lemm, ParsedLex::lenlemm, LPLAddTail(), palloc(), ListParsedLex::tail, LexizeData::towork, ParsedLex::type, and type.

Referenced by hlparsetext(), and parsetext().

◆ LexizeExec()

static TSLexeme * LexizeExec ( LexizeData *  ld,
ParsedLex **  correspondLexem 
)
static

Definition at line 173 of file ts_parse.c.

174{
175 int i;
176 ListDictionary *map;
177 TSDictionaryCacheEntry *dict;
178 TSLexeme *res;
179
180 if (ld->curDictId == InvalidOid)
181 {
182 /*
183 * usual mode: dictionary wants only one word, but we should keep in
184 * mind that we should go through all stack
185 */
186
187 while (ld->towork.head)
188 {
189 ParsedLex *curVal = ld->towork.head;
190 char *curValLemm = curVal->lemm;
191 int curValLenLemm = curVal->lenlemm;
192
193 map = ld->cfg->map + curVal->type;
194
195 if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
196 {
197 /* skip this type of lexeme */
198 RemoveHead(ld);
199 continue;
200 }
201
202 for (i = ld->posDict; i < map->len; i++)
203 {
205
206 ld->dictState.isend = ld->dictState.getnext = false;
207 ld->dictState.private_state = NULL;
208 res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
209 PointerGetDatum(dict->dictData),
210 PointerGetDatum(curValLemm),
211 Int32GetDatum(curValLenLemm),
212 PointerGetDatum(&ld->dictState)));
213
214 if (ld->dictState.getnext)
215 {
216 /*
217 * dictionary wants next word, so setup and store current
218 * position and go to multiword mode
219 */
220
221 ld->curDictId = DatumGetObjectId(map->dictIds[i]);
222 ld->posDict = i + 1;
223 ld->curSub = curVal->next;
224 if (res)
225 setNewTmpRes(ld, curVal, res);
226 return LexizeExec(ld, correspondLexem);
227 }
228
229 if (!res) /* dictionary doesn't know this lexeme */
230 continue;
231
232 if (res->flags & TSL_FILTER)
233 {
234 curValLemm = res->lexeme;
235 curValLenLemm = strlen(res->lexeme);
236 continue;
237 }
238
239 RemoveHead(ld);
240 setCorrLex(ld, correspondLexem);
241 return res;
242 }
243
244 RemoveHead(ld);
245 }
246 }
247 else
248 { /* curDictId is valid */
249 dict = lookup_ts_dictionary_cache(ld->curDictId);
250
251 /*
252 * Dictionary ld->curDictId asks us about following words
253 */
254
255 while (ld->curSub)
256 {
257 ParsedLex *curVal = ld->curSub;
258
259 map = ld->cfg->map + curVal->type;
260
261 if (curVal->type != 0)
262 {
263 bool dictExists = false;
264
265 if (curVal->type >= ld->cfg->lenmap || map->len == 0)
266 {
267 /* skip this type of lexeme */
268 ld->curSub = curVal->next;
269 continue;
270 }
271
272 /*
273 * We should be sure that current type of lexeme is recognized
274 * by our dictionary: we just check is it exist in list of
275 * dictionaries ?
276 */
277 for (i = 0; i < map->len && !dictExists; i++)
278 if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
279 dictExists = true;
280
281 if (!dictExists)
282 {
283 /*
284 * Dictionary can't work with current type of lexeme,
285 * return to basic mode and redo all stored lexemes
286 */
287 ld->curDictId = InvalidOid;
288 return LexizeExec(ld, correspondLexem);
289 }
290 }
291
292 ld->dictState.isend = (curVal->type == 0);
293 ld->dictState.getnext = false;
294
295 res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
296 PointerGetDatum(dict->dictData),
297 PointerGetDatum(curVal->lemm),
298 Int32GetDatum(curVal->lenlemm),
299 PointerGetDatum(&ld->dictState)));
300
301 if (ld->dictState.getnext)
302 {
303 /* Dictionary wants one more */
304 ld->curSub = curVal->next;
305 if (res)
306 setNewTmpRes(ld, curVal, res);
307 continue;
308 }
309
310 if (res || ld->tmpRes)
311 {
312 /*
313 * Dictionary normalizes lexemes, so we remove from stack all
314 * used lexemes, return to basic mode and redo end of stack
315 * (if it exists)
316 */
317 if (res)
318 {
319 moveToWaste(ld, ld->curSub);
320 }
321 else
322 {
323 res = ld->tmpRes;
324 moveToWaste(ld, ld->lastRes);
325 }
326
327 /* reset to initial state */
328 ld->curDictId = InvalidOid;
329 ld->posDict = 0;
330 ld->lastRes = NULL;
331 ld->tmpRes = NULL;
332 setCorrLex(ld, correspondLexem);
333 return res;
334 }
335
336 /*
337 * Dict don't want next lexem and didn't recognize anything, redo
338 * from ld->towork.head
339 */
340 ld->curDictId = InvalidOid;
341 return LexizeExec(ld, correspondLexem);
342 }
343 }
344
345 setCorrLex(ld, correspondLexem);
346 return NULL;
347}
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:665
static Oid DatumGetObjectId(Datum X)
Definition: postgres.h:247
#define InvalidOid
Definition: postgres_ext.h:37
void * private_state
Definition: ts_public.h:155
bool getnext
Definition: ts_public.h:154
TSLexeme * tmpRes
Definition: ts_parse.c:57
Oid curDictId
Definition: ts_parse.c:44
ParsedLex * lastRes
Definition: ts_parse.c:56
int posDict
Definition: ts_parse.c:45
DictSubState dictState
Definition: ts_parse.c:46
TSConfigCacheEntry * cfg
Definition: ts_parse.c:43
Oid * dictIds
Definition: ts_cache.h:68
ParsedLex * head
Definition: ts_parse.c:37
ListDictionary * map
Definition: ts_cache.h:80
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:208
static void setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
Definition: ts_parse.c:158
static void RemoveHead(LexizeData *ld)
Definition: ts_parse.c:112
static void setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:120
static void moveToWaste(LexizeData *ld, ParsedLex *stop)
Definition: ts_parse.c:142
#define TSL_FILTER
Definition: ts_public.h:144

References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, DatumGetObjectId(), DatumGetPointer(), TSDictionaryCacheEntry::dictData, ListDictionary::dictIds, LexizeData::dictState, TSLexeme::flags, FunctionCall4, DictSubState::getnext, ListParsedLex::head, i, Int32GetDatum(), InvalidOid, DictSubState::isend, LexizeData::lastRes, ParsedLex::lemm, ListDictionary::len, len, ParsedLex::lenlemm, TSConfigCacheEntry::lenmap, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, LexizeExec(), lookup_ts_dictionary_cache(), TSConfigCacheEntry::map, moveToWaste(), ParsedLex::next, PointerGetDatum(), LexizeData::posDict, DictSubState::private_state, RemoveHead(), setCorrLex(), setNewTmpRes(), LexizeData::tmpRes, LexizeData::towork, TSL_FILTER, and ParsedLex::type.

Referenced by hlparsetext(), LexizeExec(), and parsetext().

◆ LexizeInit()

static void LexizeInit ( LexizeData *  ld,
TSConfigCacheEntry *  cfg 
)
static

Definition at line 61 of file ts_parse.c.

62{
63 ld->cfg = cfg;
64 ld->curDictId = InvalidOid;
65 ld->posDict = 0;
66 ld->towork.head = ld->towork.tail = ld->curSub = NULL;
67 ld->waste.head = ld->waste.tail = NULL;
68 ld->lastRes = NULL;
69 ld->tmpRes = NULL;
70}
ListParsedLex waste
Definition: ts_parse.c:49

References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, ListParsedLex::head, InvalidOid, LexizeData::lastRes, LexizeData::posDict, ListParsedLex::tail, LexizeData::tmpRes, LexizeData::towork, and LexizeData::waste.

Referenced by hlparsetext(), and parsetext().

◆ LPLAddTail()

static void LPLAddTail ( ListParsedLex *  list,
ParsedLex *  newpl 
)
static

Definition at line 73 of file ts_parse.c.

74{
75 if (list->tail)
76 {
77 list->tail->next = newpl;
78 list->tail = newpl;
79 }
80 else
81 list->head = list->tail = newpl;
82 newpl->next = NULL;
83}

References sort-test::list, and ParsedLex::next.

Referenced by LexizeAddLemm(), and RemoveHead().

◆ LPLRemoveHead()

static ParsedLex * LPLRemoveHead ( ListParsedLex *  list)
static

Definition at line 86 of file ts_parse.c.

87{
88 ParsedLex *res = list->head;
89
90 if (list->head)
91 list->head = list->head->next;
92
93 if (list->head == NULL)
94 list->tail = NULL;
95
96 return res;
97}

References sort-test::list.

Referenced by RemoveHead().

◆ moveToWaste()

static void moveToWaste ( LexizeData *  ld,
ParsedLex *  stop 
)
static

Definition at line 142 of file ts_parse.c.

143{
144 bool go = true;
145
146 while (ld->towork.head && go)
147 {
148 if (ld->towork.head == stop)
149 {
150 ld->curSub = stop->next;
151 go = false;
152 }
153 RemoveHead(ld);
154 }
155}

References LexizeData::curSub, ListParsedLex::head, ParsedLex::next, RemoveHead(), and LexizeData::towork.

Referenced by LexizeExec().

◆ parsetext()

void parsetext ( Oid  cfgId,
ParsedText *  prs,
char *  buf,
int  buflen 
)

Definition at line 355 of file ts_parse.c.

356{
357 int type,
358 lenlemm = 0; /* silence compiler warning */
359 char *lemm = NULL;
360 LexizeData ldata;
361 TSLexeme *norms;
362 TSConfigCacheEntry *cfg;
363 TSParserCacheEntry *prsobj;
364 void *prsdata;
365
366 cfg = lookup_ts_config_cache(cfgId);
367 prsobj = lookup_ts_parser_cache(cfg->prsId);
368
369 prsdata = DatumGetPointer(FunctionCall2(&prsobj->prsstart,
370 PointerGetDatum(buf),
371 Int32GetDatum(buflen)));
372
373 LexizeInit(&ldata, cfg);
374
375 do
376 {
377 type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
378 PointerGetDatum(prsdata),
379 PointerGetDatum(&lemm),
380 PointerGetDatum(&lenlemm)));
381
382 if (type > 0 && lenlemm >= MAXSTRLEN)
383 {
384#ifdef IGNORE_LONGLEXEME
385 ereport(NOTICE,
386 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
387 errmsg("word is too long to be indexed"),
388 errdetail("Words longer than %d characters are ignored.",
389 MAXSTRLEN)));
390 continue;
391#else
392 ereport(ERROR,
393 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
394 errmsg("word is too long to be indexed"),
395 errdetail("Words longer than %d characters are ignored.",
396 MAXSTRLEN)));
397#endif
398 }
399
400 LexizeAddLemm(&ldata, type, lemm, lenlemm);
401
402 while ((norms = LexizeExec(&ldata, NULL)) != NULL)
403 {
404 TSLexeme *ptr = norms;
405
406 prs->pos++; /* set pos */
407
408 while (ptr->lexeme)
409 {
410 if (prs->curwords == prs->lenwords)
411 {
412 prs->lenwords *= 2;
413 prs->words = (ParsedWord *) repalloc(prs->words, prs->lenwords * sizeof(ParsedWord));
414 }
415
416 if (ptr->flags & TSL_ADDPOS)
417 prs->pos++;
418 prs->words[prs->curwords].len = strlen(ptr->lexeme);
419 prs->words[prs->curwords].word = ptr->lexeme;
420 prs->words[prs->curwords].nvariant = ptr->nvariant;
421 prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
422 prs->words[prs->curwords].alen = 0;
423 prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
424 ptr++;
425 prs->curwords++;
426 }
427 pfree(norms);
428 }
429 } while (type > 0);
430
431 FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
432}
int32 pos
Definition: ts_utils.h:107
int32 lenwords
Definition: ts_utils.h:105
int32 curwords
Definition: ts_utils.h:106
ParsedWord * words
Definition: ts_utils.h:104
uint16 alen
Definition: ts_utils.h:87
uint16 flags
Definition: ts_utils.h:84
uint16 nvariant
Definition: ts_utils.h:86
uint16 len
Definition: ts_utils.h:85
uint16 pos
Definition: ts_utils.h:90
char * word
Definition: ts_utils.h:99
uint16 nvariant
Definition: ts_public.h:134
#define TSL_PREFIX
Definition: ts_public.h:143

References ParsedWord::alen, buf, ParsedText::curwords, DatumGetInt32(), DatumGetPointer(), ereport, errcode(), errdetail(), errmsg(), ERROR, TSLexeme::flags, ParsedWord::flags, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum(), ParsedWord::len, ParsedText::lenwords, TSLexeme::lexeme, LexizeAddLemm(), LexizeExec(), LexizeInit(), LIMITPOS, lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, TSLexeme::nvariant, ParsedWord::nvariant, pfree(), PointerGetDatum(), ParsedWord::pos, ParsedText::pos, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, repalloc(), TSL_ADDPOS, TSL_PREFIX, type, ParsedWord::word, and ParsedText::words.

Referenced by add_to_tsvector(), pushval_morph(), to_tsvector_byid(), and tsvector_update_trigger().

◆ RemoveHead()

static void RemoveHead ( LexizeData *  ld)
static

Definition at line 112 of file ts_parse.c.

113{
114 LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork));
115
116 ld->posDict = 0;
117}
static ParsedLex * LPLRemoveHead(ListParsedLex *list)
Definition: ts_parse.c:86

References LPLAddTail(), LPLRemoveHead(), LexizeData::posDict, LexizeData::towork, and LexizeData::waste.

Referenced by LexizeExec(), and moveToWaste().

◆ setCorrLex()

static void setCorrLex ( LexizeData *  ld,
ParsedLex **  correspondLexem 
)
static

Definition at line 120 of file ts_parse.c.

121{
122 if (correspondLexem)
123 {
124 *correspondLexem = ld->waste.head;
125 }
126 else
127 {
128 ParsedLex *tmp,
129 *ptr = ld->waste.head;
130
131 while (ptr)
132 {
133 tmp = ptr->next;
134 pfree(ptr);
135 ptr = tmp;
136 }
137 }
138 ld->waste.head = ld->waste.tail = NULL;
139}

References ListParsedLex::head, ParsedLex::next, pfree(), ListParsedLex::tail, and LexizeData::waste.

Referenced by LexizeExec().

◆ setNewTmpRes()

static void setNewTmpRes ( LexizeData *  ld,
ParsedLex *  lex,
TSLexeme *  res 
)
static

Definition at line 158 of file ts_parse.c.

159{
160 if (ld->tmpRes)
161 {
162 TSLexeme *ptr;
163
164 for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
165 pfree(ptr->lexeme);
166 pfree(ld->tmpRes);
167 }
168 ld->tmpRes = res;
169 ld->lastRes = lex;
170}

References LexizeData::lastRes, TSLexeme::lexeme, pfree(), and LexizeData::tmpRes.

Referenced by LexizeExec().