PostgreSQL Source Code  git master
ts_parse.c File Reference
#include "postgres.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "varatt.h"
Include dependency graph for ts_parse.c:

Go to the source code of this file.

Data Structures

struct  ParsedLex
 
struct  ListParsedLex
 
struct  LexizeData
 

Macros

#define IGNORE_LONGLEXEME   1
 

Typedefs

typedef struct ParsedLex ParsedLex
 
typedef struct ListParsedLex ListParsedLex
 

Functions

static void LexizeInit (LexizeData *ld, TSConfigCacheEntry *cfg)
 
static void LPLAddTail (ListParsedLex *list, ParsedLex *newpl)
 
static ParsedLex * LPLRemoveHead (ListParsedLex *list)
 
static void LexizeAddLemm (LexizeData *ld, int type, char *lemm, int lenlemm)
 
static void RemoveHead (LexizeData *ld)
 
static void setCorrLex (LexizeData *ld, ParsedLex **correspondLexem)
 
static void moveToWaste (LexizeData *ld, ParsedLex *stop)
 
static void setNewTmpRes (LexizeData *ld, ParsedLex *lex, TSLexeme *res)
 
static TSLexeme * LexizeExec (LexizeData *ld, ParsedLex **correspondLexem)
 
void parsetext (Oid cfgId, ParsedText *prs, char *buf, int buflen)
 
static void hladdword (HeadlineParsedText *prs, char *buf, int buflen, int type)
 
static void hlfinditem (HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
 
static void addHLParsedLex (HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
 
void hlparsetext (Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
 
text * generateHeadline (HeadlineParsedText *prs)
 

Macro Definition Documentation

◆ IGNORE_LONGLEXEME

#define IGNORE_LONGLEXEME   1

Definition at line 21 of file ts_parse.c.

Typedef Documentation

◆ ListParsedLex

typedef struct ListParsedLex ListParsedLex

◆ ParsedLex

typedef struct ParsedLex ParsedLex

Function Documentation

◆ addHLParsedLex()

static void addHLParsedLex ( HeadlineParsedText *  prs,
TSQuery  query,
ParsedLex *  lexs,
TSLexeme *  norms 
)
static

Definition at line 499 of file ts_parse.c.

500 {
501  ParsedLex *tmplexs;
502  TSLexeme *ptr;
503  int32 savedpos;
504 
505  while (lexs)
506  {
507  if (lexs->type > 0)
508  hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
509 
510  ptr = norms;
511  savedpos = prs->vectorpos;
512  while (ptr && ptr->lexeme)
513  {
514  if (ptr->flags & TSL_ADDPOS)
515  savedpos++;
516  hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
517  ptr++;
518  }
519 
520  tmplexs = lexs->next;
521  pfree(lexs);
522  lexs = tmplexs;
523  }
524 
525  if (norms)
526  {
527  ptr = norms;
528  while (ptr->lexeme)
529  {
530  if (ptr->flags & TSL_ADDPOS)
531  prs->vectorpos++;
532  pfree(ptr->lexeme);
533  ptr++;
534  }
535  pfree(norms);
536  }
537 }
int32_t int32
Definition: c.h:481
void pfree(void *pointer)
Definition: mcxt.c:1521
char * lemm
Definition: ts_parse.c:30
int lenlemm
Definition: ts_parse.c:31
struct ParsedLex * next
Definition: ts_parse.c:32
int type
Definition: ts_parse.c:29
char * lexeme
Definition: ts_public.h:138
uint16 flags
Definition: ts_public.h:136
static void hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
Definition: ts_parse.c:464
static void hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
Definition: ts_parse.c:440
#define TSL_ADDPOS
Definition: ts_public.h:142

References TSLexeme::flags, hladdword(), hlfinditem(), ParsedLex::lemm, ParsedLex::lenlemm, TSLexeme::lexeme, ParsedLex::next, pfree(), TSL_ADDPOS, ParsedLex::type, and HeadlineParsedText::vectorpos.

Referenced by hlparsetext().

◆ generateHeadline()

text* generateHeadline ( HeadlineParsedText *  prs)

Definition at line 607 of file ts_parse.c.

608 {
609  text *out;
610  char *ptr;
611  int len = 128;
612  int numfragments = 0;
613  int16 infrag = 0;
614 
615  HeadlineWordEntry *wrd = prs->words;
616 
617  out = (text *) palloc(len);
618  ptr = ((char *) out) + VARHDRSZ;
619 
620  while (wrd - prs->words < prs->curwords)
621  {
622  while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= len)
623  {
624  int dist = ptr - ((char *) out);
625 
626  len *= 2;
627  out = (text *) repalloc(out, len);
628  ptr = ((char *) out) + dist;
629  }
630 
631  if (wrd->in && !wrd->repeated)
632  {
633  if (!infrag)
634  {
635 
636  /* start of a new fragment */
637  infrag = 1;
638  numfragments++;
639  /* add a fragment delimiter if this is after the first one */
640  if (numfragments > 1)
641  {
642  memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
643  ptr += prs->fragdelimlen;
644  }
645  }
646  if (wrd->replace)
647  {
648  *ptr = ' ';
649  ptr++;
650  }
651  else if (!wrd->skip)
652  {
653  if (wrd->selected)
654  {
655  memcpy(ptr, prs->startsel, prs->startsellen);
656  ptr += prs->startsellen;
657  }
658  memcpy(ptr, wrd->word, wrd->len);
659  ptr += wrd->len;
660  if (wrd->selected)
661  {
662  memcpy(ptr, prs->stopsel, prs->stopsellen);
663  ptr += prs->stopsellen;
664  }
665  }
666  }
667  else if (!wrd->repeated)
668  {
669  if (infrag)
670  infrag = 0;
671  pfree(wrd->word);
672  }
673 
674  wrd++;
675  }
676 
677  SET_VARSIZE(out, ptr - ((char *) out));
678  return out;
679 }
#define VARHDRSZ
Definition: c.h:646
int16_t int16
Definition: c.h:480
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
const void size_t len
HeadlineWordEntry * words
Definition: ts_public.h:76
Definition: c.h:641
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References HeadlineParsedText::curwords, HeadlineParsedText::fragdelim, HeadlineParsedText::fragdelimlen, HeadlineWordEntry::in, HeadlineWordEntry::len, len, palloc(), pfree(), repalloc(), HeadlineWordEntry::repeated, HeadlineWordEntry::replace, HeadlineWordEntry::selected, SET_VARSIZE, HeadlineWordEntry::skip, HeadlineParsedText::startsel, HeadlineParsedText::startsellen, HeadlineParsedText::stopsel, HeadlineParsedText::stopsellen, VARHDRSZ, HeadlineWordEntry::word, and HeadlineParsedText::words.

Referenced by headline_json_value(), and ts_headline_byid_opt().

◆ hladdword()

static void hladdword ( HeadlineParsedText *  prs,
char *  buf,
int  buflen,
int  type 
)
static

Definition at line 440 of file ts_parse.c.

441 {
442  if (prs->curwords >= prs->lenwords)
443  {
444  prs->lenwords *= 2;
445  prs->words = (HeadlineWordEntry *) repalloc(prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
446  }
447  memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWordEntry));
448  prs->words[prs->curwords].type = (uint8) type;
449  prs->words[prs->curwords].len = buflen;
450  prs->words[prs->curwords].word = palloc(buflen);
451  memcpy(prs->words[prs->curwords].word, buf, buflen);
452  prs->curwords++;
453 }
uint8_t uint8
Definition: c.h:483
static char * buf
Definition: pg_test_fsync.c:72
const char * type

References buf, HeadlineParsedText::curwords, HeadlineWordEntry::len, HeadlineParsedText::lenwords, palloc(), repalloc(), type, HeadlineWordEntry::type, HeadlineWordEntry::word, and HeadlineParsedText::words.

Referenced by addHLParsedLex().

◆ hlfinditem()

static void hlfinditem ( HeadlineParsedText *  prs,
TSQuery  query,
int32  pos,
char *  buf,
int  buflen 
)
static

Definition at line 464 of file ts_parse.c.

465 {
466  int i;
467  QueryItem *item = GETQUERY(query);
468  HeadlineWordEntry *word;
469 
470  while (prs->curwords + query->size >= prs->lenwords)
471  {
472  prs->lenwords *= 2;
473  prs->words = (HeadlineWordEntry *) repalloc(prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
474  }
475 
476  word = &(prs->words[prs->curwords - 1]);
477  word->pos = LIMITPOS(pos);
478  for (i = 0; i < query->size; i++)
479  {
480  if (item->type == QI_VAL &&
481  tsCompareString(GETOPERAND(query) + item->qoperand.distance, item->qoperand.length,
482  buf, buflen, item->qoperand.prefix) == 0)
483  {
484  if (word->item)
485  {
486  memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
487  prs->words[prs->curwords].item = &item->qoperand;
488  prs->words[prs->curwords].repeated = 1;
489  prs->curwords++;
490  }
491  else
492  word->item = &item->qoperand;
493  }
494  item++;
495  }
496 }
#define GETQUERY(x)
Definition: _int.h:157
int i
Definition: isn.c:72
#define GETOPERAND(x)
Definition: ltree.h:165
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
Definition: regcomp.c:1476
QueryOperand * item
Definition: ts_public.h:70
bool prefix
Definition: ts_type.h:163
uint32 distance
Definition: ts_type.h:172
uint32 length
Definition: ts_type.h:171
int32 size
Definition: ts_type.h:221
#define QI_VAL
Definition: ts_type.h:149
#define LIMITPOS(x)
Definition: ts_type.h:87
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152
QueryOperand qoperand
Definition: ts_type.h:210
QueryItemType type
Definition: ts_type.h:208

References buf, HeadlineParsedText::curwords, QueryOperand::distance, GETOPERAND, GETQUERY, i, HeadlineWordEntry::item, QueryOperand::length, HeadlineParsedText::lenwords, LIMITPOS, QueryOperand::prefix, QI_VAL, QueryItem::qoperand, repalloc(), HeadlineWordEntry::repeated, TSQueryData::size, tsCompareString(), QueryItem::type, word(), and HeadlineParsedText::words.

Referenced by addHLParsedLex().

◆ hlparsetext()

void hlparsetext ( Oid  cfgId,
HeadlineParsedText *  prs,
TSQuery  query,
char *  buf,
int  buflen 
)

Definition at line 540 of file ts_parse.c.

541 {
542  int type,
543  lenlemm = 0; /* silence compiler warning */
544  char *lemm = NULL;
545  LexizeData ldata;
546  TSLexeme *norms;
547  ParsedLex *lexs;
548  TSConfigCacheEntry *cfg;
549  TSParserCacheEntry *prsobj;
550  void *prsdata;
551 
552  cfg = lookup_ts_config_cache(cfgId);
553  prsobj = lookup_ts_parser_cache(cfg->prsId);
554 
555  prsdata = DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
556  PointerGetDatum(buf),
557  Int32GetDatum(buflen)));
558 
559  LexizeInit(&ldata, cfg);
560 
561  do
562  {
563  type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
564  PointerGetDatum(prsdata),
565  PointerGetDatum(&lemm),
566  PointerGetDatum(&lenlemm)));
567 
568  if (type > 0 && lenlemm >= MAXSTRLEN)
569  {
570 #ifdef IGNORE_LONGLEXEME
571  ereport(NOTICE,
572  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
573  errmsg("word is too long to be indexed"),
574  errdetail("Words longer than %d characters are ignored.",
575  MAXSTRLEN)));
576  continue;
577 #else
578  ereport(ERROR,
579  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
580  errmsg("word is too long to be indexed"),
581  errdetail("Words longer than %d characters are ignored.",
582  MAXSTRLEN)));
583 #endif
584  }
585 
586  LexizeAddLemm(&ldata, type, lemm, lenlemm);
587 
588  do
589  {
590  if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
591  {
592  prs->vectorpos++;
593  addHLParsedLex(prs, query, lexs, norms);
594  }
595  else
596  addHLParsedLex(prs, query, lexs, NULL);
597  } while (norms);
598  } while (type > 0);
599 
600  FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
601 }
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define NOTICE
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:149
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:659
#define FunctionCall2(flinfo, arg1, arg2)
Definition: fmgr.h:661
#define FunctionCall3(flinfo, arg1, arg2, arg3)
Definition: fmgr.h:663
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202
FmgrInfo prstoken
Definition: ts_cache.h:46
FmgrInfo prsstart
Definition: ts_cache.h:45
FmgrInfo prsend
Definition: ts_cache.h:47
TSConfigCacheEntry * lookup_ts_config_cache(Oid cfgId)
Definition: ts_cache.c:385
TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId)
Definition: ts_cache.c:113
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:173
static void addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
Definition: ts_parse.c:499
static void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
Definition: ts_parse.c:100
static void LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
Definition: ts_parse.c:61
#define MAXSTRLEN
Definition: ts_type.h:49

References addHLParsedLex(), buf, DatumGetInt32(), DatumGetPointer(), ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum(), LexizeAddLemm(), LexizeExec(), LexizeInit(), lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, PointerGetDatum(), TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, type, and HeadlineParsedText::vectorpos.

Referenced by headline_json_value(), and ts_headline_byid_opt().

◆ LexizeAddLemm()

static void LexizeAddLemm ( LexizeData *  ld,
int  type,
char *  lemm,
int  lenlemm 
)
static

Definition at line 100 of file ts_parse.c.

101 {
102  ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
103 
104  newpl->type = type;
105  newpl->lemm = lemm;
106  newpl->lenlemm = lenlemm;
107  LPLAddTail(&ld->towork, newpl);
108  ld->curSub = ld->towork.tail;
109 }
ListParsedLex towork
Definition: ts_parse.c:48
ParsedLex * curSub
Definition: ts_parse.c:47
ParsedLex * tail
Definition: ts_parse.c:38
static void LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
Definition: ts_parse.c:73

References LexizeData::curSub, ParsedLex::lemm, ParsedLex::lenlemm, LPLAddTail(), palloc(), ListParsedLex::tail, LexizeData::towork, ParsedLex::type, and type.

Referenced by hlparsetext(), and parsetext().

◆ LexizeExec()

static TSLexeme* LexizeExec ( LexizeData *  ld,
ParsedLex **  correspondLexem 
)
static

Definition at line 173 of file ts_parse.c.

174 {
175  int i;
176  ListDictionary *map;
177  TSDictionaryCacheEntry *dict;
178  TSLexeme *res;
179 
180  if (ld->curDictId == InvalidOid)
181  {
182  /*
183  * usual mode: dictionary wants only one word, but we should keep in
184  * mind that we should go through all stack
185  */
186 
187  while (ld->towork.head)
188  {
189  ParsedLex *curVal = ld->towork.head;
190  char *curValLemm = curVal->lemm;
191  int curValLenLemm = curVal->lenlemm;
192 
193  map = ld->cfg->map + curVal->type;
194 
195  if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
196  {
197  /* skip this type of lexeme */
198  RemoveHead(ld);
199  continue;
200  }
201 
202  for (i = ld->posDict; i < map->len; i++)
203  {
204  dict = lookup_ts_dictionary_cache(map->dictIds[i]);
205 
206  ld->dictState.isend = ld->dictState.getnext = false;
207  ld->dictState.private_state = NULL;
208  res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
209  PointerGetDatum(dict->dictData),
210  PointerGetDatum(curValLemm),
211  Int32GetDatum(curValLenLemm),
212  PointerGetDatum(&ld->dictState)));
213 
214  if (ld->dictState.getnext)
215  {
216  /*
217  * dictionary wants next word, so setup and store current
218  * position and go to multiword mode
219  */
220 
221  ld->curDictId = DatumGetObjectId(map->dictIds[i]);
222  ld->posDict = i + 1;
223  ld->curSub = curVal->next;
224  if (res)
225  setNewTmpRes(ld, curVal, res);
226  return LexizeExec(ld, correspondLexem);
227  }
228 
229  if (!res) /* dictionary doesn't know this lexeme */
230  continue;
231 
232  if (res->flags & TSL_FILTER)
233  {
234  curValLemm = res->lexeme;
235  curValLenLemm = strlen(res->lexeme);
236  continue;
237  }
238 
239  RemoveHead(ld);
240  setCorrLex(ld, correspondLexem);
241  return res;
242  }
243 
244  RemoveHead(ld);
245  }
246  }
247  else
248  { /* curDictId is valid */
249  dict = lookup_ts_dictionary_cache(ld->curDictId);
250 
251  /*
252  * Dictionary ld->curDictId asks us about following words
253  */
254 
255  while (ld->curSub)
256  {
257  ParsedLex *curVal = ld->curSub;
258 
259  map = ld->cfg->map + curVal->type;
260 
261  if (curVal->type != 0)
262  {
263  bool dictExists = false;
264 
265  if (curVal->type >= ld->cfg->lenmap || map->len == 0)
266  {
267  /* skip this type of lexeme */
268  ld->curSub = curVal->next;
269  continue;
270  }
271 
272  /*
273  * We should be sure that current type of lexeme is recognized
274  * by our dictionary: we just check is it exist in list of
275  * dictionaries ?
276  */
277  for (i = 0; i < map->len && !dictExists; i++)
278  if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
279  dictExists = true;
280 
281  if (!dictExists)
282  {
283  /*
284  * Dictionary can't work with current type of lexeme,
285  * return to basic mode and redo all stored lexemes
286  */
287  ld->curDictId = InvalidOid;
288  return LexizeExec(ld, correspondLexem);
289  }
290  }
291 
292  ld->dictState.isend = (curVal->type == 0);
293  ld->dictState.getnext = false;
294 
295  res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
296  PointerGetDatum(dict->dictData),
297  PointerGetDatum(curVal->lemm),
298  Int32GetDatum(curVal->lenlemm),
299  PointerGetDatum(&ld->dictState)));
300 
301  if (ld->dictState.getnext)
302  {
303  /* Dictionary wants one more */
304  ld->curSub = curVal->next;
305  if (res)
306  setNewTmpRes(ld, curVal, res);
307  continue;
308  }
309 
310  if (res || ld->tmpRes)
311  {
312  /*
313  * Dictionary normalizes lexemes, so we remove from stack all
314  * used lexemes, return to basic mode and redo end of stack
315  * (if it exists)
316  */
317  if (res)
318  {
319  moveToWaste(ld, ld->curSub);
320  }
321  else
322  {
323  res = ld->tmpRes;
324  moveToWaste(ld, ld->lastRes);
325  }
326 
327  /* reset to initial state */
328  ld->curDictId = InvalidOid;
329  ld->posDict = 0;
330  ld->lastRes = NULL;
331  ld->tmpRes = NULL;
332  setCorrLex(ld, correspondLexem);
333  return res;
334  }
335 
336  /*
337  * Dict don't want next lexem and didn't recognize anything, redo
338  * from ld->towork.head
339  */
340  ld->curDictId = InvalidOid;
341  return LexizeExec(ld, correspondLexem);
342  }
343  }
344 
345  setCorrLex(ld, correspondLexem);
346  return NULL;
347 }
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:665
static Oid DatumGetObjectId(Datum X)
Definition: postgres.h:242
#define InvalidOid
Definition: postgres_ext.h:36
void * private_state
Definition: ts_public.h:155
bool getnext
Definition: ts_public.h:154
TSLexeme * tmpRes
Definition: ts_parse.c:57
Oid curDictId
Definition: ts_parse.c:44
ParsedLex * lastRes
Definition: ts_parse.c:56
int posDict
Definition: ts_parse.c:45
DictSubState dictState
Definition: ts_parse.c:46
TSConfigCacheEntry * cfg
Definition: ts_parse.c:43
Oid * dictIds
Definition: ts_cache.h:68
ParsedLex * head
Definition: ts_parse.c:37
ListDictionary * map
Definition: ts_cache.h:80
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:208
static void setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
Definition: ts_parse.c:158
static void RemoveHead(LexizeData *ld)
Definition: ts_parse.c:112
static void setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:120
static void moveToWaste(LexizeData *ld, ParsedLex *stop)
Definition: ts_parse.c:142
#define TSL_FILTER
Definition: ts_public.h:144

References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, DatumGetObjectId(), DatumGetPointer(), TSDictionaryCacheEntry::dictData, ListDictionary::dictIds, LexizeData::dictState, FunctionCall4, DictSubState::getnext, ListParsedLex::head, i, Int32GetDatum(), InvalidOid, DictSubState::isend, LexizeData::lastRes, ParsedLex::lemm, ListDictionary::len, len, ParsedLex::lenlemm, TSConfigCacheEntry::lenmap, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), TSConfigCacheEntry::map, moveToWaste(), ParsedLex::next, PointerGetDatum(), LexizeData::posDict, DictSubState::private_state, RemoveHead(), res, setCorrLex(), setNewTmpRes(), LexizeData::tmpRes, LexizeData::towork, TSL_FILTER, and ParsedLex::type.

Referenced by hlparsetext(), and parsetext().

◆ LexizeInit()

static void LexizeInit ( LexizeData *  ld,
TSConfigCacheEntry *  cfg 
)
static

Definition at line 61 of file ts_parse.c.

62 {
63  ld->cfg = cfg;
64  ld->curDictId = InvalidOid;
65  ld->posDict = 0;
66  ld->towork.head = ld->towork.tail = ld->curSub = NULL;
67  ld->waste.head = ld->waste.tail = NULL;
68  ld->lastRes = NULL;
69  ld->tmpRes = NULL;
70 }
ListParsedLex waste
Definition: ts_parse.c:49

References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, ListParsedLex::head, InvalidOid, LexizeData::lastRes, LexizeData::posDict, ListParsedLex::tail, LexizeData::tmpRes, LexizeData::towork, and LexizeData::waste.

Referenced by hlparsetext(), and parsetext().

◆ LPLAddTail()

static void LPLAddTail ( ListParsedLex *  list,
ParsedLex *  newpl 
)
static

Definition at line 73 of file ts_parse.c.

74 {
75  if (list->tail)
76  {
77  list->tail->next = newpl;
78  list->tail = newpl;
79  }
80  else
81  list->head = list->tail = newpl;
82  newpl->next = NULL;
83 }

References sort-test::list, and ParsedLex::next.

Referenced by LexizeAddLemm(), and RemoveHead().

◆ LPLRemoveHead()

static ParsedLex* LPLRemoveHead ( ListParsedLex *  list)
static

Definition at line 86 of file ts_parse.c.

87 {
88  ParsedLex *res = list->head;
89 
90  if (list->head)
91  list->head = list->head->next;
92 
93  if (list->head == NULL)
94  list->tail = NULL;
95 
96  return res;
97 }

References sort-test::list, and res.

Referenced by RemoveHead().

◆ moveToWaste()

static void moveToWaste ( LexizeData *  ld,
ParsedLex *  stop 
)
static

Definition at line 142 of file ts_parse.c.

143 {
144  bool go = true;
145 
146  while (ld->towork.head && go)
147  {
148  if (ld->towork.head == stop)
149  {
150  ld->curSub = stop->next;
151  go = false;
152  }
153  RemoveHead(ld);
154  }
155 }

References LexizeData::curSub, ListParsedLex::head, ParsedLex::next, RemoveHead(), and LexizeData::towork.

Referenced by LexizeExec().

◆ parsetext()

void parsetext ( Oid  cfgId,
ParsedText *  prs,
char *  buf,
int  buflen 
)

Definition at line 355 of file ts_parse.c.

356 {
357  int type,
358  lenlemm = 0; /* silence compiler warning */
359  char *lemm = NULL;
360  LexizeData ldata;
361  TSLexeme *norms;
362  TSConfigCacheEntry *cfg;
363  TSParserCacheEntry *prsobj;
364  void *prsdata;
365 
366  cfg = lookup_ts_config_cache(cfgId);
367  prsobj = lookup_ts_parser_cache(cfg->prsId);
368 
369  prsdata = DatumGetPointer(FunctionCall2(&prsobj->prsstart,
370  PointerGetDatum(buf),
371  Int32GetDatum(buflen)));
372 
373  LexizeInit(&ldata, cfg);
374 
375  do
376  {
377  type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
378  PointerGetDatum(prsdata),
379  PointerGetDatum(&lemm),
380  PointerGetDatum(&lenlemm)));
381 
382  if (type > 0 && lenlemm >= MAXSTRLEN)
383  {
384 #ifdef IGNORE_LONGLEXEME
385  ereport(NOTICE,
386  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
387  errmsg("word is too long to be indexed"),
388  errdetail("Words longer than %d characters are ignored.",
389  MAXSTRLEN)));
390  continue;
391 #else
392  ereport(ERROR,
393  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
394  errmsg("word is too long to be indexed"),
395  errdetail("Words longer than %d characters are ignored.",
396  MAXSTRLEN)));
397 #endif
398  }
399 
400  LexizeAddLemm(&ldata, type, lemm, lenlemm);
401 
402  while ((norms = LexizeExec(&ldata, NULL)) != NULL)
403  {
404  TSLexeme *ptr = norms;
405 
406  prs->pos++; /* set pos */
407 
408  while (ptr->lexeme)
409  {
410  if (prs->curwords == prs->lenwords)
411  {
412  prs->lenwords *= 2;
413  prs->words = (ParsedWord *) repalloc(prs->words, prs->lenwords * sizeof(ParsedWord));
414  }
415 
416  if (ptr->flags & TSL_ADDPOS)
417  prs->pos++;
418  prs->words[prs->curwords].len = strlen(ptr->lexeme);
419  prs->words[prs->curwords].word = ptr->lexeme;
420  prs->words[prs->curwords].nvariant = ptr->nvariant;
421  prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
422  prs->words[prs->curwords].alen = 0;
423  prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
424  ptr++;
425  prs->curwords++;
426  }
427  pfree(norms);
428  }
429  } while (type > 0);
430 
431  FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
432 }
int32 pos
Definition: ts_utils.h:107
int32 lenwords
Definition: ts_utils.h:105
int32 curwords
Definition: ts_utils.h:106
ParsedWord * words
Definition: ts_utils.h:104
uint16 alen
Definition: ts_utils.h:87
uint16 flags
Definition: ts_utils.h:84
uint16 nvariant
Definition: ts_utils.h:86
uint16 len
Definition: ts_utils.h:85
uint16 pos
Definition: ts_utils.h:90
char * word
Definition: ts_utils.h:99
uint16 nvariant
Definition: ts_public.h:134
#define TSL_PREFIX
Definition: ts_public.h:143

References ParsedWord::alen, buf, ParsedText::curwords, DatumGetInt32(), DatumGetPointer(), ereport, errcode(), errdetail(), errmsg(), ERROR, TSLexeme::flags, ParsedWord::flags, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum(), ParsedWord::len, ParsedText::lenwords, TSLexeme::lexeme, LexizeAddLemm(), LexizeExec(), LexizeInit(), LIMITPOS, lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, TSLexeme::nvariant, ParsedWord::nvariant, pfree(), PointerGetDatum(), ParsedWord::pos, ParsedText::pos, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, repalloc(), TSL_ADDPOS, TSL_PREFIX, type, ParsedWord::word, and ParsedText::words.

Referenced by add_to_tsvector(), pushval_morph(), to_tsvector_byid(), and tsvector_update_trigger().

◆ RemoveHead()

static void RemoveHead ( LexizeData *  ld)
static

Definition at line 112 of file ts_parse.c.

113 {
114  LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork));
115 
116  ld->posDict = 0;
117 }
static ParsedLex * LPLRemoveHead(ListParsedLex *list)
Definition: ts_parse.c:86

References LPLAddTail(), LPLRemoveHead(), LexizeData::posDict, LexizeData::towork, and LexizeData::waste.

Referenced by LexizeExec(), and moveToWaste().

◆ setCorrLex()

static void setCorrLex ( LexizeData *  ld,
ParsedLex **  correspondLexem 
)
static

Definition at line 120 of file ts_parse.c.

121 {
122  if (correspondLexem)
123  {
124  *correspondLexem = ld->waste.head;
125  }
126  else
127  {
128  ParsedLex *tmp,
129  *ptr = ld->waste.head;
130 
131  while (ptr)
132  {
133  tmp = ptr->next;
134  pfree(ptr);
135  ptr = tmp;
136  }
137  }
138  ld->waste.head = ld->waste.tail = NULL;
139 }

References ListParsedLex::head, ParsedLex::next, pfree(), ListParsedLex::tail, and LexizeData::waste.

Referenced by LexizeExec().

◆ setNewTmpRes()

static void setNewTmpRes ( LexizeData *  ld,
ParsedLex *  lex,
TSLexeme *  res 
)
static

Definition at line 158 of file ts_parse.c.

159 {
160  if (ld->tmpRes)
161  {
162  TSLexeme *ptr;
163 
164  for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
165  pfree(ptr->lexeme);
166  pfree(ld->tmpRes);
167  }
168  ld->tmpRes = res;
169  ld->lastRes = lex;
170 }

References LexizeData::lastRes, TSLexeme::lexeme, pfree(), res, and LexizeData::tmpRes.

Referenced by LexizeExec().