PostgreSQL Source Code  git master
ts_parse.c File Reference
#include "postgres.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
Include dependency graph for ts_parse.c:

Go to the source code of this file.

Data Structures

struct  ParsedLex
 
struct  ListParsedLex
 
struct  LexizeData
 

Macros

#define IGNORE_LONGLEXEME   1
 

Typedefs

typedef struct ParsedLex ParsedLex
 
typedef struct ListParsedLex ListParsedLex
 

Functions

static void LexizeInit (LexizeData *ld, TSConfigCacheEntry *cfg)
 
static void LPLAddTail (ListParsedLex *list, ParsedLex *newpl)
 
static ParsedLex * LPLRemoveHead (ListParsedLex *list)
 
static void LexizeAddLemm (LexizeData *ld, int type, char *lemm, int lenlemm)
 
static void RemoveHead (LexizeData *ld)
 
static void setCorrLex (LexizeData *ld, ParsedLex **correspondLexem)
 
static void moveToWaste (LexizeData *ld, ParsedLex *stop)
 
static void setNewTmpRes (LexizeData *ld, ParsedLex *lex, TSLexeme *res)
 
static TSLexeme * LexizeExec (LexizeData *ld, ParsedLex **correspondLexem)
 
void parsetext (Oid cfgId, ParsedText *prs, char *buf, int buflen)
 
static void hladdword (HeadlineParsedText *prs, char *buf, int buflen, int type)
 
static void hlfinditem (HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
 
static void addHLParsedLex (HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
 
void hlparsetext (Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
 
text * generateHeadline (HeadlineParsedText *prs)
 

Macro Definition Documentation

◆ IGNORE_LONGLEXEME

#define IGNORE_LONGLEXEME   1

Definition at line 20 of file ts_parse.c.

Typedef Documentation

◆ ListParsedLex

typedef struct ListParsedLex ListParsedLex

◆ ParsedLex

typedef struct ParsedLex ParsedLex

Function Documentation

◆ addHLParsedLex()

static void addHLParsedLex ( HeadlineParsedText *  prs,
TSQuery  query,
ParsedLex *  lexs,
TSLexeme *  norms 
)
static

Definition at line 488 of file ts_parse.c.

References TSLexeme::flags, hladdword(), hlfinditem(), ParsedLex::lemm, ParsedLex::lenlemm, TSLexeme::lexeme, ParsedLex::next, pfree(), TSL_ADDPOS, ParsedLex::type, and HeadlineParsedText::vectorpos.

Referenced by hlparsetext().

489 {
490  ParsedLex *tmplexs;
491  TSLexeme *ptr;
492  int32 savedpos;
493 
494  while (lexs)
495  {
496  if (lexs->type > 0)
497  hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
498 
499  ptr = norms;
500  savedpos = prs->vectorpos;
501  while (ptr && ptr->lexeme)
502  {
503  if (ptr->flags & TSL_ADDPOS)
504  savedpos++;
505  hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
506  ptr++;
507  }
508 
509  tmplexs = lexs->next;
510  pfree(lexs);
511  lexs = tmplexs;
512  }
513 
514  if (norms)
515  {
516  ptr = norms;
517  while (ptr->lexeme)
518  {
519  if (ptr->flags & TSL_ADDPOS)
520  prs->vectorpos++;
521  pfree(ptr->lexeme);
522  ptr++;
523  }
524  pfree(norms);
525  }
526 }
#define TSL_ADDPOS
Definition: ts_public.h:115
int type
Definition: ts_parse.c:28
int lenlemm
Definition: ts_parse.c:30
signed int int32
Definition: c.h:362
void pfree(void *pointer)
Definition: mcxt.c:1057
char * lexeme
Definition: ts_public.h:111
uint16 flags
Definition: ts_public.h:109
static void hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
Definition: ts_parse.c:453
static void hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
Definition: ts_parse.c:437
char * lemm
Definition: ts_parse.c:29
struct ParsedLex * next
Definition: ts_parse.c:31

◆ generateHeadline()

text* generateHeadline ( HeadlineParsedText *  prs)

Definition at line 594 of file ts_parse.c.

References HeadlineParsedText::curwords, HeadlineParsedText::fragdelim, HeadlineParsedText::fragdelimlen, HeadlineWordEntry::in, HeadlineWordEntry::len, palloc(), pfree(), repalloc(), HeadlineWordEntry::repeated, HeadlineWordEntry::replace, HeadlineWordEntry::selected, SET_VARSIZE, HeadlineWordEntry::skip, HeadlineParsedText::startsel, HeadlineParsedText::startsellen, HeadlineParsedText::stopsel, HeadlineParsedText::stopsellen, VARHDRSZ, HeadlineWordEntry::word, and HeadlineParsedText::words.

Referenced by headline_json_value(), and ts_headline_byid_opt().

595 {
596  text *out;
597  char *ptr;
598  int len = 128;
599  int numfragments = 0;
600  int16 infrag = 0;
601 
602  HeadlineWordEntry *wrd = prs->words;
603 
604  out = (text *) palloc(len);
605  ptr = ((char *) out) + VARHDRSZ;
606 
607  while (wrd - prs->words < prs->curwords)
608  {
609  while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= len)
610  {
611  int dist = ptr - ((char *) out);
612 
613  len *= 2;
614  out = (text *) repalloc(out, len);
615  ptr = ((char *) out) + dist;
616  }
617 
618  if (wrd->in && !wrd->repeated)
619  {
620  if (!infrag)
621  {
622 
623  /* start of a new fragment */
624  infrag = 1;
625  numfragments++;
626  /* add a fragment delimiter if this is after the first one */
627  if (numfragments > 1)
628  {
629  memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
630  ptr += prs->fragdelimlen;
631  }
632 
633  }
634  if (wrd->replace)
635  {
636  *ptr = ' ';
637  ptr++;
638  }
639  else if (!wrd->skip)
640  {
641  if (wrd->selected)
642  {
643  memcpy(ptr, prs->startsel, prs->startsellen);
644  ptr += prs->startsellen;
645  }
646  memcpy(ptr, wrd->word, wrd->len);
647  ptr += wrd->len;
648  if (wrd->selected)
649  {
650  memcpy(ptr, prs->stopsel, prs->stopsellen);
651  ptr += prs->stopsellen;
652  }
653  }
654  }
655  else if (!wrd->repeated)
656  {
657  if (infrag)
658  infrag = 0;
659  pfree(wrd->word);
660  }
661 
662  wrd++;
663  }
664 
665  SET_VARSIZE(out, ptr - ((char *) out));
666  return out;
667 }
signed short int16
Definition: c.h:361
#define VARHDRSZ
Definition: c.h:568
void pfree(void *pointer)
Definition: mcxt.c:1057
HeadlineWordEntry * words
Definition: ts_public.h:52
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
void * palloc(Size size)
Definition: mcxt.c:950
Definition: c.h:562
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329

◆ hladdword()

static void hladdword ( HeadlineParsedText *  prs,
char *  buf,
int  buflen,
int  type 
)
static

Definition at line 437 of file ts_parse.c.

References HeadlineParsedText::curwords, HeadlineWordEntry::len, HeadlineParsedText::lenwords, palloc(), repalloc(), HeadlineWordEntry::type, HeadlineWordEntry::word, and HeadlineParsedText::words.

Referenced by addHLParsedLex().

438 {
439  while (prs->curwords >= prs->lenwords)
440  {
441  prs->lenwords *= 2;
442  prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
443  }
444  memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWordEntry));
445  prs->words[prs->curwords].type = (uint8) type;
446  prs->words[prs->curwords].len = buflen;
447  prs->words[prs->curwords].word = palloc(buflen);
448  memcpy(prs->words[prs->curwords].word, buf, buflen);
449  prs->curwords++;
450 }
unsigned char uint8
Definition: c.h:372
static char * buf
Definition: pg_test_fsync.c:67
HeadlineWordEntry * words
Definition: ts_public.h:52
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
void * palloc(Size size)
Definition: mcxt.c:950

◆ hlfinditem()

static void hlfinditem ( HeadlineParsedText *  prs,
TSQuery  query,
int32  pos,
char *  buf,
int  buflen 
)
static

Definition at line 453 of file ts_parse.c.

References HeadlineParsedText::curwords, QueryOperand::distance, GETOPERAND, GETQUERY, i, HeadlineWordEntry::item, QueryOperand::length, HeadlineParsedText::lenwords, LIMITPOS, HeadlineWordEntry::pos, QueryOperand::prefix, QI_VAL, QueryItem::qoperand, repalloc(), HeadlineWordEntry::repeated, TSQueryData::size, tsCompareString(), QueryItem::type, word(), and HeadlineParsedText::words.

Referenced by addHLParsedLex().

454 {
455  int i;
456  QueryItem *item = GETQUERY(query);
457  HeadlineWordEntry *word;
458 
459  while (prs->curwords + query->size >= prs->lenwords)
460  {
461  prs->lenwords *= 2;
462  prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
463  }
464 
465  word = &(prs->words[prs->curwords - 1]);
466  word->pos = LIMITPOS(pos);
467  for (i = 0; i < query->size; i++)
468  {
469  if (item->type == QI_VAL &&
470  tsCompareString(GETOPERAND(query) + item->qoperand.distance, item->qoperand.length,
471  buf, buflen, item->qoperand.prefix) == 0)
472  {
473  if (word->item)
474  {
475  memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
476  prs->words[prs->curwords].item = &item->qoperand;
477  prs->words[prs->curwords].repeated = 1;
478  prs->curwords++;
479  }
480  else
481  word->item = &item->qoperand;
482  }
483  item++;
484  }
485 }
QueryOperand * item
Definition: ts_public.h:47
#define QI_VAL
Definition: ts_type.h:134
uint32 distance
Definition: ts_type.h:158
#define GETQUERY(x)
Definition: _int.h:157
WordEntryPos pos
Definition: ts_public.h:45
#define GETOPERAND(x)
Definition: ltree.h:151
static char * buf
Definition: pg_test_fsync.c:67
HeadlineWordEntry * words
Definition: ts_public.h:52
QueryItemType type
Definition: ts_type.h:195
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1147
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
uint32 length
Definition: ts_type.h:158
static void word(struct vars *, int, struct state *, struct state *)
Definition: regcomp.c:1246
int32 size
Definition: ts_type.h:208
int i
#define LIMITPOS(x)
Definition: ts_type.h:87
bool prefix
Definition: ts_type.h:150
QueryOperand qoperand
Definition: ts_type.h:197

◆ hlparsetext()

void hlparsetext ( Oid  cfgId,
HeadlineParsedText *  prs,
TSQuery  query,
char *  buf,
int  buflen 
)

Definition at line 529 of file ts_parse.c.

References addHLParsedLex(), DatumGetInt32, DatumGetPointer, ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum, ParsedLex::lemm, ParsedLex::lenlemm, LexizeAddLemm(), LexizeExec(), LexizeInit(), lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, PointerGetDatum, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, ParsedLex::type, and HeadlineParsedText::vectorpos.

Referenced by headline_json_value(), and ts_headline_byid_opt().

530 {
531  int type,
532  lenlemm;
533  char *lemm = NULL;
534  LexizeData ldata;
535  TSLexeme *norms;
536  ParsedLex *lexs;
537  TSConfigCacheEntry *cfg;
538  TSParserCacheEntry *prsobj;
539  void *prsdata;
540 
541  cfg = lookup_ts_config_cache(cfgId);
542  prsobj = lookup_ts_parser_cache(cfg->prsId);
543 
544  prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
545  PointerGetDatum(buf),
546  Int32GetDatum(buflen)));
547 
548  LexizeInit(&ldata, cfg);
549 
550  do
551  {
552  type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
553  PointerGetDatum(prsdata),
554  PointerGetDatum(&lemm),
555  PointerGetDatum(&lenlemm)));
556 
557  if (type > 0 && lenlemm >= MAXSTRLEN)
558  {
559 #ifdef IGNORE_LONGLEXEME
560  ereport(NOTICE,
561  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
562  errmsg("word is too long to be indexed"),
563  errdetail("Words longer than %d characters are ignored.",
564  MAXSTRLEN)));
565  continue;
566 #else
567  ereport(ERROR,
568  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
569  errmsg("word is too long to be indexed"),
570  errdetail("Words longer than %d characters are ignored.",
571  MAXSTRLEN)));
572 #endif
573  }
574 
575  LexizeAddLemm(&ldata, type, lemm, lenlemm);
576 
577  do
578  {
579  if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
580  {
581  prs->vectorpos++;
582  addHLParsedLex(prs, query, lexs, norms);
583  }
584  else
585  addHLParsedLex(prs, query, lexs, NULL);
586  } while (norms);
587 
588  } while (type > 0);
589 
590  FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
591 }
#define DatumGetInt32(X)
Definition: postgres.h:472
static void addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
Definition: ts_parse.c:488
#define PointerGetDatum(X)
Definition: postgres.h:556
#define FunctionCall2(flinfo, arg1, arg2)
Definition: fmgr.h:644
int errcode(int sqlerrcode)
Definition: elog.c:610
TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId)
Definition: ts_cache.c:112
#define FunctionCall3(flinfo, arg1, arg2, arg3)
Definition: fmgr.h:646
#define ERROR
Definition: elog.h:43
FmgrInfo prsend
Definition: ts_cache.h:47
TSConfigCacheEntry * lookup_ts_config_cache(Oid cfgId)
Definition: ts_cache.c:389
static char * buf
Definition: pg_test_fsync.c:67
static void LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
Definition: ts_parse.c:60
int errdetail(const char *fmt,...)
Definition: elog.c:957
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:172
#define ereport(elevel,...)
Definition: elog.h:144
#define NOTICE
Definition: elog.h:37
FmgrInfo prstoken
Definition: ts_cache.h:46
#define DatumGetPointer(X)
Definition: postgres.h:549
FmgrInfo prsstart
Definition: ts_cache.h:45
#define Int32GetDatum(X)
Definition: postgres.h:479
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:642
static void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
Definition: ts_parse.c:99
#define MAXSTRLEN
Definition: ts_type.h:49

◆ LexizeAddLemm()

static void LexizeAddLemm ( LexizeData *  ld,
int  type,
char *  lemm,
int  lenlemm 
)
static

Definition at line 99 of file ts_parse.c.

References LexizeData::curSub, ParsedLex::lemm, ParsedLex::lenlemm, LPLAddTail(), palloc(), ListParsedLex::tail, LexizeData::towork, and ParsedLex::type.

Referenced by hlparsetext(), and parsetext().

100 {
101  ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
102 
103  newpl->type = type;
104  newpl->lemm = lemm;
105  newpl->lenlemm = lenlemm;
106  LPLAddTail(&ld->towork, newpl);
107  ld->curSub = ld->towork.tail;
108 }
int type
Definition: ts_parse.c:28
int lenlemm
Definition: ts_parse.c:30
ParsedLex * curSub
Definition: ts_parse.c:46
static void LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
Definition: ts_parse.c:72
ParsedLex * tail
Definition: ts_parse.c:37
ListParsedLex towork
Definition: ts_parse.c:47
char * lemm
Definition: ts_parse.c:29
void * palloc(Size size)
Definition: mcxt.c:950

◆ LexizeExec()

static TSLexeme* LexizeExec ( LexizeData *  ld,
ParsedLex **  correspondLexem 
)
static

Definition at line 172 of file ts_parse.c.

References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, DatumGetObjectId, DatumGetPointer, TSDictionaryCacheEntry::dictData, ListDictionary::dictIds, LexizeData::dictState, TSLexeme::flags, FunctionCall4, DictSubState::getnext, ListParsedLex::head, i, Int32GetDatum, InvalidOid, DictSubState::isend, LexizeData::lastRes, ParsedLex::lemm, ListDictionary::len, ParsedLex::lenlemm, TSConfigCacheEntry::lenmap, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), TSConfigCacheEntry::map, moveToWaste(), ParsedLex::next, PointerGetDatum, LexizeData::posDict, DictSubState::private_state, RemoveHead(), setCorrLex(), setNewTmpRes(), LexizeData::tmpRes, LexizeData::towork, TSL_FILTER, and ParsedLex::type.

Referenced by hlparsetext(), and parsetext().

173 {
174  int i;
175  ListDictionary *map;
176  TSDictionaryCacheEntry *dict;
177  TSLexeme *res;
178 
179  if (ld->curDictId == InvalidOid)
180  {
181  /*
182  * usual mode: dictionary wants only one word, but we should keep in
183  * mind that we should go through all stack
184  */
185 
186  while (ld->towork.head)
187  {
188  ParsedLex *curVal = ld->towork.head;
189  char *curValLemm = curVal->lemm;
190  int curValLenLemm = curVal->lenlemm;
191 
192  map = ld->cfg->map + curVal->type;
193 
194  if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
195  {
196  /* skip this type of lexeme */
197  RemoveHead(ld);
198  continue;
199  }
200 
201  for (i = ld->posDict; i < map->len; i++)
202  {
203  dict = lookup_ts_dictionary_cache(map->dictIds[i]);
204 
205  ld->dictState.isend = ld->dictState.getnext = false;
206  ld->dictState.private_state = NULL;
207  res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
208  PointerGetDatum(dict->dictData),
209  PointerGetDatum(curValLemm),
210  Int32GetDatum(curValLenLemm),
211  PointerGetDatum(&ld->dictState)));
212 
213  if (ld->dictState.getnext)
214  {
215  /*
216  * dictionary wants next word, so setup and store current
217  * position and go to multiword mode
218  */
219 
220  ld->curDictId = DatumGetObjectId(map->dictIds[i]);
221  ld->posDict = i + 1;
222  ld->curSub = curVal->next;
223  if (res)
224  setNewTmpRes(ld, curVal, res);
225  return LexizeExec(ld, correspondLexem);
226  }
227 
228  if (!res) /* dictionary doesn't know this lexeme */
229  continue;
230 
231  if (res->flags & TSL_FILTER)
232  {
233  curValLemm = res->lexeme;
234  curValLenLemm = strlen(res->lexeme);
235  continue;
236  }
237 
238  RemoveHead(ld);
239  setCorrLex(ld, correspondLexem);
240  return res;
241  }
242 
243  RemoveHead(ld);
244  }
245  }
246  else
247  { /* curDictId is valid */
248  dict = lookup_ts_dictionary_cache(ld->curDictId);
249 
250  /*
251  * Dictionary ld->curDictId asks us about following words
252  */
253 
254  while (ld->curSub)
255  {
256  ParsedLex *curVal = ld->curSub;
257 
258  map = ld->cfg->map + curVal->type;
259 
260  if (curVal->type != 0)
261  {
262  bool dictExists = false;
263 
264  if (curVal->type >= ld->cfg->lenmap || map->len == 0)
265  {
266  /* skip this type of lexeme */
267  ld->curSub = curVal->next;
268  continue;
269  }
270 
271  /*
272  * We should be sure that current type of lexeme is recognized
273  * by our dictionary: we just check is it exist in list of
274  * dictionaries ?
275  */
276  for (i = 0; i < map->len && !dictExists; i++)
277  if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
278  dictExists = true;
279 
280  if (!dictExists)
281  {
282  /*
283  * Dictionary can't work with current type of lexeme,
284  * return to basic mode and redo all stored lexemes
285  */
286  ld->curDictId = InvalidOid;
287  return LexizeExec(ld, correspondLexem);
288  }
289  }
290 
291  ld->dictState.isend = (curVal->type == 0) ? true : false;
292  ld->dictState.getnext = false;
293 
294  res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
295  PointerGetDatum(dict->dictData),
296  PointerGetDatum(curVal->lemm),
297  Int32GetDatum(curVal->lenlemm),
298  PointerGetDatum(&ld->dictState)));
299 
300  if (ld->dictState.getnext)
301  {
302  /* Dictionary wants one more */
303  ld->curSub = curVal->next;
304  if (res)
305  setNewTmpRes(ld, curVal, res);
306  continue;
307  }
308 
309  if (res || ld->tmpRes)
310  {
311  /*
312  * Dictionary normalizes lexemes, so we remove from stack all
313  * used lexemes, return to basic mode and redo end of stack
314  * (if it exists)
315  */
316  if (res)
317  {
318  moveToWaste(ld, ld->curSub);
319  }
320  else
321  {
322  res = ld->tmpRes;
323  moveToWaste(ld, ld->lastRes);
324  }
325 
326  /* reset to initial state */
327  ld->curDictId = InvalidOid;
328  ld->posDict = 0;
329  ld->lastRes = NULL;
330  ld->tmpRes = NULL;
331  setCorrLex(ld, correspondLexem);
332  return res;
333  }
334 
335  /*
336  * Dict don't want next lexem and didn't recognize anything, redo
337  * from ld->towork.head
338  */
339  ld->curDictId = InvalidOid;
340  return LexizeExec(ld, correspondLexem);
341  }
342  }
343 
344  setCorrLex(ld, correspondLexem);
345  return NULL;
346 }
ParsedLex * lastRes
Definition: ts_parse.c:55
#define TSL_FILTER
Definition: ts_public.h:117
int type
Definition: ts_parse.c:28
int lenlemm
Definition: ts_parse.c:30
#define PointerGetDatum(X)
Definition: postgres.h:556
#define DatumGetObjectId(X)
Definition: postgres.h:500
bool getnext
Definition: ts_public.h:127
static void moveToWaste(LexizeData *ld, ParsedLex *stop)
Definition: ts_parse.c:141
ParsedLex * curSub
Definition: ts_parse.c:46
static void setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:119
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:210
char * lexeme
Definition: ts_public.h:111
Oid curDictId
Definition: ts_parse.c:43
Oid * dictIds
Definition: ts_cache.h:68
ListDictionary * map
Definition: ts_cache.h:80
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:648
uint16 flags
Definition: ts_public.h:109
DictSubState dictState
Definition: ts_parse.c:45
static void setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
Definition: ts_parse.c:157
ListParsedLex towork
Definition: ts_parse.c:47
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:172
#define InvalidOid
Definition: postgres_ext.h:36
char * lemm
Definition: ts_parse.c:29
int posDict
Definition: ts_parse.c:44
#define DatumGetPointer(X)
Definition: postgres.h:549
#define Int32GetDatum(X)
Definition: postgres.h:479
void * private_state
Definition: ts_public.h:128
ParsedLex * head
Definition: ts_parse.c:36
int i
TSConfigCacheEntry * cfg
Definition: ts_parse.c:42
static void RemoveHead(LexizeData *ld)
Definition: ts_parse.c:111
struct ParsedLex * next
Definition: ts_parse.c:31
TSLexeme * tmpRes
Definition: ts_parse.c:56

◆ LexizeInit()

static void LexizeInit ( LexizeData *  ld,
TSConfigCacheEntry *  cfg 
)
static

Definition at line 60 of file ts_parse.c.

References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, ListParsedLex::head, InvalidOid, LexizeData::lastRes, LexizeData::posDict, ListParsedLex::tail, LexizeData::tmpRes, LexizeData::towork, and LexizeData::waste.

Referenced by hlparsetext(), and parsetext().

61 {
62  ld->cfg = cfg;
63  ld->curDictId = InvalidOid;
64  ld->posDict = 0;
65  ld->towork.head = ld->towork.tail = ld->curSub = NULL;
66  ld->waste.head = ld->waste.tail = NULL;
67  ld->lastRes = NULL;
68  ld->tmpRes = NULL;
69 }
ParsedLex * lastRes
Definition: ts_parse.c:55
ParsedLex * curSub
Definition: ts_parse.c:46
Oid curDictId
Definition: ts_parse.c:43
ParsedLex * tail
Definition: ts_parse.c:37
ListParsedLex towork
Definition: ts_parse.c:47
#define InvalidOid
Definition: postgres_ext.h:36
int posDict
Definition: ts_parse.c:44
ListParsedLex waste
Definition: ts_parse.c:48
ParsedLex * head
Definition: ts_parse.c:36
TSConfigCacheEntry * cfg
Definition: ts_parse.c:42
TSLexeme * tmpRes
Definition: ts_parse.c:56

◆ LPLAddTail()

static void LPLAddTail ( ListParsedLex *  list,
ParsedLex *  newpl 
)
static

Definition at line 72 of file ts_parse.c.

References ListParsedLex::head, ParsedLex::next, and ListParsedLex::tail.

Referenced by LexizeAddLemm(), and RemoveHead().

73 {
74  if (list->tail)
75  {
76  list->tail->next = newpl;
77  list->tail = newpl;
78  }
79  else
80  list->head = list->tail = newpl;
81  newpl->next = NULL;
82 }
ParsedLex * tail
Definition: ts_parse.c:37
ParsedLex * head
Definition: ts_parse.c:36
struct ParsedLex * next
Definition: ts_parse.c:31

◆ LPLRemoveHead()

static ParsedLex* LPLRemoveHead ( ListParsedLex *  list)
static

Definition at line 85 of file ts_parse.c.

References ListParsedLex::head, ParsedLex::next, and ListParsedLex::tail.

Referenced by RemoveHead().

86 {
87  ParsedLex *res = list->head;
88 
89  if (list->head)
90  list->head = list->head->next;
91 
92  if (list->head == NULL)
93  list->tail = NULL;
94 
95  return res;
96 }
ParsedLex * tail
Definition: ts_parse.c:37
ParsedLex * head
Definition: ts_parse.c:36
struct ParsedLex * next
Definition: ts_parse.c:31

◆ moveToWaste()

static void moveToWaste ( LexizeData *  ld,
ParsedLex *  stop 
)
static

Definition at line 141 of file ts_parse.c.

References LexizeData::curSub, ListParsedLex::head, ParsedLex::next, RemoveHead(), and LexizeData::towork.

Referenced by LexizeExec().

142 {
143  bool go = true;
144 
145  while (ld->towork.head && go)
146  {
147  if (ld->towork.head == stop)
148  {
149  ld->curSub = stop->next;
150  go = false;
151  }
152  RemoveHead(ld);
153  }
154 }
ParsedLex * curSub
Definition: ts_parse.c:46
ListParsedLex towork
Definition: ts_parse.c:47
ParsedLex * head
Definition: ts_parse.c:36
static void RemoveHead(LexizeData *ld)
Definition: ts_parse.c:111
struct ParsedLex * next
Definition: ts_parse.c:31

◆ parsetext()

void parsetext ( Oid  cfgId,
ParsedText *  prs,
char *  buf,
int  buflen 
)

Definition at line 354 of file ts_parse.c.

References ParsedWord::alen, ParsedText::curwords, DatumGetInt32, DatumGetPointer, ereport, errcode(), errdetail(), errmsg(), ERROR, ParsedWord::flags, TSLexeme::flags, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum, ParsedLex::lemm, ParsedWord::len, ParsedLex::lenlemm, ParsedText::lenwords, TSLexeme::lexeme, LexizeAddLemm(), LexizeExec(), LexizeInit(), LIMITPOS, lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, ParsedWord::nvariant, TSLexeme::nvariant, pfree(), PointerGetDatum, ParsedWord::pos, ParsedText::pos, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, repalloc(), TSL_ADDPOS, TSL_PREFIX, ParsedLex::type, ParsedWord::word, and ParsedText::words.

Referenced by add_to_tsvector(), pushval_morph(), to_tsvector_byid(), and tsvector_update_trigger().

355 {
356  int type,
357  lenlemm;
358  char *lemm = NULL;
359  LexizeData ldata;
360  TSLexeme *norms;
361  TSConfigCacheEntry *cfg;
362  TSParserCacheEntry *prsobj;
363  void *prsdata;
364 
365  cfg = lookup_ts_config_cache(cfgId);
366  prsobj = lookup_ts_parser_cache(cfg->prsId);
367 
368  prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
369  PointerGetDatum(buf),
370  Int32GetDatum(buflen)));
371 
372  LexizeInit(&ldata, cfg);
373 
374  do
375  {
376  type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
377  PointerGetDatum(prsdata),
378  PointerGetDatum(&lemm),
379  PointerGetDatum(&lenlemm)));
380 
381  if (type > 0 && lenlemm >= MAXSTRLEN)
382  {
383 #ifdef IGNORE_LONGLEXEME
384  ereport(NOTICE,
385  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
386  errmsg("word is too long to be indexed"),
387  errdetail("Words longer than %d characters are ignored.",
388  MAXSTRLEN)));
389  continue;
390 #else
391  ereport(ERROR,
392  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
393  errmsg("word is too long to be indexed"),
394  errdetail("Words longer than %d characters are ignored.",
395  MAXSTRLEN)));
396 #endif
397  }
398 
399  LexizeAddLemm(&ldata, type, lemm, lenlemm);
400 
401  while ((norms = LexizeExec(&ldata, NULL)) != NULL)
402  {
403  TSLexeme *ptr = norms;
404 
405  prs->pos++; /* set pos */
406 
407  while (ptr->lexeme)
408  {
409  if (prs->curwords == prs->lenwords)
410  {
411  prs->lenwords *= 2;
412  prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
413  }
414 
415  if (ptr->flags & TSL_ADDPOS)
416  prs->pos++;
417  prs->words[prs->curwords].len = strlen(ptr->lexeme);
418  prs->words[prs->curwords].word = ptr->lexeme;
419  prs->words[prs->curwords].nvariant = ptr->nvariant;
420  prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
421  prs->words[prs->curwords].alen = 0;
422  prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
423  ptr++;
424  prs->curwords++;
425  }
426  pfree(norms);
427  }
428  } while (type > 0);
429 
430  FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
431 }
#define TSL_PREFIX
Definition: ts_public.h:116
#define TSL_ADDPOS
Definition: ts_public.h:115
#define DatumGetInt32(X)
Definition: postgres.h:472
#define PointerGetDatum(X)
Definition: postgres.h:556
uint16 nvariant
Definition: ts_utils.h:81
#define FunctionCall2(flinfo, arg1, arg2)
Definition: fmgr.h:644
int errcode(int sqlerrcode)
Definition: elog.c:610
TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId)
Definition: ts_cache.c:112
int32 lenwords
Definition: ts_utils.h:101
int32 curwords
Definition: ts_utils.h:102
#define FunctionCall3(flinfo, arg1, arg2, arg3)
Definition: fmgr.h:646
void pfree(void *pointer)
Definition: mcxt.c:1057
#define ERROR
Definition: elog.h:43
FmgrInfo prsend
Definition: ts_cache.h:47
TSConfigCacheEntry * lookup_ts_config_cache(Oid cfgId)
Definition: ts_cache.c:389
char * lexeme
Definition: ts_public.h:111
static char * buf
Definition: pg_test_fsync.c:67
static void LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
Definition: ts_parse.c:60
int errdetail(const char *fmt,...)
Definition: elog.c:957
ParsedWord * words
Definition: ts_utils.h:100
uint16 pos
Definition: ts_utils.h:84
uint16 nvariant
Definition: ts_public.h:107
uint16 flags
Definition: ts_public.h:109
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:172
#define ereport(elevel,...)
Definition: elog.h:144
#define NOTICE
Definition: elog.h:37
FmgrInfo prstoken
Definition: ts_cache.h:46
uint16 len
Definition: ts_utils.h:80
int32 pos
Definition: ts_utils.h:103
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
#define DatumGetPointer(X)
Definition: postgres.h:549
uint16 flags
Definition: ts_utils.h:93
uint32 alen
Definition: ts_utils.h:95
FmgrInfo prsstart
Definition: ts_cache.h:45
#define Int32GetDatum(X)
Definition: postgres.h:479
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:642
#define LIMITPOS(x)
Definition: ts_type.h:87
static void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
Definition: ts_parse.c:99
#define MAXSTRLEN
Definition: ts_type.h:49
char * word
Definition: ts_utils.h:94

◆ RemoveHead()

static void RemoveHead ( LexizeData *  ld)
static

Definition at line 111 of file ts_parse.c.

References LPLAddTail(), LPLRemoveHead(), LexizeData::posDict, LexizeData::towork, and LexizeData::waste.

Referenced by LexizeExec(), and moveToWaste().

112 {
113  LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork));
114 
115  ld->posDict = 0;
116 }
static ParsedLex * LPLRemoveHead(ListParsedLex *list)
Definition: ts_parse.c:85
static void LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
Definition: ts_parse.c:72
ListParsedLex towork
Definition: ts_parse.c:47
int posDict
Definition: ts_parse.c:44
ListParsedLex waste
Definition: ts_parse.c:48

◆ setCorrLex()

static void setCorrLex ( LexizeData *  ld,
ParsedLex **  correspondLexem 
)
static

Definition at line 119 of file ts_parse.c.

References ListParsedLex::head, ParsedLex::next, pfree(), ListParsedLex::tail, and LexizeData::waste.

Referenced by LexizeExec().

120 {
121  if (correspondLexem)
122  {
123  *correspondLexem = ld->waste.head;
124  }
125  else
126  {
127  ParsedLex *tmp,
128  *ptr = ld->waste.head;
129 
130  while (ptr)
131  {
132  tmp = ptr->next;
133  pfree(ptr);
134  ptr = tmp;
135  }
136  }
137  ld->waste.head = ld->waste.tail = NULL;
138 }
void pfree(void *pointer)
Definition: mcxt.c:1057
ParsedLex * tail
Definition: ts_parse.c:37
ListParsedLex waste
Definition: ts_parse.c:48
ParsedLex * head
Definition: ts_parse.c:36
struct ParsedLex * next
Definition: ts_parse.c:31

◆ setNewTmpRes()

static void setNewTmpRes ( LexizeData *  ld,
ParsedLex *  lex,
TSLexeme *  res 
)
static

Definition at line 157 of file ts_parse.c.

References LexizeData::lastRes, TSLexeme::lexeme, pfree(), and LexizeData::tmpRes.

Referenced by LexizeExec().

158 {
159  if (ld->tmpRes)
160  {
161  TSLexeme *ptr;
162 
163  for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
164  pfree(ptr->lexeme);
165  pfree(ld->tmpRes);
166  }
167  ld->tmpRes = res;
168  ld->lastRes = lex;
169 }
ParsedLex * lastRes
Definition: ts_parse.c:55
void pfree(void *pointer)
Definition: mcxt.c:1057
char * lexeme
Definition: ts_public.h:111
TSLexeme * tmpRes
Definition: ts_parse.c:56