PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ts_parse.c File Reference
#include "postgres.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
Include dependency graph for ts_parse.c:

Go to the source code of this file.

Data Structures

struct  ParsedLex
 
struct  ListParsedLex
 
struct  LexizeData
 

Macros

#define IGNORE_LONGLEXEME   1
 

Typedefs

typedef struct ParsedLex ParsedLex
 
typedef struct ListParsedLex ListParsedLex
 

Functions

static void LexizeInit (LexizeData *ld, TSConfigCacheEntry *cfg)
 
static void LPLAddTail (ListParsedLex *list, ParsedLex *newpl)
 
static ParsedLex * LPLRemoveHead (ListParsedLex *list)
 
static void LexizeAddLemm (LexizeData *ld, int type, char *lemm, int lenlemm)
 
static void RemoveHead (LexizeData *ld)
 
static void setCorrLex (LexizeData *ld, ParsedLex **correspondLexem)
 
static void moveToWaste (LexizeData *ld, ParsedLex *stop)
 
static void setNewTmpRes (LexizeData *ld, ParsedLex *lex, TSLexeme *res)
 
static TSLexeme * LexizeExec (LexizeData *ld, ParsedLex **correspondLexem)
 
void parsetext (Oid cfgId, ParsedText *prs, char *buf, int buflen)
 
static void hladdword (HeadlineParsedText *prs, char *buf, int buflen, int type)
 
static void hlfinditem (HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
 
static void addHLParsedLex (HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
 
void hlparsetext (Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
 
text * generateHeadline (HeadlineParsedText *prs)
 

Macro Definition Documentation

#define IGNORE_LONGLEXEME   1

Definition at line 20 of file ts_parse.c.

Typedef Documentation

Function Documentation

static void addHLParsedLex ( HeadlineParsedText *  prs,
TSQuery  query,
ParsedLex *  lexs,
TSLexeme *  norms 
)
static

Definition at line 492 of file ts_parse.c.

References TSLexeme::flags, hladdword(), hlfinditem(), ParsedLex::lemm, ParsedLex::lenlemm, TSLexeme::lexeme, ParsedLex::next, pfree(), TSL_ADDPOS, ParsedLex::type, and HeadlineParsedText::vectorpos.

Referenced by hlparsetext().

493 {
494  ParsedLex *tmplexs;
495  TSLexeme *ptr;
496  int32 savedpos;
497 
498  while (lexs)
499  {
500  if (lexs->type > 0)
501  hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
502 
503  ptr = norms;
504  savedpos = prs->vectorpos;
505  while (ptr && ptr->lexeme)
506  {
507  if (ptr->flags & TSL_ADDPOS)
508  savedpos++;
509  hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
510  ptr++;
511  }
512 
513  tmplexs = lexs->next;
514  pfree(lexs);
515  lexs = tmplexs;
516  }
517 
518  if (norms)
519  {
520  ptr = norms;
521  while (ptr->lexeme)
522  {
523  if (ptr->flags & TSL_ADDPOS)
524  prs->vectorpos++;
525  pfree(ptr->lexeme);
526  ptr++;
527  }
528  pfree(norms);
529  }
530 }
#define TSL_ADDPOS
Definition: ts_public.h:115
int type
Definition: ts_parse.c:28
int lenlemm
Definition: ts_parse.c:30
signed int int32
Definition: c.h:256
void pfree(void *pointer)
Definition: mcxt.c:950
char * lexeme
Definition: ts_public.h:111
uint16 flags
Definition: ts_public.h:109
static void hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
Definition: ts_parse.c:457
static void hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
Definition: ts_parse.c:441
char * lemm
Definition: ts_parse.c:29
struct ParsedLex * next
Definition: ts_parse.c:31
text* generateHeadline ( HeadlineParsedText *  prs)

Definition at line 598 of file ts_parse.c.

References HeadlineParsedText::curwords, HeadlineParsedText::fragdelim, HeadlineParsedText::fragdelimlen, HeadlineWordEntry::in, HeadlineWordEntry::len, palloc(), pfree(), repalloc(), HeadlineWordEntry::repeated, HeadlineWordEntry::replace, HeadlineWordEntry::selected, SET_VARSIZE, HeadlineWordEntry::skip, HeadlineParsedText::startsel, HeadlineParsedText::startsellen, HeadlineParsedText::stopsel, HeadlineParsedText::stopsellen, VARHDRSZ, HeadlineWordEntry::word, and HeadlineParsedText::words.

Referenced by headline_json_value(), and ts_headline_byid_opt().

599 {
600  text *out;
601  char *ptr;
602  int len = 128;
603  int numfragments = 0;
604  int16 infrag = 0;
605 
606  HeadlineWordEntry *wrd = prs->words;
607 
608  out = (text *) palloc(len);
609  ptr = ((char *) out) + VARHDRSZ;
610 
611  while (wrd - prs->words < prs->curwords)
612  {
613  while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= len)
614  {
615  int dist = ptr - ((char *) out);
616 
617  len *= 2;
618  out = (text *) repalloc(out, len);
619  ptr = ((char *) out) + dist;
620  }
621 
622  if (wrd->in && !wrd->repeated)
623  {
624  if (!infrag)
625  {
626 
627  /* start of a new fragment */
628  infrag = 1;
629  numfragments++;
630  /* add a fragment delimiter if this is after the first one */
631  if (numfragments > 1)
632  {
633  memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
634  ptr += prs->fragdelimlen;
635  }
636 
637  }
638  if (wrd->replace)
639  {
640  *ptr = ' ';
641  ptr++;
642  }
643  else if (!wrd->skip)
644  {
645  if (wrd->selected)
646  {
647  memcpy(ptr, prs->startsel, prs->startsellen);
648  ptr += prs->startsellen;
649  }
650  memcpy(ptr, wrd->word, wrd->len);
651  ptr += wrd->len;
652  if (wrd->selected)
653  {
654  memcpy(ptr, prs->stopsel, prs->stopsellen);
655  ptr += prs->stopsellen;
656  }
657  }
658  }
659  else if (!wrd->repeated)
660  {
661  if (infrag)
662  infrag = 0;
663  pfree(wrd->word);
664  }
665 
666  wrd++;
667  }
668 
669  SET_VARSIZE(out, ptr - ((char *) out));
670  return out;
671 }
signed short int16
Definition: c.h:255
#define VARHDRSZ
Definition: c.h:445
void pfree(void *pointer)
Definition: mcxt.c:950
HeadlineWordEntry * words
Definition: ts_public.h:52
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:963
void * palloc(Size size)
Definition: mcxt.c:849
Definition: c.h:439
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
static void hladdword ( HeadlineParsedText *  prs,
char *  buf,
int  buflen,
int  type 
)
static

Definition at line 441 of file ts_parse.c.

References HeadlineParsedText::curwords, HeadlineWordEntry::len, HeadlineParsedText::lenwords, palloc(), repalloc(), HeadlineWordEntry::type, HeadlineWordEntry::word, and HeadlineParsedText::words.

Referenced by addHLParsedLex().

442 {
443  while (prs->curwords >= prs->lenwords)
444  {
445  prs->lenwords *= 2;
446  prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
447  }
448  memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWordEntry));
449  prs->words[prs->curwords].type = (uint8) type;
450  prs->words[prs->curwords].len = buflen;
451  prs->words[prs->curwords].word = palloc(buflen);
452  memcpy(prs->words[prs->curwords].word, buf, buflen);
453  prs->curwords++;
454 }
unsigned char uint8
Definition: c.h:266
static char * buf
Definition: pg_test_fsync.c:66
HeadlineWordEntry * words
Definition: ts_public.h:52
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:963
void * palloc(Size size)
Definition: mcxt.c:849
static void hlfinditem ( HeadlineParsedText *  prs,
TSQuery  query,
int32  pos,
char *  buf,
int  buflen 
)
static

Definition at line 457 of file ts_parse.c.

References HeadlineParsedText::curwords, QueryOperand::distance, GETOPERAND, GETQUERY, i, HeadlineWordEntry::item, QueryOperand::length, HeadlineParsedText::lenwords, LIMITPOS, HeadlineWordEntry::pos, QueryOperand::prefix, QI_VAL, QueryItem::qoperand, repalloc(), HeadlineWordEntry::repeated, TSQueryData::size, tsCompareString(), QueryItem::type, word(), and HeadlineParsedText::words.

Referenced by addHLParsedLex().

458 {
459  int i;
460  QueryItem *item = GETQUERY(query);
461  HeadlineWordEntry *word;
462 
463  while (prs->curwords + query->size >= prs->lenwords)
464  {
465  prs->lenwords *= 2;
466  prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
467  }
468 
469  word = &(prs->words[prs->curwords - 1]);
470  word->pos = LIMITPOS(pos);
471  for (i = 0; i < query->size; i++)
472  {
473  if (item->type == QI_VAL &&
474  tsCompareString(GETOPERAND(query) + item->qoperand.distance, item->qoperand.length,
475  buf, buflen, item->qoperand.prefix) == 0)
476  {
477  if (word->item)
478  {
479  memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
480  prs->words[prs->curwords].item = &item->qoperand;
481  prs->words[prs->curwords].repeated = 1;
482  prs->curwords++;
483  }
484  else
485  word->item = &item->qoperand;
486  }
487  item++;
488  }
489 }
QueryOperand * item
Definition: ts_public.h:47
#define QI_VAL
Definition: ts_type.h:143
uint32 distance
Definition: ts_type.h:167
#define GETQUERY(x)
Definition: _int.h:142
WordEntryPos pos
Definition: ts_public.h:45
#define GETOPERAND(x)
Definition: ltree.h:118
static char * buf
Definition: pg_test_fsync.c:66
HeadlineWordEntry * words
Definition: ts_public.h:52
QueryItemType type
Definition: ts_type.h:204
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1160
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:963
uint32 length
Definition: ts_type.h:167
static void word(struct vars *, int, struct state *, struct state *)
Definition: regcomp.c:1243
int32 size
Definition: ts_type.h:217
int i
#define LIMITPOS(x)
Definition: ts_type.h:87
bool prefix
Definition: ts_type.h:159
QueryOperand qoperand
Definition: ts_type.h:206
void hlparsetext ( Oid  cfgId,
HeadlineParsedText *  prs,
TSQuery  query,
char *  buf,
int  buflen 
)

Definition at line 533 of file ts_parse.c.

References addHLParsedLex(), DatumGetInt32, DatumGetPointer, ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum, LexizeAddLemm(), LexizeExec(), LexizeInit(), lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, NULL, PointerGetDatum, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, and HeadlineParsedText::vectorpos.

Referenced by headline_json_value(), and ts_headline_byid_opt().

534 {
535  int type,
536  lenlemm;
537  char *lemm = NULL;
538  LexizeData ldata;
539  TSLexeme *norms;
540  ParsedLex *lexs;
541  TSConfigCacheEntry *cfg;
542  TSParserCacheEntry *prsobj;
543  void *prsdata;
544 
545  cfg = lookup_ts_config_cache(cfgId);
546  prsobj = lookup_ts_parser_cache(cfg->prsId);
547 
548  prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
549  PointerGetDatum(buf),
550  Int32GetDatum(buflen)));
551 
552  LexizeInit(&ldata, cfg);
553 
554  do
555  {
556  type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
557  PointerGetDatum(prsdata),
558  PointerGetDatum(&lemm),
559  PointerGetDatum(&lenlemm)));
560 
561  if (type > 0 && lenlemm >= MAXSTRLEN)
562  {
563 #ifdef IGNORE_LONGLEXEME
564  ereport(NOTICE,
565  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
566  errmsg("word is too long to be indexed"),
567  errdetail("Words longer than %d characters are ignored.",
568  MAXSTRLEN)));
569  continue;
570 #else
571  ereport(ERROR,
572  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
573  errmsg("word is too long to be indexed"),
574  errdetail("Words longer than %d characters are ignored.",
575  MAXSTRLEN)));
576 #endif
577  }
578 
579  LexizeAddLemm(&ldata, type, lemm, lenlemm);
580 
581  do
582  {
583  if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
584  {
585  prs->vectorpos++;
586  addHLParsedLex(prs, query, lexs, norms);
587  }
588  else
589  addHLParsedLex(prs, query, lexs, NULL);
590  } while (norms);
591 
592  } while (type > 0);
593 
594  FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
595 }
#define DatumGetInt32(X)
Definition: postgres.h:478
static void addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
Definition: ts_parse.c:492
#define PointerGetDatum(X)
Definition: postgres.h:562
#define FunctionCall2(flinfo, arg1, arg2)
Definition: fmgr.h:604
int errcode(int sqlerrcode)
Definition: elog.c:575
TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId)
Definition: ts_cache.c:112
#define FunctionCall3(flinfo, arg1, arg2, arg3)
Definition: fmgr.h:606
#define ERROR
Definition: elog.h:43
FmgrInfo prsend
Definition: ts_cache.h:47
TSConfigCacheEntry * lookup_ts_config_cache(Oid cfgId)
Definition: ts_cache.c:385
static char * buf
Definition: pg_test_fsync.c:66
static void LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
Definition: ts_parse.c:60
int errdetail(const char *fmt,...)
Definition: elog.c:873
#define ereport(elevel, rest)
Definition: elog.h:122
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:172
#define NOTICE
Definition: elog.h:37
#define NULL
Definition: c.h:229
FmgrInfo prstoken
Definition: ts_cache.h:46
#define DatumGetPointer(X)
Definition: postgres.h:555
FmgrInfo prsstart
Definition: ts_cache.h:45
#define Int32GetDatum(X)
Definition: postgres.h:485
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:602
static void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
Definition: ts_parse.c:99
#define MAXSTRLEN
Definition: ts_type.h:49
static void LexizeAddLemm ( LexizeData *  ld,
int  type,
char *  lemm,
int  lenlemm 
)
static

Definition at line 99 of file ts_parse.c.

References LexizeData::curSub, ParsedLex::lemm, ParsedLex::lenlemm, LPLAddTail(), palloc(), ListParsedLex::tail, LexizeData::towork, and ParsedLex::type.

Referenced by hlparsetext(), and parsetext().

100 {
101  ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
102 
103  newpl->type = type;
104  newpl->lemm = lemm;
105  newpl->lenlemm = lenlemm;
106  LPLAddTail(&ld->towork, newpl);
107  ld->curSub = ld->towork.tail;
108 }
int type
Definition: ts_parse.c:28
int lenlemm
Definition: ts_parse.c:30
ParsedLex * curSub
Definition: ts_parse.c:46
static void LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
Definition: ts_parse.c:72
ParsedLex * tail
Definition: ts_parse.c:37
ListParsedLex towork
Definition: ts_parse.c:47
char * lemm
Definition: ts_parse.c:29
void * palloc(Size size)
Definition: mcxt.c:849
static TSLexeme* LexizeExec ( LexizeData *  ld,
ParsedLex **  correspondLexem 
)
static

Definition at line 172 of file ts_parse.c.

References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, DatumGetObjectId, DatumGetPointer, TSDictionaryCacheEntry::dictData, ListDictionary::dictIds, LexizeData::dictState, TSLexeme::flags, FunctionCall4, DictSubState::getnext, ListParsedLex::head, i, Int32GetDatum, InvalidOid, DictSubState::isend, LexizeData::lastRes, ParsedLex::lemm, ListDictionary::len, ParsedLex::lenlemm, TSConfigCacheEntry::lenmap, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), TSConfigCacheEntry::map, moveToWaste(), ParsedLex::next, NULL, PointerGetDatum, LexizeData::posDict, DictSubState::private_state, RemoveHead(), setCorrLex(), setNewTmpRes(), LexizeData::tmpRes, LexizeData::towork, TSL_FILTER, and ParsedLex::type.

Referenced by hlparsetext(), and parsetext().

173 {
174  int i;
175  ListDictionary *map;
176  TSDictionaryCacheEntry *dict;
177  TSLexeme *res;
178 
179  if (ld->curDictId == InvalidOid)
180  {
181  /*
182  * usual mode: dictionary wants only one word, but we should keep in
183  * mind that we should go through all stack
184  */
185 
186  while (ld->towork.head)
187  {
188  ParsedLex *curVal = ld->towork.head;
189  char *curValLemm = curVal->lemm;
190  int curValLenLemm = curVal->lenlemm;
191 
192  map = ld->cfg->map + curVal->type;
193 
194  if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
195  {
196  /* skip this type of lexeme */
197  RemoveHead(ld);
198  continue;
199  }
200 
201  for (i = ld->posDict; i < map->len; i++)
202  {
203  dict = lookup_ts_dictionary_cache(map->dictIds[i]);
204 
205  ld->dictState.isend = ld->dictState.getnext = false;
206  ld->dictState.private_state = NULL;
207  res = (TSLexeme *) DatumGetPointer(FunctionCall4(
208  &(dict->lexize),
209  PointerGetDatum(dict->dictData),
210  PointerGetDatum(curValLemm),
211  Int32GetDatum(curValLenLemm),
212  PointerGetDatum(&ld->dictState)
213  ));
214 
215  if (ld->dictState.getnext)
216  {
217  /*
218  * dictionary wants next word, so setup and store current
219  * position and go to multiword mode
220  */
221 
222  ld->curDictId = DatumGetObjectId(map->dictIds[i]);
223  ld->posDict = i + 1;
224  ld->curSub = curVal->next;
225  if (res)
226  setNewTmpRes(ld, curVal, res);
227  return LexizeExec(ld, correspondLexem);
228  }
229 
230  if (!res) /* dictionary doesn't know this lexeme */
231  continue;
232 
233  if (res->flags & TSL_FILTER)
234  {
235  curValLemm = res->lexeme;
236  curValLenLemm = strlen(res->lexeme);
237  continue;
238  }
239 
240  RemoveHead(ld);
241  setCorrLex(ld, correspondLexem);
242  return res;
243  }
244 
245  RemoveHead(ld);
246  }
247  }
248  else
249  { /* curDictId is valid */
250  dict = lookup_ts_dictionary_cache(ld->curDictId);
251 
252  /*
253  * Dictionary ld->curDictId asks us about following words
254  */
255 
256  while (ld->curSub)
257  {
258  ParsedLex *curVal = ld->curSub;
259 
260  map = ld->cfg->map + curVal->type;
261 
262  if (curVal->type != 0)
263  {
264  bool dictExists = false;
265 
266  if (curVal->type >= ld->cfg->lenmap || map->len == 0)
267  {
268  /* skip this type of lexeme */
269  ld->curSub = curVal->next;
270  continue;
271  }
272 
273  /*
274  * We should be sure that current type of lexeme is recognized
275  * by our dictionary: we just check is it exist in list of
276  * dictionaries ?
277  */
278  for (i = 0; i < map->len && !dictExists; i++)
279  if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
280  dictExists = true;
281 
282  if (!dictExists)
283  {
284  /*
285  * Dictionary can't work with current type of lexeme,
286  * return to basic mode and redo all stored lexemes
287  */
288  ld->curDictId = InvalidOid;
289  return LexizeExec(ld, correspondLexem);
290  }
291  }
292 
293  ld->dictState.isend = (curVal->type == 0) ? true : false;
294  ld->dictState.getnext = false;
295 
296  res = (TSLexeme *) DatumGetPointer(FunctionCall4(
297  &(dict->lexize),
298  PointerGetDatum(dict->dictData),
299  PointerGetDatum(curVal->lemm),
300  Int32GetDatum(curVal->lenlemm),
301  PointerGetDatum(&ld->dictState)
302  ));
303 
304  if (ld->dictState.getnext)
305  {
306  /* Dictionary wants one more */
307  ld->curSub = curVal->next;
308  if (res)
309  setNewTmpRes(ld, curVal, res);
310  continue;
311  }
312 
313  if (res || ld->tmpRes)
314  {
315  /*
316  * Dictionary normalizes lexemes, so we remove from stack all
317  * used lexemes, return to basic mode and redo end of stack
318  * (if it exists)
319  */
320  if (res)
321  {
322  moveToWaste(ld, ld->curSub);
323  }
324  else
325  {
326  res = ld->tmpRes;
327  moveToWaste(ld, ld->lastRes);
328  }
329 
330  /* reset to initial state */
331  ld->curDictId = InvalidOid;
332  ld->posDict = 0;
333  ld->lastRes = NULL;
334  ld->tmpRes = NULL;
335  setCorrLex(ld, correspondLexem);
336  return res;
337  }
338 
339  /*
340  * Dict don't want next lexem and didn't recognize anything, redo
341  * from ld->towork.head
342  */
343  ld->curDictId = InvalidOid;
344  return LexizeExec(ld, correspondLexem);
345  }
346  }
347 
348  setCorrLex(ld, correspondLexem);
349  return NULL;
350 }
ParsedLex * lastRes
Definition: ts_parse.c:55
#define TSL_FILTER
Definition: ts_public.h:117
int type
Definition: ts_parse.c:28
int lenlemm
Definition: ts_parse.c:30
#define PointerGetDatum(X)
Definition: postgres.h:562
#define DatumGetObjectId(X)
Definition: postgres.h:506
bool getnext
Definition: ts_public.h:127
static void moveToWaste(LexizeData *ld, ParsedLex *stop)
Definition: ts_parse.c:141
ParsedLex * curSub
Definition: ts_parse.c:46
static void setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:119
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
Definition: ts_cache.c:210
char * lexeme
Definition: ts_public.h:111
Oid curDictId
Definition: ts_parse.c:43
Oid * dictIds
Definition: ts_cache.h:68
ListDictionary * map
Definition: ts_cache.h:80
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
Definition: fmgr.h:608
uint16 flags
Definition: ts_public.h:109
DictSubState dictState
Definition: ts_parse.c:45
static void setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
Definition: ts_parse.c:157
ListParsedLex towork
Definition: ts_parse.c:47
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:172
#define InvalidOid
Definition: postgres_ext.h:36
char * lemm
Definition: ts_parse.c:29
#define NULL
Definition: c.h:229
int posDict
Definition: ts_parse.c:44
#define DatumGetPointer(X)
Definition: postgres.h:555
#define Int32GetDatum(X)
Definition: postgres.h:485
void * private_state
Definition: ts_public.h:128
ParsedLex * head
Definition: ts_parse.c:36
int i
TSConfigCacheEntry * cfg
Definition: ts_parse.c:42
static void RemoveHead(LexizeData *ld)
Definition: ts_parse.c:111
struct ParsedLex * next
Definition: ts_parse.c:31
TSLexeme * tmpRes
Definition: ts_parse.c:56
static void LexizeInit ( LexizeData *  ld,
TSConfigCacheEntry *  cfg 
)
static

Definition at line 60 of file ts_parse.c.

References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, ListParsedLex::head, InvalidOid, LexizeData::lastRes, NULL, LexizeData::posDict, ListParsedLex::tail, LexizeData::tmpRes, LexizeData::towork, and LexizeData::waste.

Referenced by hlparsetext(), and parsetext().

61 {
62  ld->cfg = cfg;
63  ld->curDictId = InvalidOid;
64  ld->posDict = 0;
65  ld->towork.head = ld->towork.tail = ld->curSub = NULL;
66  ld->waste.head = ld->waste.tail = NULL;
67  ld->lastRes = NULL;
68  ld->tmpRes = NULL;
69 }
ParsedLex * lastRes
Definition: ts_parse.c:55
ParsedLex * curSub
Definition: ts_parse.c:46
Oid curDictId
Definition: ts_parse.c:43
ParsedLex * tail
Definition: ts_parse.c:37
ListParsedLex towork
Definition: ts_parse.c:47
#define InvalidOid
Definition: postgres_ext.h:36
#define NULL
Definition: c.h:229
int posDict
Definition: ts_parse.c:44
ListParsedLex waste
Definition: ts_parse.c:48
ParsedLex * head
Definition: ts_parse.c:36
TSConfigCacheEntry * cfg
Definition: ts_parse.c:42
TSLexeme * tmpRes
Definition: ts_parse.c:56
static void LPLAddTail ( ListParsedLex *  list,
ParsedLex *  newpl 
)
static

Definition at line 72 of file ts_parse.c.

References ListParsedLex::head, ParsedLex::next, NULL, and ListParsedLex::tail.

Referenced by LexizeAddLemm(), and RemoveHead().

73 {
74  if (list->tail)
75  {
76  list->tail->next = newpl;
77  list->tail = newpl;
78  }
79  else
80  list->head = list->tail = newpl;
81  newpl->next = NULL;
82 }
ParsedLex * tail
Definition: ts_parse.c:37
#define NULL
Definition: c.h:229
ParsedLex * head
Definition: ts_parse.c:36
struct ParsedLex * next
Definition: ts_parse.c:31
static ParsedLex* LPLRemoveHead ( ListParsedLex *  list)
static

Definition at line 85 of file ts_parse.c.

References ListParsedLex::head, ParsedLex::next, NULL, and ListParsedLex::tail.

Referenced by RemoveHead().

86 {
87  ParsedLex *res = list->head;
88 
89  if (list->head)
90  list->head = list->head->next;
91 
92  if (list->head == NULL)
93  list->tail = NULL;
94 
95  return res;
96 }
ParsedLex * tail
Definition: ts_parse.c:37
#define NULL
Definition: c.h:229
ParsedLex * head
Definition: ts_parse.c:36
struct ParsedLex * next
Definition: ts_parse.c:31
static void moveToWaste ( LexizeData *  ld,
ParsedLex *  stop 
)
static

Definition at line 141 of file ts_parse.c.

References LexizeData::curSub, ListParsedLex::head, ParsedLex::next, RemoveHead(), and LexizeData::towork.

Referenced by LexizeExec().

142 {
143  bool go = true;
144 
145  while (ld->towork.head && go)
146  {
147  if (ld->towork.head == stop)
148  {
149  ld->curSub = stop->next;
150  go = false;
151  }
152  RemoveHead(ld);
153  }
154 }
ParsedLex * curSub
Definition: ts_parse.c:46
ListParsedLex towork
Definition: ts_parse.c:47
ParsedLex * head
Definition: ts_parse.c:36
static void RemoveHead(LexizeData *ld)
Definition: ts_parse.c:111
struct ParsedLex * next
Definition: ts_parse.c:31
void parsetext ( Oid  cfgId,
ParsedText *  prs,
char *  buf,
int  buflen 
)

Definition at line 358 of file ts_parse.c.

References ParsedWord::alen, ParsedText::curwords, DatumGetInt32, DatumGetPointer, ereport, errcode(), errdetail(), errmsg(), ERROR, ParsedWord::flags, TSLexeme::flags, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum, ParsedWord::len, ParsedText::lenwords, TSLexeme::lexeme, LexizeAddLemm(), LexizeExec(), LexizeInit(), LIMITPOS, lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, NULL, ParsedWord::nvariant, TSLexeme::nvariant, pfree(), PointerGetDatum, ParsedWord::pos, ParsedText::pos, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, repalloc(), TSL_ADDPOS, TSL_PREFIX, ParsedWord::word, and ParsedText::words.

Referenced by add_to_tsvector(), pushval_morph(), to_tsvector_byid(), and tsvector_update_trigger().

359 {
360  int type,
361  lenlemm;
362  char *lemm = NULL;
363  LexizeData ldata;
364  TSLexeme *norms;
365  TSConfigCacheEntry *cfg;
366  TSParserCacheEntry *prsobj;
367  void *prsdata;
368 
369  cfg = lookup_ts_config_cache(cfgId);
370  prsobj = lookup_ts_parser_cache(cfg->prsId);
371 
372  prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
373  PointerGetDatum(buf),
374  Int32GetDatum(buflen)));
375 
376  LexizeInit(&ldata, cfg);
377 
378  do
379  {
380  type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
381  PointerGetDatum(prsdata),
382  PointerGetDatum(&lemm),
383  PointerGetDatum(&lenlemm)));
384 
385  if (type > 0 && lenlemm >= MAXSTRLEN)
386  {
387 #ifdef IGNORE_LONGLEXEME
388  ereport(NOTICE,
389  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
390  errmsg("word is too long to be indexed"),
391  errdetail("Words longer than %d characters are ignored.",
392  MAXSTRLEN)));
393  continue;
394 #else
395  ereport(ERROR,
396  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
397  errmsg("word is too long to be indexed"),
398  errdetail("Words longer than %d characters are ignored.",
399  MAXSTRLEN)));
400 #endif
401  }
402 
403  LexizeAddLemm(&ldata, type, lemm, lenlemm);
404 
405  while ((norms = LexizeExec(&ldata, NULL)) != NULL)
406  {
407  TSLexeme *ptr = norms;
408 
409  prs->pos++; /* set pos */
410 
411  while (ptr->lexeme)
412  {
413  if (prs->curwords == prs->lenwords)
414  {
415  prs->lenwords *= 2;
416  prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
417  }
418 
419  if (ptr->flags & TSL_ADDPOS)
420  prs->pos++;
421  prs->words[prs->curwords].len = strlen(ptr->lexeme);
422  prs->words[prs->curwords].word = ptr->lexeme;
423  prs->words[prs->curwords].nvariant = ptr->nvariant;
424  prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
425  prs->words[prs->curwords].alen = 0;
426  prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
427  ptr++;
428  prs->curwords++;
429  }
430  pfree(norms);
431  }
432  } while (type > 0);
433 
434  FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
435 }
#define TSL_PREFIX
Definition: ts_public.h:116
#define TSL_ADDPOS
Definition: ts_public.h:115
#define DatumGetInt32(X)
Definition: postgres.h:478
#define PointerGetDatum(X)
Definition: postgres.h:562
uint16 nvariant
Definition: ts_utils.h:65
#define FunctionCall2(flinfo, arg1, arg2)
Definition: fmgr.h:604
int errcode(int sqlerrcode)
Definition: elog.c:575
TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId)
Definition: ts_cache.c:112
int32 lenwords
Definition: ts_utils.h:85
int32 curwords
Definition: ts_utils.h:86
#define FunctionCall3(flinfo, arg1, arg2, arg3)
Definition: fmgr.h:606
void pfree(void *pointer)
Definition: mcxt.c:950
#define ERROR
Definition: elog.h:43
FmgrInfo prsend
Definition: ts_cache.h:47
TSConfigCacheEntry * lookup_ts_config_cache(Oid cfgId)
Definition: ts_cache.c:385
char * lexeme
Definition: ts_public.h:111
static char * buf
Definition: pg_test_fsync.c:66
static void LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
Definition: ts_parse.c:60
int errdetail(const char *fmt,...)
Definition: elog.c:873
ParsedWord * words
Definition: ts_utils.h:84
uint16 pos
Definition: ts_utils.h:68
uint16 nvariant
Definition: ts_public.h:107
#define ereport(elevel, rest)
Definition: elog.h:122
uint16 flags
Definition: ts_public.h:109
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
Definition: ts_parse.c:172
#define NOTICE
Definition: elog.h:37
#define NULL
Definition: c.h:229
FmgrInfo prstoken
Definition: ts_cache.h:46
uint16 len
Definition: ts_utils.h:64
int32 pos
Definition: ts_utils.h:87
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:963
#define DatumGetPointer(X)
Definition: postgres.h:555
uint16 flags
Definition: ts_utils.h:77
uint32 alen
Definition: ts_utils.h:79
FmgrInfo prsstart
Definition: ts_cache.h:45
#define Int32GetDatum(X)
Definition: postgres.h:485
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:602
#define LIMITPOS(x)
Definition: ts_type.h:87
static void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
Definition: ts_parse.c:99
#define MAXSTRLEN
Definition: ts_type.h:49
char * word
Definition: ts_utils.h:78
static void RemoveHead ( LexizeData *  ld)
static

Definition at line 111 of file ts_parse.c.

References LPLAddTail(), LPLRemoveHead(), LexizeData::posDict, LexizeData::towork, and LexizeData::waste.

Referenced by LexizeExec(), and moveToWaste().

112 {
113  LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork));
114 
115  ld->posDict = 0;
116 }
static ParsedLex * LPLRemoveHead(ListParsedLex *list)
Definition: ts_parse.c:85
static void LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
Definition: ts_parse.c:72
ListParsedLex towork
Definition: ts_parse.c:47
int posDict
Definition: ts_parse.c:44
ListParsedLex waste
Definition: ts_parse.c:48
static void setCorrLex ( LexizeData *  ld,
ParsedLex **  correspondLexem 
)
static

Definition at line 119 of file ts_parse.c.

References ListParsedLex::head, ParsedLex::next, NULL, pfree(), ListParsedLex::tail, and LexizeData::waste.

Referenced by LexizeExec().

120 {
121  if (correspondLexem)
122  {
123  *correspondLexem = ld->waste.head;
124  }
125  else
126  {
127  ParsedLex *tmp,
128  *ptr = ld->waste.head;
129 
130  while (ptr)
131  {
132  tmp = ptr->next;
133  pfree(ptr);
134  ptr = tmp;
135  }
136  }
137  ld->waste.head = ld->waste.tail = NULL;
138 }
void pfree(void *pointer)
Definition: mcxt.c:950
ParsedLex * tail
Definition: ts_parse.c:37
#define NULL
Definition: c.h:229
ListParsedLex waste
Definition: ts_parse.c:48
ParsedLex * head
Definition: ts_parse.c:36
struct ParsedLex * next
Definition: ts_parse.c:31
static void setNewTmpRes ( LexizeData *  ld,
ParsedLex *  lex,
TSLexeme *  res 
)
static

Definition at line 157 of file ts_parse.c.

References LexizeData::lastRes, TSLexeme::lexeme, pfree(), and LexizeData::tmpRes.

Referenced by LexizeExec().

158 {
159  if (ld->tmpRes)
160  {
161  TSLexeme *ptr;
162 
163  for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
164  pfree(ptr->lexeme);
165  pfree(ld->tmpRes);
166  }
167  ld->tmpRes = res;
168  ld->lastRes = lex;
169 }
ParsedLex * lastRes
Definition: ts_parse.c:55
void pfree(void *pointer)
Definition: mcxt.c:950
char * lexeme
Definition: ts_public.h:111
TSLexeme * tmpRes
Definition: ts_parse.c:56