41 #define VERSIONNUMBER 8
42 #define NUMPARTHWORD 9
44 #define ASCIIPARTHWORD 11
55 #define UNSIGNEDINT 22
91 "Word, letters and digits",
95 "Scientific notation",
97 "Hyphenated word part, letters and digits",
98 "Hyphenated word part, all letters",
99 "Hyphenated word part, all ASCII",
103 "Hyphenated word, letters and digits",
104 "Hyphenated word, all ASCII",
105 "Hyphenated word, all letters",
218 #define A_NEXT 0x0000
219 #define A_BINGO 0x0001
221 #define A_PUSH 0x0004
222 #define A_RERUN 0x0008
223 #define A_CLEAR 0x0010
224 #define A_MERGE 0x0020
225 #define A_CLRALL 0x0040
281 res->pushedAtAction = NULL;
386 fprintf(stderr,
"closing parser\n");
406 fprintf(stderr,
"closing parser copy\n");
422 #define p_iswhat(type, nonascii) \
425 p_is##type(TParser *prs) \
427 Assert(prs->state); \
432 unsigned int c = *(prs->pgwstr + prs->state->poschar); \
435 return is##type(c); \
437 return isw##type(*(prs->wstr + prs->state->poschar)); \
439 return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
443 p_isnot##type(TParser *prs) \
445 return !p_is##type(prs); \
468 return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) ==
c)) ? 1 : 0;
481 return p_iseq(prs, prs->
c);
487 return !p_iseq(prs, prs->
c);
499 return (
p_isascii(prs) && p_isalpha(prs)) ? 1 : 0;
512 if (ch <= 0x20 || ch >= 0x7F)
623 return (prs->
ignore) ? 1 : 0;
694 static const pg_wchar strange_letter[] = {
928 const pg_wchar *StopLow = strange_letter,
929 *StopHigh = strange_letter +
lengthof(strange_letter),
938 while (StopLow < StopHigh)
940 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
941 if (*StopMiddle ==
c)
943 else if (*StopMiddle <
c)
944 StopLow = StopMiddle + 1;
946 StopHigh = StopMiddle;
1597 #ifdef WPARSER_TRACE
1598 const char *state_name;
1602 #ifdef WPARSER_TRACE
1603 #define TPARSERSTATEACTION(state) \
1604 { CppConcat(action,state), state, CppAsString(state) }
1606 #define TPARSERSTATEACTION(state) \
1607 { CppConcat(action,state), state }
1742 #ifdef WPARSER_TRACE
1755 fprintf(stderr,
" matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
1766 (item->
type > 0) ?
" type " :
"",
1916 #define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
1917 #define HLIDREPLACE(x) ( (x)==TAG_T )
1918 #define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1919 #define XMLHLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1920 #define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
1921 #define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
1930 #define INTERESTINGWORD(j) \
1931 (prs->words[j].item && !prs->words[j].repeated)
1934 #define BADENDPOINT(j) \
1935 ((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
1936 !INTERESTINGWORD(j))
1971 for (
i = 0;
i < checkval->
len;
i++)
1982 data->allocated =
true;
2017 int *nextpos,
int *p,
int *q)
2037 foreach(lc, locations)
2042 for (
int i = 0;
i < pdata->
npos;
i++)
2045 int endp = pdata->
pos[
i];
2070 foreach(lc, locations)
2075 for (
int i = pdata->
npos - 1;
i >= 0;
i--)
2078 int startp = pdata->
pos[
i] - pdata->
width;
2098 posb =
Max(posb, pos);
2118 if (idxb < 0 && prs->words[
i].pos >= posb)
2127 if (idxb >= 0 && idxe >= idxb)
2139 ch.
len = idxe - idxb + 1;
2144 *nextpos = posb + 1;
2205 int *curlen,
int *poslen,
int max_words)
2257 int shortword,
int min_words,
2258 int max_words,
int max_fragments)
2275 int32 numcovers = 0,
2286 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2301 if (numcovers >= maxcovers)
2308 covers[numcovers].
curlen = curlen;
2309 covers[numcovers].
poslen = poslen;
2310 covers[numcovers].
chosen =
false;
2311 covers[numcovers].
excluded =
false;
2319 for (f = 0; f < max_fragments; f++)
2329 for (
i = 0;
i < numcovers;
i++)
2331 if (!covers[
i].chosen && !covers[
i].excluded &&
2332 (maxitems < covers[
i].poslen ||
2333 (maxitems == covers[
i].poslen &&
2334 minwords > covers[
i].curlen)))
2344 covers[minI].
chosen =
true;
2348 curlen = covers[minI].
curlen;
2350 if (curlen < max_words)
2353 maxstretch = (max_words - curlen) / 2;
2396 covers[minI].
curlen = curlen;
2401 for (
i = 0;
i < numcovers;
i++)
2422 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2440 int shortword,
int min_words,
int max_words)
2448 bool bestcover =
false;
2459 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2470 for (
i = p;
i <= q && curlen < max_words;
i++)
2479 if (curlen < max_words)
2486 for (
i =
i - 1;
i < prs->
curwords && curlen < max_words;
i++)
2498 if (curlen >= min_words)
2501 if (curlen < min_words)
2507 for (
i = p - 1;
i >= 0;
i--)
2513 if (curlen >= max_words)
2517 if (curlen >= min_words)
2520 posb = (
i >= 0) ?
i : 0;
2531 for (; curlen > min_words;
i--)
2547 poscover = (posb <= p && pose >= q);
2556 if (poscover > bestcover ||
2557 (poscover == bestcover && poslen > bestlen) ||
2558 (poscover == bestcover && poslen == bestlen &&
2564 bestcover = poscover;
2576 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2611 int max_fragments = 0;
2612 bool highlightall =
false;
2619 foreach(l, prsoptions)
2647 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2648 errmsg(
"unrecognized headline parameter: \"%s\"",
2655 if (min_words >= max_words)
2657 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2658 errmsg(
"MinWords should be less than MaxWords")));
2661 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2662 errmsg(
"MinWords should be positive")));
2665 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2666 errmsg(
"ShortWord should be >= 0")));
2667 if (max_fragments < 0)
2669 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2670 errmsg(
"MaxFragments should be >= 0")));
2674 if (query->
size > 0)
2687 if (max_fragments == 0)
2688 mark_hl_words(prs, query, locations, highlightall, shortword,
2689 min_words, max_words);
2692 min_words, max_words, max_fragments);
void print(const void *obj)
static void PGresult * res
char * defGetString(DefElem *def)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define PG_GETARG_POINTER(n)
#define PG_RETURN_INT32(x)
#define PG_GETARG_INT32(n)
#define PG_RETURN_POINTER(x)
Assert(fmt[strlen(fmt) - 1] !='\n')
int GetDatabaseEncoding(void)
int pg_dsplen(const char *mbstr)
int pg_database_encoding_max_length(void)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
int pg_mblen(const char *mbstr)
char * pstrdup(const char *in)
void pfree(void *pointer)
void * palloc0(Size size)
void * repalloc(void *pointer, Size size)
int32 pg_strtoint32(const char *s)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
static XLogRecPtr startpos
int pg_strcasecmp(const char *s1, const char *s2)
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
HeadlineWordEntry * words
const TParserStateActionItem * pushedAtAction
struct TParserPosition * prev
const TParserStateActionItem * action
HeadlineWordEntry * words
#define PG_GETARG_TSQUERY(n)
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static const TParserStateActionItem actionTPS_InParseHyphen[]
static const TParserStateActionItem actionTPS_InXMLEntityNumFirst[]
static const TParserStateActionItem actionTPS_InHyphenWordFirst[]
static const TParserStateActionItem actionTPS_InXMLEntityFirst[]
static const TParserStateActionItem actionTPS_InHostFirstAN[]
static const TParserStateActionItem actionTPS_InHyphenNumWordPart[]
static const TParserStateActionItem actionTPS_InPathSecond[]
static const TParserStateActionItem actionTPS_InPathFirst[]
static const TParserStateActionItem actionTPS_InHostDomainSecond[]
static const TParserStateActionItem actionTPS_InCloseCommentFirst[]
static void SpecialFURL(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentEnd[]
static TSTernaryValue checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
void _make_compiler_happy(void)
static const TParserStateActionItem actionTPS_InURLPathStart[]
static const TParserStateActionItem actionTPS_InHostFirstDomain[]
static const TParserStateActionItem actionTPS_InHyphenDigitLookahead[]
static const TParserStateActionItem actionTPS_InHostDomain[]
static const TParserStateActionItem actionTPS_InVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[]
Datum prsd_nexttoken(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTagName[]
static const TParserStateActionItem actionTPS_InFileNext[]
static const TParserStateActionItem actionTPS_InXMLEntity[]
static const TParserStateActionItem actionTPS_InFURL[]
#define p_iswhat(type, nonascii)
static const TParserStateActionItem actionTPS_InMantissaSign[]
static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words, int max_fragments)
@ TPS_InXMLEntityHexNumFirst
@ TPS_InHyphenAsciiWordFirst
@ TPS_InHyphenNumWordPart
@ TPS_InHyphenNumWordFirst
@ TPS_InHyphenUnsignedInt
@ TPS_InXMLEntityNumFirst
@ TPS_InCloseCommentFirst
@ TPS_InParseHyphenHyphen
@ TPS_InHyphenAsciiWordPart
@ TPS_InHyphenDigitLookahead
static void mark_fragment(HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
static const TParserStateActionItem actionTPS_InXMLEntityEnd[]
static const TParserStateActionItem actionTPS_InHyphenNumWord[]
static const TParserStateActionItem actionTPS_InDecimal[]
static const TParserStateActionItem actionTPS_InSignedIntFirst[]
static const TParserStateActionItem actionTPS_InTagEscapeK[]
static const TParserStateActionItem actionTPS_InSpace[]
static const TParserStateActionItem actionTPS_InFile[]
static TParser * TParserCopyInit(const TParser *orig)
static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[]
static int p_iseqC(TParser *prs)
Datum prsd_headline(PG_FUNCTION_ARGS)
static bool hlCover(HeadlineParsedText *prs, TSQuery query, List *locations, int *nextpos, int *p, int *q)
static const TParserStateActionItem actionTPS_InUDecimal[]
int(* TParserCharTest)(struct TParser *)
static const TParserStateActionItem actionTPS_InSignedInt[]
static int p_isurlchar(TParser *prs)
static const TParserStateActionItem actionTPS_InTagBeginEnd[]
static const TParserStateActionItem actionTPS_InTagFirst[]
struct TParserPosition TParserPosition
static const TParserStateActionItem actionTPS_InTagEscapeKK[]
static int p_isneC(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentLast[]
static const TParserStateActionItem actionTPS_InHyphenWordPart[]
static const TParserStateActionItem actionTPS_InMantissaFirst[]
static const TParserStateActionItem actionTPS_Base[]
static void SpecialHyphen(TParser *prs)
static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words)
static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[]
void(* TParserSpecial)(struct TParser *)
static const TParserStateActionItem actionTPS_InEmail[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[]
static const TParserStateActionItem actionTPS_InURLPath[]
static const TParserStateActionItem actionTPS_InSVerVersion[]
static const TParserStateActionItem actionTPS_InAsciiWord[]
static const char *const tok_alias[]
static int p_isstophost(TParser *prs)
static void get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[]
static int p_isasclet(TParser *prs)
static const TParserStateAction Actions[]
static const TParserStateActionItem actionTPS_InXMLBegin[]
static const TParserStateActionItem actionTPS_InMantissa[]
static const TParserStateActionItem actionTPS_InVersionFirst[]
static int p_isascii(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentFirst[]
static const TParserStateActionItem actionTPS_InHyphenWord[]
static int p_isignore(TParser *prs)
static const TParserStateActionItem actionTPS_InParseHyphenHyphen[]
static const TParserStateActionItem actionTPS_InPort[]
static const TParserStateActionItem actionTPS_InDecimalFirst[]
static TParserPosition * newTParserPosition(TParserPosition *prev)
Datum prsd_lextype(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTag[]
Datum prsd_start(PG_FUNCTION_ARGS)
static TParser * TParserInit(char *str, int len)
#define TPARSERSTATEACTION(state)
static bool TParserGet(TParser *prs)
static int p_ishost(TParser *prs)
Datum prsd_end(PG_FUNCTION_ARGS)
static int p_isURLPath(TParser *prs)
static void SpecialVerVersion(TParser *prs)
static const TParserStateActionItem actionTPS_InProtocolFirst[]
static const TParserStateActionItem actionTPS_InUnsignedInt[]
static const TParserStateActionItem actionTPS_InUDecimalFirst[]
static const TParserStateActionItem actionTPS_InTagCloseFirst[]
static int p_isEOF(TParser *prs)
static const TParserStateActionItem actionTPS_InCloseCommentLast[]
static void TParserCopyClose(TParser *prs)
static const TParserStateActionItem actionTPS_InFileFirst[]
static const TParserStateActionItem actionTPS_InNumWord[]
static const TParserStateActionItem actionTPS_InFileTwiddle[]
static const TParserStateActionItem actionTPS_InHost[]
static const TParserStateActionItem actionTPS_InTagBackSleshed[]
static const TParserStateActionItem actionTPS_InProtocolSecond[]
static const TParserStateActionItem actionTPS_InWord[]
static int p_isspecial(TParser *prs)
static void TParserClose(TParser *prs)
static const TParserStateActionItem actionTPS_InXMLEntityNum[]
static const TParserStateActionItem actionTPS_InVerVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWord[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNum[]
static const TParserStateActionItem actionTPS_InPortFirst[]
static const char *const lex_descr[]
#define INTERESTINGWORD(j)
static void SpecialTags(TParser *prs)
static const TParserStateActionItem actionTPS_InTagEnd[]
static const TParserStateActionItem actionTPS_InComment[]
static const TParserStateActionItem actionTPS_InProtocolEnd[]
static const TParserStateActionItem actionTPS_InURLPathFirst[]
static const TParserStateActionItem actionTPS_InPathFirstFirst[]