43 #define VERSIONNUMBER 8
44 #define NUMPARTHWORD 9
46 #define ASCIIPARTHWORD 11
57 #define UNSIGNEDINT 22
93 "Word, letters and digits",
97 "Scientific notation",
99 "Hyphenated word part, letters and digits",
100 "Hyphenated word part, all letters",
101 "Hyphenated word part, all ASCII",
105 "Hyphenated word, letters and digits",
106 "Hyphenated word, all ASCII",
107 "Hyphenated word, all letters",
220 #define A_NEXT 0x0000
221 #define A_BINGO 0x0001
223 #define A_PUSH 0x0004
224 #define A_RERUN 0x0008
225 #define A_CLEAR 0x0010
226 #define A_MERGE 0x0020
227 #define A_CLRALL 0x0040
283 res->pushedAtAction = NULL;
388 fprintf(stderr,
"closing parser\n");
408 fprintf(stderr,
"closing parser copy\n");
424 #define p_iswhat(type, nonascii) \
427 p_is##type(TParser *prs) \
429 Assert(prs->state); \
434 unsigned int c = *(prs->pgwstr + prs->state->poschar); \
437 return is##type(c); \
439 return isw##type(*(prs->wstr + prs->state->poschar)); \
441 return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
445 p_isnot##type(TParser *prs) \
447 return !p_is##type(prs); \
470 return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) ==
c)) ? 1 : 0;
483 return p_iseq(prs, prs->
c);
489 return !p_iseq(prs, prs->
c);
501 return (
p_isascii(prs) && p_isalpha(prs)) ? 1 : 0;
514 if (ch <= 0x20 || ch >= 0x7F)
625 return (prs->
ignore) ? 1 : 0;
708 static const pg_wchar strange_letter[] = {
942 const pg_wchar *StopLow = strange_letter,
943 *StopHigh = strange_letter +
lengthof(strange_letter),
952 while (StopLow < StopHigh)
954 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
955 if (*StopMiddle ==
c)
957 else if (*StopMiddle <
c)
958 StopLow = StopMiddle + 1;
960 StopHigh = StopMiddle;
1611 #ifdef WPARSER_TRACE
1612 const char *state_name;
1616 #ifdef WPARSER_TRACE
1617 #define TPARSERSTATEACTION(state) \
1618 { CppConcat(action,state), state, CppAsString(state) }
1620 #define TPARSERSTATEACTION(state) \
1621 { CppConcat(action,state), state }
1758 #ifdef WPARSER_TRACE
1771 fprintf(stderr,
" matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
1782 (item->
type > 0) ?
" type " :
"",
1932 #define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
1933 #define HLIDREPLACE(x) ( (x)==TAG_T )
1934 #define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1935 #define XMLHLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1936 #define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
1937 #define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
1946 #define INTERESTINGWORD(j) \
1947 (prs->words[j].item && !prs->words[j].repeated)
1950 #define BADENDPOINT(j) \
1951 ((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
1952 !INTERESTINGWORD(j))
1987 for (
i = 0;
i < checkval->
len;
i++)
1998 data->allocated =
true;
2033 int *nextpos,
int *p,
int *q)
2053 foreach(lc, locations)
2058 for (
int i = 0;
i < pdata->
npos;
i++)
2061 int endp = pdata->
pos[
i];
2086 foreach(lc, locations)
2091 for (
int i = pdata->
npos - 1;
i >= 0;
i--)
2094 int startp = pdata->
pos[
i] - pdata->
width;
2114 posb =
Max(posb, pos);
2134 if (idxb < 0 && prs->words[
i].pos >= posb)
2143 if (idxb >= 0 && idxe >= idxb)
2155 ch.
len = idxe - idxb + 1;
2160 *nextpos = posb + 1;
2221 int *curlen,
int *poslen,
int max_words)
2273 int shortword,
int min_words,
2274 int max_words,
int max_fragments)
2291 int32 numcovers = 0,
2302 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2317 if (numcovers >= maxcovers)
2324 covers[numcovers].
curlen = curlen;
2325 covers[numcovers].
poslen = poslen;
2326 covers[numcovers].
chosen =
false;
2327 covers[numcovers].
excluded =
false;
2335 for (f = 0; f < max_fragments; f++)
2345 for (
i = 0;
i < numcovers;
i++)
2347 if (!covers[
i].chosen && !covers[
i].excluded &&
2348 (maxitems < covers[
i].poslen ||
2349 (maxitems == covers[
i].poslen &&
2350 minwords > covers[
i].curlen)))
2360 covers[minI].
chosen =
true;
2364 curlen = covers[minI].
curlen;
2366 if (curlen < max_words)
2369 maxstretch = (max_words - curlen) / 2;
2412 covers[minI].
curlen = curlen;
2417 for (
i = 0;
i < numcovers;
i++)
2438 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2456 int shortword,
int min_words,
int max_words)
2464 bool bestcover =
false;
2475 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2486 for (
i = p;
i <= q && curlen < max_words;
i++)
2495 if (curlen < max_words)
2502 for (
i =
i - 1;
i < prs->
curwords && curlen < max_words;
i++)
2514 if (curlen >= min_words)
2517 if (curlen < min_words)
2523 for (
i = p - 1;
i >= 0;
i--)
2529 if (curlen >= max_words)
2533 if (curlen >= min_words)
2536 posb = (
i >= 0) ?
i : 0;
2547 for (; curlen > min_words;
i--)
2563 poscover = (posb <= p && pose >= q);
2572 if (poscover > bestcover ||
2573 (poscover == bestcover && poslen > bestlen) ||
2574 (poscover == bestcover && poslen == bestlen &&
2580 bestcover = poscover;
2592 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2627 int max_fragments = 0;
2628 bool highlightall =
false;
2635 foreach(l, prsoptions)
2663 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2664 errmsg(
"unrecognized headline parameter: \"%s\"",
2671 if (min_words >= max_words)
2673 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2674 errmsg(
"MinWords should be less than MaxWords")));
2677 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2678 errmsg(
"MinWords should be positive")));
2681 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2682 errmsg(
"ShortWord should be >= 0")));
2683 if (max_fragments < 0)
2685 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2686 errmsg(
"MaxFragments should be >= 0")));
2690 if (query->
size > 0)
2703 if (max_fragments == 0)
2704 mark_hl_words(prs, query, locations, highlightall, shortword,
2705 min_words, max_words);
2708 min_words, max_words, max_fragments);
void print(const void *obj)
#define Assert(condition)
static void PGresult * res
char * defGetString(DefElem *def)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define PG_GETARG_POINTER(n)
#define PG_RETURN_INT32(x)
#define PG_GETARG_INT32(n)
#define PG_RETURN_POINTER(x)
int GetDatabaseEncoding(void)
int pg_dsplen(const char *mbstr)
int pg_database_encoding_max_length(void)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
int pg_mblen(const char *mbstr)
char * pstrdup(const char *in)
void pfree(void *pointer)
void * palloc0(Size size)
void * repalloc(void *pointer, Size size)
#define CHECK_FOR_INTERRUPTS()
int32 pg_strtoint32(const char *s)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
static XLogRecPtr startpos
int pg_strcasecmp(const char *s1, const char *s2)
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
void check_stack_depth(void)
HeadlineWordEntry * words
const TParserStateActionItem * pushedAtAction
struct TParserPosition * prev
const TParserStateActionItem * action
HeadlineWordEntry * words
#define PG_GETARG_TSQUERY(n)
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static const TParserStateActionItem actionTPS_InParseHyphen[]
static const TParserStateActionItem actionTPS_InXMLEntityNumFirst[]
static const TParserStateActionItem actionTPS_InHyphenWordFirst[]
static const TParserStateActionItem actionTPS_InXMLEntityFirst[]
static const TParserStateActionItem actionTPS_InHostFirstAN[]
static const TParserStateActionItem actionTPS_InHyphenNumWordPart[]
static const TParserStateActionItem actionTPS_InPathSecond[]
static const TParserStateActionItem actionTPS_InPathFirst[]
static const TParserStateActionItem actionTPS_InHostDomainSecond[]
static const TParserStateActionItem actionTPS_InCloseCommentFirst[]
static void SpecialFURL(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentEnd[]
static TSTernaryValue checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
void _make_compiler_happy(void)
static const TParserStateActionItem actionTPS_InURLPathStart[]
static const TParserStateActionItem actionTPS_InHostFirstDomain[]
static const TParserStateActionItem actionTPS_InHyphenDigitLookahead[]
static const TParserStateActionItem actionTPS_InHostDomain[]
static const TParserStateActionItem actionTPS_InVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[]
Datum prsd_nexttoken(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTagName[]
static const TParserStateActionItem actionTPS_InFileNext[]
static const TParserStateActionItem actionTPS_InXMLEntity[]
static const TParserStateActionItem actionTPS_InFURL[]
#define p_iswhat(type, nonascii)
static const TParserStateActionItem actionTPS_InMantissaSign[]
static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words, int max_fragments)
@ TPS_InXMLEntityHexNumFirst
@ TPS_InHyphenAsciiWordFirst
@ TPS_InHyphenNumWordPart
@ TPS_InHyphenNumWordFirst
@ TPS_InHyphenUnsignedInt
@ TPS_InXMLEntityNumFirst
@ TPS_InCloseCommentFirst
@ TPS_InParseHyphenHyphen
@ TPS_InHyphenAsciiWordPart
@ TPS_InHyphenDigitLookahead
static void mark_fragment(HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
static const TParserStateActionItem actionTPS_InXMLEntityEnd[]
static const TParserStateActionItem actionTPS_InHyphenNumWord[]
static const TParserStateActionItem actionTPS_InDecimal[]
static const TParserStateActionItem actionTPS_InSignedIntFirst[]
static const TParserStateActionItem actionTPS_InTagEscapeK[]
static const TParserStateActionItem actionTPS_InSpace[]
static const TParserStateActionItem actionTPS_InFile[]
static TParser * TParserCopyInit(const TParser *orig)
static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[]
static int p_iseqC(TParser *prs)
Datum prsd_headline(PG_FUNCTION_ARGS)
static bool hlCover(HeadlineParsedText *prs, TSQuery query, List *locations, int *nextpos, int *p, int *q)
static const TParserStateActionItem actionTPS_InUDecimal[]
int(* TParserCharTest)(struct TParser *)
static const TParserStateActionItem actionTPS_InSignedInt[]
static int p_isurlchar(TParser *prs)
static const TParserStateActionItem actionTPS_InTagBeginEnd[]
static const TParserStateActionItem actionTPS_InTagFirst[]
struct TParserPosition TParserPosition
static const TParserStateActionItem actionTPS_InTagEscapeKK[]
static int p_isneC(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentLast[]
static const TParserStateActionItem actionTPS_InHyphenWordPart[]
static const TParserStateActionItem actionTPS_InMantissaFirst[]
static const TParserStateActionItem actionTPS_Base[]
static void SpecialHyphen(TParser *prs)
static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words)
static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[]
void(* TParserSpecial)(struct TParser *)
static const TParserStateActionItem actionTPS_InEmail[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[]
static const TParserStateActionItem actionTPS_InURLPath[]
static const TParserStateActionItem actionTPS_InSVerVersion[]
static const TParserStateActionItem actionTPS_InAsciiWord[]
static const char *const tok_alias[]
static int p_isstophost(TParser *prs)
static void get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[]
static int p_isasclet(TParser *prs)
static const TParserStateAction Actions[]
static const TParserStateActionItem actionTPS_InXMLBegin[]
static const TParserStateActionItem actionTPS_InMantissa[]
static const TParserStateActionItem actionTPS_InVersionFirst[]
static int p_isascii(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentFirst[]
static const TParserStateActionItem actionTPS_InHyphenWord[]
static int p_isignore(TParser *prs)
static const TParserStateActionItem actionTPS_InParseHyphenHyphen[]
static const TParserStateActionItem actionTPS_InPort[]
static const TParserStateActionItem actionTPS_InDecimalFirst[]
static TParserPosition * newTParserPosition(TParserPosition *prev)
Datum prsd_lextype(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTag[]
Datum prsd_start(PG_FUNCTION_ARGS)
static TParser * TParserInit(char *str, int len)
#define TPARSERSTATEACTION(state)
static bool TParserGet(TParser *prs)
static int p_ishost(TParser *prs)
Datum prsd_end(PG_FUNCTION_ARGS)
static int p_isURLPath(TParser *prs)
static void SpecialVerVersion(TParser *prs)
static const TParserStateActionItem actionTPS_InProtocolFirst[]
static const TParserStateActionItem actionTPS_InUnsignedInt[]
static const TParserStateActionItem actionTPS_InUDecimalFirst[]
static const TParserStateActionItem actionTPS_InTagCloseFirst[]
static int p_isEOF(TParser *prs)
static const TParserStateActionItem actionTPS_InCloseCommentLast[]
static void TParserCopyClose(TParser *prs)
static const TParserStateActionItem actionTPS_InFileFirst[]
static const TParserStateActionItem actionTPS_InNumWord[]
static const TParserStateActionItem actionTPS_InFileTwiddle[]
static const TParserStateActionItem actionTPS_InHost[]
static const TParserStateActionItem actionTPS_InTagBackSleshed[]
static const TParserStateActionItem actionTPS_InProtocolSecond[]
static const TParserStateActionItem actionTPS_InWord[]
static int p_isspecial(TParser *prs)
static void TParserClose(TParser *prs)
static const TParserStateActionItem actionTPS_InXMLEntityNum[]
static const TParserStateActionItem actionTPS_InVerVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWord[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNum[]
static const TParserStateActionItem actionTPS_InPortFirst[]
static const char *const lex_descr[]
#define INTERESTINGWORD(j)
static void SpecialTags(TParser *prs)
static const TParserStateActionItem actionTPS_InTagEnd[]
static const TParserStateActionItem actionTPS_InComment[]
static const TParserStateActionItem actionTPS_InProtocolEnd[]
static const TParserStateActionItem actionTPS_InURLPathFirst[]
static const TParserStateActionItem actionTPS_InPathFirstFirst[]