41 #define VERSIONNUMBER 8
42 #define NUMPARTHWORD 9
44 #define ASCIIPARTHWORD 11
55 #define UNSIGNEDINT 22
91 "Word, letters and digits",
95 "Scientific notation",
97 "Hyphenated word part, letters and digits",
98 "Hyphenated word part, all letters",
99 "Hyphenated word part, all ASCII",
103 "Hyphenated word, letters and digits",
104 "Hyphenated word, all ASCII",
105 "Hyphenated word, all letters",
218 #define A_NEXT 0x0000
219 #define A_BINGO 0x0001
221 #define A_PUSH 0x0004
222 #define A_RERUN 0x0008
223 #define A_CLEAR 0x0010
224 #define A_MERGE 0x0020
225 #define A_CLRALL 0x0040
281 res->pushedAtAction = NULL;
300 Oid collation = DEFAULT_COLLATION_OID;
387 fprintf(stderr,
"closing parser\n");
407 fprintf(stderr,
"closing parser copy\n");
423 #define p_iswhat(type, nonascii) \
426 p_is##type(TParser *prs) \
428 Assert(prs->state); \
433 unsigned int c = *(prs->pgwstr + prs->state->poschar); \
436 return is##type(c); \
438 return isw##type(*(prs->wstr + prs->state->poschar)); \
440 return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
444 p_isnot##type(TParser *prs) \
446 return !p_is##type(prs); \
469 return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) ==
c)) ? 1 : 0;
482 return p_iseq(prs, prs->
c);
488 return !p_iseq(prs, prs->
c);
500 return (
p_isascii(prs) && p_isalpha(prs)) ? 1 : 0;
513 if (ch <= 0x20 || ch >= 0x7F)
624 return (prs->
ignore) ? 1 : 0;
695 static const pg_wchar strange_letter[] = {
929 const pg_wchar *StopLow = strange_letter,
930 *StopHigh = strange_letter +
lengthof(strange_letter),
939 while (StopLow < StopHigh)
941 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
942 if (*StopMiddle ==
c)
944 else if (*StopMiddle <
c)
945 StopLow = StopMiddle + 1;
947 StopHigh = StopMiddle;
1598 #ifdef WPARSER_TRACE
1599 const char *state_name;
1603 #ifdef WPARSER_TRACE
1604 #define TPARSERSTATEACTION(state) \
1605 { CppConcat(action,state), state, CppAsString(state) }
1607 #define TPARSERSTATEACTION(state) \
1608 { CppConcat(action,state), state }
1743 #ifdef WPARSER_TRACE
1756 fprintf(stderr,
" matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
1767 (item->
type > 0) ?
" type " :
"",
1917 #define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
1918 #define HLIDREPLACE(x) ( (x)==TAG_T )
1919 #define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1920 #define XMLHLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1921 #define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
1922 #define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
1931 #define INTERESTINGWORD(j) \
1932 (prs->words[j].item && !prs->words[j].repeated)
1935 #define BADENDPOINT(j) \
1936 ((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
1937 !INTERESTINGWORD(j))
1972 for (
i = 0;
i < checkval->
len;
i++)
1983 data->allocated =
true;
2018 int *nextpos,
int *p,
int *q)
2038 foreach(lc, locations)
2043 for (
int i = 0;
i < pdata->
npos;
i++)
2046 int endp = pdata->
pos[
i];
2071 foreach(lc, locations)
2076 for (
int i = pdata->
npos - 1;
i >= 0;
i--)
2079 int startp = pdata->
pos[
i] - pdata->
width;
2099 posb =
Max(posb, pos);
2119 if (idxb < 0 && prs->words[
i].pos >= posb)
2128 if (idxb >= 0 && idxe >= idxb)
2140 ch.
len = idxe - idxb + 1;
2145 *nextpos = posb + 1;
2206 int *curlen,
int *poslen,
int max_words)
2258 int shortword,
int min_words,
2259 int max_words,
int max_fragments)
2276 int32 numcovers = 0,
2287 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2302 if (numcovers >= maxcovers)
2309 covers[numcovers].
curlen = curlen;
2310 covers[numcovers].
poslen = poslen;
2311 covers[numcovers].
chosen =
false;
2312 covers[numcovers].
excluded =
false;
2320 for (f = 0; f < max_fragments; f++)
2330 for (
i = 0;
i < numcovers;
i++)
2332 if (!covers[
i].chosen && !covers[
i].excluded &&
2333 (maxitems < covers[
i].poslen ||
2334 (maxitems == covers[
i].poslen &&
2335 minwords > covers[
i].curlen)))
2345 covers[minI].
chosen =
true;
2349 curlen = covers[minI].
curlen;
2351 if (curlen < max_words)
2354 maxstretch = (max_words - curlen) / 2;
2397 covers[minI].
curlen = curlen;
2402 for (
i = 0;
i < numcovers;
i++)
2422 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2440 int shortword,
int min_words,
int max_words)
2448 bool bestcover =
false;
2459 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2470 for (
i = p;
i <= q && curlen < max_words;
i++)
2479 if (curlen < max_words)
2486 for (
i =
i - 1;
i < prs->
curwords && curlen < max_words;
i++)
2498 if (curlen >= min_words)
2501 if (curlen < min_words)
2507 for (
i = p - 1;
i >= 0;
i--)
2513 if (curlen >= max_words)
2517 if (curlen >= min_words)
2520 posb = (
i >= 0) ?
i : 0;
2531 for (; curlen > min_words;
i--)
2547 poscover = (posb <= p && pose >= q);
2556 if (poscover > bestcover ||
2557 (poscover == bestcover && poslen > bestlen) ||
2558 (poscover == bestcover && poslen == bestlen &&
2564 bestcover = poscover;
2576 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2612 int max_fragments = 0;
2613 bool highlightall =
false;
2620 foreach(l, prsoptions)
2648 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2649 errmsg(
"unrecognized headline parameter: \"%s\"",
2656 if (min_words >= max_words)
2658 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2659 errmsg(
"MinWords should be less than MaxWords")));
2662 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2663 errmsg(
"MinWords should be positive")));
2666 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2667 errmsg(
"ShortWord should be >= 0")));
2668 if (max_fragments < 0)
2670 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2671 errmsg(
"MaxFragments should be >= 0")));
2681 if (max_fragments == 0)
2682 mark_hl_words(prs, query, locations, highlightall, shortword,
2683 min_words, max_words);
2686 min_words, max_words, max_fragments);
void print(const void *obj)
static void PGresult * res
char * defGetString(DefElem *def)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define PG_GETARG_POINTER(n)
#define PG_RETURN_INT32(x)
#define PG_GETARG_INT32(n)
#define PG_RETURN_POINTER(x)
Assert(fmt[strlen(fmt) - 1] !='\n')
int GetDatabaseEncoding(void)
int pg_dsplen(const char *mbstr)
int pg_database_encoding_max_length(void)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
int pg_mblen(const char *mbstr)
char * pstrdup(const char *in)
void pfree(void *pointer)
void * palloc0(Size size)
void * repalloc(void *pointer, Size size)
int32 pg_strtoint32(const char *s)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
bool lc_ctype_is_c(Oid collation)
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
static XLogRecPtr startpos
int pg_strcasecmp(const char *s1, const char *s2)
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
HeadlineWordEntry * words
const TParserStateActionItem * pushedAtAction
struct TParserPosition * prev
const TParserStateActionItem * action
HeadlineWordEntry * words
#define PG_GETARG_TSQUERY(n)
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static const TParserStateActionItem actionTPS_InParseHyphen[]
static const TParserStateActionItem actionTPS_InXMLEntityNumFirst[]
static const TParserStateActionItem actionTPS_InHyphenWordFirst[]
static const TParserStateActionItem actionTPS_InXMLEntityFirst[]
static const TParserStateActionItem actionTPS_InHostFirstAN[]
static const TParserStateActionItem actionTPS_InHyphenNumWordPart[]
static const TParserStateActionItem actionTPS_InPathSecond[]
static const TParserStateActionItem actionTPS_InPathFirst[]
static const TParserStateActionItem actionTPS_InHostDomainSecond[]
static const TParserStateActionItem actionTPS_InCloseCommentFirst[]
static void SpecialFURL(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentEnd[]
static TSTernaryValue checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
void _make_compiler_happy(void)
static const TParserStateActionItem actionTPS_InURLPathStart[]
static const TParserStateActionItem actionTPS_InHostFirstDomain[]
static const TParserStateActionItem actionTPS_InHyphenDigitLookahead[]
static const TParserStateActionItem actionTPS_InHostDomain[]
static const TParserStateActionItem actionTPS_InVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[]
Datum prsd_nexttoken(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTagName[]
static const TParserStateActionItem actionTPS_InFileNext[]
static const TParserStateActionItem actionTPS_InXMLEntity[]
static const TParserStateActionItem actionTPS_InFURL[]
#define p_iswhat(type, nonascii)
static const TParserStateActionItem actionTPS_InMantissaSign[]
static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words, int max_fragments)
@ TPS_InXMLEntityHexNumFirst
@ TPS_InHyphenAsciiWordFirst
@ TPS_InHyphenNumWordPart
@ TPS_InHyphenNumWordFirst
@ TPS_InHyphenUnsignedInt
@ TPS_InXMLEntityNumFirst
@ TPS_InCloseCommentFirst
@ TPS_InParseHyphenHyphen
@ TPS_InHyphenAsciiWordPart
@ TPS_InHyphenDigitLookahead
static void mark_fragment(HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
static const TParserStateActionItem actionTPS_InXMLEntityEnd[]
static const TParserStateActionItem actionTPS_InHyphenNumWord[]
static const TParserStateActionItem actionTPS_InDecimal[]
static const TParserStateActionItem actionTPS_InSignedIntFirst[]
static const TParserStateActionItem actionTPS_InTagEscapeK[]
static const TParserStateActionItem actionTPS_InSpace[]
static const TParserStateActionItem actionTPS_InFile[]
static TParser * TParserCopyInit(const TParser *orig)
static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[]
static int p_iseqC(TParser *prs)
Datum prsd_headline(PG_FUNCTION_ARGS)
static bool hlCover(HeadlineParsedText *prs, TSQuery query, List *locations, int *nextpos, int *p, int *q)
static const TParserStateActionItem actionTPS_InUDecimal[]
int(* TParserCharTest)(struct TParser *)
static const TParserStateActionItem actionTPS_InSignedInt[]
static int p_isurlchar(TParser *prs)
static const TParserStateActionItem actionTPS_InTagBeginEnd[]
static const TParserStateActionItem actionTPS_InTagFirst[]
struct TParserPosition TParserPosition
static const TParserStateActionItem actionTPS_InTagEscapeKK[]
static int p_isneC(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentLast[]
static const TParserStateActionItem actionTPS_InHyphenWordPart[]
static const TParserStateActionItem actionTPS_InMantissaFirst[]
static const TParserStateActionItem actionTPS_Base[]
static void SpecialHyphen(TParser *prs)
static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words)
static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[]
void(* TParserSpecial)(struct TParser *)
static const TParserStateActionItem actionTPS_InEmail[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[]
static const TParserStateActionItem actionTPS_InURLPath[]
static const TParserStateActionItem actionTPS_InSVerVersion[]
static const TParserStateActionItem actionTPS_InAsciiWord[]
static const char *const tok_alias[]
static int p_isstophost(TParser *prs)
static void get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[]
static int p_isasclet(TParser *prs)
static const TParserStateAction Actions[]
static const TParserStateActionItem actionTPS_InXMLBegin[]
static const TParserStateActionItem actionTPS_InMantissa[]
static const TParserStateActionItem actionTPS_InVersionFirst[]
static int p_isascii(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentFirst[]
static const TParserStateActionItem actionTPS_InHyphenWord[]
static int p_isignore(TParser *prs)
static const TParserStateActionItem actionTPS_InParseHyphenHyphen[]
static const TParserStateActionItem actionTPS_InPort[]
static const TParserStateActionItem actionTPS_InDecimalFirst[]
static TParserPosition * newTParserPosition(TParserPosition *prev)
Datum prsd_lextype(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTag[]
Datum prsd_start(PG_FUNCTION_ARGS)
static TParser * TParserInit(char *str, int len)
#define TPARSERSTATEACTION(state)
static bool TParserGet(TParser *prs)
static int p_ishost(TParser *prs)
Datum prsd_end(PG_FUNCTION_ARGS)
static int p_isURLPath(TParser *prs)
static void SpecialVerVersion(TParser *prs)
static const TParserStateActionItem actionTPS_InProtocolFirst[]
static const TParserStateActionItem actionTPS_InUnsignedInt[]
static const TParserStateActionItem actionTPS_InUDecimalFirst[]
static const TParserStateActionItem actionTPS_InTagCloseFirst[]
static int p_isEOF(TParser *prs)
static const TParserStateActionItem actionTPS_InCloseCommentLast[]
static void TParserCopyClose(TParser *prs)
static const TParserStateActionItem actionTPS_InFileFirst[]
static const TParserStateActionItem actionTPS_InNumWord[]
static const TParserStateActionItem actionTPS_InFileTwiddle[]
static const TParserStateActionItem actionTPS_InHost[]
static const TParserStateActionItem actionTPS_InTagBackSleshed[]
static const TParserStateActionItem actionTPS_InProtocolSecond[]
static const TParserStateActionItem actionTPS_InWord[]
static int p_isspecial(TParser *prs)
static void TParserClose(TParser *prs)
static const TParserStateActionItem actionTPS_InXMLEntityNum[]
static const TParserStateActionItem actionTPS_InVerVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWord[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNum[]
static const TParserStateActionItem actionTPS_InPortFirst[]
static const char *const lex_descr[]
#define INTERESTINGWORD(j)
static void SpecialTags(TParser *prs)
static const TParserStateActionItem actionTPS_InTagEnd[]
static const TParserStateActionItem actionTPS_InComment[]
static const TParserStateActionItem actionTPS_InProtocolEnd[]
static const TParserStateActionItem actionTPS_InURLPathFirst[]
static const TParserStateActionItem actionTPS_InPathFirstFirst[]