42 #define VERSIONNUMBER 8
43 #define NUMPARTHWORD 9
45 #define ASCIIPARTHWORD 11
56 #define UNSIGNEDINT 22
92 "Word, letters and digits",
96 "Scientific notation",
98 "Hyphenated word part, letters and digits",
99 "Hyphenated word part, all letters",
100 "Hyphenated word part, all ASCII",
104 "Hyphenated word, letters and digits",
105 "Hyphenated word, all ASCII",
106 "Hyphenated word, all letters",
219 #define A_NEXT 0x0000
220 #define A_BINGO 0x0001
222 #define A_PUSH 0x0004
223 #define A_RERUN 0x0008
224 #define A_CLEAR 0x0010
225 #define A_MERGE 0x0020
226 #define A_CLRALL 0x0040
282 res->pushedAtAction = NULL;
387 fprintf(stderr,
"closing parser\n");
407 fprintf(stderr,
"closing parser copy\n");
423 #define p_iswhat(type, nonascii) \
426 p_is##type(TParser *prs) \
428 Assert(prs->state); \
433 unsigned int c = *(prs->pgwstr + prs->state->poschar); \
436 return is##type(c); \
438 return isw##type(*(prs->wstr + prs->state->poschar)); \
440 return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
444 p_isnot##type(TParser *prs) \
446 return !p_is##type(prs); \
469 return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) ==
c)) ? 1 : 0;
482 return p_iseq(prs, prs->
c);
488 return !p_iseq(prs, prs->
c);
500 return (
p_isascii(prs) && p_isalpha(prs)) ? 1 : 0;
513 if (ch <= 0x20 || ch >= 0x7F)
624 return (prs->
ignore) ? 1 : 0;
707 static const pg_wchar strange_letter[] = {
941 const pg_wchar *StopLow = strange_letter,
942 *StopHigh = strange_letter +
lengthof(strange_letter),
951 while (StopLow < StopHigh)
953 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
954 if (*StopMiddle ==
c)
956 else if (*StopMiddle <
c)
957 StopLow = StopMiddle + 1;
959 StopHigh = StopMiddle;
1610 #ifdef WPARSER_TRACE
1611 const char *state_name;
1615 #ifdef WPARSER_TRACE
1616 #define TPARSERSTATEACTION(state) \
1617 { CppConcat(action,state), state, CppAsString(state) }
1619 #define TPARSERSTATEACTION(state) \
1620 { CppConcat(action,state), state }
1757 #ifdef WPARSER_TRACE
1770 fprintf(stderr,
" matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
1781 (item->
type > 0) ?
" type " :
"",
1931 #define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
1932 #define HLIDREPLACE(x) ( (x)==TAG_T )
1933 #define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1934 #define XMLHLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1935 #define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
1936 #define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
1945 #define INTERESTINGWORD(j) \
1946 (prs->words[j].item && !prs->words[j].repeated)
1949 #define BADENDPOINT(j) \
1950 ((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
1951 !INTERESTINGWORD(j))
1986 for (
i = 0;
i < checkval->
len;
i++)
1997 data->allocated =
true;
2032 int *nextpos,
int *p,
int *q)
2052 foreach(lc, locations)
2057 for (
int i = 0;
i < pdata->
npos;
i++)
2060 int endp = pdata->
pos[
i];
2085 foreach(lc, locations)
2090 for (
int i = pdata->
npos - 1;
i >= 0;
i--)
2093 int startp = pdata->
pos[
i] - pdata->
width;
2113 posb =
Max(posb, pos);
2133 if (idxb < 0 && prs->words[
i].pos >= posb)
2142 if (idxb >= 0 && idxe >= idxb)
2154 ch.
len = idxe - idxb + 1;
2159 *nextpos = posb + 1;
2220 int *curlen,
int *poslen,
int max_words)
2272 int shortword,
int min_words,
2273 int max_words,
int max_fragments)
2290 int32 numcovers = 0,
2301 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2316 if (numcovers >= maxcovers)
2323 covers[numcovers].
curlen = curlen;
2324 covers[numcovers].
poslen = poslen;
2325 covers[numcovers].
chosen =
false;
2326 covers[numcovers].
excluded =
false;
2334 for (f = 0; f < max_fragments; f++)
2344 for (
i = 0;
i < numcovers;
i++)
2346 if (!covers[
i].chosen && !covers[
i].excluded &&
2347 (maxitems < covers[
i].poslen ||
2348 (maxitems == covers[
i].poslen &&
2349 minwords > covers[
i].curlen)))
2359 covers[minI].
chosen =
true;
2363 curlen = covers[minI].
curlen;
2365 if (curlen < max_words)
2368 maxstretch = (max_words - curlen) / 2;
2411 covers[minI].
curlen = curlen;
2416 for (
i = 0;
i < numcovers;
i++)
2437 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2455 int shortword,
int min_words,
int max_words)
2463 bool bestcover =
false;
2474 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2485 for (
i = p;
i <= q && curlen < max_words;
i++)
2494 if (curlen < max_words)
2501 for (
i =
i - 1;
i < prs->
curwords && curlen < max_words;
i++)
2513 if (curlen >= min_words)
2516 if (curlen < min_words)
2522 for (
i = p - 1;
i >= 0;
i--)
2528 if (curlen >= max_words)
2532 if (curlen >= min_words)
2535 posb = (
i >= 0) ?
i : 0;
2546 for (; curlen > min_words;
i--)
2562 poscover = (posb <= p && pose >= q);
2571 if (poscover > bestcover ||
2572 (poscover == bestcover && poslen > bestlen) ||
2573 (poscover == bestcover && poslen == bestlen &&
2579 bestcover = poscover;
2591 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2626 int max_fragments = 0;
2627 bool highlightall =
false;
2634 foreach(l, prsoptions)
2662 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2663 errmsg(
"unrecognized headline parameter: \"%s\"",
2670 if (min_words >= max_words)
2672 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2673 errmsg(
"MinWords should be less than MaxWords")));
2676 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2677 errmsg(
"MinWords should be positive")));
2680 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2681 errmsg(
"ShortWord should be >= 0")));
2682 if (max_fragments < 0)
2684 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2685 errmsg(
"MaxFragments should be >= 0")));
2689 if (query->
size > 0)
2702 if (max_fragments == 0)
2703 mark_hl_words(prs, query, locations, highlightall, shortword,
2704 min_words, max_words);
2707 min_words, max_words, max_fragments);
void print(const void *obj)
static void PGresult * res
char * defGetString(DefElem *def)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define PG_GETARG_POINTER(n)
#define PG_RETURN_INT32(x)
#define PG_GETARG_INT32(n)
#define PG_RETURN_POINTER(x)
Assert(fmt[strlen(fmt) - 1] !='\n')
int GetDatabaseEncoding(void)
int pg_dsplen(const char *mbstr)
int pg_database_encoding_max_length(void)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
int pg_mblen(const char *mbstr)
char * pstrdup(const char *in)
void pfree(void *pointer)
void * palloc0(Size size)
void * repalloc(void *pointer, Size size)
#define CHECK_FOR_INTERRUPTS()
int32 pg_strtoint32(const char *s)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
static XLogRecPtr startpos
int pg_strcasecmp(const char *s1, const char *s2)
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
void check_stack_depth(void)
HeadlineWordEntry * words
const TParserStateActionItem * pushedAtAction
struct TParserPosition * prev
const TParserStateActionItem * action
HeadlineWordEntry * words
#define PG_GETARG_TSQUERY(n)
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static const TParserStateActionItem actionTPS_InParseHyphen[]
static const TParserStateActionItem actionTPS_InXMLEntityNumFirst[]
static const TParserStateActionItem actionTPS_InHyphenWordFirst[]
static const TParserStateActionItem actionTPS_InXMLEntityFirst[]
static const TParserStateActionItem actionTPS_InHostFirstAN[]
static const TParserStateActionItem actionTPS_InHyphenNumWordPart[]
static const TParserStateActionItem actionTPS_InPathSecond[]
static const TParserStateActionItem actionTPS_InPathFirst[]
static const TParserStateActionItem actionTPS_InHostDomainSecond[]
static const TParserStateActionItem actionTPS_InCloseCommentFirst[]
static void SpecialFURL(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentEnd[]
static TSTernaryValue checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
void _make_compiler_happy(void)
static const TParserStateActionItem actionTPS_InURLPathStart[]
static const TParserStateActionItem actionTPS_InHostFirstDomain[]
static const TParserStateActionItem actionTPS_InHyphenDigitLookahead[]
static const TParserStateActionItem actionTPS_InHostDomain[]
static const TParserStateActionItem actionTPS_InVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[]
Datum prsd_nexttoken(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTagName[]
static const TParserStateActionItem actionTPS_InFileNext[]
static const TParserStateActionItem actionTPS_InXMLEntity[]
static const TParserStateActionItem actionTPS_InFURL[]
#define p_iswhat(type, nonascii)
static const TParserStateActionItem actionTPS_InMantissaSign[]
static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words, int max_fragments)
@ TPS_InXMLEntityHexNumFirst
@ TPS_InHyphenAsciiWordFirst
@ TPS_InHyphenNumWordPart
@ TPS_InHyphenNumWordFirst
@ TPS_InHyphenUnsignedInt
@ TPS_InXMLEntityNumFirst
@ TPS_InCloseCommentFirst
@ TPS_InParseHyphenHyphen
@ TPS_InHyphenAsciiWordPart
@ TPS_InHyphenDigitLookahead
static void mark_fragment(HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
static const TParserStateActionItem actionTPS_InXMLEntityEnd[]
static const TParserStateActionItem actionTPS_InHyphenNumWord[]
static const TParserStateActionItem actionTPS_InDecimal[]
static const TParserStateActionItem actionTPS_InSignedIntFirst[]
static const TParserStateActionItem actionTPS_InTagEscapeK[]
static const TParserStateActionItem actionTPS_InSpace[]
static const TParserStateActionItem actionTPS_InFile[]
static TParser * TParserCopyInit(const TParser *orig)
static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[]
static int p_iseqC(TParser *prs)
Datum prsd_headline(PG_FUNCTION_ARGS)
static bool hlCover(HeadlineParsedText *prs, TSQuery query, List *locations, int *nextpos, int *p, int *q)
static const TParserStateActionItem actionTPS_InUDecimal[]
int(* TParserCharTest)(struct TParser *)
static const TParserStateActionItem actionTPS_InSignedInt[]
static int p_isurlchar(TParser *prs)
static const TParserStateActionItem actionTPS_InTagBeginEnd[]
static const TParserStateActionItem actionTPS_InTagFirst[]
struct TParserPosition TParserPosition
static const TParserStateActionItem actionTPS_InTagEscapeKK[]
static int p_isneC(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentLast[]
static const TParserStateActionItem actionTPS_InHyphenWordPart[]
static const TParserStateActionItem actionTPS_InMantissaFirst[]
static const TParserStateActionItem actionTPS_Base[]
static void SpecialHyphen(TParser *prs)
static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words)
static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[]
void(* TParserSpecial)(struct TParser *)
static const TParserStateActionItem actionTPS_InEmail[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[]
static const TParserStateActionItem actionTPS_InURLPath[]
static const TParserStateActionItem actionTPS_InSVerVersion[]
static const TParserStateActionItem actionTPS_InAsciiWord[]
static const char *const tok_alias[]
static int p_isstophost(TParser *prs)
static void get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[]
static int p_isasclet(TParser *prs)
static const TParserStateAction Actions[]
static const TParserStateActionItem actionTPS_InXMLBegin[]
static const TParserStateActionItem actionTPS_InMantissa[]
static const TParserStateActionItem actionTPS_InVersionFirst[]
static int p_isascii(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentFirst[]
static const TParserStateActionItem actionTPS_InHyphenWord[]
static int p_isignore(TParser *prs)
static const TParserStateActionItem actionTPS_InParseHyphenHyphen[]
static const TParserStateActionItem actionTPS_InPort[]
static const TParserStateActionItem actionTPS_InDecimalFirst[]
static TParserPosition * newTParserPosition(TParserPosition *prev)
Datum prsd_lextype(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTag[]
Datum prsd_start(PG_FUNCTION_ARGS)
static TParser * TParserInit(char *str, int len)
#define TPARSERSTATEACTION(state)
static bool TParserGet(TParser *prs)
static int p_ishost(TParser *prs)
Datum prsd_end(PG_FUNCTION_ARGS)
static int p_isURLPath(TParser *prs)
static void SpecialVerVersion(TParser *prs)
static const TParserStateActionItem actionTPS_InProtocolFirst[]
static const TParserStateActionItem actionTPS_InUnsignedInt[]
static const TParserStateActionItem actionTPS_InUDecimalFirst[]
static const TParserStateActionItem actionTPS_InTagCloseFirst[]
static int p_isEOF(TParser *prs)
static const TParserStateActionItem actionTPS_InCloseCommentLast[]
static void TParserCopyClose(TParser *prs)
static const TParserStateActionItem actionTPS_InFileFirst[]
static const TParserStateActionItem actionTPS_InNumWord[]
static const TParserStateActionItem actionTPS_InFileTwiddle[]
static const TParserStateActionItem actionTPS_InHost[]
static const TParserStateActionItem actionTPS_InTagBackSleshed[]
static const TParserStateActionItem actionTPS_InProtocolSecond[]
static const TParserStateActionItem actionTPS_InWord[]
static int p_isspecial(TParser *prs)
static void TParserClose(TParser *prs)
static const TParserStateActionItem actionTPS_InXMLEntityNum[]
static const TParserStateActionItem actionTPS_InVerVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWord[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNum[]
static const TParserStateActionItem actionTPS_InPortFirst[]
static const char *const lex_descr[]
#define INTERESTINGWORD(j)
static void SpecialTags(TParser *prs)
static const TParserStateActionItem actionTPS_InTagEnd[]
static const TParserStateActionItem actionTPS_InComment[]
static const TParserStateActionItem actionTPS_InProtocolEnd[]
static const TParserStateActionItem actionTPS_InURLPathFirst[]
static const TParserStateActionItem actionTPS_InPathFirstFirst[]