PostgreSQL Source Code  git master
wparser_def.c File Reference
#include "postgres.h"
#include <limits.h>
#include "catalog/pg_collation.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
Include dependency graph for wparser_def.c:

Go to the source code of this file.

Data Structures

struct  TParserStateActionItem
 
struct  TParserPosition
 
struct  TParser
 
struct  TParserStateAction
 
struct  CoverPos
 
struct  hlCheck
 

Macros

#define ASCIIWORD   1
 
#define WORD_T   2
 
#define NUMWORD   3
 
#define EMAIL   4
 
#define URL_T   5
 
#define HOST   6
 
#define SCIENTIFIC   7
 
#define VERSIONNUMBER   8
 
#define NUMPARTHWORD   9
 
#define PARTHWORD   10
 
#define ASCIIPARTHWORD   11
 
#define SPACE   12
 
#define TAG_T   13
 
#define PROTOCOL   14
 
#define NUMHWORD   15
 
#define ASCIIHWORD   16
 
#define HWORD   17
 
#define URLPATH   18
 
#define FILEPATH   19
 
#define DECIMAL_T   20
 
#define SIGNEDINT   21
 
#define UNSIGNEDINT   22
 
#define XMLENTITY   23
 
#define LASTNUM   23
 
#define A_NEXT   0x0000
 
#define A_BINGO   0x0001
 
#define A_POP   0x0002
 
#define A_PUSH   0x0004
 
#define A_RERUN   0x0008
 
#define A_CLEAR   0x0010
 
#define A_MERGE   0x0020
 
#define A_CLRALL   0x0040
 
#define p_iswhat(type, nonascii)
 
#define TPARSERSTATEACTION(state)    { CppConcat(action,state), state }
 
#define LEAVETOKEN(x)   ( (x)==SPACE )
 
#define COMPLEXTOKEN(x)   ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
 
#define ENDPUNCTOKEN(x)   ( (x)==SPACE )
 
#define TS_IDIGNORE(x)   ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
 
#define HLIDREPLACE(x)   ( (x)==TAG_T )
 
#define HLIDSKIP(x)   ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
 
#define XMLHLIDSKIP(x)   ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
 
#define NONWORDTOKEN(x)   ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
 
#define NOENDTOKEN(x)   ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
 
#define INTERESTINGWORD(j)    (prs->words[j].item && !prs->words[j].repeated)
 
#define BADENDPOINT(j)
 

Typedefs

typedef int(* TParserCharTest) (struct TParser *)
 
typedef void(* TParserSpecial) (struct TParser *)
 
typedef struct TParserPosition TParserPosition
 
typedef struct TParser TParser
 

Enumerations

enum  TParserState {
  TPS_Base = 0 , TPS_InNumWord , TPS_InAsciiWord , TPS_InWord ,
  TPS_InUnsignedInt , TPS_InSignedIntFirst , TPS_InSignedInt , TPS_InSpace ,
  TPS_InUDecimalFirst , TPS_InUDecimal , TPS_InDecimalFirst , TPS_InDecimal ,
  TPS_InVerVersion , TPS_InSVerVersion , TPS_InVersionFirst , TPS_InVersion ,
  TPS_InMantissaFirst , TPS_InMantissaSign , TPS_InMantissa , TPS_InXMLEntityFirst ,
  TPS_InXMLEntity , TPS_InXMLEntityNumFirst , TPS_InXMLEntityNum , TPS_InXMLEntityHexNumFirst ,
  TPS_InXMLEntityHexNum , TPS_InXMLEntityEnd , TPS_InTagFirst , TPS_InXMLBegin ,
  TPS_InTagCloseFirst , TPS_InTagName , TPS_InTagBeginEnd , TPS_InTag ,
  TPS_InTagEscapeK , TPS_InTagEscapeKK , TPS_InTagBackSleshed , TPS_InTagEnd ,
  TPS_InCommentFirst , TPS_InCommentLast , TPS_InComment , TPS_InCloseCommentFirst ,
  TPS_InCloseCommentLast , TPS_InCommentEnd , TPS_InHostFirstDomain , TPS_InHostDomainSecond ,
  TPS_InHostDomain , TPS_InPortFirst , TPS_InPort , TPS_InHostFirstAN ,
  TPS_InHost , TPS_InEmail , TPS_InFileFirst , TPS_InFileTwiddle ,
  TPS_InPathFirst , TPS_InPathFirstFirst , TPS_InPathSecond , TPS_InFile ,
  TPS_InFileNext , TPS_InURLPathFirst , TPS_InURLPathStart , TPS_InURLPath ,
  TPS_InFURL , TPS_InProtocolFirst , TPS_InProtocolSecond , TPS_InProtocolEnd ,
  TPS_InHyphenAsciiWordFirst , TPS_InHyphenAsciiWord , TPS_InHyphenWordFirst , TPS_InHyphenWord ,
  TPS_InHyphenNumWordFirst , TPS_InHyphenNumWord , TPS_InHyphenDigitLookahead , TPS_InParseHyphen ,
  TPS_InParseHyphenHyphen , TPS_InHyphenWordPart , TPS_InHyphenAsciiWordPart , TPS_InHyphenNumWordPart ,
  TPS_InHyphenUnsignedInt , TPS_Null
}
 

Functions

static bool TParserGet (TParser *prs)
 
static TParserPosition * newTParserPosition (TParserPosition *prev)
 
static TParser * TParserInit (char *str, int len)
 
static TParser * TParserCopyInit (const TParser *orig)
 
static void TParserClose (TParser *prs)
 
static void TParserCopyClose (TParser *prs)
 
 p_iswhat (alnum, 1)
 
static int p_isEOF (TParser *prs)
 
static int p_iseqC (TParser *prs)
 
static int p_isneC (TParser *prs)
 
static int p_isascii (TParser *prs)
 
static int p_isasclet (TParser *prs)
 
static int p_isurlchar (TParser *prs)
 
void _make_compiler_happy (void)
 
static void SpecialTags (TParser *prs)
 
static void SpecialFURL (TParser *prs)
 
static void SpecialHyphen (TParser *prs)
 
static void SpecialVerVersion (TParser *prs)
 
static int p_isstophost (TParser *prs)
 
static int p_isignore (TParser *prs)
 
static int p_ishost (TParser *prs)
 
static int p_isURLPath (TParser *prs)
 
static int p_isspecial (TParser *prs)
 
Datum prsd_lextype (PG_FUNCTION_ARGS)
 
Datum prsd_start (PG_FUNCTION_ARGS)
 
Datum prsd_nexttoken (PG_FUNCTION_ARGS)
 
Datum prsd_end (PG_FUNCTION_ARGS)
 
static TSTernaryValue checkcondition_HL (void *opaque, QueryOperand *val, ExecPhraseData *data)
 
static int hlFirstIndex (HeadlineParsedText *prs, int pos)
 
static bool hlCover (HeadlineParsedText *prs, TSQuery query, int max_cover, int *p, int *q)
 
static void mark_fragment (HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
 
static void get_next_fragment (HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
 
static void mark_hl_fragments (HeadlineParsedText *prs, TSQuery query, bool highlightall, int shortword, int min_words, int max_words, int max_fragments, int max_cover)
 
static void mark_hl_words (HeadlineParsedText *prs, TSQuery query, bool highlightall, int shortword, int min_words, int max_words, int max_cover)
 
Datum prsd_headline (PG_FUNCTION_ARGS)
 

Variables

static const char *const tok_alias []
 
static const char *const lex_descr []
 
static const TParserStateActionItem actionTPS_Base []
 
static const TParserStateActionItem actionTPS_InNumWord []
 
static const TParserStateActionItem actionTPS_InAsciiWord []
 
static const TParserStateActionItem actionTPS_InWord []
 
static const TParserStateActionItem actionTPS_InUnsignedInt []
 
static const TParserStateActionItem actionTPS_InSignedIntFirst []
 
static const TParserStateActionItem actionTPS_InSignedInt []
 
static const TParserStateActionItem actionTPS_InSpace []
 
static const TParserStateActionItem actionTPS_InUDecimalFirst []
 
static const TParserStateActionItem actionTPS_InUDecimal []
 
static const TParserStateActionItem actionTPS_InDecimalFirst []
 
static const TParserStateActionItem actionTPS_InDecimal []
 
static const TParserStateActionItem actionTPS_InVerVersion []
 
static const TParserStateActionItem actionTPS_InSVerVersion []
 
static const TParserStateActionItem actionTPS_InVersionFirst []
 
static const TParserStateActionItem actionTPS_InVersion []
 
static const TParserStateActionItem actionTPS_InMantissaFirst []
 
static const TParserStateActionItem actionTPS_InMantissaSign []
 
static const TParserStateActionItem actionTPS_InMantissa []
 
static const TParserStateActionItem actionTPS_InXMLEntityFirst []
 
static const TParserStateActionItem actionTPS_InXMLEntity []
 
static const TParserStateActionItem actionTPS_InXMLEntityNumFirst []
 
static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst []
 
static const TParserStateActionItem actionTPS_InXMLEntityNum []
 
static const TParserStateActionItem actionTPS_InXMLEntityHexNum []
 
static const TParserStateActionItem actionTPS_InXMLEntityEnd []
 
static const TParserStateActionItem actionTPS_InTagFirst []
 
static const TParserStateActionItem actionTPS_InXMLBegin []
 
static const TParserStateActionItem actionTPS_InTagCloseFirst []
 
static const TParserStateActionItem actionTPS_InTagName []
 
static const TParserStateActionItem actionTPS_InTagBeginEnd []
 
static const TParserStateActionItem actionTPS_InTag []
 
static const TParserStateActionItem actionTPS_InTagEscapeK []
 
static const TParserStateActionItem actionTPS_InTagEscapeKK []
 
static const TParserStateActionItem actionTPS_InTagBackSleshed []
 
static const TParserStateActionItem actionTPS_InTagEnd []
 
static const TParserStateActionItem actionTPS_InCommentFirst []
 
static const TParserStateActionItem actionTPS_InCommentLast []
 
static const TParserStateActionItem actionTPS_InComment []
 
static const TParserStateActionItem actionTPS_InCloseCommentFirst []
 
static const TParserStateActionItem actionTPS_InCloseCommentLast []
 
static const TParserStateActionItem actionTPS_InCommentEnd []
 
static const TParserStateActionItem actionTPS_InHostFirstDomain []
 
static const TParserStateActionItem actionTPS_InHostDomainSecond []
 
static const TParserStateActionItem actionTPS_InHostDomain []
 
static const TParserStateActionItem actionTPS_InPortFirst []
 
static const TParserStateActionItem actionTPS_InPort []
 
static const TParserStateActionItem actionTPS_InHostFirstAN []
 
static const TParserStateActionItem actionTPS_InHost []
 
static const TParserStateActionItem actionTPS_InEmail []
 
static const TParserStateActionItem actionTPS_InFileFirst []
 
static const TParserStateActionItem actionTPS_InFileTwiddle []
 
static const TParserStateActionItem actionTPS_InPathFirst []
 
static const TParserStateActionItem actionTPS_InPathFirstFirst []
 
static const TParserStateActionItem actionTPS_InPathSecond []
 
static const TParserStateActionItem actionTPS_InFile []
 
static const TParserStateActionItem actionTPS_InFileNext []
 
static const TParserStateActionItem actionTPS_InURLPathFirst []
 
static const TParserStateActionItem actionTPS_InURLPathStart []
 
static const TParserStateActionItem actionTPS_InURLPath []
 
static const TParserStateActionItem actionTPS_InFURL []
 
static const TParserStateActionItem actionTPS_InProtocolFirst []
 
static const TParserStateActionItem actionTPS_InProtocolSecond []
 
static const TParserStateActionItem actionTPS_InProtocolEnd []
 
static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst []
 
static const TParserStateActionItem actionTPS_InHyphenAsciiWord []
 
static const TParserStateActionItem actionTPS_InHyphenWordFirst []
 
static const TParserStateActionItem actionTPS_InHyphenWord []
 
static const TParserStateActionItem actionTPS_InHyphenNumWordFirst []
 
static const TParserStateActionItem actionTPS_InHyphenNumWord []
 
static const TParserStateActionItem actionTPS_InHyphenDigitLookahead []
 
static const TParserStateActionItem actionTPS_InParseHyphen []
 
static const TParserStateActionItem actionTPS_InParseHyphenHyphen []
 
static const TParserStateActionItem actionTPS_InHyphenWordPart []
 
static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart []
 
static const TParserStateActionItem actionTPS_InHyphenNumWordPart []
 
static const TParserStateActionItem actionTPS_InHyphenUnsignedInt []
 
static const TParserStateAction Actions []
 

Macro Definition Documentation

◆ A_BINGO

#define A_BINGO   0x0001

Definition at line 219 of file wparser_def.c.

◆ A_CLEAR

#define A_CLEAR   0x0010

Definition at line 223 of file wparser_def.c.

◆ A_CLRALL

#define A_CLRALL   0x0040

Definition at line 225 of file wparser_def.c.

◆ A_MERGE

#define A_MERGE   0x0020

Definition at line 224 of file wparser_def.c.

◆ A_NEXT

#define A_NEXT   0x0000

Definition at line 218 of file wparser_def.c.

◆ A_POP

#define A_POP   0x0002

Definition at line 220 of file wparser_def.c.

◆ A_PUSH

#define A_PUSH   0x0004

Definition at line 221 of file wparser_def.c.

◆ A_RERUN

#define A_RERUN   0x0008

Definition at line 222 of file wparser_def.c.

◆ ASCIIHWORD

#define ASCIIHWORD   16

Definition at line 49 of file wparser_def.c.

◆ ASCIIPARTHWORD

#define ASCIIPARTHWORD   11

Definition at line 44 of file wparser_def.c.

◆ ASCIIWORD

#define ASCIIWORD   1

Definition at line 34 of file wparser_def.c.

◆ BADENDPOINT

#define BADENDPOINT (   j)
Value:
((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
!INTERESTINGWORD(j))
int j
Definition: isn.c:74
#define NOENDTOKEN(x)
Definition: wparser_def.c:1926
#define INTERESTINGWORD(j)
Definition: wparser_def.c:1935

Definition at line 1939 of file wparser_def.c.

◆ COMPLEXTOKEN

#define COMPLEXTOKEN (   x)    ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )

Definition at line 1918 of file wparser_def.c.

◆ DECIMAL_T

#define DECIMAL_T   20

Definition at line 53 of file wparser_def.c.

◆ EMAIL

#define EMAIL   4

Definition at line 37 of file wparser_def.c.

◆ ENDPUNCTOKEN

#define ENDPUNCTOKEN (   x)    ( (x)==SPACE )

Definition at line 1919 of file wparser_def.c.

◆ FILEPATH

#define FILEPATH   19

Definition at line 52 of file wparser_def.c.

◆ HLIDREPLACE

#define HLIDREPLACE (   x)    ( (x)==TAG_T )

Definition at line 1922 of file wparser_def.c.

◆ HLIDSKIP

#define HLIDSKIP (   x)    ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )

Definition at line 1923 of file wparser_def.c.

◆ HOST

#define HOST   6

Definition at line 39 of file wparser_def.c.

◆ HWORD

#define HWORD   17

Definition at line 50 of file wparser_def.c.

◆ INTERESTINGWORD

#define INTERESTINGWORD (   j)     (prs->words[j].item && !prs->words[j].repeated)

Definition at line 1935 of file wparser_def.c.

◆ LASTNUM

#define LASTNUM   23

Definition at line 58 of file wparser_def.c.

◆ LEAVETOKEN

#define LEAVETOKEN (   x)    ( (x)==SPACE )

Definition at line 1917 of file wparser_def.c.

◆ NOENDTOKEN

#define NOENDTOKEN (   x)    ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )

Definition at line 1926 of file wparser_def.c.

◆ NONWORDTOKEN

#define NONWORDTOKEN (   x)    ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )

Definition at line 1925 of file wparser_def.c.

◆ NUMHWORD

#define NUMHWORD   15

Definition at line 48 of file wparser_def.c.

◆ NUMPARTHWORD

#define NUMPARTHWORD   9

Definition at line 42 of file wparser_def.c.

◆ NUMWORD

#define NUMWORD   3

Definition at line 36 of file wparser_def.c.

◆ p_iswhat

#define p_iswhat (   type,
  nonascii 
)
Value:
\
static int \
p_is##type(TParser *prs) \
{ \
Assert(prs->state); \
if (prs->usewide) \
{ \
if (prs->pgwstr) \
{ \
unsigned int c = *(prs->pgwstr + prs->state->poschar); \
if (c > 0x7f) \
return nonascii; \
return is##type(c); \
} \
return isw##type(*(prs->wstr + prs->state->poschar)); \
} \
return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
} \
\
static int \
p_isnot##type(TParser *prs) \
{ \
return !p_is##type(prs); \
}
char * c

Definition at line 423 of file wparser_def.c.

◆ PARTHWORD

#define PARTHWORD   10

Definition at line 43 of file wparser_def.c.

◆ PROTOCOL

#define PROTOCOL   14

Definition at line 47 of file wparser_def.c.

◆ SCIENTIFIC

#define SCIENTIFIC   7

Definition at line 40 of file wparser_def.c.

◆ SIGNEDINT

#define SIGNEDINT   21

Definition at line 54 of file wparser_def.c.

◆ SPACE

#define SPACE   12

Definition at line 45 of file wparser_def.c.

◆ TAG_T

#define TAG_T   13

Definition at line 46 of file wparser_def.c.

◆ TPARSERSTATEACTION

#define TPARSERSTATEACTION (   state)     { CppConcat(action,state), state }

Definition at line 1607 of file wparser_def.c.

◆ TS_IDIGNORE

#define TS_IDIGNORE (   x)    ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )

Definition at line 1921 of file wparser_def.c.

◆ UNSIGNEDINT

#define UNSIGNEDINT   22

Definition at line 55 of file wparser_def.c.

◆ URL_T

#define URL_T   5

Definition at line 38 of file wparser_def.c.

◆ URLPATH

#define URLPATH   18

Definition at line 51 of file wparser_def.c.

◆ VERSIONNUMBER

#define VERSIONNUMBER   8

Definition at line 41 of file wparser_def.c.

◆ WORD_T

#define WORD_T   2

Definition at line 35 of file wparser_def.c.

◆ XMLENTITY

#define XMLENTITY   23

Definition at line 56 of file wparser_def.c.

◆ XMLHLIDSKIP

#define XMLHLIDSKIP (   x)    ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )

Definition at line 1924 of file wparser_def.c.

Typedef Documentation

◆ TParser

typedef struct TParser TParser

◆ TParserCharTest

typedef int(* TParserCharTest) (struct TParser *)

Definition at line 202 of file wparser_def.c.

◆ TParserPosition

typedef struct TParserPosition TParserPosition

◆ TParserSpecial

typedef void(* TParserSpecial) (struct TParser *)

Definition at line 204 of file wparser_def.c.

Enumeration Type Documentation

◆ TParserState

Enumerator
TPS_Base 
TPS_InNumWord 
TPS_InAsciiWord 
TPS_InWord 
TPS_InUnsignedInt 
TPS_InSignedIntFirst 
TPS_InSignedInt 
TPS_InSpace 
TPS_InUDecimalFirst 
TPS_InUDecimal 
TPS_InDecimalFirst 
TPS_InDecimal 
TPS_InVerVersion 
TPS_InSVerVersion 
TPS_InVersionFirst 
TPS_InVersion 
TPS_InMantissaFirst 
TPS_InMantissaSign 
TPS_InMantissa 
TPS_InXMLEntityFirst 
TPS_InXMLEntity 
TPS_InXMLEntityNumFirst 
TPS_InXMLEntityNum 
TPS_InXMLEntityHexNumFirst 
TPS_InXMLEntityHexNum 
TPS_InXMLEntityEnd 
TPS_InTagFirst 
TPS_InXMLBegin 
TPS_InTagCloseFirst 
TPS_InTagName 
TPS_InTagBeginEnd 
TPS_InTag 
TPS_InTagEscapeK 
TPS_InTagEscapeKK 
TPS_InTagBackSleshed 
TPS_InTagEnd 
TPS_InCommentFirst 
TPS_InCommentLast 
TPS_InComment 
TPS_InCloseCommentFirst 
TPS_InCloseCommentLast 
TPS_InCommentEnd 
TPS_InHostFirstDomain 
TPS_InHostDomainSecond 
TPS_InHostDomain 
TPS_InPortFirst 
TPS_InPort 
TPS_InHostFirstAN 
TPS_InHost 
TPS_InEmail 
TPS_InFileFirst 
TPS_InFileTwiddle 
TPS_InPathFirst 
TPS_InPathFirstFirst 
TPS_InPathSecond 
TPS_InFile 
TPS_InFileNext 
TPS_InURLPathFirst 
TPS_InURLPathStart 
TPS_InURLPath 
TPS_InFURL 
TPS_InProtocolFirst 
TPS_InProtocolSecond 
TPS_InProtocolEnd 
TPS_InHyphenAsciiWordFirst 
TPS_InHyphenAsciiWord 
TPS_InHyphenWordFirst 
TPS_InHyphenWord 
TPS_InHyphenNumWordFirst 
TPS_InHyphenNumWord 
TPS_InHyphenDigitLookahead 
TPS_InParseHyphen 
TPS_InParseHyphenHyphen 
TPS_InHyphenWordPart 
TPS_InHyphenAsciiWordPart 
TPS_InHyphenNumWordPart 
TPS_InHyphenUnsignedInt 
TPS_Null 

Definition at line 117 of file wparser_def.c.

118 {
119  TPS_Base = 0,
122  TPS_InWord,
126  TPS_InSpace,
150  TPS_InTag,
154  TPS_InTagEnd,
165  TPS_InPort,
167  TPS_InHost,
168  TPS_InEmail,
174  TPS_InFile,
179  TPS_InFURL,
196  TPS_Null /* last state (fake value) */
197 } TParserState;
TParserState
Definition: wparser_def.c:118
@ TPS_InXMLEntityHexNumFirst
Definition: wparser_def.c:142
@ TPS_InPort
Definition: wparser_def.c:165
@ TPS_InXMLEntityHexNum
Definition: wparser_def.c:143
@ TPS_InHostDomainSecond
Definition: wparser_def.c:162
@ TPS_InMantissaFirst
Definition: wparser_def.c:135
@ TPS_InTagName
Definition: wparser_def.c:148
@ TPS_InHyphenAsciiWordFirst
Definition: wparser_def.c:183
@ TPS_Null
Definition: wparser_def.c:196
@ TPS_InPathFirstFirst
Definition: wparser_def.c:172
@ TPS_InSignedIntFirst
Definition: wparser_def.c:124
@ TPS_InSignedInt
Definition: wparser_def.c:125
@ TPS_InUnsignedInt
Definition: wparser_def.c:123
@ TPS_InMantissa
Definition: wparser_def.c:137
@ TPS_InProtocolFirst
Definition: wparser_def.c:180
@ TPS_InFURL
Definition: wparser_def.c:179
@ TPS_InMantissaSign
Definition: wparser_def.c:136
@ TPS_InXMLBegin
Definition: wparser_def.c:146
@ TPS_InCommentEnd
Definition: wparser_def.c:160
@ TPS_InHyphenWordFirst
Definition: wparser_def.c:185
@ TPS_InHyphenNumWordPart
Definition: wparser_def.c:194
@ TPS_InPortFirst
Definition: wparser_def.c:164
@ TPS_InProtocolEnd
Definition: wparser_def.c:182
@ TPS_InXMLEntityFirst
Definition: wparser_def.c:138
@ TPS_InHyphenNumWordFirst
Definition: wparser_def.c:187
@ TPS_InCommentLast
Definition: wparser_def.c:156
@ TPS_InFileTwiddle
Definition: wparser_def.c:170
@ TPS_InURLPathStart
Definition: wparser_def.c:177
@ TPS_InURLPathFirst
Definition: wparser_def.c:176
@ TPS_InPathFirst
Definition: wparser_def.c:171
@ TPS_InPathSecond
Definition: wparser_def.c:173
@ TPS_InHyphenUnsignedInt
Definition: wparser_def.c:195
@ TPS_InFileFirst
Definition: wparser_def.c:169
@ TPS_InXMLEntityNumFirst
Definition: wparser_def.c:140
@ TPS_InHyphenWordPart
Definition: wparser_def.c:192
@ TPS_InNumWord
Definition: wparser_def.c:120
@ TPS_InAsciiWord
Definition: wparser_def.c:121
@ TPS_InVersion
Definition: wparser_def.c:134
@ TPS_InHost
Definition: wparser_def.c:167
@ TPS_InFile
Definition: wparser_def.c:174
@ TPS_InProtocolSecond
Definition: wparser_def.c:181
@ TPS_InCloseCommentFirst
Definition: wparser_def.c:158
@ TPS_InTagEscapeK
Definition: wparser_def.c:151
@ TPS_InParseHyphenHyphen
Definition: wparser_def.c:191
@ TPS_InTagBackSleshed
Definition: wparser_def.c:153
@ TPS_InTagFirst
Definition: wparser_def.c:145
@ TPS_InTagEnd
Definition: wparser_def.c:154
@ TPS_InComment
Definition: wparser_def.c:157
@ TPS_InHyphenWord
Definition: wparser_def.c:186
@ TPS_InHyphenAsciiWord
Definition: wparser_def.c:184
@ TPS_InWord
Definition: wparser_def.c:122
@ TPS_InXMLEntityEnd
Definition: wparser_def.c:144
@ TPS_InTagEscapeKK
Definition: wparser_def.c:152
@ TPS_InSpace
Definition: wparser_def.c:126
@ TPS_InFileNext
Definition: wparser_def.c:175
@ TPS_InURLPath
Definition: wparser_def.c:178
@ TPS_Base
Definition: wparser_def.c:119
@ TPS_InUDecimal
Definition: wparser_def.c:128
@ TPS_InParseHyphen
Definition: wparser_def.c:190
@ TPS_InHostFirstAN
Definition: wparser_def.c:166
@ TPS_InEmail
Definition: wparser_def.c:168
@ TPS_InDecimalFirst
Definition: wparser_def.c:129
@ TPS_InVersionFirst
Definition: wparser_def.c:133
@ TPS_InCloseCommentLast
Definition: wparser_def.c:159
@ TPS_InSVerVersion
Definition: wparser_def.c:132
@ TPS_InHyphenAsciiWordPart
Definition: wparser_def.c:193
@ TPS_InCommentFirst
Definition: wparser_def.c:155
@ TPS_InUDecimalFirst
Definition: wparser_def.c:127
@ TPS_InHostFirstDomain
Definition: wparser_def.c:161
@ TPS_InHostDomain
Definition: wparser_def.c:163
@ TPS_InHyphenDigitLookahead
Definition: wparser_def.c:189
@ TPS_InVerVersion
Definition: wparser_def.c:131
@ TPS_InXMLEntityNum
Definition: wparser_def.c:141
@ TPS_InTag
Definition: wparser_def.c:150
@ TPS_InDecimal
Definition: wparser_def.c:130
@ TPS_InTagCloseFirst
Definition: wparser_def.c:147
@ TPS_InXMLEntity
Definition: wparser_def.c:139
@ TPS_InHyphenNumWord
Definition: wparser_def.c:188
@ TPS_InTagBeginEnd
Definition: wparser_def.c:149

Function Documentation

◆ _make_compiler_happy()

void _make_compiler_happy ( void  )

Definition at line 536 of file wparser_def.c.

537 {
538  p_isalnum(NULL);
539  p_isnotalnum(NULL);
540  p_isalpha(NULL);
541  p_isnotalpha(NULL);
542  p_isdigit(NULL);
543  p_isnotdigit(NULL);
544  p_islower(NULL);
545  p_isnotlower(NULL);
546  p_isprint(NULL);
547  p_isnotprint(NULL);
548  p_ispunct(NULL);
549  p_isnotpunct(NULL);
550  p_isspace(NULL);
551  p_isnotspace(NULL);
552  p_isupper(NULL);
553  p_isnotupper(NULL);
554  p_isxdigit(NULL);
555  p_isnotxdigit(NULL);
556  p_isEOF(NULL);
557  p_iseqC(NULL);
558  p_isneC(NULL);
559 }
static int p_iseqC(TParser *prs)
Definition: wparser_def.c:480
static int p_isneC(TParser *prs)
Definition: wparser_def.c:486
static int p_isEOF(TParser *prs)
Definition: wparser_def.c:473

References p_isEOF(), p_iseqC(), and p_isneC().

◆ checkcondition_HL()

static TSTernaryValue checkcondition_HL ( void *  opaque,
QueryOperand *  val,
ExecPhraseData *  data 
)
static

Definition at line 1966 of file wparser_def.c.

1967 {
1968  hlCheck *checkval = (hlCheck *) opaque;
1969  int i;
1970 
1971  /* scan words array for matching items */
1972  for (i = 0; i < checkval->len; i++)
1973  {
1974  if (checkval->words[i].item == val)
1975  {
1976  /* if data == NULL, don't need to report positions */
1977  if (!data)
1978  return TS_YES;
1979 
1980  if (!data->pos)
1981  {
1982  data->pos = palloc(sizeof(WordEntryPos) * checkval->len);
1983  data->allocated = true;
1984  data->npos = 1;
1985  data->pos[0] = checkval->words[i].pos;
1986  }
1987  else if (data->pos[data->npos - 1] < checkval->words[i].pos)
1988  {
1989  data->pos[data->npos++] = checkval->words[i].pos;
1990  }
1991  }
1992  }
1993 
1994  if (data && data->npos > 0)
1995  return TS_YES;
1996 
1997  return TS_NO;
1998 }
long val
Definition: informix.c:664
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1068
const void * data
WordEntryPos pos
Definition: ts_public.h:45
QueryOperand * item
Definition: ts_public.h:47
HeadlineWordEntry * words
Definition: wparser_def.c:1957
uint16 WordEntryPos
Definition: ts_type.h:63
@ TS_NO
Definition: ts_utils.h:130
@ TS_YES
Definition: ts_utils.h:131

References data, i, HeadlineWordEntry::item, hlCheck::len, palloc(), HeadlineWordEntry::pos, TS_NO, TS_YES, val, and hlCheck::words.

Referenced by hlCover().

◆ get_next_fragment()

static void get_next_fragment ( HeadlineParsedText *  prs,
int *  startpos,
int *  endpos,
int *  curlen,
int *  poslen,
int  max_words 
)
static

Definition at line 2133 of file wparser_def.c.

2135 {
2136  int i;
2137 
2138  /*
2139  * Objective: select a fragment of words between startpos and endpos such
2140  * that it has at most max_words and both ends have query words. If the
2141  * startpos and endpos are the endpoints of the cover and the cover has
2142  * fewer words than max_words, then this function should just return the
2143  * cover
2144  */
2145  /* first move startpos to an item */
2146  for (i = *startpos; i <= *endpos; i++)
2147  {
2148  *startpos = i;
2149  if (INTERESTINGWORD(i))
2150  break;
2151  }
2152  /* cut endpos to have only max_words */
2153  *curlen = 0;
2154  *poslen = 0;
2155  for (i = *startpos; i <= *endpos && *curlen < max_words; i++)
2156  {
2157  if (!NONWORDTOKEN(prs->words[i].type))
2158  *curlen += 1;
2159  if (INTERESTINGWORD(i))
2160  *poslen += 1;
2161  }
2162  /* if the cover was cut then move back endpos to a query item */
2163  if (*endpos > i)
2164  {
2165  *endpos = i;
2166  for (i = *endpos; i >= *startpos; i--)
2167  {
2168  *endpos = i;
2169  if (INTERESTINGWORD(i))
2170  break;
2171  if (!NONWORDTOKEN(prs->words[i].type))
2172  *curlen -= 1;
2173  }
2174  }
2175 }
static XLogRecPtr endpos
Definition: pg_receivewal.c:56
static XLogRecPtr startpos
HeadlineWordEntry * words
Definition: ts_public.h:52
#define NONWORDTOKEN(x)
Definition: wparser_def.c:1925

References endpos, i, INTERESTINGWORD, NONWORDTOKEN, startpos, HeadlineWordEntry::type, and HeadlineParsedText::words.

Referenced by mark_hl_fragments().

◆ hlCover()

static bool hlCover ( HeadlineParsedText *  prs,
TSQuery  query,
int  max_cover,
int *  p,
int *  q 
)
static

Definition at line 2037 of file wparser_def.c.

2039 {
2040  int pmin,
2041  pmax,
2042  nextpmin,
2043  nextpmax;
2044  hlCheck ch;
2045 
2046  /*
2047  * We look for the earliest, shortest substring of prs->words that
2048  * satisfies the query. Both the pmin and pmax indices must be words
2049  * appearing in the query; there's no point in trying endpoints in between
2050  * such points.
2051  */
2052  pmin = hlFirstIndex(prs, *p);
2053  while (pmin >= 0)
2054  {
2055  /* This useless assignment just keeps stupider compilers quiet */
2056  nextpmin = -1;
2057  /* Consider substrings starting at pmin */
2058  ch.words = &(prs->words[pmin]);
2059  /* Consider the length-one substring first, then longer substrings */
2060  pmax = pmin;
2061  do
2062  {
2063  /* Try to match query against pmin .. pmax substring */
2064  ch.len = pmax - pmin + 1;
2065  if (TS_execute(GETQUERY(query), &ch,
2066  TS_EXEC_EMPTY, checkcondition_HL))
2067  {
2068  *p = pmin;
2069  *q = pmax;
2070  return true;
2071  }
2072  /* Nope, so advance pmax to next feasible endpoint */
2073  nextpmax = hlFirstIndex(prs, pmax + 1);
2074 
2075  /*
2076  * If this is our first advance past pmin, then the result is also
2077  * the next feasible value of pmin; remember it to save a
2078  * redundant search.
2079  */
2080  if (pmax == pmin)
2081  nextpmin = nextpmax;
2082  pmax = nextpmax;
2083  }
2084  while (pmax >= 0 && pmax - pmin < max_cover);
2085  /* No luck here, so try next feasible startpoint */
2086  pmin = nextpmin;
2087  }
2088  return false;
2089 }
#define GETQUERY(x)
Definition: _int.h:157
#define TS_EXEC_EMPTY
Definition: ts_utils.h:184
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1847
static TSTernaryValue checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
Definition: wparser_def.c:1966
static int hlFirstIndex(HeadlineParsedText *prs, int pos)
Definition: wparser_def.c:2006

References checkcondition_HL(), GETQUERY, hlFirstIndex(), hlCheck::len, TS_EXEC_EMPTY, TS_execute(), hlCheck::words, and HeadlineParsedText::words.

Referenced by mark_hl_fragments(), and mark_hl_words().

◆ hlFirstIndex()

static int hlFirstIndex ( HeadlineParsedText *  prs,
int  pos 
)
static

Definition at line 2006 of file wparser_def.c.

2007 {
2008  int i;
2009 
2010  for (i = pos; i < prs->curwords; i++)
2011  {
2012  if (prs->words[i].item != NULL)
2013  return i;
2014  }
2015  return -1;
2016 }

References HeadlineParsedText::curwords, i, HeadlineWordEntry::item, and HeadlineParsedText::words.

Referenced by hlCover().

◆ mark_fragment()

static void mark_fragment ( HeadlineParsedText *  prs,
bool  highlightall,
int  startpos,
int  endpos 
)
static

Definition at line 2097 of file wparser_def.c.

2099 {
2100  int i;
2101 
2102  for (i = startpos; i <= endpos; i++)
2103  {
2104  if (prs->words[i].item)
2105  prs->words[i].selected = 1;
2106  if (!highlightall)
2107  {
2108  if (HLIDREPLACE(prs->words[i].type))
2109  prs->words[i].replace = 1;
2110  else if (HLIDSKIP(prs->words[i].type))
2111  prs->words[i].skip = 1;
2112  }
2113  else
2114  {
2115  if (XMLHLIDSKIP(prs->words[i].type))
2116  prs->words[i].skip = 1;
2117  }
2118 
2119  prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
2120  }
2121 }
#define XMLHLIDSKIP(x)
Definition: wparser_def.c:1924
#define HLIDSKIP(x)
Definition: wparser_def.c:1923
#define HLIDREPLACE(x)
Definition: wparser_def.c:1922

References endpos, HLIDREPLACE, HLIDSKIP, i, HeadlineWordEntry::in, HeadlineWordEntry::item, HeadlineWordEntry::repeated, HeadlineWordEntry::replace, HeadlineWordEntry::selected, HeadlineWordEntry::skip, startpos, HeadlineWordEntry::type, HeadlineParsedText::words, and XMLHLIDSKIP.

Referenced by mark_hl_fragments(), and mark_hl_words().

◆ mark_hl_fragments()

static void mark_hl_fragments ( HeadlineParsedText * prs,
TSQuery  query,
bool  highlightall,
int  shortword,
int  min_words,
int  max_words,
int  max_fragments,
int  max_cover 
)
static

Definition at line 2184 of file wparser_def.c.

2187 {
2188  int32 poslen,
2189  curlen,
2190  i,
2191  f,
2192  num_f = 0;
2193  int32 stretch,
2194  maxstretch,
2195  posmarker;
2196 
2197  int32 startpos = 0,
2198  endpos = 0,
2199  p = 0,
2200  q = 0;
2201 
2202  int32 numcovers = 0,
2203  maxcovers = 32;
2204 
2205  int32 minI,
2206  minwords,
2207  maxitems;
2208  CoverPos *covers;
2209 
2210  covers = palloc(maxcovers * sizeof(CoverPos));
2211 
2212  /* get all covers */
2213  while (hlCover(prs, query, max_cover, &p, &q))
2214  {
2215  startpos = p;
2216  endpos = q;
2217 
2218  /*
2219  * Break the cover into smaller fragments such that each fragment has
2220  * at most max_words. Also ensure that each end of each fragment is a
2221  * query word. This will allow us to stretch the fragment in either
2222  * direction
2223  */
2224 
2225  while (startpos <= endpos)
2226  {
2227  get_next_fragment(prs, &startpos, &endpos, &curlen, &poslen, max_words);
2228  if (numcovers >= maxcovers)
2229  {
2230  maxcovers *= 2;
2231  covers = repalloc(covers, sizeof(CoverPos) * maxcovers);
2232  }
2233  covers[numcovers].startpos = startpos;
2234  covers[numcovers].endpos = endpos;
2235  covers[numcovers].curlen = curlen;
2236  covers[numcovers].poslen = poslen;
2237  covers[numcovers].chosen = false;
2238  covers[numcovers].excluded = false;
2239  numcovers++;
2240  startpos = endpos + 1;
2241  endpos = q;
2242  }
2243 
2244  /* move p to generate the next cover */
2245  p++;
2246  }
2247 
2248  /* choose best covers */
2249  for (f = 0; f < max_fragments; f++)
2250  {
2251  maxitems = 0;
2252  minwords = PG_INT32_MAX;
2253  minI = -1;
2254 
2255  /*
2256  * Choose the cover that contains max items. In case of tie choose the
2257  * one with smaller number of words.
2258  */
2259  for (i = 0; i < numcovers; i++)
2260  {
2261  if (!covers[i].chosen && !covers[i].excluded &&
2262  (maxitems < covers[i].poslen ||
2263  (maxitems == covers[i].poslen &&
2264  minwords > covers[i].curlen)))
2265  {
2266  maxitems = covers[i].poslen;
2267  minwords = covers[i].curlen;
2268  minI = i;
2269  }
2270  }
2271  /* if a cover was found mark it */
2272  if (minI >= 0)
2273  {
2274  covers[minI].chosen = true;
2275  /* adjust the size of cover */
2276  startpos = covers[minI].startpos;
2277  endpos = covers[minI].endpos;
2278  curlen = covers[minI].curlen;
2279  /* stretch the cover if cover size is lower than max_words */
2280  if (curlen < max_words)
2281  {
2282  /* divide the stretch on both sides of cover */
2283  maxstretch = (max_words - curlen) / 2;
2284 
2285  /*
2286  * first stretch the startpos stop stretching if 1. we hit the
2287  * beginning of document 2. exceed maxstretch 3. we hit an
2288  * already marked fragment
2289  */
2290  stretch = 0;
2291  posmarker = startpos;
2292  for (i = startpos - 1; i >= 0 && stretch < maxstretch && !prs->words[i].in; i--)
2293  {
2294  if (!NONWORDTOKEN(prs->words[i].type))
2295  {
2296  curlen++;
2297  stretch++;
2298  }
2299  posmarker = i;
2300  }
2301  /* cut back startpos till we find a good endpoint */
2302  for (i = posmarker; i < startpos && BADENDPOINT(i); i++)
2303  {
2304  if (!NONWORDTOKEN(prs->words[i].type))
2305  curlen--;
2306  }
2307  startpos = i;
2308  /* now stretch the endpos as much as possible */
2309  posmarker = endpos;
2310  for (i = endpos + 1; i < prs->curwords && curlen < max_words && !prs->words[i].in; i++)
2311  {
2312  if (!NONWORDTOKEN(prs->words[i].type))
2313  curlen++;
2314  posmarker = i;
2315  }
2316  /* cut back endpos till we find a good endpoint */
2317  for (i = posmarker; i > endpos && BADENDPOINT(i); i--)
2318  {
2319  if (!NONWORDTOKEN(prs->words[i].type))
2320  curlen--;
2321  }
2322  endpos = i;
2323  }
2324  covers[minI].startpos = startpos;
2325  covers[minI].endpos = endpos;
2326  covers[minI].curlen = curlen;
2327  /* Mark the chosen fragments (covers) */
2328  mark_fragment(prs, highlightall, startpos, endpos);
2329  num_f++;
2330  /* Exclude covers overlapping this one from future consideration */
2331  for (i = 0; i < numcovers; i++)
2332  {
2333  if (i != minI &&
2334  ((covers[i].startpos >= startpos &&
2335  covers[i].startpos <= endpos) ||
2336  (covers[i].endpos >= startpos &&
2337  covers[i].endpos <= endpos) ||
2338  (covers[i].startpos < startpos &&
2339  covers[i].endpos > endpos)))
2340  covers[i].excluded = true;
2341  }
2342  }
2343  else
2344  break; /* no selectable covers remain */
2345  }
2346 
2347  /* show the first min_words words if we have not marked anything */
2348  if (num_f <= 0)
2349  {
2350  startpos = endpos = curlen = 0;
2351  for (i = 0; i < prs->curwords && curlen < min_words; i++)
2352  {
2353  if (!NONWORDTOKEN(prs->words[i].type))
2354  curlen++;
2355  endpos = i;
2356  }
2357  mark_fragment(prs, highlightall, startpos, endpos);
2358  }
2359 
2360  pfree(covers);
2361 }
#define PG_INT32_MAX
Definition: c.h:535
signed int int32
Definition: c.h:440
void pfree(void *pointer)
Definition: mcxt.c:1175
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1188
bool chosen
Definition: wparser_def.c:1950
int32 endpos
Definition: wparser_def.c:1947
int32 curlen
Definition: wparser_def.c:1949
int32 startpos
Definition: wparser_def.c:1946
bool excluded
Definition: wparser_def.c:1951
int32 poslen
Definition: wparser_def.c:1948
#define BADENDPOINT(j)
Definition: wparser_def.c:1939
static void mark_fragment(HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
Definition: wparser_def.c:2097
static bool hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover, int *p, int *q)
Definition: wparser_def.c:2037
static void get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
Definition: wparser_def.c:2133

References BADENDPOINT, CoverPos::chosen, CoverPos::curlen, HeadlineParsedText::curwords, CoverPos::endpos, endpos, CoverPos::excluded, get_next_fragment(), hlCover(), i, HeadlineWordEntry::in, mark_fragment(), NONWORDTOKEN, palloc(), pfree(), PG_INT32_MAX, CoverPos::poslen, repalloc(), CoverPos::startpos, startpos, HeadlineWordEntry::type, and HeadlineParsedText::words.

Referenced by prsd_headline().

◆ mark_hl_words()

static void mark_hl_words ( HeadlineParsedText * prs,
TSQuery  query,
bool  highlightall,
int  shortword,
int  min_words,
int  max_words,
int  max_cover 
)
static

Definition at line 2367 of file wparser_def.c.

2369 {
2370  int p = 0,
2371  q = 0;
2372  int bestb = -1,
2373  beste = -1;
2374  int bestlen = -1;
2375  bool bestcover = false;
2376  int pose,
2377  posb,
2378  poslen,
2379  curlen;
2380  bool poscover;
2381  int i;
2382 
2383  if (!highlightall)
2384  {
2385  /* examine all covers, select a headline using the best one */
2386  while (hlCover(prs, query, max_cover, &p, &q))
2387  {
2388  /*
2389  * Count words (curlen) and interesting words (poslen) within
2390  * cover, but stop once we reach max_words. This step doesn't
2391  * consider whether that's a good stopping point. posb and pose
2392  * are set to the start and end indexes of the possible headline.
2393  */
2394  curlen = 0;
2395  poslen = 0;
2396  posb = pose = p;
2397  for (i = p; i <= q && curlen < max_words; i++)
2398  {
2399  if (!NONWORDTOKEN(prs->words[i].type))
2400  curlen++;
2401  if (INTERESTINGWORD(i))
2402  poslen++;
2403  pose = i;
2404  }
2405 
2406  if (curlen < max_words)
2407  {
2408  /*
2409  * We have room to lengthen the headline, so search forward
2410  * until it's full or we find a good stopping point. We'll
2411  * reconsider the word at "q", then move forward.
2412  */
2413  for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
2414  {
2415  if (i > q)
2416  {
2417  if (!NONWORDTOKEN(prs->words[i].type))
2418  curlen++;
2419  if (INTERESTINGWORD(i))
2420  poslen++;
2421  }
2422  pose = i;
2423  if (BADENDPOINT(i))
2424  continue;
2425  if (curlen >= min_words)
2426  break;
2427  }
2428  if (curlen < min_words)
2429  {
2430  /*
2431  * Reached end of text and our headline is still shorter
2432  * than min_words, so try to extend it to the left.
2433  */
2434  for (i = p - 1; i >= 0; i--)
2435  {
2436  if (!NONWORDTOKEN(prs->words[i].type))
2437  curlen++;
2438  if (INTERESTINGWORD(i))
2439  poslen++;
2440  if (curlen >= max_words)
2441  break;
2442  if (BADENDPOINT(i))
2443  continue;
2444  if (curlen >= min_words)
2445  break;
2446  }
2447  posb = (i >= 0) ? i : 0;
2448  }
2449  }
2450  else
2451  {
2452  /*
2453  * Can't make headline longer, so consider making it shorter
2454  * if needed to avoid a bad endpoint.
2455  */
2456  if (i > q)
2457  i = q;
2458  for (; curlen > min_words; i--)
2459  {
2460  if (!BADENDPOINT(i))
2461  break;
2462  if (!NONWORDTOKEN(prs->words[i].type))
2463  curlen--;
2464  if (INTERESTINGWORD(i))
2465  poslen--;
2466  pose = i - 1;
2467  }
2468  }
2469 
2470  /*
2471  * Check whether the proposed headline includes the original
2472  * cover; it might not if we trimmed it due to max_words.
2473  */
2474  poscover = (posb <= p && pose >= q);
2475 
2476  /*
2477  * Adopt this headline if it's better than the last one, giving
2478  * highest priority to headlines including the cover, then to
2479  * headlines with more interesting words, then to headlines with
2480  * good stopping points. (Since bestlen is initially -1, we will
2481  * certainly adopt the first headline.)
2482  */
2483  if (poscover > bestcover ||
2484  (poscover == bestcover && poslen > bestlen) ||
2485  (poscover == bestcover && poslen == bestlen &&
2486  !BADENDPOINT(pose) && BADENDPOINT(beste)))
2487  {
2488  bestb = posb;
2489  beste = pose;
2490  bestlen = poslen;
2491  bestcover = poscover;
2492  }
2493 
2494  /* move p to generate the next cover */
2495  p++;
2496  }
2497 
2498  /*
2499  * If we found nothing acceptable, select min_words words starting at
2500  * the beginning.
2501  */
2502  if (bestlen < 0)
2503  {
2504  curlen = 0;
2505  pose = 0;
2506  for (i = 0; i < prs->curwords && curlen < min_words; i++)
2507  {
2508  if (!NONWORDTOKEN(prs->words[i].type))
2509  curlen++;
2510  pose = i;
2511  }
2512  bestb = 0;
2513  beste = pose;
2514  }
2515  }
2516  else
2517  {
2518  /* highlightall mode: headline is whole document */
2519  bestb = 0;
2520  beste = prs->curwords - 1;
2521  }
2522 
2523  mark_fragment(prs, highlightall, bestb, beste);
2524 }

References BADENDPOINT, HeadlineParsedText::curwords, hlCover(), i, INTERESTINGWORD, mark_fragment(), NONWORDTOKEN, HeadlineWordEntry::type, and HeadlineParsedText::words.

Referenced by prsd_headline().

◆ newTParserPosition()

static TParserPosition* newTParserPosition ( TParserPosition * prev)
static

Definition at line 270 of file wparser_def.c.

271 {
273 
274  if (prev)
275  memcpy(res, prev, sizeof(TParserPosition));
276  else
277  memset(res, 0, sizeof(TParserPosition));
278 
279  res->prev = prev;
280 
281  res->pushedAtAction = NULL;
282 
283  return res;
284 }

References palloc(), and res.

Referenced by p_isURLPath(), TParserCopyInit(), TParserGet(), and TParserInit().

◆ p_isascii()

static int p_isascii ( TParser * prs)
static

Definition at line 492 of file wparser_def.c.

493 {
494  return (prs->state->charlen == 1 && isascii((unsigned char) *(prs->str + prs->state->posbyte))) ? 1 : 0;
495 }
char * str
Definition: wparser_def.c:242
TParserPosition * state
Definition: wparser_def.c:250

References TParserPosition::charlen, TParserPosition::posbyte, TParser::state, and TParser::str.

Referenced by p_isasclet().

◆ p_isasclet()

static int p_isasclet ( TParser * prs)
static

Definition at line 498 of file wparser_def.c.

499 {
500  return (p_isascii(prs) && p_isalpha(prs)) ? 1 : 0;
501 }
static int p_isascii(TParser *prs)
Definition: wparser_def.c:492

References p_isascii().

◆ p_isEOF()

static int p_isEOF ( TParser * prs)
static

Definition at line 473 of file wparser_def.c.

474 {
475  Assert(prs->state);
476  return (prs->state->posbyte == prs->lenstr || prs->state->charlen == 0) ? 1 : 0;
477 }
Assert(fmt[strlen(fmt) - 1] !='\n')
int lenstr
Definition: wparser_def.c:243

References Assert(), TParserPosition::charlen, TParser::lenstr, TParserPosition::posbyte, and TParser::state.

Referenced by _make_compiler_happy().

◆ p_iseqC()

static int p_iseqC ( TParser * prs)
static

Definition at line 480 of file wparser_def.c.

481 {
482  return p_iseq(prs, prs->c);
483 }
char c
Definition: wparser_def.c:255

References TParser::c.

Referenced by _make_compiler_happy().

◆ p_ishost()

static int p_ishost ( TParser * prs)
static

Definition at line 628 of file wparser_def.c.

629 {
630  TParser *tmpprs = TParserCopyInit(prs);
631  int res = 0;
632 
633  tmpprs->wanthost = true;
634 
635  if (TParserGet(tmpprs) && tmpprs->type == HOST)
636  {
637  prs->state->posbyte += tmpprs->lenbytetoken;
638  prs->state->poschar += tmpprs->lenchartoken;
639  prs->state->lenbytetoken += tmpprs->lenbytetoken;
640  prs->state->lenchartoken += tmpprs->lenchartoken;
641  prs->state->charlen = tmpprs->state->charlen;
642  res = 1;
643  }
644  TParserCopyClose(tmpprs);
645 
646  return res;
647 }
int type
Definition: wparser_def.c:261
bool wanthost
Definition: wparser_def.c:252
int lenbytetoken
Definition: wparser_def.c:259
int lenchartoken
Definition: wparser_def.c:260
static TParser * TParserCopyInit(const TParser *orig)
Definition: wparser_def.c:345
#define HOST
Definition: wparser_def.c:39
static bool TParserGet(TParser *prs)
Definition: wparser_def.c:1697
static void TParserCopyClose(TParser *prs)
Definition: wparser_def.c:396

References TParserPosition::charlen, HOST, TParserPosition::lenbytetoken, TParser::lenbytetoken, TParserPosition::lenchartoken, TParser::lenchartoken, TParserPosition::posbyte, TParserPosition::poschar, res, TParser::state, TParserCopyClose(), TParserCopyInit(), TParserGet(), TParser::type, and TParser::wanthost.

◆ p_isignore()

static int p_isignore ( TParser * prs)
static

Definition at line 622 of file wparser_def.c.

623 {
624  return (prs->ignore) ? 1 : 0;
625 }
bool ignore
Definition: wparser_def.c:251

References TParser::ignore.

◆ p_isneC()

static int p_isneC ( TParser * prs)
static

Definition at line 486 of file wparser_def.c.

487 {
488  return !p_iseq(prs, prs->c);
489 }

References TParser::c.

Referenced by _make_compiler_happy().

◆ p_isspecial()

static int p_isspecial ( TParser * prs)
static

Definition at line 679 of file wparser_def.c.

680 {
681  /*
682  * pg_dsplen could return -1 which means error or control character
683  */
684  if (pg_dsplen(prs->str + prs->state->posbyte) == 0)
685  return 1;
686 
687  /*
688  * Unicode Characters in the 'Mark, Spacing Combining' Category That
689  * characters are not alpha although they are not breakers of word too.
690  * Check that only in utf encoding, because other encodings aren't
691  * supported by postgres or even exists.
692  */
693  if (GetDatabaseEncoding() == PG_UTF8 && prs->usewide)
694  {
695  static const pg_wchar strange_letter[] = {
696  /*
697  * use binary search, so elements should be ordered
698  */
699  0x0903, /* DEVANAGARI SIGN VISARGA */
700  0x093E, /* DEVANAGARI VOWEL SIGN AA */
701  0x093F, /* DEVANAGARI VOWEL SIGN I */
702  0x0940, /* DEVANAGARI VOWEL SIGN II */
703  0x0949, /* DEVANAGARI VOWEL SIGN CANDRA O */
704  0x094A, /* DEVANAGARI VOWEL SIGN SHORT O */
705  0x094B, /* DEVANAGARI VOWEL SIGN O */
706  0x094C, /* DEVANAGARI VOWEL SIGN AU */
707  0x0982, /* BENGALI SIGN ANUSVARA */
708  0x0983, /* BENGALI SIGN VISARGA */
709  0x09BE, /* BENGALI VOWEL SIGN AA */
710  0x09BF, /* BENGALI VOWEL SIGN I */
711  0x09C0, /* BENGALI VOWEL SIGN II */
712  0x09C7, /* BENGALI VOWEL SIGN E */
713  0x09C8, /* BENGALI VOWEL SIGN AI */
714  0x09CB, /* BENGALI VOWEL SIGN O */
715  0x09CC, /* BENGALI VOWEL SIGN AU */
716  0x09D7, /* BENGALI AU LENGTH MARK */
717  0x0A03, /* GURMUKHI SIGN VISARGA */
718  0x0A3E, /* GURMUKHI VOWEL SIGN AA */
719  0x0A3F, /* GURMUKHI VOWEL SIGN I */
720  0x0A40, /* GURMUKHI VOWEL SIGN II */
721  0x0A83, /* GUJARATI SIGN VISARGA */
722  0x0ABE, /* GUJARATI VOWEL SIGN AA */
723  0x0ABF, /* GUJARATI VOWEL SIGN I */
724  0x0AC0, /* GUJARATI VOWEL SIGN II */
725  0x0AC9, /* GUJARATI VOWEL SIGN CANDRA O */
726  0x0ACB, /* GUJARATI VOWEL SIGN O */
727  0x0ACC, /* GUJARATI VOWEL SIGN AU */
728  0x0B02, /* ORIYA SIGN ANUSVARA */
729  0x0B03, /* ORIYA SIGN VISARGA */
730  0x0B3E, /* ORIYA VOWEL SIGN AA */
731  0x0B40, /* ORIYA VOWEL SIGN II */
732  0x0B47, /* ORIYA VOWEL SIGN E */
733  0x0B48, /* ORIYA VOWEL SIGN AI */
734  0x0B4B, /* ORIYA VOWEL SIGN O */
735  0x0B4C, /* ORIYA VOWEL SIGN AU */
736  0x0B57, /* ORIYA AU LENGTH MARK */
737  0x0BBE, /* TAMIL VOWEL SIGN AA */
738  0x0BBF, /* TAMIL VOWEL SIGN I */
739  0x0BC1, /* TAMIL VOWEL SIGN U */
740  0x0BC2, /* TAMIL VOWEL SIGN UU */
741  0x0BC6, /* TAMIL VOWEL SIGN E */
742  0x0BC7, /* TAMIL VOWEL SIGN EE */
743  0x0BC8, /* TAMIL VOWEL SIGN AI */
744  0x0BCA, /* TAMIL VOWEL SIGN O */
745  0x0BCB, /* TAMIL VOWEL SIGN OO */
746  0x0BCC, /* TAMIL VOWEL SIGN AU */
747  0x0BD7, /* TAMIL AU LENGTH MARK */
748  0x0C01, /* TELUGU SIGN CANDRABINDU */
749  0x0C02, /* TELUGU SIGN ANUSVARA */
750  0x0C03, /* TELUGU SIGN VISARGA */
751  0x0C41, /* TELUGU VOWEL SIGN U */
752  0x0C42, /* TELUGU VOWEL SIGN UU */
753  0x0C43, /* TELUGU VOWEL SIGN VOCALIC R */
754  0x0C44, /* TELUGU VOWEL SIGN VOCALIC RR */
755  0x0C82, /* KANNADA SIGN ANUSVARA */
756  0x0C83, /* KANNADA SIGN VISARGA */
757  0x0CBE, /* KANNADA VOWEL SIGN AA */
758  0x0CC0, /* KANNADA VOWEL SIGN II */
759  0x0CC1, /* KANNADA VOWEL SIGN U */
760  0x0CC2, /* KANNADA VOWEL SIGN UU */
761  0x0CC3, /* KANNADA VOWEL SIGN VOCALIC R */
762  0x0CC4, /* KANNADA VOWEL SIGN VOCALIC RR */
763  0x0CC7, /* KANNADA VOWEL SIGN EE */
764  0x0CC8, /* KANNADA VOWEL SIGN AI */
765  0x0CCA, /* KANNADA VOWEL SIGN O */
766  0x0CCB, /* KANNADA VOWEL SIGN OO */
767  0x0CD5, /* KANNADA LENGTH MARK */
768  0x0CD6, /* KANNADA AI LENGTH MARK */
769  0x0D02, /* MALAYALAM SIGN ANUSVARA */
770  0x0D03, /* MALAYALAM SIGN VISARGA */
771  0x0D3E, /* MALAYALAM VOWEL SIGN AA */
772  0x0D3F, /* MALAYALAM VOWEL SIGN I */
773  0x0D40, /* MALAYALAM VOWEL SIGN II */
774  0x0D46, /* MALAYALAM VOWEL SIGN E */
775  0x0D47, /* MALAYALAM VOWEL SIGN EE */
776  0x0D48, /* MALAYALAM VOWEL SIGN AI */
777  0x0D4A, /* MALAYALAM VOWEL SIGN O */
778  0x0D4B, /* MALAYALAM VOWEL SIGN OO */
779  0x0D4C, /* MALAYALAM VOWEL SIGN AU */
780  0x0D57, /* MALAYALAM AU LENGTH MARK */
781  0x0D82, /* SINHALA SIGN ANUSVARAYA */
782  0x0D83, /* SINHALA SIGN VISARGAYA */
783  0x0DCF, /* SINHALA VOWEL SIGN AELA-PILLA */
784  0x0DD0, /* SINHALA VOWEL SIGN KETTI AEDA-PILLA */
785  0x0DD1, /* SINHALA VOWEL SIGN DIGA AEDA-PILLA */
786  0x0DD8, /* SINHALA VOWEL SIGN GAETTA-PILLA */
787  0x0DD9, /* SINHALA VOWEL SIGN KOMBUVA */
788  0x0DDA, /* SINHALA VOWEL SIGN DIGA KOMBUVA */
789  0x0DDB, /* SINHALA VOWEL SIGN KOMBU DEKA */
790  0x0DDC, /* SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA */
791  0x0DDD, /* SINHALA VOWEL SIGN KOMBUVA HAA DIGA
792  * AELA-PILLA */
793  0x0DDE, /* SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA */
794  0x0DDF, /* SINHALA VOWEL SIGN GAYANUKITTA */
795  0x0DF2, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA */
796  0x0DF3, /* SINHALA VOWEL SIGN DIGA GAYANUKITTA */
797  0x0F3E, /* TIBETAN SIGN YAR TSHES */
798  0x0F3F, /* TIBETAN SIGN MAR TSHES */
799  0x0F7F, /* TIBETAN SIGN RNAM BCAD */
800  0x102B, /* MYANMAR VOWEL SIGN TALL AA */
801  0x102C, /* MYANMAR VOWEL SIGN AA */
802  0x1031, /* MYANMAR VOWEL SIGN E */
803  0x1038, /* MYANMAR SIGN VISARGA */
804  0x103B, /* MYANMAR CONSONANT SIGN MEDIAL YA */
805  0x103C, /* MYANMAR CONSONANT SIGN MEDIAL RA */
806  0x1056, /* MYANMAR VOWEL SIGN VOCALIC R */
807  0x1057, /* MYANMAR VOWEL SIGN VOCALIC RR */
808  0x1062, /* MYANMAR VOWEL SIGN SGAW KAREN EU */
809  0x1063, /* MYANMAR TONE MARK SGAW KAREN HATHI */
810  0x1064, /* MYANMAR TONE MARK SGAW KAREN KE PHO */
811  0x1067, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU */
812  0x1068, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN UE */
813  0x1069, /* MYANMAR SIGN WESTERN PWO KAREN TONE-1 */
814  0x106A, /* MYANMAR SIGN WESTERN PWO KAREN TONE-2 */
815  0x106B, /* MYANMAR SIGN WESTERN PWO KAREN TONE-3 */
816  0x106C, /* MYANMAR SIGN WESTERN PWO KAREN TONE-4 */
817  0x106D, /* MYANMAR SIGN WESTERN PWO KAREN TONE-5 */
818  0x1083, /* MYANMAR VOWEL SIGN SHAN AA */
819  0x1084, /* MYANMAR VOWEL SIGN SHAN E */
820  0x1087, /* MYANMAR SIGN SHAN TONE-2 */
821  0x1088, /* MYANMAR SIGN SHAN TONE-3 */
822  0x1089, /* MYANMAR SIGN SHAN TONE-5 */
823  0x108A, /* MYANMAR SIGN SHAN TONE-6 */
824  0x108B, /* MYANMAR SIGN SHAN COUNCIL TONE-2 */
825  0x108C, /* MYANMAR SIGN SHAN COUNCIL TONE-3 */
826  0x108F, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */
827  0x17B6, /* KHMER VOWEL SIGN AA */
828  0x17BE, /* KHMER VOWEL SIGN OE */
829  0x17BF, /* KHMER VOWEL SIGN YA */
830  0x17C0, /* KHMER VOWEL SIGN IE */
831  0x17C1, /* KHMER VOWEL SIGN E */
832  0x17C2, /* KHMER VOWEL SIGN AE */
833  0x17C3, /* KHMER VOWEL SIGN AI */
834  0x17C4, /* KHMER VOWEL SIGN OO */
835  0x17C5, /* KHMER VOWEL SIGN AU */
836  0x17C7, /* KHMER SIGN REAHMUK */
837  0x17C8, /* KHMER SIGN YUUKALEAPINTU */
838  0x1923, /* LIMBU VOWEL SIGN EE */
839  0x1924, /* LIMBU VOWEL SIGN AI */
840  0x1925, /* LIMBU VOWEL SIGN OO */
841  0x1926, /* LIMBU VOWEL SIGN AU */
842  0x1929, /* LIMBU SUBJOINED LETTER YA */
843  0x192A, /* LIMBU SUBJOINED LETTER RA */
844  0x192B, /* LIMBU SUBJOINED LETTER WA */
845  0x1930, /* LIMBU SMALL LETTER KA */
846  0x1931, /* LIMBU SMALL LETTER NGA */
847  0x1933, /* LIMBU SMALL LETTER TA */
848  0x1934, /* LIMBU SMALL LETTER NA */
849  0x1935, /* LIMBU SMALL LETTER PA */
850  0x1936, /* LIMBU SMALL LETTER MA */
851  0x1937, /* LIMBU SMALL LETTER RA */
852  0x1938, /* LIMBU SMALL LETTER LA */
853  0x19B0, /* NEW TAI LUE VOWEL SIGN VOWEL SHORTENER */
854  0x19B1, /* NEW TAI LUE VOWEL SIGN AA */
855  0x19B2, /* NEW TAI LUE VOWEL SIGN II */
856  0x19B3, /* NEW TAI LUE VOWEL SIGN U */
857  0x19B4, /* NEW TAI LUE VOWEL SIGN UU */
858  0x19B5, /* NEW TAI LUE VOWEL SIGN E */
859  0x19B6, /* NEW TAI LUE VOWEL SIGN AE */
860  0x19B7, /* NEW TAI LUE VOWEL SIGN O */
861  0x19B8, /* NEW TAI LUE VOWEL SIGN OA */
862  0x19B9, /* NEW TAI LUE VOWEL SIGN UE */
863  0x19BA, /* NEW TAI LUE VOWEL SIGN AY */
864  0x19BB, /* NEW TAI LUE VOWEL SIGN AAY */
865  0x19BC, /* NEW TAI LUE VOWEL SIGN UY */
866  0x19BD, /* NEW TAI LUE VOWEL SIGN OY */
867  0x19BE, /* NEW TAI LUE VOWEL SIGN OAY */
868  0x19BF, /* NEW TAI LUE VOWEL SIGN UEY */
869  0x19C0, /* NEW TAI LUE VOWEL SIGN IY */
870  0x19C8, /* NEW TAI LUE TONE MARK-1 */
871  0x19C9, /* NEW TAI LUE TONE MARK-2 */
872  0x1A19, /* BUGINESE VOWEL SIGN E */
873  0x1A1A, /* BUGINESE VOWEL SIGN O */
874  0x1A1B, /* BUGINESE VOWEL SIGN AE */
875  0x1B04, /* BALINESE SIGN BISAH */
876  0x1B35, /* BALINESE VOWEL SIGN TEDUNG */
877  0x1B3B, /* BALINESE VOWEL SIGN RA REPA TEDUNG */
878  0x1B3D, /* BALINESE VOWEL SIGN LA LENGA TEDUNG */
879  0x1B3E, /* BALINESE VOWEL SIGN TALING */
880  0x1B3F, /* BALINESE VOWEL SIGN TALING REPA */
881  0x1B40, /* BALINESE VOWEL SIGN TALING TEDUNG */
882  0x1B41, /* BALINESE VOWEL SIGN TALING REPA TEDUNG */
883  0x1B43, /* BALINESE VOWEL SIGN PEPET TEDUNG */
884  0x1B44, /* BALINESE ADEG ADEG */
885  0x1B82, /* SUNDANESE SIGN PANGWISAD */
886  0x1BA1, /* SUNDANESE CONSONANT SIGN PAMINGKAL */
887  0x1BA6, /* SUNDANESE VOWEL SIGN PANAELAENG */
888  0x1BA7, /* SUNDANESE VOWEL SIGN PANOLONG */
889  0x1BAA, /* SUNDANESE SIGN PAMAAEH */
890  0x1C24, /* LEPCHA SUBJOINED LETTER YA */
891  0x1C25, /* LEPCHA SUBJOINED LETTER RA */
892  0x1C26, /* LEPCHA VOWEL SIGN AA */
893  0x1C27, /* LEPCHA VOWEL SIGN I */
894  0x1C28, /* LEPCHA VOWEL SIGN O */
895  0x1C29, /* LEPCHA VOWEL SIGN OO */
896  0x1C2A, /* LEPCHA VOWEL SIGN U */
897  0x1C2B, /* LEPCHA VOWEL SIGN UU */
898  0x1C34, /* LEPCHA CONSONANT SIGN NYIN-DO */
899  0x1C35, /* LEPCHA CONSONANT SIGN KANG */
900  0xA823, /* SYLOTI NAGRI VOWEL SIGN A */
901  0xA824, /* SYLOTI NAGRI VOWEL SIGN I */
902  0xA827, /* SYLOTI NAGRI VOWEL SIGN OO */
903  0xA880, /* SAURASHTRA SIGN ANUSVARA */
904  0xA881, /* SAURASHTRA SIGN VISARGA */
905  0xA8B4, /* SAURASHTRA CONSONANT SIGN HAARU */
906  0xA8B5, /* SAURASHTRA VOWEL SIGN AA */
907  0xA8B6, /* SAURASHTRA VOWEL SIGN I */
908  0xA8B7, /* SAURASHTRA VOWEL SIGN II */
909  0xA8B8, /* SAURASHTRA VOWEL SIGN U */
910  0xA8B9, /* SAURASHTRA VOWEL SIGN UU */
911  0xA8BA, /* SAURASHTRA VOWEL SIGN VOCALIC R */
912  0xA8BB, /* SAURASHTRA VOWEL SIGN VOCALIC RR */
913  0xA8BC, /* SAURASHTRA VOWEL SIGN VOCALIC L */
914  0xA8BD, /* SAURASHTRA VOWEL SIGN VOCALIC LL */
915  0xA8BE, /* SAURASHTRA VOWEL SIGN E */
916  0xA8BF, /* SAURASHTRA VOWEL SIGN EE */
917  0xA8C0, /* SAURASHTRA VOWEL SIGN AI */
918  0xA8C1, /* SAURASHTRA VOWEL SIGN O */
919  0xA8C2, /* SAURASHTRA VOWEL SIGN OO */
920  0xA8C3, /* SAURASHTRA VOWEL SIGN AU */
921  0xA952, /* REJANG CONSONANT SIGN H */
922  0xA953, /* REJANG VIRAMA */
923  0xAA2F, /* CHAM VOWEL SIGN O */
924  0xAA30, /* CHAM VOWEL SIGN AI */
925  0xAA33, /* CHAM CONSONANT SIGN YA */
926  0xAA34, /* CHAM CONSONANT SIGN RA */
927  0xAA4D /* CHAM CONSONANT SIGN FINAL H */
928  };
929  const pg_wchar *StopLow = strange_letter,
930  *StopHigh = strange_letter + lengthof(strange_letter),
931  *StopMiddle;
932  pg_wchar c;
933 
934  if (prs->pgwstr)
935  c = *(prs->pgwstr + prs->state->poschar);
936  else
937  c = (pg_wchar) *(prs->wstr + prs->state->poschar);
938 
939  while (StopLow < StopHigh)
940  {
941  StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
942  if (*StopMiddle == c)
943  return 1;
944  else if (*StopMiddle < c)
945  StopLow = StopMiddle + 1;
946  else
947  StopHigh = StopMiddle;
948  }
949  }
950 
951  return 0;
952 }
#define lengthof(array)
Definition: c.h:745
unsigned int pg_wchar
Definition: mbprint.c:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
int pg_dsplen(const char *mbstr)
Definition: mbutils.c:973
@ PG_UTF8
Definition: pg_wchar.h:230
pg_wchar * pgwstr
Definition: wparser_def.c:245
wchar_t * wstr
Definition: wparser_def.c:244
bool usewide
Definition: wparser_def.c:246

References GetDatabaseEncoding(), lengthof, pg_dsplen(), PG_UTF8, TParser::pgwstr, TParserPosition::posbyte, TParserPosition::poschar, TParser::state, TParser::str, TParser::usewide, and TParser::wstr.

◆ p_isstophost()

static int p_isstophost ( TParser * prs)
static

Definition at line 611 of file wparser_def.c.

612 {
613  if (prs->wanthost)
614  {
615  prs->wanthost = false;
616  return 1;
617  }
618  return 0;
619 }

References TParser::wanthost.

◆ p_isurlchar()

static int p_isurlchar ( TParser * prs)
static

Definition at line 504 of file wparser_def.c.

505 {
506  char ch;
507 
508  /* no non-ASCII need apply */
509  if (prs->state->charlen != 1)
510  return 0;
511  ch = *(prs->str + prs->state->posbyte);
512  /* no spaces or control characters */
513  if (ch <= 0x20 || ch >= 0x7F)
514  return 0;
515  /* reject characters disallowed by RFC 3986 */
516  switch (ch)
517  {
518  case '"':
519  case '<':
520  case '>':
521  case '\\':
522  case '^':
523  case '`':
524  case '{':
525  case '|':
526  case '}':
527  return 0;
528  }
529  return 1;
530 }

References TParserPosition::charlen, TParserPosition::posbyte, TParser::state, and TParser::str.

◆ p_isURLPath()

static int p_isURLPath ( TParser * prs)
static

Definition at line 650 of file wparser_def.c.

651 {
652  TParser *tmpprs = TParserCopyInit(prs);
653  int res = 0;
654 
655  tmpprs->state = newTParserPosition(tmpprs->state);
656  tmpprs->state->state = TPS_InURLPathFirst;
657 
658  if (TParserGet(tmpprs) && tmpprs->type == URLPATH)
659  {
660  prs->state->posbyte += tmpprs->lenbytetoken;
661  prs->state->poschar += tmpprs->lenchartoken;
662  prs->state->lenbytetoken += tmpprs->lenbytetoken;
663  prs->state->lenchartoken += tmpprs->lenchartoken;
664  prs->state->charlen = tmpprs->state->charlen;
665  res = 1;
666  }
667  TParserCopyClose(tmpprs);
668 
669  return res;
670 }
TParserState state
Definition: wparser_def.c:234
static TParserPosition * newTParserPosition(TParserPosition *prev)
Definition: wparser_def.c:270
#define URLPATH
Definition: wparser_def.c:51

References TParserPosition::charlen, TParserPosition::lenbytetoken, TParser::lenbytetoken, TParserPosition::lenchartoken, TParser::lenchartoken, newTParserPosition(), TParserPosition::posbyte, TParserPosition::poschar, res, TParserPosition::state, TParser::state, TParserCopyClose(), TParserCopyInit(), TParserGet(), TPS_InURLPathFirst, TParser::type, and URLPATH.

◆ p_iswhat()

p_iswhat ( alnum  ,
 
)

Definition at line 453 of file wparser_def.c.

467 {
468  Assert(prs->state);
469  return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
470 }

References Assert().

◆ prsd_end()

Datum prsd_end ( PG_FUNCTION_ARGS  )

Definition at line 1903 of file wparser_def.c.

1904 {
1905  TParser *p = (TParser *) PG_GETARG_POINTER(0);
1906 
1907  TParserClose(p);
1908  PG_RETURN_VOID();
1909 }
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
static void TParserClose(TParser *prs)
Definition: wparser_def.c:371

References PG_GETARG_POINTER, PG_RETURN_VOID, and TParserClose().

◆ prsd_headline()

Datum prsd_headline ( PG_FUNCTION_ARGS  )

Definition at line 2530 of file wparser_def.c.

2531 {
2533  List *prsoptions = (List *) PG_GETARG_POINTER(1);
2534  TSQuery query = PG_GETARG_TSQUERY(2);
2535 
2536  /* default option values: */
2537  int min_words = 15;
2538  int max_words = 35;
2539  int shortword = 3;
2540  int max_fragments = 0;
2541  bool highlightall = false;
2542  int max_cover;
2543  ListCell *l;
2544 
2545  /* Extract configuration option values */
2546  prs->startsel = NULL;
2547  prs->stopsel = NULL;
2548  prs->fragdelim = NULL;
2549  foreach(l, prsoptions)
2550  {
2551  DefElem *defel = (DefElem *) lfirst(l);
2552  char *val = defGetString(defel);
2553 
2554  if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
2555  max_words = pg_strtoint32(val);
2556  else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
2557  min_words = pg_strtoint32(val);
2558  else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
2559  shortword = pg_strtoint32(val);
2560  else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0)
2561  max_fragments = pg_strtoint32(val);
2562  else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
2563  prs->startsel = pstrdup(val);
2564  else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
2565  prs->stopsel = pstrdup(val);
2566  else if (pg_strcasecmp(defel->defname, "FragmentDelimiter") == 0)
2567  prs->fragdelim = pstrdup(val);
2568  else if (pg_strcasecmp(defel->defname, "HighlightAll") == 0)
2569  highlightall = (pg_strcasecmp(val, "1") == 0 ||
2570  pg_strcasecmp(val, "on") == 0 ||
2571  pg_strcasecmp(val, "true") == 0 ||
2572  pg_strcasecmp(val, "t") == 0 ||
2573  pg_strcasecmp(val, "y") == 0 ||
2574  pg_strcasecmp(val, "yes") == 0);
2575  else
2576  ereport(ERROR,
2577  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2578  errmsg("unrecognized headline parameter: \"%s\"",
2579  defel->defname)));
2580  }
2581 
2582  /*
2583  * We might eventually make max_cover a user-settable parameter, but for
2584  * now, just compute a reasonable value based on max_words and
2585  * max_fragments.
2586  */
2587  max_cover = Max(max_words * 10, 100);
2588  if (max_fragments > 0)
2589  max_cover *= max_fragments;
2590 
2591  /* in HighlightAll mode these parameters are ignored */
2592  if (!highlightall)
2593  {
2594  if (min_words >= max_words)
2595  ereport(ERROR,
2596  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2597  errmsg("MinWords should be less than MaxWords")));
2598  if (min_words <= 0)
2599  ereport(ERROR,
2600  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2601  errmsg("MinWords should be positive")));
2602  if (shortword < 0)
2603  ereport(ERROR,
2604  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2605  errmsg("ShortWord should be >= 0")));
2606  if (max_fragments < 0)
2607  ereport(ERROR,
2608  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2609  errmsg("MaxFragments should be >= 0")));
2610  }
2611 
2612  /* Apply appropriate headline selector */
2613  if (max_fragments == 0)
2614  mark_hl_words(prs, query, highlightall, shortword,
2615  min_words, max_words, max_cover);
2616  else
2617  mark_hl_fragments(prs, query, highlightall, shortword,
2618  min_words, max_words, max_fragments, max_cover);
2619 
2620  /* Fill in default values for string options */
2621  if (!prs->startsel)
2622  prs->startsel = pstrdup("<b>");
2623  if (!prs->stopsel)
2624  prs->stopsel = pstrdup("</b>");
2625  if (!prs->fragdelim)
2626  prs->fragdelim = pstrdup(" ... ");
2627 
2628  /* Caller will need these lengths, too */
2629  prs->startsellen = strlen(prs->startsel);
2630  prs->stopsellen = strlen(prs->stopsel);
2631  prs->fragdelimlen = strlen(prs->fragdelim);
2632 
2633  PG_RETURN_POINTER(prs);
2634 }
#define Max(x, y)
Definition: c.h:991
char * defGetString(DefElem *def)
Definition: define.c:49
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ERROR
Definition: elog.h:33
#define ereport(elevel,...)
Definition: elog.h:143
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
char * pstrdup(const char *in)
Definition: mcxt.c:1305
int32 pg_strtoint32(const char *s)
Definition: numutils.c:175
#define lfirst(lc)
Definition: pg_list.h:170
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
char * defname
Definition: parsenodes.h:766
Definition: pg_list.h:52
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:236
static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall, int shortword, int min_words, int max_words, int max_fragments, int max_cover)
Definition: wparser_def.c:2184
static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall, int shortword, int min_words, int max_words, int max_cover)
Definition: wparser_def.c:2367

References defGetString(), DefElem::defname, ereport, errcode(), errmsg(), ERROR, HeadlineParsedText::fragdelim, HeadlineParsedText::fragdelimlen, lfirst, mark_hl_fragments(), mark_hl_words(), Max, PG_GETARG_POINTER, PG_GETARG_TSQUERY, PG_RETURN_POINTER, pg_strcasecmp(), pg_strtoint32(), pstrdup(), HeadlineParsedText::startsel, HeadlineParsedText::startsellen, HeadlineParsedText::stopsel, HeadlineParsedText::stopsellen, and val.

◆ prsd_lextype()

Datum prsd_lextype ( PG_FUNCTION_ARGS  )

Definition at line 1863 of file wparser_def.c.

/*
 * prsd_lextype - build the table describing this parser's token types.
 *
 * Returns (as a pointer Datum) a palloc'd array of LASTNUM + 1 LexDescr
 * entries.  Array slot i - 1 describes token type i: its numeric id plus
 * pstrdup'd copies of tok_alias[i] and lex_descr[i].
 */
1864 {
1865  LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
1866  int i;
1867 
     /* token type ids start at 1, array slots at 0 */
1868  for (i = 1; i <= LASTNUM; i++)
1869  {
1870  descr[i - 1].lexid = i;
1871  descr[i - 1].alias = pstrdup(tok_alias[i]);
1872  descr[i - 1].descr = pstrdup(lex_descr[i]);
1873  }
1874 
     /*
      * The extra last slot only gets lexid = 0; its alias/descr are left
      * unset.  Presumably this marks the end of the list for the caller --
      * TODO confirm against the consumer of this array.
      */
1875  descr[LASTNUM].lexid = 0;
1876 
1877  PG_RETURN_POINTER(descr);
1878 }
char * alias
Definition: ts_public.h:28
int lexid
Definition: ts_public.h:27
char * descr
Definition: ts_public.h:29
#define LASTNUM
Definition: wparser_def.c:58
static const char *const tok_alias[]
Definition: wparser_def.c:60
static const char *const lex_descr[]
Definition: wparser_def.c:87

References LexDescr::alias, LexDescr::descr, i, LASTNUM, lex_descr, LexDescr::lexid, palloc(), PG_RETURN_POINTER, pstrdup(), and tok_alias.

◆ prsd_nexttoken()

Datum prsd_nexttoken ( PG_FUNCTION_ARGS  )

Definition at line 1887 of file wparser_def.c.

/*
 * prsd_nexttoken - fetch the next token from the parser.
 *
 * Arguments (fmgr): 0 = TParser *, 1 = char ** receiving the token start,
 * 2 = int * receiving the token length in bytes.
 *
 * Returns the token type as int32, or 0 when TParserGet() reports that no
 * token was produced; in that case the output arguments are not written.
 */
1888 {
1889  TParser *p = (TParser *) PG_GETARG_POINTER(0);
1890  char **t = (char **) PG_GETARG_POINTER(1);
1891  int *tlen = (int *) PG_GETARG_POINTER(2);
1892 
1893  if (!TParserGet(p))
1894  PG_RETURN_INT32(0);
1895 
     /* the token is handed back as a pointer/length pair taken from the parser state */
1896  *t = p->token;
1897  *tlen = p->lenbytetoken;
1898 
1899  PG_RETURN_INT32(p->type);
1900 }
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
char * token
Definition: wparser_def.c:258

References TParser::lenbytetoken, PG_GETARG_POINTER, PG_RETURN_INT32, TParser::token, TParserGet(), and TParser::type.

◆ prsd_start()

Datum prsd_start ( PG_FUNCTION_ARGS  )

Definition at line 1881 of file wparser_def.c.

/*
 * prsd_start - create a parser for the given input.
 *
 * Arguments (fmgr): 0 = pointer to the text to parse, 1 = its length as
 * int32.  Returns the TParser built by TParserInit() as a pointer Datum.
 */
1882 {
1883  PG_RETURN_POINTER(TParserInit((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1)));
1884 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
static TParser * TParserInit(char *str, int len)
Definition: wparser_def.c:287

References PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_POINTER, and TParserInit().

◆ SpecialFURL()

static void SpecialFURL ( TParser prs)
static

◆ SpecialHyphen()

static void SpecialHyphen ( TParser prs)
static

◆ SpecialTags()

static void SpecialTags ( TParser prs)
static

Definition at line 563 of file wparser_def.c.

/*
 * SpecialTags - update prs->ignore when a script/style tag has been seen.
 *
 * The current token (prs->token) is compared case-insensitively against the
 * tag prefixes below; which comparison is tried depends on the token length
 * in characters (prs->state->lenchartoken).  An opening "<script" or
 * "<style" sets prs->ignore, the matching closing tags clear it, and any
 * other token leaves the flag untouched.
 */
564 {
565  switch (prs->state->lenchartoken)
566  {
567  case 8: /* </script */
568  if (pg_strncasecmp(prs->token, "</script", 8) == 0)
569  prs->ignore = false;
570  break;
571  case 7: /* <script || </style */
572  if (pg_strncasecmp(prs->token, "</style", 7) == 0)
573  prs->ignore = false;
574  else if (pg_strncasecmp(prs->token, "<script", 7) == 0)
575  prs->ignore = true;
576  break;
577  case 6: /* <style */
578  if (pg_strncasecmp(prs->token, "<style", 6) == 0)
579  prs->ignore = true;
580  break;
581  default:
     /* any other length cannot be one of the four tags of interest */
582  break;
583  }
584 }
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69

References TParser::ignore, TParserPosition::lenchartoken, pg_strncasecmp(), TParser::state, and TParser::token.

◆ SpecialVerVersion()

static void SpecialVerVersion ( TParser prs)
static

Definition at line 602 of file wparser_def.c.

/*
 * SpecialVerVersion - rewind the current position to the start of the token.
 *
 * Moves the byte and character positions back by the length accumulated for
 * the current token, then zeroes both token lengths, so the same input will
 * be scanned again from the token's first character.
 */
603 {
604  prs->state->posbyte -= prs->state->lenbytetoken;
605  prs->state->poschar -= prs->state->lenchartoken;
606  prs->state->lenbytetoken = 0;
607  prs->state->lenchartoken = 0;
608 }

References TParserPosition::lenbytetoken, TParserPosition::lenchartoken, TParserPosition::posbyte, TParserPosition::poschar, and TParser::state.

◆ TParserClose()

static void TParserClose ( TParser prs)
static

Definition at line 371 of file wparser_def.c.

/*
 * TParserClose - release a parser and everything it owns.
 *
 * Frees every TParserPosition on the state stack (following ->prev), the
 * wide-character buffers wstr and pgwstr when they are set, and finally the
 * TParser itself.  The input string prs->str is not freed here.
 */
372 {
     /* pop and free the whole position stack */
373  while (prs->state)
374  {
375  TParserPosition *ptr = prs->state->prev;
376 
377  pfree(prs->state);
378  prs->state = ptr;
379  }
380 
381  if (prs->wstr)
382  pfree(prs->wstr);
383  if (prs->pgwstr)
384  pfree(prs->pgwstr);
385 
386 #ifdef WPARSER_TRACE
387  fprintf(stderr, "closing parser\n");
388 #endif
389  pfree(prs);
390 }
#define fprintf
Definition: port.h:229
struct TParserPosition * prev
Definition: wparser_def.c:235

References fprintf, pfree(), TParser::pgwstr, TParserPosition::prev, TParser::state, and TParser::wstr.

Referenced by prsd_end().

◆ TParserCopyClose()

static void TParserCopyClose ( TParser prs)
static

Definition at line 396 of file wparser_def.c.

/*
 * TParserCopyClose - release a parser created by TParserCopyInit().
 *
 * Frees the position stack and the TParser struct only.  Unlike
 * TParserClose(), wstr and pgwstr are not freed: TParserCopyInit() sets them
 * to point into the original parser's buffers rather than allocating them.
 */
397 {
     /* pop and free the whole position stack */
398  while (prs->state)
399  {
400  TParserPosition *ptr = prs->state->prev;
401 
402  pfree(prs->state);
403  prs->state = ptr;
404  }
405 
406 #ifdef WPARSER_TRACE
407  fprintf(stderr, "closing parser copy\n");
408 #endif
409  pfree(prs);
410 }

References fprintf, pfree(), TParserPosition::prev, and TParser::state.

Referenced by p_ishost(), and p_isURLPath().

◆ TParserCopyInit()

static TParser* TParserCopyInit ( const TParser orig)
static

Definition at line 345 of file wparser_def.c.

/*
 * TParserCopyInit - create a secondary parser over the rest of orig's input.
 *
 * The new parser starts at orig's current position: str and lenstr are
 * offset by orig->state->posbyte, and the wide-character buffers (when
 * present) by orig->state->poschar.  No text is copied; the new parser's
 * pointers refer into orig's buffers.  It gets its own single-entry position
 * stack, starting in state TPS_Base.
 *
 * Returns the newly allocated (palloc0'd) parser.
 */
346 {
347  TParser *prs = (TParser *) palloc0(sizeof(TParser));
348 
349  prs->charmaxlen = orig->charmaxlen;
     /* byte-indexed view of the unconsumed input */
350  prs->str = orig->str + orig->state->posbyte;
351  prs->lenstr = orig->lenstr - orig->state->posbyte;
352  prs->usewide = orig->usewide;
353 
     /* wide buffers are indexed by character position, not by byte position */
354  if (orig->pgwstr)
355  prs->pgwstr = orig->pgwstr + orig->state->poschar;
356  if (orig->wstr)
357  prs->wstr = orig->wstr + orig->state->poschar;
358 
359  prs->state = newTParserPosition(NULL);
360  prs->state->state = TPS_Base;
361 
362 #ifdef WPARSER_TRACE
363  fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
364 #endif
365 
366  return prs;
367 }
void * palloc0(Size size)
Definition: mcxt.c:1099
int charmaxlen
Definition: wparser_def.c:249

References TParser::charmaxlen, fprintf, TParser::lenstr, newTParserPosition(), palloc0(), TParser::pgwstr, TParserPosition::posbyte, TParserPosition::poschar, TParserPosition::state, TParser::state, TParser::str, TPS_Base, TParser::usewide, and TParser::wstr.

Referenced by p_ishost(), and p_isURLPath().

◆ TParserGet()

static bool TParserGet ( TParser prs)
static

Definition at line 1697 of file wparser_def.c.

/*
 * TParserGet - run the state machine until the next token is recognized.
 *
 * Starting at the current position, repeatedly looks up the rule list for
 * the current state in Actions[], picks the first rule whose character test
 * accepts the input, and applies that rule's special handler, flags and
 * state transition.
 *
 * Returns true when the last applied rule carried A_BINGO; in that case
 * prs->token, prs->lenbytetoken, prs->lenchartoken and prs->type describe
 * the token.  Returns false when the input was already exhausted on entry,
 * or when the loop ended without an A_BINGO rule.
 */
1698 {
1699  const TParserStateActionItem *item = NULL;
1700 
1701  Assert(prs->state);
1702 
     /* nothing left to scan */
1703  if (prs->state->posbyte >= prs->lenstr)
1704  return false;
1705 
1706  prs->token = prs->str + prs->state->posbyte;
1707  prs->state->pushedAtAction = NULL;
1708 
1709  /* look at string */
     /*
      * Note the "<=": the loop body also runs once with posbyte == lenstr
      * (charlen 0), so the current state's rules get applied at end of input.
      */
1710  while (prs->state->posbyte <= prs->lenstr)
1711  {
1712  if (prs->state->posbyte == prs->lenstr)
1713  prs->state->charlen = 0;
1714  else
     /* pg_mblen() is only consulted when charmaxlen is not 1 */
1715  prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
1716  pg_mblen(prs->str + prs->state->posbyte);
1717 
1718  Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
1719  Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
1720  Assert(Actions[prs->state->state].state == prs->state->state);
1721 
1722  if (prs->state->pushedAtAction)
1723  {
1724  /* After a POP, pick up at the next test */
1725  item = prs->state->pushedAtAction + 1;
1726  prs->state->pushedAtAction = NULL;
1727  }
1728  else
1729  {
1730  item = Actions[prs->state->state].action;
1731  Assert(item != NULL);
1732  }
1733 
1734  /* find action by character class */
     /*
      * Stops at the first rule whose test returns nonzero, or at the rule
      * whose isclass is NULL, which therefore acts as the default rule.
      */
1735  while (item->isclass)
1736  {
1737  prs->c = item->c;
1738  if (item->isclass(prs) != 0)
1739  break;
1740  item++;
1741  }
1742 
1743 #ifdef WPARSER_TRACE
1744  {
1745  TParserPosition *ptr;
1746 
1747  fprintf(stderr, "state ");
1748  /* indent according to stack depth */
1749  for (ptr = prs->state->prev; ptr; ptr = ptr->prev)
1750  fprintf(stderr, " ");
1751  fprintf(stderr, "%s ", Actions[prs->state->state].state_name);
1752  if (prs->state->posbyte < prs->lenstr)
1753  fprintf(stderr, "at %c", *(prs->str + prs->state->posbyte));
1754  else
1755  fprintf(stderr, "at EOF");
1756  fprintf(stderr, " matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
1757  (int) (item - Actions[prs->state->state].action),
1758  (item->flags & A_BINGO) ? " BINGO" : "",
1759  (item->flags & A_POP) ? " POP" : "",
1760  (item->flags & A_PUSH) ? " PUSH" : "",
1761  (item->flags & A_RERUN) ? " RERUN" : "",
1762  (item->flags & A_CLEAR) ? " CLEAR" : "",
1763  (item->flags & A_MERGE) ? " MERGE" : "",
1764  (item->flags & A_CLRALL) ? " CLRALL" : "",
1765  (item->tostate != TPS_Null) ? " tostate " : "",
1766  (item->tostate != TPS_Null) ? Actions[item->tostate].state_name : "",
1767  (item->type > 0) ? " type " : "",
1768  tok_alias[item->type]);
1769  }
1770 #endif
1771 
1772  /* call special handler if exists */
1773  if (item->special)
1774  item->special(prs);
1775 
1776  /* BINGO, token is found */
1777  if (item->flags & A_BINGO)
1778  {
1779  Assert(item->type > 0);
     /* publish the accumulated token lengths and reset the counters */
1780  prs->lenbytetoken = prs->state->lenbytetoken;
1781  prs->lenchartoken = prs->state->lenchartoken;
1782  prs->state->lenbytetoken = prs->state->lenchartoken = 0;
1783  prs->type = item->type;
1784  }
1785 
1786  /* do various actions by flags */
     /*
      * This is an if / else-if chain, so at most one of POP, PUSH, CLEAR,
      * CLRALL and MERGE is performed per rule, in that order of precedence.
      */
1787  if (item->flags & A_POP)
1788  { /* pop stored state in stack */
1789  TParserPosition *ptr = prs->state->prev;
1790 
1791  pfree(prs->state);
1792  prs->state = ptr;
1793  Assert(prs->state);
1794  }
1795  else if (item->flags & A_PUSH)
1796  { /* push (store) state in stack */
1797  prs->state->pushedAtAction = item; /* remember where we push */
1798  prs->state = newTParserPosition(prs->state);
1799  }
1800  else if (item->flags & A_CLEAR)
1801  { /* clear previous pushed state */
1802  TParserPosition *ptr;
1803 
1804  Assert(prs->state->prev);
1805  ptr = prs->state->prev->prev;
1806  pfree(prs->state->prev);
1807  prs->state->prev = ptr;
1808  }
1809  else if (item->flags & A_CLRALL)
1810  { /* clear all previous pushed state */
1811  TParserPosition *ptr;
1812 
1813  while (prs->state->prev)
1814  {
1815  ptr = prs->state->prev->prev;
1816  pfree(prs->state->prev);
1817  prs->state->prev = ptr;
1818  }
1819  }
1820  else if (item->flags & A_MERGE)
1821  { /* merge posinfo with current and pushed state */
1822  TParserPosition *ptr = prs->state;
1823 
1824  Assert(prs->state->prev);
1825  prs->state = prs->state->prev;
1826 
     /* the pushed entry keeps its own state but takes over the position data */
1827  prs->state->posbyte = ptr->posbyte;
1828  prs->state->poschar = ptr->poschar;
1829  prs->state->charlen = ptr->charlen;
1830  prs->state->lenbytetoken = ptr->lenbytetoken;
1831  prs->state->lenchartoken = ptr->lenchartoken;
1832  pfree(ptr);
1833  }
1834 
1835  /* set new state if pointed */
1836  if (item->tostate != TPS_Null)
1837  prs->state->state = item->tostate;
1838 
1839  /* check for go away */
     /* stop on a found token, or at end of input unless the rule asks for a rerun */
1840  if ((item->flags & A_BINGO) ||
1841  (prs->state->posbyte >= prs->lenstr &&
1842  (item->flags & A_RERUN) == 0))
1843  break;
1844 
1845  /* go to beginning of loop if we should rerun or we just restore state */
1846  if (item->flags & (A_RERUN | A_POP))
1847  continue;
1848 
1849  /* move forward */
     /* charlen is 0 at end of input, in which case the position stays put */
1850  if (prs->state->charlen)
1851  {
1852  prs->state->posbyte += prs->state->charlen;
1853  prs->state->lenbytetoken += prs->state->charlen;
1854  prs->state->poschar++;
1855  prs->state->lenchartoken++;
1856  }
1857  }
1858 
1859  return (item && (item->flags & A_BINGO));
1860 }
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
const TParserStateActionItem * pushedAtAction
Definition: wparser_def.c:236
TParserCharTest isclass
Definition: wparser_def.c:209
TParserState tostate
Definition: wparser_def.c:212
TParserSpecial special
Definition: wparser_def.c:214
const TParserStateActionItem * action
Definition: wparser_def.c:1596
TParserState state
Definition: wparser_def.c:1597
#define A_POP
Definition: wparser_def.c:220
#define A_RERUN
Definition: wparser_def.c:222
static const TParserStateAction Actions[]
Definition: wparser_def.c:1615
#define A_MERGE
Definition: wparser_def.c:224
#define A_BINGO
Definition: wparser_def.c:219
#define A_CLRALL
Definition: wparser_def.c:225
#define A_CLEAR
Definition: wparser_def.c:223
#define A_PUSH
Definition: wparser_def.c:221

References A_BINGO, A_CLEAR, A_CLRALL, A_MERGE, A_POP, A_PUSH, A_RERUN, TParserStateAction::action, Actions, Assert(), TParserStateActionItem::c, TParser::c, TParserPosition::charlen, TParser::charmaxlen, TParserStateActionItem::flags, fprintf, TParserStateActionItem::isclass, TParserPosition::lenbytetoken, TParser::lenbytetoken, TParserPosition::lenchartoken, TParser::lenchartoken, TParser::lenstr, newTParserPosition(), pfree(), pg_mblen(), TParserPosition::posbyte, TParserPosition::poschar, TParserPosition::prev, TParserPosition::pushedAtAction, TParserStateActionItem::special, TParserPosition::state, TParser::state, TParserStateAction::state, TParser::str, tok_alias, TParser::token, TParserStateActionItem::tostate, TPS_Base, TPS_Null, TParserStateActionItem::type, and TParser::type.

Referenced by p_ishost(), p_isURLPath(), and prsd_nexttoken().

◆ TParserInit()

static TParser* TParserInit ( char *  str,
int  len 
)
static

Definition at line 287 of file wparser_def.c.

/*
 * TParserInit - create a parser for the first len bytes of str.
 *
 * The input string is referenced, not copied.  When the database encoding
 * can use more than one byte per character, a wide-character copy of the
 * input is prepared as well: pgwstr (pg_wchar) if the collation's ctype is
 * C, otherwise wstr (wchar_t).  The position stack starts with a single
 * entry in state TPS_Base.
 *
 * Returns the newly allocated (palloc0'd) parser.
 */
288 {
289  TParser *prs = (TParser *) palloc0(sizeof(TParser));
290 
     /* must be set before the test below, else the wide path is never taken */
291  prs->charmaxlen = pg_database_encoding_max_length();
292  prs->str = str;
293  prs->lenstr = len;
294 
295  /*
296  * Use wide char code only when max encoding length > 1.
297  */
298  if (prs->charmaxlen > 1)
299  {
300  Oid collation = DEFAULT_COLLATION_OID; /* TODO */
301  pg_locale_t mylocale = 0; /* TODO */
302 
303  prs->usewide = true;
304  if (lc_ctype_is_c(collation))
305  {
306  /*
307  * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
308  * be different from sizeof(wchar_t)
309  */
     /* lenstr + 1 elements: one per input byte at most, plus one spare */
310  prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
311  pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
312  }
313  else
314  {
315  prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
316  char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr,
317  mylocale);
318  }
319  }
320  else
321  prs->usewide = false;
322 
323  prs->state = newTParserPosition(NULL);
324  prs->state->state = TPS_Base;
325 
326 #ifdef WPARSER_TRACE
327  fprintf(stderr, "parsing \"%.*s\"\n", len, str);
328 #endif
329 
330  return prs;
331 }
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:929
const void size_t len
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1379
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:2088
unsigned int Oid
Definition: postgres_ext.h:31

References char2wchar(), TParser::charmaxlen, fprintf, lc_ctype_is_c(), len, TParser::lenstr, newTParserPosition(), palloc(), palloc0(), pg_database_encoding_max_length(), pg_mb2wchar_with_len(), TParser::pgwstr, TParserPosition::state, TParser::state, generate_unaccent_rules::str, TParser::str, TPS_Base, TParser::usewide, and TParser::wstr.

Referenced by prsd_start().

Variable Documentation

◆ Actions

const TParserStateAction Actions[]
static

Definition at line 1615 of file wparser_def.c.

Referenced by TParserGet().

◆ actionTPS_Base

const TParserStateActionItem actionTPS_Base[]
static
Initial value:
= {
{p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL},
{p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
{p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
{p_iseqC, '&', A_PUSH, TPS_InXMLEntityFirst, 0, NULL},
{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InPathFirstFirst, 0, NULL},
{NULL, 0, A_NEXT, TPS_InSpace, 0, NULL}
}
static int p_isasclet(TParser *prs)
Definition: wparser_def.c:498
static int p_isignore(TParser *prs)
Definition: wparser_def.c:622
#define A_NEXT
Definition: wparser_def.c:218

Definition at line 958 of file wparser_def.c.

◆ actionTPS_InAsciiWord

const TParserStateActionItem actionTPS_InAsciiWord[]
static
Initial value:
= {
{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
{p_isspecial, 0, A_NEXT, TPS_InWord, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL}
}
#define ASCIIWORD
Definition: wparser_def.c:34
static int p_isspecial(TParser *prs)
Definition: wparser_def.c:679

Definition at line 986 of file wparser_def.c.

◆ actionTPS_InCloseCommentFirst

const TParserStateActionItem actionTPS_InCloseCommentFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
}

Definition at line 1296 of file wparser_def.c.

◆ actionTPS_InCloseCommentLast

const TParserStateActionItem actionTPS_InCloseCommentLast[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL},
{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
}

Definition at line 1302 of file wparser_def.c.

◆ actionTPS_InComment

const TParserStateActionItem actionTPS_InComment[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
}

Definition at line 1290 of file wparser_def.c.

◆ actionTPS_InCommentEnd

const TParserStateActionItem actionTPS_InCommentEnd[]
static
Initial value:
= {
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}
}
#define TAG_T
Definition: wparser_def.c:46

Definition at line 1309 of file wparser_def.c.

◆ actionTPS_InCommentFirst

const TParserStateActionItem actionTPS_InCommentFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},
{p_iseqC, 'D', A_NEXT, TPS_InTag, 0, NULL},
{p_iseqC, 'd', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1275 of file wparser_def.c.

◆ actionTPS_InCommentLast

const TParserStateActionItem actionTPS_InCommentLast[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1284 of file wparser_def.c.

◆ actionTPS_InDecimal

const TParserStateActionItem actionTPS_InDecimal[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL}
}
#define DECIMAL_T
Definition: wparser_def.c:53

Definition at line 1078 of file wparser_def.c.

◆ actionTPS_InDecimalFirst

const TParserStateActionItem actionTPS_InDecimalFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1072 of file wparser_def.c.

◆ actionTPS_InEmail

const TParserStateActionItem actionTPS_InEmail[]
static
Initial value:
= {
{p_isstophost, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
#define EMAIL
Definition: wparser_def.c:37
static int p_isstophost(TParser *prs)
Definition: wparser_def.c:611
static int p_ishost(TParser *prs)
Definition: wparser_def.c:628

Definition at line 1378 of file wparser_def.c.

◆ actionTPS_InFile

const TParserStateActionItem actionTPS_InFile[]
static
Initial value:
= {
{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
}
#define FILEPATH
Definition: wparser_def.c:52

Definition at line 1428 of file wparser_def.c.

◆ actionTPS_InFileFirst

const TParserStateActionItem actionTPS_InFileFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1384 of file wparser_def.c.

◆ actionTPS_InFileNext

const TParserStateActionItem actionTPS_InFileNext[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_CLEAR, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1439 of file wparser_def.c.

◆ actionTPS_InFileTwiddle

const TParserStateActionItem actionTPS_InFileTwiddle[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1394 of file wparser_def.c.

◆ actionTPS_InFURL

const TParserStateActionItem actionTPS_InFURL[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
static void SpecialFURL(TParser *prs)
Definition: wparser_def.c:587
static int p_isURLPath(TParser *prs)
Definition: wparser_def.c:650
#define URL_T
Definition: wparser_def.c:38

Definition at line 1463 of file wparser_def.c.

◆ actionTPS_InHost

const TParserStateActionItem actionTPS_InHost[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1367 of file wparser_def.c.

◆ actionTPS_InHostDomain

const TParserStateActionItem actionTPS_InHostDomain[]
static
Initial value:
= {
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
}

Definition at line 1331 of file wparser_def.c.

◆ actionTPS_InHostDomainSecond

const TParserStateActionItem actionTPS_InHostDomainSecond[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1320 of file wparser_def.c.

◆ actionTPS_InHostFirstAN

const TParserStateActionItem actionTPS_InHostFirstAN[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1360 of file wparser_def.c.

◆ actionTPS_InHostFirstDomain

const TParserStateActionItem actionTPS_InHostFirstDomain[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1313 of file wparser_def.c.

◆ actionTPS_InHyphenAsciiWord

const TParserStateActionItem actionTPS_InHyphenAsciiWord[]
static
Initial value:
= {
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
}
static void SpecialHyphen(TParser *prs)
Definition: wparser_def.c:595
#define ASCIIHWORD
Definition: wparser_def.c:49

Definition at line 1493 of file wparser_def.c.

◆ actionTPS_InHyphenAsciiWordFirst

const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1485 of file wparser_def.c.

◆ actionTPS_InHyphenAsciiWordPart

const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[]
static
Initial value:
= {
{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
}
#define ASCIIPARTHWORD
Definition: wparser_def.c:44

Definition at line 1566 of file wparser_def.c.

◆ actionTPS_InHyphenDigitLookahead

const TParserStateActionItem actionTPS_InHyphenDigitLookahead[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1534 of file wparser_def.c.

◆ actionTPS_InHyphenNumWord

const TParserStateActionItem actionTPS_InHyphenNumWord[]
static
Initial value:

Definition at line 1526 of file wparser_def.c.

◆ actionTPS_InHyphenNumWordFirst

const TParserStateActionItem actionTPS_InHyphenNumWordFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1519 of file wparser_def.c.

◆ actionTPS_InHyphenNumWordPart

const TParserStateActionItem actionTPS_InHyphenNumWordPart[]
static
Initial value:
= {
{p_isalnum, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
}
#define NUMPARTHWORD
Definition: wparser_def.c:42

Definition at line 1575 of file wparser_def.c.

◆ actionTPS_InHyphenUnsignedInt

const TParserStateActionItem actionTPS_InHyphenUnsignedInt[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isalpha, 0, A_CLEAR, TPS_InHyphenNumWordPart, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1582 of file wparser_def.c.

◆ actionTPS_InHyphenWord

const TParserStateActionItem actionTPS_InHyphenWord[]
static
Initial value:
= {
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
}
#define HWORD
Definition: wparser_def.c:50

Definition at line 1510 of file wparser_def.c.

◆ actionTPS_InHyphenWordFirst

const TParserStateActionItem actionTPS_InHyphenWordFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1503 of file wparser_def.c.

◆ actionTPS_InHyphenWordPart

const TParserStateActionItem actionTPS_InHyphenWordPart[]
static
Initial value:
= {
{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
{NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHWORD, NULL}
}
#define PARTHWORD
Definition: wparser_def.c:43

Definition at line 1558 of file wparser_def.c.

◆ actionTPS_InMantissa

const TParserStateActionItem actionTPS_InMantissa[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}
}
#define SCIENTIFIC
Definition: wparser_def.c:40

Definition at line 1127 of file wparser_def.c.

◆ actionTPS_InMantissaFirst

const TParserStateActionItem actionTPS_InMantissaFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
{p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1113 of file wparser_def.c.

◆ actionTPS_InMantissaSign

const TParserStateActionItem actionTPS_InMantissaSign[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1121 of file wparser_def.c.

◆ actionTPS_InNumWord

const TParserStateActionItem actionTPS_InNumWord[]
static
Initial value:
= {
{p_isalnum, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, NUMWORD, NULL}
}
#define NUMWORD
Definition: wparser_def.c:36

Definition at line 975 of file wparser_def.c.

◆ actionTPS_InParseHyphen

const TParserStateActionItem actionTPS_InParseHyphen[]
static
Initial value:
= {
{p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHyphenUnsignedInt, 0, NULL},
{NULL, 0, A_RERUN, TPS_Base, 0, NULL}
}

Definition at line 1542 of file wparser_def.c.

◆ actionTPS_InParseHyphenHyphen

const TParserStateActionItem actionTPS_InParseHyphenHyphen[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalnum, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
#define SPACE
Definition: wparser_def.c:45

Definition at line 1551 of file wparser_def.c.

◆ actionTPS_InPathFirst

const TParserStateActionItem actionTPS_InPathFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1403 of file wparser_def.c.

◆ actionTPS_InPathFirstFirst

const TParserStateActionItem actionTPS_InPathFirstFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1413 of file wparser_def.c.

◆ actionTPS_InPathSecond

const TParserStateActionItem actionTPS_InPathSecond[]
static
Initial value:
= {
{p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
{p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1420 of file wparser_def.c.

◆ actionTPS_InPort

const TParserStateActionItem actionTPS_InPort[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
}

Definition at line 1352 of file wparser_def.c.

◆ actionTPS_InPortFirst

const TParserStateActionItem actionTPS_InPortFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1346 of file wparser_def.c.

◆ actionTPS_InProtocolEnd

const TParserStateActionItem actionTPS_InProtocolEnd[]
static
Initial value:
= {
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, PROTOCOL, NULL}
}
#define PROTOCOL
Definition: wparser_def.c:47

Definition at line 1481 of file wparser_def.c.

◆ actionTPS_InProtocolFirst

const TParserStateActionItem actionTPS_InProtocolFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1469 of file wparser_def.c.

◆ actionTPS_InProtocolSecond

const TParserStateActionItem actionTPS_InProtocolSecond[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1475 of file wparser_def.c.

◆ actionTPS_InSignedInt

const TParserStateActionItem actionTPS_InSignedInt[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}
}
#define SIGNEDINT
Definition: wparser_def.c:54

Definition at line 1036 of file wparser_def.c.

◆ actionTPS_InSignedIntFirst

const TParserStateActionItem actionTPS_InSignedIntFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT | A_CLEAR, TPS_InSignedInt, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1030 of file wparser_def.c.

◆ actionTPS_InSpace

const TParserStateActionItem actionTPS_InSpace[]
static
Initial value:
= {
{p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL},
{p_isignore, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL},
{p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SPACE, NULL}
}

Definition at line 1045 of file wparser_def.c.

◆ actionTPS_InSVerVersion

const TParserStateActionItem actionTPS_InSVerVersion[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_BINGO | A_CLRALL, TPS_InUnsignedInt, SPACE, NULL},
{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
}

Definition at line 1093 of file wparser_def.c.

◆ actionTPS_InTag

const TParserStateActionItem actionTPS_InTag[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
{p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL},
{p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '&', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '?', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '~', A_NEXT, TPS_Null, 0, NULL},
{p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
static void SpecialTags(TParser *prs)
Definition: wparser_def.c:563

Definition at line 1230 of file wparser_def.c.

◆ actionTPS_InTagBackSleshed

const TParserStateActionItem actionTPS_InTagBackSleshed[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_MERGE, TPS_Null, 0, NULL}
}

Definition at line 1266 of file wparser_def.c.

◆ actionTPS_InTagBeginEnd

const TParserStateActionItem actionTPS_InTagBeginEnd[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1224 of file wparser_def.c.

◆ actionTPS_InTagCloseFirst

const TParserStateActionItem actionTPS_InTagCloseFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InTagName, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1204 of file wparser_def.c.

◆ actionTPS_InTagEnd

const TParserStateActionItem actionTPS_InTagEnd[]
static
Initial value:
= {
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}
}

Definition at line 1271 of file wparser_def.c.

◆ actionTPS_InTagEscapeK

const TParserStateActionItem actionTPS_InTagEscapeK[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
{p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL}
}

Definition at line 1252 of file wparser_def.c.

◆ actionTPS_InTagEscapeKK

const TParserStateActionItem actionTPS_InTagEscapeKK[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
{p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL}
}

Definition at line 1259 of file wparser_def.c.

◆ actionTPS_InTagFirst

const TParserStateActionItem actionTPS_InTagFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL},
{p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL},
{p_iseqC, '?', A_PUSH, TPS_InXMLBegin, 0, NULL},
{p_isasclet, 0, A_PUSH, TPS_InTagName, 0, NULL},
{p_iseqC, ':', A_PUSH, TPS_InTagName, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InTagName, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1185 of file wparser_def.c.

◆ actionTPS_InTagName

const TParserStateActionItem actionTPS_InTagName[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InTagBeginEnd, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
{p_isspace, 0, A_NEXT, TPS_InTag, 0, SpecialTags},
{p_isalnum, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1210 of file wparser_def.c.

◆ actionTPS_InUDecimal

const TParserStateActionItem actionTPS_InUDecimal[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL}
}

Definition at line 1063 of file wparser_def.c.

◆ actionTPS_InUDecimalFirst

const TParserStateActionItem actionTPS_InUDecimalFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1057 of file wparser_def.c.

◆ actionTPS_InUnsignedInt

const TParserStateActionItem actionTPS_InUnsignedInt[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_isasclet, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
}
#define UNSIGNEDINT
Definition: wparser_def.c:55

Definition at line 1013 of file wparser_def.c.

◆ actionTPS_InURLPath

const TParserStateActionItem actionTPS_InURLPath[]
static
Initial value:
= {
{p_isurlchar, 0, A_NEXT, TPS_InURLPath, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, URLPATH, NULL}
}
static int p_isurlchar(TParser *prs)
Definition: wparser_def.c:504

Definition at line 1457 of file wparser_def.c.

◆ actionTPS_InURLPathFirst

const TParserStateActionItem actionTPS_InURLPathFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isurlchar, 0, A_NEXT, TPS_InURLPath, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL},
}

Definition at line 1447 of file wparser_def.c.

◆ actionTPS_InURLPathStart

const TParserStateActionItem actionTPS_InURLPathStart[]
static
Initial value:
= {
{NULL, 0, A_NEXT, TPS_InURLPath, 0, NULL}
}

Definition at line 1453 of file wparser_def.c.

◆ actionTPS_InVersion

const TParserStateActionItem actionTPS_InVersion[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}
}
#define VERSIONNUMBER
Definition: wparser_def.c:41

Definition at line 1106 of file wparser_def.c.

◆ actionTPS_InVersionFirst

const TParserStateActionItem actionTPS_InVersionFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1100 of file wparser_def.c.

◆ actionTPS_InVerVersion

const TParserStateActionItem actionTPS_InVerVersion[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
static void SpecialVerVersion(TParser *prs)
Definition: wparser_def.c:602

Definition at line 1087 of file wparser_def.c.

◆ actionTPS_InWord

const TParserStateActionItem actionTPS_InWord[]
static
Initial value:
= {
{p_isEOF, 0, A_BINGO, TPS_Base, WORD_T, NULL},
{p_isalpha, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isspecial, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, WORD_T, NULL}
}
#define WORD_T
Definition: wparser_def.c:35

Definition at line 1004 of file wparser_def.c.

◆ actionTPS_InXMLBegin

const TParserStateActionItem actionTPS_InXMLBegin[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1196 of file wparser_def.c.

◆ actionTPS_InXMLEntity

const TParserStateActionItem actionTPS_InXMLEntity[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalnum, 0, A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, ':', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1142 of file wparser_def.c.

◆ actionTPS_InXMLEntityEnd

const TParserStateActionItem actionTPS_InXMLEntityEnd[]
static
Initial value:
= {
{NULL, 0, A_BINGO | A_CLEAR, TPS_Base, XMLENTITY, NULL}
}
#define XMLENTITY
Definition: wparser_def.c:56

Definition at line 1181 of file wparser_def.c.

◆ actionTPS_InXMLEntityFirst

const TParserStateActionItem actionTPS_InXMLEntityFirst[]
static
Initial value:
= {
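{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '#', A_NEXT, TPS_InXMLEntityNumFirst, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, ':', A_NEXT, TPS_InXMLEntity, 0, NULL},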
{p_iseqC, '_', A_NEXT, TPS_InXMLEntity, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1133 of file wparser_def.c.

◆ actionTPS_InXMLEntityHexNum

const TParserStateActionItem actionTPS_InXMLEntityHexNum[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isxdigit, 0, A_NEXT, TPS_InXMLEntityHexNum, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1174 of file wparser_def.c.

◆ actionTPS_InXMLEntityHexNumFirst

const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isxdigit, 0, A_NEXT, TPS_InXMLEntityHexNum, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1161 of file wparser_def.c.

◆ actionTPS_InXMLEntityNum

const TParserStateActionItem actionTPS_InXMLEntityNum[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InXMLEntityNum, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1167 of file wparser_def.c.

◆ actionTPS_InXMLEntityNumFirst

const TParserStateActionItem actionTPS_InXMLEntityNumFirst[]
static
Initial value:
= {
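{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, 'x', A_NEXT, TPS_InXMLEntityHexNumFirst, 0, NULL},
{p_iseqC, 'X', A_NEXT, TPS_InXMLEntityHexNumFirst, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InXMLEntityNum, 0, NULL},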
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1153 of file wparser_def.c.

◆ lex_descr

const char* const lex_descr[]
static
Initial value:
= {
"",
"Word, all ASCII",
"Word, all letters",
"Word, letters and digits",
"Email address",
"URL",
"Host",
"Scientific notation",
"Version number",
"Hyphenated word part, letters and digits",
"Hyphenated word part, all letters",
"Hyphenated word part, all ASCII",
"Space symbols",
"XML tag",
"Protocol head",
"Hyphenated word, letters and digits",
"Hyphenated word, all ASCII",
"Hyphenated word, all letters",
"URL path",
"File or path name",
"Decimal notation",
"Signed integer",
"Unsigned integer",
"XML entity"
}

Definition at line 87 of file wparser_def.c.

Referenced by prsd_lextype().

◆ tok_alias

const char* const tok_alias[]
static
Initial value:
= {
"",
"asciiword",
"word",
"numword",
"email",
"url",
"host",
"sfloat",
"version",
"hword_numpart",
"hword_part",
"hword_asciipart",
"blank",
"tag",
"protocol",
"numhword",
"asciihword",
"hword",
"url_path",
"file",
"float",
"int",
"uint",
"entity"
}

Definition at line 60 of file wparser_def.c.

Referenced by prsd_lextype(), and TParserGet().