PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
wparser_def.c File Reference
#include "postgres.h"
#include <limits.h>
#include <wctype.h>
#include "commands/defrem.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
Include dependency graph for wparser_def.c:

Go to the source code of this file.

Data Structures

struct  TParserStateActionItem
 
struct  TParserPosition
 
struct  TParser
 
struct  TParserStateAction
 
struct  CoverPos
 
struct  hlCheck
 

Macros

#define ASCIIWORD   1
 
#define WORD_T   2
 
#define NUMWORD   3
 
#define EMAIL   4
 
#define URL_T   5
 
#define HOST   6
 
#define SCIENTIFIC   7
 
#define VERSIONNUMBER   8
 
#define NUMPARTHWORD   9
 
#define PARTHWORD   10
 
#define ASCIIPARTHWORD   11
 
#define SPACE   12
 
#define TAG_T   13
 
#define PROTOCOL   14
 
#define NUMHWORD   15
 
#define ASCIIHWORD   16
 
#define HWORD   17
 
#define URLPATH   18
 
#define FILEPATH   19
 
#define DECIMAL_T   20
 
#define SIGNEDINT   21
 
#define UNSIGNEDINT   22
 
#define XMLENTITY   23
 
#define LASTNUM   23
 
#define A_NEXT   0x0000
 
#define A_BINGO   0x0001
 
#define A_POP   0x0002
 
#define A_PUSH   0x0004
 
#define A_RERUN   0x0008
 
#define A_CLEAR   0x0010
 
#define A_MERGE   0x0020
 
#define A_CLRALL   0x0040
 
#define p_iswhat(type, nonascii)
 
#define TPARSERSTATEACTION(state)    { CppConcat(action,state), state }
 
#define TS_IDIGNORE(x)   ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
 
#define HLIDREPLACE(x)   ( (x)==TAG_T )
 
#define HLIDSKIP(x)   ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
 
#define XMLHLIDSKIP(x)   ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
 
#define NONWORDTOKEN(x)   ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
 
#define NOENDTOKEN(x)   ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
 
#define INTERESTINGWORD(j)    (prs->words[j].item && !prs->words[j].repeated)
 
#define BADENDPOINT(j)
 

Typedefs

typedef int(* TParserCharTest) (struct TParser *)
 
typedef void(* TParserSpecial) (struct TParser *)
 
typedef struct TParserPosition TParserPosition
 
typedef struct TParser TParser
 

Enumerations

enum  TParserState {
  TPS_Base = 0 , TPS_InNumWord , TPS_InAsciiWord , TPS_InWord ,
  TPS_InUnsignedInt , TPS_InSignedIntFirst , TPS_InSignedInt , TPS_InSpace ,
  TPS_InUDecimalFirst , TPS_InUDecimal , TPS_InDecimalFirst , TPS_InDecimal ,
  TPS_InVerVersion , TPS_InSVerVersion , TPS_InVersionFirst , TPS_InVersion ,
  TPS_InMantissaFirst , TPS_InMantissaSign , TPS_InMantissa , TPS_InXMLEntityFirst ,
  TPS_InXMLEntity , TPS_InXMLEntityNumFirst , TPS_InXMLEntityNum , TPS_InXMLEntityHexNumFirst ,
  TPS_InXMLEntityHexNum , TPS_InXMLEntityEnd , TPS_InTagFirst , TPS_InXMLBegin ,
  TPS_InTagCloseFirst , TPS_InTagName , TPS_InTagBeginEnd , TPS_InTag ,
  TPS_InTagEscapeK , TPS_InTagEscapeKK , TPS_InTagBackSleshed , TPS_InTagEnd ,
  TPS_InCommentFirst , TPS_InCommentLast , TPS_InComment , TPS_InCloseCommentFirst ,
  TPS_InCloseCommentLast , TPS_InCommentEnd , TPS_InHostFirstDomain , TPS_InHostDomainSecond ,
  TPS_InHostDomain , TPS_InPortFirst , TPS_InPort , TPS_InHostFirstAN ,
  TPS_InHost , TPS_InEmail , TPS_InFileFirst , TPS_InFileTwiddle ,
  TPS_InPathFirst , TPS_InPathFirstFirst , TPS_InPathSecond , TPS_InFile ,
  TPS_InFileNext , TPS_InURLPathFirst , TPS_InURLPathStart , TPS_InURLPath ,
  TPS_InFURL , TPS_InProtocolFirst , TPS_InProtocolSecond , TPS_InProtocolEnd ,
  TPS_InHyphenAsciiWordFirst , TPS_InHyphenAsciiWord , TPS_InHyphenWordFirst , TPS_InHyphenWord ,
  TPS_InHyphenNumWordFirst , TPS_InHyphenNumWord , TPS_InHyphenDigitLookahead , TPS_InParseHyphen ,
  TPS_InParseHyphenHyphen , TPS_InHyphenWordPart , TPS_InHyphenAsciiWordPart , TPS_InHyphenNumWordPart ,
  TPS_InHyphenUnsignedInt , TPS_Null
}
 

Functions

static bool TParserGet (TParser *prs)
 
static TParserPosition * newTParserPosition (TParserPosition *prev)
 
static TParser * TParserInit (char *str, int len)
 
static TParser * TParserCopyInit (const TParser *orig)
 
static void TParserClose (TParser *prs)
 
static void TParserCopyClose (TParser *prs)
 
 p_iswhat (alnum, 1)
 
static int p_isEOF (TParser *prs)
 
static int p_iseqC (TParser *prs)
 
static int p_isneC (TParser *prs)
 
static int p_isascii (TParser *prs)
 
static int p_isasclet (TParser *prs)
 
static int p_isurlchar (TParser *prs)
 
void _make_compiler_happy (void)
 
static void SpecialTags (TParser *prs)
 
static void SpecialFURL (TParser *prs)
 
static void SpecialHyphen (TParser *prs)
 
static void SpecialVerVersion (TParser *prs)
 
static int p_isstophost (TParser *prs)
 
static int p_isignore (TParser *prs)
 
static int p_ishost (TParser *prs)
 
static int p_isURLPath (TParser *prs)
 
static int p_isspecial (TParser *prs)
 
Datum prsd_lextype (PG_FUNCTION_ARGS)
 
Datum prsd_start (PG_FUNCTION_ARGS)
 
Datum prsd_nexttoken (PG_FUNCTION_ARGS)
 
Datum prsd_end (PG_FUNCTION_ARGS)
 
static TSTernaryValue checkcondition_HL (void *opaque, QueryOperand *val, ExecPhraseData *data)
 
static bool hlCover (HeadlineParsedText *prs, TSQuery query, List *locations, int *nextpos, int *p, int *q)
 
static void mark_fragment (HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
 
static void get_next_fragment (HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
 
static void mark_hl_fragments (HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words, int max_fragments)
 
static void mark_hl_words (HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words)
 
Datum prsd_headline (PG_FUNCTION_ARGS)
 

Variables

static const char *const tok_alias []
 
static const char *const lex_descr []
 
static const TParserStateActionItem actionTPS_Base []
 
static const TParserStateActionItem actionTPS_InNumWord []
 
static const TParserStateActionItem actionTPS_InAsciiWord []
 
static const TParserStateActionItem actionTPS_InWord []
 
static const TParserStateActionItem actionTPS_InUnsignedInt []
 
static const TParserStateActionItem actionTPS_InSignedIntFirst []
 
static const TParserStateActionItem actionTPS_InSignedInt []
 
static const TParserStateActionItem actionTPS_InSpace []
 
static const TParserStateActionItem actionTPS_InUDecimalFirst []
 
static const TParserStateActionItem actionTPS_InUDecimal []
 
static const TParserStateActionItem actionTPS_InDecimalFirst []
 
static const TParserStateActionItem actionTPS_InDecimal []
 
static const TParserStateActionItem actionTPS_InVerVersion []
 
static const TParserStateActionItem actionTPS_InSVerVersion []
 
static const TParserStateActionItem actionTPS_InVersionFirst []
 
static const TParserStateActionItem actionTPS_InVersion []
 
static const TParserStateActionItem actionTPS_InMantissaFirst []
 
static const TParserStateActionItem actionTPS_InMantissaSign []
 
static const TParserStateActionItem actionTPS_InMantissa []
 
static const TParserStateActionItem actionTPS_InXMLEntityFirst []
 
static const TParserStateActionItem actionTPS_InXMLEntity []
 
static const TParserStateActionItem actionTPS_InXMLEntityNumFirst []
 
static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst []
 
static const TParserStateActionItem actionTPS_InXMLEntityNum []
 
static const TParserStateActionItem actionTPS_InXMLEntityHexNum []
 
static const TParserStateActionItem actionTPS_InXMLEntityEnd []
 
static const TParserStateActionItem actionTPS_InTagFirst []
 
static const TParserStateActionItem actionTPS_InXMLBegin []
 
static const TParserStateActionItem actionTPS_InTagCloseFirst []
 
static const TParserStateActionItem actionTPS_InTagName []
 
static const TParserStateActionItem actionTPS_InTagBeginEnd []
 
static const TParserStateActionItem actionTPS_InTag []
 
static const TParserStateActionItem actionTPS_InTagEscapeK []
 
static const TParserStateActionItem actionTPS_InTagEscapeKK []
 
static const TParserStateActionItem actionTPS_InTagBackSleshed []
 
static const TParserStateActionItem actionTPS_InTagEnd []
 
static const TParserStateActionItem actionTPS_InCommentFirst []
 
static const TParserStateActionItem actionTPS_InCommentLast []
 
static const TParserStateActionItem actionTPS_InComment []
 
static const TParserStateActionItem actionTPS_InCloseCommentFirst []
 
static const TParserStateActionItem actionTPS_InCloseCommentLast []
 
static const TParserStateActionItem actionTPS_InCommentEnd []
 
static const TParserStateActionItem actionTPS_InHostFirstDomain []
 
static const TParserStateActionItem actionTPS_InHostDomainSecond []
 
static const TParserStateActionItem actionTPS_InHostDomain []
 
static const TParserStateActionItem actionTPS_InPortFirst []
 
static const TParserStateActionItem actionTPS_InPort []
 
static const TParserStateActionItem actionTPS_InHostFirstAN []
 
static const TParserStateActionItem actionTPS_InHost []
 
static const TParserStateActionItem actionTPS_InEmail []
 
static const TParserStateActionItem actionTPS_InFileFirst []
 
static const TParserStateActionItem actionTPS_InFileTwiddle []
 
static const TParserStateActionItem actionTPS_InPathFirst []
 
static const TParserStateActionItem actionTPS_InPathFirstFirst []
 
static const TParserStateActionItem actionTPS_InPathSecond []
 
static const TParserStateActionItem actionTPS_InFile []
 
static const TParserStateActionItem actionTPS_InFileNext []
 
static const TParserStateActionItem actionTPS_InURLPathFirst []
 
static const TParserStateActionItem actionTPS_InURLPathStart []
 
static const TParserStateActionItem actionTPS_InURLPath []
 
static const TParserStateActionItem actionTPS_InFURL []
 
static const TParserStateActionItem actionTPS_InProtocolFirst []
 
static const TParserStateActionItem actionTPS_InProtocolSecond []
 
static const TParserStateActionItem actionTPS_InProtocolEnd []
 
static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst []
 
static const TParserStateActionItem actionTPS_InHyphenAsciiWord []
 
static const TParserStateActionItem actionTPS_InHyphenWordFirst []
 
static const TParserStateActionItem actionTPS_InHyphenWord []
 
static const TParserStateActionItem actionTPS_InHyphenNumWordFirst []
 
static const TParserStateActionItem actionTPS_InHyphenNumWord []
 
static const TParserStateActionItem actionTPS_InHyphenDigitLookahead []
 
static const TParserStateActionItem actionTPS_InParseHyphen []
 
static const TParserStateActionItem actionTPS_InParseHyphenHyphen []
 
static const TParserStateActionItem actionTPS_InHyphenWordPart []
 
static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart []
 
static const TParserStateActionItem actionTPS_InHyphenNumWordPart []
 
static const TParserStateActionItem actionTPS_InHyphenUnsignedInt []
 
static const TParserStateAction Actions []
 

Macro Definition Documentation

◆ A_BINGO

#define A_BINGO   0x0001

Definition at line 221 of file wparser_def.c.

◆ A_CLEAR

#define A_CLEAR   0x0010

Definition at line 225 of file wparser_def.c.

◆ A_CLRALL

#define A_CLRALL   0x0040

Definition at line 227 of file wparser_def.c.

◆ A_MERGE

#define A_MERGE   0x0020

Definition at line 226 of file wparser_def.c.

◆ A_NEXT

#define A_NEXT   0x0000

Definition at line 220 of file wparser_def.c.

◆ A_POP

#define A_POP   0x0002

Definition at line 222 of file wparser_def.c.

◆ A_PUSH

#define A_PUSH   0x0004

Definition at line 223 of file wparser_def.c.

◆ A_RERUN

#define A_RERUN   0x0008

Definition at line 224 of file wparser_def.c.

◆ ASCIIHWORD

#define ASCIIHWORD   16

Definition at line 51 of file wparser_def.c.

◆ ASCIIPARTHWORD

#define ASCIIPARTHWORD   11

Definition at line 46 of file wparser_def.c.

◆ ASCIIWORD

#define ASCIIWORD   1

Definition at line 36 of file wparser_def.c.

◆ BADENDPOINT

#define BADENDPOINT (   j)
Value:
((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
 !INTERESTINGWORD(j))
int j
Definition: isn.c:78
#define NOENDTOKEN(x)
Definition: wparser_def.c:1892
#define INTERESTINGWORD(j)
Definition: wparser_def.c:1901

Definition at line 1905 of file wparser_def.c.

◆ DECIMAL_T

#define DECIMAL_T   20

Definition at line 55 of file wparser_def.c.

◆ EMAIL

#define EMAIL   4

Definition at line 39 of file wparser_def.c.

◆ FILEPATH

#define FILEPATH   19

Definition at line 54 of file wparser_def.c.

◆ HLIDREPLACE

#define HLIDREPLACE (   x)    ( (x)==TAG_T )

Definition at line 1888 of file wparser_def.c.

◆ HLIDSKIP

#define HLIDSKIP (   x)    ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )

Definition at line 1889 of file wparser_def.c.

◆ HOST

#define HOST   6

Definition at line 41 of file wparser_def.c.

◆ HWORD

#define HWORD   17

Definition at line 52 of file wparser_def.c.

◆ INTERESTINGWORD

#define INTERESTINGWORD (   j)     (prs->words[j].item && !prs->words[j].repeated)

Definition at line 1901 of file wparser_def.c.

◆ LASTNUM

#define LASTNUM   23

Definition at line 60 of file wparser_def.c.

◆ NOENDTOKEN

#define NOENDTOKEN (   x)    ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )

Definition at line 1892 of file wparser_def.c.

◆ NONWORDTOKEN

#define NONWORDTOKEN (   x)    ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )

Definition at line 1891 of file wparser_def.c.

◆ NUMHWORD

#define NUMHWORD   15

Definition at line 50 of file wparser_def.c.

◆ NUMPARTHWORD

#define NUMPARTHWORD   9

Definition at line 44 of file wparser_def.c.

◆ NUMWORD

#define NUMWORD   3

Definition at line 38 of file wparser_def.c.

◆ p_iswhat

#define p_iswhat (   type,
  nonascii 
)
Value:
\
static int \
p_is##type(TParser *prs) \
{ \
pg_locale_t locale = pg_database_locale(); \
pg_wchar wc; \
Assert(prs->state); \
wc = prs->pgwstr[prs->state->poschar]; \
if (prs->charmaxlen > 1 && locale->ctype_is_c && wc > 0x7f) \
return nonascii; \
return pg_isw##type(wc, pg_database_locale()); \
} \
\
static int \
p_isnot##type(TParser *prs) \
{ \
return !p_is##type(prs); \
}
static char * locale
Definition: initdb.c:140
pg_locale_t pg_database_locale(void)
Definition: pg_locale.c:1166
const char * type

Definition at line 388 of file wparser_def.c.

◆ PARTHWORD

#define PARTHWORD   10

Definition at line 45 of file wparser_def.c.

◆ PROTOCOL

#define PROTOCOL   14

Definition at line 49 of file wparser_def.c.

◆ SCIENTIFIC

#define SCIENTIFIC   7

Definition at line 42 of file wparser_def.c.

◆ SIGNEDINT

#define SIGNEDINT   21

Definition at line 56 of file wparser_def.c.

◆ SPACE

#define SPACE   12

Definition at line 47 of file wparser_def.c.

◆ TAG_T

#define TAG_T   13

Definition at line 48 of file wparser_def.c.

◆ TPARSERSTATEACTION

#define TPARSERSTATEACTION (   state)     { CppConcat(action,state), state }

Definition at line 1575 of file wparser_def.c.

◆ TS_IDIGNORE

#define TS_IDIGNORE (   x)    ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )

Definition at line 1887 of file wparser_def.c.

◆ UNSIGNEDINT

#define UNSIGNEDINT   22

Definition at line 57 of file wparser_def.c.

◆ URL_T

#define URL_T   5

Definition at line 40 of file wparser_def.c.

◆ URLPATH

#define URLPATH   18

Definition at line 53 of file wparser_def.c.

◆ VERSIONNUMBER

#define VERSIONNUMBER   8

Definition at line 43 of file wparser_def.c.

◆ WORD_T

#define WORD_T   2

Definition at line 37 of file wparser_def.c.

◆ XMLENTITY

#define XMLENTITY   23

Definition at line 58 of file wparser_def.c.

◆ XMLHLIDSKIP

#define XMLHLIDSKIP (   x)    ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )

Definition at line 1890 of file wparser_def.c.

Typedef Documentation

◆ TParser

typedef struct TParser TParser

◆ TParserCharTest

typedef int(* TParserCharTest) (struct TParser *)

Definition at line 204 of file wparser_def.c.

◆ TParserPosition

typedef struct TParserPosition TParserPosition

◆ TParserSpecial

typedef void(* TParserSpecial) (struct TParser *)

Definition at line 206 of file wparser_def.c.

Enumeration Type Documentation

◆ TParserState

Enumerator
TPS_Base 
TPS_InNumWord 
TPS_InAsciiWord 
TPS_InWord 
TPS_InUnsignedInt 
TPS_InSignedIntFirst 
TPS_InSignedInt 
TPS_InSpace 
TPS_InUDecimalFirst 
TPS_InUDecimal 
TPS_InDecimalFirst 
TPS_InDecimal 
TPS_InVerVersion 
TPS_InSVerVersion 
TPS_InVersionFirst 
TPS_InVersion 
TPS_InMantissaFirst 
TPS_InMantissaSign 
TPS_InMantissa 
TPS_InXMLEntityFirst 
TPS_InXMLEntity 
TPS_InXMLEntityNumFirst 
TPS_InXMLEntityNum 
TPS_InXMLEntityHexNumFirst 
TPS_InXMLEntityHexNum 
TPS_InXMLEntityEnd 
TPS_InTagFirst 
TPS_InXMLBegin 
TPS_InTagCloseFirst 
TPS_InTagName 
TPS_InTagBeginEnd 
TPS_InTag 
TPS_InTagEscapeK 
TPS_InTagEscapeKK 
TPS_InTagBackSleshed 
TPS_InTagEnd 
TPS_InCommentFirst 
TPS_InCommentLast 
TPS_InComment 
TPS_InCloseCommentFirst 
TPS_InCloseCommentLast 
TPS_InCommentEnd 
TPS_InHostFirstDomain 
TPS_InHostDomainSecond 
TPS_InHostDomain 
TPS_InPortFirst 
TPS_InPort 
TPS_InHostFirstAN 
TPS_InHost 
TPS_InEmail 
TPS_InFileFirst 
TPS_InFileTwiddle 
TPS_InPathFirst 
TPS_InPathFirstFirst 
TPS_InPathSecond 
TPS_InFile 
TPS_InFileNext 
TPS_InURLPathFirst 
TPS_InURLPathStart 
TPS_InURLPath 
TPS_InFURL 
TPS_InProtocolFirst 
TPS_InProtocolSecond 
TPS_InProtocolEnd 
TPS_InHyphenAsciiWordFirst 
TPS_InHyphenAsciiWord 
TPS_InHyphenWordFirst 
TPS_InHyphenWord 
TPS_InHyphenNumWordFirst 
TPS_InHyphenNumWord 
TPS_InHyphenDigitLookahead 
TPS_InParseHyphen 
TPS_InParseHyphenHyphen 
TPS_InHyphenWordPart 
TPS_InHyphenAsciiWordPart 
TPS_InHyphenNumWordPart 
TPS_InHyphenUnsignedInt 
TPS_Null 

Definition at line 119 of file wparser_def.c.

120{
121 TPS_Base = 0,
152 TPS_InTag,
198 TPS_Null /* last state (fake value) */
TParserState
Definition: wparser_def.c:120
@ TPS_InXMLEntityHexNumFirst
Definition: wparser_def.c:144
@ TPS_InPort
Definition: wparser_def.c:167
@ TPS_InXMLEntityHexNum
Definition: wparser_def.c:145
@ TPS_InHostDomainSecond
Definition: wparser_def.c:164
@ TPS_InMantissaFirst
Definition: wparser_def.c:137
@ TPS_InTagName
Definition: wparser_def.c:150
@ TPS_InHyphenAsciiWordFirst
Definition: wparser_def.c:185
@ TPS_Null
Definition: wparser_def.c:198
@ TPS_InPathFirstFirst
Definition: wparser_def.c:174
@ TPS_InSignedIntFirst
Definition: wparser_def.c:126
@ TPS_InSignedInt
Definition: wparser_def.c:127
@ TPS_InUnsignedInt
Definition: wparser_def.c:125
@ TPS_InMantissa
Definition: wparser_def.c:139
@ TPS_InProtocolFirst
Definition: wparser_def.c:182
@ TPS_InFURL
Definition: wparser_def.c:181
@ TPS_InMantissaSign
Definition: wparser_def.c:138
@ TPS_InXMLBegin
Definition: wparser_def.c:148
@ TPS_InCommentEnd
Definition: wparser_def.c:162
@ TPS_InHyphenWordFirst
Definition: wparser_def.c:187
@ TPS_InHyphenNumWordPart
Definition: wparser_def.c:196
@ TPS_InPortFirst
Definition: wparser_def.c:166
@ TPS_InProtocolEnd
Definition: wparser_def.c:184
@ TPS_InXMLEntityFirst
Definition: wparser_def.c:140
@ TPS_InHyphenNumWordFirst
Definition: wparser_def.c:189
@ TPS_InCommentLast
Definition: wparser_def.c:158
@ TPS_InFileTwiddle
Definition: wparser_def.c:172
@ TPS_InURLPathStart
Definition: wparser_def.c:179
@ TPS_InURLPathFirst
Definition: wparser_def.c:178
@ TPS_InPathFirst
Definition: wparser_def.c:173
@ TPS_InPathSecond
Definition: wparser_def.c:175
@ TPS_InHyphenUnsignedInt
Definition: wparser_def.c:197
@ TPS_InFileFirst
Definition: wparser_def.c:171
@ TPS_InXMLEntityNumFirst
Definition: wparser_def.c:142
@ TPS_InHyphenWordPart
Definition: wparser_def.c:194
@ TPS_InNumWord
Definition: wparser_def.c:122
@ TPS_InAsciiWord
Definition: wparser_def.c:123
@ TPS_InVersion
Definition: wparser_def.c:136
@ TPS_InHost
Definition: wparser_def.c:169
@ TPS_InFile
Definition: wparser_def.c:176
@ TPS_InProtocolSecond
Definition: wparser_def.c:183
@ TPS_InCloseCommentFirst
Definition: wparser_def.c:160
@ TPS_InTagEscapeK
Definition: wparser_def.c:153
@ TPS_InParseHyphenHyphen
Definition: wparser_def.c:193
@ TPS_InTagBackSleshed
Definition: wparser_def.c:155
@ TPS_InTagFirst
Definition: wparser_def.c:147
@ TPS_InTagEnd
Definition: wparser_def.c:156
@ TPS_InComment
Definition: wparser_def.c:159
@ TPS_InHyphenWord
Definition: wparser_def.c:188
@ TPS_InHyphenAsciiWord
Definition: wparser_def.c:186
@ TPS_InWord
Definition: wparser_def.c:124
@ TPS_InXMLEntityEnd
Definition: wparser_def.c:146
@ TPS_InTagEscapeKK
Definition: wparser_def.c:154
@ TPS_InSpace
Definition: wparser_def.c:128
@ TPS_InFileNext
Definition: wparser_def.c:177
@ TPS_InURLPath
Definition: wparser_def.c:180
@ TPS_Base
Definition: wparser_def.c:121
@ TPS_InUDecimal
Definition: wparser_def.c:130
@ TPS_InParseHyphen
Definition: wparser_def.c:192
@ TPS_InHostFirstAN
Definition: wparser_def.c:168
@ TPS_InEmail
Definition: wparser_def.c:170
@ TPS_InDecimalFirst
Definition: wparser_def.c:131
@ TPS_InVersionFirst
Definition: wparser_def.c:135
@ TPS_InCloseCommentLast
Definition: wparser_def.c:161
@ TPS_InSVerVersion
Definition: wparser_def.c:134
@ TPS_InHyphenAsciiWordPart
Definition: wparser_def.c:195
@ TPS_InCommentFirst
Definition: wparser_def.c:157
@ TPS_InUDecimalFirst
Definition: wparser_def.c:129
@ TPS_InHostFirstDomain
Definition: wparser_def.c:163
@ TPS_InHostDomain
Definition: wparser_def.c:165
@ TPS_InHyphenDigitLookahead
Definition: wparser_def.c:191
@ TPS_InVerVersion
Definition: wparser_def.c:133
@ TPS_InXMLEntityNum
Definition: wparser_def.c:143
@ TPS_InTag
Definition: wparser_def.c:152
@ TPS_InDecimal
Definition: wparser_def.c:132
@ TPS_InTagCloseFirst
Definition: wparser_def.c:149
@ TPS_InXMLEntity
Definition: wparser_def.c:141
@ TPS_InHyphenNumWord
Definition: wparser_def.c:190
@ TPS_InTagBeginEnd
Definition: wparser_def.c:151

Function Documentation

◆ _make_compiler_happy()

void _make_compiler_happy ( void  )

Definition at line 495 of file wparser_def.c.

496{
497 p_isalnum(NULL);
498 p_isnotalnum(NULL);
499 p_isalpha(NULL);
500 p_isnotalpha(NULL);
501 p_isdigit(NULL);
502 p_isnotdigit(NULL);
503 p_islower(NULL);
504 p_isnotlower(NULL);
505 p_isprint(NULL);
506 p_isnotprint(NULL);
507 p_ispunct(NULL);
508 p_isnotpunct(NULL);
509 p_isspace(NULL);
510 p_isnotspace(NULL);
511 p_isupper(NULL);
512 p_isnotupper(NULL);
513 p_isxdigit(NULL);
514 p_isnotxdigit(NULL);
515 p_isEOF(NULL);
516 p_iseqC(NULL);
517 p_isneC(NULL);
518}
static int p_iseqC(TParser *prs)
Definition: wparser_def.c:439
static int p_isneC(TParser *prs)
Definition: wparser_def.c:445
static int p_isEOF(TParser *prs)
Definition: wparser_def.c:432

References p_isEOF(), p_iseqC(), and p_isneC().

◆ checkcondition_HL()

static TSTernaryValue checkcondition_HL ( void *  opaque,
QueryOperand *  val,
ExecPhraseData *  data 
)
static

Definition at line 1936 of file wparser_def.c.

1937{
1938 hlCheck *checkval = (hlCheck *) opaque;
1939 int i;
1940
1941 /* scan words array for matching items */
1942 for (i = 0; i < checkval->len; i++)
1943 {
1944 if (checkval->words[i].item == val)
1945 {
1946 /* if data == NULL, don't need to report positions */
1947 if (!data)
1948 return TS_YES;
1949
1950 if (!data->pos)
1951 {
1952 data->pos = palloc(sizeof(WordEntryPos) * checkval->len);
1953 data->allocated = true;
1954 data->npos = 1;
1955 data->pos[0] = checkval->words[i].pos;
1956 }
1957 else if (data->pos[data->npos - 1] < checkval->words[i].pos)
1958 {
1959 data->pos[data->npos++] = checkval->words[i].pos;
1960 }
1961 }
1962 }
1963
1964 if (data && data->npos > 0)
1965 return TS_YES;
1966
1967 return TS_NO;
1968}
long val
Definition: informix.c:689
int i
Definition: isn.c:77
void * palloc(Size size)
Definition: mcxt.c:1365
const void * data
WordEntryPos pos
Definition: ts_public.h:68
QueryOperand * item
Definition: ts_public.h:70
HeadlineWordEntry * words
Definition: wparser_def.c:1923
uint16 WordEntryPos
Definition: ts_type.h:63
@ TS_NO
Definition: ts_utils.h:134
@ TS_YES
Definition: ts_utils.h:135

References data, i, HeadlineWordEntry::item, hlCheck::len, palloc(), HeadlineWordEntry::pos, TS_NO, TS_YES, val, and hlCheck::words.

Referenced by hlCover(), and prsd_headline().

◆ get_next_fragment()

static void get_next_fragment ( HeadlineParsedText *  prs,
int *  startpos,
int *  endpos,
int *  curlen,
int *  poslen,
int  max_words 
)
static

Definition at line 2175 of file wparser_def.c.

2177{
2178 int i;
2179
2180 /*
2181 * Objective: select a fragment of words between startpos and endpos such
2182 * that it has at most max_words and both ends have query words. If the
2183 * startpos and endpos are the endpoints of the cover and the cover has
2184 * fewer words than max_words, then this function should just return the
2185 * cover
2186 */
2187 /* first move startpos to an item */
2188 for (i = *startpos; i <= *endpos; i++)
2189 {
2190 *startpos = i;
2191 if (INTERESTINGWORD(i))
2192 break;
2193 }
2194 /* cut endpos to have only max_words */
2195 *curlen = 0;
2196 *poslen = 0;
2197 for (i = *startpos; i <= *endpos && *curlen < max_words; i++)
2198 {
2199 if (!NONWORDTOKEN(prs->words[i].type))
2200 *curlen += 1;
2201 if (INTERESTINGWORD(i))
2202 *poslen += 1;
2203 }
2204 /* if the cover was cut then move back endpos to a query item */
2205 if (*endpos > i)
2206 {
2207 *endpos = i;
2208 for (i = *endpos; i >= *startpos; i--)
2209 {
2210 *endpos = i;
2211 if (INTERESTINGWORD(i))
2212 break;
2213 if (!NONWORDTOKEN(prs->words[i].type))
2214 *curlen -= 1;
2215 }
2216 }
2217}
static XLogRecPtr endpos
Definition: pg_receivewal.c:56
static XLogRecPtr startpos
HeadlineWordEntry * words
Definition: ts_public.h:76
#define NONWORDTOKEN(x)
Definition: wparser_def.c:1891

References endpos, i, INTERESTINGWORD, NONWORDTOKEN, startpos, HeadlineWordEntry::type, and HeadlineParsedText::words.

Referenced by mark_hl_fragments().

◆ hlCover()

static bool hlCover ( HeadlineParsedText *  prs,
TSQuery  query,
List *  locations,
int *  nextpos,
int *  p,
int *  q 
)
static

Definition at line 1987 of file wparser_def.c.

1989{
1990 int pos = *nextpos;
1991
1992 /* This loop repeats when our selected word-range fails the query */
1993 for (;;)
1994 {
1995 int posb,
1996 pose;
1997 ListCell *lc;
1998
1999 /*
2000 * For each AND'ed query term or phrase, find its first occurrence at
2001 * or after pos; set pose to the maximum of those positions.
2002 *
2003 * We need not consider ORs or NOTs here; see the comments for
2004 * TS_execute_locations(). Rechecking the match with TS_execute(),
2005 * below, will deal with any ensuing imprecision.
2006 */
2007 pose = -1;
2008 foreach(lc, locations)
2009 {
2010 ExecPhraseData *pdata = (ExecPhraseData *) lfirst(lc);
2011 int first = -1;
2012
2013 for (int i = 0; i < pdata->npos; i++)
2014 {
2015 /* For phrase matches, use the ending lexeme */
2016 int endp = pdata->pos[i];
2017
2018 if (endp >= pos)
2019 {
2020 first = endp;
2021 break;
2022 }
2023 }
2024 if (first < 0)
2025 return false; /* no more matches for this term */
2026 if (first > pose)
2027 pose = first;
2028 }
2029
2030 if (pose < 0)
2031 return false; /* we only get here if empty list */
2032
2033 /*
2034 * Now, for each AND'ed query term or phrase, find its last occurrence
2035 * at or before pose; set posb to the minimum of those positions.
2036 *
2037 * We start posb at INT_MAX - 1 to guarantee no overflow if we compute
2038 * posb + 1 below.
2039 */
2040 posb = INT_MAX - 1;
2041 foreach(lc, locations)
2042 {
2043 ExecPhraseData *pdata = (ExecPhraseData *) lfirst(lc);
2044 int last = -1;
2045
2046 for (int i = pdata->npos - 1; i >= 0; i--)
2047 {
2048 /* For phrase matches, use the starting lexeme */
2049 int startp = pdata->pos[i] - pdata->width;
2050
2051 if (startp <= pose)
2052 {
2053 last = startp;
2054 break;
2055 }
2056 }
2057 if (last < posb)
2058 posb = last;
2059 }
2060
2061 /*
2062 * We could end up with posb to the left of pos, in case some phrase
2063 * match crosses pos. Try the match starting at pos anyway, since the
2064 * result of TS_execute_locations is imprecise for phrase matches OR'd
2065 * with plain matches; that is, if the query is "(A <-> B) | C" then C
2066 * could match at pos even though the phrase match would have to
2067 * extend to the left of pos.
2068 */
2069 posb = Max(posb, pos);
2070
2071 /* This test probably always succeeds, but be paranoid */
2072 if (posb <= pose)
2073 {
2074 /*
2075 * posb .. pose is now the shortest, earliest-after-pos range of
2076 * lexeme positions containing all the query terms. It will
2077 * contain all phrase matches, too, except in the corner case
2078 * described just above.
2079 *
2080 * Now convert these lexeme positions to indexes in prs->words[].
2081 */
2082 int idxb = -1;
2083 int idxe = -1;
2084
2085 for (int i = 0; i < prs->curwords; i++)
2086 {
2087 if (prs->words[i].item == NULL)
2088 continue;
2089 if (idxb < 0 && prs->words[i].pos >= posb)
2090 idxb = i;
2091 if (prs->words[i].pos <= pose)
2092 idxe = i;
2093 else
2094 break;
2095 }
2096
2097 /* This test probably always succeeds, but be paranoid */
2098 if (idxb >= 0 && idxe >= idxb)
2099 {
2100 /*
2101 * Finally, check that the selected range satisfies the query.
2102 * This should succeed in all simple cases; but odd cases
2103 * involving non-top-level NOT conditions or phrase matches
2104 * OR'd with other things could fail, since the result of
2105 * TS_execute_locations doesn't fully represent such things.
2106 */
2107 hlCheck ch;
2108
2109 ch.words = &(prs->words[idxb]);
2110 ch.len = idxe - idxb + 1;
2111 if (TS_execute(GETQUERY(query), &ch,
2112 TS_EXEC_EMPTY, checkcondition_HL))
2113 {
2114 /* Match! Advance *nextpos and return the word range. */
2115 *nextpos = posb + 1;
2116 *p = idxb;
2117 *q = idxe;
2118 return true;
2119 }
2120 }
2121 }
2122
2123 /*
2124 * Advance pos and try again. Any later workable match must start
2125 * beyond posb.
2126 */
2127 pos = posb + 1;
2128 }
2129 /* Can't get here, but stupider compilers complain if we leave it off */
2130 return false;
2131}
#define GETQUERY(x)
Definition: _int.h:157
#define Max(x, y)
Definition: c.h:1001
#define lfirst(lc)
Definition: pg_list.h:172
WordEntryPos * pos
Definition: ts_utils.h:166
#define TS_EXEC_EMPTY
Definition: ts_utils.h:188
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1854
static TSTernaryValue checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
Definition: wparser_def.c:1936

References checkcondition_HL(), HeadlineParsedText::curwords, GETQUERY, i, HeadlineWordEntry::item, hlCheck::len, lfirst, Max, ExecPhraseData::npos, HeadlineWordEntry::pos, ExecPhraseData::pos, TS_EXEC_EMPTY, TS_execute(), ExecPhraseData::width, hlCheck::words, and HeadlineParsedText::words.

Referenced by mark_hl_fragments(), and mark_hl_words().

◆ mark_fragment()

static void mark_fragment ( HeadlineParsedText *  prs,
bool  highlightall,
int  startpos,
int  endpos 
)
static

Definition at line 2139 of file wparser_def.c.

2141{
2142 int i;
2143
2144 for (i = startpos; i <= endpos; i++)
2145 {
2146 if (prs->words[i].item)
2147 prs->words[i].selected = 1;
2148 if (!highlightall)
2149 {
2150 if (HLIDREPLACE(prs->words[i].type))
2151 prs->words[i].replace = 1;
2152 else if (HLIDSKIP(prs->words[i].type))
2153 prs->words[i].skip = 1;
2154 }
2155 else
2156 {
2157 if (XMLHLIDSKIP(prs->words[i].type))
2158 prs->words[i].skip = 1;
2159 }
2160
2161 prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
2162 }
2163}
#define XMLHLIDSKIP(x)
Definition: wparser_def.c:1890
#define HLIDSKIP(x)
Definition: wparser_def.c:1889
#define HLIDREPLACE(x)
Definition: wparser_def.c:1888

References endpos, HLIDREPLACE, HLIDSKIP, i, HeadlineWordEntry::in, HeadlineWordEntry::item, HeadlineWordEntry::repeated, HeadlineWordEntry::replace, HeadlineWordEntry::selected, HeadlineWordEntry::skip, startpos, HeadlineWordEntry::type, HeadlineParsedText::words, and XMLHLIDSKIP.

Referenced by mark_hl_fragments(), and mark_hl_words().

◆ mark_hl_fragments()

static void mark_hl_fragments ( HeadlineParsedText * prs,
TSQuery  query,
List * locations,
bool  highlightall,
int  shortword,
int  min_words,
int  max_words,
int  max_fragments 
)
static

Definition at line 2226 of file wparser_def.c.

/*
 * mark_hl_fragments: headline selector that marks up to max_fragments
 * separate fragments of the document (prsd_headline() calls it whenever
 * max_fragments is nonzero).
 *
 * Steps visible below:
 *   1. Collect every cover reported by hlCover() and cut each one into
 *      pieces with get_next_fragment(), recording them in covers[].
 *   2. Up to max_fragments times, pick the not-yet-chosen, non-excluded
 *      piece with the largest poslen, breaking ties by the smallest curlen;
 *      stretch it toward max_words, mark it with mark_fragment(), and
 *      exclude every piece that overlaps the marked range.
 *   3. If nothing was marked, mark the leading words of the document until
 *      min_words word tokens have been counted.
 *
 * NOTE(review): shortword is never referenced by name in this body;
 * presumably the BADENDPOINT() macro picks it up from the enclosing scope --
 * confirm against the macro definition before renaming or removing it.
 */
2230{
2231 int32 poslen,
2232 curlen,
2233 i,
2234 f,
2235 num_f = 0;
2236 int32 stretch,
2237 maxstretch,
2238 posmarker;
2239
2240 int32 startpos = 0,
2241 endpos = 0,
2242 nextpos = 0,
2243 p = 0,
2244 q = 0;
2245
2246 int32 numcovers = 0,
2247 maxcovers = 32;
2248
2249 int32 minI,
2250 minwords,
2251 maxitems;
2252 CoverPos *covers;
2253
/* start with room for maxcovers entries; the array is doubled on demand below */
2254 covers = palloc(maxcovers * sizeof(CoverPos));
2255
2256 /* get all covers */
2257 while (hlCover(prs, query, locations, &nextpos, &p, &q))
2258 {
2259 startpos = p;
2260 endpos = q;
2261
2262 /*
2263 * Break the cover into smaller fragments such that each fragment has
2264 * at most max_words. Also ensure that each end of each fragment is a
2265 * query word. This will allow us to stretch the fragment in either
2266 * direction
2267 */
2268
2269 while (startpos <= endpos)
2270 {
2271 get_next_fragment(prs, &startpos, &endpos, &curlen, &poslen, max_words);
/* covers[] is full: double its capacity before appending */
2272 if (numcovers >= maxcovers)
2273 {
2274 maxcovers *= 2;
2275 covers = repalloc(covers, sizeof(CoverPos) * maxcovers);
2276 }
2277 covers[numcovers].startpos = startpos;
2278 covers[numcovers].endpos = endpos;
2279 covers[numcovers].curlen = curlen;
2280 covers[numcovers].poslen = poslen;
2281 covers[numcovers].chosen = false;
2282 covers[numcovers].excluded = false;
2283 numcovers++;
/* continue right after this fragment, up to the end (q) of the same cover */
2284 startpos = endpos + 1;
2285 endpos = q;
2286 }
2287 }
2288
2289 /* choose best covers */
2290 for (f = 0; f < max_fragments; f++)
2291 {
/* reset the running best for this round; minI stays -1 if nothing qualifies */
2292 maxitems = 0;
2293 minwords = PG_INT32_MAX;
2294 minI = -1;
2295
2296 /*
2297 * Choose the cover that contains max items. In case of tie choose the
2298 * one with smaller number of words.
2299 */
2300 for (i = 0; i < numcovers; i++)
2301 {
2302 if (!covers[i].chosen && !covers[i].excluded &&
2303 (maxitems < covers[i].poslen ||
2304 (maxitems == covers[i].poslen &&
2305 minwords > covers[i].curlen)))
2306 {
2307 maxitems = covers[i].poslen;
2308 minwords = covers[i].curlen;
2309 minI = i;
2310 }
2311 }
2312 /* if a cover was found mark it */
2313 if (minI >= 0)
2314 {
2315 covers[minI].chosen = true;
2316 /* adjust the size of cover */
2317 startpos = covers[minI].startpos;
2318 endpos = covers[minI].endpos;
2319 curlen = covers[minI].curlen;
2320 /* stretch the cover if cover size is lower than max_words */
2321 if (curlen < max_words)
2322 {
2323 /* divide the stretch on both sides of cover */
2324 maxstretch = (max_words - curlen) / 2;
2325
2326 /*
2327 * first stretch the startpos stop stretching if 1. we hit the
2328 * beginning of document 2. exceed maxstretch 3. we hit an
2329 * already marked fragment
2330 */
2331 stretch = 0;
2332 posmarker = startpos;
2333 for (i = startpos - 1; i >= 0 && stretch < maxstretch && !prs->words[i].in; i--)
2334 {
2335 if (!NONWORDTOKEN(prs->words[i].type))
2336 {
2337 curlen++;
2338 stretch++;
2339 }
2340 posmarker = i;
2341 }
2342 /* cut back startpos till we find a good endpoint */
2343 for (i = posmarker; i < startpos && BADENDPOINT(i); i++)
2344 {
2345 if (!NONWORDTOKEN(prs->words[i].type))
2346 curlen--;
2347 }
2348 startpos = i;
2349 /* now stretch the endpos as much as possible */
2350 posmarker = endpos;
2351 for (i = endpos + 1; i < prs->curwords && curlen < max_words && !prs->words[i].in; i++)
2352 {
2353 if (!NONWORDTOKEN(prs->words[i].type))
2354 curlen++;
2355 posmarker = i;
2356 }
2357 /* cut back endpos till we find a good endpoint */
2358 for (i = posmarker; i > endpos && BADENDPOINT(i); i--)
2359 {
2360 if (!NONWORDTOKEN(prs->words[i].type))
2361 curlen--;
2362 }
2363 endpos = i;
2364 }
/* store the adjusted bounds so the overlap test below sees the final range */
2365 covers[minI].startpos = startpos;
2366 covers[minI].endpos = endpos;
2367 covers[minI].curlen = curlen;
2368 /* Mark the chosen fragments (covers) */
2369 mark_fragment(prs, highlightall, startpos, endpos);
2370 num_f++;
2371 /* Exclude covers overlapping this one from future consideration */
2372 for (i = 0; i < numcovers; i++)
2373 {
2374 if (i != minI &&
2375 ((covers[i].startpos >= startpos &&
2376 covers[i].startpos <= endpos) ||
2377 (covers[i].endpos >= startpos &&
2378 covers[i].endpos <= endpos) ||
2379 (covers[i].startpos < startpos &&
2380 covers[i].endpos > endpos)))
2381 covers[i].excluded = true;
2382 }
2383 }
2384 else
2385 break; /* no selectable covers remain */
2386 }
2387
2388 /* show the first min_words words if we have not marked anything */
2389 if (num_f <= 0)
2390 {
/* endpos = -1 gives mark_fragment() an empty range if the loop below never runs */
2391 startpos = curlen = 0;
2392 endpos = -1;
2393 for (i = 0; i < prs->curwords && curlen < min_words; i++)
2394 {
2395 if (!NONWORDTOKEN(prs->words[i].type))
2396 curlen++;
2397 endpos = i;
2398 }
2399 mark_fragment(prs, highlightall, startpos, endpos);
2400 }
2401
2402 pfree(covers);
2403}
#define PG_INT32_MAX
Definition: c.h:598
int32_t int32
Definition: c.h:538
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
bool chosen
Definition: wparser_def.c:1916
int32 endpos
Definition: wparser_def.c:1913
int32 curlen
Definition: wparser_def.c:1915
int32 startpos
Definition: wparser_def.c:1912
bool excluded
Definition: wparser_def.c:1917
int32 poslen
Definition: wparser_def.c:1914
#define BADENDPOINT(j)
Definition: wparser_def.c:1905
static void mark_fragment(HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
Definition: wparser_def.c:2139
static bool hlCover(HeadlineParsedText *prs, TSQuery query, List *locations, int *nextpos, int *p, int *q)
Definition: wparser_def.c:1987
static void get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
Definition: wparser_def.c:2175

References BADENDPOINT, CoverPos::chosen, CoverPos::curlen, HeadlineParsedText::curwords, CoverPos::endpos, endpos, CoverPos::excluded, get_next_fragment(), hlCover(), i, HeadlineWordEntry::in, mark_fragment(), NONWORDTOKEN, palloc(), pfree(), PG_INT32_MAX, CoverPos::poslen, repalloc(), CoverPos::startpos, startpos, HeadlineWordEntry::type, and HeadlineParsedText::words.

Referenced by prsd_headline().

◆ mark_hl_words()

static void mark_hl_words ( HeadlineParsedText * prs,
TSQuery  query,
List * locations,
bool  highlightall,
int  shortword,
int  min_words,
int  max_words 
)
static

Definition at line 2409 of file wparser_def.c.

/*
 * mark_hl_words: headline selector that marks one contiguous run of words
 * (prsd_headline() calls it when max_fragments is zero).
 *
 * When highlightall is false, every cover returned by hlCover() produces a
 * candidate headline [posb, pose]; the best candidate according to the
 * priority rule documented further down is remembered in [bestb, beste].
 * If no candidate was adopted, the leading words of the document are used
 * until min_words word tokens have been counted.  When highlightall is true
 * the range is the whole document, 0 .. prs->curwords - 1.  In all cases the
 * final range is handed to mark_fragment().
 *
 * NOTE(review): shortword is never referenced by name in this body;
 * presumably BADENDPOINT() picks it up from the enclosing scope -- confirm
 * against the macro definition before renaming or removing it.
 */
2412{
2413 int nextpos = 0,
2414 p = 0,
2415 q = 0;
2416 int bestb = -1,
2417 beste = -1;
2418 int bestlen = -1;
2419 bool bestcover = false;
2420 int pose,
2421 posb,
2422 poslen,
2423 curlen;
2424 bool poscover;
2425 int i;
2426
2427 if (!highlightall)
2428 {
2429 /* examine all covers, select a headline using the best one */
2430 while (hlCover(prs, query, locations, &nextpos, &p, &q))
2431 {
2432 /*
2433 * Count words (curlen) and interesting words (poslen) within
2434 * cover, but stop once we reach max_words. This step doesn't
2435 * consider whether that's a good stopping point. posb and pose
2436 * are set to the start and end indexes of the possible headline.
2437 */
2438 curlen = 0;
2439 poslen = 0;
2440 posb = pose = p;
2441 for (i = p; i <= q && curlen < max_words; i++)
2442 {
2443 if (!NONWORDTOKEN(prs->words[i].type))
2444 curlen++;
2445 if (INTERESTINGWORD(i))
2446 poslen++;
2447 pose = i;
2448 }
2449
2450 if (curlen < max_words)
2451 {
2452 /*
2453 * We have room to lengthen the headline, so search forward
2454 * until it's full or we find a good stopping point. We'll
2455 * reconsider the word at "q", then move forward.
2456 */
2457 for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
2458 {
/* words up to q were already counted by the loop above */
2459 if (i > q)
2460 {
2461 if (!NONWORDTOKEN(prs->words[i].type))
2462 curlen++;
2463 if (INTERESTINGWORD(i))
2464 poslen++;
2465 }
2466 pose = i;
2467 if (BADENDPOINT(i))
2468 continue;
2469 if (curlen >= min_words)
2470 break;
2471 }
2472 if (curlen < min_words)
2473 {
2474 /*
2475 * Reached end of text and our headline is still shorter
2476 * than min_words, so try to extend it to the left.
2477 */
2478 for (i = p - 1; i >= 0; i--)
2479 {
2480 if (!NONWORDTOKEN(prs->words[i].type))
2481 curlen++;
2482 if (INTERESTINGWORD(i))
2483 poslen++;
2484 if (curlen >= max_words)
2485 break;
2486 if (BADENDPOINT(i))
2487 continue;
2488 if (curlen >= min_words)
2489 break;
2490 }
/* i is -1 if the loop ran off the start of the document; clamp to 0 */
2491 posb = (i >= 0) ? i : 0;
2492 }
2493 }
2494 else
2495 {
2496 /*
2497 * Can't make headline longer, so consider making it shorter
2498 * if needed to avoid a bad endpoint.
2499 */
/* i can be q + 1 here if the counting loop consumed the whole cover */
2500 if (i > q)
2501 i = q;
2502 for (; curlen > min_words; i--)
2503 {
2504 if (!BADENDPOINT(i))
2505 break;
2506 if (!NONWORDTOKEN(prs->words[i].type))
2507 curlen--;
2508 if (INTERESTINGWORD(i))
2509 poslen--;
2510 pose = i - 1;
2511 }
2512 }
2513
2514 /*
2515 * Check whether the proposed headline includes the original
2516 * cover; it might not if we trimmed it due to max_words.
2517 */
2518 poscover = (posb <= p && pose >= q);
2519
2520 /*
2521 * Adopt this headline if it's better than the last one, giving
2522 * highest priority to headlines including the cover, then to
2523 * headlines with more interesting words, then to headlines with
2524 * good stopping points. (Since bestlen is initially -1, we will
2525 * certainly adopt the first headline.)
2526 */
2527 if (poscover > bestcover ||
2528 (poscover == bestcover && poslen > bestlen) ||
2529 (poscover == bestcover && poslen == bestlen &&
2530 !BADENDPOINT(pose) && BADENDPOINT(beste)))
2531 {
2532 bestb = posb;
2533 beste = pose;
2534 bestlen = poslen;
2535 bestcover = poscover;
2536 }
2537 }
2538
2539 /*
2540 * If we found nothing acceptable, select min_words words starting at
2541 * the beginning.
2542 */
2543 if (bestlen < 0)
2544 {
2545 curlen = 0;
/* pose stays -1 (an empty range for mark_fragment) if the loop never runs */
2546 pose = -1;
2547 for (i = 0; i < prs->curwords && curlen < min_words; i++)
2548 {
2549 if (!NONWORDTOKEN(prs->words[i].type))
2550 curlen++;
2551 pose = i;
2552 }
2553 bestb = 0;
2554 beste = pose;
2555 }
2556 }
2557 else
2558 {
2559 /* highlightall mode: headline is whole document */
2560 bestb = 0;
2561 beste = prs->curwords - 1;
2562 }
2563
2564 mark_fragment(prs, highlightall, bestb, beste);
2565}

References BADENDPOINT, HeadlineParsedText::curwords, hlCover(), i, INTERESTINGWORD, mark_fragment(), NONWORDTOKEN, HeadlineWordEntry::type, and HeadlineParsedText::words.

Referenced by prsd_headline().

◆ newTParserPosition()

static TParserPosition * newTParserPosition ( TParserPosition * prev)
static

Definition at line 270 of file wparser_def.c.

271{
273
274 if (prev)
275 memcpy(res, prev, sizeof(TParserPosition));
276 else
277 memset(res, 0, sizeof(TParserPosition));
278
279 res->prev = prev;
280
281 res->pushedAtAction = NULL;
282
283 return res;
284}
const TParserStateActionItem * pushedAtAction
Definition: wparser_def.c:238
struct TParserPosition * prev
Definition: wparser_def.c:237

References palloc(), TParserPosition::prev, and TParserPosition::pushedAtAction.

Referenced by p_isURLPath(), TParserCopyInit(), TParserGet(), and TParserInit().

◆ p_isascii()

static int p_isascii ( TParser * prs)
static

Definition at line 451 of file wparser_def.c.

452{
453 return (prs->state->charlen == 1 && isascii((unsigned char) *(prs->str + prs->state->posbyte))) ? 1 : 0;
454}
char * str
Definition: wparser_def.c:244
TParserPosition * state
Definition: wparser_def.c:250

References TParserPosition::charlen, TParserPosition::posbyte, TParser::state, and TParser::str.

Referenced by p_isasclet().

◆ p_isasclet()

static int p_isasclet ( TParser * prs)
static

Definition at line 457 of file wparser_def.c.

458{
459 return (p_isascii(prs) && p_isalpha(prs)) ? 1 : 0;
460}
static int p_isascii(TParser *prs)
Definition: wparser_def.c:451

References p_isascii().

◆ p_isEOF()

static int p_isEOF ( TParser * prs)
static

Definition at line 432 of file wparser_def.c.

433{
434 Assert(prs->state);
435 return (prs->state->posbyte == prs->lenstr || prs->state->charlen == 0) ? 1 : 0;
436}
Assert(PointerIsAligned(start, uint64))
int lenstr
Definition: wparser_def.c:245

References Assert(), TParserPosition::charlen, TParser::lenstr, TParserPosition::posbyte, and TParser::state.

Referenced by _make_compiler_happy().

◆ p_iseqC()

static int p_iseqC ( TParser * prs)
static

Definition at line 439 of file wparser_def.c.

440{
441 return p_iseq(prs, prs->c);
442}
char c
Definition: wparser_def.c:255

References TParser::c.

Referenced by _make_compiler_happy().

◆ p_ishost()

static int p_ishost ( TParser * prs)
static

Definition at line 587 of file wparser_def.c.

588{
589 TParser *tmpprs = TParserCopyInit(prs);
590 int res = 0;
591
592 tmpprs->wanthost = true;
593
594 /*
595 * Check stack depth before recursing. (Since TParserGet() doesn't
596 * normally recurse, we put the cost of checking here not there.)
597 */
599
600 if (TParserGet(tmpprs) && tmpprs->type == HOST)
601 {
602 prs->state->posbyte += tmpprs->lenbytetoken;
603 prs->state->poschar += tmpprs->lenchartoken;
604 prs->state->lenbytetoken += tmpprs->lenbytetoken;
605 prs->state->lenchartoken += tmpprs->lenchartoken;
606 prs->state->charlen = tmpprs->state->charlen;
607 res = 1;
608 }
609 TParserCopyClose(tmpprs);
610
611 return res;
612}
void check_stack_depth(void)
Definition: stack_depth.c:95
int type
Definition: wparser_def.c:261
bool wanthost
Definition: wparser_def.c:252
int lenbytetoken
Definition: wparser_def.c:259
int lenchartoken
Definition: wparser_def.c:260
#define HOST
Definition: wparser_def.c:41
static bool TParserGet(TParser *prs)
Definition: wparser_def.c:1665
static void TParserCopyClose(TParser *prs)
Definition: wparser_def.c:365
static TParser * TParserCopyInit(const TParser *orig)
Definition: wparser_def.c:319

References TParserPosition::charlen, check_stack_depth(), HOST, TParserPosition::lenbytetoken, TParser::lenbytetoken, TParserPosition::lenchartoken, TParser::lenchartoken, TParserPosition::posbyte, TParserPosition::poschar, TParser::state, TParserCopyClose(), TParserCopyInit(), TParserGet(), TParser::type, and TParser::wanthost.

◆ p_isignore()

static int p_isignore ( TParser * prs)
static

Definition at line 581 of file wparser_def.c.

582{
583 return (prs->ignore) ? 1 : 0;
584}
bool ignore
Definition: wparser_def.c:251

References TParser::ignore.

◆ p_isneC()

static int p_isneC ( TParser * prs)
static

Definition at line 445 of file wparser_def.c.

446{
447 return !p_iseq(prs, prs->c);
448}

References TParser::c.

Referenced by _make_compiler_happy().

◆ p_isspecial()

static int p_isspecial ( TParser * prs)
static

Definition at line 650 of file wparser_def.c.

651{
652 /*
653 * pg_dsplen could return -1 which means error or control character
654 */
655 if (pg_dsplen(prs->str + prs->state->posbyte) == 0)
656 return 1;
657
658 /*
659 * Unicode Characters in the 'Mark, Spacing Combining' Category That
660 * characters are not alpha although they are not breakers of word too.
661 * Check that only in utf encoding, because other encodings aren't
662 * supported by postgres or even exists.
663 */
665 {
666 static const pg_wchar strange_letter[] = {
667 /*
668 * use binary search, so elements should be ordered
669 */
670 0x0903, /* DEVANAGARI SIGN VISARGA */
671 0x093E, /* DEVANAGARI VOWEL SIGN AA */
672 0x093F, /* DEVANAGARI VOWEL SIGN I */
673 0x0940, /* DEVANAGARI VOWEL SIGN II */
674 0x0949, /* DEVANAGARI VOWEL SIGN CANDRA O */
675 0x094A, /* DEVANAGARI VOWEL SIGN SHORT O */
676 0x094B, /* DEVANAGARI VOWEL SIGN O */
677 0x094C, /* DEVANAGARI VOWEL SIGN AU */
678 0x0982, /* BENGALI SIGN ANUSVARA */
679 0x0983, /* BENGALI SIGN VISARGA */
680 0x09BE, /* BENGALI VOWEL SIGN AA */
681 0x09BF, /* BENGALI VOWEL SIGN I */
682 0x09C0, /* BENGALI VOWEL SIGN II */
683 0x09C7, /* BENGALI VOWEL SIGN E */
684 0x09C8, /* BENGALI VOWEL SIGN AI */
685 0x09CB, /* BENGALI VOWEL SIGN O */
686 0x09CC, /* BENGALI VOWEL SIGN AU */
687 0x09D7, /* BENGALI AU LENGTH MARK */
688 0x0A03, /* GURMUKHI SIGN VISARGA */
689 0x0A3E, /* GURMUKHI VOWEL SIGN AA */
690 0x0A3F, /* GURMUKHI VOWEL SIGN I */
691 0x0A40, /* GURMUKHI VOWEL SIGN II */
692 0x0A83, /* GUJARATI SIGN VISARGA */
693 0x0ABE, /* GUJARATI VOWEL SIGN AA */
694 0x0ABF, /* GUJARATI VOWEL SIGN I */
695 0x0AC0, /* GUJARATI VOWEL SIGN II */
696 0x0AC9, /* GUJARATI VOWEL SIGN CANDRA O */
697 0x0ACB, /* GUJARATI VOWEL SIGN O */
698 0x0ACC, /* GUJARATI VOWEL SIGN AU */
699 0x0B02, /* ORIYA SIGN ANUSVARA */
700 0x0B03, /* ORIYA SIGN VISARGA */
701 0x0B3E, /* ORIYA VOWEL SIGN AA */
702 0x0B40, /* ORIYA VOWEL SIGN II */
703 0x0B47, /* ORIYA VOWEL SIGN E */
704 0x0B48, /* ORIYA VOWEL SIGN AI */
705 0x0B4B, /* ORIYA VOWEL SIGN O */
706 0x0B4C, /* ORIYA VOWEL SIGN AU */
707 0x0B57, /* ORIYA AU LENGTH MARK */
708 0x0BBE, /* TAMIL VOWEL SIGN AA */
709 0x0BBF, /* TAMIL VOWEL SIGN I */
710 0x0BC1, /* TAMIL VOWEL SIGN U */
711 0x0BC2, /* TAMIL VOWEL SIGN UU */
712 0x0BC6, /* TAMIL VOWEL SIGN E */
713 0x0BC7, /* TAMIL VOWEL SIGN EE */
714 0x0BC8, /* TAMIL VOWEL SIGN AI */
715 0x0BCA, /* TAMIL VOWEL SIGN O */
716 0x0BCB, /* TAMIL VOWEL SIGN OO */
717 0x0BCC, /* TAMIL VOWEL SIGN AU */
718 0x0BD7, /* TAMIL AU LENGTH MARK */
719 0x0C01, /* TELUGU SIGN CANDRABINDU */
720 0x0C02, /* TELUGU SIGN ANUSVARA */
721 0x0C03, /* TELUGU SIGN VISARGA */
722 0x0C41, /* TELUGU VOWEL SIGN U */
723 0x0C42, /* TELUGU VOWEL SIGN UU */
724 0x0C43, /* TELUGU VOWEL SIGN VOCALIC R */
725 0x0C44, /* TELUGU VOWEL SIGN VOCALIC RR */
726 0x0C82, /* KANNADA SIGN ANUSVARA */
727 0x0C83, /* KANNADA SIGN VISARGA */
728 0x0CBE, /* KANNADA VOWEL SIGN AA */
729 0x0CC0, /* KANNADA VOWEL SIGN II */
730 0x0CC1, /* KANNADA VOWEL SIGN U */
731 0x0CC2, /* KANNADA VOWEL SIGN UU */
732 0x0CC3, /* KANNADA VOWEL SIGN VOCALIC R */
733 0x0CC4, /* KANNADA VOWEL SIGN VOCALIC RR */
734 0x0CC7, /* KANNADA VOWEL SIGN EE */
735 0x0CC8, /* KANNADA VOWEL SIGN AI */
736 0x0CCA, /* KANNADA VOWEL SIGN O */
737 0x0CCB, /* KANNADA VOWEL SIGN OO */
738 0x0CD5, /* KANNADA LENGTH MARK */
739 0x0CD6, /* KANNADA AI LENGTH MARK */
740 0x0D02, /* MALAYALAM SIGN ANUSVARA */
741 0x0D03, /* MALAYALAM SIGN VISARGA */
742 0x0D3E, /* MALAYALAM VOWEL SIGN AA */
743 0x0D3F, /* MALAYALAM VOWEL SIGN I */
744 0x0D40, /* MALAYALAM VOWEL SIGN II */
745 0x0D46, /* MALAYALAM VOWEL SIGN E */
746 0x0D47, /* MALAYALAM VOWEL SIGN EE */
747 0x0D48, /* MALAYALAM VOWEL SIGN AI */
748 0x0D4A, /* MALAYALAM VOWEL SIGN O */
749 0x0D4B, /* MALAYALAM VOWEL SIGN OO */
750 0x0D4C, /* MALAYALAM VOWEL SIGN AU */
751 0x0D57, /* MALAYALAM AU LENGTH MARK */
752 0x0D82, /* SINHALA SIGN ANUSVARAYA */
753 0x0D83, /* SINHALA SIGN VISARGAYA */
754 0x0DCF, /* SINHALA VOWEL SIGN AELA-PILLA */
755 0x0DD0, /* SINHALA VOWEL SIGN KETTI AEDA-PILLA */
756 0x0DD1, /* SINHALA VOWEL SIGN DIGA AEDA-PILLA */
757 0x0DD8, /* SINHALA VOWEL SIGN GAETTA-PILLA */
758 0x0DD9, /* SINHALA VOWEL SIGN KOMBUVA */
759 0x0DDA, /* SINHALA VOWEL SIGN DIGA KOMBUVA */
760 0x0DDB, /* SINHALA VOWEL SIGN KOMBU DEKA */
761 0x0DDC, /* SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA */
762 0x0DDD, /* SINHALA VOWEL SIGN KOMBUVA HAA DIGA
763 * AELA-PILLA */
764 0x0DDE, /* SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA */
765 0x0DDF, /* SINHALA VOWEL SIGN GAYANUKITTA */
766 0x0DF2, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA */
767 0x0DF3, /* SINHALA VOWEL SIGN DIGA GAYANUKITTA */
768 0x0F3E, /* TIBETAN SIGN YAR TSHES */
769 0x0F3F, /* TIBETAN SIGN MAR TSHES */
770 0x0F7F, /* TIBETAN SIGN RNAM BCAD */
771 0x102B, /* MYANMAR VOWEL SIGN TALL AA */
772 0x102C, /* MYANMAR VOWEL SIGN AA */
773 0x1031, /* MYANMAR VOWEL SIGN E */
774 0x1038, /* MYANMAR SIGN VISARGA */
775 0x103B, /* MYANMAR CONSONANT SIGN MEDIAL YA */
776 0x103C, /* MYANMAR CONSONANT SIGN MEDIAL RA */
777 0x1056, /* MYANMAR VOWEL SIGN VOCALIC R */
778 0x1057, /* MYANMAR VOWEL SIGN VOCALIC RR */
779 0x1062, /* MYANMAR VOWEL SIGN SGAW KAREN EU */
780 0x1063, /* MYANMAR TONE MARK SGAW KAREN HATHI */
781 0x1064, /* MYANMAR TONE MARK SGAW KAREN KE PHO */
782 0x1067, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU */
783 0x1068, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN UE */
784 0x1069, /* MYANMAR SIGN WESTERN PWO KAREN TONE-1 */
785 0x106A, /* MYANMAR SIGN WESTERN PWO KAREN TONE-2 */
786 0x106B, /* MYANMAR SIGN WESTERN PWO KAREN TONE-3 */
787 0x106C, /* MYANMAR SIGN WESTERN PWO KAREN TONE-4 */
788 0x106D, /* MYANMAR SIGN WESTERN PWO KAREN TONE-5 */
789 0x1083, /* MYANMAR VOWEL SIGN SHAN AA */
790 0x1084, /* MYANMAR VOWEL SIGN SHAN E */
791 0x1087, /* MYANMAR SIGN SHAN TONE-2 */
792 0x1088, /* MYANMAR SIGN SHAN TONE-3 */
793 0x1089, /* MYANMAR SIGN SHAN TONE-5 */
794 0x108A, /* MYANMAR SIGN SHAN TONE-6 */
795 0x108B, /* MYANMAR SIGN SHAN COUNCIL TONE-2 */
796 0x108C, /* MYANMAR SIGN SHAN COUNCIL TONE-3 */
797 0x108F, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */
798 0x17B6, /* KHMER VOWEL SIGN AA */
799 0x17BE, /* KHMER VOWEL SIGN OE */
800 0x17BF, /* KHMER VOWEL SIGN YA */
801 0x17C0, /* KHMER VOWEL SIGN IE */
802 0x17C1, /* KHMER VOWEL SIGN E */
803 0x17C2, /* KHMER VOWEL SIGN AE */
804 0x17C3, /* KHMER VOWEL SIGN AI */
805 0x17C4, /* KHMER VOWEL SIGN OO */
806 0x17C5, /* KHMER VOWEL SIGN AU */
807 0x17C7, /* KHMER SIGN REAHMUK */
808 0x17C8, /* KHMER SIGN YUUKALEAPINTU */
809 0x1923, /* LIMBU VOWEL SIGN EE */
810 0x1924, /* LIMBU VOWEL SIGN AI */
811 0x1925, /* LIMBU VOWEL SIGN OO */
812 0x1926, /* LIMBU VOWEL SIGN AU */
813 0x1929, /* LIMBU SUBJOINED LETTER YA */
814 0x192A, /* LIMBU SUBJOINED LETTER RA */
815 0x192B, /* LIMBU SUBJOINED LETTER WA */
816 0x1930, /* LIMBU SMALL LETTER KA */
817 0x1931, /* LIMBU SMALL LETTER NGA */
818 0x1933, /* LIMBU SMALL LETTER TA */
819 0x1934, /* LIMBU SMALL LETTER NA */
820 0x1935, /* LIMBU SMALL LETTER PA */
821 0x1936, /* LIMBU SMALL LETTER MA */
822 0x1937, /* LIMBU SMALL LETTER RA */
823 0x1938, /* LIMBU SMALL LETTER LA */
824 0x19B0, /* NEW TAI LUE VOWEL SIGN VOWEL SHORTENER */
825 0x19B1, /* NEW TAI LUE VOWEL SIGN AA */
826 0x19B2, /* NEW TAI LUE VOWEL SIGN II */
827 0x19B3, /* NEW TAI LUE VOWEL SIGN U */
828 0x19B4, /* NEW TAI LUE VOWEL SIGN UU */
829 0x19B5, /* NEW TAI LUE VOWEL SIGN E */
830 0x19B6, /* NEW TAI LUE VOWEL SIGN AE */
831 0x19B7, /* NEW TAI LUE VOWEL SIGN O */
832 0x19B8, /* NEW TAI LUE VOWEL SIGN OA */
833 0x19B9, /* NEW TAI LUE VOWEL SIGN UE */
834 0x19BA, /* NEW TAI LUE VOWEL SIGN AY */
835 0x19BB, /* NEW TAI LUE VOWEL SIGN AAY */
836 0x19BC, /* NEW TAI LUE VOWEL SIGN UY */
837 0x19BD, /* NEW TAI LUE VOWEL SIGN OY */
838 0x19BE, /* NEW TAI LUE VOWEL SIGN OAY */
839 0x19BF, /* NEW TAI LUE VOWEL SIGN UEY */
840 0x19C0, /* NEW TAI LUE VOWEL SIGN IY */
841 0x19C8, /* NEW TAI LUE TONE MARK-1 */
842 0x19C9, /* NEW TAI LUE TONE MARK-2 */
843 0x1A19, /* BUGINESE VOWEL SIGN E */
844 0x1A1A, /* BUGINESE VOWEL SIGN O */
845 0x1A1B, /* BUGINESE VOWEL SIGN AE */
846 0x1B04, /* BALINESE SIGN BISAH */
847 0x1B35, /* BALINESE VOWEL SIGN TEDUNG */
848 0x1B3B, /* BALINESE VOWEL SIGN RA REPA TEDUNG */
849 0x1B3D, /* BALINESE VOWEL SIGN LA LENGA TEDUNG */
850 0x1B3E, /* BALINESE VOWEL SIGN TALING */
851 0x1B3F, /* BALINESE VOWEL SIGN TALING REPA */
852 0x1B40, /* BALINESE VOWEL SIGN TALING TEDUNG */
853 0x1B41, /* BALINESE VOWEL SIGN TALING REPA TEDUNG */
854 0x1B43, /* BALINESE VOWEL SIGN PEPET TEDUNG */
855 0x1B44, /* BALINESE ADEG ADEG */
856 0x1B82, /* SUNDANESE SIGN PANGWISAD */
857 0x1BA1, /* SUNDANESE CONSONANT SIGN PAMINGKAL */
858 0x1BA6, /* SUNDANESE VOWEL SIGN PANAELAENG */
859 0x1BA7, /* SUNDANESE VOWEL SIGN PANOLONG */
860 0x1BAA, /* SUNDANESE SIGN PAMAAEH */
861 0x1C24, /* LEPCHA SUBJOINED LETTER YA */
862 0x1C25, /* LEPCHA SUBJOINED LETTER RA */
863 0x1C26, /* LEPCHA VOWEL SIGN AA */
864 0x1C27, /* LEPCHA VOWEL SIGN I */
865 0x1C28, /* LEPCHA VOWEL SIGN O */
866 0x1C29, /* LEPCHA VOWEL SIGN OO */
867 0x1C2A, /* LEPCHA VOWEL SIGN U */
868 0x1C2B, /* LEPCHA VOWEL SIGN UU */
869 0x1C34, /* LEPCHA CONSONANT SIGN NYIN-DO */
870 0x1C35, /* LEPCHA CONSONANT SIGN KANG */
871 0xA823, /* SYLOTI NAGRI VOWEL SIGN A */
872 0xA824, /* SYLOTI NAGRI VOWEL SIGN I */
873 0xA827, /* SYLOTI NAGRI VOWEL SIGN OO */
874 0xA880, /* SAURASHTRA SIGN ANUSVARA */
875 0xA881, /* SAURASHTRA SIGN VISARGA */
876 0xA8B4, /* SAURASHTRA CONSONANT SIGN HAARU */
877 0xA8B5, /* SAURASHTRA VOWEL SIGN AA */
878 0xA8B6, /* SAURASHTRA VOWEL SIGN I */
879 0xA8B7, /* SAURASHTRA VOWEL SIGN II */
880 0xA8B8, /* SAURASHTRA VOWEL SIGN U */
881 0xA8B9, /* SAURASHTRA VOWEL SIGN UU */
882 0xA8BA, /* SAURASHTRA VOWEL SIGN VOCALIC R */
883 0xA8BB, /* SAURASHTRA VOWEL SIGN VOCALIC RR */
884 0xA8BC, /* SAURASHTRA VOWEL SIGN VOCALIC L */
885 0xA8BD, /* SAURASHTRA VOWEL SIGN VOCALIC LL */
886 0xA8BE, /* SAURASHTRA VOWEL SIGN E */
887 0xA8BF, /* SAURASHTRA VOWEL SIGN EE */
888 0xA8C0, /* SAURASHTRA VOWEL SIGN AI */
889 0xA8C1, /* SAURASHTRA VOWEL SIGN O */
890 0xA8C2, /* SAURASHTRA VOWEL SIGN OO */
891 0xA8C3, /* SAURASHTRA VOWEL SIGN AU */
892 0xA952, /* REJANG CONSONANT SIGN H */
893 0xA953, /* REJANG VIRAMA */
894 0xAA2F, /* CHAM VOWEL SIGN O */
895 0xAA30, /* CHAM VOWEL SIGN AI */
896 0xAA33, /* CHAM CONSONANT SIGN YA */
897 0xAA34, /* CHAM CONSONANT SIGN RA */
898 0xAA4D /* CHAM CONSONANT SIGN FINAL H */
899 };
900 const pg_wchar *StopLow = strange_letter,
901 *StopHigh = strange_letter + lengthof(strange_letter),
902 *StopMiddle;
903 pg_wchar c;
904
905 c = *(prs->pgwstr + prs->state->poschar);
906
907 while (StopLow < StopHigh)
908 {
909 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
910 if (*StopMiddle == c)
911 return 1;
912 else if (*StopMiddle < c)
913 StopLow = StopMiddle + 1;
914 else
915 StopHigh = StopMiddle;
916 }
917 }
918
919 return 0;
920}
#define lengthof(array)
Definition: c.h:791
unsigned int pg_wchar
Definition: mbprint.c:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
int pg_dsplen(const char *mbstr)
Definition: mbutils.c:1031
@ PG_UTF8
Definition: pg_wchar.h:232
char * c
pg_wchar * pgwstr
Definition: wparser_def.c:246

References GetDatabaseEncoding(), lengthof, pg_dsplen(), PG_UTF8, TParser::pgwstr, TParserPosition::posbyte, TParserPosition::poschar, TParser::state, and TParser::str.

◆ p_isstophost()

static int p_isstophost ( TParser * prs)
static

Definition at line 570 of file wparser_def.c.

571{
572 if (prs->wanthost)
573 {
574 prs->wanthost = false;
575 return 1;
576 }
577 return 0;
578}

References TParser::wanthost.

◆ p_isurlchar()

static int p_isurlchar ( TParser * prs)
static

Definition at line 463 of file wparser_def.c.

464{
465 char ch;
466
467 /* no non-ASCII need apply */
468 if (prs->state->charlen != 1)
469 return 0;
470 ch = *(prs->str + prs->state->posbyte);
471 /* no spaces or control characters */
472 if (ch <= 0x20 || ch >= 0x7F)
473 return 0;
474 /* reject characters disallowed by RFC 3986 */
475 switch (ch)
476 {
477 case '"':
478 case '<':
479 case '>':
480 case '\\':
481 case '^':
482 case '`':
483 case '{':
484 case '|':
485 case '}':
486 return 0;
487 }
488 return 1;
489}

References TParserPosition::charlen, TParserPosition::posbyte, TParser::state, and TParser::str.

◆ p_isURLPath()

static int p_isURLPath ( TParser * prs)
static

Definition at line 615 of file wparser_def.c.

616{
617 TParser *tmpprs = TParserCopyInit(prs);
618 int res = 0;
619
620 tmpprs->state = newTParserPosition(tmpprs->state);
621 tmpprs->state->state = TPS_InURLPathFirst;
622
623 /*
624 * Check stack depth before recursing. (Since TParserGet() doesn't
625 * normally recurse, we put the cost of checking here not there.)
626 */
628
629 if (TParserGet(tmpprs) && tmpprs->type == URLPATH)
630 {
631 prs->state->posbyte += tmpprs->lenbytetoken;
632 prs->state->poschar += tmpprs->lenchartoken;
633 prs->state->lenbytetoken += tmpprs->lenbytetoken;
634 prs->state->lenchartoken += tmpprs->lenchartoken;
635 prs->state->charlen = tmpprs->state->charlen;
636 res = 1;
637 }
638 TParserCopyClose(tmpprs);
639
640 return res;
641}
TParserState state
Definition: wparser_def.c:236
static TParserPosition * newTParserPosition(TParserPosition *prev)
Definition: wparser_def.c:270
#define URLPATH
Definition: wparser_def.c:53

References TParserPosition::charlen, check_stack_depth(), TParserPosition::lenbytetoken, TParser::lenbytetoken, TParserPosition::lenchartoken, TParser::lenchartoken, newTParserPosition(), TParserPosition::posbyte, TParserPosition::poschar, TParserPosition::state, TParser::state, TParserCopyClose(), TParserCopyInit(), TParserGet(), TPS_InURLPathFirst, TParser::type, and URLPATH.

◆ p_iswhat()

p_iswhat ( alnum  ,
 
)

Definition at line 412 of file wparser_def.c.

/*
 * NOTE(review): this page files the listing under p_iswhat(), but the body
 * reads a character "c" that the p_iswhat(type, nonascii) macro does not
 * take.  It looks like the body of the p_iseq() helper that p_iseqC() and
 * p_isneC() invoke as p_iseq(prs, prs->c) -- confirm against the source
 * file; the signature is not visible here.
 *
 * Yields 1 when the current character is exactly one byte long
 * (charlen == 1) and that byte equals c; otherwise 0.
 */
426{
427 Assert(prs->state);
428 return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
429}

References Assert().

◆ prsd_end()

Datum prsd_end ( PG_FUNCTION_ARGS  )

Definition at line 1873 of file wparser_def.c.

1874{
1875 TParser *p = (TParser *) PG_GETARG_POINTER(0);
1876
1877 TParserClose(p);
1879}
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
static void TParserClose(TParser *prs)
Definition: wparser_def.c:342

References PG_GETARG_POINTER, PG_RETURN_VOID, and TParserClose().

◆ prsd_headline()

Datum prsd_headline ( PG_FUNCTION_ARGS  )

Definition at line 2571 of file wparser_def.c.

2572{
2574 List *prsoptions = (List *) PG_GETARG_POINTER(1);
2575 TSQuery query = PG_GETARG_TSQUERY(2);
2576 List *locations;
2577
2578 /* default option values: */
2579 int min_words = 15;
2580 int max_words = 35;
2581 int shortword = 3;
2582 int max_fragments = 0;
2583 bool highlightall = false;
2584 ListCell *l;
2585
2586 /* Extract configuration option values */
2587 prs->startsel = NULL;
2588 prs->stopsel = NULL;
2589 prs->fragdelim = NULL;
2590 foreach(l, prsoptions)
2591 {
2592 DefElem *defel = (DefElem *) lfirst(l);
2593 char *val = defGetString(defel);
2594
2595 if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
2596 max_words = pg_strtoint32(val);
2597 else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
2598 min_words = pg_strtoint32(val);
2599 else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
2600 shortword = pg_strtoint32(val);
2601 else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0)
2602 max_fragments = pg_strtoint32(val);
2603 else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
2604 prs->startsel = pstrdup(val);
2605 else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
2606 prs->stopsel = pstrdup(val);
2607 else if (pg_strcasecmp(defel->defname, "FragmentDelimiter") == 0)
2608 prs->fragdelim = pstrdup(val);
2609 else if (pg_strcasecmp(defel->defname, "HighlightAll") == 0)
2610 highlightall = (pg_strcasecmp(val, "1") == 0 ||
2611 pg_strcasecmp(val, "on") == 0 ||
2612 pg_strcasecmp(val, "true") == 0 ||
2613 pg_strcasecmp(val, "t") == 0 ||
2614 pg_strcasecmp(val, "y") == 0 ||
2615 pg_strcasecmp(val, "yes") == 0);
2616 else
2617 ereport(ERROR,
2618 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2619 errmsg("unrecognized headline parameter: \"%s\"",
2620 defel->defname)));
2621 }
2622
2623 /* in HighlightAll mode these parameters are ignored */
2624 if (!highlightall)
2625 {
2626 if (min_words >= max_words)
2627 ereport(ERROR,
2628 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2629 errmsg("%s must be less than %s", "MinWords", "MaxWords")));
2630 if (min_words <= 0)
2631 ereport(ERROR,
2632 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2633 errmsg("%s must be positive", "MinWords")));
2634 if (shortword < 0)
2635 ereport(ERROR,
2636 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2637 errmsg("%s must be >= 0", "ShortWord")));
2638 if (max_fragments < 0)
2639 ereport(ERROR,
2640 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2641 errmsg("%s must be >= 0", "MaxFragments")));
2642 }
2643
2644 /* Locate words and phrases matching the query */
2645 if (query->size > 0)
2646 {
2647 hlCheck ch;
2648
2649 ch.words = prs->words;
2650 ch.len = prs->curwords;
2651 locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
2653 }
2654 else
2655 locations = NIL; /* empty query matches nothing */
2656
2657 /* Apply appropriate headline selector */
2658 if (max_fragments == 0)
2659 mark_hl_words(prs, query, locations, highlightall, shortword,
2660 min_words, max_words);
2661 else
2662 mark_hl_fragments(prs, query, locations, highlightall, shortword,
2663 min_words, max_words, max_fragments);
2664
2665 /* Fill in default values for string options */
2666 if (!prs->startsel)
2667 prs->startsel = pstrdup("<b>");
2668 if (!prs->stopsel)
2669 prs->stopsel = pstrdup("</b>");
2670 if (!prs->fragdelim)
2671 prs->fragdelim = pstrdup(" ... ");
2672
2673 /* Caller will need these lengths, too */
2674 prs->startsellen = strlen(prs->startsel);
2675 prs->stopsellen = strlen(prs->stopsel);
2676 prs->fragdelimlen = strlen(prs->fragdelim);
2677
2678 PG_RETURN_POINTER(prs);
2679}
char * defGetString(DefElem *def)
Definition: define.c:35
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:150
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
char * pstrdup(const char *in)
Definition: mcxt.c:1759
int32 pg_strtoint32(const char *s)
Definition: numutils.c:383
#define NIL
Definition: pg_list.h:68
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
char * defname
Definition: parsenodes.h:843
Definition: pg_list.h:54
int32 size
Definition: ts_type.h:221
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:266
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:2007
static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words, int max_fragments)
Definition: wparser_def.c:2226
static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words)
Definition: wparser_def.c:2409

References checkcondition_HL(), HeadlineParsedText::curwords, defGetString(), DefElem::defname, ereport, errcode(), errmsg(), ERROR, HeadlineParsedText::fragdelim, HeadlineParsedText::fragdelimlen, GETQUERY, hlCheck::len, lfirst, mark_hl_fragments(), mark_hl_words(), NIL, PG_GETARG_POINTER, PG_GETARG_TSQUERY, PG_RETURN_POINTER, pg_strcasecmp(), pg_strtoint32(), pstrdup(), TSQueryData::size, HeadlineParsedText::startsel, HeadlineParsedText::startsellen, HeadlineParsedText::stopsel, HeadlineParsedText::stopsellen, TS_EXEC_EMPTY, TS_execute_locations(), val, hlCheck::words, and HeadlineParsedText::words.

◆ prsd_lextype()

Datum prsd_lextype ( PG_FUNCTION_ARGS  )

Definition at line 1833 of file wparser_def.c.

1834{
1835 LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
1836 int i;
1837
1838 for (i = 1; i <= LASTNUM; i++)
1839 {
1840 descr[i - 1].lexid = i;
1841 descr[i - 1].alias = pstrdup(tok_alias[i]);
1842 descr[i - 1].descr = pstrdup(lex_descr[i]);
1843 }
1844
1845 descr[LASTNUM].lexid = 0;
1846
1847 PG_RETURN_POINTER(descr);
1848}
char * alias
Definition: ts_public.h:28
int lexid
Definition: ts_public.h:27
char * descr
Definition: ts_public.h:29
#define LASTNUM
Definition: wparser_def.c:60
static const char *const tok_alias[]
Definition: wparser_def.c:62
static const char *const lex_descr[]
Definition: wparser_def.c:89

References LexDescr::alias, LexDescr::descr, i, LASTNUM, lex_descr, LexDescr::lexid, palloc(), PG_RETURN_POINTER, pstrdup(), and tok_alias.

◆ prsd_nexttoken()

Datum prsd_nexttoken ( PG_FUNCTION_ARGS  )

Definition at line 1857 of file wparser_def.c.

1858{
1859 TParser *p = (TParser *) PG_GETARG_POINTER(0);
1860 char **t = (char **) PG_GETARG_POINTER(1);
1861 int *tlen = (int *) PG_GETARG_POINTER(2);
1862
1863 if (!TParserGet(p))
1864 PG_RETURN_INT32(0);
1865
1866 *t = p->token;
1867 *tlen = p->lenbytetoken;
1868
1869 PG_RETURN_INT32(p->type);
1870}
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
char * token
Definition: wparser_def.c:258

References TParser::lenbytetoken, PG_GETARG_POINTER, PG_RETURN_INT32, TParser::token, TParserGet(), and TParser::type.

◆ prsd_start()

Datum prsd_start ( PG_FUNCTION_ARGS  )

Definition at line 1851 of file wparser_def.c.

1852{
1853 PG_RETURN_POINTER(TParserInit((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1)));
1854}
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
static TParser * TParserInit(char *str, int len)
Definition: wparser_def.c:287

References PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_POINTER, and TParserInit().

◆ SpecialFURL()

static void SpecialFURL ( TParser prs)
static

◆ SpecialHyphen()

static void SpecialHyphen ( TParser prs)
static

◆ SpecialTags()

static void SpecialTags ( TParser prs)
static

Definition at line 522 of file wparser_def.c.

523{
524 switch (prs->state->lenchartoken)
525 {
526 case 8: /* </script */
527 if (pg_strncasecmp(prs->token, "</script", 8) == 0)
528 prs->ignore = false;
529 break;
530 case 7: /* <script || </style */
531 if (pg_strncasecmp(prs->token, "</style", 7) == 0)
532 prs->ignore = false;
533 else if (pg_strncasecmp(prs->token, "<script", 7) == 0)
534 prs->ignore = true;
535 break;
536 case 6: /* <style */
537 if (pg_strncasecmp(prs->token, "<style", 6) == 0)
538 prs->ignore = true;
539 break;
540 default:
541 break;
542 }
543}
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69

References TParser::ignore, TParserPosition::lenchartoken, pg_strncasecmp(), TParser::state, and TParser::token.

◆ SpecialVerVersion()

static void SpecialVerVersion ( TParser prs)
static

◆ TParserClose()

static void TParserClose ( TParser prs)
static

Definition at line 342 of file wparser_def.c.

343{
344 while (prs->state)
345 {
346 TParserPosition *ptr = prs->state->prev;
347
348 pfree(prs->state);
349 prs->state = ptr;
350 }
351
352 if (prs->pgwstr)
353 pfree(prs->pgwstr);
354
355#ifdef WPARSER_TRACE
356 fprintf(stderr, "closing parser\n");
357#endif
358 pfree(prs);
359}
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21

References fprintf, pfree(), TParser::pgwstr, TParserPosition::prev, and TParser::state.

Referenced by prsd_end().

◆ TParserCopyClose()

static void TParserCopyClose ( TParser prs)
static

Definition at line 365 of file wparser_def.c.

366{
367 while (prs->state)
368 {
369 TParserPosition *ptr = prs->state->prev;
370
371 pfree(prs->state);
372 prs->state = ptr;
373 }
374
375#ifdef WPARSER_TRACE
376 fprintf(stderr, "closing parser copy\n");
377#endif
378 pfree(prs);
379}

References fprintf, pfree(), TParserPosition::prev, and TParser::state.

Referenced by p_ishost(), and p_isURLPath().

◆ TParserCopyInit()

static TParser * TParserCopyInit ( const TParser orig)
static

Definition at line 319 of file wparser_def.c.

320{
321 TParser *prs = (TParser *) palloc0(sizeof(TParser));
322
323 prs->charmaxlen = orig->charmaxlen;
324 prs->str = orig->str + orig->state->posbyte;
325 prs->lenstr = orig->lenstr - orig->state->posbyte;
326
327 if (orig->pgwstr)
328 prs->pgwstr = orig->pgwstr + orig->state->poschar;
329
330 prs->state = newTParserPosition(NULL);
331 prs->state->state = TPS_Base;
332
333#ifdef WPARSER_TRACE
334 fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
335#endif
336
337 return prs;
338}
void * palloc0(Size size)
Definition: mcxt.c:1395
int charmaxlen
Definition: wparser_def.c:249

References TParser::charmaxlen, fprintf, TParser::lenstr, newTParserPosition(), palloc0(), TParser::pgwstr, TParserPosition::posbyte, TParserPosition::poschar, TParserPosition::state, TParser::state, TParser::str, and TPS_Base.

Referenced by p_ishost(), and p_isURLPath().

◆ TParserGet()

static bool TParserGet ( TParser prs)
static

Definition at line 1665 of file wparser_def.c.

1666{
1667 const TParserStateActionItem *item = NULL;
1668
1669 CHECK_FOR_INTERRUPTS();
1670
1671 Assert(prs->state);
1672
1673 if (prs->state->posbyte >= prs->lenstr)
1674 return false;
1675
1676 prs->token = prs->str + prs->state->posbyte;
1677 prs->state->pushedAtAction = NULL;
1678
1679 /* look at string */
1680 while (prs->state->posbyte <= prs->lenstr)
1681 {
1682 if (prs->state->posbyte == prs->lenstr)
1683 prs->state->charlen = 0;
1684 else
1685 prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
1686 pg_mblen(prs->str + prs->state->posbyte);
1687
1688 Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
1689 Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
1690 Assert(Actions[prs->state->state].state == prs->state->state);
1691
1692 if (prs->state->pushedAtAction)
1693 {
1694 /* After a POP, pick up at the next test */
1695 item = prs->state->pushedAtAction + 1;
1696 prs->state->pushedAtAction = NULL;
1697 }
1698 else
1699 {
1700 item = Actions[prs->state->state].action;
1701 Assert(item != NULL);
1702 }
1703
1704 /* find action by character class */
1705 while (item->isclass)
1706 {
1707 prs->c = item->c;
1708 if (item->isclass(prs) != 0)
1709 break;
1710 item++;
1711 }
1712
1713#ifdef WPARSER_TRACE
1714 {
1715 TParserPosition *ptr;
1716
1717 fprintf(stderr, "state ");
1718 /* indent according to stack depth */
1719 for (ptr = prs->state->prev; ptr; ptr = ptr->prev)
1720 fprintf(stderr, " ");
1721 fprintf(stderr, "%s ", Actions[prs->state->state].state_name);
1722 if (prs->state->posbyte < prs->lenstr)
1723 fprintf(stderr, "at %c", *(prs->str + prs->state->posbyte));
1724 else
1725 fprintf(stderr, "at EOF");
1726 fprintf(stderr, " matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
1727 (int) (item - Actions[prs->state->state].action),
1728 (item->flags & A_BINGO) ? " BINGO" : "",
1729 (item->flags & A_POP) ? " POP" : "",
1730 (item->flags & A_PUSH) ? " PUSH" : "",
1731 (item->flags & A_RERUN) ? " RERUN" : "",
1732 (item->flags & A_CLEAR) ? " CLEAR" : "",
1733 (item->flags & A_MERGE) ? " MERGE" : "",
1734 (item->flags & A_CLRALL) ? " CLRALL" : "",
1735 (item->tostate != TPS_Null) ? " tostate " : "",
1736 (item->tostate != TPS_Null) ? Actions[item->tostate].state_name : "",
1737 (item->type > 0) ? " type " : "",
1738 tok_alias[item->type]);
1739 }
1740#endif
1741
1742 /* call special handler if exists */
1743 if (item->special)
1744 item->special(prs);
1745
1746 /* BINGO, token is found */
1747 if (item->flags & A_BINGO)
1748 {
1749 Assert(item->type > 0);
1750 prs->lenbytetoken = prs->state->lenbytetoken;
1751 prs->lenchartoken = prs->state->lenchartoken;
1752 prs->state->lenbytetoken = prs->state->lenchartoken = 0;
1753 prs->type = item->type;
1754 }
1755
1756 /* do various actions by flags */
1757 if (item->flags & A_POP)
1758 { /* pop stored state in stack */
1759 TParserPosition *ptr = prs->state->prev;
1760
1761 pfree(prs->state);
1762 prs->state = ptr;
1763 Assert(prs->state);
1764 }
1765 else if (item->flags & A_PUSH)
1766 { /* push (store) state in stack */
1767 prs->state->pushedAtAction = item; /* remember where we push */
1768 prs->state = newTParserPosition(prs->state);
1769 }
1770 else if (item->flags & A_CLEAR)
1771 { /* clear previous pushed state */
1772 TParserPosition *ptr;
1773
1774 Assert(prs->state->prev);
1775 ptr = prs->state->prev->prev;
1776 pfree(prs->state->prev);
1777 prs->state->prev = ptr;
1778 }
1779 else if (item->flags & A_CLRALL)
1780 { /* clear all previous pushed state */
1781 TParserPosition *ptr;
1782
1783 while (prs->state->prev)
1784 {
1785 ptr = prs->state->prev->prev;
1786 pfree(prs->state->prev);
1787 prs->state->prev = ptr;
1788 }
1789 }
1790 else if (item->flags & A_MERGE)
1791 { /* merge posinfo with current and pushed state */
1792 TParserPosition *ptr = prs->state;
1793
1794 Assert(prs->state->prev);
1795 prs->state = prs->state->prev;
1796
1797 prs->state->posbyte = ptr->posbyte;
1798 prs->state->poschar = ptr->poschar;
1799 prs->state->charlen = ptr->charlen;
1800 prs->state->lenbytetoken = ptr->lenbytetoken;
1801 prs->state->lenchartoken = ptr->lenchartoken;
1802 pfree(ptr);
1803 }
1804
1805 /* set new state if pointed */
1806 if (item->tostate != TPS_Null)
1807 prs->state->state = item->tostate;
1808
1809 /* check for go away */
1810 if ((item->flags & A_BINGO) ||
1811 (prs->state->posbyte >= prs->lenstr &&
1812 (item->flags & A_RERUN) == 0))
1813 break;
1814
1815 /* go to beginning of loop if we should rerun or we just restore state */
1816 if (item->flags & (A_RERUN | A_POP))
1817 continue;
1818
1819 /* move forward */
1820 if (prs->state->charlen)
1821 {
1822 prs->state->posbyte += prs->state->charlen;
1823 prs->state->lenbytetoken += prs->state->charlen;
1824 prs->state->poschar++;
1825 prs->state->lenchartoken++;
1826 }
1827 }
1828
1829 return (item && (item->flags & A_BINGO));
1830}
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1024
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
TParserCharTest isclass
Definition: wparser_def.c:211
TParserState tostate
Definition: wparser_def.c:214
TParserSpecial special
Definition: wparser_def.c:216
const TParserStateActionItem * action
Definition: wparser_def.c:1564
TParserState state
Definition: wparser_def.c:1565
#define A_POP
Definition: wparser_def.c:222
#define A_RERUN
Definition: wparser_def.c:224
static const TParserStateAction Actions[]
Definition: wparser_def.c:1583
#define A_MERGE
Definition: wparser_def.c:226
#define A_BINGO
Definition: wparser_def.c:221
#define A_CLRALL
Definition: wparser_def.c:227
#define A_CLEAR
Definition: wparser_def.c:225
#define A_PUSH
Definition: wparser_def.c:223

References A_BINGO, A_CLEAR, A_CLRALL, A_MERGE, A_POP, A_PUSH, A_RERUN, TParserStateAction::action, Actions, Assert(), TParserStateActionItem::c, TParser::c, TParserPosition::charlen, TParser::charmaxlen, CHECK_FOR_INTERRUPTS, TParserStateActionItem::flags, fprintf, TParserStateActionItem::isclass, TParserPosition::lenbytetoken, TParser::lenbytetoken, TParserPosition::lenchartoken, TParser::lenchartoken, TParser::lenstr, newTParserPosition(), pfree(), pg_mblen(), TParserPosition::posbyte, TParserPosition::poschar, TParserPosition::prev, TParserPosition::pushedAtAction, TParserStateActionItem::special, TParserPosition::state, TParser::state, TParserStateAction::state, TParser::str, tok_alias, TParser::token, TParserStateActionItem::tostate, TPS_Base, TPS_Null, TParserStateActionItem::type, and TParser::type.

Referenced by p_ishost(), p_isURLPath(), and prsd_nexttoken().

◆ TParserInit()

static TParser * TParserInit ( char *  str,
int  len 
)
static

Definition at line 287 of file wparser_def.c.

288{
289 TParser *prs = (TParser *) palloc0(sizeof(TParser));
290
291 prs->charmaxlen = pg_database_encoding_max_length();
292 prs->str = str;
293 prs->lenstr = len;
294 prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
295 pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
296
297 prs->state = newTParserPosition(NULL);
298 prs->state->state = TPS_Base;
299
300#ifdef WPARSER_TRACE
301 fprintf(stderr, "parsing \"%.*s\"\n", len, str);
302#endif
303
304 return prs;
305}
const char * str
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1547
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:987
const void size_t len

References TParser::charmaxlen, fprintf, len, TParser::lenstr, newTParserPosition(), palloc(), palloc0(), pg_database_encoding_max_length(), pg_mb2wchar_with_len(), TParser::pgwstr, TParserPosition::state, TParser::state, TParser::str, str, and TPS_Base.

Referenced by prsd_start().

Variable Documentation

◆ Actions

const TParserStateAction Actions[]
static

Definition at line 1583 of file wparser_def.c.

Referenced by TParserGet().

◆ actionTPS_Base

const TParserStateActionItem actionTPS_Base[]
static
Initial value:
= {
{p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL},
{p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InAsciiWord, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
{p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
{p_iseqC, '&', A_PUSH, TPS_InXMLEntityFirst, 0, NULL},
{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InPathFirstFirst, 0, NULL},
{NULL, 0, A_NEXT, TPS_InSpace, 0, NULL}
}
static int p_isasclet(TParser *prs)
Definition: wparser_def.c:457
static int p_isignore(TParser *prs)
Definition: wparser_def.c:581
#define A_NEXT
Definition: wparser_def.c:220

Definition at line 926 of file wparser_def.c.

◆ actionTPS_InAsciiWord

const TParserStateActionItem actionTPS_InAsciiWord[]
static
Initial value:
= {
{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
{p_isspecial, 0, A_NEXT, TPS_InWord, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL}
}
#define ASCIIWORD
Definition: wparser_def.c:36
static int p_isspecial(TParser *prs)
Definition: wparser_def.c:650

Definition at line 954 of file wparser_def.c.

◆ actionTPS_InCloseCommentFirst

const TParserStateActionItem actionTPS_InCloseCommentFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
}

Definition at line 1264 of file wparser_def.c.

◆ actionTPS_InCloseCommentLast

const TParserStateActionItem actionTPS_InCloseCommentLast[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL},
{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
}

Definition at line 1270 of file wparser_def.c.

◆ actionTPS_InComment

const TParserStateActionItem actionTPS_InComment[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
}

Definition at line 1258 of file wparser_def.c.

◆ actionTPS_InCommentEnd

const TParserStateActionItem actionTPS_InCommentEnd[]
static
Initial value:
= {
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}
}
#define TAG_T
Definition: wparser_def.c:48

Definition at line 1277 of file wparser_def.c.

◆ actionTPS_InCommentFirst

const TParserStateActionItem actionTPS_InCommentFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},
{p_iseqC, 'D', A_NEXT, TPS_InTag, 0, NULL},
{p_iseqC, 'd', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1243 of file wparser_def.c.

◆ actionTPS_InCommentLast

const TParserStateActionItem actionTPS_InCommentLast[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1252 of file wparser_def.c.

◆ actionTPS_InDecimal

const TParserStateActionItem actionTPS_InDecimal[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL}
}
#define DECIMAL_T
Definition: wparser_def.c:55

Definition at line 1046 of file wparser_def.c.

◆ actionTPS_InDecimalFirst

const TParserStateActionItem actionTPS_InDecimalFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1040 of file wparser_def.c.

◆ actionTPS_InEmail

const TParserStateActionItem actionTPS_InEmail[]
static
Initial value:
= {
{p_isstophost, 0, A_POP, TPS_Null, 0, NULL},
{p_ishost, 0, A_BINGO | A_CLRALL, TPS_Base, EMAIL, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
#define EMAIL
Definition: wparser_def.c:39
static int p_isstophost(TParser *prs)
Definition: wparser_def.c:570
static int p_ishost(TParser *prs)
Definition: wparser_def.c:587

Definition at line 1346 of file wparser_def.c.

◆ actionTPS_InFile

const TParserStateActionItem actionTPS_InFile[]
static
Initial value:
= {
{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
}
#define FILEPATH
Definition: wparser_def.c:54

Definition at line 1396 of file wparser_def.c.

◆ actionTPS_InFileFirst

const TParserStateActionItem actionTPS_InFileFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1352 of file wparser_def.c.

◆ actionTPS_InFileNext

const TParserStateActionItem actionTPS_InFileNext[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_CLEAR, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1407 of file wparser_def.c.

◆ actionTPS_InFileTwiddle

const TParserStateActionItem actionTPS_InFileTwiddle[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1362 of file wparser_def.c.

◆ actionTPS_InFURL

const TParserStateActionItem actionTPS_InFURL[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isURLPath, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
static void SpecialFURL(TParser *prs)
Definition: wparser_def.c:546
static int p_isURLPath(TParser *prs)
Definition: wparser_def.c:615
#define URL_T
Definition: wparser_def.c:40

Definition at line 1431 of file wparser_def.c.

◆ actionTPS_InHost

const TParserStateActionItem actionTPS_InHost[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1335 of file wparser_def.c.

◆ actionTPS_InHostDomain

const TParserStateActionItem actionTPS_InHostDomain[]
static
Initial value:
= {
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
}

Definition at line 1299 of file wparser_def.c.

◆ actionTPS_InHostDomainSecond

const TParserStateActionItem actionTPS_InHostDomainSecond[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1288 of file wparser_def.c.

◆ actionTPS_InHostFirstAN

const TParserStateActionItem actionTPS_InHostFirstAN[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1328 of file wparser_def.c.

◆ actionTPS_InHostFirstDomain

const TParserStateActionItem actionTPS_InHostFirstDomain[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1281 of file wparser_def.c.

◆ actionTPS_InHyphenAsciiWord

const TParserStateActionItem actionTPS_InHyphenAsciiWord[]
static
Initial value:
= {
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
}
static void SpecialHyphen(TParser *prs)
Definition: wparser_def.c:554
#define ASCIIHWORD
Definition: wparser_def.c:51

Definition at line 1461 of file wparser_def.c.

◆ actionTPS_InHyphenAsciiWordFirst

const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1453 of file wparser_def.c.

◆ actionTPS_InHyphenAsciiWordPart

const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[]
static
Initial value:
= {
{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
}
#define ASCIIPARTHWORD
Definition: wparser_def.c:46

Definition at line 1534 of file wparser_def.c.

◆ actionTPS_InHyphenDigitLookahead

const TParserStateActionItem actionTPS_InHyphenDigitLookahead[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1502 of file wparser_def.c.

◆ actionTPS_InHyphenNumWord

const TParserStateActionItem actionTPS_InHyphenNumWord[]
static
Initial value:

Definition at line 1494 of file wparser_def.c.

◆ actionTPS_InHyphenNumWordFirst

const TParserStateActionItem actionTPS_InHyphenNumWordFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1487 of file wparser_def.c.

◆ actionTPS_InHyphenNumWordPart

const TParserStateActionItem actionTPS_InHyphenNumWordPart[]
static
Initial value:
= {
{p_isalnum, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
}
#define NUMPARTHWORD
Definition: wparser_def.c:44

Definition at line 1543 of file wparser_def.c.

◆ actionTPS_InHyphenUnsignedInt

const TParserStateActionItem actionTPS_InHyphenUnsignedInt[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isalpha, 0, A_CLEAR, TPS_InHyphenNumWordPart, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1550 of file wparser_def.c.

◆ actionTPS_InHyphenWord

const TParserStateActionItem actionTPS_InHyphenWord[]
static
Initial value:
= {
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
}
#define HWORD
Definition: wparser_def.c:52

Definition at line 1478 of file wparser_def.c.

◆ actionTPS_InHyphenWordFirst

const TParserStateActionItem actionTPS_InHyphenWordFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1471 of file wparser_def.c.

◆ actionTPS_InHyphenWordPart

const TParserStateActionItem actionTPS_InHyphenWordPart[]
static
Initial value:
= {
{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
{NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHWORD, NULL}
}
#define PARTHWORD
Definition: wparser_def.c:45

Definition at line 1526 of file wparser_def.c.

◆ actionTPS_InMantissa

const TParserStateActionItem actionTPS_InMantissa[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}
}
#define SCIENTIFIC
Definition: wparser_def.c:42

Definition at line 1095 of file wparser_def.c.

◆ actionTPS_InMantissaFirst

const TParserStateActionItem actionTPS_InMantissaFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
{p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1081 of file wparser_def.c.

◆ actionTPS_InMantissaSign

const TParserStateActionItem actionTPS_InMantissaSign[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1089 of file wparser_def.c.

◆ actionTPS_InNumWord

const TParserStateActionItem actionTPS_InNumWord[]
static
Initial value:
= {
{p_isalnum, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, NUMWORD, NULL}
}
#define NUMWORD
Definition: wparser_def.c:38

Definition at line 943 of file wparser_def.c.

◆ actionTPS_InParseHyphen

const TParserStateActionItem actionTPS_InParseHyphen[]
static
Initial value:
= {
{p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHyphenUnsignedInt, 0, NULL},
{NULL, 0, A_RERUN, TPS_Base, 0, NULL}
}

Definition at line 1510 of file wparser_def.c.

◆ actionTPS_InParseHyphenHyphen

const TParserStateActionItem actionTPS_InParseHyphenHyphen[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalnum, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
#define SPACE
Definition: wparser_def.c:47

Definition at line 1519 of file wparser_def.c.

◆ actionTPS_InPathFirst

const TParserStateActionItem actionTPS_InPathFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1371 of file wparser_def.c.

◆ actionTPS_InPathFirstFirst

const TParserStateActionItem actionTPS_InPathFirstFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1381 of file wparser_def.c.

◆ actionTPS_InPathSecond

const TParserStateActionItem actionTPS_InPathSecond[]
static
Initial value:
= {
{p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
{p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1388 of file wparser_def.c.

◆ actionTPS_InPort

const TParserStateActionItem actionTPS_InPort[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
}

Definition at line 1320 of file wparser_def.c.

◆ actionTPS_InPortFirst

const TParserStateActionItem actionTPS_InPortFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1314 of file wparser_def.c.

◆ actionTPS_InProtocolEnd

const TParserStateActionItem actionTPS_InProtocolEnd[]
static
Initial value:
= {
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, PROTOCOL, NULL}
}
#define PROTOCOL
Definition: wparser_def.c:49

Definition at line 1449 of file wparser_def.c.

◆ actionTPS_InProtocolFirst

const TParserStateActionItem actionTPS_InProtocolFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1437 of file wparser_def.c.

◆ actionTPS_InProtocolSecond

const TParserStateActionItem actionTPS_InProtocolSecond[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1443 of file wparser_def.c.

◆ actionTPS_InSignedInt

const TParserStateActionItem actionTPS_InSignedInt[]
static
Initial value:
= {
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}
}
#define SIGNEDINT
Definition: wparser_def.c:56

Definition at line 1004 of file wparser_def.c.

◆ actionTPS_InSignedIntFirst

const TParserStateActionItem actionTPS_InSignedIntFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT | A_CLEAR, TPS_InSignedInt, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 998 of file wparser_def.c.

◆ actionTPS_InSpace

const TParserStateActionItem actionTPS_InSpace[]
static
Initial value:
= {
{p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL},
{p_isignore, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL},
{p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SPACE, NULL}
}

Definition at line 1013 of file wparser_def.c.

◆ actionTPS_InSVerVersion

const TParserStateActionItem actionTPS_InSVerVersion[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_BINGO | A_CLRALL, TPS_InUnsignedInt, SPACE, NULL},
{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
}

Definition at line 1061 of file wparser_def.c.

◆ actionTPS_InTag

const TParserStateActionItem actionTPS_InTag[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
{p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL},
{p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '&', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '?', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '~', A_NEXT, TPS_Null, 0, NULL},
{p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
static void SpecialTags(TParser *prs)
Definition: wparser_def.c:522

Definition at line 1198 of file wparser_def.c.

◆ actionTPS_InTagBackSleshed

const TParserStateActionItem actionTPS_InTagBackSleshed[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_MERGE, TPS_Null, 0, NULL}
}

Definition at line 1234 of file wparser_def.c.

◆ actionTPS_InTagBeginEnd

const TParserStateActionItem actionTPS_InTagBeginEnd[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1192 of file wparser_def.c.

◆ actionTPS_InTagCloseFirst

const TParserStateActionItem actionTPS_InTagCloseFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InTagName, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1172 of file wparser_def.c.

◆ actionTPS_InTagEnd

const TParserStateActionItem actionTPS_InTagEnd[]
static
Initial value:
= {
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}
}

Definition at line 1239 of file wparser_def.c.

◆ actionTPS_InTagEscapeK

const TParserStateActionItem actionTPS_InTagEscapeK[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
{p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL}
}

Definition at line 1220 of file wparser_def.c.

◆ actionTPS_InTagEscapeKK

const TParserStateActionItem actionTPS_InTagEscapeKK[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
{p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL}
}

Definition at line 1227 of file wparser_def.c.

◆ actionTPS_InTagFirst

const TParserStateActionItem actionTPS_InTagFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL},
{p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL},
{p_iseqC, '?', A_PUSH, TPS_InXMLBegin, 0, NULL},
{p_isasclet, 0, A_PUSH, TPS_InTagName, 0, NULL},
{p_iseqC, ':', A_PUSH, TPS_InTagName, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InTagName, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1153 of file wparser_def.c.

◆ actionTPS_InTagName

const TParserStateActionItem actionTPS_InTagName[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InTagBeginEnd, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
{p_isspace, 0, A_NEXT, TPS_InTag, 0, SpecialTags},
{p_isalnum, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1178 of file wparser_def.c.

◆ actionTPS_InUDecimal

const TParserStateActionItem actionTPS_InUDecimal[]
static
Initial value:
= {
{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL},
{p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL}
}

Definition at line 1031 of file wparser_def.c.

◆ actionTPS_InUDecimalFirst

const TParserStateActionItem actionTPS_InUDecimalFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1025 of file wparser_def.c.

◆ actionTPS_InUnsignedInt

const TParserStateActionItem actionTPS_InUnsignedInt[]
static
Initial value:
= {
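{p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL},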
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_isasclet, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
}
#define UNSIGNEDINT
Definition: wparser_def.c:57

Definition at line 981 of file wparser_def.c.

◆ actionTPS_InURLPath

const TParserStateActionItem actionTPS_InURLPath[]
static
Initial value:
= {
{p_isEOF, 0, A_BINGO, TPS_Base, URLPATH, NULL},
{p_isurlchar, 0, A_NEXT, TPS_InURLPath, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, URLPATH, NULL}
}
static int p_isurlchar(TParser *prs)
Definition: wparser_def.c:463

Definition at line 1425 of file wparser_def.c.

◆ actionTPS_InURLPathFirst

const TParserStateActionItem actionTPS_InURLPathFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isurlchar, 0, A_NEXT, TPS_InURLPath, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL},
}

Definition at line 1415 of file wparser_def.c.

◆ actionTPS_InURLPathStart

const TParserStateActionItem actionTPS_InURLPathStart[]
static
Initial value:
= {
{NULL, 0, A_NEXT, TPS_InURLPath, 0, NULL}
}

Definition at line 1421 of file wparser_def.c.

◆ actionTPS_InVersion

const TParserStateActionItem actionTPS_InVersion[]
static
Initial value:
= {
{p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
{p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}
}
#define VERSIONNUMBER
Definition: wparser_def.c:43

Definition at line 1074 of file wparser_def.c.

◆ actionTPS_InVersionFirst

const TParserStateActionItem actionTPS_InVersionFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1068 of file wparser_def.c.

◆ actionTPS_InVerVersion

const TParserStateActionItem actionTPS_InVerVersion[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}
static void SpecialVerVersion(TParser *prs)
Definition: wparser_def.c:561

Definition at line 1055 of file wparser_def.c.

◆ actionTPS_InWord

const TParserStateActionItem actionTPS_InWord[]
static
Initial value:
= {
{p_isEOF, 0, A_BINGO, TPS_Base, WORD_T, NULL},
{p_isalpha, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isspecial, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenWordFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, WORD_T, NULL}
}
#define WORD_T
Definition: wparser_def.c:37

Definition at line 972 of file wparser_def.c.

◆ actionTPS_InXMLBegin

const TParserStateActionItem actionTPS_InXMLBegin[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1164 of file wparser_def.c.

◆ actionTPS_InXMLEntity

const TParserStateActionItem actionTPS_InXMLEntity[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalnum, 0, A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, ':', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1110 of file wparser_def.c.

◆ actionTPS_InXMLEntityEnd

const TParserStateActionItem actionTPS_InXMLEntityEnd[]
static
Initial value:
= {
{NULL, 0, A_BINGO | A_CLEAR, TPS_Base, XMLENTITY, NULL}
}
#define XMLENTITY
Definition: wparser_def.c:58

Definition at line 1149 of file wparser_def.c.

◆ actionTPS_InXMLEntityFirst

const TParserStateActionItem actionTPS_InXMLEntityFirst[]
static
Initial value:
= {
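{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '#', A_NEXT, TPS_InXMLEntityNumFirst, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InXMLEntity, 0, NULL},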
{p_iseqC, ':', A_NEXT, TPS_InXMLEntity, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InXMLEntity, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1101 of file wparser_def.c.

◆ actionTPS_InXMLEntityHexNum

const TParserStateActionItem actionTPS_InXMLEntityHexNum[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isxdigit, 0, A_NEXT, TPS_InXMLEntityHexNum, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1142 of file wparser_def.c.

◆ actionTPS_InXMLEntityHexNumFirst

const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isxdigit, 0, A_NEXT, TPS_InXMLEntityHexNum, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1129 of file wparser_def.c.

◆ actionTPS_InXMLEntityNum

const TParserStateActionItem actionTPS_InXMLEntityNum[]
static
Initial value:
= {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InXMLEntityNum, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1135 of file wparser_def.c.

◆ actionTPS_InXMLEntityNumFirst

const TParserStateActionItem actionTPS_InXMLEntityNumFirst[]
static
Initial value:
= {
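{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, 'x', A_NEXT, TPS_InXMLEntityHexNumFirst, 0, NULL},
{p_iseqC, 'X', A_NEXT, TPS_InXMLEntityHexNumFirst, 0, NULL},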
{p_isdigit, 0, A_NEXT, TPS_InXMLEntityNum, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
}

Definition at line 1121 of file wparser_def.c.

◆ lex_descr

const char* const lex_descr[]
static
Initial value:
= {
"",
"Word, all ASCII",
"Word, all letters",
"Word, letters and digits",
"Email address",
"URL",
"Host",
"Scientific notation",
"Version number",
"Hyphenated word part, letters and digits",
"Hyphenated word part, all letters",
"Hyphenated word part, all ASCII",
"Space symbols",
"XML tag",
"Protocol head",
"Hyphenated word, letters and digits",
"Hyphenated word, all ASCII",
"Hyphenated word, all letters",
"URL path",
"File or path name",
"Decimal notation",
"Signed integer",
"Unsigned integer",
"XML entity"
}

Definition at line 89 of file wparser_def.c.

Referenced by prsd_lextype().

◆ tok_alias

const char* const tok_alias[]
static
Initial value:
= {
"",
"asciiword",
"word",
"numword",
"email",
"url",
"host",
"sfloat",
"version",
"hword_numpart",
"hword_part",
"hword_asciipart",
"blank",
"tag",
"protocol",
"numhword",
"asciihword",
"hword",
"url_path",
"file",
"float",
"int",
"uint",
"entity"
}

Definition at line 62 of file wparser_def.c.

Referenced by prsd_lextype(), and TParserGet().