PostgreSQL Source Code  git master
ts_public.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ts_public.h
4  * Public interface to various tsearch modules, such as
5  * parsers and dictionaries.
6  *
7  * Copyright (c) 1998-2024, PostgreSQL Global Development Group
8  *
9  * src/include/tsearch/ts_public.h
10  *
11  *-------------------------------------------------------------------------
12  */
13 #ifndef _PG_TS_PUBLIC_H_
14 #define _PG_TS_PUBLIC_H_
15 
16 #include "tsearch/ts_type.h"
17 
18 /*
19  * Parser's framework
20  */
21 
/*
 * Return type of a parser's prslextype method.
 *
 * Each LexDescr describes one token type the parser can produce.
 */
typedef struct
{
	int			lexid;			/* token type identifier */
	char	   *alias;			/* alias (short name) of the token type */
	char	   *descr;			/* description of the token type */
} LexDescr;
31 
32 /*
33  * Interface to headline generator (tsparser's prsheadline function)
34  *
35  * HeadlineParsedText describes the text that is to be highlighted.
36  * Some fields are passed from the core code to the prsheadline function,
37  * while others are output from the prsheadline function.
38  *
39  * The principal data is words[], an array of HeadlineWordEntry,
40  * one entry per token, of length curwords.
41  * The fields of HeadlineWordEntry are:
42  *
43  * in, selected, replace, skip: these flags are initially zero
44  * and may be set by the prsheadline function. A consecutive group
45  * of tokens marked "in" form a "fragment" to be output.
46  * Such tokens may additionally be marked selected, replace, or skip
47  * to modify how they are shown. (If you set more than one of those
48  * bits, you get an unspecified one of those behaviors.)
49  *
50  * type, len, pos, word: filled by core code to describe the token.
51  *
52  * item: if the token matches any operand of the tsquery of interest,
53  * a pointer to such an operand. (If there are multiple matching
54  * operands, we generate extra copies of the HeadlineWordEntry to hold
55  * all the pointers. The extras are marked with repeated = 1 and should
56  * be ignored except for checking the item pointer.)
57  */
58 typedef struct
59 {
60  uint32 selected:1, /* token is to be highlighted */
61  in:1, /* token is part of headline */
62  replace:1, /* token is to be replaced with a space */
63  repeated:1, /* duplicate entry to hold item pointer */
64  skip:1, /* token is to be skipped (not output) */
65  unused:3, /* available bits */
66  type:8, /* parser's token category */
67  len:16; /* length of token */
68  WordEntryPos pos; /* position of token */
69  char *word; /* text of token (not null-terminated) */
70  QueryOperand *item; /* a matching query operand, or NULL if none */
72 
73 typedef struct
74 {
75  /* Fields filled by core code before calling prsheadline function: */
77  int32 lenwords; /* allocated length of words[] */
78  int32 curwords; /* current number of valid entries */
79  int32 vectorpos; /* used by ts_parse.c in filling pos fields */
80 
81  /* The prsheadline function must fill these fields: */
82  /* Strings for marking selected tokens and separating fragments: */
83  char *startsel; /* palloc'd strings */
84  char *stopsel;
85  char *fragdelim;
86  int16 startsellen; /* lengths of strings */
90 
/*
 * Common useful things for tsearch subsystem
 */

/*
 * Returns a file name string built from the given base name and
 * extension.  The implementation lives in ts_utils.c.
 * NOTE(review): where the file is looked up and who owns the returned
 * string are not visible from this header -- confirm in ts_utils.c.
 */
extern char *get_tsearch_config_filename(const char *basename,
										 const char *extension);
96 
/*
 * Often useful stopword list management
 */

/* A list of stopwords: "stop" is an array holding "len" strings. */
typedef struct
{
	int			len;			/* number of entries in stop[] */
	char	  **stop;			/* the stopwords themselves */
} StopList;

/*
 * readstoplist fills *s from the file identified by fname.  wordop is a
 * caller-supplied function mapping a string to a (new) string.
 * NOTE(review): presumably wordop is applied to each word as it is
 * loaded and may be NULL -- confirm in ts_utils.c.
 */
extern void readstoplist(const char *fname, StopList *s,
						 char *(*wordop) (const char *));

/* searchstoplist reports whether key is present in the stopword list *s. */
extern bool searchstoplist(StopList *s, char *key);
109 
110 /*
111  * Interface with dictionaries
112  */
113 
114 /* return struct for any lexize function */
115 typedef struct
116 {
117  /*----------
118  * Number of current variant of split word. For example the Norwegian
119  * word 'fotballklubber' has two variants to split: ( fotball, klubb )
120  * and ( fot, ball, klubb ). So, dictionary should return:
121  *
122  * nvariant lexeme
123  * 1 fotball
124  * 1 klubb
125  * 2 fot
126  * 2 ball
127  * 2 klubb
128  *
129  * In general, a TSLexeme will be considered to belong to the same split
130  * variant as the previous one if they have the same nvariant value.
131  * The exact values don't matter, only changes from one lexeme to next.
132  *----------
133  */
135 
136  uint16 flags; /* See flag bits below */
137 
138  char *lexeme; /* C string */
139 } TSLexeme;
140 
/*
 * Flag bits that can appear in TSLexeme.flags.
 *
 * Each flag occupies its own bit, so several may be OR'ed together.
 */
#define TSL_ADDPOS		0x01
#define TSL_PREFIX		0x02
#define TSL_FILTER		0x04
145 
/*
 * Struct for supporting complex dictionaries like thesaurus.
 * The 4th argument of the dictlexize method is a pointer to this.
 */
typedef struct
{
	bool		isend;			/* in: tells the lexize function that the end
								 * of the text has been reached */
	bool		getnext;		/* out: dict wants next lexeme */
	void	   *private_state;	/* internal dict state between calls with
								 * getnext == true */
} DictSubState;
158 
159 #endif /* _PG_TS_PUBLIC_H_ */
unsigned short uint16
Definition: c.h:492
unsigned int uint32
Definition: c.h:493
signed short int16
Definition: c.h:480
signed int int32
Definition: c.h:481
void * private_state
Definition: ts_public.h:155
bool getnext
Definition: ts_public.h:154
HeadlineWordEntry * words
Definition: ts_public.h:76
WordEntryPos pos
Definition: ts_public.h:68
QueryOperand * item
Definition: ts_public.h:70
char * alias
Definition: ts_public.h:28
int lexid
Definition: ts_public.h:27
char * descr
Definition: ts_public.h:29
int len
Definition: ts_public.h:102
char ** stop
Definition: ts_public.h:103
uint16 nvariant
Definition: ts_public.h:134
char * lexeme
Definition: ts_public.h:138
uint16 flags
Definition: ts_public.h:136
void readstoplist(const char *fname, StopList *s, char *(*wordop)(const char *))
Definition: ts_utils.c:68
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:33
bool searchstoplist(StopList *s, char *key)
Definition: ts_utils.c:140
uint16 WordEntryPos
Definition: ts_type.h:63