PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
ts_public.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * ts_public.h
4 * Public interface to various tsearch modules, such as
5 * parsers and dictionaries.
6 *
7 * Copyright (c) 1998-2024, PostgreSQL Global Development Group
8 *
9 * src/include/tsearch/ts_public.h
10 *
11 *-------------------------------------------------------------------------
12 */
13#ifndef _PG_TS_PUBLIC_H_
14#define _PG_TS_PUBLIC_H_
15
16#include "tsearch/ts_type.h"
17
18/*
19 * Parser's framework
20 */
21
22/*
23 * return type for the prslextype method of a parser
24 */
25typedef struct
26{
27 int lexid; /* presumably the numeric id of a token type -- confirm */
28 char *alias; /* presumably a short name for the token type -- confirm */
29 char *descr; /* presumably a human-readable description -- confirm */
30} LexDescr;
31
32/*
33 * Interface to headline generator (tsparser's prsheadline function)
34 *
35 * HeadlineParsedText describes the text that is to be highlighted.
36 * Some fields are passed from the core code to the prsheadline function,
37 * while others are output from the prsheadline function.
38 *
39 * The principal data is words[], an array of HeadlineWordEntry,
40 * one entry per token, of length curwords.
41 * The fields of HeadlineWordEntry are:
42 *
43 * in, selected, replace, skip: these flags are initially zero
44 * and may be set by the prsheadline function. A consecutive group
45 * of tokens marked "in" form a "fragment" to be output.
46 * Such tokens may additionally be marked selected, replace, or skip
47 * to modify how they are shown. (If you set more than one of those
48 * bits, you get an unspecified one of those behaviors.)
49 *
50 * type, len, pos, word: filled by core code to describe the token.
51 *
52 * item: if the token matches any operand of the tsquery of interest,
53 * a pointer to such an operand. (If there are multiple matching
54 * operands, we generate extra copies of the HeadlineWordEntry to hold
55 * all the pointers. The extras are marked with repeated = 1 and should
56 * be ignored except for checking the item pointer.)
57 */
58typedef struct
59{
60 uint32 selected:1, /* token is to be highlighted */
61 in:1, /* token is part of headline */
62 replace:1, /* token is to be replaced with a space */
63 repeated:1, /* duplicate entry to hold item pointer */
64 skip:1, /* token is to be skipped (not output) */
65 unused:3, /* available bits */
66 type:8, /* parser's token category */
67 len:16; /* length of token */
68 WordEntryPos pos; /* position of token */
69 char *word; /* text of token (not null-terminated) */
70 QueryOperand *item; /* a matching query operand, or NULL if none */
71} HeadlineWordEntry;
72
73typedef struct
74{
75 /* Fields filled by core code before calling prsheadline function: */
76 HeadlineWordEntry *words; /* array of per-token entries */
77 int32 lenwords; /* allocated length of words[] */
78 int32 curwords; /* current number of valid entries */
79 int32 vectorpos; /* used by ts_parse.c in filling pos fields */
80
81 /* The prsheadline function must fill these fields: */
82 /* Strings for marking selected tokens and separating fragments: */
83 char *startsel; /* palloc'd strings */
84 char *stopsel;
85 char *fragdelim;
86 int16 startsellen; /* lengths of strings */
87 int16 stopsellen;
88 int16 fragdelimlen;
89} HeadlineParsedText;
90
91/*
92 * Common useful things for tsearch subsystem (implemented in ts_utils.c)
93 */
94extern char *get_tsearch_config_filename(const char *basename,
95 const char *extension);
96
97/*
98 * Commonly useful stopword list management
99 */
100typedef struct
101{
102 int len; /* presumably the number of entries in stop[] -- confirm */
103 char **stop; /* presumably the array of stopword strings -- confirm */
104} StopList;
105
106extern void readstoplist(const char *fname, StopList *s,
107 char *(*wordop) (const char *, size_t, Oid)); /* defined in ts_utils.c */
108extern bool searchstoplist(StopList *s, char *key); /* defined in ts_utils.c */
109
110/*
111 * Interface with dictionaries
112 */
113
114/* return struct for any lexize function */
115typedef struct
116{
117 /*----------
118 * Number of current variant of split word. For example the Norwegian
119 * word 'fotballklubber' has two variants to split: ( fotball, klubb )
120 * and ( fot, ball, klubb ). So, dictionary should return:
121 *
122 * nvariant lexeme
123 * 1 fotball
124 * 1 klubb
125 * 2 fot
126 * 2 ball
127 * 2 klubb
128 *
129 * In general, a TSLexeme will be considered to belong to the same split
130 * variant as the previous one if they have the same nvariant value.
131 * The exact values don't matter, only changes from one lexeme to next.
132 *----------
133 */
134 uint16 nvariant; /* split-variant number, as described above */
135
136 uint16 flags; /* See flag bits below */
137
138 char *lexeme; /* C string */
139} TSLexeme;
140
141/* Flag bits that can appear in TSLexeme.flags (distinct single-bit values) */
142#define TSL_ADDPOS 0x01
143#define TSL_PREFIX 0x02
144#define TSL_FILTER 0x04
145
146/*
147 * Struct for supporting complex dictionaries like thesaurus.
148 * The 4th argument of the dictlexize method is a pointer to this struct.
149 */
150typedef struct
151{
152 bool isend; /* in: tells the lexize function that the end
153 * of the text has been reached */
154 bool getnext; /* out: dict wants next lexeme */
155 void *private_state; /* internal dict state between calls with
156 * getnext == true */
157} DictSubState;
159#endif /* _PG_TS_PUBLIC_H_ */
int16_t int16
Definition: c.h:480
int32_t int32
Definition: c.h:481
uint16_t uint16
Definition: c.h:484
uint32_t uint32
Definition: c.h:485
unsigned int Oid
Definition: postgres_ext.h:31
void * private_state
Definition: ts_public.h:155
bool getnext
Definition: ts_public.h:154
HeadlineWordEntry * words
Definition: ts_public.h:76
WordEntryPos pos
Definition: ts_public.h:68
QueryOperand * item
Definition: ts_public.h:70
char * alias
Definition: ts_public.h:28
int lexid
Definition: ts_public.h:27
char * descr
Definition: ts_public.h:29
int len
Definition: ts_public.h:102
char ** stop
Definition: ts_public.h:103
uint16 nvariant
Definition: ts_public.h:134
char * lexeme
Definition: ts_public.h:138
uint16 flags
Definition: ts_public.h:136
void readstoplist(const char *fname, StopList *s, char *(*wordop)(const char *, size_t, Oid))
Definition: ts_utils.c:69
bool searchstoplist(StopList *s, char *key)
Definition: ts_utils.c:141
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:34
uint16 WordEntryPos
Definition: ts_type.h:63