PostgreSQL Source Code  git master
ts_type.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ts_type.h
4  * Definitions for the tsvector and tsquery types
5  *
6  * Copyright (c) 1998-2024, PostgreSQL Global Development Group
7  *
8  * src/include/tsearch/ts_type.h
9  *
10  *-------------------------------------------------------------------------
11  */
12 #ifndef _PG_TSTYPE_H_
13 #define _PG_TSTYPE_H_
14 
15 #include "fmgr.h"
16 #include "utils/memutils.h"
17 
18 
19 /*
20  * TSVector type.
21  *
22  * Structure of tsvector datatype:
23  * 1) standard varlena header
24  * 2) int32 size - number of lexemes (WordEntry array entries)
25  * 3) Array of WordEntry - one per lexeme; must be sorted according to
26  * tsCompareString() (ie, memcmp of lexeme strings).
27  * WordEntry->pos gives the number of bytes from end of WordEntry
28  * array to start of lexeme's string, which is of length len.
29  * 4) Per-lexeme data storage:
30  * lexeme string (not null-terminated)
31  * if haspos is true:
32  * padding byte if necessary to make the position data 2-byte aligned
33  * uint16 number of positions that follow
34  * WordEntryPos[] positions
35  *
36  * The positions for each lexeme must be sorted.
37  *
38  * Note: tsvectorsend/recv assume that sizeof(WordEntry) == 4
39  */
40 
41 typedef struct
42 {
43  uint32
44  haspos:1, /* set when position data follows the lexeme string */
45  len:11, /* lexeme length in bytes; 11 bits, so at most MAXSTRLEN (2Kb - 1) */
46  pos:20; /* bytes from end of WordEntry array to lexeme start; 20 bits, so at most MAXSTRPOS (1Mb - 1) */
47 } WordEntry;
48 
49 #define MAXSTRLEN ( (1<<11) - 1) /* largest value that fits in the 11-bit WordEntry.len */
50 #define MAXSTRPOS ( (1<<20) - 1) /* largest value that fits in the 20-bit WordEntry.pos */
51 
52 extern int compareWordEntryPos(const void *a, const void *b); /* comparator over two untyped pointers, defined outside this header; NOTE(review): presumably orders WordEntryPos values - confirm at the definition */
53 
54 /*
55  * Equivalent to
56  * typedef struct {
57  * uint16
58  * weight:2,
59  * pos:14;
60  * }
61  */
62 
64 
65 typedef struct
66 {
70 
71 /* WordEntryPosVector with exactly 1 entry */
72 typedef struct
73 {
75  WordEntryPos pos[1];
77 
78 
79 #define WEP_GETWEIGHT(x) ( (x) >> 14 ) /* weight: the bits above the 14-bit position field */
80 #define WEP_GETPOS(x) ( (x) & 0x3fff ) /* position: the low 14 bits */
81 
82 #define WEP_SETWEIGHT(x,v) ( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) ) /* assigns to x, keeping its position bits; v is not masked; x is evaluated twice */
83 #define WEP_SETPOS(x,v) ( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) ) /* assigns to x, keeping bits 14-15; v is truncated to 14 bits; x is evaluated twice */
84 
85 #define MAXENTRYPOS (1<<14) /* one more than the largest value LIMITPOS lets through */
86 #define MAXNUMPOS (256) /* NOTE(review): presumably the cap on positions kept per lexeme - confirm at use sites */
87 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) ) /* clamps x to at most MAXENTRYPOS-1; x may be evaluated twice */
88 
89 /* This struct represents a complete tsvector datum */
90 typedef struct
91 {
92  int32 vl_len_; /* varlena header (do not touch directly!) */
95  /* lexemes follow the entries[] array */
96 } TSVectorData;
97 
99 
100 #define DATAHDRSIZE (offsetof(TSVectorData, entries)) /* bytes that precede the entries[] array */
101 #define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) ) /* header + nentries WordEntry slots + lenstr further bytes */
102 
103 /* pointer to start of a tsvector's WordEntry array (x points to a TSVectorData) */
104 #define ARRPTR(x) ( (x)->entries )
105 
106 /* pointer to start of a tsvector's lexeme storage, i.e. just past entries[size]; x is evaluated twice */
107 #define STRPTR(x) ( (char *) &(x)->entries[(x)->size] )
108 
109 #define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))) /* position vector of entry e: past e's lexeme string, at the SHORTALIGN'ed offset; does not test haspos */
110 #define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 ) /* number of positions stored for e, or 0 when haspos is clear */
111 #define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos) /* positions array of e; does not test haspos, so only meaningful when it is set */
112 
113 /*
114  * fmgr interface functions
115  */
116 
117 static inline TSVector
119 {
120  return (TSVector) PG_DETOAST_DATUM(X);
121 }
122 
123 static inline TSVector
125 {
126  return (TSVector) PG_DETOAST_DATUM_COPY(X);
127 }
128 
129 static inline Datum
131 {
132  return PointerGetDatum(X);
133 }
134 
135 #define PG_GETARG_TSVECTOR(n) DatumGetTSVector(PG_GETARG_DATUM(n)) /* argument n converted with DatumGetTSVector */
136 #define PG_GETARG_TSVECTOR_COPY(n) DatumGetTSVectorCopy(PG_GETARG_DATUM(n)) /* argument n converted with DatumGetTSVectorCopy */
137 #define PG_RETURN_TSVECTOR(x) return TSVectorGetDatum(x) /* expands to a return statement; the caller supplies the semicolon */
138 
139 
140 /*
141  * TSQuery
142  *
143  *
144  */
145 
147 
148 /* Valid values for QueryItemType: */
149 #define QI_VAL 1 /* NOTE(review): presumably marks an operand node - confirm */
150 #define QI_OPR 2 /* NOTE(review): presumably marks an operator node - confirm */
151 #define QI_VALSTOP 3 /* This is only used in an intermediate stack
152  * representation in parse_tsquery. It's not a
153  * legal type elsewhere. */
154 
155 /*
156  * QueryItem is one node in tsquery - operator or operand.
157  */
158 typedef struct
159 {
160  QueryItemType type; /* operand or kind of operator (ts_tokentype) */
161  uint8 weight; /* weights of operand to search. It's a
162  * bitmask of allowed weights; if it is 0,
163  * any weight is allowed. Weight-to-bit
164  * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
165  bool prefix; /* true if it's a prefix search */
166  int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
167  * data type, but we use comparisons to signed
168  * integers in the code. They would need to be
169  * changed as well. */
170 
171  /* pointer to text value of operand, must correlate with WordEntry */
173  length:12, /* NOTE(review): presumably the length of the operand text - confirm */
174  distance:20; /* NOTE(review): presumably the operand text's offset within operand storage - confirm */
175 } QueryOperand;
176 
177 
178 /*
179  * Legal values for QueryOperator.operator.
180  */
181 #define OP_NOT 1
182 #define OP_AND 2
183 #define OP_OR 3
184 #define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
185 #define OP_COUNT 4 /* number of operator codes above; also the length of tsearch_op_priority[] */
186 
187 extern PGDLLIMPORT const int tsearch_op_priority[OP_COUNT]; /* one priority per operator code, indexed by code - 1 */
188 
189 /* get operation priority by its code; codes start at 1, hence the -1; no range check is done */
190 #define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
191 /* get priority of the QueryOperator that x points to; x is cast, not type-checked */
192 #define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
193 
194 typedef struct
195 {
197  int8 oper; /* one of the OP_* codes above */
198  int16 distance; /* distance between args for OP_PHRASE */
199  uint32 left; /* offset to left operand: right operand is at
200  * item + 1, left operand is at
201  * item + item->left */
202 } QueryOperator;
203 
204 /*
205  * Note: TSQuery is 4-byte aligned, so make sure there are no fields
206  * inside QueryItem requiring 8-byte alignment, like int64.
207  */
208 typedef union
209 {
211  QueryOperator qoperator;
212  QueryOperand qoperand;
213 } QueryItem;
214 
215 /*
216  * Storage:
217  * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
218  */
219 
220 typedef struct
221 {
222  int32 vl_len_; /* varlena header (do not touch directly!) */
223  int32 size; /* number of QueryItems */
224  char data[FLEXIBLE_ARRAY_MEMBER]; /* data starts here */
226 
228 
229 #define HDRSIZETQ ( VARHDRSZ + sizeof(int32) ) /* bytes before the first QueryItem: VARHDRSZ plus one int32 */
230 
231 /* Computes the total size of a TSQuery: header, all QueryItems, and the
232  * operand text. size is the number of QueryItems, and lenofoperand is
233  * the total length of all operands. */
234 #define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
235 #define TSQUERY_TOO_BIG(size, lenofoperand) \
236  ((size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem)) /* true when size exceeds the number of QueryItems that fit in MaxAllocSize after the header and operands */
237 
238 /* Returns a pointer to the first QueryItem in a TSQuery, HDRSIZETQ bytes past its start */
239 #define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
240 
241 /* Returns a pointer to the beginning of operands in a TSQuery, just past its size QueryItems; x is evaluated twice */
242 #define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
243 
244 /*
245  * fmgr interface functions
246  * Note: the TSQuery type is marked as plain storage, so it can't be toasted,
247  * but PG_DETOAST_DATUM_COPY is used for simplicity.
248  */
249 
250 static inline TSQuery
252 {
253  return (TSQuery) DatumGetPointer(X);
254 }
255 
256 static inline TSQuery
258 {
259  return (TSQuery) PG_DETOAST_DATUM_COPY(X);
260 }
261 
262 static inline Datum
264 {
265  return PointerGetDatum(X);
266 }
267 
268 #define PG_GETARG_TSQUERY(n) DatumGetTSQuery(PG_GETARG_DATUM(n)) /* argument n converted with DatumGetTSQuery */
269 #define PG_GETARG_TSQUERY_COPY(n) DatumGetTSQueryCopy(PG_GETARG_DATUM(n)) /* argument n converted with DatumGetTSQueryCopy */
270 #define PG_RETURN_TSQUERY(x) return TSQueryGetDatum(x) /* expands to a return statement; the caller supplies the semicolon */
271 
272 #endif /* _PG_TSTYPE_H_ */
unsigned short uint16
Definition: c.h:508
unsigned int uint32
Definition: c.h:509
#define PGDLLIMPORT
Definition: c.h:1319
signed char int8
Definition: c.h:495
signed short int16
Definition: c.h:496
signed int int32
Definition: c.h:497
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:401
unsigned char uint8
Definition: c.h:507
#define PG_DETOAST_DATUM_COPY(datum)
Definition: fmgr.h:242
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
int b
Definition: isn.c:70
int a
Definition: isn.c:69
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:370
const void * data
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static pg_noinline void Size size
Definition: slab.c:607
int32 vl_len_
Definition: ts_type.h:92
int32 size
Definition: ts_type.h:93
uint32 pos
Definition: ts_type.h:46
uint32 haspos
Definition: ts_type.h:44
uint32 len
Definition: ts_type.h:45
#define OP_COUNT
Definition: ts_type.h:183
static TSQuery DatumGetTSQuery(Datum X)
Definition: ts_type.h:249
static TSVector DatumGetTSVector(Datum X)
Definition: ts_type.h:118
static Datum TSVectorGetDatum(const TSVectorData *X)
Definition: ts_type.h:130
TSQueryData * TSQuery
Definition: ts_type.h:225
uint16 WordEntryPos
Definition: ts_type.h:63
int8 QueryItemType
Definition: ts_type.h:146
PGDLLIMPORT const int tsearch_op_priority[OP_COUNT]
Definition: tsquery.c:29
TSVectorData * TSVector
Definition: ts_type.h:98
static TSQuery DatumGetTSQueryCopy(Datum X)
Definition: ts_type.h:255
int compareWordEntryPos(const void *a, const void *b)
Definition: tsvector.c:36
static TSVector DatumGetTSVectorCopy(Datum X)
Definition: ts_type.h:124
static Datum TSQueryGetDatum(const TSQueryData *X)
Definition: ts_type.h:261
const char * type