PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ts_type.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ts_type.h
4  * Definitions for the tsvector and tsquery types
5  *
6  * Copyright (c) 1998-2017, PostgreSQL Global Development Group
7  *
8  * src/include/tsearch/ts_type.h
9  *
10  *-------------------------------------------------------------------------
11  */
12 #ifndef _PG_TSTYPE_H_
13 #define _PG_TSTYPE_H_
14 
15 #include "fmgr.h"
16 #include "utils/memutils.h"
17 
18 
19 /*
20  * TSVector type.
21  *
22  * Structure of tsvector datatype:
23  * 1) standard varlena header
24  * 2) int32 size - number of lexemes (WordEntry array entries)
25  * 3) Array of WordEntry - one per lexeme; must be sorted according to
26  * tsCompareString() (ie, memcmp of lexeme strings).
27  * WordEntry->pos gives the number of bytes from end of WordEntry
28  * array to start of lexeme's string, which is of length len.
29  * 4) Per-lexeme data storage:
30  * lexeme string (not null-terminated)
31  * if haspos is true:
32  * padding byte if necessary to make the position data 2-byte aligned
33  * uint16 number of positions that follow
34  * WordEntryPos[] positions
35  *
36  * The positions for each lexeme must be sorted.
37  *
38  * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
39  */
40 
41 typedef struct
42 {
43  uint32
44  haspos:1, /* set when position data follows the lexeme string */
45  len:11, /* lexeme length in bytes; 11 bits, so at most 2047 (MAXSTRLEN) */
46  pos:20; /* lexeme offset in bytes from end of WordEntry array; 20 bits, at most MAXSTRPOS */
47 } WordEntry;
48 
49 #define MAXSTRLEN ( (1<<11) - 1) /* largest value that fits in WordEntry.len */
50 #define MAXSTRPOS ( (1<<20) - 1) /* largest value that fits in WordEntry.pos */
51 
52 extern int compareWordEntryPos(const void *a, const void *b); /* defined in tsvector.c */
53 
54 /*
55  * Equivalent to
56  * typedef struct {
57  * uint16
58  * weight:2,
59  * pos:14;
60  * }
61  */
62 
64 
65 typedef struct
66 {
68  WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
70 
71 /* WordEntryPosVector with exactly 1 entry */
72 typedef struct
73 {
75  WordEntryPos pos[1];
77 
78 
79 #define WEP_GETWEIGHT(x) ( (x) >> 14 ) /* weight: the bits above the 14-bit position */
80 #define WEP_GETPOS(x) ( (x) & 0x3fff ) /* position: the low 14 bits */
81 
82 #define WEP_SETWEIGHT(x,v) ( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) ) /* replace weight, keep position; v is not masked, x is evaluated twice */
83 #define WEP_SETPOS(x,v) ( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) ) /* replace position (masked to 14 bits), keep bits 14-15; x is evaluated twice */
84 
85 #define MAXENTRYPOS (1<<14) /* one past the largest position that fits in 14 bits */
86 #define MAXNUMPOS (256) /* NOTE(review): presumably the cap on positions kept per lexeme -- confirm against callers */
87 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) ) /* clamp x to at most MAXENTRYPOS-1; evaluates x twice */
88 
89 /*
90  * If a TSVector is built from several parts that must be treated as
91  * separate, an artificial increment has to be added to the positions of the
92  * lexemes in each subsequent part. This prevents a tsquery from matching a
93  * phrase made up of lexemes that come from two different parts.
94  * TS_JUMP defines the value of this increment.
95  */
96 #define TS_JUMP 1
97 
98 /* This struct represents a complete tsvector datum */
99 typedef struct
100 {
101  int32 vl_len_; /* varlena header (do not touch directly!) */
103  WordEntry entries[FLEXIBLE_ARRAY_MEMBER];
104  /* lexemes follow the entries[] array */
105 } TSVectorData;
106 
108 
109 #define DATAHDRSIZE (offsetof(TSVectorData, entries)) /* bytes that precede the entries[] array */
110 #define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) ) /* header + nentries WordEntry items + lenstr bytes of per-lexeme storage */
111 
112 /* pointer to start of a tsvector's WordEntry array */
113 #define ARRPTR(x) ( (x)->entries )
114 
115 /* pointer to start of a tsvector's lexeme storage, i.e. just past the last of the (x)->size WordEntry items */
116 #define STRPTR(x) ( (char *) &(x)->entries[(x)->size] )
117 
118 #define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))) /* position vector of entry e: located after the lexeme string, at a SHORTALIGN'ed offset */
119 #define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 ) /* number of positions stored for e; 0 when haspos is not set */
120 #define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos) /* positions array of e; does not test haspos itself */
121 
122 /*
123  * fmgr interface macros
124  */
125 
126 #define DatumGetTSVector(X) ((TSVector) PG_DETOAST_DATUM(X)) /* Datum -> TSVector, through PG_DETOAST_DATUM */
127 #define DatumGetTSVectorCopy(X) ((TSVector) PG_DETOAST_DATUM_COPY(X)) /* Datum -> TSVector, through PG_DETOAST_DATUM_COPY */
128 #define TSVectorGetDatum(X) PointerGetDatum(X) /* TSVector -> Datum */
129 #define PG_GETARG_TSVECTOR(n) DatumGetTSVector(PG_GETARG_DATUM(n)) /* fetch function argument n as a TSVector */
130 #define PG_GETARG_TSVECTOR_COPY(n) DatumGetTSVectorCopy(PG_GETARG_DATUM(n)) /* fetch function argument n using the _COPY conversion */
131 #define PG_RETURN_TSVECTOR(x) return TSVectorGetDatum(x) /* expands to a complete return statement */
132 
133 
134 /*
135  * TSQuery
136  *
137  *
138  */
139 
141 
142 /* Valid values for QueryItemType: */
143 #define QI_VAL 1 /* NOTE(review): presumably an operand node (QueryOperand) -- confirm */
144 #define QI_OPR 2 /* NOTE(review): presumably an operator node (QueryOperator) -- confirm */
145 #define QI_VALSTOP 3 /* This is only used in an intermediate stack
146  * representation in parse_tsquery. It's not a
147  * legal type elsewhere. */
148 
149 /*
150  * QueryItem is one node in tsquery - operator or operand.
151  */
152 typedef struct
153 {
154  QueryItemType type; /* operand or kind of operator (ts_tokentype) */
155  uint8 weight; /* weights of operand to search. It's a
156  * bitmask of allowed weights; if it is 0 then
157  * any weight is allowed. Weights and bit
158  * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
159  bool prefix; /* true if it's a prefix search */
160  int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
161  * data type, but we use comparisons to signed
162  * integers in the code. They would need to be
163  * changed as well. */
164 
165  /* location of the operand's text value; must correlate with WordEntry */
166  uint32
167  length:12, /* length of the operand text */
168  distance:20; /* NOTE(review): presumably the offset of the text within the operand area (see GETOPERAND) -- confirm */
169 } QueryOperand;
170 
171 
172 /*
173  * Legal values for QueryOperator.oper.
174  */
175 #define OP_NOT 1
176 #define OP_AND 2
177 #define OP_OR 3
178 #define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
179 #define OP_COUNT 4 /* number of operator codes; length of tsearch_op_priority[] */
180 
181 extern const int tsearch_op_priority[OP_COUNT]; /* defined in tsquery.c */
182 
183 /* get operation priority by its code (codes start at 1, the array at 0) */
184 #define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
185 /* get priority of the QueryOperator that x points to */
186 #define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
187 
188 typedef struct
189 {
191  int8 oper; /* operator code, one of the OP_* values */
192  int16 distance; /* distance between args for OP_PHRASE */
193  uint32 left; /* offset to left operand. Right operand is
194  * item + 1, left operand is placed at
195  * item+item->left */
196 } QueryOperator;
197 
198 /*
199  * Note: TSQuery is 4-bytes aligned, so make sure there's no fields
200  * inside QueryItem requiring 8-byte alignment, like int64.
201  */
202 typedef union
203 {
207 } QueryItem;
208 
209 /*
210  * Storage:
211  * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
212  */
213 
214 typedef struct
215 {
216  int32 vl_len_; /* varlena header (do not touch directly!) */
217  int32 size; /* number of QueryItems */
218  char data[FLEXIBLE_ARRAY_MEMBER]; /* data starts here: the QueryItem array, then the operand strings */
219 } TSQueryData;
220 
222 
223 #define HDRSIZETQ ( VARHDRSZ + sizeof(int32) ) /* varlena header plus the int32 size field */
224 
225 /* Computes the total size of a TSQuery: header, all QueryItems and all
226  * operands. size is the number of QueryItems, and lenofoperand is the
227  * total length of all operands. */
228 #define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
229 #define TSQUERY_TOO_BIG(size, lenofoperand) \
230  ((size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem)) /* true when COMPUTESIZE(size, lenofoperand) would exceed MaxAllocSize; written as a division so (size) * sizeof(QueryItem) is never computed */
231 
232 /* Returns a pointer to the first QueryItem in a TSQuery */
233 #define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
234 
235 /* Returns a pointer to the beginning of operands in a TSQuery, i.e. just past its last QueryItem */
236 #define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
237 
238 /*
239  * fmgr interface macros
240  * Note: the TSQuery type is marked as plain storage, so it can't be toasted;
241  * the _COPY variant still goes through PG_DETOAST_DATUM_COPY for simplicity.
242  */
243 
244 #define DatumGetTSQuery(X) ((TSQuery) DatumGetPointer(X)) /* plain pointer conversion, no detoasting */
245 #define DatumGetTSQueryCopy(X) ((TSQuery) PG_DETOAST_DATUM_COPY(X))
246 #define TSQueryGetDatum(X) PointerGetDatum(X)
247 #define PG_GETARG_TSQUERY(n) DatumGetTSQuery(PG_GETARG_DATUM(n))
248 #define PG_GETARG_TSQUERY_COPY(n) DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
249 #define PG_RETURN_TSQUERY(x) return TSQueryGetDatum(x) /* expands to a complete return statement */
250 
251 #endif /* _PG_TSTYPE_H_ */
uint16 WordEntryPos
Definition: ts_type.h:63
signed short int16
Definition: c.h:255
QueryOperator qoperator
Definition: ts_type.h:205
int length(const List *list)
Definition: list.c:1271
unsigned char uint8
Definition: c.h:266
TSQueryData * TSQuery
Definition: ts_type.h:221
int16 distance
Definition: ts_type.h:192
signed int int32
Definition: c.h:256
unsigned short uint16
Definition: c.h:267
const int tsearch_op_priority[OP_COUNT]
Definition: tsquery.c:27
uint8 weight
Definition: ts_type.h:155
int32 size
Definition: ts_type.h:102
int32 valcrc
Definition: ts_type.h:160
unsigned int uint32
Definition: c.h:268
int32 vl_len_
Definition: ts_type.h:101
signed char int8
Definition: c.h:254
#define OP_COUNT
Definition: ts_type.h:179
QueryItemType type
Definition: ts_type.h:204
TSVectorData * TSVector
Definition: ts_type.h:107
QueryItemType type
Definition: ts_type.h:190
int8 QueryItemType
Definition: ts_type.h:140
uint32 left
Definition: ts_type.h:193
int32 size
Definition: ts_type.h:217
QueryItemType type
Definition: ts_type.h:154
bool prefix
Definition: ts_type.h:159
QueryOperand qoperand
Definition: ts_type.h:206
int compareWordEntryPos(const void *a, const void *b)
Definition: tsvector.c:33
int32 vl_len_
Definition: ts_type.h:216