PostgreSQL Source Code git master
Loading...
Searching...
No Matches
tsvector.c File Reference
#include "postgres.h"
#include "common/int.h"
#include "libpq/pqformat.h"
#include "nodes/miscnodes.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/fmgrprotos.h"
#include "utils/memutils.h"
#include "varatt.h"
Include dependency graph for tsvector.c:

Go to the source code of this file.

Data Structures

struct  WordEntryIN
 

Functions

int compareWordEntryPos (const void *a, const void *b)
 
static int uniquePos (WordEntryPos *a, int l)
 
static int compareentry (const void *va, const void *vb, void *arg)
 
static int uniqueentry (WordEntryIN *a, int l, char *buf, int *outbuflen)
 
Datum tsvectorin (PG_FUNCTION_ARGS)
 
Datum tsvectorout (PG_FUNCTION_ARGS)
 
Datum tsvectorsend (PG_FUNCTION_ARGS)
 
Datum tsvectorrecv (PG_FUNCTION_ARGS)
 

Function Documentation

◆ compareentry()

static int compareentry ( const void *va,
const void *vb,
void *arg
)
static

Definition at line 87 of file tsvector.c.

/*
 * Comparator with the (va, vb, arg) shape that qsort_arg() is called with
 * elsewhere in this file.  va and vb are read as WordEntry pointers; arg is
 * the character buffer that each entry's pos offset indexes into.  The
 * result is whatever tsCompareString() returns for the two lexemes, with
 * its final (prefix) argument false.
 *
 * NOTE(review): uniqueentry() sorts WordEntryIN elements with this, which
 * presumably relies on entry being the first member of WordEntryIN --
 * confirm against the struct definition.
 */
88{
89 const WordEntry *a = (const WordEntry *) va;
90 const WordEntry *b = (const WordEntry *) vb;
91 char *BufferStr = (char *) arg;
92
93 return tsCompareString(&BufferStr[a->pos], a->len,
94 &BufferStr[b->pos], b->len,
95 false);
96}
int b
Definition isn.c:74
int a
Definition isn.c:73
void * arg
static int fb(int x)
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)

References a, arg, b, fb(), and tsCompareString().

Referenced by tsvectorrecv(), and uniqueentry().

◆ compareWordEntryPos()

int compareWordEntryPos ( const void *a,
const void *b
)

Definition at line 36 of file tsvector.c.

/*
 * Comparator for two WordEntryPos values passed by pointer.  Only the
 * WEP_GETPOS() part of each value is compared; the result is the
 * pg_cmp_s32() ordering of those two integers.  WEP_GETWEIGHT() is not
 * consulted here.
 */
37{
38 int apos = WEP_GETPOS(*(const WordEntryPos *) a);
39 int bpos = WEP_GETPOS(*(const WordEntryPos *) b);
40
41 return pg_cmp_s32(apos, bpos);
42}
static int pg_cmp_s32(int32 a, int32 b)
Definition int.h:713
#define WEP_GETPOS(x)
Definition ts_type.h:80
uint16 WordEntryPos
Definition ts_type.h:63

References a, b, fb(), pg_cmp_s32(), and WEP_GETPOS.

Referenced by checkcondition_str(), and uniquePos().

◆ tsvectorin()

Datum tsvectorin ( PG_FUNCTION_ARGS  )

Definition at line 175 of file tsvector.c.

/*
 * Build a TSVector from the C-string argument.  Tokens are pulled from
 * gettoken_tsvector(), their bytes are accumulated in tmpbuf and their
 * descriptors in arr, duplicates are merged by uniqueentry(), and the
 * result is then laid out in a freshly zeroed TSVector.
 *
 * Errors detected here are reported through ereturn() with escontext,
 * except the positions-array overflow check, which uses elog(ERROR).
 *
 * NOTE(review): this listing omits several source lines (the gaps in the
 * line numbers: 179, 183, 205, 212, 219, 242, 260, 264, 273, 295, 310).
 * The declarations of state and inarr, the allocation of arr, the
 * errcode() arguments, and the final return are therefore not visible
 * here -- consult the real file before relying on this text.
 */
176{
177 char *buf = PG_GETARG_CSTRING(0);
178 Node *escontext = fcinfo->context;
180 WordEntryIN *arr;
181 int totallen;
182 int arrlen; /* allocated size of arr */
184 int len = 0;
185 TSVector in;
186 int i;
187 char *token;
188 int toklen;
189 WordEntryPos *pos;
190 int poslen;
191 char *strbuf;
192 int stroff;
193
194 /*
195 * Tokens are appended to tmpbuf, cur is a pointer to the end of used
196 * space in tmpbuf.
197 */
198 char *tmpbuf;
199 char *cur;
200 int buflen = 256; /* allocated size of tmpbuf */
201
202 state = init_tsvector_parser(buf, 0, escontext);
203
204 arrlen = 64;
206 cur = tmpbuf = palloc_array(char, buflen);
207
208 while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
209 {
210 if (toklen >= MAXSTRLEN)
211 ereturn(escontext, (Datum) 0,
213 errmsg("word is too long (%d bytes, max %d bytes)",
214 toklen,
215 MAXSTRLEN - 1)));
216
/*
 * This test runs before the current token is appended, so it looks at
 * the bytes accumulated from earlier tokens only.
 */
217 if (cur - tmpbuf > MAXSTRPOS)
218 ereturn(escontext, (Datum) 0,
220 errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
221 (long) (cur - tmpbuf), (long) MAXSTRPOS)));
222
223 /*
224 * Enlarge buffers if needed
225 */
226 if (len >= arrlen)
227 {
228 arrlen *= 2;
229 arr = (WordEntryIN *)
230 repalloc(arr, sizeof(WordEntryIN) * arrlen);
231 }
/*
 * Double tmpbuf until the token fits.  cur is re-derived from its saved
 * offset because tmpbuf is reassigned by repalloc().
 */
232 while ((cur - tmpbuf) + toklen >= buflen)
233 {
234 int dist = cur - tmpbuf;
235
236 buflen *= 2;
237 tmpbuf = (char *) repalloc(tmpbuf, buflen);
238 cur = tmpbuf + dist;
239 }
/* Record the token as (offset into tmpbuf, length). */
240 arr[len].entry.len = toklen;
241 arr[len].entry.pos = cur - tmpbuf;
/*
 * NOTE(review): source line 242 is missing from this listing; presumably
 * it copies the token bytes to cur before cur is advanced -- confirm.
 */
243 cur += toklen;
244
245 if (poslen != 0)
246 {
/* arr keeps the parser-provided pos array; it is pfree()d further down. */
247 arr[len].entry.haspos = 1;
248 arr[len].pos = pos;
249 arr[len].poslen = poslen;
250 }
251 else
252 {
253 arr[len].entry.haspos = 0;
254 arr[len].pos = NULL;
255 arr[len].poslen = 0;
256 }
257 len++;
258 }
259
261
262 /* Did gettoken_tsvector fail? */
263 if (SOFT_ERROR_OCCURRED(escontext))
265
/*
 * uniqueentry() returns the entry count after merging duplicates and
 * stores the data-area size it computed into buflen, replacing the
 * allocated-size meaning buflen had above.
 */
266 if (len > 0)
267 len = uniqueentry(arr, len, tmpbuf, &buflen);
268 else
269 buflen = 0;
270
271 if (buflen > MAXSTRPOS)
272 ereturn(escontext, (Datum) 0,
274 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
275
276 totallen = CALCDATASIZE(len, buflen);
277 in = (TSVector) palloc0(totallen);
278 SET_VARSIZE(in, totallen);
279 in->size = len;
280 inarr = ARRPTR(in);
281 strbuf = STRPTR(in);
282 stroff = 0;
283 for (i = 0; i < len; i++)
284 {
/*
 * Copy the lexeme into the result, then repoint entry.pos from its
 * tmpbuf offset to its offset within strbuf.
 */
285 memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
286 arr[i].entry.pos = stroff;
287 stroff += arr[i].entry.len;
288 if (arr[i].entry.haspos)
289 {
290 /* This should be unreachable because of MAXNUMPOS restrictions */
291 if (arr[i].poslen > 0xFFFF)
292 elog(ERROR, "positions array too long");
293
294 /* Copy number of positions */
/*
 * NOTE(review): source line 295 is missing from this listing.  The
 * uint16 store below needs stroff to be 2-byte aligned, and SHORTALIGN
 * is in this function's reference list, so the alignment step is
 * presumably on that line -- confirm.
 */
296 *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
297 stroff += sizeof(uint16);
298
299 /* Copy positions */
300 memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
301 stroff += arr[i].poslen * sizeof(WordEntryPos);
302
303 pfree(arr[i].pos);
304 }
305 inarr[i] = arr[i].entry;
306 }
307
/* Everything written must add up to exactly the size computed earlier. */
308 Assert((strbuf + stroff - (char *) in) == totallen);
309
311}
#define Assert(condition)
Definition c.h:873
#define SHORTALIGN(LEN)
Definition c.h:822
uint16_t uint16
Definition c.h:545
#define ARRPTR(x)
Definition cube.c:28
struct cursor * cur
Definition ecpg.c:29
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ereturn(context, dummy_value,...)
Definition elog.h:278
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define PG_GETARG_CSTRING(n)
Definition fmgr.h:278
#define PG_RETURN_NULL()
Definition fmgr.h:346
#define CALCDATASIZE(x, lenstr)
Definition hstore.h:72
#define STRPTR(x)
Definition hstore.h:76
#define token
int i
Definition isn.c:77
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
#define SOFT_ERROR_OCCURRED(escontext)
Definition miscnodes.h:53
const void size_t len
static char buf[DEFAULT_XLOG_SEG_SIZE]
uint64_t Datum
Definition postgres.h:70
Definition nodes.h:135
int32 size
Definition ts_type.h:93
WordEntryPos * pos
Definition tsvector.c:29
WordEntry entry
Definition tsvector.c:28
uint32 pos
Definition ts_type.h:46
uint32 haspos
Definition ts_type.h:44
uint32 len
Definition ts_type.h:45
#define PG_RETURN_TSVECTOR(x)
Definition ts_type.h:137
TSVectorData * TSVector
Definition ts_type.h:98
#define MAXSTRLEN
Definition ts_type.h:49
#define MAXSTRPOS
Definition ts_type.h:50
static int uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
Definition tsvector.c:103
void close_tsvector_parser(TSVectorParseState state)
bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
TSVectorParseState init_tsvector_parser(char *input, int flags, Node *escontext)
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
static StringInfoData tmpbuf
Definition walsender.c:178

References ARRPTR, Assert, buf, CALCDATASIZE, close_tsvector_parser(), cur, elog, WordEntryIN::entry, ereturn, errcode(), errmsg(), ERROR, fb(), gettoken_tsvector(), WordEntry::haspos, i, init_tsvector_parser(), WordEntry::len, len, MAXSTRLEN, MAXSTRPOS, palloc0(), palloc_array, pfree(), PG_GETARG_CSTRING, PG_RETURN_NULL, PG_RETURN_TSVECTOR, WordEntryIN::pos, WordEntry::pos, WordEntryIN::poslen, repalloc(), SET_VARSIZE(), SHORTALIGN, TSVectorData::size, SOFT_ERROR_OCCURRED, STRPTR, tmpbuf, token, and uniqueentry().

◆ tsvectorout()

Datum tsvectorout ( PG_FUNCTION_ARGS  )

Definition at line 314 of file tsvector.c.

/*
 * Produce the C-string form of a TSVector.  Entries are separated by a
 * single space; each lexeme is wrapped in single quotes, with every
 * single-quote or backslash character inside it doubled.  If the entry
 * has positions, a colon and a comma-separated list follows, each item
 * being the decimal position optionally suffixed by a weight letter.
 *
 * NOTE(review): this listing omits source lines 316, 344 and 358, so the
 * declarations of out, len and wptr are not visible here.  The reference
 * list names PG_GETARG_TSVECTOR and pg_mblen_range, which presumably
 * supply out and len respectively -- confirm.
 */
315{
317 char *outbuf;
318 int32 i,
319 lenbuf = 0,
320 pp;
321 WordEntry *ptr = ARRPTR(out);
322 char *curin,
323 *curout;
324 const char *curend;
325
/*
 * Size the output buffer up front: fixed per-entry overhead, then room
 * for each lexeme (doubled for escaping, times the maximum encoding
 * length) and seven bytes per position.
 */
326 lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
327 for (i = 0; i < out->size; i++)
328 {
329 lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
330 if (ptr[i].haspos)
331 lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
332 }
333
334 curout = outbuf = (char *) palloc(lenbuf);
/* From here on ptr walks the entry array; it is advanced at the loop end. */
335 for (i = 0; i < out->size; i++)
336 {
337 curin = STRPTR(out) + ptr->pos;
338 curend = curin + ptr->len;
339 if (i != 0)
340 *curout++ = ' ';
341 *curout++ = '\'';
342 while (curin < curend)
343 {
345
/* Emit one extra copy of a quote or backslash ahead of the character. */
346 if (t_iseq(curin, '\''))
347 *curout++ = '\'';
348 else if (t_iseq(curin, '\\'))
349 *curout++ = '\\';
350
/* Copy len bytes, i.e. the current character, to the output. */
351 while (len--)
352 *curout++ = *curin++;
353 }
354
355 *curout++ = '\'';
356 if ((pp = POSDATALEN(out, ptr)) != 0)
357 {
359
360 *curout++ = ':';
361 wptr = POSDATAPTR(out, ptr);
362 while (pp)
363 {
364 curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
/* Weights 3, 2, 1 print as A, B, C; weight 0 prints no letter. */
365 switch (WEP_GETWEIGHT(*wptr))
366 {
367 case 3:
368 *curout++ = 'A';
369 break;
370 case 2:
371 *curout++ = 'B';
372 break;
373 case 1:
374 *curout++ = 'C';
375 break;
376 case 0:
377 default:
378 break;
379 }
380
/* Comma after every position except the last. */
381 if (pp > 1)
382 *curout++ = ',';
383 pp--;
384 wptr++;
385 }
386 }
387 ptr++;
388 }
389
390 *curout = '\0';
391 PG_FREE_IF_COPY(out, 0);
392 PG_RETURN_CSTRING(outbuf);
393}
int32_t int32
Definition c.h:542
#define PG_FREE_IF_COPY(ptr, n)
Definition fmgr.h:260
#define PG_RETURN_CSTRING(x)
Definition fmgr.h:364
int pg_mblen_range(const char *mbstr, const char *end)
Definition mbutils.c:1084
int pg_database_encoding_max_length(void)
Definition mbutils.c:1672
void * palloc(Size size)
Definition mcxt.c:1387
#define sprintf
Definition port.h:262
#define t_iseq(x, c)
Definition ts_locale.h:38
#define PG_GETARG_TSVECTOR(n)
Definition ts_type.h:135
#define POSDATALEN(x, e)
Definition ts_type.h:110
#define POSDATAPTR(x, e)
Definition ts_type.h:111
#define WEP_GETWEIGHT(x)
Definition ts_type.h:79

References ARRPTR, fb(), i, WordEntry::len, len, palloc(), pg_database_encoding_max_length(), PG_FREE_IF_COPY, PG_GETARG_TSVECTOR, pg_mblen_range(), PG_RETURN_CSTRING, WordEntry::pos, POSDATALEN, POSDATAPTR, TSVectorData::size, sprintf, STRPTR, t_iseq, WEP_GETPOS, and WEP_GETWEIGHT.

◆ tsvectorrecv()

Datum tsvectorrecv ( PG_FUNCTION_ARGS  )

Definition at line 447 of file tsvector.c.

/*
 * Build a TSVector from a binary message.  The message is read as an
 * int32 entry count followed, per entry, by a null-terminated lexeme and
 * a uint16 position count.  Each entry is validated with elog(ERROR) on
 * failure, and the result buffer is doubled as needed while filling.
 *
 * NOTE(review): this listing omits source lines 449, 450, 461, 501, 521,
 * 539, 552 and 554.  Not visible here: the declarations of buf, vec and
 * wepptr, the condition guarding the first elog, the statement inside
 * the enlarge loop, the read of each position value, the remaining
 * qsort_arg arguments, and the final return.
 */
448{
451 int i;
452 int32 nentries;
453 int datalen; /* number of bytes used in the variable size
454 * area after fixed size TSVector header and
455 * WordEntries */
456 Size hdrlen;
457 Size len; /* allocated size of vec */
458 bool needSort = false;
459
460 nentries = pq_getmsgint(buf, sizeof(int32));
462 elog(ERROR, "invalid size of tsvector");
463
464 hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries;
465
466 len = hdrlen * 2; /* times two to make room for lexemes */
467 vec = (TSVector) palloc0(len);
468 vec->size = nentries;
469
470 datalen = 0;
471 for (i = 0; i < nentries; i++)
472 {
473 const char *lexeme;
474 uint16 npos;
475 size_t lex_len;
476
477 lexeme = pq_getmsgstring(buf);
478 npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
479
480 /* sanity checks */
481
482 lex_len = strlen(lexeme);
483 if (lex_len > MAXSTRLEN)
484 elog(ERROR, "invalid tsvector: lexeme too long");
485
/* datalen here is the total accumulated before this entry is added. */
486 if (datalen > MAXSTRPOS)
487 elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded");
488
489 if (npos > MAXNUMPOS)
490 elog(ERROR, "unexpected number of tsvector positions");
491
492 /*
493 * Looks valid. Fill the WordEntry struct, and copy lexeme.
494 *
495 * But make sure the buffer is large enough first.
496 */
497 while (hdrlen + SHORTALIGN(datalen + lex_len) +
498 sizeof(uint16) + npos * sizeof(WordEntryPos) >= len)
499 {
500 len *= 2;
502 }
503
504 vec->entries[i].haspos = (npos > 0) ? 1 : 0;
505 vec->entries[i].len = lex_len;
506 vec->entries[i].pos = datalen;
507
508 memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
509
510 datalen += lex_len;
511
/*
 * An entry that does not sort strictly after its predecessor is not
 * rejected here; it only sets needSort, which is acted on after the
 * loop.
 */
512 if (i > 0 && compareentry(&vec->entries[i],
513 &vec->entries[i - 1],
514 STRPTR(vec)) <= 0)
515 needSort = true;
516
517 /* Receive positions */
518 if (npos > 0)
519 {
520 uint16 j;
522
523 /*
524 * Pad to 2-byte alignment if necessary. Though we used palloc0
525 * for the initial allocation, subsequent repalloc'd memory areas
526 * are not initialized to zero.
527 */
528 if (datalen != SHORTALIGN(datalen))
529 {
530 *(STRPTR(vec) + datalen) = '\0';
531 datalen = SHORTALIGN(datalen);
532 }
533
534 memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
535
536 wepptr = POSDATAPTR(vec, &vec->entries[i]);
537 for (j = 0; j < npos; j++)
538 {
/*
 * Positions must be strictly increasing; an equal or smaller position
 * than the previous one is an error.
 */
540 if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
541 elog(ERROR, "position information is misordered");
542 }
543
544 datalen += sizeof(uint16) + npos * sizeof(WordEntryPos);
545 }
546 }
547
548 SET_VARSIZE(vec, hdrlen + datalen);
549
550 if (needSort)
551 qsort_arg(ARRPTR(vec), vec->size, sizeof(WordEntry),
553
555}
size_t Size
Definition c.h:619
#define MaxAllocSize
Definition fe_memutils.h:22
#define PG_GETARG_POINTER(n)
Definition fmgr.h:277
int j
Definition isn.c:78
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition pqformat.c:414
const char * pq_getmsgstring(StringInfo msg)
Definition pqformat.c:578
struct StringInfoData * StringInfo
Definition string.h:15
#define MAXNUMPOS
Definition ts_type.h:86
#define DATAHDRSIZE
Definition ts_type.h:100
static int compareentry(const void *va, const void *vb, void *arg)
Definition tsvector.c:87

References ARRPTR, buf, compareentry(), DATAHDRSIZE, elog, ERROR, fb(), i, j, len, MaxAllocSize, MAXNUMPOS, MAXSTRLEN, MAXSTRPOS, palloc0(), PG_GETARG_POINTER, PG_RETURN_TSVECTOR, POSDATAPTR, pq_getmsgint(), pq_getmsgstring(), qsort_arg(), repalloc(), SET_VARSIZE(), SHORTALIGN, STRPTR, and WEP_GETPOS.

◆ tsvectorsend()

Datum tsvectorsend ( PG_FUNCTION_ARGS  )

Definition at line 408 of file tsvector.c.

/*
 * Serialize a TSVector into a binary message.  The visible code writes
 * vec->size as an int32 and then, for each entry, the lexeme bytes, a
 * zero byte, and the position count as an int16.
 *
 * NOTE(review): this listing omits source lines 410, 411, 414, 416, 435,
 * 438 and 443.  Not visible here: the declarations of vec, buf and weptr,
 * the start of the message, the body of the per-position loop, and the
 * final return.  The reference list names pq_begintypsend, pq_endtypsend,
 * POSDATAPTR and PG_RETURN_BYTEA_P, which presumably appear on those
 * lines -- confirm against the real file.
 */
409{
412 int i,
413 j;
415
417
418 pq_sendint32(&buf, vec->size);
419 for (i = 0; i < vec->size; i++)
420 {
421 uint16 npos;
422
423 /*
424 * the strings in the TSVector array are not null-terminated, so we
425 * have to send the null-terminator separately
426 */
427 pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
428 pq_sendbyte(&buf, '\0');
429
430 npos = POSDATALEN(vec, weptr);
431 pq_sendint16(&buf, npos);
432
433 if (npos > 0)
434 {
436
437 for (j = 0; j < npos; j++)
439 }
/* weptr walks the entry array in step with i. */
440 weptr++;
441 }
442
444}
#define PG_RETURN_BYTEA_P(x)
Definition fmgr.h:373
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition pqformat.c:172
void pq_begintypsend(StringInfo buf)
Definition pqformat.c:325
bytea * pq_endtypsend(StringInfo buf)
Definition pqformat.c:345
static void pq_sendint32(StringInfo buf, uint32 i)
Definition pqformat.h:144
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition pqformat.h:160
static void pq_sendint16(StringInfo buf, uint16 i)
Definition pqformat.h:136

References ARRPTR, buf, fb(), i, j, PG_GETARG_TSVECTOR, PG_RETURN_BYTEA_P, POSDATALEN, POSDATAPTR, pq_begintypsend(), pq_endtypsend(), pq_sendbyte(), pq_sendint16(), pq_sendint32(), pq_sendtext(), and STRPTR.

◆ uniqueentry()

static int uniqueentry ( WordEntryIN *a,
int  l,
char *buf,
int *outbuflen
)
static

Definition at line 103 of file tsvector.c.

/*
 * Sort the l entries of a with compareentry() (buf is the comparator
 * argument) and merge entries with identical lexemes, in place.
 *
 * When duplicates both carry positions, the later position list is
 * appended to the surviving one and then freed; when only the duplicate
 * has positions, the survivor takes over its list.  Each surviving list
 * is passed through uniquePos() once its entry is complete.
 *
 * Returns the number of entries kept.  *outbuflen receives the running
 * total of lexeme lengths plus, for entries with positions, short-aligned
 * room for a uint16 count and the position array.  l must be at least 1.
 */
104{
105 int buflen;
106 WordEntryIN *ptr,
107 *res;
108
109 Assert(l >= 1);
110
111 if (l > 1)
112 qsort_arg(a, l, sizeof(WordEntryIN), compareentry, buf);
113
/* res is the last kept entry; ptr scans the entries after it. */
114 buflen = 0;
115 res = a;
116 ptr = a + 1;
117 while (ptr - a < l)
118 {
/* Different lexeme: finish off *res and start a new kept entry. */
119 if (!(ptr->entry.len == res->entry.len &&
120 strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos],
121 res->entry.len) == 0))
122 {
123 /* done accumulating data into *res, count space needed */
124 buflen += res->entry.len;
125 if (res->entry.haspos)
126 {
127 res->poslen = uniquePos(res->pos, res->poslen);
128 buflen = SHORTALIGN(buflen);
129 buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
130 }
131 res++;
/* Skip the copy when no entry has been dropped yet. */
132 if (res != ptr)
133 memcpy(res, ptr, sizeof(WordEntryIN));
134 }
/* Same lexeme: fold any positions of the duplicate into *res. */
135 else if (ptr->entry.haspos)
136 {
137 if (res->entry.haspos)
138 {
139 /* append ptr's positions to res's positions */
140 int newlen = ptr->poslen + res->poslen;
141
142 res->pos = (WordEntryPos *)
143 repalloc(res->pos, newlen * sizeof(WordEntryPos));
144 memcpy(&res->pos[res->poslen], ptr->pos,
145 ptr->poslen * sizeof(WordEntryPos));
146 res->poslen = newlen;
147 pfree(ptr->pos);
148 }
149 else
150 {
151 /* just give ptr's positions to pos */
152 res->entry.haspos = 1;
153 res->pos = ptr->pos;
154 res->poslen = ptr->poslen;
155 }
156 }
157 ptr++;
158 }
159
160 /* count space needed for last item */
161 buflen += res->entry.len;
162 if (res->entry.haspos)
163 {
164 res->poslen = uniquePos(res->pos, res->poslen);
165 buflen = SHORTALIGN(buflen);
166 buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
167 }
168
169 *outbuflen = buflen;
170 return res + 1 - a;
171}
static int uniquePos(WordEntryPos *a, int l)
Definition tsvector.c:52

References a, Assert, buf, compareentry(), WordEntryIN::entry, fb(), WordEntry::haspos, WordEntry::len, pfree(), WordEntryIN::pos, WordEntry::pos, WordEntryIN::poslen, qsort_arg(), repalloc(), SHORTALIGN, and uniquePos().

Referenced by tsvectorin().

◆ uniquePos()

static int uniquePos ( WordEntryPos *a,
int  l
)
static

Definition at line 52 of file tsvector.c.

/*
 * Collapse runs of adjacent elements of a[0..l-1] that share the same
 * WEP_GETPOS() value, in place, and return the resulting element count.
 * Within a run the kept element ends up with the largest weight seen.
 * Arrays of length 0 or 1 are returned unchanged.
 *
 * NOTE(review): source line 60 is missing from this listing.  The
 * reference list for this function names qsort and compareWordEntryPos,
 * so the array is presumably sorted by position on that line, which the
 * adjacent-duplicate scan below depends on -- confirm.
 */
53{
54 WordEntryPos *ptr,
55 *res;
56
57 if (l <= 1)
58 return l;
59
61
/* res is the last kept element; ptr scans the elements after it. */
62 res = a;
63 ptr = a + 1;
64 while (ptr - a < l)
65 {
66 if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res))
67 {
68 res++;
69 *res = *ptr;
/*
 * Stop once MAXNUMPOS elements are kept, or once the element just kept
 * has position MAXENTRYPOS - 1; anything after that is dropped.
 */
70 if (res - a >= MAXNUMPOS - 1 ||
71 WEP_GETPOS(*res) == MAXENTRYPOS - 1)
72 break;
73 }
/* Same position as the kept element: keep the higher weight. */
74 else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
75 WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr));
76 ptr++;
77 }
78
79 return res + 1 - a;
80}
#define qsort(a, b, c, d)
Definition port.h:495
#define MAXENTRYPOS
Definition ts_type.h:85
#define WEP_SETWEIGHT(x, v)
Definition ts_type.h:82
int compareWordEntryPos(const void *a, const void *b)
Definition tsvector.c:36

References a, compareWordEntryPos(), MAXENTRYPOS, MAXNUMPOS, qsort, WEP_GETPOS, WEP_GETWEIGHT, and WEP_SETWEIGHT.

Referenced by uniqueentry().