PostgreSQL Source Code  git master
wparser.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * wparser.c
4  * Standard interface to word parser
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/tsearch/wparser.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "catalog/namespace.h"
17 #include "catalog/pg_type.h"
18 #include "commands/defrem.h"
19 #include "common/jsonapi.h"
20 #include "funcapi.h"
21 #include "tsearch/ts_cache.h"
22 #include "tsearch/ts_utils.h"
23 #include "utils/builtins.h"
24 #include "utils/jsonfuncs.h"
25 #include "utils/varlena.h"
26 
27 /******sql-level interface******/
28 
29 typedef struct
30 {
31  int cur;
34 
35 /* state for ts_headline_json_* */
36 typedef struct HeadlineJsonState
37 {
45 
46 static text *headline_json_value(void *_state, char *elem_value, int elem_len);
47 
48 static void
50 {
51  TupleDesc tupdesc;
52  MemoryContext oldcontext;
55 
56  if (!OidIsValid(prs->lextypeOid))
57  elog(ERROR, "method lextype isn't defined for text search parser %u",
58  prsid);
59 
60  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
61 
63  st->cur = 0;
64  /* lextype takes one dummy argument */
66  (Datum) 0));
67  funcctx->user_fctx = (void *) st;
68 
69  tupdesc = CreateTemplateTupleDesc(3);
70  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
71  INT4OID, -1, 0);
72  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
73  TEXTOID, -1, 0);
74  TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
75  TEXTOID, -1, 0);
76 
77  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
78  MemoryContextSwitchTo(oldcontext);
79 }
80 
81 static Datum
83 {
85 
86  st = (TSTokenTypeStorage *) funcctx->user_fctx;
87  if (st->list && st->list[st->cur].lexid)
88  {
89  Datum result;
90  char *values[3];
91  char txtid[16];
92  HeapTuple tuple;
93 
94  sprintf(txtid, "%d", st->list[st->cur].lexid);
95  values[0] = txtid;
96  values[1] = st->list[st->cur].alias;
97  values[2] = st->list[st->cur].descr;
98 
99  tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
100  result = HeapTupleGetDatum(tuple);
101 
102  pfree(values[1]);
103  pfree(values[2]);
104  st->cur++;
105  return result;
106  }
107  return (Datum) 0;
108 }
109 
110 Datum
112 {
113  FuncCallContext *funcctx;
114  Datum result;
115 
116  if (SRF_IS_FIRSTCALL())
117  {
118  funcctx = SRF_FIRSTCALL_INIT();
119  tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
120  }
121 
122  funcctx = SRF_PERCALL_SETUP();
123 
124  if ((result = tt_process_call(funcctx)) != (Datum) 0)
125  SRF_RETURN_NEXT(funcctx, result);
126  SRF_RETURN_DONE(funcctx);
127 }
128 
129 Datum
131 {
132  FuncCallContext *funcctx;
133  Datum result;
134 
135  if (SRF_IS_FIRSTCALL())
136  {
137  text *prsname = PG_GETARG_TEXT_PP(0);
138  Oid prsId;
139 
140  funcctx = SRF_FIRSTCALL_INIT();
141  prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
142  tt_setup_firstcall(funcctx, prsId);
143  }
144 
145  funcctx = SRF_PERCALL_SETUP();
146 
147  if ((result = tt_process_call(funcctx)) != (Datum) 0)
148  SRF_RETURN_NEXT(funcctx, result);
149  SRF_RETURN_DONE(funcctx);
150 }
151 
/*
 * One token collected by prs_setup_firstcall(): the token type reported by
 * the parser and a private NUL-terminated copy of the token text.
 */
typedef struct LexemeEntry
{
	int			type;			/* token type id returned by prstoken */
	char	   *lexeme;			/* palloc'd, NUL-terminated token text */
} LexemeEntry;
157 
158 typedef struct
159 {
160  int cur;
161  int len;
163 } PrsStorage;
164 
165 
166 static void
168 {
169  TupleDesc tupdesc;
170  MemoryContext oldcontext;
171  PrsStorage *st;
173  char *lex = NULL;
174  int llen = 0,
175  type = 0;
176  void *prsdata;
177 
178  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
179 
180  st = (PrsStorage *) palloc(sizeof(PrsStorage));
181  st->cur = 0;
182  st->len = 16;
183  st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
184 
185  prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
188 
189  while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
190  PointerGetDatum(prsdata),
191  PointerGetDatum(&lex),
192  PointerGetDatum(&llen)))) != 0)
193  {
194  if (st->cur >= st->len)
195  {
196  st->len = 2 * st->len;
197  st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
198  }
199  st->list[st->cur].lexeme = palloc(llen + 1);
200  memcpy(st->list[st->cur].lexeme, lex, llen);
201  st->list[st->cur].lexeme[llen] = '\0';
202  st->list[st->cur].type = type;
203  st->cur++;
204  }
205 
206  FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
207 
208  st->len = st->cur;
209  st->cur = 0;
210 
211  funcctx->user_fctx = (void *) st;
212  tupdesc = CreateTemplateTupleDesc(2);
213  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
214  INT4OID, -1, 0);
215  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
216  TEXTOID, -1, 0);
217 
218  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
219  MemoryContextSwitchTo(oldcontext);
220 }
221 
222 static Datum
224 {
225  PrsStorage *st;
226 
227  st = (PrsStorage *) funcctx->user_fctx;
228  if (st->cur < st->len)
229  {
230  Datum result;
231  char *values[2];
232  char tid[16];
233  HeapTuple tuple;
234 
235  values[0] = tid;
236  sprintf(tid, "%d", st->list[st->cur].type);
237  values[1] = st->list[st->cur].lexeme;
238  tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
239  result = HeapTupleGetDatum(tuple);
240 
241  pfree(values[1]);
242  st->cur++;
243  return result;
244  }
245  return (Datum) 0;
246 }
247 
248 Datum
250 {
251  FuncCallContext *funcctx;
252  Datum result;
253 
254  if (SRF_IS_FIRSTCALL())
255  {
256  text *txt = PG_GETARG_TEXT_PP(1);
257 
258  funcctx = SRF_FIRSTCALL_INIT();
259  prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
260  PG_FREE_IF_COPY(txt, 1);
261  }
262 
263  funcctx = SRF_PERCALL_SETUP();
264 
265  if ((result = prs_process_call(funcctx)) != (Datum) 0)
266  SRF_RETURN_NEXT(funcctx, result);
267  SRF_RETURN_DONE(funcctx);
268 }
269 
270 Datum
272 {
273  FuncCallContext *funcctx;
274  Datum result;
275 
276  if (SRF_IS_FIRSTCALL())
277  {
278  text *prsname = PG_GETARG_TEXT_PP(0);
279  text *txt = PG_GETARG_TEXT_PP(1);
280  Oid prsId;
281 
282  funcctx = SRF_FIRSTCALL_INIT();
283  prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
284  prs_setup_firstcall(funcctx, prsId, txt);
285  }
286 
287  funcctx = SRF_PERCALL_SETUP();
288 
289  if ((result = prs_process_call(funcctx)) != (Datum) 0)
290  SRF_RETURN_NEXT(funcctx, result);
291  SRF_RETURN_DONE(funcctx);
292 }
293 
294 Datum
296 {
297  Oid tsconfig = PG_GETARG_OID(0);
298  text *in = PG_GETARG_TEXT_PP(1);
300  text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
302  List *prsoptions;
303  text *out;
306 
307  cfg = lookup_ts_config_cache(tsconfig);
308  prsobj = lookup_ts_parser_cache(cfg->prsId);
309 
310  if (!OidIsValid(prsobj->headlineOid))
311  ereport(ERROR,
312  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
313  errmsg("text search parser does not support headline creation")));
314 
315  memset(&prs, 0, sizeof(HeadlineParsedText));
316  prs.lenwords = 32;
317  prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
318 
319  hlparsetext(cfg->cfgId, &prs, query,
320  VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
321 
322  if (opt)
323  prsoptions = deserialize_deflist(PointerGetDatum(opt));
324  else
325  prsoptions = NIL;
326 
327  FunctionCall3(&(prsobj->prsheadline),
328  PointerGetDatum(&prs),
329  PointerGetDatum(prsoptions),
330  PointerGetDatum(query));
331 
332  out = generateHeadline(&prs);
333 
334  PG_FREE_IF_COPY(in, 1);
335  PG_FREE_IF_COPY(query, 2);
336  if (opt)
337  PG_FREE_IF_COPY(opt, 3);
338  pfree(prs.words);
339  pfree(prs.startsel);
340  pfree(prs.stopsel);
341 
342  PG_RETURN_POINTER(out);
343 }
344 
345 Datum
347 {
349  PG_GETARG_DATUM(0),
350  PG_GETARG_DATUM(1),
351  PG_GETARG_DATUM(2)));
352 }
353 
354 Datum
356 {
359  PG_GETARG_DATUM(0),
360  PG_GETARG_DATUM(1)));
361 }
362 
363 Datum
365 {
368  PG_GETARG_DATUM(0),
369  PG_GETARG_DATUM(1),
370  PG_GETARG_DATUM(2)));
371 }
372 
373 Datum
375 {
376  Oid tsconfig = PG_GETARG_OID(0);
377  Jsonb *jb = PG_GETARG_JSONB_P(1);
379  text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
380  Jsonb *out;
384 
385  memset(&prs, 0, sizeof(HeadlineParsedText));
386  prs.lenwords = 32;
387  prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
388 
389  state->prs = &prs;
390  state->cfg = lookup_ts_config_cache(tsconfig);
391  state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
392  state->query = query;
393  if (opt)
395  else
396  state->prsoptions = NIL;
397 
398  if (!OidIsValid(state->prsobj->headlineOid))
399  ereport(ERROR,
400  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
401  errmsg("text search parser does not support headline creation")));
402 
403  out = transform_jsonb_string_values(jb, state, action);
404 
405  PG_FREE_IF_COPY(jb, 1);
406  PG_FREE_IF_COPY(query, 2);
407  if (opt)
408  PG_FREE_IF_COPY(opt, 3);
409 
410  pfree(prs.words);
411 
412  if (state->transformed)
413  {
414  pfree(prs.startsel);
415  pfree(prs.stopsel);
416  }
417 
418  PG_RETURN_JSONB_P(out);
419 }
420 
421 Datum
423 {
426  PG_GETARG_DATUM(0),
427  PG_GETARG_DATUM(1)));
428 }
429 
430 Datum
432 {
434  PG_GETARG_DATUM(0),
435  PG_GETARG_DATUM(1),
436  PG_GETARG_DATUM(2)));
437 }
438 
439 Datum
441 {
444  PG_GETARG_DATUM(0),
445  PG_GETARG_DATUM(1),
446  PG_GETARG_DATUM(2)));
447 }
448 
449 Datum
451 {
452  Oid tsconfig = PG_GETARG_OID(0);
453  text *json = PG_GETARG_TEXT_P(1);
455  text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
456  text *out;
458 
461 
462  memset(&prs, 0, sizeof(HeadlineParsedText));
463  prs.lenwords = 32;
464  prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
465 
466  state->prs = &prs;
467  state->cfg = lookup_ts_config_cache(tsconfig);
468  state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
469  state->query = query;
470  if (opt)
472  else
473  state->prsoptions = NIL;
474 
475  if (!OidIsValid(state->prsobj->headlineOid))
476  ereport(ERROR,
477  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
478  errmsg("text search parser does not support headline creation")));
479 
480  out = transform_json_string_values(json, state, action);
481 
482  PG_FREE_IF_COPY(json, 1);
483  PG_FREE_IF_COPY(query, 2);
484  if (opt)
485  PG_FREE_IF_COPY(opt, 3);
486  pfree(prs.words);
487 
488  if (state->transformed)
489  {
490  pfree(prs.startsel);
491  pfree(prs.stopsel);
492  }
493 
494  PG_RETURN_TEXT_P(out);
495 }
496 
497 Datum
499 {
502  PG_GETARG_DATUM(0),
503  PG_GETARG_DATUM(1)));
504 }
505 
506 Datum
508 {
510  PG_GETARG_DATUM(0),
511  PG_GETARG_DATUM(1),
512  PG_GETARG_DATUM(2)));
513 }
514 
515 Datum
517 {
520  PG_GETARG_DATUM(0),
521  PG_GETARG_DATUM(1),
522  PG_GETARG_DATUM(2)));
523 }
524 
525 
526 /*
527  * Return headline in text from, generated from a json(b) element
528  */
529 static text *
530 headline_json_value(void *_state, char *elem_value, int elem_len)
531 {
533 
534  HeadlineParsedText *prs = state->prs;
535  TSConfigCacheEntry *cfg = state->cfg;
536  TSParserCacheEntry *prsobj = state->prsobj;
537  TSQuery query = state->query;
538  List *prsoptions = state->prsoptions;
539 
540  prs->curwords = 0;
541  hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
542  FunctionCall3(&(prsobj->prsheadline),
543  PointerGetDatum(prs),
544  PointerGetDatum(prsoptions),
545  PointerGetDatum(query));
546 
547  state->transformed = true;
548  return generateHeadline(prs);
549 }
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:360
#define NIL
Definition: pg_list.h:65
struct HeadlineJsonState HeadlineJsonState
static Datum prs_process_call(FuncCallContext *funcctx)
Definition: wparser.c:223
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
Definition: jsonb.h:220
int type
Definition: wparser.c:154
Datum ts_headline_byid(PG_FUNCTION_ARGS)
Definition: wparser.c:346
Datum ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
Definition: wparser.c:440
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:44
LexDescr * list
Definition: wparser.c:32
#define DatumGetInt32(X)
Definition: postgres.h:472
char * alias
Definition: ts_public.h:28
Oid getTSCurrentConfig(bool emitError)
Definition: ts_cache.c:560
LexemeEntry * list
Definition: wparser.c:162
TSParserCacheEntry * prsobj
Definition: wparser.c:40
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:293
#define PointerGetDatum(X)
Definition: postgres.h:556
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:238
List * deserialize_deflist(Datum txt)
Definition: tsearchcmds.c:1510
Datum ts_headline_json(PG_FUNCTION_ARGS)
Definition: wparser.c:498
int len
Definition: wparser.c:161
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define FunctionCall2(flinfo, arg1, arg2)
Definition: fmgr.h:644
int errcode(int sqlerrcode)
Definition: elog.c:610
Datum ts_token_type_byid(PG_FUNCTION_ARGS)
Definition: wparser.c:111
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId)
Definition: ts_cache.c:112
List * prsoptions
Definition: wparser.c:42
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:651
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:297
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
Definition: execTuples.c:2116
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:308
#define FunctionCall3(flinfo, arg1, arg2, arg3)
Definition: fmgr.h:646
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:299
#define sprintf
Definition: port.h:195
Datum ts_parse_byid(PG_FUNCTION_ARGS)
Definition: wparser.c:249
void pfree(void *pointer)
Definition: mcxt.c:1057
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
FmgrInfo prsend
Definition: ts_cache.h:47
Datum ts_headline_byid_opt(PG_FUNCTION_ARGS)
Definition: wparser.c:295
Datum ts_headline_opt(PG_FUNCTION_ARGS)
Definition: wparser.c:364
text * transform_json_string_values(text *json, void *action_state, JsonTransformStringValuesAction transform_action)
Definition: jsonfuncs.c:5328
TSConfigCacheEntry * lookup_ts_config_cache(Oid cfgId)
Definition: ts_cache.c:389
Datum ts_parse_byname(PG_FUNCTION_ARGS)
Definition: wparser.c:271
#define OidFunctionCall1(functionId, arg1)
Definition: fmgr.h:662
Datum ts_token_type_byname(PG_FUNCTION_ARGS)
Definition: wparser.c:130
static Datum tt_process_call(FuncCallContext *funcctx)
Definition: wparser.c:82
Datum ts_headline(PG_FUNCTION_ARGS)
Definition: wparser.c:355
Jsonb * transform_jsonb_string_values(Jsonb *jsonb, void *action_state, JsonTransformStringValuesAction transform_action)
Definition: jsonfuncs.c:5283
#define DirectFunctionCall4(func, arg1, arg2, arg3, arg4)
Definition: fmgr.h:630
HeadlineWordEntry * words
Definition: ts_public.h:52
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
AttInMetadata * attinmeta
Definition: funcapi.h:91
Datum ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
Definition: wparser.c:431
bool transformed
Definition: wparser.c:43
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:603
Datum ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
Definition: wparser.c:450
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3644
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:628
char * lexeme
Definition: wparser.c:155
char * descr
Definition: ts_public.h:29
int lexid
Definition: ts_public.h:27
int cur
Definition: wparser.c:160
static void tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
Definition: wparser.c:49
void * palloc0(Size size)
Definition: mcxt.c:981
uintptr_t Datum
Definition: postgres.h:367
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:352
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2067
#define ereport(elevel,...)
Definition: elog.h:144
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:370
text * generateHeadline(HeadlineParsedText *prs)
Definition: ts_parse.c:594
TSQuery query
Definition: wparser.c:41
Oid get_ts_parser_oid(List *names, bool missing_ok)
Definition: namespace.c:2299
FmgrInfo prstoken
Definition: ts_cache.h:46
Definition: regguts.h:298
static void prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
Definition: wparser.c:167
void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
Definition: ts_parse.c:529
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
static text * headline_json_value(void *_state, char *elem_value, int elem_len)
Definition: wparser.c:530
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_NARGS()
Definition: fmgr.h:203
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:220
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
#define DatumGetPointer(X)
Definition: postgres.h:549
TSConfigCacheEntry * cfg
Definition: wparser.c:39
static Datum values[MAXATTR]
Definition: bootstrap.c:165
Datum ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
Definition: wparser.c:374
FmgrInfo prsstart
Definition: ts_cache.h:45
#define Int32GetDatum(X)
Definition: postgres.h:479
Datum ts_headline_json_opt(PG_FUNCTION_ARGS)
Definition: wparser.c:516
void * user_fctx
Definition: funcapi.h:82
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define PG_GETARG_TEXT_P(n)
Definition: fmgr.h:335
#define elog(elevel,...)
Definition: elog.h:214
Datum ts_headline_jsonb(PG_FUNCTION_ARGS)
Definition: wparser.c:422
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:642
Definition: c.h:562
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_JSONB_P(x)
Definition: jsonb.h:76
text *(* JsonTransformStringValuesAction)(void *state, char *elem_value, int elem_len)
Definition: jsonfuncs.h:37
Definition: pg_list.h:50
int16 AttrNumber
Definition: attnum.h:21
HeadlineParsedText * prs
Definition: wparser.c:38
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:317
#define PG_GETARG_JSONB_P(x)
Definition: jsonb.h:74
Datum ts_headline_json_byid(PG_FUNCTION_ARGS)
Definition: wparser.c:507
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:295
FmgrInfo prsheadline
Definition: ts_cache.h:48