PostgreSQL Source Code git master
Loading...
Searching...
No Matches
xpath.c
Go to the documentation of this file.
1/*
2 * contrib/xml2/xpath.c
3 *
4 * Parser interface for DOM-based parser (libxml) rather than
5 * stream-based SAX-type parser
6 */
7#include "postgres.h"
8
10#include "executor/spi.h"
11#include "fmgr.h"
12#include "funcapi.h"
13#include "lib/stringinfo.h"
14#include "utils/builtins.h"
15#include "utils/tuplestore.h"
16#include "utils/xml.h"
17
18/* libxml includes */
19
20#include <libxml/xpath.h>
21#include <libxml/tree.h>
22#include <libxml/xmlmemory.h>
23#include <libxml/xmlerror.h>
24#include <libxml/parserInternals.h>
25
27 .name = "xml2",
28 .version = PG_VERSION
29);
30
31/* exported for use by xslt_proc.c */
32
34
35/* workspace for pgxml_xpath() */
36
43
44/* local declarations */
45
49
52
54
57
58static void cleanup_workspace(xpath_workspace *workspace);
59
60
61/*
62 * Initialize for xml parsing.
63 *
64 * As with the underlying pg_xml_init function, calls to this MUST be followed
65 * by a PG_TRY block that guarantees that pg_xml_done is called.
66 */
69{
71
72 /* Set up error handling (we share the core's error handler) */
74
75 /* Note: we're assuming an elog cannot be thrown by the following calls */
76
77 /* Initialize libxml */
79
80 return xmlerrcxt;
81}
82
83
84/* Encodes special characters (<, >, &, " and \r) as XML entities */
85
87
90{
92 text *volatile tout = NULL;
93 xmlChar *volatile tt = NULL;
95
97
98 PG_TRY();
99 {
100 xmlChar *ts;
101
103
107 "could not allocate xmlChar");
108 pfree(ts);
109
110 tout = cstring_to_text((char *) tt);
111 }
112 PG_CATCH();
113 {
114 if (tt != NULL)
115 xmlFree(tt);
116
117 pg_xml_done(xmlerrcxt, true);
118
119 PG_RE_THROW();
120 }
121 PG_END_TRY();
122
123 if (tt != NULL)
124 xmlFree(tt);
125
126 pg_xml_done(xmlerrcxt, false);
127
129}
130
131/*
132 * Function translates a nodeset into a text representation
133 *
134 * iterates over each node in the set and calls xmlNodeDump to write it to
135 * an xmlBuffer, from which an xmlChar * string is returned.
136 *
137 * each representation is surrounded by <tagname> ... </tagname>
138 *
139 * plainsep is an ordinary (not tag) separator; if it is supplied, nodes are
140 * cast to strings for output, with plainsep written between entries
141 */
142static xmlChar *
147{
148 volatile xmlBufferPtr buf = NULL;
149 xmlChar *volatile result = NULL;
151
152 /* spin up some error handling */
154
155 PG_TRY();
156 {
158
161 "could not allocate xmlBuffer");
162
163 if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
164 {
168 }
169 if (nodeset != NULL)
170 {
171 for (int i = 0; i < nodeset->nodeNr; i++)
172 {
173 if (plainsep != NULL)
174 {
177
178 /* If this isn't the last entry, write the plain sep. */
179 if (i < (nodeset->nodeNr) - 1)
181 }
182 else
183 {
184 if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
185 {
189 }
191 nodeset->nodeTab[i]->doc,
192 nodeset->nodeTab[i],
193 1, 0);
194
195 if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
196 {
197 xmlBufferWriteChar(buf, "</");
200 }
201 }
202 }
203 }
204
205 if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
206 {
207 xmlBufferWriteChar(buf, "</");
210 }
211
212 result = xmlStrdup(xmlBufferContent(buf));
213 if (result == NULL || pg_xml_error_occurred(xmlerrcxt))
215 "could not allocate result");
216 }
217 PG_CATCH();
218 {
219 if (buf)
221
222 pg_xml_done(xmlerrcxt, true);
223
224 PG_RE_THROW();
225 }
226 PG_END_TRY();
227
229 pg_xml_done(xmlerrcxt, false);
230
231 return result;
232}
233
234
235/* Translate a PostgreSQL "varlena" (i.e. a variable-length parameter)
236 * into the libxml2 representation
237 */
238static xmlChar *
243
244/* Publicly visible XPath functions */
245
246/*
247 * This is a "raw" xpath function. Check that it returns child elements
248 * properly
249 */
251
252Datum
254{
256 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
259 xmlChar *xpath;
260 text *volatile xpres = NULL;
261 xpath_workspace *volatile workspace = NULL;
263
266
267 PG_TRY();
268 {
269 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
271 }
272 PG_CATCH();
273 {
274 if (workspace)
275 cleanup_workspace(workspace);
276
277 pg_xml_done(xmlerrcxt, true);
278 PG_RE_THROW();
279 }
280 PG_END_TRY();
281
282 cleanup_workspace(workspace);
283 pg_xml_done(xmlerrcxt, false);
284
285 pfree(xpath);
286
287 if (xpres == NULL)
290}
291
292/*
293 * The following function is almost identical, but returns the elements in
294 * a list.
295 */
297
298Datum
300{
302 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
304 xmlChar *xpath;
305 text *volatile xpres = NULL;
306 xpath_workspace *volatile workspace = NULL;
308
311
312 PG_TRY();
313 {
314 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
316 }
317 PG_CATCH();
318 {
319 if (workspace)
320 cleanup_workspace(workspace);
321
322 pg_xml_done(xmlerrcxt, true);
323 PG_RE_THROW();
324 }
325 PG_END_TRY();
326
327 cleanup_workspace(workspace);
328 pg_xml_done(xmlerrcxt, false);
329
330 pfree(xpath);
331
332 if (xpres == NULL)
335}
336
337
339
340Datum
342{
344 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
345 xmlChar *xpath;
347 text *volatile xpres = NULL;
348 xpath_workspace *volatile workspace = NULL;
350
352
353 /*
354 * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
355 * at end
356 */
357 /* We could try casting to string using the libxml function? */
358
359 xpath = (xmlChar *) palloc(pathsize + 9);
360 memcpy(xpath, "string(", 7);
362 xpath[pathsize + 7] = ')';
363 xpath[pathsize + 8] = '\0';
364
366
367 PG_TRY();
368 {
369 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
370 xpres = pgxml_result_to_text(workspace->res, NULL, NULL, NULL);
371 }
372 PG_CATCH();
373 {
374 if (workspace)
375 cleanup_workspace(workspace);
376
377 pg_xml_done(xmlerrcxt, true);
378 PG_RE_THROW();
379 }
380 PG_END_TRY();
381
382 cleanup_workspace(workspace);
383 pg_xml_done(xmlerrcxt, false);
384
385 pfree(xpath);
386
387 if (xpres == NULL)
390}
391
392
394
395Datum
397{
399 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
400 xmlChar *xpath;
401 volatile float4 fRes = 0.0;
402 volatile bool isNull = false;
403 xpath_workspace *volatile workspace = NULL;
405
408
409 PG_TRY();
410 {
411 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
412 pfree(xpath);
413
414 if (workspace->res == NULL)
415 isNull = true;
416 else
417 fRes = xmlXPathCastToNumber(workspace->res);
418 }
419 PG_CATCH();
420 {
421 if (workspace)
422 cleanup_workspace(workspace);
423
424 pg_xml_done(xmlerrcxt, true);
425 PG_RE_THROW();
426 }
427 PG_END_TRY();
428
429 cleanup_workspace(workspace);
430 pg_xml_done(xmlerrcxt, false);
431
432 if (isNull || xmlXPathIsNaN(fRes))
434
436}
437
438
440
441Datum
443{
445 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
446 xmlChar *xpath;
447 volatile int bRes = 0;
448 xpath_workspace *volatile workspace = NULL;
450
453
454 PG_TRY();
455 {
456 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
457 pfree(xpath);
458
459 if (workspace->res == NULL)
460 bRes = 0;
461 else
462 bRes = xmlXPathCastToBoolean(workspace->res);
463 }
464 PG_CATCH();
465 {
466 if (workspace)
467 cleanup_workspace(workspace);
468
469 pg_xml_done(xmlerrcxt, true);
470 PG_RE_THROW();
471 }
472 PG_END_TRY();
473
474 cleanup_workspace(workspace);
475 pg_xml_done(xmlerrcxt, false);
476
478}
479
480
481
482/* Core function to evaluate XPath query */
483
484static xpath_workspace *
486{
490
491 workspace->doctree = NULL;
492 workspace->ctxt = NULL;
493 workspace->res = NULL;
494
495 workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
496 docsize, NULL, NULL,
498 if (workspace->doctree != NULL)
499 {
500 workspace->ctxt = xmlXPathNewContext(workspace->doctree);
501 workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
502
503 /* compile the path */
504 comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath);
507 "XPath Syntax Error");
508
509 /* Now evaluate the path expression. */
510 workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
511
513 }
514
515 return workspace;
516}
517
518/* Clean up after processing the result of pgxml_xpath() */
519static void
521{
522 if (workspace->res)
523 xmlXPathFreeObject(workspace->res);
524 workspace->res = NULL;
525 if (workspace->ctxt)
526 xmlXPathFreeContext(workspace->ctxt);
527 workspace->ctxt = NULL;
528 if (workspace->doctree)
529 xmlFreeDoc(workspace->doctree);
530 workspace->doctree = NULL;
531}
532
533static text *
538{
539 xmlChar *volatile xpresstr = NULL;
540 text *volatile xpres = NULL;
542
543 if (res == NULL)
544 return NULL;
545
546 /* spin up some error handling */
548
549 PG_TRY();
550 {
551 switch (res->type)
552 {
553 case XPATH_NODESET:
554 xpresstr = pgxmlNodeSetToText(res->nodesetval,
555 toptag,
557 break;
558
559 case XPATH_STRING:
560 xpresstr = xmlStrdup(res->stringval);
563 "could not allocate result");
564 break;
565
566 default:
567 elog(NOTICE, "unsupported XQuery result: %d", res->type);
568 xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
571 "could not allocate result");
572 }
573
574 /* Now convert this result back to text */
575 xpres = cstring_to_text((char *) xpresstr);
576 }
577 PG_CATCH();
578 {
579 if (xpresstr != NULL)
581
582 pg_xml_done(xmlerrcxt, true);
583
584 PG_RE_THROW();
585 }
586 PG_END_TRY();
587
588 /* Free various storage */
590
591 pg_xml_done(xmlerrcxt, false);
592
593 return xpres;
594}
595
596/*
597 * xpath_table is a table function. It needs some tidying (as do the
598 * other functions here!)
599 */
601
602Datum
604{
605 /* Function parameters */
610 char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
611
612 /* SPI (input tuple) support */
613 SPITupleTable *tuptable;
616
617
618 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
619 AttInMetadata *attinmeta;
620
621 char **values;
622 xmlChar **xpaths;
623 char *pos;
624 const char *pathsep = "|";
625
626 int numpaths;
627 int ret;
628 uint64 proc;
629 int j;
630 int rownr; /* For issuing multiple rows from one original
631 * document */
632 bool had_values; /* To determine end of nodeset results */
635 volatile xmlDocPtr doctree = NULL;
636
638
639 /* must have at least one output column (for the pkey) */
640 if (rsinfo->setDesc->natts < 1)
643 errmsg("xpath_table must have at least one output column")));
644
645 /*
646 * At the moment we assume that the returned attributes make sense for the
647 * XPath specified (i.e. we trust the caller). It's not fatal if they get
648 * it wrong - the input function for the column type will raise an error
649 * if the path result can't be converted into the correct binary
650 * representation.
651 */
652
653 attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);
654
655 values = (char **) palloc(rsinfo->setDesc->natts * sizeof(char *));
656 xpaths = (xmlChar **) palloc(rsinfo->setDesc->natts * sizeof(xmlChar *));
657
658 /*
659 * Split XPaths. xpathset is a writable CString.
660 *
661 * Note that we stop splitting once we have all the paths the tupdesc needs
662 */
663 numpaths = 0;
664 pos = xpathset;
665 while (numpaths < (rsinfo->setDesc->natts - 1))
666 {
667 xpaths[numpaths++] = (xmlChar *) pos;
668 pos = strstr(pos, pathsep);
669 if (pos != NULL)
670 {
671 *pos = '\0';
672 pos++;
673 }
674 else
675 break;
676 }
677
678 /* Now build query */
680
681 /* Build initial sql statement */
682 appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
683 pkeyfield,
684 xmlfield,
685 relname,
686 condition);
687
688 SPI_connect();
689
690 if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
691 elog(ERROR, "xpath_table: SPI execution failed for query %s",
692 query_buf.data);
693
694 proc = SPI_processed;
695 tuptable = SPI_tuptable;
696 spi_tupdesc = tuptable->tupdesc;
697
698 /*
699 * Check that SPI returned correct result. If you put a comma into one of
700 * the function parameters, this will catch it when the SPI query returns
701 * e.g. 3 columns.
702 */
703 if (spi_tupdesc->natts != 2)
704 {
706 errmsg("expression returning multiple columns is not valid in parameter list"),
707 errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
708 }
709
710 /*
711 * Setup the parser. This should happen after we are done evaluating the
712 * query, in case it calls functions that set up libxml differently.
713 */
715
716 PG_TRY();
717 {
718 /* For each row i.e. document returned from SPI */
719 uint64 i;
720
721 for (i = 0; i < proc; i++)
722 {
723 char *pkey;
724 char *xmldoc;
730
731 /* Extract the row data as C Strings */
732 spi_tuple = tuptable->vals[i];
735
736 /*
737 * Clear the values array, so that not-well-formed documents
738 * return NULL in all columns. Note that this also means that
739 * spare columns will be NULL.
740 */
741 for (j = 0; j < rsinfo->setDesc->natts; j++)
742 values[j] = NULL;
743
744 /* Insert primary key */
745 values[0] = pkey;
746
747 /* Parse the document */
748 if (xmldoc)
749 doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
750 NULL, NULL,
752 else /* treat NULL as not well-formed */
753 doctree = NULL;
754
755 if (doctree == NULL)
756 {
757 /* not well-formed, so output all-NULL tuple */
761 }
762 else
763 {
764 /* New loop here - we have to deal with nodeset results */
765 rownr = 0;
766
767 do
768 {
769 /* Now evaluate the set of xpaths. */
770 had_values = false;
771 for (j = 0; j < numpaths; j++)
772 {
773 ctxt = xmlXPathNewContext(doctree);
774 if (ctxt == NULL || pg_xml_error_occurred(xmlerrcxt))
777 "could not allocate XPath context");
778
779 ctxt->node = xmlDocGetRootElement(doctree);
780
781 /* compile the path */
786 "XPath Syntax Error");
787
788 /* Now evaluate the path expression. */
789 res = xmlXPathCompiledEval(comppath, ctxt);
791
792 if (res != NULL)
793 {
794 switch (res->type)
795 {
796 case XPATH_NODESET:
797 /* We see if this nodeset has enough nodes */
798 if (res->nodesetval != NULL &&
800 {
801 resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
805 "could not allocate result");
806 had_values = true;
807 }
808 else
809 resstr = NULL;
810
811 break;
812
813 case XPATH_STRING:
814 resstr = xmlStrdup(res->stringval);
818 "could not allocate result");
819 break;
820
821 default:
822 elog(NOTICE, "unsupported XQuery result: %d", res->type);
823 resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
827 "could not allocate result");
828 }
829
830 /*
831 * Insert this into the appropriate column in the
832 * result tuple.
833 */
834 values[j + 1] = (char *) resstr;
835 }
837 }
838
839 /* Now add the tuple to the output, if there is one. */
840 if (had_values)
841 {
845 }
846
847 rownr++;
848 } while (had_values);
849 }
850
851 if (doctree != NULL)
852 xmlFreeDoc(doctree);
853 doctree = NULL;
854
855 if (pkey)
856 pfree(pkey);
857 if (xmldoc)
858 pfree(xmldoc);
859 }
860 }
861 PG_CATCH();
862 {
863 if (doctree != NULL)
864 xmlFreeDoc(doctree);
865
866 pg_xml_done(xmlerrcxt, true);
867
868 PG_RE_THROW();
869 }
870 PG_END_TRY();
871
872 if (doctree != NULL)
873 xmlFreeDoc(doctree);
874
875 pg_xml_done(xmlerrcxt, false);
876
877 SPI_finish();
878
879 /*
880 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
881 * tuples are in our tuplestore and passed back through rsinfo->setResult.
882 * rsinfo->setDesc is set to the tuple description that we actually used
883 * to build our tuples with, so the caller can verify we did what it was
884 * expecting.
885 */
886 return (Datum) 0;
887}
static Datum values[MAXATTR]
Definition bootstrap.c:188
int32_t int32
Definition c.h:614
uint64_t uint64
Definition c.h:619
float float4
Definition c.h:715
int errcode(int sqlerrcode)
Definition elog.c:874
#define PG_RE_THROW()
Definition elog.h:405
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define PG_TRY(...)
Definition elog.h:372
#define PG_END_TRY(...)
Definition elog.h:397
#define ERROR
Definition elog.h:39
#define PG_CATCH(...)
Definition elog.h:382
#define elog(elevel,...)
Definition elog.h:226
#define NOTICE
Definition elog.h:35
#define ereport(elevel,...)
Definition elog.h:150
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
#define palloc0_object(type)
Definition fe_memutils.h:75
#define PG_GETARG_TEXT_PP(n)
Definition fmgr.h:310
#define PG_MODULE_MAGIC_EXT(...)
Definition fmgr.h:540
#define PG_RETURN_NULL()
Definition fmgr.h:346
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_RETURN_TEXT_P(x)
Definition fmgr.h:374
#define PG_RETURN_FLOAT4(x)
Definition fmgr.h:368
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition funcapi.c:76
#define MAT_SRF_USE_EXPECTED_DESC
Definition funcapi.h:296
void heap_freetuple(HeapTuple htup)
Definition heaptuple.c:1384
int j
Definition isn.c:78
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
static char * errmsg
NameData relname
Definition pg_class.h:40
static char buf[DEFAULT_XLOG_SEG_SIZE]
uint64_t Datum
Definition postgres.h:70
static int fb(int x)
uint64 SPI_processed
Definition spi.c:45
SPITupleTable * SPI_tuptable
Definition spi.c:46
int SPI_connect(void)
Definition spi.c:95
int SPI_finish(void)
Definition spi.c:183
int SPI_exec(const char *src, long tcount)
Definition spi.c:631
char * SPI_getvalue(HeapTuple tuple, TupleDesc tupdesc, int fnumber)
Definition spi.c:1221
#define SPI_OK_SELECT
Definition spi.h:86
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
TupleDesc tupdesc
Definition spi.h:25
HeapTuple * vals
Definition spi.h:26
Definition c.h:778
xmlXPathContextPtr ctxt
Definition xpath.c:40
xmlDocPtr doctree
Definition xpath.c:39
xmlXPathObjectPtr res
Definition xpath.c:41
void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple)
Definition tuplestore.c:765
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
text * cstring_to_text(const char *s)
Definition varlena.c:184
char * text_to_cstring(const text *t)
Definition varlena.c:217
const char * name
Datum xpath(PG_FUNCTION_ARGS)
Definition xml.c:4566
struct PgXmlErrorContext PgXmlErrorContext
Definition xml.h:48
PgXmlErrorContext * pg_xml_init(PgXmlStrictness strictness)
void xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
bool pg_xml_error_occurred(PgXmlErrorContext *errcxt)
void pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
PgXmlStrictness
Definition xml.h:40
@ PG_XML_STRICTNESS_LEGACY
Definition xml.h:41
@ PG_XML_STRICTNESS_ALL
Definition xml.h:44
static text * pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag, xmlChar *septag, xmlChar *plainsep)
Definition xpath.c:534
Datum xpath_bool(PG_FUNCTION_ARGS)
Definition xpath.c:442
Datum xpath_number(PG_FUNCTION_ARGS)
Definition xpath.c:396
Datum xpath_table(PG_FUNCTION_ARGS)
Definition xpath.c:603
static xmlChar * pgxml_texttoxmlchar(text *textstring)
Definition xpath.c:239
PgXmlErrorContext * pgxml_parser_init(PgXmlStrictness strictness)
Definition xpath.c:68
Datum xpath_string(PG_FUNCTION_ARGS)
Definition xpath.c:341
static void cleanup_workspace(xpath_workspace *workspace)
Definition xpath.c:520
Datum xml_encode_special_chars(PG_FUNCTION_ARGS)
Definition xpath.c:89
Datum xpath_list(PG_FUNCTION_ARGS)
Definition xpath.c:299
static xmlChar * pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlChar *toptagname, xmlChar *septagname, xmlChar *plainsep)
Definition xpath.c:143
static xpath_workspace * pgxml_xpath(text *document, xmlChar *xpath, PgXmlErrorContext *xmlerrcxt)
Definition xpath.c:485
Datum xpath_nodeset(PG_FUNCTION_ARGS)
Definition xpath.c:253