PostgreSQL Source Code git master
Loading...
Searching...
No Matches
xpath.c
Go to the documentation of this file.
1/*
2 * contrib/xml2/xpath.c
3 *
4 * Parser interface for DOM-based parser (libxml) rather than
5 * stream-based SAX-type parser
6 */
7#include "postgres.h"
8
10#include "executor/spi.h"
11#include "fmgr.h"
12#include "funcapi.h"
13#include "lib/stringinfo.h"
14#include "utils/builtins.h"
15#include "utils/tuplestore.h"
16#include "utils/xml.h"
17
18/* libxml includes */
19
20#include <libxml/xpath.h>
21#include <libxml/tree.h>
22#include <libxml/xmlmemory.h>
23#include <libxml/xmlerror.h>
24#include <libxml/parserInternals.h>
25
/*
 * Module identification fields: the module name is "xml2" and its version is
 * PG_VERSION.
 * NOTE(review): the line that opens this construct is not visible in this
 * listing (presumably PG_MODULE_MAGIC_EXT, which appears in the
 * cross-reference list) -- confirm.
 */
27 .name = "xml2",
28 .version = PG_VERSION
29);
30
31/* exported for use by xslt_proc.c */
32
34
35/* workspace for pgxml_xpath() */
36
43
44/* local declarations */
45
49
52
54
57
58static void cleanup_workspace(xpath_workspace *workspace);
59
60
61/*
62 * Initialize for xml parsing.
63 *
64 * As with the underlying pg_xml_init function, calls to this MUST be followed
65 * by a PG_TRY block that guarantees that pg_xml_done is called.
66 */
/*
 * Body of pgxml_parser_init(). The cross-reference list at the end of this
 * listing gives the signature as
 *   PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness)
 *
 * Returns the error context held in xmlerrcxt.
 *
 * NOTE(review): the declaration of xmlerrcxt and the calls that set up error
 * handling and initialize libxml are not visible in this listing -- confirm
 * against the full file.
 */
69{
71
72 /* Set up error handling (we share the core's error handler) */
74
75 /* Note: we're assuming an elog cannot be thrown by the following calls */
76
77 /* Initialize libxml */
79
80 return xmlerrcxt;
81}
82
83
84/* Encodes special characters (<, >, &, " and \r) as XML entities */
85
87
/*
 * Body of xml_encode_special_chars() (a Datum function taking
 * PG_FUNCTION_ARGS, per the cross-reference list at the end of this listing).
 *
 * The encoded string is held in tt and copied into a text value (tout) with
 * cstring_to_text().  tt is released with xmlFree() on both the error path
 * and the normal path; ts is released with pfree().
 *
 * NOTE(review): the argument fetch, the call that produces tt, and the final
 * return statement are not visible in this listing.
 */
90{
 /* Both are set inside PG_TRY; tt is also examined in PG_CATCH. */
92 text *volatile tout = NULL;
93 xmlChar *volatile tt = NULL;
95
97
98 PG_TRY();
99 {
100 xmlChar *ts;
101
103
107 "could not allocate xmlChar");
 /* ts is no longer needed once tt has been produced. */
108 pfree(ts);
109
110 tout = cstring_to_text((char *) tt);
111 }
112 PG_CATCH();
113 {
 /* Error path: release tt if it was set, then close the xml error context. */
114 if (tt != NULL)
115 xmlFree(tt);
116
117 pg_xml_done(xmlerrcxt, true);
118
119 PG_RE_THROW();
120 }
121 PG_END_TRY();
122
 /* Normal path: tout already holds a copy, so tt can be released. */
123 if (tt != NULL)
124 xmlFree(tt);
125
126 pg_xml_done(xmlerrcxt, false);
127
129}
130
131/*
132 * Function translates a nodeset into a text representation
133 *
134 * iterates over each node in the set and calls xmlNodeDump to write it to
135 * an xmlBuffer, from which an xmlChar * string is returned.
136 *
137 * each representation is surrounded by <tagname> ... </tagname>
138 *
139 * plainsep is an ordinary (not tag) separator - if used, then nodes are
140 * cast to string as output method
141 */
/*
 * pgxmlNodeSetToText (signature per the cross-reference list at the end of
 * this listing):
 *   static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
 *                                      xmlChar *toptagname,
 *                                      xmlChar *septagname,
 *                                      xmlChar *plainsep)
 *
 * Builds the output in an xmlBuffer (buf) and returns it as an xmlChar
 * string (result).  A NULL nodeset contributes no per-node output.
 *
 * NOTE(review): several statements (buffer creation, the tag/separator
 * writes, the node dump call, extraction of the result and the buffer
 * release) are not visible in this listing.
 */
142static xmlChar *
147{
148 volatile xmlBufferPtr buf = NULL;
149 xmlChar *volatile result = NULL;
151
152 /* spin up some error handling */
154
155 PG_TRY();
156 {
158
161 "could not allocate xmlBuffer");
162
 /* The top-level tag is only considered when it is non-NULL and non-empty. */
163 if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
164 {
168 }
169 if (nodeset != NULL)
170 {
171 for (int i = 0; i < nodeset->nodeNr; i++)
172 {
 /* plainsep selects plain-separator output; otherwise nodes may be tagged. */
173 if (plainsep != NULL)
174 {
177
178 /* If this isn't the last entry, write the plain sep. */
179 if (i < (nodeset->nodeNr) - 1)
181 }
182 else
183 {
 /* Per-node tag is only considered when septagname is non-NULL and non-empty. */
184 if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
185 {
189 }
191 nodeset->nodeTab[i]->doc,
192 nodeset->nodeTab[i],
193 1, 0);
194
 /* Same condition as above guards the per-node closing tag. */
195 if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
196 {
197 xmlBufferWriteChar(buf, "</");
200 }
201 }
202 }
203 }
204
 /* Same condition as the opening top-level tag guards the closing one. */
205 if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
206 {
207 xmlBufferWriteChar(buf, "</");
210 }
211
215 "could not allocate result");
216 }
217 PG_CATCH();
218 {
219 if (buf)
221
222 pg_xml_done(xmlerrcxt, true);
223
224 PG_RE_THROW();
225 }
226 PG_END_TRY();
227
229 pg_xml_done(xmlerrcxt, false);
230
231 return result;
232}
233
234
235/*
236 * Translate a PostgreSQL "varlena" - i.e., a variable-length parameter -
237 * into the libxml2 representation
238 */
239static xmlChar *
244
245/* Publicly visible XPath functions */
246
247/*
248 * This is a "raw" xpath function. Check that it returns child elements
249 * properly
250 */
252
/*
 * xpath_nodeset (a Datum function taking PG_FUNCTION_ARGS, per the
 * cross-reference list at the end of this listing).
 *
 * Argument 1 is the XPath expression.  The expression is evaluated with
 * pgxml_xpath(); the workspace it returns is released with
 * cleanup_workspace() on both the error path and the normal path.
 *
 * NOTE(review): the fetch of the remaining arguments, the conversion of the
 * XPath result into xpres, and the return statements are not visible in this
 * listing.
 */
253Datum
255{
257 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
260 xmlChar *xpath;
261 text *volatile xpres = NULL;
262 xpath_workspace *volatile workspace = NULL;
264
267
268 PG_TRY();
269 {
270 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
272 }
273 PG_CATCH();
274 {
 /* workspace is still NULL if the error was raised before it was assigned. */
275 if (workspace)
276 cleanup_workspace(workspace);
277
278 pg_xml_done(xmlerrcxt, true);
279 PG_RE_THROW();
280 }
281 PG_END_TRY();
282
283 cleanup_workspace(workspace);
284 pg_xml_done(xmlerrcxt, false);
285
286 pfree(xpath);
287
 /* xpres stays NULL when no text result was produced. */
288 if (xpres == NULL)
291}
292
293/*
294 * The following function is almost identical, but returns the elements in
295 * a list.
296 */
298
/*
 * xpath_list (a Datum function taking PG_FUNCTION_ARGS, per the
 * cross-reference list at the end of this listing).
 *
 * Argument 1 is the XPath expression.  The expression is evaluated with
 * pgxml_xpath(); the workspace it returns is released with
 * cleanup_workspace() on both the error path and the normal path.
 *
 * NOTE(review): the fetch of the remaining arguments, the conversion of the
 * XPath result into xpres, and the return statements are not visible in this
 * listing.
 */
299Datum
301{
303 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
305 xmlChar *xpath;
306 text *volatile xpres = NULL;
307 xpath_workspace *volatile workspace = NULL;
309
312
313 PG_TRY();
314 {
315 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
317 }
318 PG_CATCH();
319 {
 /* workspace is still NULL if the error was raised before it was assigned. */
320 if (workspace)
321 cleanup_workspace(workspace);
322
323 pg_xml_done(xmlerrcxt, true);
324 PG_RE_THROW();
325 }
326 PG_END_TRY();
327
328 cleanup_workspace(workspace);
329 pg_xml_done(xmlerrcxt, false);
330
331 pfree(xpath);
332
 /* xpres stays NULL when no text result was produced. */
333 if (xpres == NULL)
336}
337
338
340
/*
 * xpath_string (a Datum function taking PG_FUNCTION_ARGS, per the
 * cross-reference list at the end of this listing).
 *
 * Wraps the supplied XPath expression (argument 1) in "string(...)",
 * evaluates it with pgxml_xpath(), and converts the result to text with
 * pgxml_result_to_text() using no top tag, separator tag or plain separator.
 *
 * NOTE(review): the fetch of the document argument, the computation of
 * pathsize, the copy of the supplied path into the buffer, and the return
 * statements are not visible in this listing.
 */
341Datum
343{
345 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
346 xmlChar *xpath;
348 text *volatile xpres = NULL;
349 xpath_workspace *volatile workspace = NULL;
351
353
354 /*
355 * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
356 * at end
357 */
358 /* We could try casting to string using the libxml function? */
359
 /*
  * Buffer layout: 7 bytes for "string(", then pathsize bytes starting at
  * offset 7, then ')' at pathsize + 7 and NUL at pathsize + 8.
  */
360 xpath = (xmlChar *) palloc(pathsize + 9);
361 memcpy(xpath, "string(", 7);
363 xpath[pathsize + 7] = ')';
364 xpath[pathsize + 8] = '\0';
365
367
368 PG_TRY();
369 {
370 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
371 xpres = pgxml_result_to_text(workspace->res, NULL, NULL, NULL);
372 }
373 PG_CATCH();
374 {
 /* workspace is still NULL if the error was raised before it was assigned. */
375 if (workspace)
376 cleanup_workspace(workspace);
377
378 pg_xml_done(xmlerrcxt, true);
379 PG_RE_THROW();
380 }
381 PG_END_TRY();
382
383 cleanup_workspace(workspace);
384 pg_xml_done(xmlerrcxt, false);
385
386 pfree(xpath);
387
 /* pgxml_result_to_text() is called with workspace->res, which may be NULL. */
388 if (xpres == NULL)
391}
392
393
395
/*
 * xpath_number (a Datum function taking PG_FUNCTION_ARGS, per the
 * cross-reference list at the end of this listing).
 *
 * Evaluates the XPath expression (argument 1) with pgxml_xpath() and casts
 * the result to a number with xmlXPathCastToNumber().  isNull is set when
 * there is no XPath result object; a NaN value is tested together with
 * isNull after cleanup.
 *
 * NOTE(review): the fetch of the document argument, the construction of
 * xpath, and the return statements are not visible in this listing.
 */
396Datum
398{
400 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
401 xmlChar *xpath;
402 volatile float4 fRes = 0.0;
403 volatile bool isNull = false;
404 xpath_workspace *volatile workspace = NULL;
406
409
410 PG_TRY();
411 {
412 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
 /* Here the path string is freed inside PG_TRY, right after evaluation. */
413 pfree(xpath);
414
415 if (workspace->res == NULL)
416 isNull = true;
417 else
418 fRes = xmlXPathCastToNumber(workspace->res);
419 }
420 PG_CATCH();
421 {
 /* workspace is still NULL if the error was raised before it was assigned. */
422 if (workspace)
423 cleanup_workspace(workspace);
424
425 pg_xml_done(xmlerrcxt, true);
426 PG_RE_THROW();
427 }
428 PG_END_TRY();
429
430 cleanup_workspace(workspace);
431 pg_xml_done(xmlerrcxt, false);
432
 /* A missing result and a NaN result take the same branch. */
433 if (isNull || xmlXPathIsNaN(fRes))
435
437}
438
439
441
/*
 * xpath_bool (a Datum function taking PG_FUNCTION_ARGS, per the
 * cross-reference list at the end of this listing).
 *
 * Evaluates the XPath expression (argument 1) with pgxml_xpath() and casts
 * the result to a boolean with xmlXPathCastToBoolean().  When there is no
 * XPath result object, bRes is 0.
 *
 * NOTE(review): the fetch of the document argument, the construction of
 * xpath, and the return statement are not visible in this listing.
 */
442Datum
444{
446 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
447 xmlChar *xpath;
448 volatile int bRes = 0;
449 xpath_workspace *volatile workspace = NULL;
451
454
455 PG_TRY();
456 {
457 workspace = pgxml_xpath(document, xpath, xmlerrcxt);
 /* The path string is freed inside PG_TRY, right after evaluation. */
458 pfree(xpath);
459
460 if (workspace->res == NULL)
461 bRes = 0;
462 else
463 bRes = xmlXPathCastToBoolean(workspace->res);
464 }
465 PG_CATCH();
466 {
 /* workspace is still NULL if the error was raised before it was assigned. */
467 if (workspace)
468 cleanup_workspace(workspace);
469
470 pg_xml_done(xmlerrcxt, true);
471 PG_RE_THROW();
472 }
473 PG_END_TRY();
474
475 cleanup_workspace(workspace);
476 pg_xml_done(xmlerrcxt, false);
477
479}
480
481
482
483/* Core function to evaluate XPath query */
484
/*
 * pgxml_xpath (signature per the cross-reference list at the end of this
 * listing):
 *   static xpath_workspace *pgxml_xpath(text *document, xmlChar *xpath,
 *                                       PgXmlErrorContext *xmlerrcxt)
 *
 * Parses the document with xmlReadMemory(), creates an XPath context rooted
 * at the document's root element, compiles the path and evaluates it.  The
 * parsed document, the context and the result object are stored in the
 * returned workspace.
 *
 * If the document cannot be parsed (doctree is NULL), the workspace is
 * returned with ctxt and res still NULL, so callers must be prepared for
 * workspace->res == NULL.
 *
 * NOTE(review): the declarations of docsize, comppath and workspace, the
 * last argument of xmlReadMemory(), the error check after compilation and
 * any release of comppath are not visible in this listing.
 */
485static xpath_workspace *
487{
491
492 workspace->doctree = NULL;
493 workspace->ctxt = NULL;
494 workspace->res = NULL;
495
496 workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
497 docsize, NULL, NULL,
499 if (workspace->doctree != NULL)
500 {
501 workspace->ctxt = xmlXPathNewContext(workspace->doctree);
 /*
  * NOTE(review): ctxt is dereferenced on the very next line with no NULL
  * check in between, whereas xpath_table() tests the result of the same
  * call for NULL before using it -- consider checking here as well.
  */
502 workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
503
504 /* compile the path */
505 comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath);
508 "XPath Syntax Error");
509
510 /* Now evaluate the path expression. */
511 workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
512
514 }
515
516 return workspace;
517}
518
519/* Clean up after processing the result of pgxml_xpath() */
/*
 * cleanup_workspace (signature per the forward declaration earlier in this
 * file): static void cleanup_workspace(xpath_workspace *workspace)
 *
 * Releases whichever of res, ctxt and doctree are set, in that order, and
 * resets each field to NULL afterwards, so calling it again on the same
 * workspace finds nothing left to free.  The workspace pointer itself is
 * dereferenced unconditionally; callers must not pass NULL.
 */
520static void
522{
 /* XPath result object */
523 if (workspace->res)
524 xmlXPathFreeObject(workspace->res);
525 workspace->res = NULL;
 /* XPath evaluation context */
526 if (workspace->ctxt)
527 xmlXPathFreeContext(workspace->ctxt);
528 workspace->ctxt = NULL;
 /* Parsed document; freed last, after the context and result built on it */
529 if (workspace->doctree)
530 xmlFreeDoc(workspace->doctree);
531 workspace->doctree = NULL;
532}
533
/*
 * pgxml_result_to_text (signature per the cross-reference list at the end of
 * this listing):
 *   static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
 *                                     xmlChar *septag, xmlChar *plainsep)
 *
 * Converts an XPath result object to a text value.  Returns NULL when res is
 * NULL.  Node-set results go through pgxmlNodeSetToText(); string results
 * are duplicated with xmlStrdup(); any other result type raises a NOTICE and
 * yields the literal "<unsupported/>".
 *
 * NOTE(review): the set-up of xmlerrcxt, the remaining arguments passed to
 * pgxmlNodeSetToText(), the allocation-failure checks and the statements
 * that free xpresstr are not visible in this listing.
 */
534static text *
539{
540 xmlChar *volatile xpresstr = NULL;
541 text *volatile xpres = NULL;
543
 /* Nothing to convert; this return happens before the PG_TRY block. */
544 if (res == NULL)
545 return NULL;
546
547 /* spin some error handling */
549
550 PG_TRY();
551 {
552 switch (res->type)
553 {
554 case XPATH_NODESET:
555 xpresstr = pgxmlNodeSetToText(res->nodesetval,
556 toptag,
558 break;
559
560 case XPATH_STRING:
561 xpresstr = xmlStrdup(res->stringval);
564 "could not allocate result");
565 break;
566
567 default:
568 elog(NOTICE, "unsupported XQuery result: %d", res->type);
569 xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
572 "could not allocate result");
573 }
574
575 /* Now convert this result back to text */
576 xpres = cstring_to_text((char *) xpresstr);
577 }
578 PG_CATCH();
579 {
580 if (xpresstr != NULL)
582
583 pg_xml_done(xmlerrcxt, true);
584
585 PG_RE_THROW();
586 }
587 PG_END_TRY();
588
589 /* Free various storage */
591
592 pg_xml_done(xmlerrcxt, false);
593
594 return xpres;
595}
596
597/*
598 * xpath_table is a table function. It needs some tidying (as do the
599 * other functions here!)
600 */
602
/*
 * xpath_table (a Datum function taking PG_FUNCTION_ARGS, per the
 * cross-reference list at the end of this listing).
 *
 * Outline of the visible code:
 *   1. Split the XPath set on "|" into at most (output columns - 1) paths.
 *   2. Build and run, via SPI, a query of the form
 *        SELECT <pkeyfield>, <xmlfield> FROM <relname> WHERE <condition>
 *      and require that it returns exactly two columns.
 *   3. For each returned row, parse the document and evaluate every path.
 *      Column 0 of the output receives the key; column j + 1 receives the
 *      result of path j.  Rows are produced repeatedly (rownr = 0, 1, ...)
 *      while at least one path still yields a node at index rownr.
 *   4. Return (Datum) 0; per the comment at the end of the function, the
 *      tuples are passed back through rsinfo->setResult.
 *
 * NOTE(review): the fetch of the first four arguments, several local
 * declarations, the materialized-SRF set-up, the tuple construction and
 * tuplestore calls, and the release of per-path libxml objects are not
 * visible in this listing.
 */
603Datum
605{
606 /* Function parameters */
611 char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
612
613 /* SPI (input tuple) support */
614 SPITupleTable *tuptable;
617
618
619 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
620 AttInMetadata *attinmeta;
621
622 char **values;
623 xmlChar **xpaths;
624 char *pos;
625 const char *pathsep = "|";
626
627 int numpaths;
628 int ret;
629 uint64 proc;
630 int j;
631 int rownr; /* For issuing multiple rows from one original
632 * document */
633 bool had_values; /* To determine end of nodeset results */
 /* Assigned inside PG_TRY and examined in PG_CATCH. */
636 volatile xmlDocPtr doctree = NULL;
637
639
640 /* must have at least one output column (for the pkey) */
641 if (rsinfo->setDesc->natts < 1)
644 errmsg("xpath_table must have at least one output column")));
645
646 /*
647 * At the moment we assume that the returned attributes make sense for the
648 * XPath specified (i.e. we trust the caller). It's not fatal if they get
649 * it wrong - the input function for the column type will raise an error
650 * if the path result can't be converted into the correct binary
651 * representation.
652 */
653
654 attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);
655
 /* Both arrays are sized by the number of output columns. */
656 values = (char **) palloc(rsinfo->setDesc->natts * sizeof(char *));
657 xpaths = (xmlChar **) palloc(rsinfo->setDesc->natts * sizeof(xmlChar *));
658
659 /*
660 * Split XPaths. xpathset is a writable CString.
661 *
662 * Note that we stop splitting once we've done all needed for tupdesc
663 */
664 numpaths = 0;
665 pos = xpathset;
 /*
  * Splitting is done in place: each separator is overwritten with NUL and
  * xpaths[] points into xpathset.  Once natts - 1 paths have been taken, the
  * last one keeps any remaining separators.
  */
666 while (numpaths < (rsinfo->setDesc->natts - 1))
667 {
668 xpaths[numpaths++] = (xmlChar *) pos;
669 pos = strstr(pos, pathsep);
670 if (pos != NULL)
671 {
672 *pos = '\0';
673 pos++;
674 }
675 else
676 break;
677 }
678
679 /* Now build query */
681
682 /* Build initial sql statement */
 /*
  * NOTE(review): the four caller-supplied strings are interpolated into the
  * SQL text with %s, without quoting or escaping.  Presumably intentional,
  * since condition is an arbitrary WHERE clause -- confirm that callers are
  * trusted.
  */
683 appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
684 pkeyfield,
685 xmlfield,
686 relname,
687 condition);
688
 /* NOTE(review): the return value of SPI_connect() is not checked here. */
689 SPI_connect();
690
691 if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
692 elog(ERROR, "xpath_table: SPI execution failed for query %s",
693 query_buf.data);
694
695 proc = SPI_processed;
696 tuptable = SPI_tuptable;
697 spi_tupdesc = tuptable->tupdesc;
698
699 /*
700 * Check that SPI returned correct result. If you put a comma into one of
701 * the function parameters, this will catch it when the SPI query returns
702 * e.g. 3 columns.
703 */
704 if (spi_tupdesc->natts != 2)
705 {
707 errmsg("expression returning multiple columns is not valid in parameter list"),
708 errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
709 }
710
711 /*
712 * Setup the parser. This should happen after we are done evaluating the
713 * query, in case it calls functions that set up libxml differently.
714 */
716
717 PG_TRY();
718 {
719 /* For each row i.e. document returned from SPI */
720 uint64 i;
721
722 for (i = 0; i < proc; i++)
723 {
724 char *pkey;
725 char *xmldoc;
731
732 /* Extract the row data as C Strings */
733 spi_tuple = tuptable->vals[i];
736
737 /*
738 * Clear the values array, so that not-well-formed documents
739 * return NULL in all columns. Note that this also means that
740 * spare columns will be NULL.
741 */
742 for (j = 0; j < rsinfo->setDesc->natts; j++)
743 values[j] = NULL;
744
745 /* Insert primary key */
746 values[0] = pkey;
747
748 /* Parse the document */
749 if (xmldoc)
750 doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
751 NULL, NULL,
753 else /* treat NULL as not well-formed */
754 doctree = NULL;
755
756 if (doctree == NULL)
757 {
758 /* not well-formed, so output all-NULL tuple */
762 }
763 else
764 {
765 /* New loop here - we have to deal with nodeset results */
766 rownr = 0;
767
768 do
769 {
770 /* Now evaluate the set of xpaths. */
771 had_values = false;
772 for (j = 0; j < numpaths; j++)
773 {
 /* A fresh XPath context is created for every path on every pass. */
774 ctxt = xmlXPathNewContext(doctree);
775 if (ctxt == NULL || pg_xml_error_occurred(xmlerrcxt))
778 "could not allocate XPath context");
779
780 ctxt->node = xmlDocGetRootElement(doctree);
781
782 /* compile the path */
787 "XPath Syntax Error");
788
789 /* Now evaluate the path expression. */
790 res = xmlXPathCompiledEval(comppath, ctxt);
792
793 if (res != NULL)
794 {
795 switch (res->type)
796 {
797 case XPATH_NODESET:
798 /* We see if this nodeset has enough nodes */
799 if (res->nodesetval != NULL &&
801 {
802 resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
806 "could not allocate result");
 /*
  * In the visible code this is the only place had_values is set, so only
  * node-set results keep the do/while loop going.
  */
807 had_values = true;
808 }
809 else
810 resstr = NULL;
811
812 break;
813
814 case XPATH_STRING:
815 resstr = xmlStrdup(res->stringval);
819 "could not allocate result");
820 break;
821
822 default:
823 elog(NOTICE, "unsupported XQuery result: %d", res->type);
824 resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
828 "could not allocate result");
829 }
830
831 /*
832 * Insert this into the appropriate column in the
833 * result tuple.
834 */
 /* Column 0 holds the key, so path j maps to column j + 1. */
835 values[j + 1] = (char *) resstr;
836 }
838 }
839
840 /* Now add the tuple to the output, if there is one. */
841 if (had_values)
842 {
846 }
847
848 rownr++;
849 } while (had_values);
850 }
851
 /* Reset to NULL after freeing so the later cleanup does not free it twice. */
852 if (doctree != NULL)
853 xmlFreeDoc(doctree);
854 doctree = NULL;
855
856 if (pkey)
857 pfree(pkey);
858 if (xmldoc)
859 pfree(xmldoc);
860 }
861 }
862 PG_CATCH();
863 {
 /* Error path: free the document being processed, if any. */
864 if (doctree != NULL)
865 xmlFreeDoc(doctree);
866
867 pg_xml_done(xmlerrcxt, true);
868
869 PG_RE_THROW();
870 }
871 PG_END_TRY();
872
873 if (doctree != NULL)
874 xmlFreeDoc(doctree);
875
876 pg_xml_done(xmlerrcxt, false);
877
878 SPI_finish();
879
880 /*
881 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
882 * tuples are in our tuplestore and passed back through rsinfo->setResult.
883 * rsinfo->setDesc is set to the tuple description that we actually used
884 * to build our tuples with, so the caller can verify we did what it was
885 * expecting.
886 */
887 return (Datum) 0;
888}
static Datum values[MAXATTR]
Definition bootstrap.c:190
int32_t int32
Definition c.h:620
uint64_t uint64
Definition c.h:625
float float4
Definition c.h:713
uint32 result
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int errcode(int sqlerrcode)
Definition elog.c:875
#define PG_RE_THROW()
Definition elog.h:407
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define PG_TRY(...)
Definition elog.h:374
#define PG_END_TRY(...)
Definition elog.h:399
#define ERROR
Definition elog.h:40
#define PG_CATCH(...)
Definition elog.h:384
#define elog(elevel,...)
Definition elog.h:228
#define NOTICE
Definition elog.h:36
#define ereport(elevel,...)
Definition elog.h:152
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
#define palloc0_object(type)
Definition fe_memutils.h:90
#define PG_GETARG_TEXT_PP(n)
Definition fmgr.h:310
#define PG_MODULE_MAGIC_EXT(...)
Definition fmgr.h:540
#define PG_RETURN_NULL()
Definition fmgr.h:346
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_RETURN_TEXT_P(x)
Definition fmgr.h:374
#define PG_RETURN_FLOAT4(x)
Definition fmgr.h:368
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
void InitMaterializedSRF(FunctionCallInfo fcinfo, uint32 flags)
Definition funcapi.c:76
#define MAT_SRF_USE_EXPECTED_DESC
Definition funcapi.h:296
void heap_freetuple(HeapTuple htup)
Definition heaptuple.c:1372
int j
Definition isn.c:78
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1619
void * palloc(Size size)
Definition mcxt.c:1390
static char * errmsg
NameData relname
Definition pg_class.h:40
static char buf[DEFAULT_XLOG_SEG_SIZE]
uint64_t Datum
Definition postgres.h:70
static int fb(int x)
uint64 SPI_processed
Definition spi.c:45
SPITupleTable * SPI_tuptable
Definition spi.c:46
int SPI_connect(void)
Definition spi.c:95
int SPI_finish(void)
Definition spi.c:183
int SPI_exec(const char *src, long tcount)
Definition spi.c:631
char * SPI_getvalue(HeapTuple tuple, TupleDesc tupdesc, int fnumber)
Definition spi.c:1221
#define SPI_OK_SELECT
Definition spi.h:86
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
TupleDesc tupdesc
Definition spi.h:25
HeapTuple * vals
Definition spi.h:26
Definition c.h:776
xmlXPathContextPtr ctxt
Definition xpath.c:40
xmlDocPtr doctree
Definition xpath.c:39
xmlXPathObjectPtr res
Definition xpath.c:41
void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple)
Definition tuplestore.c:765
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
text * cstring_to_text(const char *s)
Definition varlena.c:184
char * text_to_cstring(const text *t)
Definition varlena.c:217
const char * name
Datum xpath(PG_FUNCTION_ARGS)
Definition xml.c:4570
struct PgXmlErrorContext PgXmlErrorContext
Definition xml.h:48
PgXmlErrorContext * pg_xml_init(PgXmlStrictness strictness)
void xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
bool pg_xml_error_occurred(PgXmlErrorContext *errcxt)
void pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
PgXmlStrictness
Definition xml.h:40
@ PG_XML_STRICTNESS_LEGACY
Definition xml.h:41
@ PG_XML_STRICTNESS_ALL
Definition xml.h:44
static text * pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag, xmlChar *septag, xmlChar *plainsep)
Definition xpath.c:535
Datum xpath_bool(PG_FUNCTION_ARGS)
Definition xpath.c:443
Datum xpath_number(PG_FUNCTION_ARGS)
Definition xpath.c:397
Datum xpath_table(PG_FUNCTION_ARGS)
Definition xpath.c:604
static xmlChar * pgxml_texttoxmlchar(text *textstring)
Definition xpath.c:240
PgXmlErrorContext * pgxml_parser_init(PgXmlStrictness strictness)
Definition xpath.c:68
Datum xpath_string(PG_FUNCTION_ARGS)
Definition xpath.c:342
static void cleanup_workspace(xpath_workspace *workspace)
Definition xpath.c:521
Datum xml_encode_special_chars(PG_FUNCTION_ARGS)
Definition xpath.c:89
Datum xpath_list(PG_FUNCTION_ARGS)
Definition xpath.c:300
static xmlChar * pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlChar *toptagname, xmlChar *septagname, xmlChar *plainsep)
Definition xpath.c:143
static xpath_workspace * pgxml_xpath(text *document, xmlChar *xpath, PgXmlErrorContext *xmlerrcxt)
Definition xpath.c:486
Datum xpath_nodeset(PG_FUNCTION_ARGS)
Definition xpath.c:254