PostgreSQL Source Code  git master
postgres_fdw.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postgres_fdw.c
4  * Foreign-data wrapper for remote PostgreSQL servers
5  *
6  * Portions Copyright (c) 2012-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * contrib/postgres_fdw/postgres_fdw.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include <limits.h>
16 
17 #include "access/htup_details.h"
18 #include "access/sysattr.h"
19 #include "access/table.h"
20 #include "catalog/pg_class.h"
21 #include "catalog/pg_opfamily.h"
22 #include "commands/defrem.h"
23 #include "commands/explain.h"
24 #include "commands/vacuum.h"
25 #include "executor/execAsync.h"
26 #include "foreign/fdwapi.h"
27 #include "funcapi.h"
28 #include "miscadmin.h"
29 #include "nodes/makefuncs.h"
30 #include "nodes/nodeFuncs.h"
31 #include "optimizer/appendinfo.h"
32 #include "optimizer/clauses.h"
33 #include "optimizer/cost.h"
34 #include "optimizer/inherit.h"
35 #include "optimizer/optimizer.h"
36 #include "optimizer/pathnode.h"
37 #include "optimizer/paths.h"
38 #include "optimizer/planmain.h"
39 #include "optimizer/prep.h"
40 #include "optimizer/restrictinfo.h"
41 #include "optimizer/tlist.h"
42 #include "parser/parsetree.h"
43 #include "postgres_fdw.h"
44 #include "storage/latch.h"
45 #include "utils/builtins.h"
46 #include "utils/float.h"
47 #include "utils/guc.h"
48 #include "utils/lsyscache.h"
49 #include "utils/memutils.h"
50 #include "utils/rel.h"
51 #include "utils/sampling.h"
52 #include "utils/selfuncs.h"
53 
55 
56 /* Default CPU cost to start up a foreign query. */
57 #define DEFAULT_FDW_STARTUP_COST 100.0
58 
59 /* Default CPU cost to process 1 row (above and beyond cpu_tuple_cost). */
60 #define DEFAULT_FDW_TUPLE_COST 0.2
61 
62 /* If no remote estimates, assume a sort costs 20% extra */
63 #define DEFAULT_FDW_SORT_MULTIPLIER 1.2
64 
65 /*
66  * Indexes of FDW-private information stored in fdw_private lists.
67  *
68  * These items are indexed with the enum FdwScanPrivateIndex, so an item
69  * can be fetched with list_nth(). For example, to get the SELECT statement:
70  * sql = strVal(list_nth(fdw_private, FdwScanPrivateSelectSql));
71  */
73 {
74  /* SQL statement to execute remotely (as a String node) */
76  /* Integer list of attribute numbers retrieved by the SELECT */
78  /* Integer representing the desired fetch_size */
80 
81  /*
82  * String describing the join, i.e. the names of the relations being joined
83  * and the types of join; added when the scan is a join
84  */
86 };
87 
88 /*
89  * Similarly, this enum describes what's kept in the fdw_private list for
90  * a ModifyTable node referencing a postgres_fdw foreign table. We store:
91  *
92  * 1) INSERT/UPDATE/DELETE statement text to be sent to the remote server
93  * 2) Integer list of target attribute numbers for INSERT/UPDATE
94  * (NIL for a DELETE)
95  * 3) Length till the end of VALUES clause for INSERT
96  * (-1 for a DELETE/UPDATE)
97  * 4) Boolean flag showing if the remote query has a RETURNING clause
98  * 5) Integer list of attribute numbers retrieved by RETURNING, if any
99  */
101 {
102  /* SQL statement to execute remotely (as a String node) */
104  /* Integer list of target attribute numbers for INSERT/UPDATE */
106  /* Length till the end of VALUES clause (as an Integer node) */
108  /* has-returning flag (as a Boolean node) */
110  /* Integer list of attribute numbers retrieved by RETURNING */
112 };
113 
114 /*
115  * Similarly, this enum describes what's kept in the fdw_private list for
116  * a ForeignScan node that modifies a foreign table directly. We store:
117  *
118  * 1) UPDATE/DELETE statement text to be sent to the remote server
119  * 2) Boolean flag showing if the remote query has a RETURNING clause
120  * 3) Integer list of attribute numbers retrieved by RETURNING, if any
121  * 4) Boolean flag showing if we set the command es_processed
122  */
124 {
125  /* SQL statement to execute remotely (as a String node) */
127  /* has-returning flag (as a Boolean node) */
129  /* Integer list of attribute numbers retrieved by RETURNING */
131  /* set-processed flag (as a Boolean node) */
133 };
134 
135 /*
136  * Execution state of a foreign scan using postgres_fdw.
     *
     * The fields are grouped as: relation and tuple-conversion metadata, data
     * extracted from fdw_private, remote cursor and query-parameter state, the
     * array of currently fetched tuples with its read position, batch-level
     * bookkeeping, the asynchronous-execution flag, the working memory
     * contexts, and finally the per-fetch tuple count.
137  */
138 typedef struct PgFdwScanState
139 {
140  Relation rel; /* relcache entry for the foreign table. NULL
141  * for a foreign join scan. */
142  TupleDesc tupdesc; /* tuple descriptor of scan */
143  AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
144 
145  /* extracted fdw_private data */
146  char *query; /* text of SELECT command */
147  List *retrieved_attrs; /* list of retrieved attribute numbers */
148 
149  /* for remote query execution */
150  PGconn *conn; /* connection for the scan */
151  PgFdwConnState *conn_state; /* extra per-connection state */
152  unsigned int cursor_number; /* quasi-unique ID for my cursor */
153  bool cursor_exists; /* have we created the cursor? */
154  int numParams; /* number of parameters passed to query */
155  FmgrInfo *param_flinfo; /* output conversion functions for them */
156  List *param_exprs; /* executable expressions for param values */
157  const char **param_values; /* textual values of query parameters */
158 
159  /* for storing result tuples */
160  HeapTuple *tuples; /* array of currently-retrieved tuples */
161  int num_tuples; /* # of tuples in array */
162  int next_tuple; /* index of next one to return */
163 
164  /* batch-level state, for optimizing rewinds and avoiding useless fetch */
165  int fetch_ct_2; /* Min(# of fetches done, 2) */
166  bool eof_reached; /* true if last fetch reached EOF */
167 
168  /* for asynchronous execution */
169  bool async_capable; /* engage asynchronous-capable logic? */
170 
171  /* working memory contexts */
172  MemoryContext batch_cxt; /* context holding current batch of tuples */
173  MemoryContext temp_cxt; /* context for per-tuple temporary data */
174 
175  int fetch_size; /* number of tuples per fetch */
177 
178 /*
179  * Execution state of a foreign insert/update/delete operation.
     *
     * The fields are grouped as: relation and tuple-conversion metadata, remote
     * connection and prepared-statement state, data extracted from fdw_private,
     * prepared-statement parameter info, batch-insert bookkeeping, a working
     * memory context, and an optional pointer to a second state of this same
     * type (aux_fmstate).
180  */
181 typedef struct PgFdwModifyState
182 {
183  Relation rel; /* relcache entry for the foreign table */
184  AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
185 
186  /* for remote query execution */
187  PGconn *conn; /* connection used for the remote statement */
188  PgFdwConnState *conn_state; /* extra per-connection state */
189  char *p_name; /* name of prepared statement, if created */
190 
191  /* extracted fdw_private data */
192  char *query; /* text of INSERT/UPDATE/DELETE command */
193  char *orig_query; /* original text of INSERT command */
194  List *target_attrs; /* list of target attribute numbers */
195  int values_end; /* length up to the end of VALUES */
196  int batch_size; /* value of FDW option "batch_size" */
197  bool has_returning; /* is there a RETURNING clause? */
198  List *retrieved_attrs; /* attr numbers retrieved by RETURNING */
199 
200  /* info about parameters for prepared statement */
201  AttrNumber ctidAttno; /* attnum of input resjunk ctid column */
202  int p_nums; /* number of parameters to transmit */
203  FmgrInfo *p_flinfo; /* output conversion functions for them */
204 
205  /* batch operation stuff */
206  int num_slots; /* number of slots to insert */
207 
208  /* working memory context */
209  MemoryContext temp_cxt; /* context for per-tuple temporary data */
210 
211  /* for update row movement if subplan result rel */
212  struct PgFdwModifyState *aux_fmstate; /* foreign-insert state, if
213  * created */
215 
216 /*
217  * Execution state of a foreign scan that modifies a foreign table directly.
218  */
220 {
221  Relation rel; /* relcache entry for the foreign table */
222  AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
223 
224  /* extracted fdw_private data */
225  char *query; /* text of UPDATE/DELETE command */
226  bool has_returning; /* is there a RETURNING clause? */
227  List *retrieved_attrs; /* attr numbers retrieved by RETURNING */
228  bool set_processed; /* do we set the command es_processed? */
229 
230  /* for remote query execution */
231  PGconn *conn; /* connection for the update */
232  PgFdwConnState *conn_state; /* extra per-connection state */
233  int numParams; /* number of parameters passed to query */
234  FmgrInfo *param_flinfo; /* output conversion functions for them */
235  List *param_exprs; /* executable expressions for param values */
236  const char **param_values; /* textual values of query parameters */
237 
238  /* for storing result tuples */
239  PGresult *result; /* result for query */
240  int num_tuples; /* # of result tuples */
241  int next_tuple; /* index of next one to return */
242  Relation resultRel; /* relcache entry for the target relation */
243  AttrNumber *attnoMap; /* array of attnums of input user columns */
244  AttrNumber ctidAttno; /* attnum of input ctid column */
245  AttrNumber oidAttno; /* attnum of input oid column */
246  bool hasSystemCols; /* are there system columns of resultRel? */
247 
248  /* working memory context */
249  MemoryContext temp_cxt; /* context for per-tuple temporary data */
251 
252 /*
253  * Workspace for analyzing a foreign table.
     *
     * Holds the relation and conversion metadata, the caller-sized array that
     * receives sample rows together with its fill count, the running counters
     * and reservoir state used for random sampling, and the two memory
     * contexts used while sampling.
254  */
255 typedef struct PgFdwAnalyzeState
256 {
257  Relation rel; /* relcache entry for the foreign table */
258  AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
259  List *retrieved_attrs; /* attr numbers retrieved by query */
260 
261  /* collected sample rows */
262  HeapTuple *rows; /* array of size targrows */
263  int targrows; /* target # of sample rows */
264  int numrows; /* # of sample rows collected */
265 
266  /* for random sampling */
267  double samplerows; /* # of rows fetched */
268  double rowstoskip; /* # of rows to skip before next sample */
269  ReservoirStateData rstate; /* state for reservoir sampling */
270 
271  /* working memory contexts */
272  MemoryContext anl_cxt; /* context for per-analyze lifespan data */
273  MemoryContext temp_cxt; /* context for per-tuple temporary data */
275 
276 /*
277  * This enum describes what's kept in the fdw_private list for a ForeignPath.
278  * We store:
279  *
280  * 1) Boolean flag showing if the remote query has the final sort
281  * 2) Boolean flag showing if the remote query has the LIMIT clause
282  */
284 {
285  /* has-final-sort flag (as a Boolean node) */
287  /* has-limit flag (as a Boolean node) */
289 };
290 
291 /* Struct for extra information passed to estimate_path_cost_size() */
292 typedef struct
293 {
296  bool has_limit;
297  double limit_tuples;
298  int64 count_est;
299  int64 offset_est;
301 
302 /*
303  * Identify the attribute where data conversion fails.
     *
     * Each field may be "empty" (0 or NULL) when that piece of context does not
     * apply. NOTE(review): presumably this is the argument handed to
     * conversion_error_callback(), which is declared in this file -- confirm at
     * the point of use.
304  */
305 typedef struct ConversionLocation
306 {
307  AttrNumber cur_attno; /* attribute number being processed, or 0 */
308  Relation rel; /* foreign table being processed, or NULL */
309  ForeignScanState *fsstate; /* plan node being processed, or NULL */
311 
312 /* Callback argument for ec_member_matches_foreign */
313 typedef struct
314 {
315  Expr *current; /* current expr, or NULL if not yet found */
316  List *already_used; /* expressions already dealt with */
318 
319 /*
320  * SQL functions
321  */
323 
324 /*
325  * FDW callback routines
326  */
328  RelOptInfo *baserel,
329  Oid foreigntableid);
331  RelOptInfo *baserel,
332  Oid foreigntableid);
334  RelOptInfo *foreignrel,
335  Oid foreigntableid,
336  ForeignPath *best_path,
337  List *tlist,
338  List *scan_clauses,
339  Plan *outer_plan);
340 static void postgresBeginForeignScan(ForeignScanState *node, int eflags);
343 static void postgresEndForeignScan(ForeignScanState *node);
345  Index rtindex,
346  RangeTblEntry *target_rte,
347  Relation target_relation);
349  ModifyTable *plan,
350  Index resultRelation,
351  int subplan_index);
352 static void postgresBeginForeignModify(ModifyTableState *mtstate,
353  ResultRelInfo *resultRelInfo,
354  List *fdw_private,
355  int subplan_index,
356  int eflags);
358  ResultRelInfo *resultRelInfo,
359  TupleTableSlot *slot,
360  TupleTableSlot *planSlot);
362  ResultRelInfo *resultRelInfo,
363  TupleTableSlot **slots,
364  TupleTableSlot **planSlots,
365  int *numSlots);
366 static int postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo);
368  ResultRelInfo *resultRelInfo,
369  TupleTableSlot *slot,
370  TupleTableSlot *planSlot);
372  ResultRelInfo *resultRelInfo,
373  TupleTableSlot *slot,
374  TupleTableSlot *planSlot);
375 static void postgresEndForeignModify(EState *estate,
376  ResultRelInfo *resultRelInfo);
377 static void postgresBeginForeignInsert(ModifyTableState *mtstate,
378  ResultRelInfo *resultRelInfo);
379 static void postgresEndForeignInsert(EState *estate,
380  ResultRelInfo *resultRelInfo);
383  ModifyTable *plan,
384  Index resultRelation,
385  int subplan_index);
386 static void postgresBeginDirectModify(ForeignScanState *node, int eflags);
388 static void postgresEndDirectModify(ForeignScanState *node);
390  ExplainState *es);
392  ResultRelInfo *rinfo,
393  List *fdw_private,
394  int subplan_index,
395  ExplainState *es);
397  ExplainState *es);
398 static void postgresExecForeignTruncate(List *rels,
399  DropBehavior behavior,
400  bool restart_seqs);
401 static bool postgresAnalyzeForeignTable(Relation relation,
402  AcquireSampleRowsFunc *func,
403  BlockNumber *totalpages);
405  Oid serverOid);
407  RelOptInfo *joinrel,
408  RelOptInfo *outerrel,
409  RelOptInfo *innerrel,
410  JoinType jointype,
411  JoinPathExtraData *extra);
413  TupleTableSlot *slot);
415  UpperRelationKind stage,
416  RelOptInfo *input_rel,
417  RelOptInfo *output_rel,
418  void *extra);
420 static void postgresForeignAsyncRequest(AsyncRequest *areq);
422 static void postgresForeignAsyncNotify(AsyncRequest *areq);
423 
424 /*
425  * Helper functions
426  */
428  RelOptInfo *foreignrel,
429  List *param_join_conds,
430  List *pathkeys,
431  PgFdwPathExtraData *fpextra,
432  double *p_rows, int *p_width,
433  Cost *p_startup_cost, Cost *p_total_cost);
434 static void get_remote_estimate(const char *sql,
435  PGconn *conn,
436  double *rows,
437  int *width,
438  Cost *startup_cost,
439  Cost *total_cost);
441  List *pathkeys,
442  double retrieved_rows,
443  double width,
444  double limit_tuples,
445  Cost *p_startup_cost,
446  Cost *p_run_cost);
449  void *arg);
450 static void create_cursor(ForeignScanState *node);
451 static void fetch_more_data(ForeignScanState *node);
452 static void close_cursor(PGconn *conn, unsigned int cursor_number,
453  PgFdwConnState *conn_state);
455  RangeTblEntry *rte,
456  ResultRelInfo *resultRelInfo,
457  CmdType operation,
458  Plan *subplan,
459  char *query,
460  List *target_attrs,
461  int values_end,
462  bool has_returning,
463  List *retrieved_attrs);
465  ResultRelInfo *resultRelInfo,
466  CmdType operation,
467  TupleTableSlot **slots,
468  TupleTableSlot **planSlots,
469  int *numSlots);
470 static void prepare_foreign_modify(PgFdwModifyState *fmstate);
471 static const char **convert_prep_stmt_params(PgFdwModifyState *fmstate,
472  ItemPointer tupleid,
473  TupleTableSlot **slots,
474  int numSlots);
475 static void store_returning_result(PgFdwModifyState *fmstate,
476  TupleTableSlot *slot, PGresult *res);
477 static void finish_foreign_modify(PgFdwModifyState *fmstate);
478 static void deallocate_query(PgFdwModifyState *fmstate);
479 static List *build_remote_returning(Index rtindex, Relation rel,
480  List *returningList);
481 static void rebuild_fdw_scan_tlist(ForeignScan *fscan, List *tlist);
482 static void execute_dml_stmt(ForeignScanState *node);
484 static void init_returning_filter(PgFdwDirectModifyState *dmstate,
485  List *fdw_scan_tlist,
486  Index rtindex);
488  ResultRelInfo *resultRelInfo,
489  TupleTableSlot *slot,
490  EState *estate);
491 static void prepare_query_params(PlanState *node,
492  List *fdw_exprs,
493  int numParams,
494  FmgrInfo **param_flinfo,
495  List **param_exprs,
496  const char ***param_values);
497 static void process_query_params(ExprContext *econtext,
498  FmgrInfo *param_flinfo,
499  List *param_exprs,
500  const char **param_values);
501 static int postgresAcquireSampleRowsFunc(Relation relation, int elevel,
502  HeapTuple *rows, int targrows,
503  double *totalrows,
504  double *totaldeadrows);
505 static void analyze_row_processor(PGresult *res, int row,
506  PgFdwAnalyzeState *astate);
507 static void produce_tuple_asynchronously(AsyncRequest *areq, bool fetch);
508 static void fetch_more_data_begin(AsyncRequest *areq);
509 static void complete_pending_request(AsyncRequest *areq);
511  int row,
512  Relation rel,
513  AttInMetadata *attinmeta,
514  List *retrieved_attrs,
515  ForeignScanState *fsstate,
516  MemoryContext temp_context);
517 static void conversion_error_callback(void *arg);
518 static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel,
519  JoinType jointype, RelOptInfo *outerrel, RelOptInfo *innerrel,
520  JoinPathExtraData *extra);
521 static bool foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
522  Node *havingQual);
524  RelOptInfo *rel);
527  Path *epq_path, List *restrictlist);
529  RelOptInfo *input_rel,
530  RelOptInfo *grouped_rel,
531  GroupPathExtraData *extra);
533  RelOptInfo *input_rel,
534  RelOptInfo *ordered_rel);
536  RelOptInfo *input_rel,
537  RelOptInfo *final_rel,
538  FinalPathExtraData *extra);
539 static void apply_server_options(PgFdwRelationInfo *fpinfo);
540 static void apply_table_options(PgFdwRelationInfo *fpinfo);
541 static void merge_fdw_options(PgFdwRelationInfo *fpinfo,
542  const PgFdwRelationInfo *fpinfo_o,
543  const PgFdwRelationInfo *fpinfo_i);
544 static int get_batch_size_option(Relation rel);
545 
546 
547 /*
548  * Foreign-data wrapper handler function: return a struct with pointers
549  * to my callback routines.
     *
     * A fresh FdwRoutine node is created with makeNode() and handed back to the
     * caller as a Datum via PG_RETURN_POINTER. The comments in the body name the
     * groups of callbacks the node carries.
550  */
551 Datum
553 {
554  FdwRoutine *routine = makeNode(FdwRoutine);
555 
556  /* Functions for scanning foreign tables */
564 
565  /* Functions for updating foreign tables */
582 
583  /* Function for EvalPlanQual rechecks */
585  /* Support functions for EXPLAIN */
589 
590  /* Support function for TRUNCATE */
592 
593  /* Support functions for ANALYZE */
595 
596  /* Support functions for IMPORT FOREIGN SCHEMA */
598 
599  /* Support functions for join push-down */
601 
602  /* Support functions for upper relation push-down */
604 
605  /* Support functions for asynchronous execution */
610 
611  PG_RETURN_POINTER(routine);
612 }
613 
614 /*
615  * postgresGetForeignRelSize
616  * Estimate # of rows and width of the result of the scan
617  *
618  * We should consider the effect of all baserestrictinfo clauses here, but
619  * not any join clauses.
     *
     * root: planner state; passed through to the condition classifier and to
     *   the cost and size estimators called below.
     * baserel: planner information for the foreign table. On return its
     *   fdw_private points to a newly palloc0'd PgFdwRelationInfo. When
     *   use_remote_estimate is set, its rows and reltarget->width are
     *   overwritten with the estimates obtained here; otherwise, if its
     *   tuples count is negative, pages and tuples are given fallback values.
     * foreigntableid: OID used to look up the foreign table, and through it
     *   the foreign server, in the catalogs.
     *
     * The PgFdwRelationInfo built here also records the remote/local split of
     * the restriction clauses, the set of attributes that must be fetched, and
     * the cost/size estimates of the basic scan. fpinfo->user is looked up only
     * when use_remote_estimate is set and is NULL otherwise.
620  */
621 static void
623  RelOptInfo *baserel,
624  Oid foreigntableid)
625 {
626  PgFdwRelationInfo *fpinfo;
627  ListCell *lc;
628 
629  /*
630  * We use PgFdwRelationInfo to pass various information to subsequent
631  * functions.
632  */
633  fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo));
634  baserel->fdw_private = (void *) fpinfo;
635 
636  /* Base foreign tables need to be pushed down always. */
637  fpinfo->pushdown_safe = true;
638 
639  /* Look up foreign-table catalog info. */
640  fpinfo->table = GetForeignTable(foreigntableid);
641  fpinfo->server = GetForeignServer(fpinfo->table->serverid);
642 
643  /*
644  * Extract user-settable option values. Note that per-table settings of
645  * use_remote_estimate, fetch_size and async_capable override per-server
646  * settings of them, respectively.
647  */
648  fpinfo->use_remote_estimate = false;
651  fpinfo->shippable_extensions = NIL;
652  fpinfo->fetch_size = 100;
653  fpinfo->async_capable = false;
654 
655  apply_server_options(fpinfo);
656  apply_table_options(fpinfo);
657 
658  /*
659  * If the table or the server is configured to use remote estimates,
660  * identify which user to do remote access as during planning. This
661  * should match what ExecCheckPermissions() does. If we fail due to lack
662  * of permissions, the query would have failed at runtime anyway.
663  */
664  if (fpinfo->use_remote_estimate)
665  {
666  Oid userid;
667 
668  userid = OidIsValid(baserel->userid) ? baserel->userid : GetUserId();
669  fpinfo->user = GetUserMapping(userid, fpinfo->server->serverid);
670  }
671  else
672  fpinfo->user = NULL;
673 
674  /*
675  * Identify which baserestrictinfo clauses can be sent to the remote
676  * server and which can't.
677  */
678  classifyConditions(root, baserel, baserel->baserestrictinfo,
679  &fpinfo->remote_conds, &fpinfo->local_conds);
680 
681  /*
682  * Identify which attributes will need to be retrieved from the remote
683  * server. These include all attrs needed for joins or final output, plus
684  * all attrs used in the local_conds. (Note: if we end up using a
685  * parameterized scan, it's possible that some of the join clauses will be
686  * sent to the remote and thus we wouldn't really need to retrieve the
687  * columns used in them. Doesn't seem worth detecting that case though.)
688  */
689  fpinfo->attrs_used = NULL;
690  pull_varattnos((Node *) baserel->reltarget->exprs, baserel->relid,
691  &fpinfo->attrs_used);
692  foreach(lc, fpinfo->local_conds)
693  {
694  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
695 
696  pull_varattnos((Node *) rinfo->clause, baserel->relid,
697  &fpinfo->attrs_used);
698  }
699 
700  /*
701  * Compute the selectivity and cost of the local_conds, so we don't have
702  * to do it over again for each path. The best we can do for these
703  * conditions is to estimate selectivity on the basis of local statistics.
704  */
706  fpinfo->local_conds,
707  baserel->relid,
708  JOIN_INNER,
709  NULL);
710 
711  cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
712 
713  /*
714  * Set # of retrieved rows and cached relation costs to some negative
715  * value, so that we can detect when they are set to some sensible values,
716  * during one (usually the first) of the calls to estimate_path_cost_size.
717  */
718  fpinfo->retrieved_rows = -1;
719  fpinfo->rel_startup_cost = -1;
720  fpinfo->rel_total_cost = -1;
721 
722  /*
723  * If the table or the server is configured to use remote estimates,
724  * connect to the foreign server and execute EXPLAIN to estimate the
725  * number of rows selected by the restriction clauses, as well as the
726  * average row width. Otherwise, estimate using whatever statistics we
727  * have locally, in a way similar to ordinary tables.
728  */
729  if (fpinfo->use_remote_estimate)
730  {
731  /*
732  * Get cost/size estimates with help of remote server. Save the
733  * values in fpinfo so we don't need to do it again to generate the
734  * basic foreign path.
735  */
736  estimate_path_cost_size(root, baserel, NIL, NIL, NULL,
737  &fpinfo->rows, &fpinfo->width,
738  &fpinfo->startup_cost, &fpinfo->total_cost);
739 
740  /* Report estimated baserel size to planner. */
741  baserel->rows = fpinfo->rows;
742  baserel->reltarget->width = fpinfo->width;
743  }
744  else
745  {
746  /*
747  * If the foreign table has never been ANALYZEd, it will have
748  * reltuples < 0, meaning "unknown". We can't do much if we're not
749  * allowed to consult the remote server, but we can use a hack similar
750  * to plancat.c's treatment of empty relations: use a minimum size
751  * estimate of 10 pages, and divide by the column-datatype-based width
752  * estimate to get the corresponding number of tuples.
753  */
754  if (baserel->tuples < 0)
755  {
756  baserel->pages = 10;
757  baserel->tuples =
758  (10 * BLCKSZ) / (baserel->reltarget->width +
760  }
761 
762  /* Estimate baserel size as best we can with local statistics. */
764 
765  /* Fill in basically-bogus cost estimates for use later. */
766  estimate_path_cost_size(root, baserel, NIL, NIL, NULL,
767  &fpinfo->rows, &fpinfo->width,
768  &fpinfo->startup_cost, &fpinfo->total_cost);
769  }
770 
771  /*
772  * fpinfo->relation_name gets the numeric rangetable index of the foreign
773  * table RTE. (If this query gets EXPLAIN'd, we'll convert that to a
774  * human-readable string at that time.)
775  */
776  fpinfo->relation_name = psprintf("%u", baserel->relid);
777 
778  /* No outer and inner relations. */
779  fpinfo->make_outerrel_subquery = false;
780  fpinfo->make_innerrel_subquery = false;
781  fpinfo->lower_subquery_rels = NULL;
782  fpinfo->hidden_subquery_rels = NULL;
783  /* Set the relation index. */
784  fpinfo->relation_index = baserel->relid;
785 }
786 
787 /*
788  * get_useful_ecs_for_relation
789  * Determine which EquivalenceClasses might be involved in useful
790  * orderings of this relation.
791  *
792  * This function is in some respects a mirror image of the core function
793  * pathkeys_useful_for_merging: for a regular table, we know what indexes
794  * we have and want to test whether any of them are useful. For a foreign
795  * table, we don't know what indexes are present on the remote side but
796  * want to speculate about which ones we'd like to use if they existed.
797  *
798  * This function returns a list of potentially-useful equivalence classes,
799  * but it does not guarantee that an EquivalenceMember exists which contains
800  * Vars only from the given relation. For example, given ft1 JOIN t1 ON
801  * ft1.x + t1.x = 0, this function will say that the equivalence class
802  * containing ft1.x + t1.x is potentially useful. Supposing ft1 is remote and
803  * t1 is local (or on a different server), it will turn out that no useful
804  * ORDER BY clause can be generated. It's not our job to figure that out
805  * here; we're only interested in identifying relevant ECs.
     *
     * The result is a List of EquivalenceClass pointers, or NIL when nothing
     * looks useful. It is gathered in two passes: first from root->eq_classes
     * (only when rel->has_eclass_joins is set), then from the mergejoinable
     * clauses in rel->joininfo. For each such clause at most one side's EC is
     * added, and the right-hand EC is tested before the left-hand one.
806  */
807 static List *
809 {
810  List *useful_eclass_list = NIL;
811  ListCell *lc;
812  Relids relids;
813 
814  /*
815  * First, consider whether any active EC is potentially useful for a merge
816  * join against this relation.
817  */
818  if (rel->has_eclass_joins)
819  {
820  foreach(lc, root->eq_classes)
821  {
822  EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc);
823 
824  if (eclass_useful_for_merging(root, cur_ec, rel))
825  useful_eclass_list = lappend(useful_eclass_list, cur_ec);
826  }
827  }
828 
829  /*
830  * Next, consider whether there are any non-EC derivable join clauses that
831  * are merge-joinable. If the joininfo list is empty, we can exit
832  * quickly.
833  */
834  if (rel->joininfo == NIL)
835  return useful_eclass_list;
836 
837  /* If this is a child rel, we must use the topmost parent rel to search. */
838  if (IS_OTHER_REL(rel))
839  {
841  relids = rel->top_parent_relids;
842  }
843  else
844  relids = rel->relids;
845 
846  /* Check each join clause in turn. */
847  foreach(lc, rel->joininfo)
848  {
849  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
850 
851  /* Consider only mergejoinable clauses */
852  if (restrictinfo->mergeopfamilies == NIL)
853  continue;
854 
855  /* Make sure we've got canonical ECs. */
856  update_mergeclause_eclasses(root, restrictinfo);
857 
858  /*
859  * restrictinfo->mergeopfamilies != NIL is sufficient to guarantee
860  * that left_ec and right_ec will be initialized, per comments in
861  * distribute_qual_to_rels.
862  *
863  * We want to identify which side of this merge-joinable clause
864  * contains columns from the relation produced by this RelOptInfo. We
865  * test for overlap, not containment, because there could be extra
866  * relations on either side. For example, suppose we've got something
867  * like ((A JOIN B ON A.x = B.x) JOIN C ON A.y = C.y) LEFT JOIN D ON
868  * A.y = D.y. The input rel might be the joinrel between A and B, and
869  * we'll consider the join clause A.y = D.y. relids contains a
870  * relation not involved in the join class (B) and the equivalence
871  * class for the left-hand side of the clause contains a relation not
872  * involved in the input rel (C). Despite the fact that we have only
873  * overlap and not containment in either direction, A.y is potentially
874  * useful as a sort column.
875  *
876  * Note that it's even possible that relids overlaps neither side of
877  * the join clause. For example, consider A LEFT JOIN B ON A.x = B.x
878  * AND A.x = 1. The clause A.x = 1 will appear in B's joininfo list,
879  * but overlaps neither side of B. In that case, we just skip this
880  * join clause, since it doesn't suggest a useful sort order for this
881  * relation.
882  */
883  if (bms_overlap(relids, restrictinfo->right_ec->ec_relids))
884  useful_eclass_list = list_append_unique_ptr(useful_eclass_list,
885  restrictinfo->right_ec);
886  else if (bms_overlap(relids, restrictinfo->left_ec->ec_relids))
887  useful_eclass_list = list_append_unique_ptr(useful_eclass_list,
888  restrictinfo->left_ec);
889  }
890 
891  return useful_eclass_list;
892 }
893 
894 /*
895  * get_useful_pathkeys_for_relation
896  * Determine which orderings of a relation might be useful.
897  *
898  * Getting data in sorted order can be useful either because the requested
899  * order matches the final output ordering for the overall query we're
900  * planning, or because it enables an efficient merge join. Here, we try
901  * to figure out which pathkeys to consider.
     *
     * The result is a List whose elements are themselves pathkey Lists: first
     * a copy of root->query_pathkeys, present only when every one of them
     * passes is_foreign_pathkey; then, only when use_remote_estimate is set,
     * one single-pathkey list per usable EC reported by
     * get_useful_ecs_for_relation. NIL is returned when nothing qualifies.
     *
     * Side effect: fpinfo->qp_is_pushdown_safe is reset to false on entry and
     * set to true only when the full set of query pathkeys was accepted.
902  */
903 static List *
905 {
906  List *useful_pathkeys_list = NIL;
907  List *useful_eclass_list;
908  PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) rel->fdw_private;
909  EquivalenceClass *query_ec = NULL;
910  ListCell *lc;
911 
912  /*
913  * Pushing the query_pathkeys to the remote server is always worth
914  * considering, because it might let us avoid a local sort.
915  */
916  fpinfo->qp_is_pushdown_safe = false;
917  if (root->query_pathkeys)
918  {
919  bool query_pathkeys_ok = true;
920 
921  foreach(lc, root->query_pathkeys)
922  {
923  PathKey *pathkey = (PathKey *) lfirst(lc);
924 
925  /*
926  * The planner and executor don't have any clever strategy for
927  * taking data sorted by a prefix of the query's pathkeys and
928  * getting it to be sorted by all of those pathkeys. We'll just
929  * end up re-sorting the entire data set. So, unless we can push
930  * down all of the query pathkeys, forget it.
931  */
932  if (!is_foreign_pathkey(root, rel, pathkey))
933  {
934  query_pathkeys_ok = false;
935  break;
936  }
937  }
938 
939  if (query_pathkeys_ok)
940  {
941  useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys));
942  fpinfo->qp_is_pushdown_safe = true;
943  }
944  }
945 
946  /*
947  * Even if we're not using remote estimates, having the remote side do the
948  * sort generally won't be any worse than doing it locally, and it might
949  * be much better if the remote side can generate data in the right order
950  * without needing a sort at all. However, what we're going to do next is
951  * try to generate pathkeys that seem promising for possible merge joins,
952  * and that's more speculative. A wrong choice might hurt quite a bit, so
953  * bail out if we can't use remote estimates.
954  */
955  if (!fpinfo->use_remote_estimate)
956  return useful_pathkeys_list;
957 
958  /* Get the list of interesting EquivalenceClasses. */
959  useful_eclass_list = get_useful_ecs_for_relation(root, rel);
960 
961  /* Extract unique EC for query, if any, so we don't consider it again. */
962  if (list_length(root->query_pathkeys) == 1)
963  {
964  PathKey *query_pathkey = linitial(root->query_pathkeys);
965 
966  query_ec = query_pathkey->pk_eclass;
967  }
968 
969  /*
970  * As a heuristic, the only pathkeys we consider here are those of length
971  * one. It's surely possible to consider more, but since each one we
972  * choose to consider will generate a round-trip to the remote side, we
973  * need to be a bit cautious here. It would sure be nice to have a local
974  * cache of information about remote index definitions...
975  */
976  foreach(lc, useful_eclass_list)
977  {
978  EquivalenceClass *cur_ec = lfirst(lc);
979  PathKey *pathkey;
980 
981  /* If redundant with what we did above, skip it. */
982  if (cur_ec == query_ec)
983  continue;
984 
985  /* Can't push down the sort if the EC's opfamily is not shippable. */
987  OperatorFamilyRelationId, fpinfo))
988  continue;
989 
990  /* If no pushable expression for this rel, skip it. */
991  if (find_em_for_rel(root, cur_ec, rel) == NULL)
992  continue;
993 
994  /* Looks like we can generate a pathkey, so let's do it. */
995  pathkey = make_canonical_pathkey(root, cur_ec,
996  linitial_oid(cur_ec->ec_opfamilies),
998  false);
999  useful_pathkeys_list = lappend(useful_pathkeys_list,
1000  list_make1(pathkey));
1001  }
1002 
1003  return useful_pathkeys_list;
1004 }
1005 
1006 /*
1007  * postgresGetForeignPaths
1008  * Create possible scan paths for a scan on the foreign table
      *
      * Adds to baserel, in order: one unparameterized path costed from the
      * rows/startup_cost/total_cost already stored in fpinfo, then whatever
      * add_paths_with_pathkeys_for_rel() contributes, and finally (only when
      * fpinfo->use_remote_estimate is set) one parameterized path per distinct
      * ParamPathInfo collected from baserel->joininfo and from
      * equivalence-class join clauses.
      *
      * NOTE(review): the numbering embedded in this listing skips 1011, 1121,
      * 1130 and 1132, so the line carrying the function name and the "root"
      * parameter, the declaration of "arg", and parts of the call that assigns
      * "clauses" are not visible here; confirm against the original file.
1009  */
1010 static void
1012  RelOptInfo *baserel,
1013  Oid foreigntableid)
1014 {
1015  PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
1016  ForeignPath *path;
1017  List *ppi_list;
1018  ListCell *lc;
1019 
1020  /*
1021  * Create simplest ForeignScan path node and add it to baserel. This path
1022  * corresponds to SeqScan path of regular tables (though depending on what
1023  * baserestrict conditions we were able to send to remote, there might
1024  * actually be an indexscan happening there). We already did all the work
1025  * to estimate cost and size of this path.
1026  *
1027  * Although this path uses no join clauses, it could still have required
1028  * parameterization due to LATERAL refs in its tlist.
1029  */
1030  path = create_foreignscan_path(root, baserel,
1031  NULL, /* default pathtarget */
1032  fpinfo->rows,
1033  fpinfo->startup_cost,
1034  fpinfo->total_cost,
1035  NIL, /* no pathkeys */
1036  baserel->lateral_relids,
1037  NULL, /* no extra plan */
1038  NIL, /* no fdw_restrictinfo list */
1039  NIL); /* no fdw_private list */
1040  add_path(baserel, (Path *) path);
1041 
1042  /* Add paths with pathkeys */
1043  add_paths_with_pathkeys_for_rel(root, baserel, NULL, NIL);
1044 
1045  /*
1046  * If we're not using remote estimates, stop here. We have no way to
1047  * estimate whether any join clauses would be worth sending across, so
1048  * don't bother building parameterized paths.
1049  */
1050  if (!fpinfo->use_remote_estimate)
1051  return;
1052 
1053  /*
1054  * Thumb through all join clauses for the rel to identify which outer
1055  * relations could supply one or more safe-to-send-to-remote join clauses.
1056  * We'll build a parameterized path for each such outer relation.
1057  *
1058  * It's convenient to manage this by representing each candidate outer
1059  * relation by the ParamPathInfo node for it. We can then use the
1060  * ppi_clauses list in the ParamPathInfo node directly as a list of the
1061  * interesting join clauses for that rel. This takes care of the
1062  * possibility that there are multiple safe join clauses for such a rel,
1063  * and also ensures that we account for unsafe join clauses that we'll
1064  * still have to enforce locally (since the parameterized-path machinery
1065  * insists that we handle all movable clauses).
1066  */
1067  ppi_list = NIL;
1068  foreach(lc, baserel->joininfo)
1069  {
1070  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1071  Relids required_outer;
1072  ParamPathInfo *param_info;
1073 
1074  /* Check if clause can be moved to this rel */
1075  if (!join_clause_is_movable_to(rinfo, baserel))
1076  continue;
1077 
1078  /* See if it is safe to send to remote */
1079  if (!is_foreign_expr(root, baserel, rinfo->clause))
1080  continue;
1081 
1082  /* Calculate required outer rels for the resulting path */
1083  required_outer = bms_union(rinfo->clause_relids,
1084  baserel->lateral_relids);
1085  /* We do not want the foreign rel itself listed in required_outer */
1086  required_outer = bms_del_member(required_outer, baserel->relid);
1087 
1088  /*
1089  * required_outer probably can't be empty here, but if it were, we
1090  * couldn't make a parameterized path.
1091  */
1092  if (bms_is_empty(required_outer))
1093  continue;
1094 
1095  /* Get the ParamPathInfo */
1096  param_info = get_baserel_parampathinfo(root, baserel,
1097  required_outer);
1098  Assert(param_info != NULL);
1099 
1100  /*
1101  * Add it to list unless we already have it. Testing pointer equality
1102  * is OK since get_baserel_parampathinfo won't make duplicates.
1103  */
1104  ppi_list = list_append_unique_ptr(ppi_list, param_info);
1105  }
1106 
1107  /*
1108  * The above scan examined only "generic" join clauses, not those that
1109  * were absorbed into EquivalenceClauses. See if we can make anything out
1110  * of EquivalenceClauses.
1111  */
1112  if (baserel->has_eclass_joins)
1113  {
1114  /*
1115  * We repeatedly scan the eclass list looking for column references
1116  * (or expressions) belonging to the foreign rel. Each time we find
1117  * one, we generate a list of equivalence joinclauses for it, and then
1118  * see if any are safe to send to the remote. Repeat till there are
1119  * no more candidate EC members.
1120  */
1122 
1123  arg.already_used = NIL;
1124  for (;;)
1125  {
1126  List *clauses;
1127 
1128  /* Make clauses, skipping any that join to lateral_referencers */
1129  arg.current = NULL;
1131  baserel,
1133  (void *) &arg,
1134  baserel->lateral_referencers);
1135 
1136  /* Done if there are no more expressions in the foreign rel */
1137  if (arg.current == NULL)
1138  {
1139  Assert(clauses == NIL);
1140  break;
1141  }
1142 
1143  /* Scan the extracted join clauses */
1144  foreach(lc, clauses)
1145  {
1146  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1147  Relids required_outer;
1148  ParamPathInfo *param_info;
1149 
1150  /* Check if clause can be moved to this rel */
1151  if (!join_clause_is_movable_to(rinfo, baserel))
1152  continue;
1153 
1154  /* See if it is safe to send to remote */
1155  if (!is_foreign_expr(root, baserel, rinfo->clause))
1156  continue;
1157 
1158  /* Calculate required outer rels for the resulting path */
1159  required_outer = bms_union(rinfo->clause_relids,
1160  baserel->lateral_relids);
      /* Same filtering as in the joininfo loop: drop the foreign rel itself, skip if nothing is left */
1161  required_outer = bms_del_member(required_outer, baserel->relid);
1162  if (bms_is_empty(required_outer))
1163  continue;
1164 
1165  /* Get the ParamPathInfo */
1166  param_info = get_baserel_parampathinfo(root, baserel,
1167  required_outer);
1168  Assert(param_info != NULL);
1169 
1170  /* Add it to list unless we already have it */
1171  ppi_list = list_append_unique_ptr(ppi_list, param_info);
1172  }
1173 
1174  /* Try again, now ignoring the expression we found this time */
1175  arg.already_used = lappend(arg.already_used, arg.current);
1176  }
1177  }
1178 
1179  /*
1180  * Now build a path for each useful outer relation.
      *
      * Each iteration asks estimate_path_cost_size() for rows, width and costs
      * using that ParamPathInfo's ppi_clauses, then adds a path parameterized
      * by ppi_req_outer.  "width" is filled in but not used afterwards here.
1181  */
1182  foreach(lc, ppi_list)
1183  {
1184  ParamPathInfo *param_info = (ParamPathInfo *) lfirst(lc);
1185  double rows;
1186  int width;
1187  Cost startup_cost;
1188  Cost total_cost;
1189 
1190  /* Get a cost estimate from the remote */
1191  estimate_path_cost_size(root, baserel,
1192  param_info->ppi_clauses, NIL, NULL,
1193  &rows, &width,
1194  &startup_cost, &total_cost);
1195 
1196  /*
1197  * ppi_rows currently won't get looked at by anything, but still we
1198  * may as well ensure that it matches our idea of the rowcount.
1199  */
1200  param_info->ppi_rows = rows;
1201 
1202  /* Make the path */
1203  path = create_foreignscan_path(root, baserel,
1204  NULL, /* default pathtarget */
1205  rows,
1206  startup_cost,
1207  total_cost,
1208  NIL, /* no pathkeys */
1209  param_info->ppi_req_outer,
1210  NULL, /* no extra plan */
1211  NIL, /* no fdw_restrictinfo list */
1212  NIL); /* no fdw_private list */
1213  add_path(baserel, (Path *) path);
1214  }
1215 }
1216 
1217 /*
1218  * postgresGetForeignPlan
1219  * Create ForeignScan plan node which implements selected best path
      *
      * For a base relation, scan_clauses are split into remote_exprs and
      * local_exprs; for a join or upper relation they are taken from
      * fpinfo->remote_conds and fpinfo->local_conds instead.  The deparsed
      * query string, retrieved_attrs and fetch_size (plus relation_name for
      * join/upper rels) are packed into fdw_private and handed to
      * make_foreignscan() together with local_exprs, params_list,
      * fdw_scan_tlist, fdw_recheck_quals and outer_plan.
      *
      * Side effect visible here: fpinfo->final_remote_exprs is set to the
      * remote_exprs list that was deparsed.
      *
      * NOTE(review): the numbering embedded in this listing skips 1222, 1250
      * and 1252, so the line with the function name and the "root" parameter
      * and the list indexes passed to list_nth() for has_final_sort/has_limit
      * are not visible here; confirm against the original file.
1220  */
1221 static ForeignScan *
1223  RelOptInfo *foreignrel,
1224  Oid foreigntableid,
1225  ForeignPath *best_path,
1226  List *tlist,
1227  List *scan_clauses,
1228  Plan *outer_plan)
1229 {
1230  PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private;
1231  Index scan_relid;
1232  List *fdw_private;
1233  List *remote_exprs = NIL;
1234  List *local_exprs = NIL;
1235  List *params_list = NIL;
1236  List *fdw_scan_tlist = NIL;
1237  List *fdw_recheck_quals = NIL;
1238  List *retrieved_attrs;
1239  StringInfoData sql;
1240  bool has_final_sort = false;
1241  bool has_limit = false;
1242  ListCell *lc;
1243 
1244  /*
1245  * Get FDW private data created by postgresGetForeignUpperPaths(), if any.
      * When best_path->fdw_private is empty both flags keep their "false"
      * initializers.
1246  */
1247  if (best_path->fdw_private)
1248  {
1249  has_final_sort = boolVal(list_nth(best_path->fdw_private,
1251  has_limit = boolVal(list_nth(best_path->fdw_private,
1253  }
1254 
1255  if (IS_SIMPLE_REL(foreignrel))
1256  {
1257  /*
1258  * For base relations, set scan_relid as the relid of the relation.
1259  */
1260  scan_relid = foreignrel->relid;
1261 
1262  /*
1263  * In a base-relation scan, we must apply the given scan_clauses.
1264  *
1265  * Separate the scan_clauses into those that can be executed remotely
1266  * and those that can't. baserestrictinfo clauses that were
1267  * previously determined to be safe or unsafe by classifyConditions
1268  * are found in fpinfo->remote_conds and fpinfo->local_conds. Anything
1269  * else in the scan_clauses list will be a join clause, which we have
1270  * to check for remote-safety.
1271  *
1272  * Note: the join clauses we see here should be the exact same ones
1273  * previously examined by postgresGetForeignPaths. Possibly it'd be
1274  * worth passing forward the classification work done then, rather
1275  * than repeating it here.
1276  *
1277  * This code must match "extract_actual_clauses(scan_clauses, false)"
1278  * except for the additional decision about remote versus local
1279  * execution.
1280  */
1281  foreach(lc, scan_clauses)
1282  {
1283  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1284 
1285  /* Ignore any pseudoconstants, they're dealt with elsewhere */
1286  if (rinfo->pseudoconstant)
1287  continue;
1288 
      /* Cheap pointer-membership tests first; is_foreign_expr() only for clauses in neither list */
1289  if (list_member_ptr(fpinfo->remote_conds, rinfo))
1290  remote_exprs = lappend(remote_exprs, rinfo->clause);
1291  else if (list_member_ptr(fpinfo->local_conds, rinfo))
1292  local_exprs = lappend(local_exprs, rinfo->clause);
1293  else if (is_foreign_expr(root, foreignrel, rinfo->clause))
1294  remote_exprs = lappend(remote_exprs, rinfo->clause);
1295  else
1296  local_exprs = lappend(local_exprs, rinfo->clause);
1297  }
1298 
1299  /*
1300  * For a base-relation scan, we have to support EPQ recheck, which
1301  * should recheck all the remote quals.
1302  */
1303  fdw_recheck_quals = remote_exprs;
1304  }
1305  else
1306  {
1307  /*
1308  * Join relation or upper relation - set scan_relid to 0.
1309  */
1310  scan_relid = 0;
1311 
1312  /*
1313  * For a join rel, baserestrictinfo is NIL and we are not considering
1314  * parameterization right now, so there should be no scan_clauses for
1315  * a joinrel or an upper rel either.
1316  */
1317  Assert(!scan_clauses);
1318 
1319  /*
1320  * Instead we get the conditions to apply from the fdw_private
1321  * structure.
1322  */
1323  remote_exprs = extract_actual_clauses(fpinfo->remote_conds, false);
1324  local_exprs = extract_actual_clauses(fpinfo->local_conds, false);
1325 
1326  /*
1327  * We leave fdw_recheck_quals empty in this case, since we never need
1328  * to apply EPQ recheck clauses. In the case of a joinrel, EPQ
1329  * recheck is handled elsewhere --- see postgresGetForeignJoinPaths().
1330  * If we're planning an upperrel (ie, remote grouping or aggregation)
1331  * then there's no EPQ to do because SELECT FOR UPDATE wouldn't be
1332  * allowed, and indeed we *can't* put the remote clauses into
1333  * fdw_recheck_quals because the unaggregated Vars won't be available
1334  * locally.
1335  */
1336 
1337  /* Build the list of columns to be fetched from the foreign server. */
1338  fdw_scan_tlist = build_tlist_to_deparse(foreignrel);
1339 
1340  /*
1341  * Ensure that the outer plan produces a tuple whose descriptor
1342  * matches our scan tuple slot. Also, remove the local conditions
1343  * from outer plan's quals, lest they be evaluated twice, once by the
1344  * local plan and once by the scan.
1345  */
1346  if (outer_plan)
1347  {
1348  /*
1349  * Right now, we only consider grouping and aggregation beyond
1350  * joins. Queries involving aggregates or grouping do not require
1351  * EPQ mechanism, hence should not have an outer plan here.
1352  */
1353  Assert(!IS_UPPER_REL(foreignrel));
1354 
1355  /*
1356  * First, update the plan's qual list if possible. In some cases
1357  * the quals might be enforced below the topmost plan level, in
1358  * which case we'll fail to remove them; it's not worth working
1359  * harder than this.
1360  */
1361  foreach(lc, local_exprs)
1362  {
1363  Node *qual = lfirst(lc);
1364 
1365  outer_plan->qual = list_delete(outer_plan->qual, qual);
1366 
1367  /*
1368  * For an inner join the local conditions of foreign scan plan
1369  * can be part of the joinquals as well. (They might also be
1370  * in the mergequals or hashquals, but we can't touch those
1371  * without breaking the plan.)
1372  */
1373  if (IsA(outer_plan, NestLoop) ||
1374  IsA(outer_plan, MergeJoin) ||
1375  IsA(outer_plan, HashJoin))
1376  {
1377  Join *join_plan = (Join *) outer_plan;
1378 
1379  if (join_plan->jointype == JOIN_INNER)
1380  join_plan->joinqual = list_delete(join_plan->joinqual,
1381  qual);
1382  }
1383  }
1384 
1385  /*
1386  * Now fix the subplan's tlist --- this might result in inserting
1387  * a Result node atop the plan tree.
1388  */
1389  outer_plan = change_plan_targetlist(outer_plan, fdw_scan_tlist,
1390  best_path->path.parallel_safe);
1391  }
1392  }
1393 
1394  /*
1395  * Build the query string to be sent for execution, and identify
1396  * expressions to be sent as parameters.
      *
      * fdw_scan_tlist is still NIL here for a base relation; only the
      * join/upper branch above assigns it.
1397  */
1398  initStringInfo(&sql);
1399  deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
1400  remote_exprs, best_path->path.pathkeys,
1401  has_final_sort, has_limit, false,
1402  &retrieved_attrs, &params_list);
1403 
1404  /* Remember remote_exprs for possible use by postgresPlanDirectModify */
1405  fpinfo->final_remote_exprs = remote_exprs;
1406 
1407  /*
1408  * Build the fdw_private list that will be available to the executor.
1409  * Items in the list must match order in enum FdwScanPrivateIndex.
      * The fourth item (relation name) is appended only for join and upper
      * relations.
1410  */
1411  fdw_private = list_make3(makeString(sql.data),
1412  retrieved_attrs,
1413  makeInteger(fpinfo->fetch_size));
1414  if (IS_JOIN_REL(foreignrel) || IS_UPPER_REL(foreignrel))
1415  fdw_private = lappend(fdw_private,
1416  makeString(fpinfo->relation_name));
1417 
1418  /*
1419  * Create the ForeignScan node for the given relation.
1420  *
1421  * Note that the remote parameter expressions are stored in the fdw_exprs
1422  * field of the finished plan node; we can't keep them in private state
1423  * because then they wouldn't be subject to later planner processing.
1424  */
1425  return make_foreignscan(tlist,
1426  local_exprs,
1427  scan_relid,
1428  params_list,
1429  fdw_private,
1430  fdw_scan_tlist,
1431  fdw_recheck_quals,
1432  outer_plan);
1433 }
1434 
1435 /*
1436  * Construct a tuple descriptor for the scan tuples handled by a foreign join.
      *
      * Returns "tupdesc" after replacing atttypid for every attribute that is a
      * generic RECORD (atttypid RECORDOID with negative atttypmod) whose
      * fdw_scan_tlist entry is a whole-row Var (varattno 0) of an RTE_RELATION
      * range-table entry with a valid rowtype.  Attributes that fail any of
      * those tests are left untouched.
      *
      * NOTE(review): the numbering embedded in this listing skips 1439 and
      * 1454, so the line with the function name and parameter list and the
      * statement that initializes "tupdesc" are not visible here.  The call
      * in postgresBeginForeignScan suggests the name is
      * get_tupdesc_for_join_scan_tuples(node); confirm against the original.
1437  */
1438 static TupleDesc
1440 {
1441  ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan;
1442  EState *estate = node->ss.ps.state;
1443  TupleDesc tupdesc;
1444 
1445  /*
1446  * The core code has already set up a scan tuple slot based on
1447  * fsplan->fdw_scan_tlist, and this slot's tupdesc is mostly good enough,
1448  * but there's one case where it isn't. If we have any whole-row row
1449  * identifier Vars, they may have vartype RECORD, and we need to replace
1450  * that with the associated table's actual composite type. This ensures
1451  * that when we read those ROW() expression values from the remote server,
1452  * we can convert them to a composite type the local server knows.
1453  */
1455  for (int i = 0; i < tupdesc->natts; i++)
1456  {
1457  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
1458  Var *var;
1459  RangeTblEntry *rte;
1460  Oid reltype;
1461 
1462  /* Nothing to do if it's not a generic RECORD attribute */
1463  if (att->atttypid != RECORDOID || att->atttypmod >= 0)
1464  continue;
1465 
1466  /*
1467  * If we can't identify the referenced table, do nothing. This'll
1468  * likely lead to failure later, but perhaps we can muddle through.
1469  */
      /* Attribute i of tupdesc is matched with entry i of fdw_scan_tlist */
1470  var = (Var *) list_nth_node(TargetEntry, fsplan->fdw_scan_tlist,
1471  i)->expr;
1472  if (!IsA(var, Var) || var->varattno != 0)
1473  continue;
      /* varno is used 1-based here, hence the "- 1" for the list position */
1474  rte = list_nth(estate->es_range_table, var->varno - 1);
1475  if (rte->rtekind != RTE_RELATION)
1476  continue;
1477  reltype = get_rel_type_id(rte->relid);
1478  if (!OidIsValid(reltype))
1479  continue;
1480  att->atttypid = reltype;
1481  /* shouldn't need to change anything else */
1482  }
1483  return tupdesc;
1484 }
1485 
1486 /*
1487  * postgresBeginForeignScan
1488  * Initiate an executor scan of a foreign PostgreSQL table.
      *
      * Returns immediately for EXPLAIN without ANALYZE.  Otherwise allocates a
      * zeroed PgFdwScanState into node->fdw_state and fills it in: connection
      * and cursor number, the query/retrieved_attrs/fetch_size unpacked from
      * fsplan->fdw_private, two memory contexts created under
      * estate->es_query_cxt, the tuple descriptor and its input metadata,
      * parameter info (only when fdw_exprs is non-empty) and the
      * async-capable flag.  No cursor is created here; cursor_exists is set
      * to false.
      *
      * NOTE(review): the numbering embedded in this listing skips 1491, 1542,
      * 1544, 1546, 1551, 1554 and 1579, so the function-name line (which
      * declares "node" and "eflags"), the list indexes passed to list_nth(),
      * the size arguments of both AllocSetContextCreate() calls and the name
      * of the call that prepares query parameters are not visible here.
1489  */
1490 static void
1492 {
1493  ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan;
1494  EState *estate = node->ss.ps.state;
1495  PgFdwScanState *fsstate;
1496  RangeTblEntry *rte;
1497  Oid userid;
1498  ForeignTable *table;
1499  UserMapping *user;
1500  int rtindex;
1501  int numParams;
1502 
1503  /*
1504  * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL.
1505  */
1506  if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
1507  return;
1508 
1509  /*
1510  * We'll save private state in node->fdw_state.
1511  */
1512  fsstate = (PgFdwScanState *) palloc0(sizeof(PgFdwScanState));
1513  node->fdw_state = (void *) fsstate;
1514 
1515  /*
1516  * Identify which user to do the remote access as. This should match what
1517  * ExecCheckPermissions() does.
      *
      * The range-table index is scanrelid when that is positive; otherwise it
      * is the member of fs_base_relids returned by bms_next_member(..., -1).
1518  */
1519  userid = OidIsValid(fsplan->checkAsUser) ? fsplan->checkAsUser : GetUserId();
1520  if (fsplan->scan.scanrelid > 0)
1521  rtindex = fsplan->scan.scanrelid;
1522  else
1523  rtindex = bms_next_member(fsplan->fs_base_relids, -1);
1524  rte = exec_rt_fetch(rtindex, estate);
1525 
1526  /* Get info about foreign table. */
1527  table = GetForeignTable(rte->relid);
1528  user = GetUserMapping(userid, table->serverid);
1529 
1530  /*
1531  * Get connection to the foreign server. Connection manager will
1532  * establish new connection if necessary.
1533  */
1534  fsstate->conn = GetConnection(user, false, &fsstate->conn_state);
1535 
1536  /* Assign a unique ID for my cursor */
1537  fsstate->cursor_number = GetCursorNumber(fsstate->conn);
1538  fsstate->cursor_exists = false;
1539 
1540  /* Get private info created by planner functions. */
1541  fsstate->query = strVal(list_nth(fsplan->fdw_private,
1543  fsstate->retrieved_attrs = (List *) list_nth(fsplan->fdw_private,
1545  fsstate->fetch_size = intVal(list_nth(fsplan->fdw_private,
1547 
1548  /* Create contexts for batches of tuples and per-tuple temp workspace. */
1549  fsstate->batch_cxt = AllocSetContextCreate(estate->es_query_cxt,
1550  "postgres_fdw tuple data",
1552  fsstate->temp_cxt = AllocSetContextCreate(estate->es_query_cxt,
1553  "postgres_fdw temporary data",
1555 
1556  /*
1557  * Get info we'll need for converting data fetched from the foreign server
1558  * into local representation and error reporting during that process.
      *
      * A positive scanrelid means a single relation whose own descriptor is
      * used; otherwise rel is NULL and the descriptor is built from the scan
      * tuple slot by get_tupdesc_for_join_scan_tuples().
1559  */
1560  if (fsplan->scan.scanrelid > 0)
1561  {
1562  fsstate->rel = node->ss.ss_currentRelation;
1563  fsstate->tupdesc = RelationGetDescr(fsstate->rel);
1564  }
1565  else
1566  {
1567  fsstate->rel = NULL;
1568  fsstate->tupdesc = get_tupdesc_for_join_scan_tuples(node);
1569  }
1570 
1571  fsstate->attinmeta = TupleDescGetAttInMetadata(fsstate->tupdesc);
1572 
1573  /*
1574  * Prepare for processing of parameters used in remote query, if any.
1575  */
1576  numParams = list_length(fsplan->fdw_exprs);
1577  fsstate->numParams = numParams;
1578  if (numParams > 0)
1580  fsplan->fdw_exprs,
1581  numParams,
1582  &fsstate->param_flinfo,
1583  &fsstate->param_exprs,
1584  &fsstate->param_values);
1585 
1586  /* Set the async-capable flag */
1587  fsstate->async_capable = node->ss.ps.async_capable;
1588 }
1589 
1590 /*
1591  * postgresIterateForeignScan
1592  * Retrieve next row from the result set, or clear tuple slot to indicate
1593  * EOF.
      *
      * Returns node->ss.ss_ScanTupleSlot in both cases: holding
      * fsstate->tuples[next_tuple] (and advancing next_tuple), or cleared via
      * ExecClearTuple() when no buffered tuple is left.
      *
      * NOTE(review): the numbering embedded in this listing skips 1596, so the
      * line with the function name and the declaration of "node" is not
      * visible here.
1594  */
1595 static TupleTableSlot *
1597 {
1598  PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
1599  TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
1600 
1601  /*
1602  * In sync mode, if this is the first call after Begin or ReScan, we need
1603  * to create the cursor on the remote side. In async mode, we would have
1604  * already created the cursor before we get here, even if this is the
1605  * first call after Begin or ReScan.
1606  */
1607  if (!fsstate->cursor_exists)
1608  create_cursor(node);
1609 
1610  /*
1611  * Get some more tuples, if we've run out.
1612  */
1613  if (fsstate->next_tuple >= fsstate->num_tuples)
1614  {
1615  /* In async mode, just clear tuple slot. */
1616  if (fsstate->async_capable)
1617  return ExecClearTuple(slot);
1618  /* No point in another fetch if we already detected EOF, though. */
1619  if (!fsstate->eof_reached)
1620  fetch_more_data(node);
1621  /* If we didn't get any tuples, must be end of data. */
1622  if (fsstate->next_tuple >= fsstate->num_tuples)
1623  return ExecClearTuple(slot);
1624  }
1625 
1626  /*
1627  * Return the next tuple.
      * The final "false" is ExecStoreHeapTuple()'s last argument
      * (presumably shouldFree, i.e. the slot does not take ownership of the
      * tuple -- confirm against the executor API).
1628  */
1629  ExecStoreHeapTuple(fsstate->tuples[fsstate->next_tuple++],
1630  slot,
1631  false);
1632 
1633  return slot;
1634 }
1635 
1636 /*
1637  * postgresReScanForeignScan
1638  * Restart the scan.
      *
      * Does nothing if no cursor exists yet.  Otherwise one of three things
      * happens: the remote cursor is closed (cursor_exists becomes false), the
      * remote cursor is rewound with MOVE BACKWARD ALL, or -- when neither is
      * needed -- next_tuple is simply reset to 0 so the tuples already in
      * memory are replayed.  After a CLOSE or MOVE the buffered-tuple state is
      * reset so that the next iteration fetches afresh.
      *
      * NOTE(review): the numbering embedded in this listing skips 1641 and
      * 1702, so the line with the function name and the declaration of "node",
      * and the condition guarding pgfdw_report_error(), are not visible here.
1639  */
1640 static void
1642 {
1643  PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
      /* Only ever holds one of the short fixed commands formatted below plus a cursor number */
1644  char sql[64];
1645  PGresult *res;
1646 
1647  /* If we haven't created the cursor yet, nothing to do. */
1648  if (!fsstate->cursor_exists)
1649  return;
1650 
1651  /*
1652  * If the node is async-capable, and an asynchronous fetch for it has
1653  * begun, the asynchronous fetch might not have yet completed. Check if
1654  * the node is async-capable, and an asynchronous fetch for it is still in
1655  * progress; if so, complete the asynchronous fetch before restarting the
1656  * scan.
1657  */
1658  if (fsstate->async_capable &&
1659  fsstate->conn_state->pendingAreq &&
1660  fsstate->conn_state->pendingAreq->requestee == (PlanState *) node)
1661  fetch_more_data(node);
1662 
1663  /*
1664  * If any internal parameters affecting this node have changed, we'd
1665  * better destroy and recreate the cursor. Otherwise, if the remote
1666  * server is v14 or older, rewinding it should be good enough; if not,
1667  * rewind is only allowed for scrollable cursors, but we don't have a way
1668  * to check the scrollability of it, so destroy and recreate it in any
1669  * case. If we've only fetched zero or one batch, we needn't even rewind
1670  * the cursor, just rescan what we have.
1671  */
1672  if (node->ss.ps.chgParam != NULL)
1673  {
1674  fsstate->cursor_exists = false;
1675  snprintf(sql, sizeof(sql), "CLOSE c%u",
1676  fsstate->cursor_number);
1677  }
1678  else if (fsstate->fetch_ct_2 > 1)
1679  {
      /* Server versions below 150000 (i.e. v14 or older) get a rewind; newer ones a CLOSE */
1680  if (PQserverVersion(fsstate->conn) < 150000)
1681  snprintf(sql, sizeof(sql), "MOVE BACKWARD ALL IN c%u",
1682  fsstate->cursor_number);
1683  else
1684  {
1685  fsstate->cursor_exists = false;
1686  snprintf(sql, sizeof(sql), "CLOSE c%u",
1687  fsstate->cursor_number);
1688  }
1689  }
1690  else
1691  {
1692  /* Easy: just rescan what we already have in memory, if anything */
1693  fsstate->next_tuple = 0;
1694  return;
1695  }
1696 
1697  /*
1698  * We don't use a PG_TRY block here, so be careful not to throw error
1699  * without releasing the PGresult.
1700  */
1701  res = pgfdw_exec_query(fsstate->conn, sql, fsstate->conn_state);
1703  pgfdw_report_error(ERROR, res, fsstate->conn, true, sql);
1704  PQclear(res);
1705 
1706  /* Now force a fresh FETCH. */
1707  fsstate->tuples = NULL;
1708  fsstate->num_tuples = 0;
1709  fsstate->next_tuple = 0;
1710  fsstate->fetch_ct_2 = 0;
1711  fsstate->eof_reached = false;
1712 }
1713 
1714 /*
1715  * postgresEndForeignScan
1716  * Finish scanning foreign table and dispose objects used for this scan
      *
      * Closes the remote cursor if one exists, releases the connection and
      * clears fsstate->conn.  A NULL fdw_state (the EXPLAIN-only case) is
      * tolerated and makes this a no-op.
      *
      * NOTE(review): the numbering embedded in this listing skips 1719, so the
      * line with the function name and the declaration of "node" is not
      * visible here.
1717  */
1718 static void
1720 {
1721  PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
1722 
1723  /* if fsstate is NULL, we are in EXPLAIN; nothing to do */
1724  if (fsstate == NULL)
1725  return;
1726 
1727  /* Close the cursor if open, to prevent accumulation of cursors */
1728  if (fsstate->cursor_exists)
1729  close_cursor(fsstate->conn, fsstate->cursor_number,
1730  fsstate->conn_state);
1731 
1732  /* Release remote connection */
1733  ReleaseConnection(fsstate->conn);
1734  fsstate->conn = NULL;
1735 
1736  /* MemoryContexts will be deleted automatically. */
1737 }
1738 
1739 /*
1740  * postgresAddForeignUpdateTargets
1741  * Add resjunk column(s) needed for update/delete on a foreign table
      *
      * Builds a single TIDOID Var for relation rtindex and registers it with
      * add_row_identity_var() under the name "ctid".  target_rte and
      * target_relation are not used in the visible body.
      *
      * NOTE(review): the numbering embedded in this listing skips 1744 and
      * 1757, so the line with the function name and the "root" parameter and
      * the attribute-number argument of makeVar() are not visible here.
1742  */
1743 static void
1745  Index rtindex,
1746  RangeTblEntry *target_rte,
1747  Relation target_relation)
1748 {
1749  Var *var;
1750 
1751  /*
1752  * In postgres_fdw, what we need is the ctid, same as for a regular table.
1753  */
1754 
1755  /* Make a Var representing the desired value */
1756  var = makeVar(rtindex,
1758  TIDOID,
1759  -1,
1760  InvalidOid,
1761  0);
1762 
1763  /* Register it as a row-identity column needed by this target rel */
1764  add_row_identity_var(root, var, rtindex, "ctid");
1765 }
1766 
1767 /*
1768  * postgresPlanForeignModify
1769  * Plan an insert/update/delete operation on a foreign table
      *
      * Chooses the columns to transmit (targetAttrs), picks out the WITH CHECK
      * OPTION and RETURNING lists for subplan_index, validates the ON CONFLICT
      * action, and deparses the INSERT/UPDATE/DELETE statement.
      *
      * Returns a five-element list: the SQL string, targetAttrs,
      * values_end_len (stays -1 unless deparseInsertSql() sets it), a boolean
      * telling whether retrieved_attrs is non-empty, and retrieved_attrs.
      *
      * Raises an error via elog(ERROR) for an ON CONFLICT action other than
      * NONE/NOTHING, for an operation other than INSERT/UPDATE/DELETE, and for
      * an updated column whose attribute number is not above
      * InvalidAttrNumber.
      *
      * NOTE(review): the numbering embedded in this listing skips 1772, 1810
      * and 1833, so the line with the function name and the "root" parameter,
      * the last operand of the first "if" condition and the declaration of
      * "attno" are not visible here.
1770  */
1771 static List *
1773  ModifyTable *plan,
1774  Index resultRelation,
1775  int subplan_index)
1776 {
1777  CmdType operation = plan->operation;
1778  RangeTblEntry *rte = planner_rt_fetch(resultRelation, root);
1779  Relation rel;
1780  StringInfoData sql;
1781  List *targetAttrs = NIL;
1782  List *withCheckOptionList = NIL;
1783  List *returningList = NIL;
1784  List *retrieved_attrs = NIL;
1785  bool doNothing = false;
1786  int values_end_len = -1;
1787 
1788  initStringInfo(&sql);
1789 
1790  /*
1791  * Core code already has some lock on each rel being planned, so we can
1792  * use NoLock here.
1793  */
1794  rel = table_open(rte->relid, NoLock);
1795 
1796  /*
1797  * In an INSERT, we transmit all columns that are defined in the foreign
1798  * table. In an UPDATE, if there are BEFORE ROW UPDATE triggers on the
1799  * foreign table, we transmit all columns like INSERT; else we transmit
1800  * only columns that were explicitly targets of the UPDATE, so as to avoid
1801  * unnecessary data transmission. (We can't do that for INSERT since we
1802  * would miss sending default values for columns not listed in the source
1803  * statement, and for UPDATE if there are BEFORE ROW UPDATE triggers since
1804  * those triggers might change values for non-target columns, in which
1805  * case we would miss sending changed values for those columns.)
      *
      * For DELETE neither branch runs and targetAttrs stays NIL.
1806  */
1807  if (operation == CMD_INSERT ||
1808  (operation == CMD_UPDATE &&
1809  rel->trigdesc &&
1811  {
1812  TupleDesc tupdesc = RelationGetDescr(rel);
1813  int attnum;
1814 
      /* Attribute numbers are 1-based, the descriptor array is 0-based; dropped columns are skipped */
1815  for (attnum = 1; attnum <= tupdesc->natts; attnum++)
1816  {
1817  Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
1818 
1819  if (!attr->attisdropped)
1820  targetAttrs = lappend_int(targetAttrs, attnum);
1821  }
1822  }
1823  else if (operation == CMD_UPDATE)
1824  {
1825  int col;
      /* NOTE(review): this RelOptInfo "rel" shadows the outer Relation "rel" within this branch */
1826  RelOptInfo *rel = find_base_rel(root, resultRelation);
1827  Bitmapset *allUpdatedCols = get_rel_all_updated_cols(root, rel);
1828 
1829  col = -1;
1830  while ((col = bms_next_member(allUpdatedCols, col)) >= 0)
1831  {
1832  /* bit numbers are offset by FirstLowInvalidHeapAttributeNumber */
1834 
1835  if (attno <= InvalidAttrNumber) /* shouldn't happen */
1836  elog(ERROR, "system-column update is not supported");
1837  targetAttrs = lappend_int(targetAttrs, attno);
1838  }
1839  }
1840 
1841  /*
1842  * Extract the relevant WITH CHECK OPTION list if any.
1843  */
1844  if (plan->withCheckOptionLists)
1845  withCheckOptionList = (List *) list_nth(plan->withCheckOptionLists,
1846  subplan_index);
1847 
1848  /*
1849  * Extract the relevant RETURNING list if any.
1850  */
1851  if (plan->returningLists)
1852  returningList = (List *) list_nth(plan->returningLists, subplan_index);
1853 
1854  /*
1855  * ON CONFLICT DO UPDATE and DO NOTHING case with inference specification
1856  * should have already been rejected in the optimizer, as presently there
1857  * is no way to recognize an arbiter index on a foreign table. Only DO
1858  * NOTHING is supported without an inference specification.
1859  */
1860  if (plan->onConflictAction == ONCONFLICT_NOTHING)
1861  doNothing = true;
1862  else if (plan->onConflictAction != ONCONFLICT_NONE)
1863  elog(ERROR, "unexpected ON CONFLICT specification: %d",
1864  (int) plan->onConflictAction);
1865 
1866  /*
1867  * Construct the SQL command string.
      * Only the INSERT case passes doNothing and &values_end_len; DELETE
      * passes neither targetAttrs nor withCheckOptionList.
1868  */
1869  switch (operation)
1870  {
1871  case CMD_INSERT:
1872  deparseInsertSql(&sql, rte, resultRelation, rel,
1873  targetAttrs, doNothing,
1874  withCheckOptionList, returningList,
1875  &retrieved_attrs, &values_end_len);
1876  break;
1877  case CMD_UPDATE:
1878  deparseUpdateSql(&sql, rte, resultRelation, rel,
1879  targetAttrs,
1880  withCheckOptionList, returningList,
1881  &retrieved_attrs);
1882  break;
1883  case CMD_DELETE:
1884  deparseDeleteSql(&sql, rte, resultRelation, rel,
1885  returningList,
1886  &retrieved_attrs);
1887  break;
1888  default:
1889  elog(ERROR, "unexpected operation: %d", (int) operation);
1890  break;
1891  }
1892 
      /* Matches the NoLock table_open() above */
1893  table_close(rel, NoLock);
1894 
1895  /*
1896  * Build the fdw_private list that will be available to the executor.
1897  * Items in the list must match enum FdwModifyPrivateIndex, above.
1898  */
1899  return list_make5(makeString(sql.data),
1900  targetAttrs,
1901  makeInteger(values_end_len),
1902  makeBoolean((retrieved_attrs != NIL)),
1903  retrieved_attrs);
1904 }
1905 
1906 /*
1907  * postgresBeginForeignModify
1908  * Begin an insert/update/delete operation on a foreign table
      *
      * Unpacks the five items of fdw_private (query string, target attribute
      * list, values_end_len, has_returning flag, retrieved_attrs), looks up the
      * range-table entry for the result relation, and stores the state built
      * by create_foreign_modify() in resultRelInfo->ri_FdwState.  Returns
      * without doing anything for EXPLAIN without ANALYZE.  subplan_index is
      * not used in the visible body.
      *
      * NOTE(review): the numbering embedded in this listing skips 1911, 1934,
      * 1936, 1938, 1940 and 1942, so the line with the function name and the
      * "mtstate" parameter and the list indexes passed to each list_nth() call
      * are not visible here.
1909  */
1910 static void
1912  ResultRelInfo *resultRelInfo,
1913  List *fdw_private,
1914  int subplan_index,
1915  int eflags)
1916 {
1917  PgFdwModifyState *fmstate;
1918  char *query;
1919  List *target_attrs;
1920  bool has_returning;
1921  int values_end_len;
1922  List *retrieved_attrs;
1923  RangeTblEntry *rte;
1924 
1925  /*
1926  * Do nothing in EXPLAIN (no ANALYZE) case. resultRelInfo->ri_FdwState
1927  * stays NULL.
1928  */
1929  if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
1930  return;
1931 
1932  /* Deconstruct fdw_private data. */
1933  query = strVal(list_nth(fdw_private,
1935  target_attrs = (List *) list_nth(fdw_private,
1937  values_end_len = intVal(list_nth(fdw_private,
1939  has_returning = boolVal(list_nth(fdw_private,
1941  retrieved_attrs = (List *) list_nth(fdw_private,
1943 
1944  /* Find RTE. */
1945  rte = exec_rt_fetch(resultRelInfo->ri_RangeTableIndex,
1946  mtstate->ps.state);
1947 
1948  /* Construct an execution state. */
1949  fmstate = create_foreign_modify(mtstate->ps.state,
1950  rte,
1951  resultRelInfo,
1952  mtstate->operation,
1953  outerPlanState(mtstate)->plan,
1954  query,
1955  target_attrs,
1956  values_end_len,
1957  has_returning,
1958  retrieved_attrs);
1959 
1960  resultRelInfo->ri_FdwState = fmstate;
1961 }
1962 
1963 /*
1964  * postgresExecForeignInsert
1965  * Insert one row into a foreign table
      *
      * Calls execute_foreign_modify() with CMD_INSERT for a batch of exactly
      * one slot (numSlots is 1).  Returns the first slot of the array that
      * call returns, or NULL if it returned NULL.
      *
      * NOTE(review): the numbering embedded in this listing skips 1968, so the
      * line with the function name and the "estate" parameter is not visible
      * here.
1966  */
1967 static TupleTableSlot *
1969  ResultRelInfo *resultRelInfo,
1970  TupleTableSlot *slot,
1971  TupleTableSlot *planSlot)
1972 {
1973  PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState;
1974  TupleTableSlot **rslot;
1975  int numSlots = 1;
1976 
1977  /*
1978  * If the fmstate has aux_fmstate set, use the aux_fmstate (see
1979  * postgresBeginForeignInsert())
      *
      * The swap is temporary: ri_FdwState points at aux_fmstate only for the
      * duration of the execute_foreign_modify() call and is restored below.
1980  */
1981  if (fmstate->aux_fmstate)
1982  resultRelInfo->ri_FdwState = fmstate->aux_fmstate;
1983  rslot = execute_foreign_modify(estate, resultRelInfo, CMD_INSERT,
1984  &slot, &planSlot, &numSlots);
1985  /* Revert that change */
1986  if (fmstate->aux_fmstate)
1987  resultRelInfo->ri_FdwState = fmstate;
1988 
1989  return rslot ? *rslot : NULL;
1990 }
1991 
1992 /*
1993  * postgresExecForeignBatchInsert
1994  * Insert multiple rows into a foreign table
1995  */
1996 static TupleTableSlot **
1998  ResultRelInfo *resultRelInfo,
1999  TupleTableSlot **slots,
2000  TupleTableSlot **planSlots,
2001  int *numSlots)
2002 {
2003  PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState;
2004  TupleTableSlot **rslot;
2005 
2006  /*
2007  * If the fmstate has aux_fmstate set, use the aux_fmstate (see
2008  * postgresBeginForeignInsert())
2009  */
2010  if (fmstate->aux_fmstate)
2011  resultRelInfo->ri_FdwState = fmstate->aux_fmstate;
2012  rslot = execute_foreign_modify(estate, resultRelInfo, CMD_INSERT,
2013  slots, planSlots, numSlots);
2014  /* Revert that change */
2015  if (fmstate->aux_fmstate)
2016  resultRelInfo->ri_FdwState = fmstate;
2017 
2018  return rslot;
2019 }
2020 
2021 /*
2022  * postgresGetForeignModifyBatchSize
2023  * Determine the maximum number of tuples that can be inserted in bulk
2024  *
2025  * Returns the batch size specified for server or table. When batching is not
2026  * allowed (e.g. for tables with BEFORE/AFTER ROW triggers or with RETURNING
2027  * clause), returns 1.
2028  */
2029 static int
2031 {
2032  int batch_size;
2033  PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState;
2034 
2035  /* should be called only once */
2036  Assert(resultRelInfo->ri_BatchSize == 0);
2037 
2038  /*
2039  * Should never get called when the insert is being performed on a table
2040  * that is also among the target relations of an UPDATE operation, because
2041  * postgresBeginForeignInsert() currently rejects such insert attempts.
2042  */
2043  Assert(fmstate == NULL || fmstate->aux_fmstate == NULL);
2044 
2045  /*
2046  * In EXPLAIN without ANALYZE, ri_FdwState is NULL, so we have to lookup
2047  * the option directly in server/table options. Otherwise just use the
2048  * value we determined earlier.
2049  */
2050  if (fmstate)
2051  batch_size = fmstate->batch_size;
2052  else
2053  batch_size = get_batch_size_option(resultRelInfo->ri_RelationDesc);
2054 
2055  /*
2056  * Disable batching when we have to use RETURNING, there are any
2057  * BEFORE/AFTER ROW INSERT triggers on the foreign table, or there are any
2058  * WITH CHECK OPTION constraints from parent views.
2059  *
2060  * When there are any BEFORE ROW INSERT triggers on the table, we can't
2061  * support it, because such triggers might query the table we're inserting
2062  * into and act differently if the tuples that have already been processed
2063  * and prepared for insertion are not there.
2064  */
2065  if (resultRelInfo->ri_projectReturning != NULL ||
2066  resultRelInfo->ri_WithCheckOptions != NIL ||
2067  (resultRelInfo->ri_TrigDesc &&
2068  (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
2069  resultRelInfo->ri_TrigDesc->trig_insert_after_row)))
2070  return 1;
2071 
2072  /*
2073  * If the foreign table has no columns, disable batching as the INSERT
2074  * syntax doesn't allow batching multiple empty rows into a zero-column
2075  * table in a single statement. This is needed for COPY FROM, in which
2076  * case fmstate must be non-NULL.
2077  */
2078  if (fmstate && list_length(fmstate->target_attrs) == 0)
2079  return 1;
2080 
2081  /*
2082  * Otherwise use the batch size specified for server/table. The number of
2083  * parameters in a batch is limited to 65535 (uint16), so make sure we
2084  * don't exceed this limit by using the maximum batch_size possible.
2085  */
2086  if (fmstate && fmstate->p_nums > 0)
2087  batch_size = Min(batch_size, PQ_QUERY_PARAM_MAX_LIMIT / fmstate->p_nums);
2088 
2089  return batch_size;
2090 }
2091 
2092 /*
2093  * postgresExecForeignUpdate
2094  * Update one row in a foreign table
2095  */
2096 static TupleTableSlot *
2098  ResultRelInfo *resultRelInfo,
2099  TupleTableSlot *slot,
2100  TupleTableSlot *planSlot)
2101 {
2102  TupleTableSlot **rslot;
2103  int numSlots = 1;
2104 
2105  rslot = execute_foreign_modify(estate, resultRelInfo, CMD_UPDATE,
2106  &slot, &planSlot, &numSlots);
2107 
2108  return rslot ? rslot[0] : NULL;
2109 }
2110 
2111 /*
2112  * postgresExecForeignDelete
2113  * Delete one row from a foreign table
2114  */
2115 static TupleTableSlot *
2117  ResultRelInfo *resultRelInfo,
2118  TupleTableSlot *slot,
2119  TupleTableSlot *planSlot)
2120 {
2121  TupleTableSlot **rslot;
2122  int numSlots = 1;
2123 
2124  rslot = execute_foreign_modify(estate, resultRelInfo, CMD_DELETE,
2125  &slot, &planSlot, &numSlots);
2126 
2127  return rslot ? rslot[0] : NULL;
2128 }
2129 
2130 /*
2131  * postgresEndForeignModify
2132  * Finish an insert/update/delete operation on a foreign table
2133  */
2134 static void
2136  ResultRelInfo *resultRelInfo)
2137 {
2138  PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState;
2139 
2140  /* If fmstate is NULL, we are in EXPLAIN; nothing to do */
2141  if (fmstate == NULL)
2142  return;
2143 
2144  /* Destroy the execution state */
2145  finish_foreign_modify(fmstate);
2146 }
2147 
2148 /*
2149  * postgresBeginForeignInsert
2150  * Begin an insert operation on a foreign table
2151  */
2152 static void
2154  ResultRelInfo *resultRelInfo)
2155 {
2156  PgFdwModifyState *fmstate;
2157  ModifyTable *plan = castNode(ModifyTable, mtstate->ps.plan);
2158  EState *estate = mtstate->ps.state;
2159  Index resultRelation;
2160  Relation rel = resultRelInfo->ri_RelationDesc;
2161  RangeTblEntry *rte;
2162  TupleDesc tupdesc = RelationGetDescr(rel);
2163  int attnum;
2164  int values_end_len;
2165  StringInfoData sql;
2166  List *targetAttrs = NIL;
2167  List *retrieved_attrs = NIL;
2168  bool doNothing = false;
2169 
2170  /*
2171  * If the foreign table we are about to insert routed rows into is also an
2172  * UPDATE subplan result rel that will be updated later, proceeding with
2173  * the INSERT will result in the later UPDATE incorrectly modifying those
2174  * routed rows, so prevent the INSERT --- it would be nice if we could
2175  * handle this case; but for now, throw an error for safety.
2176  */
2177  if (plan && plan->operation == CMD_UPDATE &&
2178  (resultRelInfo->ri_usesFdwDirectModify ||
2179  resultRelInfo->ri_FdwState))
2180  ereport(ERROR,
2181  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2182  errmsg("cannot route tuples into foreign table to be updated \"%s\"",
2183  RelationGetRelationName(rel))));
2184 
2185  initStringInfo(&sql);
2186 
2187  /* We transmit all columns that are defined in the foreign table. */
2188  for (attnum = 1; attnum <= tupdesc->natts; attnum++)
2189  {
2190  Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
2191 
2192  if (!attr->attisdropped)
2193  targetAttrs = lappend_int(targetAttrs, attnum);
2194  }
2195 
2196  /* Check if we add the ON CONFLICT clause to the remote query. */
2197  if (plan)
2198  {
2199  OnConflictAction onConflictAction = plan->onConflictAction;
2200 
2201  /* We only support DO NOTHING without an inference specification. */
2202  if (onConflictAction == ONCONFLICT_NOTHING)
2203  doNothing = true;
2204  else if (onConflictAction != ONCONFLICT_NONE)
2205  elog(ERROR, "unexpected ON CONFLICT specification: %d",
2206  (int) onConflictAction);
2207  }
2208 
2209  /*
2210  * If the foreign table is a partition that doesn't have a corresponding
2211  * RTE entry, we need to create a new RTE describing the foreign table for
2212  * use by deparseInsertSql and create_foreign_modify() below, after first
2213  * copying the parent's RTE and modifying some fields to describe the
2214  * foreign partition to work on. However, if this is invoked by UPDATE,
2215  * the existing RTE may already correspond to this partition if it is one
2216  * of the UPDATE subplan target rels; in that case, we can just use the
2217  * existing RTE as-is.
2218  */
2219  if (resultRelInfo->ri_RangeTableIndex == 0)
2220  {
2221  ResultRelInfo *rootResultRelInfo = resultRelInfo->ri_RootResultRelInfo;
2222 
2223  rte = exec_rt_fetch(rootResultRelInfo->ri_RangeTableIndex, estate);
2224  rte = copyObject(rte);
2225  rte->relid = RelationGetRelid(rel);
2226  rte->relkind = RELKIND_FOREIGN_TABLE;
2227 
2228  /*
2229  * For UPDATE, we must use the RT index of the first subplan target
2230  * rel's RTE, because the core code would have built expressions for
2231  * the partition, such as RETURNING, using that RT index as varno of
2232  * Vars contained in those expressions.
2233  */
2234  if (plan && plan->operation == CMD_UPDATE &&
2235  rootResultRelInfo->ri_RangeTableIndex == plan->rootRelation)
2236  resultRelation = mtstate->resultRelInfo[0].ri_RangeTableIndex;
2237  else
2238  resultRelation = rootResultRelInfo->ri_RangeTableIndex;
2239  }
2240  else
2241  {
2242  resultRelation = resultRelInfo->ri_RangeTableIndex;
2243  rte = exec_rt_fetch(resultRelation, estate);
2244  }
2245 
2246  /* Construct the SQL command string. */
2247  deparseInsertSql(&sql, rte, resultRelation, rel, targetAttrs, doNothing,
2248  resultRelInfo->ri_WithCheckOptions,
2249  resultRelInfo->ri_returningList,
2250  &retrieved_attrs, &values_end_len);
2251 
2252  /* Construct an execution state. */
2253  fmstate = create_foreign_modify(mtstate->ps.state,
2254  rte,
2255  resultRelInfo,
2256  CMD_INSERT,
2257  NULL,
2258  sql.data,
2259  targetAttrs,
2260  values_end_len,
2261  retrieved_attrs != NIL,
2262  retrieved_attrs);
2263 
2264  /*
2265  * If the given resultRelInfo already has PgFdwModifyState set, it means
2266  * the foreign table is an UPDATE subplan result rel; in which case, store
2267  * the resulting state into the aux_fmstate of the PgFdwModifyState.
2268  */
2269  if (resultRelInfo->ri_FdwState)
2270  {
2271  Assert(plan && plan->operation == CMD_UPDATE);
2272  Assert(resultRelInfo->ri_usesFdwDirectModify == false);
2273  ((PgFdwModifyState *) resultRelInfo->ri_FdwState)->aux_fmstate = fmstate;
2274  }
2275  else
2276  resultRelInfo->ri_FdwState = fmstate;
2277 }
2278 
2279 /*
2280  * postgresEndForeignInsert
2281  * Finish an insert operation on a foreign table
2282  */
2283 static void
2285  ResultRelInfo *resultRelInfo)
2286 {
2287  PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState;
2288 
2289  Assert(fmstate != NULL);
2290 
2291  /*
2292  * If the fmstate has aux_fmstate set, get the aux_fmstate (see
2293  * postgresBeginForeignInsert())
2294  */
2295  if (fmstate->aux_fmstate)
2296  fmstate = fmstate->aux_fmstate;
2297 
2298  /* Destroy the execution state */
2299  finish_foreign_modify(fmstate);
2300 }
2301 
2302 /*
2303  * postgresIsForeignRelUpdatable
2304  * Determine whether a foreign table supports INSERT, UPDATE and/or
2305  * DELETE.
2306  */
2307 static int
2309 {
2310  bool updatable;
2311  ForeignTable *table;
2312  ForeignServer *server;
2313  ListCell *lc;
2314 
2315  /*
2316  * By default, all postgres_fdw foreign tables are assumed updatable. This
2317  * can be overridden by a per-server setting, which in turn can be
2318  * overridden by a per-table setting.
2319  */
2320  updatable = true;
2321 
2322  table = GetForeignTable(RelationGetRelid(rel));
2323  server = GetForeignServer(table->serverid);
2324 
2325  foreach(lc, server->options)
2326  {
2327  DefElem *def = (DefElem *) lfirst(lc);
2328 
2329  if (strcmp(def->defname, "updatable") == 0)
2330  updatable = defGetBoolean(def);
2331  }
2332  foreach(lc, table->options)
2333  {
2334  DefElem *def = (DefElem *) lfirst(lc);
2335 
2336  if (strcmp(def->defname, "updatable") == 0)
2337  updatable = defGetBoolean(def);
2338  }
2339 
2340  /*
2341  * Currently "updatable" means support for INSERT, UPDATE and DELETE.
2342  */
2343  return updatable ?
2344  (1 << CMD_INSERT) | (1 << CMD_UPDATE) | (1 << CMD_DELETE) : 0;
2345 }
2346 
2347 /*
2348  * postgresRecheckForeignScan
2349  * Execute a local join execution plan for a foreign join
2350  */
2351 static bool
2353 {
2354  Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
2356  TupleTableSlot *result;
2357 
2358  /* For base foreign relations, it suffices to set fdw_recheck_quals */
2359  if (scanrelid > 0)
2360  return true;
2361 
2362  Assert(outerPlan != NULL);
2363 
2364  /* Execute a local join execution plan */
2365  result = ExecProcNode(outerPlan);
2366  if (TupIsNull(result))
2367  return false;
2368 
2369  /* Store result in the given slot */
2370  ExecCopySlot(slot, result);
2371 
2372  return true;
2373 }
2374 
2375 /*
2376  * find_modifytable_subplan
2377  * Helper routine for postgresPlanDirectModify to find the
2378  * ModifyTable subplan node that scans the specified RTI.
2379  *
2380  * Returns NULL if the subplan couldn't be identified. That's not a fatal
2381  * error condition, we just abandon trying to do the update directly.
2382  */
2383 static ForeignScan *
2385  ModifyTable *plan,
2386  Index rtindex,
2387  int subplan_index)
2388 {
2389  Plan *subplan = outerPlan(plan);
2390 
2391  /*
2392  * The cases we support are (1) the desired ForeignScan is the immediate
2393  * child of ModifyTable, or (2) it is the subplan_index'th child of an
2394  * Append node that is the immediate child of ModifyTable. There is no
2395  * point in looking further down, as that would mean that local joins are
2396  * involved, so we can't do the update directly.
2397  *
2398  * There could be a Result atop the Append too, acting to compute the
2399  * UPDATE targetlist values. We ignore that here; the tlist will be
2400  * checked by our caller.
2401  *
2402  * In principle we could examine all the children of the Append, but it's
2403  * currently unlikely that the core planner would generate such a plan
2404  * with the children out-of-order. Moreover, such a search risks costing
2405  * O(N^2) time when there are a lot of children.
2406  */
2407  if (IsA(subplan, Append))
2408  {
2409  Append *appendplan = (Append *) subplan;
2410 
2411  if (subplan_index < list_length(appendplan->appendplans))
2412  subplan = (Plan *) list_nth(appendplan->appendplans, subplan_index);
2413  }
2414  else if (IsA(subplan, Result) &&
2415  outerPlan(subplan) != NULL &&
2416  IsA(outerPlan(subplan), Append))
2417  {
2418  Append *appendplan = (Append *) outerPlan(subplan);
2419 
2420  if (subplan_index < list_length(appendplan->appendplans))
2421  subplan = (Plan *) list_nth(appendplan->appendplans, subplan_index);
2422  }
2423 
2424  /* Now, have we got a ForeignScan on the desired rel? */
2425  if (IsA(subplan, ForeignScan))
2426  {
2427  ForeignScan *fscan = (ForeignScan *) subplan;
2428 
2429  if (bms_is_member(rtindex, fscan->fs_base_relids))
2430  return fscan;
2431  }
2432 
2433  return NULL;
2434 }
2435 
2436 /*
2437  * postgresPlanDirectModify
2438  * Consider a direct foreign table modification
2439  *
2440  * Decide whether it is safe to modify a foreign table directly, and if so,
2441  * rewrite subplan accordingly.
2442  */
2443 static bool
2445  ModifyTable *plan,
2446  Index resultRelation,
2447  int subplan_index)
2448 {
2449  CmdType operation = plan->operation;
2450  RelOptInfo *foreignrel;
2451  RangeTblEntry *rte;
2452  PgFdwRelationInfo *fpinfo;
2453  Relation rel;
2454  StringInfoData sql;
2455  ForeignScan *fscan;
2456  List *processed_tlist = NIL;
2457  List *targetAttrs = NIL;
2458  List *remote_exprs;
2459  List *params_list = NIL;
2460  List *returningList = NIL;
2461  List *retrieved_attrs = NIL;
2462 
2463  /*
2464  * Decide whether it is safe to modify a foreign table directly.
2465  */
2466 
2467  /*
2468  * The table modification must be an UPDATE or DELETE.
2469  */
2470  if (operation != CMD_UPDATE && operation != CMD_DELETE)
2471  return false;
2472 
2473  /*
2474  * Try to locate the ForeignScan subplan that's scanning resultRelation.
2475  */
2476  fscan = find_modifytable_subplan(root, plan, resultRelation, subplan_index);
2477  if (!fscan)
2478  return false;
2479 
2480  /*
2481  * It's unsafe to modify a foreign table directly if there are any quals
2482  * that should be evaluated locally.
2483  */
2484  if (fscan->scan.plan.qual != NIL)
2485  return false;
2486 
2487  /* Safe to fetch data about the target foreign rel */
2488  if (fscan->scan.scanrelid == 0)
2489  {
2490  foreignrel = find_join_rel(root, fscan->fs_relids);
2491  /* We should have a rel for this foreign join. */
2492  Assert(foreignrel);
2493  }
2494  else
2495  foreignrel = root->simple_rel_array[resultRelation];
2496  rte = root->simple_rte_array[resultRelation];
2497  fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private;
2498 
2499  /*
2500  * It's unsafe to update a foreign table directly, if any expressions to
2501  * assign to the target columns are unsafe to evaluate remotely.
2502  */
2503  if (operation == CMD_UPDATE)
2504  {
2505  ListCell *lc,
2506  *lc2;
2507 
2508  /*
2509  * The expressions of concern are the first N columns of the processed
2510  * targetlist, where N is the length of the rel's update_colnos.
2511  */
2512  get_translated_update_targetlist(root, resultRelation,
2513  &processed_tlist, &targetAttrs);
2514  forboth(lc, processed_tlist, lc2, targetAttrs)
2515  {
2516  TargetEntry *tle = lfirst_node(TargetEntry, lc);
2517  AttrNumber attno = lfirst_int(lc2);
2518 
2519  /* update's new-value expressions shouldn't be resjunk */
2520  Assert(!tle->resjunk);
2521 
2522  if (attno <= InvalidAttrNumber) /* shouldn't happen */
2523  elog(ERROR, "system-column update is not supported");
2524 
2525  if (!is_foreign_expr(root, foreignrel, (Expr *) tle->expr))
2526  return false;
2527  }
2528  }
2529 
2530  /*
2531  * Ok, rewrite subplan so as to modify the foreign table directly.
2532  */
2533  initStringInfo(&sql);
2534 
2535  /*
2536  * Core code already has some lock on each rel being planned, so we can
2537  * use NoLock here.
2538  */
2539  rel = table_open(rte->relid, NoLock);
2540 
2541  /*
2542  * Recall the qual clauses that must be evaluated remotely. (These are
2543  * bare clauses not RestrictInfos, but deparse.c's appendConditions()
2544  * doesn't care.)
2545  */
2546  remote_exprs = fpinfo->final_remote_exprs;
2547 
2548  /*
2549  * Extract the relevant RETURNING list if any.
2550  */
2551  if (plan->returningLists)
2552  {
2553  returningList = (List *) list_nth(plan->returningLists, subplan_index);
2554 
2555  /*
2556  * When performing an UPDATE/DELETE .. RETURNING on a join directly,
2557  * we fetch from the foreign server any Vars specified in RETURNING
2558  * that refer not only to the target relation but to non-target
2559  * relations. So we'll deparse them into the RETURNING clause of the
2560  * remote query; use a targetlist consisting of them instead, which
2561  * will be adjusted to be new fdw_scan_tlist of the foreign-scan plan
2562  * node below.
2563  */
2564  if (fscan->scan.scanrelid == 0)
2565  returningList = build_remote_returning(resultRelation, rel,
2566  returningList);
2567  }
2568 
2569  /*
2570  * Construct the SQL command string.
2571  */
2572  switch (operation)
2573  {
2574  case CMD_UPDATE:
2575  deparseDirectUpdateSql(&sql, root, resultRelation, rel,
2576  foreignrel,
2577  processed_tlist,
2578  targetAttrs,
2579  remote_exprs, &params_list,
2580  returningList, &retrieved_attrs);
2581  break;
2582  case CMD_DELETE:
2583  deparseDirectDeleteSql(&sql, root, resultRelation, rel,
2584  foreignrel,
2585  remote_exprs, &params_list,
2586  returningList, &retrieved_attrs);
2587  break;
2588  default:
2589  elog(ERROR, "unexpected operation: %d", (int) operation);
2590  break;
2591  }
2592 
2593  /*
2594  * Update the operation and target relation info.
2595  */
2596  fscan->operation = operation;
2597  fscan->resultRelation = resultRelation;
2598 
2599  /*
2600  * Update the fdw_exprs list that will be available to the executor.
2601  */
2602  fscan->fdw_exprs = params_list;
2603 
2604  /*
2605  * Update the fdw_private list that will be available to the executor.
2606  * Items in the list must match enum FdwDirectModifyPrivateIndex, above.
2607  */
2608  fscan->fdw_private = list_make4(makeString(sql.data),
2609  makeBoolean((retrieved_attrs != NIL)),
2610  retrieved_attrs,
2611  makeBoolean(plan->canSetTag));
2612 
2613  /*
2614  * Update the foreign-join-related fields.
2615  */
2616  if (fscan->scan.scanrelid == 0)
2617  {
2618  /* No need for the outer subplan. */
2619  fscan->scan.plan.lefttree = NULL;
2620 
2621  /* Build new fdw_scan_tlist if UPDATE/DELETE .. RETURNING. */
2622  if (returningList)
2623  rebuild_fdw_scan_tlist(fscan, returningList);
2624  }
2625 
2626  /*
2627  * Finally, unset the async-capable flag if it is set, as we currently
2628  * don't support asynchronous execution of direct modifications.
2629  */
2630  if (fscan->scan.plan.async_capable)
2631  fscan->scan.plan.async_capable = false;
2632 
2633  table_close(rel, NoLock);
2634  return true;
2635 }
2636 
2637 /*
2638  * postgresBeginDirectModify
2639  * Prepare a direct foreign table modification
2640  */
2641 static void
2643 {
2644  ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan;
2645  EState *estate = node->ss.ps.state;
2646  PgFdwDirectModifyState *dmstate;
2647  Index rtindex;
2648  Oid userid;
2649  ForeignTable *table;
2650  UserMapping *user;
2651  int numParams;
2652 
2653  /*
2654  * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL.
2655  */
2656  if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
2657  return;
2658 
2659  /*
2660  * We'll save private state in node->fdw_state.
2661  */
2662  dmstate = (PgFdwDirectModifyState *) palloc0(sizeof(PgFdwDirectModifyState));
2663  node->fdw_state = (void *) dmstate;
2664 
2665  /*
2666  * Identify which user to do the remote access as. This should match what
2667  * ExecCheckPermissions() does.
2668  */
2669  userid = OidIsValid(fsplan->checkAsUser) ? fsplan->checkAsUser : GetUserId();
2670 
2671  /* Get info about foreign table. */
2672  rtindex = node->resultRelInfo->ri_RangeTableIndex;
2673  if (fsplan->scan.scanrelid == 0)
2674  dmstate->rel = ExecOpenScanRelation(estate, rtindex, eflags);
2675  else
2676  dmstate->rel = node->ss.ss_currentRelation;
2677  table = GetForeignTable(RelationGetRelid(dmstate->rel));
2678  user = GetUserMapping(userid, table->serverid);
2679 
2680  /*
2681  * Get connection to the foreign server. Connection manager will
2682  * establish new connection if necessary.
2683  */
2684  dmstate->conn = GetConnection(user, false, &dmstate->conn_state);
2685 
2686  /* Update the foreign-join-related fields. */
2687  if (fsplan->scan.scanrelid == 0)
2688  {
2689  /* Save info about foreign table. */
2690  dmstate->resultRel = dmstate->rel;
2691 
2692  /*
2693  * Set dmstate->rel to NULL to teach get_returning_data() and
2694  * make_tuple_from_result_row() that columns fetched from the remote
2695  * server are described by fdw_scan_tlist of the foreign-scan plan
2696  * node, not the tuple descriptor for the target relation.
2697  */
2698  dmstate->rel = NULL;
2699  }
2700 
2701  /* Initialize state variable */
2702  dmstate->num_tuples = -1; /* -1 means not set yet */
2703 
2704  /* Get private info created by planner functions. */
2705  dmstate->query = strVal(list_nth(fsplan->fdw_private,
2707  dmstate->has_returning = boolVal(list_nth(fsplan->fdw_private,
2709  dmstate->retrieved_attrs = (List *) list_nth(fsplan->fdw_private,
2711  dmstate->set_processed = boolVal(list_nth(fsplan->fdw_private,
2713 
2714  /* Create context for per-tuple temp workspace. */
2715  dmstate->temp_cxt = AllocSetContextCreate(estate->es_query_cxt,
2716  "postgres_fdw temporary data",
2718 
2719  /* Prepare for input conversion of RETURNING results. */
2720  if (dmstate->has_returning)
2721  {
2722  TupleDesc tupdesc;
2723 
2724  if (fsplan->scan.scanrelid == 0)
2725  tupdesc = get_tupdesc_for_join_scan_tuples(node);
2726  else
2727  tupdesc = RelationGetDescr(dmstate->rel);
2728 
2729  dmstate->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2730 
2731  /*
2732  * When performing an UPDATE/DELETE .. RETURNING on a join directly,
2733  * initialize a filter to extract an updated/deleted tuple from a scan
2734  * tuple.
2735  */
2736  if (fsplan->scan.scanrelid == 0)
2737  init_returning_filter(dmstate, fsplan->fdw_scan_tlist, rtindex);
2738  }
2739 
2740  /*
2741  * Prepare for processing of parameters used in remote query, if any.
2742  */
2743  numParams = list_length(fsplan->fdw_exprs);
2744  dmstate->numParams = numParams;
2745  if (numParams > 0)
2747  fsplan->fdw_exprs,
2748  numParams,
2749  &dmstate->param_flinfo,
2750  &dmstate->param_exprs,
2751  &dmstate->param_values);
2752 }
2753 
2754 /*
2755  * postgresIterateDirectModify
2756  * Execute a direct foreign table modification
2757  */
2758 static TupleTableSlot *
2760 {
2762  EState *estate = node->ss.ps.state;
2763  ResultRelInfo *resultRelInfo = node->resultRelInfo;
2764 
2765  /*
2766  * If this is the first call after Begin, execute the statement.
2767  */
2768  if (dmstate->num_tuples == -1)
2769  execute_dml_stmt(node);
2770 
2771  /*
2772  * If the local query doesn't specify RETURNING, just clear tuple slot.
2773  */
2774  if (!resultRelInfo->ri_projectReturning)
2775  {
2776  TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
2777  Instrumentation *instr = node->ss.ps.instrument;
2778 
2779  Assert(!dmstate->has_returning);
2780 
2781  /* Increment the command es_processed count if necessary. */
2782  if (dmstate->set_processed)
2783  estate->es_processed += dmstate->num_tuples;
2784 
2785  /* Increment the tuple count for EXPLAIN ANALYZE if necessary. */
2786  if (instr)
2787  instr->tuplecount += dmstate->num_tuples;
2788 
2789  return ExecClearTuple(slot);
2790  }
2791 
2792  /*
2793  * Get the next RETURNING tuple.
2794  */
2795  return get_returning_data(node);
2796 }
2797 
2798 /*
2799  * postgresEndDirectModify
2800  * Finish a direct foreign table modification
2801  */
2802 static void
2804 {
2806 
2807  /* if dmstate is NULL, we are in EXPLAIN; nothing to do */
2808  if (dmstate == NULL)
2809  return;
2810 
2811  /* Release PGresult */
2812  PQclear(dmstate->result);
2813 
2814  /* Release remote connection */
2815  ReleaseConnection(dmstate->conn);
2816  dmstate->conn = NULL;
2817 
2818  /* MemoryContext will be deleted automatically. */
2819 }
2820 
2821 /*
2822  * postgresExplainForeignScan
2823  * Produce extra output for EXPLAIN of a ForeignScan on a foreign table
2824  */
2825 static void
2827 {
2829  List *fdw_private = plan->fdw_private;
2830 
2831  /*
2832  * Identify foreign scans that are really joins or upper relations. The
2833  * input looks something like "(1) LEFT JOIN (2)", and we must replace the
2834  * digit string(s), which are RT indexes, with the correct relation names.
2835  * We do that here, not when the plan is created, because we can't know
2836  * what aliases ruleutils.c will assign at plan creation time.
2837  */
2838  if (list_length(fdw_private) > FdwScanPrivateRelations)
2839  {
2840  StringInfo relations;
2841  char *rawrelations;
2842  char *ptr;
2843  int minrti,
2844  rtoffset;
2845 
2846  rawrelations = strVal(list_nth(fdw_private, FdwScanPrivateRelations));
2847 
2848  /*
2849  * A difficulty with using a string representation of RT indexes is
2850  * that setrefs.c won't update the string when flattening the
2851  * rangetable. To find out what rtoffset was applied, identify the
2852  * minimum RT index appearing in the string and compare it to the
2853  * minimum member of plan->fs_base_relids. (We expect all the relids
2854  * in the join will have been offset by the same amount; the Asserts
2855  * below should catch it if that ever changes.)
2856  */
2857  minrti = INT_MAX;
2858  ptr = rawrelations;
2859  while (*ptr)
2860  {
2861  if (isdigit((unsigned char) *ptr))
2862  {
2863  int rti = strtol(ptr, &ptr, 10);
2864 
2865  if (rti < minrti)
2866  minrti = rti;
2867  }
2868  else
2869  ptr++;
2870  }
2871  rtoffset = bms_next_member(plan->fs_base_relids, -1) - minrti;
2872 
2873  /* Now we can translate the string */
2874  relations = makeStringInfo();
2875  ptr = rawrelations;
2876  while (*ptr)
2877  {
2878  if (isdigit((unsigned char) *ptr))
2879  {
2880  int rti = strtol(ptr, &ptr, 10);
2881  RangeTblEntry *rte;
2882  char *relname;
2883  char *refname;
2884 
2885  rti += rtoffset;
2886  Assert(bms_is_member(rti, plan->fs_base_relids));
2887  rte = rt_fetch(rti, es->rtable);
2888  Assert(rte->rtekind == RTE_RELATION);
2889  /* This logic should agree with explain.c's ExplainTargetRel */
2890  relname = get_rel_name(rte->relid);
2891  if (es->verbose)
2892  {
2893  char *namespace;
2894 
2895  namespace = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
2896  appendStringInfo(relations, "%s.%s",
2897  quote_identifier(namespace),
2899  }
2900  else
2901  appendStringInfoString(relations,
2903  refname = (char *) list_nth(es->rtable_names, rti - 1);
2904  if (refname == NULL)
2905  refname = rte->eref->aliasname;
2906  if (strcmp(refname, relname) != 0)
2907  appendStringInfo(relations, " %s",
2908  quote_identifier(refname));
2909  }
2910  else
2911  appendStringInfoChar(relations, *ptr++);
2912  }
2913  ExplainPropertyText("Relations", relations->data, es);
2914  }
2915 
2916  /*
2917  * Add remote query, when VERBOSE option is specified.
2918  */
2919  if (es->verbose)
2920  {
2921  char *sql;
2922 
2923  sql = strVal(list_nth(fdw_private, FdwScanPrivateSelectSql));
2924  ExplainPropertyText("Remote SQL", sql, es);
2925  }
2926 }
2927 
2928 /*
2929  * postgresExplainForeignModify
2930  * Produce extra output for EXPLAIN of a ModifyTable on a foreign table
2931  */
2932 static void
2934  ResultRelInfo *rinfo,
2935  List *fdw_private,
2936  int subplan_index,
2937  ExplainState *es)
2938 {
2939  if (es->verbose)
2940  {
2941  char *sql = strVal(list_nth(fdw_private,
2943 
2944  ExplainPropertyText("Remote SQL", sql, es);
2945 
2946  /*
2947  * For INSERT we should always have batch size >= 1, but UPDATE and
2948  * DELETE don't support batching so don't show the property.
2949  */
2950  if (rinfo->ri_BatchSize > 0)
2951  ExplainPropertyInteger("Batch Size", NULL, rinfo->ri_BatchSize, es);
2952  }
2953 }
2954 
2955 /*
2956  * postgresExplainDirectModify
2957  * Produce extra output for EXPLAIN of a ForeignScan that modifies a
2958  * foreign table directly
2959  */
2960 static void
2962 {
2963  List *fdw_private;
2964  char *sql;
2965 
2966  if (es->verbose)
2967  {
2968  fdw_private = ((ForeignScan *) node->ss.ps.plan)->fdw_private;
2969  sql = strVal(list_nth(fdw_private, FdwDirectModifyPrivateUpdateSql));
2970  ExplainPropertyText("Remote SQL", sql, es);
2971  }
2972 }
2973 
2974 /*
2975  * postgresExecForeignTruncate
2976  * Truncate one or more foreign tables
2977  */
2978 static void
2980  DropBehavior behavior,
2981  bool restart_seqs)
2982 {
2983  Oid serverid = InvalidOid;
2984  UserMapping *user = NULL;
2985  PGconn *conn = NULL;
2986  StringInfoData sql;
2987  ListCell *lc;
2988  bool server_truncatable = true;
2989 
2990  /*
2991  * By default, all postgres_fdw foreign tables are assumed truncatable.
2992  * This can be overridden by a per-server setting, which in turn can be
2993  * overridden by a per-table setting.
2994  */
2995  foreach(lc, rels)
2996  {
2997  ForeignServer *server = NULL;
2998  Relation rel = lfirst(lc);
3000  ListCell *cell;
3001  bool truncatable;
3002 
3003  /*
3004  * First time through, determine whether the foreign server allows
3005  * truncates. Since all specified foreign tables are assumed to belong
3006  * to the same foreign server, this result can be used for other
3007  * foreign tables.
3008  */
3009  if (!OidIsValid(serverid))
3010  {
3011  serverid = table->serverid;
3012  server = GetForeignServer(serverid);
3013 
3014  foreach(cell, server->options)
3015  {
3016  DefElem *defel = (DefElem *) lfirst(cell);
3017 
3018  if (strcmp(defel->defname, "truncatable") == 0)
3019  {
3020  server_truncatable = defGetBoolean(defel);
3021  break;
3022  }
3023  }
3024  }
3025 
3026  /*
3027  * Confirm that all specified foreign tables belong to the same
3028  * foreign server.
3029  */
3030  Assert(table->serverid == serverid);
3031 
3032  /* Determine whether this foreign table allows truncations */
3033  truncatable = server_truncatable;
3034  foreach(cell, table->options)
3035  {
3036  DefElem *defel = (DefElem *) lfirst(cell);
3037 
3038  if (strcmp(defel->defname, "truncatable") == 0)
3039  {
3040  truncatable = defGetBoolean(defel);
3041  break;
3042  }
3043  }
3044 
3045  if (!truncatable)
3046  ereport(ERROR,
3047  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3048  errmsg("foreign table \"%s\" does not allow truncates",
3049  RelationGetRelationName(rel))));
3050  }
3051  Assert(OidIsValid(serverid));
3052 
3053  /*
3054  * Get connection to the foreign server. Connection manager will
3055  * establish new connection if necessary.
3056  */
3057  user = GetUserMapping(GetUserId(), serverid);
3058  conn = GetConnection(user, false, NULL);
3059 
3060  /* Construct the TRUNCATE command string */
3061  initStringInfo(&sql);
3062  deparseTruncateSql(&sql, rels, behavior, restart_seqs);
3063 
3064  /* Issue the TRUNCATE command to remote server */
3065  do_sql_command(conn, sql.data);
3066 
3067  pfree(sql.data);
3068 }
3069 
3070 /*
3071  * estimate_path_cost_size
3072  * Get cost and size estimates for a foreign scan on given foreign relation
3073  * either a base relation or a join between foreign relations or an upper
3074  * relation containing foreign relations.
3075  *
3076  * param_join_conds are the parameterization clauses with outer relations.
3077  * pathkeys specify the expected sort order if any for given path being costed.
3078  * fpextra specifies additional post-scan/join-processing steps such as the
3079  * final sort and the LIMIT restriction.
3080  *
3081  * The function returns the cost and size estimates in p_rows, p_width,
3082  * p_startup_cost and p_total_cost variables.
3083  */
3084 static void
3086  RelOptInfo *foreignrel,
3087  List *param_join_conds,
3088  List *pathkeys,
3089  PgFdwPathExtraData *fpextra,
3090  double *p_rows, int *p_width,
3091  Cost *p_startup_cost, Cost *p_total_cost)
3092 {
3093  PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private;
3094  double rows;
3095  double retrieved_rows;
3096  int width;
3097  Cost startup_cost;
3098  Cost total_cost;
3099 
3100  /* Make sure the core code has set up the relation's reltarget */
3101  Assert(foreignrel->reltarget);
3102 
3103  /*
3104  * If the table or the server is configured to use remote estimates,
3105  * connect to the foreign server and execute EXPLAIN to estimate the
3106  * number of rows selected by the restriction+join clauses. Otherwise,
3107  * estimate rows using whatever statistics we have locally, in a way
3108  * similar to ordinary tables.
3109  */
3110  if (fpinfo->use_remote_estimate)
3111  {
3112  List *remote_param_join_conds;
3113  List *local_param_join_conds;
3114  StringInfoData sql;
3115  PGconn *conn;
3116  Selectivity local_sel;
3117  QualCost local_cost;
3118  List *fdw_scan_tlist = NIL;
3119  List *remote_conds;
3120 
3121  /* Required only to be passed to deparseSelectStmtForRel */
3122  List *retrieved_attrs;
3123 
3124  /*
3125  * param_join_conds might contain both clauses that are safe to send
3126  * across, and clauses that aren't.
3127  */
3128  classifyConditions(root, foreignrel, param_join_conds,
3129  &remote_param_join_conds, &local_param_join_conds);
3130 
3131  /* Build the list of columns to be fetched from the foreign server. */
3132  if (IS_JOIN_REL(foreignrel) || IS_UPPER_REL(foreignrel))
3133  fdw_scan_tlist = build_tlist_to_deparse(foreignrel);
3134  else
3135  fdw_scan_tlist = NIL;
3136 
3137  /*
3138  * The complete list of remote conditions includes everything from
3139  * baserestrictinfo plus any extra join_conds relevant to this
3140  * particular path.
3141  */
3142  remote_conds = list_concat(remote_param_join_conds,
3143  fpinfo->remote_conds);
3144 
3145  /*
3146  * Construct EXPLAIN query including the desired SELECT, FROM, and
3147  * WHERE clauses. Params and other-relation Vars are replaced by dummy
3148  * values, so don't request params_list.
3149  */
3150  initStringInfo(&sql);
3151  appendStringInfoString(&sql, "EXPLAIN ");
3152  deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
3153  remote_conds, pathkeys,
3154  fpextra ? fpextra->has_final_sort : false,
3155  fpextra ? fpextra->has_limit : false,
3156  false, &retrieved_attrs, NULL);
3157 
3158  /* Get the remote estimate */
3159  conn = GetConnection(fpinfo->user, false, NULL);
3160  get_remote_estimate(sql.data, conn, &rows, &width,
3161  &startup_cost, &total_cost);
3163 
3164  retrieved_rows = rows;
3165 
3166  /* Factor in the selectivity of the locally-checked quals */
3167  local_sel = clauselist_selectivity(root,
3168  local_param_join_conds,
3169  foreignrel->relid,
3170  JOIN_INNER,
3171  NULL);
3172  local_sel *= fpinfo->local_conds_sel;
3173 
3174  rows = clamp_row_est(rows * local_sel);
3175 
3176  /* Add in the eval cost of the locally-checked quals */
3177  startup_cost += fpinfo->local_conds_cost.startup;
3178  total_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows;
3179  cost_qual_eval(&local_cost, local_param_join_conds, root);
3180  startup_cost += local_cost.startup;
3181  total_cost += local_cost.per_tuple * retrieved_rows;
3182 
3183  /*
3184  * Add in tlist eval cost for each output row. In case of an
3185  * aggregate, some of the tlist expressions such as grouping
3186  * expressions will be evaluated remotely, so adjust the costs.
3187  */
3188  startup_cost += foreignrel->reltarget->cost.startup;
3189  total_cost += foreignrel->reltarget->cost.startup;
3190  total_cost += foreignrel->reltarget->cost.per_tuple * rows;
3191  if (IS_UPPER_REL(foreignrel))
3192  {
3193  QualCost tlist_cost;
3194 
3195  cost_qual_eval(&tlist_cost, fdw_scan_tlist, root);
3196  startup_cost -= tlist_cost.startup;
3197  total_cost -= tlist_cost.startup;
3198  total_cost -= tlist_cost.per_tuple * rows;
3199  }
3200  }
3201  else
3202  {
3203  Cost run_cost = 0;
3204 
3205  /*
3206  * We don't support join conditions in this mode (hence, no
3207  * parameterized paths can be made).
3208  */
3209  Assert(param_join_conds == NIL);
3210 
3211  /*
3212  * We will come here again and again with different set of pathkeys or
3213  * additional post-scan/join-processing steps that caller wants to
3214  * cost. We don't need to calculate the cost/size estimates for the
3215  * underlying scan, join, or grouping each time. Instead, use those
3216  * estimates if we have cached them already.
3217  */
3218  if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0)
3219  {
3220  Assert(fpinfo->retrieved_rows >= 0);
3221 
3222  rows = fpinfo->rows;
3223  retrieved_rows = fpinfo->retrieved_rows;
3224  width = fpinfo->width;
3225  startup_cost = fpinfo->rel_startup_cost;
3226  run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
3227 
3228  /*
3229  * If we estimate the costs of a foreign scan or a foreign join
3230  * with additional post-scan/join-processing steps, the scan or
3231  * join costs obtained from the cache wouldn't yet contain the
3232  * eval costs for the final scan/join target, which would've been
3233  * updated by apply_scanjoin_target_to_paths(); add the eval costs
3234  * now.
3235  */
3236  if (fpextra && !IS_UPPER_REL(foreignrel))
3237  {
3238  /* Shouldn't get here unless we have LIMIT */
3239  Assert(fpextra->has_limit);
3240  Assert(foreignrel->reloptkind == RELOPT_BASEREL ||
3241  foreignrel->reloptkind == RELOPT_JOINREL);
3242  startup_cost += foreignrel->reltarget->cost.startup;
3243  run_cost += foreignrel->reltarget->cost.per_tuple * rows;
3244  }
3245  }
3246  else if (IS_JOIN_REL(foreignrel))
3247  {
3248  PgFdwRelationInfo *fpinfo_i;
3249  PgFdwRelationInfo *fpinfo_o;
3250  QualCost join_cost;
3251  QualCost remote_conds_cost;
3252  double nrows;
3253 
3254  /* Use rows/width estimates made by the core code. */
3255  rows = foreignrel->rows;
3256  width = foreignrel->reltarget->width;
3257 
3258  /* For join we expect inner and outer relations set */
3259  Assert(fpinfo->innerrel && fpinfo->outerrel);
3260 
3261  fpinfo_i = (PgFdwRelationInfo *) fpinfo->innerrel->fdw_private;
3262  fpinfo_o = (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private;
3263 
3264  /* Estimate of number of rows in cross product */
3265  nrows = fpinfo_i->rows * fpinfo_o->rows;
3266 
3267  /*
3268  * Back into an estimate of the number of retrieved rows. Just in
3269  * case this is nuts, clamp to at most nrows.
3270  */
3271  retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
3272  retrieved_rows = Min(retrieved_rows, nrows);
3273 
3274  /*
3275  * The cost of foreign join is estimated as cost of generating
3276  * rows for the joining relations + cost for applying quals on the
3277  * rows.
3278  */
3279 
3280  /*
3281  * Calculate the cost of clauses pushed down to the foreign server
3282  */
3283  cost_qual_eval(&remote_conds_cost, fpinfo->remote_conds, root);
3284  /* Calculate the cost of applying join clauses */
3285  cost_qual_eval(&join_cost, fpinfo->joinclauses, root);
3286 
3287  /*
3288  * Startup cost includes startup cost of joining relations and the
3289  * startup cost for join and other clauses. We do not include the
3290  * startup cost specific to join strategy (e.g. setting up hash
3291  * tables) since we do not know what strategy the foreign server
3292  * is going to use.
3293  */
3294  startup_cost = fpinfo_i->rel_startup_cost + fpinfo_o->rel_startup_cost;
3295  startup_cost += join_cost.startup;
3296  startup_cost += remote_conds_cost.startup;
3297  startup_cost += fpinfo->local_conds_cost.startup;
3298 
3299  /*
3300  * Run time cost includes:
3301  *
3302  * 1. Run time cost (total_cost - startup_cost) of relations being
3303  * joined
3304  *
3305  * 2. Run time cost of applying join clauses on the cross product
3306  * of the joining relations.
3307  *
3308  * 3. Run time cost of applying pushed down other clauses on the
3309  * result of join
3310  *
3311  * 4. Run time cost of applying nonpushable other clauses locally
3312  * on the result fetched from the foreign server.
3313  */
3314  run_cost = fpinfo_i->rel_total_cost - fpinfo_i->rel_startup_cost;
3315  run_cost += fpinfo_o->rel_total_cost - fpinfo_o->rel_startup_cost;
3316  run_cost += nrows * join_cost.per_tuple;
3317  nrows = clamp_row_est(nrows * fpinfo->joinclause_sel);
3318  run_cost += nrows * remote_conds_cost.per_tuple;
3319  run_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows;
3320 
3321  /* Add in tlist eval cost for each output row */
3322  startup_cost += foreignrel->reltarget->cost.startup;
3323  run_cost += foreignrel->reltarget->cost.per_tuple * rows;
3324  }
3325  else if (IS_UPPER_REL(foreignrel))
3326  {
3327  RelOptInfo *outerrel = fpinfo->outerrel;
3328  PgFdwRelationInfo *ofpinfo;
3329  AggClauseCosts aggcosts;
3330  double input_rows;
3331  int numGroupCols;
3332  double numGroups = 1;
3333 
3334  /* The upper relation should have its outer relation set */
3335  Assert(outerrel);
3336  /* and that outer relation should have its reltarget set */
3337  Assert(outerrel->reltarget);
3338 
3339  /*
3340  * This cost model is mixture of costing done for sorted and
3341  * hashed aggregates in cost_agg(). We are not sure which
3342  * strategy will be considered at remote side, thus for
3343  * simplicity, we put all startup related costs in startup_cost
3344  * and all finalization and run cost are added in total_cost.
3345  */
3346 
3347  ofpinfo = (PgFdwRelationInfo *) outerrel->fdw_private;
3348 
3349  /* Get rows from input rel */
3350  input_rows = ofpinfo->rows;
3351 
3352  /* Collect statistics about aggregates for estimating costs. */
3353  MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
3354  if (root->parse->hasAggs)
3355  {
3357  }
3358 
3359  /* Get number of grouping columns and possible number of groups */
3360  numGroupCols = list_length(root->processed_groupClause);
3361  numGroups = estimate_num_groups(root,
3362  get_sortgrouplist_exprs(root->processed_groupClause,
3363  fpinfo->grouped_tlist),
3364  input_rows, NULL, NULL);
3365 
3366  /*
3367  * Get the retrieved_rows and rows estimates. If there are HAVING
3368  * quals, account for their selectivity.
3369  */
3370  if (root->hasHavingQual)
3371  {
3372  /* Factor in the selectivity of the remotely-checked quals */
3373  retrieved_rows =
3374  clamp_row_est(numGroups *
3376  fpinfo->remote_conds,
3377  0,
3378  JOIN_INNER,
3379  NULL));
3380  /* Factor in the selectivity of the locally-checked quals */
3381  rows = clamp_row_est(retrieved_rows * fpinfo->local_conds_sel);
3382  }
3383  else
3384  {
3385  rows = retrieved_rows = numGroups;
3386  }
3387 
3388  /* Use width estimate made by the core code. */
3389  width = foreignrel->reltarget->width;
3390 
3391  /*-----
3392  * Startup cost includes:
3393  * 1. Startup cost for underneath input relation, adjusted for
3394  * tlist replacement by apply_scanjoin_target_to_paths()
3395  * 2. Cost of performing aggregation, per cost_agg()
3396  *-----
3397  */
3398  startup_cost = ofpinfo->rel_startup_cost;
3399  startup_cost += outerrel->reltarget->cost.startup;
3400  startup_cost += aggcosts.transCost.startup;
3401  startup_cost += aggcosts.transCost.per_tuple * input_rows;
3402  startup_cost += aggcosts.finalCost.startup;
3403  startup_cost += (cpu_operator_cost * numGroupCols) * input_rows;
3404 
3405  /*-----
3406  * Run time cost includes:
3407  * 1. Run time cost of underneath input relation, adjusted for
3408  * tlist replacement by apply_scanjoin_target_to_paths()
3409  * 2. Run time cost of performing aggregation, per cost_agg()
3410  *-----
3411  */
3412  run_cost = ofpinfo->rel_total_cost - ofpinfo->rel_startup_cost;
3413  run_cost += outerrel->reltarget->cost.per_tuple * input_rows;
3414  run_cost += aggcosts.finalCost.per_tuple * numGroups;
3415  run_cost += cpu_tuple_cost * numGroups;
3416 
3417  /* Account for the eval cost of HAVING quals, if any */
3418  if (root->hasHavingQual)
3419  {
3420  QualCost remote_cost;
3421 
3422  /* Add in the eval cost of the remotely-checked quals */
3423  cost_qual_eval(&remote_cost, fpinfo->remote_conds, root);
3424  startup_cost += remote_cost.startup;
3425  run_cost += remote_cost.per_tuple * numGroups;
3426  /* Add in the eval cost of the locally-checked quals */
3427  startup_cost += fpinfo->local_conds_cost.startup;
3428  run_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows;
3429  }
3430 
3431  /* Add in tlist eval cost for each output row */
3432  startup_cost += foreignrel->reltarget->cost.startup;
3433  run_cost += foreignrel->reltarget->cost.per_tuple * rows;
3434  }
3435  else
3436  {
3437  Cost cpu_per_tuple;
3438 
3439  /* Use rows/width estimates made by set_baserel_size_estimates. */
3440  rows = foreignrel->rows;
3441  width = foreignrel->reltarget->width;
3442 
3443  /*
3444  * Back into an estimate of the number of retrieved rows. Just in
3445  * case this is nuts, clamp to at most foreignrel->tuples.
3446  */
3447  retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
3448  retrieved_rows = Min(retrieved_rows, foreignrel->tuples);
3449 
3450  /*
3451  * Cost as though this were a seqscan, which is pessimistic. We
3452  * effectively imagine the local_conds are being evaluated
3453  * remotely, too.
3454  */
3455  startup_cost = 0;
3456  run_cost = 0;
3457  run_cost += seq_page_cost * foreignrel->pages;
3458 
3459  startup_cost += foreignrel->baserestrictcost.startup;
3460  cpu_per_tuple = cpu_tuple_cost + foreignrel->baserestrictcost.per_tuple;
3461  run_cost += cpu_per_tuple * foreignrel->tuples;
3462 
3463  /* Add in tlist eval cost for each output row */
3464  startup_cost += foreignrel->reltarget->cost.startup;
3465  run_cost += foreignrel->reltarget->cost.per_tuple * rows;
3466  }
3467 
3468  /*
3469  * Without remote estimates, we have no real way to estimate the cost
3470  * of generating sorted output. It could be free if the query plan
3471  * the remote side would have chosen generates properly-sorted output
3472  * anyway, but in most cases it will cost something. Estimate a value
3473  * high enough that we won't pick the sorted path when the ordering
3474  * isn't locally useful, but low enough that we'll err on the side of
3475  * pushing down the ORDER BY clause when it's useful to do so.
3476  */
3477  if (pathkeys != NIL)
3478  {
3479  if (IS_UPPER_REL(foreignrel))
3480  {
3481  Assert(foreignrel->reloptkind == RELOPT_UPPER_REL &&
3482  fpinfo->stage == UPPERREL_GROUP_AGG);
3484  retrieved_rows, width,
3485  fpextra->limit_tuples,
3486  &startup_cost, &run_cost);
3487  }
3488  else
3489  {
3490  startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
3491  run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
3492  }
3493  }
3494 
3495  total_cost = startup_cost + run_cost;
3496 
3497  /* Adjust the cost estimates if we have LIMIT */
3498  if (fpextra && fpextra->has_limit)
3499  {
3500  adjust_limit_rows_costs(&rows, &startup_cost, &total_cost,
3501  fpextra->offset_est, fpextra->count_est);
3502  retrieved_rows = rows;
3503  }
3504  }
3505 
3506  /*
3507  * If this includes the final sort step, the given target, which will be
3508  * applied to the resulting path, might have different expressions from
3509  * the foreignrel's reltarget (see make_sort_input_target()); adjust tlist
3510  * eval costs.
3511  */
3512  if (fpextra && fpextra->has_final_sort &&
3513  fpextra->target != foreignrel->reltarget)
3514  {
3515  QualCost oldcost = foreignrel->reltarget->cost;
3516  QualCost newcost = fpextra->target->cost;
3517 
3518  startup_cost += newcost.startup - oldcost.startup;
3519  total_cost += newcost.startup - oldcost.startup;
3520  total_cost += (newcost.per_tuple - oldcost.per_tuple) * rows;
3521  }
3522 
3523  /*
3524  * Cache the retrieved rows and cost estimates for scans, joins, or
3525  * groupings without any parameterization, pathkeys, or additional
3526  * post-scan/join-processing steps, before adding the costs for
3527  * transferring data from the foreign server. These estimates are useful
3528  * for costing remote joins involving this relation or costing other
3529  * remote operations on this relation such as remote sorts and remote
3530  * LIMIT restrictions, when the costs can not be obtained from the foreign
3531  * server. This function will be called at least once for every foreign
3532  * relation without any parameterization, pathkeys, or additional
3533  * post-scan/join-processing steps.
3534  */
3535  if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL)
3536  {
3537  fpinfo->retrieved_rows = retrieved_rows;
3538  fpinfo->rel_startup_cost = startup_cost;
3539  fpinfo->rel_total_cost = total_cost;
3540  }
3541 
3542  /*
3543  * Add some additional cost factors to account for connection overhead
3544  * (fdw_startup_cost), transferring data across the network
3545  * (fdw_tuple_cost per retrieved row), and local manipulation of the data
3546  * (cpu_tuple_cost per retrieved row).
3547  */
3548  startup_cost += fpinfo->fdw_startup_cost;
3549  total_cost += fpinfo->fdw_startup_cost;
3550  total_cost += fpinfo->fdw_tuple_cost * retrieved_rows;
3551  total_cost += cpu_tuple_cost * retrieved_rows;
3552 
3553  /*
3554  * If we have LIMIT, we should prefer performing the restriction remotely
3555  * rather than locally, as the former avoids extra row fetches from the
3556  * remote that the latter might cause. But since the core code doesn't
3557  * account for such fetches when estimating the costs of the local
3558  * restriction (see create_limit_path()), there would be no difference
3559  * between the costs of the local restriction and the costs of the remote
3560  * restriction estimated above if we don't use remote estimates (except
3561  * for the case where the foreignrel is a grouping relation, the given
3562  * pathkeys is not NIL, and the effects of a bounded sort for that rel is
3563  * accounted for in costing the remote restriction). Tweak the costs of
3564  * the remote restriction to ensure we'll prefer it if LIMIT is a useful
3565  * one.
3566  */
3567  if (!fpinfo->use_remote_estimate &&
3568  fpextra && fpextra->has_limit &&
3569  fpextra->limit_tuples > 0 &&
3570  fpextra->limit_tuples < fpinfo->rows)
3571  {
3572  Assert(fpinfo->rows > 0);
3573  total_cost -= (total_cost - startup_cost) * 0.05 *
3574  (fpinfo->rows - fpextra->limit_tuples) / fpinfo->rows;
3575  }
3576 
3577  /* Return results. */
3578  *p_rows = rows;
3579  *p_width = width;
3580  *p_startup_cost = startup_cost;
3581  *p_total_cost = total_cost;
3582 }
3583 
3584 /*
3585  * Estimate costs of executing a SQL statement remotely.
3586  * The given "sql" must be an EXPLAIN command.
3587  */
3588 static void
3589 get_remote_estimate(const char *sql, PGconn *conn,
3590  double *rows, int *width,
3591  Cost *startup_cost, Cost *total_cost)
3592 {
3593  PGresult *volatile res = NULL;
3594 
3595  /* PGresult must be released before leaving this function. */
3596  PG_TRY();
3597  {
3598  char *line;
3599  char *p;
3600  int n;
3601 
3602  /*
3603  * Execute EXPLAIN remotely.
3604  */
3605  res = pgfdw_exec_query(conn, sql, NULL);
3607  pgfdw_report_error(ERROR, res, conn, false, sql);
3608 
3609  /*
3610  * Extract cost numbers for topmost plan node. Note we search for a
3611  * left paren from the end of the line to avoid being confused by
3612  * other uses of parentheses.
3613  */
3614  line = PQgetvalue(res, 0, 0);
3615  p = strrchr(line, '(');
3616  if (p == NULL)
3617  elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line);
3618  n = sscanf(p, "(cost=%lf..%lf rows=%lf width=%d)",
3619  startup_cost, total_cost, rows, width);
3620  if (n != 4)
3621  elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line);
3622  }
3623  PG_FINALLY();
3624  {
3625  PQclear(res);
3626  }
3627  PG_END_TRY();
3628 }
3629 
3630 /*
3631  * Adjust the cost estimates of a foreign grouping path to include the cost of
3632  * generating properly-sorted output.
3633  */
3634 static void
3636  List *pathkeys,
3637  double retrieved_rows,
3638  double width,
3639  double limit_tuples,
3640  Cost *p_startup_cost,
3641  Cost *p_run_cost)
3642 {
3643  /*
3644  * If the GROUP BY clause isn't sort-able, the plan chosen by the remote
3645  * side is unlikely to generate properly-sorted output, so it would need
3646  * an explicit sort; adjust the given costs with cost_sort(). Likewise,
3647  * if the GROUP BY clause is sort-able but isn't a superset of the given
3648  * pathkeys, adjust the costs with that function. Otherwise, adjust the
3649  * costs by applying the same heuristic as for the scan or join case.
3650  */
3651  if (!grouping_is_sortable(root->processed_groupClause) ||
3652  !pathkeys_contained_in(pathkeys, root->group_pathkeys))
3653  {
3654  Path sort_path; /* dummy for result of cost_sort */
3655 
3656  cost_sort(&sort_path,
3657  root,
3658  pathkeys,
3659  *p_startup_cost + *p_run_cost,
3660  retrieved_rows,
3661  width,
3662  0.0,
3663  work_mem,
3664  limit_tuples);
3665 
3666  *p_startup_cost = sort_path.startup_cost;
3667  *p_run_cost = sort_path.total_cost - sort_path.startup_cost;
3668  }
3669  else
3670  {
3671  /*
3672  * The default extra cost seems too large for foreign-grouping cases;
3673  * add 1/4th of that default.
3674  */
3675  double sort_multiplier = 1.0 + (DEFAULT_FDW_SORT_MULTIPLIER
3676  - 1.0) * 0.25;
3677 
3678  *p_startup_cost *= sort_multiplier;
3679  *p_run_cost *= sort_multiplier;
3680  }
3681 }
3682 
3683 /*
3684  * Detect whether we want to process an EquivalenceClass member.
3685  *
3686  * This is a callback for use by generate_implied_equalities_for_column.
3687  */
3688 static bool
3691  void *arg)
3692 {
3694  Expr *expr = em->em_expr;
3695 
3696  /*
3697  * If we've identified what we're processing in the current scan, we only
3698  * want to match that expression.
3699  */
3700  if (state->current != NULL)
3701  return equal(expr, state->current);
3702 
3703  /*
3704  * Otherwise, ignore anything we've already processed.
3705  */
3706  if (list_member(state->already_used, expr))
3707  return false;
3708 
3709  /* This is the new target to process. */
3710  state->current = expr;
3711  return true;
3712 }
3713 
3714 /*
3715  * Create cursor for node's query with current parameter values.
3716  */
3717 static void
3719 {
3720  PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
3721  ExprContext *econtext = node->ss.ps.ps_ExprContext;
3722  int numParams = fsstate->numParams;
3723  const char **values = fsstate->param_values;
3724  PGconn *conn = fsstate->conn;
3726  PGresult *res;
3727 
3728  /* First, process a pending asynchronous request, if any. */
3729  if (fsstate->conn_state->pendingAreq)
3731 
3732  /*
3733  * Construct array of query parameter values in text format. We do the
3734  * conversions in the short-lived per-tuple context, so as not to cause a
3735  * memory leak over repeated scans.
3736  */
3737  if (numParams > 0)
3738  {
3739  MemoryContext oldcontext;
3740 
3741  oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
3742 
3743  process_query_params(econtext,
3744  fsstate->param_flinfo,
3745  fsstate->param_exprs,
3746  values);
3747 
3748  MemoryContextSwitchTo(oldcontext);
3749  }
3750 
3751  /* Construct the DECLARE CURSOR command */
3752  initStringInfo(&buf);
3753  appendStringInfo(&buf, "DECLARE c%u CURSOR FOR\n%s",
3754  fsstate->cursor_number, fsstate->query);
3755 
3756  /*
3757  * Notice that we pass NULL for paramTypes, thus forcing the remote server
3758  * to infer types for all parameters. Since we explicitly cast every
3759  * parameter (see deparse.c), the "inference" is trivial and will produce
3760  * the desired result. This allows us to avoid assuming that the remote
3761  * server has the same OIDs we do for the parameters' types.
3762  */
3763  if (!PQsendQueryParams(conn, buf.data, numParams,
3764  NULL, values, NULL, NULL, 0))
3765  pgfdw_report_error(ERROR, NULL, conn, false, buf.data);
3766 
3767  /*
3768  * Get the result, and check for success.
3769  *
3770  * We don't use a PG_TRY block here, so be careful not to throw error
3771  * without releasing the PGresult.
3772  */
3775  pgfdw_report_error(ERROR, res, conn, true, fsstate->query);
3776  PQclear(res);
3777 
3778  /* Mark the cursor as created, and show no tuples have been retrieved */
3779  fsstate->cursor_exists = true;
3780  fsstate->tuples = NULL;
3781  fsstate->num_tuples = 0;
3782  fsstate->next_tuple = 0;
3783  fsstate->fetch_ct_2 = 0;
3784  fsstate->eof_reached = false;
3785 
3786  /* Clean up */
3787  pfree(buf.data);
3788 }
3789 
3790 /*
3791  * Fetch some more rows from the node's cursor.
3792  */
3793 static void
3795 {
3796  PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
3797  PGresult *volatile res = NULL;
3798  MemoryContext oldcontext;
3799 
3800  /*
3801  * We'll store the tuples in the batch_cxt. First, flush the previous
3802  * batch.
3803  */
3804  fsstate->tuples = NULL;
3805  MemoryContextReset(fsstate->batch_cxt);
3806  oldcontext = MemoryContextSwitchTo(fsstate->batch_cxt);
3807 
3808  /* PGresult must be released before leaving this function. */
3809  PG_TRY();
3810  {
3811  PGconn *conn = fsstate->conn;
3812  int numrows;
3813  int i;
3814 
3815  if (fsstate->async_capable)
3816  {
3817  Assert(fsstate->conn_state->pendingAreq);
3818 
3819  /*
3820  * The query was already sent by an earlier call to
3821  * fetch_more_data_begin. So now we just fetch the result.
3822  */
3824  /* On error, report the original query, not the FETCH. */
3826  pgfdw_report_error(ERROR, res, conn, false, fsstate->query);
3827 
3828  /* Reset per-connection state */
3829  fsstate->conn_state->pendingAreq = NULL;
3830  }
3831  else
3832  {
3833  char sql[64];
3834 
3835  /* This is a regular synchronous fetch. */
3836  snprintf(sql, sizeof(sql), "FETCH %d FROM c%u",
3837  fsstate->fetch_size, fsstate->cursor_number);
3838 
3839  res = pgfdw_exec_query(conn, sql, fsstate->conn_state);
3840  /* On error, report the original query, not the FETCH. */
3842  pgfdw_report_error(ERROR, res, conn, false, fsstate->query);
3843  }
3844 
3845  /* Convert the data into HeapTuples */
3846  numrows = PQntuples(res);
3847  fsstate->tuples = (HeapTuple *) palloc0(numrows * sizeof(HeapTuple));
3848  fsstate->num_tuples = numrows;
3849  fsstate->next_tuple = 0;
3850 
3851  for (i = 0; i < numrows; i++)
3852  {
3853  Assert(IsA(node->ss.ps.plan, ForeignScan));
3854 
3855  fsstate->tuples[i] =
3857  fsstate->rel,
3858  fsstate->attinmeta,
3859  fsstate->retrieved_attrs,
3860  node,
3861  fsstate->temp_cxt);
3862  }
3863 
3864  /* Update fetch_ct_2 */
3865  if (fsstate->fetch_ct_2 < 2)
3866  fsstate->fetch_ct_2++;
3867 
3868  /* Must be EOF if we didn't get as many tuples as we asked for. */
3869  fsstate->eof_reached = (numrows < fsstate->fetch_size);
3870  }
3871  PG_FINALLY();
3872  {
3873  PQclear(res);
3874  }
3875  PG_END_TRY();
3876 
3877  MemoryContextSwitchTo(oldcontext);
3878 }
3879 
3880 /*
3881  * Force assorted GUC parameters to settings that ensure that we'll output
3882  * data values in a form that is unambiguous to the remote server.
3883  *
3884  * This is rather expensive and annoying to do once per row, but there's
3885  * little choice if we want to be sure values are transmitted accurately;
3886  * we can't leave the settings in place between rows for fear of affecting
3887  * user-visible computations.
3888  *
3889  * We use the equivalent of a function SET option to allow the settings to
3890  * persist only until the caller calls reset_transmission_modes(). If an
3891  * error is thrown in between, guc.c will take care of undoing the settings.
3892  *
3893  * The return value is the nestlevel that must be passed to
3894  * reset_transmission_modes() to undo things.
3895  */
3896 int
3898 {
3899  int nestlevel = NewGUCNestLevel();
3900 
3901  /*
3902  * The values set here should match what pg_dump does. See also
3903  * configure_remote_session in connection.c.
3904  */
3905  if (DateStyle != USE_ISO_DATES)
3906  (void) set_config_option("datestyle", "ISO",
3908  GUC_ACTION_SAVE, true, 0, false);
3910  (void) set_config_option("intervalstyle", "postgres",
3912  GUC_ACTION_SAVE, true, 0, false);
3913  if (extra_float_digits < 3)
3914  (void) set_config_option("extra_float_digits", "3",
3916  GUC_ACTION_SAVE, true, 0, false);
3917 
3918  /*
3919  * In addition force restrictive search_path, in case there are any
3920  * regproc or similar constants to be printed.
3921  */
3922  (void) set_config_option("search_path", "pg_catalog",
3924  GUC_ACTION_SAVE, true, 0, false);
3925 
3926  return nestlevel;
3927 }
3928 
3929 /*
3930  * Undo the effects of set_transmission_modes().
3931  */
3932 void
3934 {
3935  AtEOXact_GUC(true, nestlevel);
3936 }
3937 
3938 /*
3939  * Utility routine to close a cursor.
3940  */
3941 static void
3943  PgFdwConnState *conn_state)
3944 {
3945  char sql[64];
3946  PGresult *res;
3947 
3948  snprintf(sql, sizeof(sql), "CLOSE c%u", cursor_number);
3949 
3950  /*
3951  * We don't use a PG_TRY block here, so be careful not to throw error
3952  * without releasing the PGresult.
3953  */
3954  res = pgfdw_exec_query(conn, sql, conn_state);
3956  pgfdw_report_error(ERROR, res, conn, true, sql);
3957  PQclear(res);
3958 }
3959 
3960 /*
3961  * create_foreign_modify
3962  * Construct an execution state of a foreign insert/update/delete
3963  * operation
3964  */
3965 static PgFdwModifyState *
3967  RangeTblEntry *rte,
3968  ResultRelInfo *resultRelInfo,
3969  CmdType operation,
3970  Plan *subplan,
3971  char *query,
3972  List *target_attrs,
3973  int values_end,
3974  bool has_returning,
3975  List *retrieved_attrs)
3976 {
3977  PgFdwModifyState *fmstate;
3978  Relation rel = resultRelInfo->ri_RelationDesc;
3979  TupleDesc tupdesc = RelationGetDescr(rel);
3980  Oid userid;
3981  ForeignTable *table;
3982  UserMapping *user;
3983  AttrNumber n_params;
3984  Oid typefnoid;
3985  bool isvarlena;
3986  ListCell *lc;
3987 
3988  /* Begin constructing PgFdwModifyState. */
3989  fmstate = (PgFdwModifyState *) palloc0(sizeof(PgFdwModifyState));
3990  fmstate->rel = rel;
3991 
3992  /* Identify which user to do the remote access as. */
3993  userid = ExecGetResultRelCheckAsUser(resultRelInfo, estate);
3994 
3995  /* Get info about foreign table. */
3996  table = GetForeignTable(RelationGetRelid(rel));
3997  user = GetUserMapping(userid, table->serverid);
3998 
3999  /* Open connection; report that we'll create a prepared statement. */
4000  fmstate->conn = GetConnection(user, true, &fmstate->conn_state);
4001  fmstate->p_name = NULL; /* prepared statement not made yet */
4002 
4003  /* Set up remote query information. */
4004  fmstate->query = query;
4005  if (operation == CMD_INSERT)
4006  {
4007  fmstate->query = pstrdup(fmstate->query);
4008  fmstate->orig_query = pstrdup(fmstate->query);
4009  }
4010  fmstate->target_attrs = target_attrs;
4011  fmstate->values_end = values_end;
4012  fmstate->has_returning = has_returning;
4013  fmstate->retrieved_attrs = retrieved_attrs;
4014 
4015  /* Create context for per-tuple temp workspace. */
4016  fmstate->temp_cxt = AllocSetContextCreate(estate->es_query_cxt,
4017  "postgres_fdw temporary data",
4019 
4020  /* Prepare for input conversion of RETURNING results. */
4021  if (fmstate->has_returning)
4022  fmstate->attinmeta = TupleDescGetAttInMetadata(tupdesc);
4023 
4024  /* Prepare for output conversion of parameters used in prepared stmt. */
4025  n_params = list_length(fmstate->target_attrs) + 1;
4026  fmstate->p_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * n_params);
4027  fmstate->p_nums = 0;
4028 
4029  if (operation == CMD_UPDATE || operation == CMD_DELETE)
4030  {
4031  Assert(subplan != NULL);
4032 
4033  /* Find the ctid resjunk column in the subplan's result */
4035  "ctid");
4036  if (!AttributeNumberIsValid(fmstate->ctidAttno))
4037  elog(ERROR, "could not find junk ctid column");
4038 
4039  /* First transmittable parameter will be ctid */
4040  getTypeOutputInfo(TIDOID, &typefnoid, &isvarlena);
4041  fmgr_info(typefnoid, &fmstate->p_flinfo[fmstate->p_nums]);
4042  fmstate->p_nums++;
4043  }
4044 
4045  if (operation == CMD_INSERT || operation == CMD_UPDATE)
4046  {
4047  /* Set up for remaining transmittable parameters */
4048  foreach(lc, fmstate->target_attrs)
4049  {
4050  int attnum = lfirst_int(lc);
4051  Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
4052 
4053  Assert(!attr->attisdropped);
4054 
4055  /* Ignore generated columns; they are set to DEFAULT */
4056  if (attr->attgenerated)
4057  continue;
4058  getTypeOutputInfo(attr->atttypid, &typefnoid, &isvarlena);
4059  fmgr_info(typefnoid, &fmstate->p_flinfo[fmstate->p_nums]);
4060  fmstate->p_nums++;
4061  }
4062  }
4063 
4064  Assert(fmstate->p_nums <= n_params);
4065 
4066  /* Set batch_size from foreign server/table options. */
4067  if (operation == CMD_INSERT)
4068  fmstate->batch_size = get_batch_size_option(rel);
4069 
4070  fmstate->num_slots = 1;
4071 
4072  /* Initialize auxiliary state */
4073  fmstate->aux_fmstate = NULL;
4074 
4075  return fmstate;
4076 }
4077 
4078 /*
4079  * execute_foreign_modify
4080  * Perform foreign-table modification as required, and fetch RETURNING
4081  * result if any. (This is the shared guts of postgresExecForeignInsert,
4082  * postgresExecForeignBatchInsert, postgresExecForeignUpdate, and
4083  * postgresExecForeignDelete.)
4084  */
4085 static TupleTableSlot **
4087  ResultRelInfo *resultRelInfo,
4088  CmdType operation,
4089  TupleTableSlot **slots,
4090  TupleTableSlot **planSlots,
4091  int *numSlots)
4092 {
4093  PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState;
4094  ItemPointer ctid = NULL;
4095  const char **p_values;
4096  PGresult *res;
4097  int n_rows;
4098  StringInfoData sql;
4099 
4100  /* The operation should be INSERT, UPDATE, or DELETE */
4101  Assert(operation == CMD_INSERT ||
4102  operation == CMD_UPDATE ||
4103  operation == CMD_DELETE);
4104 
4105  /* First, process a pending asynchronous request, if any. */
4106  if (fmstate->conn_state->pendingAreq)
4108 
4109  /*
4110  * If the existing query was deparsed and prepared for a different number
4111  * of rows, rebuild it for the proper number.
4112  */
4113  if (operation == CMD_INSERT && fmstate->num_slots != *numSlots)
4114  {
4115  /* Destroy the prepared statement created previously */
4116  if (fmstate->p_name)
4117  deallocate_query(fmstate);
4118 
4119  /* Build INSERT string with numSlots records in its VALUES clause. */
4120  initStringInfo(&sql);
4121  rebuildInsertSql(&sql, fmstate->rel,
4122  fmstate->orig_query, fmstate->target_attrs,
4123  fmstate->values_end, fmstate->p_nums,
4124  *numSlots - 1);
4125  pfree(fmstate->query);
4126  fmstate->query = sql.data;
4127  fmstate->num_slots = *numSlots;
4128  }
4129 
4130  /* Set up the prepared statement on the remote server, if we didn't yet */
4131  if (!fmstate->p_name)
4132  prepare_foreign_modify(fmstate);
4133 
4134  /*
4135  * For UPDATE/DELETE, get the ctid that was passed up as a resjunk column
4136  */
4137  if (operation == CMD_UPDATE || operation == CMD_DELETE)
4138  {
4139  Datum datum;
4140  bool isNull;
4141 
4142  datum = ExecGetJunkAttribute(planSlots[0],
4143  fmstate->ctidAttno,
4144  &isNull);
4145  /* shouldn't ever get a null result... */
4146  if (isNull)
4147  elog(ERROR, "ctid is NULL");
4148  ctid = (ItemPointer) DatumGetPointer(datum);
4149  }
4150 
4151  /* Convert parameters needed by prepared statement to text form */
4152  p_values = convert_prep_stmt_params(fmstate, ctid, slots, *numSlots);
4153 
4154  /*
4155  * Execute the prepared statement.
4156  */
4157  if (!PQsendQueryPrepared(fmstate->conn,
4158  fmstate->p_name,
4159  fmstate->p_nums * (*numSlots),
4160  p_values,
4161  NULL,
4162  NULL,
4163  0))
4164  pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query);
4165 
4166  /*
4167  * Get the result, and check for success.
4168  *
4169  * We don't use a PG_TRY block here, so be careful not to throw error
4170  * without releasing the PGresult.
4171  */
4172  res = pgfdw_get_result(fmstate->conn);
4173  if (PQresultStatus(res) !=
4175  pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query);
4176 
4177  /* Check number of rows affected, and fetch RETURNING tuple if any */
4178  if (fmstate->has_returning)
4179  {
4180  Assert(*numSlots == 1);
4181  n_rows = PQntuples(res);
4182  if (n_rows > 0)
4183  store_returning_result(fmstate, slots[0], res);
4184  }
4185  else
4186  n_rows = atoi(PQcmdTuples(res));
4187 
4188  /* And clean up */
4189  PQclear(res);
4190 
4191  MemoryContextReset(fmstate->temp_cxt);
4192 
4193  *numSlots = n_rows;
4194 
4195  /*
4196  * Return NULL if nothing was inserted/updated/deleted on the remote end
4197  */
4198  return (n_rows > 0) ? slots : NULL;
4199 }
4200 
4201 /*
4202  * prepare_foreign_modify
4203  * Establish a prepared statement for execution of INSERT/UPDATE/DELETE
4204  */
4205 static void
4207 {
4208  char prep_name[NAMEDATALEN];
4209  char *p_name;
4210  PGresult *res;
4211 
4212  /*
4213  * The caller would already have processed a pending asynchronous request
4214  * if any, so no need to do it here.
4215  */
4216 
4217  /* Construct name we'll use for the prepared statement. */
4218  snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
4219  GetPrepStmtNumber(fmstate->conn));
4220  p_name = pstrdup(prep_name);
4221 
4222  /*
4223  * We intentionally do not specify parameter types here, but leave the
4224  * remote server to derive them by default. This avoids possible problems
4225  * with the remote server using different type OIDs than we do. All of
4226  * the prepared statements we use in this module are simple enough that
4227  * the remote server will make the right choices.
4228  */
4229  if (!PQsendPrepare(fmstate->conn,
4230  p_name,
4231  fmstate->query,
4232  0,
4233  NULL))
4234  pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query);
4235 
4236  /*
4237  * Get the result, and check for success.
4238  *
4239  * We don't use a PG_TRY block here, so be careful not to throw error
4240  * without releasing the PGresult.
4241  */
4242  res = pgfdw_get_result(fmstate->conn);
4244  pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query);
4245  PQclear(res);
4246 
4247  /* This action shows that the prepare has been done. */
4248  fmstate->p_name = p_name;
4249 }
4250 
4251 /*
4252  * convert_prep_stmt_params
4253  * Create array of text strings representing parameter values
4254  *
4255  * tupleid is ctid to send, or NULL if none
4256  * slot is slot to get remaining parameters from, or NULL if none
4257  *
4258  * Data is constructed in temp_cxt; caller should reset that after use.
4259  */
4260 static const char **
4262  ItemPointer tupleid,
4263  TupleTableSlot **slots,
4264  int numSlots)
4265 {
4266  const char **p_values;
4267  int i;
4268  int j;
4269  int pindex = 0;
4270  MemoryContext oldcontext;
4271 
4272  oldcontext = MemoryContextSwitchTo(fmstate->temp_cxt);
4273 
4274  p_values = (const char **) palloc(sizeof(char *) * fmstate->p_nums * numSlots);
4275 
4276  /* ctid is provided only for UPDATE/DELETE, which don't allow batching */
4277  Assert(!(tupleid != NULL && numSlots > 1));
4278 
4279  /* 1st parameter should be ctid, if it's in use */
4280  if (tupleid != NULL)
4281  {
4282  Assert(numSlots == 1);
4283  /* don't need set_transmission_modes for TID output */
4284  p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
4285  PointerGetDatum(tupleid));
4286  pindex++;
4287  }
4288 
4289  /* get following parameters from slots */
4290  if (slots != NULL && fmstate->target_attrs != NIL)
4291  {
4292  TupleDesc tupdesc = RelationGetDescr(fmstate->rel);
4293  int nestlevel;
4294  ListCell *lc;
4295 
4296  nestlevel = set_transmission_modes();
4297 
4298  for (i = 0; i < numSlots; i++)
4299  {
4300  j = (tupleid != NULL) ? 1 : 0;
4301  foreach(lc, fmstate->target_attrs)
4302  {
4303  int attnum = lfirst_int(lc);
4304  Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
4305  Datum value;
4306  bool isnull;
4307 
4308  /* Ignore generated columns; they are set to DEFAULT */
4309  if (attr->attgenerated)
4310  continue;
4311  value = slot_getattr(slots[i], attnum, &isnull);
4312  if (isnull)
4313  p_values[pindex] = NULL;
4314  else
4315  p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[j],
4316  value);
4317  pindex++;
4318  j++;
4319  }
4320  }
4321 
4322  reset_transmission_modes(nestlevel);
4323  }
4324 
4325  Assert(pindex == fmstate->p_nums * numSlots);
4326 
4327  MemoryContextSwitchTo(oldcontext);
4328 
4329  return p_values;
4330 }
4331 
4332 /*
4333  * store_returning_result
4334  * Store the result of a RETURNING clause
4335  *
4336  * On error, be sure to release the PGresult on the way out. Callers do not
4337  * have PG_TRY blocks to ensure this happens.
4338  */
4339 static void
4341  TupleTableSlot *slot, PGresult *res)
4342 {
4343  PG_TRY();
4344  {
4345  HeapTuple newtup;
4346 
4347  newtup = make_tuple_from_result_row(res, 0,
4348  fmstate->rel,
4349  fmstate->attinmeta,
4350  fmstate->retrieved_attrs,
4351  NULL,
4352  fmstate->temp_cxt);
4353 
4354  /*
4355  * The returning slot will not necessarily be suitable to store
4356  * heaptuples directly, so allow for conversion.
4357  */
4358  ExecForceStoreHeapTuple(newtup, slot, true);
4359  }
4360  PG_CATCH();
4361  {
4362  PQclear(res);
4363  PG_RE_THROW();
4364  }
4365  PG_END_TRY();
4366 }
4367 
4368 /*
4369  * finish_foreign_modify
4370  * Release resources for a foreign insert/update/delete operation
4371  */
4372 static void
4374 {
4375  Assert(fmstate != NULL);
4376 
4377  /* If we created a prepared statement, destroy it */
4378  deallocate_query(fmstate);
4379 
4380  /* Release remote connection */
4381  ReleaseConnection(fmstate->conn);
4382  fmstate->conn = NULL;
4383 }
4384 
4385 /*
4386  * deallocate_query
4387  * Deallocate a prepared statement for a foreign insert/update/delete
4388  * operation
4389  */
4390 static void
4392 {
4393  char sql[64];
4394  PGresult *res;
4395 
4396  /* do nothing if the query is not allocated */
4397  if (!fmstate->p_name)
4398  return;
4399 
4400  snprintf(sql, sizeof(sql), "DEALLOCATE %s", fmstate->p_name);
4401 
4402  /*
4403  * We don't use a PG_TRY block here, so be careful not to throw error
4404  * without releasing the PGresult.
4405  */
4406  res = pgfdw_exec_query(fmstate->conn, sql, fmstate->conn_state);
4408  pgfdw_report_error(ERROR, res, fmstate->conn, true, sql);
4409  PQclear(res);
4410  pfree(fmstate->p_name);
4411  fmstate->p_name = NULL;
4412 }
4413 
4414 /*
4415  * build_remote_returning
4416  * Build a RETURNING targetlist of a remote query for performing an
4417  * UPDATE/DELETE .. RETURNING on a join directly
4418  */
4419 static List *
4420 build_remote_returning(Index rtindex, Relation rel, List *returningList)
4421 {
4422  bool have_wholerow = false;
4423  List *tlist = NIL;
4424  List *vars;
4425  ListCell *lc;
4426 
4427  Assert(returningList);
4428 
4429  vars = pull_var_clause((Node *) returningList, PVC_INCLUDE_PLACEHOLDERS);
4430 
4431  /*
4432  * If there's a whole-row reference to the target relation, then we'll
4433  * need all the columns of the relation.
4434  */
4435  foreach(lc, vars)
4436  {
4437  Var *var = (Var *) lfirst(lc);
4438 
4439  if (IsA(var, Var) &&
4440  var->varno == rtindex &&
4441  var->varattno == InvalidAttrNumber)
4442  {
4443  have_wholerow = true;
4444  break;
4445  }
4446  }
4447 
4448  if (have_wholerow)
4449  {
4450  TupleDesc tupdesc = RelationGetDescr(rel);
4451  int i;
4452 
4453  for (i = 1; i <= tupdesc->natts; i++)
4454  {
4455  Form_pg_attribute attr = TupleDescAttr(tupdesc, i - 1);
4456  Var *var;
4457 
4458  /* Ignore dropped attributes. */
4459  if (attr->attisdropped)
4460  continue;
4461 
4462  var = makeVar(rtindex,
4463  i,
4464  attr->atttypid,
4465  attr->atttypmod,
4466  attr->attcollation,
4467  0);
4468 
4469  tlist = lappend(tlist,
4470  makeTargetEntry((Expr *) var,
4471  list_length(tlist) + 1,
4472  NULL,
4473  false));
4474  }
4475  }
4476 
4477  /* Now add any remaining columns to tlist. */
4478  foreach(lc, vars)
4479  {
4480  Var *var = (Var *) lfirst(lc);
4481 
4482  /*
4483  * No need for whole-row references to the target relation. We don't
4484  * need system columns other than ctid and oid either, since those are
4485  * set locally.
4486  */
4487  if (IsA(var, Var) &&
4488  var->varno == rtindex &&
4489  var->varattno <= InvalidAttrNumber &&
4491  continue; /* don't need it */
4492 
4493  if (tlist_member((Expr *) var, tlist))
4494  continue; /* already got it */
4495 
4496  tlist = lappend(tlist,
4497  makeTargetEntry((Expr *) var,
4498  list_length(tlist) + 1,
4499  NULL,
4500  false));
4501  }
4502 
4503  list_free(vars);
4504 
4505  return tlist;
4506 }
4507 
4508 /*
4509  * rebuild_fdw_scan_tlist
4510  * Build new fdw_scan_tlist of given foreign-scan plan node from given
4511  * tlist
4512  *
4513  * There might be columns that the fdw_scan_tlist of the given foreign-scan
4514  * plan node contains that the given tlist doesn't. The fdw_scan_tlist would
4515  * have contained resjunk columns such as 'ctid' of the target relation and
4516  * 'wholerow' of non-target relations, but the tlist might not contain them,
4517  * for example. So, adjust the tlist so it contains all the columns specified
4518  * in the fdw_scan_tlist; else setrefs.c will get confused.
4519  */
4520 static void
4522 {
4523  List *new_tlist = tlist;
4524  List *old_tlist = fscan->fdw_scan_tlist;
4525  ListCell *lc;
4526 
4527  foreach(lc, old_tlist)
4528  {
4529  TargetEntry *tle = (TargetEntry *) lfirst(lc);
4530 
4531  if (tlist_member(tle->expr, new_tlist))
4532  continue; /* already got it */
4533 
4534  new_tlist = lappend(new_tlist,
4535  makeTargetEntry(tle->expr,
4536  list_length(new_tlist) + 1,
4537  NULL,
4538  false));
4539  }
4540  fscan->fdw_scan_tlist = new_tlist;
4541 }
4542 
4543 /*
4544  * Execute a direct UPDATE/DELETE statement.
4545  */
4546 static void
4548 {
4550  ExprContext *econtext = node->ss.ps.ps_ExprContext;
4551  int numParams = dmstate->numParams;
4552  const char **values = dmstate->param_values;
4553 
4554  /* First, process a pending asynchronous request, if any. */
4555  if (dmstate->conn_state->pendingAreq)
4557 
4558  /*
4559  * Construct array of query parameter values in text format.
4560  */
4561  if (numParams > 0)
4562  process_query_params(econtext,
4563  dmstate->param_flinfo,
4564  dmstate->param_exprs,
4565  values);
4566 
4567  /*
4568  * Notice that we pass NULL for paramTypes, thus forcing the remote server
4569  * to infer types for all parameters. Since we explicitly cast every
4570  * parameter (see deparse.c), the "inference" is trivial and will produce
4571  * the desired result. This allows us to avoid assuming that the remote
4572  * server has the same OIDs we do for the parameters' types.
4573  */
4574  if (!PQsendQueryParams(dmstate->conn, dmstate->query, numParams,
4575  NULL, values, NULL, NULL, 0))
4576  pgfdw_report_error(ERROR, NULL, dmstate->conn, false, dmstate->query);
4577 
4578  /*
4579  * Get the result, and check for success.
4580  *
4581  * We don't use a PG_TRY block here, so be careful not to throw error
4582  * without releasing the PGresult.
4583  */
4584  dmstate->result = pgfdw_get_result(dmstate->conn);
4585  if (PQresultStatus(dmstate->result) !=
4587  pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, true,
4588  dmstate->query);
4589 
4590  /* Get the number of rows affected. */
4591  if (dmstate->has_returning)
4592  dmstate->num_tuples = PQntuples(dmstate->result);
4593  else
4594  dmstate->num_tuples = atoi(PQcmdTuples(dmstate->result));
4595 }
4596 
4597 /*
4598  * Get the result of a RETURNING clause.
4599  */
4600 static TupleTableSlot *
4602 {
4604  EState *estate = node->ss.ps.state;
4605  ResultRelInfo *resultRelInfo = node->resultRelInfo;
4606  TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
4607  TupleTableSlot *resultSlot;
4608 
4609  Assert(resultRelInfo->ri_projectReturning);
4610 
4611  /* If we didn't get any tuples, must be end of data. */
4612  if (dmstate->next_tuple >= dmstate->num_tuples)
4613  return ExecClearTuple(slot);
4614 
4615  /* Increment the command es_processed count if necessary. */
4616  if (dmstate->set_processed)
4617  estate->es_processed += 1;
4618 
4619  /*
4620  * Store a RETURNING tuple. If has_returning is false, just emit a dummy
4621  * tuple. (has_returning is false when the local query is of the form
4622  * "UPDATE/DELETE .. RETURNING 1" for example.)
4623  */
4624  if (!dmstate->has_returning)
4625  {
4626  ExecStoreAllNullTuple(slot);
4627  resultSlot = slot;
4628  }
4629  else
4630  {
4631  /*
4632  * On error, be sure to release the PGresult on the way out. Callers
4633  * do not have PG_TRY blocks to ensure this happens.
4634  */
4635  PG_TRY();
4636  {
4637  HeapTuple newtup;
4638 
4639  newtup = make_tuple_from_result_row(dmstate->result,
4640  dmstate->next_tuple,
4641  dmstate->rel,
4642  dmstate->attinmeta,
4643  dmstate->retrieved_attrs,
4644  node,
4645  dmstate->temp_cxt);
4646  ExecStoreHeapTuple(newtup, slot, false);
4647  }
4648  PG_CATCH();
4649  {
4650  PQclear(dmstate->result);
4651  PG_RE_THROW();
4652  }
4653  PG_END_TRY();
4654 
4655  /* Get the updated/deleted tuple. */
4656  if (dmstate->rel)
4657  resultSlot = slot;
4658  else
4659  resultSlot = apply_returning_filter(dmstate, resultRelInfo, slot, estate);
4660  }
4661  dmstate->next_tuple++;
4662 
4663  /* Make slot available for evaluation of the local query RETURNING list. */
4664  resultRelInfo->ri_projectReturning->pi_exprContext->ecxt_scantuple =
4665  resultSlot;
4666 
4667  return slot;
4668 }
4669 
4670 /*
4671  * Initialize a filter to extract an updated/deleted tuple from a scan tuple.
4672  */
4673 static void
4675  List *fdw_scan_tlist,
4676  Index rtindex)
4677 {
4678  TupleDesc resultTupType = RelationGetDescr(dmstate->resultRel);
4679  ListCell *lc;
4680  int i;
4681 
4682  /*
4683  * Calculate the mapping between the fdw_scan_tlist's entries and the
4684  * result tuple's attributes.
4685  *
4686  * The "map" is an array of indexes of the result tuple's attributes in
4687  * fdw_scan_tlist, i.e., one entry for every attribute of the result
4688  * tuple. We store zero for any attributes that don't have the
4689  * corresponding entries in that list, marking that a NULL is needed in
4690  * the result tuple.
4691  *
4692  * Also get the indexes of the entries for ctid and oid if any.
4693  */
4694  dmstate->attnoMap = (AttrNumber *)
4695  palloc0(resultTupType->natts * sizeof(AttrNumber));
4696 
4697  dmstate->ctidAttno = dmstate->oidAttno = 0;
4698 
4699  i = 1;
4700  dmstate->hasSystemCols = false;
4701  foreach(lc, fdw_scan_tlist)
4702  {
4703  TargetEntry *tle = (TargetEntry *) lfirst(lc);
4704  Var *var = (Var *) tle->expr;
4705 
4706  Assert(IsA(var, Var));
4707 
4708  /*
4709  * If the Var is a column of the target relation to be retrieved from
4710  * the foreign server, get the index of the entry.
4711  */
4712  if (var->varno == rtindex &&
4713  list_member_int(dmstate->retrieved_attrs, i))
4714  {
4715  int attrno = var->varattno;
4716 
4717  if (attrno < 0)
4718  {
4719  /*
4720  * We don't retrieve system columns other than ctid and oid.
4721  */
4722  if (attrno == SelfItemPointerAttributeNumber)
4723  dmstate->ctidAttno = i;
4724  else
4725  Assert(false);
4726  dmstate->hasSystemCols = true;
4727  }
4728  else
4729  {
4730  /*
4731  * We don't retrieve whole-row references to the target
4732  * relation either.
4733  */
4734  Assert(attrno > 0);
4735 
4736  dmstate->attnoMap[attrno - 1] = i;
4737  }
4738  }
4739  i++;
4740  }
4741 }
4742 
4743 /*
4744  * Extract and return an updated/deleted tuple from a scan tuple.
4745  */
4746 static TupleTableSlot *
4748  ResultRelInfo *resultRelInfo,
4749  TupleTableSlot *slot,
4750  EState *estate)
4751 {
4752  TupleDesc resultTupType = RelationGetDescr(dmstate->resultRel);
4753  TupleTableSlot *resultSlot;
4754  Datum *values;
4755  bool *isnull;
4756  Datum *old_values;
4757  bool *old_isnull;
4758  int i;
4759 
4760  /*
4761  * Use the return tuple slot as a place to store the result tuple.
4762  */
4763  resultSlot = ExecGetReturningSlot(estate, resultRelInfo);
4764 
4765  /*
4766  * Extract all the values of the scan tuple.
4767  */
4768  slot_getallattrs(slot);
4769  old_values = slot->tts_values;
4770  old_isnull = slot->tts_isnull;
4771 
4772  /*
4773  * Prepare to build the result tuple.
4774  */
4775  ExecClearTuple(resultSlot);
4776  values = resultSlot->tts_values;
4777  isnull = resultSlot->tts_isnull;
4778 
4779  /*
4780  * Transpose data into proper fields of the result tuple.
4781  */
4782  for (i = 0; i < resultTupType->natts; i++)
4783  {
4784  int j = dmstate->attnoMap[i];
4785 
4786  if (j == 0)
4787  {
4788  values[i] = (Datum) 0;
4789  isnull[i] = true;
4790  }
4791  else
4792  {
4793  values[i] = old_values[j - 1];
4794  isnull[i] = old_isnull[j - 1];
4795  }
4796  }
4797 
4798  /*
4799  * Build the virtual tuple.
4800  */
4801  ExecStoreVirtualTuple(resultSlot);
4802 
4803  /*
4804  * If we have any system columns to return, materialize a heap tuple in
4805  * the slot from column values set above and install system columns in
4806  * that tuple.
4807  */
4808  if (dmstate->hasSystemCols)
4809  {
4810  HeapTuple resultTup = ExecFetchSlotHeapTuple(resultSlot, true, NULL);
4811 
4812  /* ctid */
4813  if (dmstate->ctidAttno)
4814  {
4815  ItemPointer ctid = NULL;
4816 
4817  ctid = (ItemPointer) DatumGetPointer(old_values[dmstate->ctidAttno - 1]);
4818  resultTup->t_self = *ctid;
4819  }
4820 
4821  /*
4822  * And remaining columns
4823  *
4824  * Note: since we currently don't allow the target relation to appear
4825  * on the nullable side of an outer join, any system columns wouldn't
4826  * go to NULL.
4827  *
4828  * Note: no need to care about tableoid here because it will be
4829  * initialized in ExecProcessReturning().
4830  */
4834  }
4835 
4836  /*
4837  * And return the result tuple.
4838  */
4839  return resultSlot;
4840 }
4841 
4842 /*
4843  * Prepare for processing of parameters used in remote query.
4844  */
4845 static void
4847  List *fdw_exprs,
4848  int numParams,
4849  FmgrInfo **param_flinfo,
4850  List **param_exprs,
4851  const char ***param_values)
4852 {
4853  int i;
4854  ListCell *lc;
4855 
4856  Assert(numParams > 0);
4857 
4858  /* Prepare for output conversion of parameters used in remote query. */
4859  *param_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * numParams);
4860 
4861  i = 0;
4862  foreach(lc, fdw_exprs)
4863  {
4864  Node *param_expr = (Node *) lfirst(lc);
4865  Oid typefnoid;
4866  bool isvarlena;
4867 
4868  getTypeOutputInfo(exprType(param_expr), &typefnoid, &isvarlena);
4869  fmgr_info(typefnoid, &(*param_flinfo)[i]);
4870  i++;
4871  }
4872 
4873  /*
4874  * Prepare remote-parameter expressions for evaluation. (Note: in
4875  * practice, we expect that all these expressions will be just Params, so
4876  * we could possibly do something more efficient than using the full
4877  * expression-eval machinery for this. But probably there would be little
4878  * benefit, and it'd require postgres_fdw to know more than is desirable
4879  * about Param evaluation.)
4880  */
4881  *param_exprs = ExecInitExprList(fdw_exprs, node);
4882 
4883  /* Allocate buffer for text form of query parameters. */
4884  *param_values = (const char **) palloc0(numParams * sizeof(char *));
4885 }
4886 
4887 /*
4888  * Construct array of query parameter values in text format.
4889  */
4890 static void
4892  FmgrInfo *param_flinfo,
4893  List *param_exprs,
4894  const char **param_values)
4895 {
4896  int nestlevel;
4897  int i;
4898  ListCell *lc;
4899 
4900  nestlevel = set_transmission_modes();
4901 
4902  i = 0;
4903  foreach(lc, param_exprs)
4904  {
4905  ExprState *expr_state = (ExprState *) lfirst(lc);
4906  Datum expr_value;
4907  bool isNull;
4908 
4909  /* Evaluate the parameter expression */
4910  expr_value = ExecEvalExpr(expr_state, econtext, &isNull);
4911 
4912  /*
4913  * Get string representation of each parameter value by invoking
4914  * type-specific output function, unless the value is null.
4915  */
4916  if (isNull)
4917  param_values[i] = NULL;
4918  else
4919  param_values[i] = OutputFunctionCall(&param_flinfo[i], expr_value);
4920 
4921  i++;
4922  }
4923 
4924  reset_transmission_modes(nestlevel);
4925 }
4926 
4927 /*
4928  * postgresAnalyzeForeignTable
4929  * Test whether analyzing this foreign table is supported
4930  */
4931 static bool
4933  AcquireSampleRowsFunc *func,
4934  BlockNumber *totalpages)
4935 {
4936  ForeignTable *table;
4937  UserMapping *user;
4938  PGconn *conn;
4939  StringInfoData sql;
4940  PGresult *volatile res = NULL;
4941 
4942  /* Return the row-analysis function pointer */
4944 
4945  /*
4946  * Now we have to get the number of pages. It's annoying that the ANALYZE
4947  * API requires us to return that now, because it forces some duplication
4948  * of effort between this routine and postgresAcquireSampleRowsFunc. But
4949  * it's probably not worth redefining that API at this point.
4950  */
4951 
4952  /*
4953  * Get the connection to use. We do the remote access as the table's
4954  * owner, even if the ANALYZE was started by some other user.
4955  */
4956  table = GetForeignTable(RelationGetRelid(relation));
4957  user = GetUserMapping(relation->rd_rel->relowner, table->serverid);
4958  conn = GetConnection(user, false, NULL);
4959 
4960  /*
4961  * Construct command to get page count for relation.
4962  */
4963  initStringInfo(&sql);
4964  deparseAnalyzeSizeSql(&sql, relation);
4965 
4966  /* In what follows, do not risk leaking any PGresults. */
4967  PG_TRY();
4968  {
4969  res = pgfdw_exec_query(conn, sql.data, NULL);
4971  pgfdw_report_error(ERROR, res, conn, false, sql.data);
4972 
4973  if (PQntuples(res) != 1 || PQnfields(res) != 1)
4974  elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query");
4975  *totalpages = strtoul(PQgetvalue(res, 0, 0), NULL, 10);
4976  }
4977  PG_FINALLY();
4978  {
4979  PQclear(res);
4980  }
4981  PG_END_TRY();
4982 
4984 
4985  return true;
4986 }
4987 
4988 /*
4989  * postgresGetAnalyzeInfoForForeignTable
4990  * Count tuples in foreign table (just get pg_class.reltuples).
4991  *
4992  * can_tablesample determines if the remote relation supports acquiring the
4993  * sample using TABLESAMPLE.
4994  */
4995 static double
4996 postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample)
4997 {
4998  ForeignTable *table;
4999  UserMapping *user;
5000  PGconn *conn;
5001  StringInfoData sql;
5002  PGresult *volatile res = NULL;
5003  volatile double reltuples = -1;
5004  volatile char relkind = 0;
5005 
5006  /* assume the remote relation does not support TABLESAMPLE */
5007  *can_tablesample = false;
5008 
5009  /*
5010  * Get the connection to use. We do the remote access as the table's
5011  * owner, even if the ANALYZE was started by some other user.
5012  */
5013  table = GetForeignTable(RelationGetRelid(relation));
5014  user = GetUserMapping(relation->rd_rel->relowner, table->serverid);
5015  conn = GetConnection(user, false, NULL);
5016 
5017  /*
5018  * Construct command to get page count for relation.
5019  */
5020  initStringInfo(&sql);
5021  deparseAnalyzeInfoSql(&sql, relation);
5022 
5023  /* In what follows, do not risk leaking any PGresults. */
5024  PG_TRY();
5025  {
5026  res = pgfdw_exec_query(conn, sql.data, NULL);
5028  pgfdw_report_error(ERROR, res, conn, false, sql.data);
5029 
5030  if (PQntuples(res) != 1 || PQnfields(res) != 2)
5031  elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query");
5032  reltuples = strtod(PQgetvalue(res, 0, 0), NULL);
5033  relkind = *(PQgetvalue(res, 0, 1));
5034  }
5035  PG_FINALLY();
5036  {
5037  if (res)
5038  PQclear(res);
5039  }
5040  PG_END_TRY();
5041 
5043 
5044  /* TABLESAMPLE is supported only for regular tables and matviews */
5045  *can_tablesample = (relkind == RELKIND_RELATION ||
5046  relkind == RELKIND_MATVIEW ||
5047  relkind == RELKIND_PARTITIONED_TABLE);
5048 
5049  return reltuples;
5050 }
5051 
5052 /*
5053  * Acquire a random sample of rows from foreign table managed by postgres_fdw.
5054  *
5055  * Selected rows are returned in the caller-allocated array rows[],
5056  * which must have at least targrows entries.
5057  * The actual number of rows selected is returned as the function result.
5058  * We also count the total number of rows in the table and return it into
5059  * *totalrows. Note that *totaldeadrows is always set to 0.
5060  *
5061  * Note that the returned list of rows is not always in order by physical
5062  * position in the table. Therefore, correlation estimates derived later
5063  * may be meaningless, but it's OK because we don't use the estimates
5064  * currently (the planner only pays attention to correlation for indexscans).
5065  */
5066 static int
5068  HeapTuple *rows, int targrows,
5069  double *totalrows,
5070  double *totaldeadrows)
5071 {
5072  PgFdwAnalyzeState astate;
5073  ForeignTable *table;
5074  ForeignServer *server;
5075  UserMapping *user;
5076  PGconn *conn;
5077  int server_version_num;
5078  PgFdwSamplingMethod method = ANALYZE_SAMPLE_AUTO; /* auto is default */
5079  double sample_frac = -1.0;
5080  double reltuples;
5081  unsigned int cursor_number;
5082  StringInfoData sql;
5083  PGresult *volatile res = NULL;
5084  ListCell *lc;
5085 
5086  /* Initialize workspace state */
5087  astate.rel = relation;
5089 
5090  astate.rows = rows;
5091  astate.targrows = targrows;
5092  astate.numrows = 0;
5093  astate.samplerows = 0;
5094  astate.rowstoskip = -1; /* -1 means not set yet */
5095  reservoir_init_selection_state(&astate.rstate, targrows);
5096 
5097  /* Remember ANALYZE context, and create a per-tuple temp context */
5098  astate.anl_cxt = CurrentMemoryContext;
5100  "postgres_fdw temporary data",
5102 
5103  /*
5104  * Get the connection to use. We do the remote access as the table's
5105  * owner, even if the ANALYZE was started by some other user.
5106  */
5107  table = GetForeignTable(RelationGetRelid(relation));
5108  server = GetForeignServer(table->serverid);
5109  user = GetUserMapping(relation->rd_rel->relowner, table->serverid);
5110  conn = GetConnection(user, false, NULL);
5111 
5112  /* We'll need server version, so fetch it now. */
5114 
5115  /*
5116  * What sampling method should we use?
5117  */
5118  foreach(lc, server->options)
5119  {
5120  DefElem *def = (DefElem *) lfirst(lc);
5121 
5122  if (strcmp(def->defname, "analyze_sampling") == 0)
5123  {
5124  char *value = defGetString(def);
5125 
5126  if (strcmp(value, "off") == 0)
5127  method = ANALYZE_SAMPLE_OFF;
5128  else if (strcmp(value, "auto") == 0)
5129  method = ANALYZE_SAMPLE_AUTO;
5130  else if (strcmp(value, "random") == 0)
5131  method = ANALYZE_SAMPLE_RANDOM;
5132  else if (strcmp(value, "system") == 0)
5133  method = ANALYZE_SAMPLE_SYSTEM;
5134  else if (strcmp(value, "bernoulli") == 0)
5135  method = ANALYZE_SAMPLE_BERNOULLI;
5136 
5137  break;
5138  }
5139  }
5140 
5141  foreach(lc, table->options)
5142  {
5143  DefElem *def = (DefElem *) lfirst(lc);
5144 
5145  if (strcmp(def->defname, "analyze_sampling") == 0)
5146  {
5147  char *value = defGetString(def);
5148 
5149  if (strcmp(value, "off") == 0)
5150  method = ANALYZE_SAMPLE_OFF;
5151  else if (strcmp(value, "auto") == 0)
5152  method = ANALYZE_SAMPLE_AUTO;
5153  else if (strcmp(value, "random") == 0)
5154  method = ANALYZE_SAMPLE_RANDOM;
5155  else if (strcmp(value, "system") == 0)
5156  method = ANALYZE_SAMPLE_SYSTEM;
5157  else if (strcmp(value, "bernoulli") == 0)
5158  method = ANALYZE_SAMPLE_BERNOULLI;
5159 
5160  break;
5161  }
5162  }
5163 
5164  /*
5165  * Error-out if explicitly required one of the TABLESAMPLE methods, but
5166  * the server does not support it.
5167  */
5168  if ((server_version_num < 95000) &&
5169  (method == ANALYZE_SAMPLE_SYSTEM ||
5170  method == ANALYZE_SAMPLE_BERNOULLI))
5171  ereport(ERROR,
5172  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5173  errmsg("remote server does not support TABLESAMPLE feature")));
5174 
5175  /*
5176  * If we've decided to do remote sampling, calculate the sampling rate. We
5177  * need to get the number of tuples from the remote server, but skip that
5178  * network round-trip if not needed.
5179  */
5180  if (method != ANALYZE_SAMPLE_OFF)
5181  {
5182  bool can_tablesample;
5183 
5184  reltuples = postgresGetAnalyzeInfoForForeignTable(relation,
5185  &can_tablesample);
5186 
5187  /*
5188  * Make sure we're not choosing TABLESAMPLE when the remote relation
5189  * does not support that. But only do this for "auto" - if the user
5190  * explicitly requested BERNOULLI/SYSTEM, it's better to fail.
5191  */
5192  if (!can_tablesample && (method == ANALYZE_SAMPLE_AUTO))
5193  method = ANALYZE_SAMPLE_RANDOM;
5194 
5195  /*
5196  * Remote's reltuples could be 0 or -1 if the table has never been
5197  * vacuumed/analyzed. In that case, disable sampling after all.
5198  */
5199  if ((reltuples <= 0) || (targrows >= reltuples))
5200  method = ANALYZE_SAMPLE_OFF;
5201  else
5202  {
5203  /*
5204  * All supported sampling methods require sampling rate, not
5205  * target rows directly, so we calculate that using the remote
5206  * reltuples value. That's imperfect, because it might be off a
5207  * good deal, but that's not something we can (or should) address
5208  * here.
5209  *
5210  * If reltuples is too low (i.e. when table grew), we'll end up
5211  * sampling more rows - but then we'll apply the local sampling,
5212  * so we get the expected sample size. This is the same outcome as
5213  * without remote sampling.
5214  *
5215  * If reltuples is too high (e.g. after bulk DELETE), we will end
5216  * up sampling too few rows.
5217  *
5218  * We can't really do much better here - we could try sampling a
5219  * bit more rows, but we don't know how off the reltuples value is
5220  * so how much is "a bit more"?
5221  *
5222  * Furthermore, the targrows value for partitions is determined
5223  * based on table size (relpages), which can be off in different
5224  * ways too. Adjusting the sampling rate here might make the issue
5225  * worse.
5226  */
5227  sample_frac = targrows / reltuples;
5228 
5229  /*
5230  * We should never get sampling rate outside the valid range
5231  * (between 0.0 and 1.0), because those cases should be covered by
5232  * the previous branch that sets ANALYZE_SAMPLE_OFF.
5233  */
5234  Assert(sample_frac >= 0.0 && sample_frac <= 1.0);
5235  }
5236  }
5237 
5238  /*
5239  * For "auto" method, pick the one we believe is best. For servers with
5240  * TABLESAMPLE support we pick BERNOULLI, for old servers we fall-back to
5241  * random() to at least reduce network transfer.
5242  */
5243  if (method == ANALYZE_SAMPLE_AUTO)
5244  {
5245  if (server_version_num < 95000)
5246  method = ANALYZE_SAMPLE_RANDOM;
5247  else
5248  method = ANALYZE_SAMPLE_BERNOULLI;
5249  }
5250 
5251  /*
5252  * Construct cursor that retrieves whole rows from remote.
5253  */
5255  initStringInfo(&sql);
5256  appendStringInfo(&sql, "DECLARE c%u CURSOR FOR ", cursor_number);
5257 
5258  deparseAnalyzeSql(&sql, relation, method, sample_frac, &astate.retrieved_attrs);
5259 
5260  /* In what follows, do not risk leaking any PGresults. */
5261  PG_TRY();
5262  {
5263  char fetch_sql[64];
5264  int fetch_size;
5265 
5266  res = pgfdw_exec_query(conn, sql.data, NULL);
5268  pgfdw_report_error(ERROR, res, conn, false, sql.data);
5269  PQclear(res);
5270  res = NULL;
5271 
5272  /*
5273  * Determine the fetch size. The default is arbitrary, but shouldn't
5274  * be enormous.
5275  */
5276  fetch_size = 100;
5277  foreach(lc, server->options)
5278  {
5279  DefElem *def = (DefElem *) lfirst(lc);
5280 
5281  if (strcmp(def->defname, "fetch_size") == 0)
5282  {
5283  (void) parse_int(defGetString(def), &fetch_size, 0, NULL);
5284  break;
5285  }
5286  }
5287  foreach(lc, table->options)
5288  {
5289  DefElem *def = (DefElem *) lfirst(lc);
5290 
5291  if (strcmp(def->defname, "fetch_size") == 0)
5292  {
5293  (void) parse_int(defGetString(def), &fetch_size, 0, NULL);
5294  break;
5295  }
5296  }
5297 
5298  /* Construct command to fetch rows from remote. */
5299  snprintf(fetch_sql, sizeof(fetch_sql), "FETCH %d FROM c%u",
5301 
5302  /* Retrieve and process rows a batch at a time. */
5303  for (;;)
5304  {
5305  int numrows;
5306  int i;
5307 
5308  /* Allow users to cancel long query */
5310 
5311  /*
5312  * XXX possible future improvement: if rowstoskip is large, we
5313  * could issue a MOVE rather than physically fetching the rows,
5314  * then just adjust rowstoskip and samplerows appropriately.
5315  */
5316 
5317  /* Fetch some rows */
5318  res = pgfdw_exec_query(conn, fetch_sql, NULL);
5319  /* On error, report the original query, not the FETCH. */
5321  pgfdw_report_error(ERROR, res, conn, false, sql.data);
5322 
5323  /* Process whatever we got. */
5324  numrows = PQntuples(res);
5325  for (i = 0; i < numrows; i++)
5326  analyze_row_processor(res, i, &astate);
5327 
5328  PQclear(res);
5329  res = NULL;
5330 
5331  /* Must be EOF if we didn't get all the rows requested. */
5332  if (numrows < fetch_size)
5333  break;
5334  }
5335 
5336  /* Close the cursor, just to be tidy. */
5338  }
5339  PG_CATCH();
5340  {
5341  PQclear(res);
5342  PG_RE_THROW();
5343  }
5344  PG_END_TRY();
5345 
5347 
5348  /* We assume that we have no dead tuple. */
5349  *totaldeadrows = 0.0;
5350 
5351  /*
5352  * Without sampling, we've retrieved all living tuples from foreign
5353  * server, so report that as totalrows. Otherwise use the reltuples
5354  * estimate we got from the remote side.
5355  */
5356  if (method == ANALYZE_SAMPLE_OFF)
5357  *totalrows = astate.samplerows;
5358  else
5359  *totalrows = reltuples;
5360 
5361  /*
5362  * Emit some interesting relation info
5363  */
5364  ereport(elevel,
5365  (errmsg("\"%s\": table contains %.0f rows, %d rows in sample",
5366  RelationGetRelationName(relation),
5367  *totalrows, astate.numrows)));
5368 
5369  return astate.numrows;
5370 }
5371 
5372 /*
5373  * Collect sample rows from the result of query.
5374  * - Use all tuples in sample until target # of samples are collected.
5375  * - Subsequently, replace already-sampled tuples randomly.
5376  */
5377 static void
5379 {
5380  int targrows = astate->targrows;
5381  int pos; /* array index to store tuple in */
5382  MemoryContext oldcontext;
5383 
5384  /* Always increment sample row counter. */
5385  astate->samplerows += 1;
5386 
5387  /*
5388  * Determine the slot where this sample row should be stored. Set pos to
5389  * negative value to indicate the row should be skipped.
5390  */
5391  if (astate->numrows < targrows)
5392  {
5393  /* First targrows rows are always included into the sample */
5394  pos = astate->numrows++;
5395  }
5396  else
5397  {
5398  /*
5399  * Now we start replacing tuples in the sample until we reach the end
5400  * of the relation. Same algorithm as in acquire_sample_rows in
5401  * analyze.c; see Jeff Vitter's paper.
5402  */
5403  if (astate->rowstoskip < 0)
5404  astate->rowstoskip = reservoir_get_next_S(&astate->rstate, astate->samplerows, targrows);
5405 
5406  if (astate->rowstoskip <= 0)
5407  {
5408  /* Choose a random reservoir element to replace. */
5409  pos = (int) (targrows * sampler_random_fract(&astate->rstate.randstate));
5410  Assert(pos >= 0 && pos < targrows);
5411  heap_freetuple(astate->rows[pos]);
5412  }
5413  else
5414  {
5415  /* Skip this tuple. */
5416  pos = -1;
5417  }
5418 
5419  astate->rowstoskip -= 1;
5420  }
5421 
5422  if (pos >= 0)
5423  {
5424  /*
5425  * Create sample tuple from current result row, and store it in the
5426  * position determined above. The tuple has to be created in anl_cxt.
5427  */
5428  oldcontext = MemoryContextSwitchTo(astate->anl_cxt);
5429 
5430  astate->rows[pos] = make_tuple_from_result_row(res, row,
5431  astate->rel,
5432  astate->attinmeta,
5433  astate->retrieved_attrs,
5434  NULL,
5435  astate->temp_cxt);
5436 
5437  MemoryContextSwitchTo(oldcontext);
5438  }
5439 }
5440 
5441 /*
5442  * Import a foreign schema
5443  */
5444 static List *
5446 {
5447  List *commands = NIL;
5448  bool import_collate = true;
5449  bool import_default = false;
5450  bool import_generated = true;
5451  bool import_not_null = true;
5452  ForeignServer *server;
5453  UserMapping *mapping;
5454  PGconn *conn;
5456  PGresult *volatile res = NULL;
5457  int numrows,
5458  i;
5459  ListCell *lc;
5460 
5461  /* Parse statement options */
5462  foreach(lc, stmt->options)
5463  {
5464  DefElem *def = (DefElem *) lfirst(lc);
5465 
5466  if (strcmp(def->defname, "import_collate") == 0)
5467  import_collate = defGetBoolean(def);
5468  else if (strcmp(def->defname, "import_default") == 0)
5469  import_default = defGetBoolean(def);
5470  else if (strcmp(def->defname, "import_generated") == 0)
5471  import_generated = defGetBoolean(def);
5472  else if (strcmp(def->defname, "import_not_null") == 0)
5473  import_not_null = defGetBoolean(def);
5474  else
5475  ereport(ERROR,
5476  (errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
5477  errmsg("invalid option \"%s\"", def->defname)));
5478  }
5479 
5480  /*
5481  * Get connection to the foreign server. Connection manager will
5482  * establish new connection if necessary.
5483  */
5484  server = GetForeignServer(serverOid);
5485  mapping = GetUserMapping(GetUserId(), server->serverid);
5486  conn = GetConnection(mapping, false, NULL);
5487 
5488  /* Don't attempt to import collation if remote server hasn't got it */
5489  if (PQserverVersion(conn) < 90100)
5490  import_collate = false;
5491 
5492  /* Create workspace for strings */
5493  initStringInfo(&buf);
5494 
5495  /* In what follows, do not risk leaking any PGresults. */
5496  PG_TRY();
5497  {
5498  /* Check that the schema really exists */
5499  appendStringInfoString(&buf, "SELECT 1 FROM pg_catalog.pg_namespace WHERE nspname = ");
5500  deparseStringLiteral(&buf, stmt->remote_schema);
5501 
5502  res = pgfdw_exec_query(conn, buf.data, NULL);
5504  pgfdw_report_error(ERROR, res, conn, false, buf.data);
5505 
5506  if (PQntuples(res) != 1)
5507  ereport(