PostgreSQL Source Code  git master
copyfrom_internal.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * copyfrom_internal.h
4  * Internal definitions for COPY FROM command.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/commands/copyfrom_internal.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef COPYFROM_INTERNAL_H
15 #define COPYFROM_INTERNAL_H
16 
17 #include "commands/copy.h"
18 #include "commands/trigger.h"
19 #include "nodes/miscnodes.h"
20 
/*
 * Represents the different source cases we need to worry about at
 * the bottom level.
 *
 * CopyFromStateData.copy_src holds one of these values and determines which
 * of the source-specific members (copy_file, fe_msgbuf, data_source_cb) is
 * the one in use.
 */
typedef enum CopySource
{
    COPY_FILE,                  /* from file (or a piped program) */
    COPY_FRONTEND,              /* from frontend */
    COPY_CALLBACK,              /* from callback function */
} CopySource;
31 
/*
 * Represents the end-of-line terminator type of the input.
 *
 * NOTE(review): the enumerator lines were missing from this listing.  The
 * four names are the ones the file's symbol index records for this enum; the
 * order below (EOL_UNKNOWN first, so that it has value 0) is presumed to
 * match the upstream header -- confirm before relying on numeric values.
 */
typedef enum EolType
{
    EOL_UNKNOWN,                /* terminator not determined (yet) */
    EOL_NL,                     /* presumably newline-terminated lines */
    EOL_CR,                     /* presumably carriage-return-terminated */
    EOL_CRNL,                   /* presumably CR followed by NL */
} EolType;
42 
/*
 * Represents the insert method to be used during COPY FROM.
 *
 * The choice is between inserting one tuple at a time and the batched
 * (multi-insert) path; CIM_MULTI_CONDITIONAL defers that decision to a
 * validity check made later.
 */
typedef enum CopyInsertMethod
{
    CIM_SINGLE,                 /* use table_tuple_insert or ExecForeignInsert */
    CIM_MULTI,                  /* always use table_multi_insert or
                                 * ExecForeignBatchInsert */
    CIM_MULTI_CONDITIONAL,      /* use table_multi_insert or
                                 * ExecForeignBatchInsert only if valid */
} CopyInsertMethod;
54 
55 /*
56  * This struct contains all the state variables used throughout a COPY FROM
57  * operation.
58  */
59 typedef struct CopyFromStateData
60 {
61  /* low-level state data */
62  CopySource copy_src; /* type of copy source */
63  FILE *copy_file; /* used if copy_src == COPY_FILE */
64  StringInfo fe_msgbuf; /* used if copy_src == COPY_FRONTEND */
65 
66  EolType eol_type; /* EOL type of input */
67  int file_encoding; /* file or remote side's character encoding */
68  bool need_transcoding; /* file encoding diff from server? */
69  Oid conversion_proc; /* encoding conversion function */
70 
71  /* parameters from the COPY command */
72  Relation rel; /* relation to copy from */
73  List *attnumlist; /* integer list of attnums to copy */
74  char *filename; /* filename, or NULL for STDIN */
75  bool is_program; /* is 'filename' a program to popen? */
76  copy_data_source_cb data_source_cb; /* function for reading data */
77 
79  bool *convert_select_flags; /* per-column CSV/TEXT CS flags */
80  Node *whereClause; /* WHERE condition (or NULL) */
81 
82  /* these are just for error messages, see CopyFromErrorCallback */
83  const char *cur_relname; /* table name for error messages */
84  uint64 cur_lineno; /* line number for error messages */
85  const char *cur_attname; /* current att for error messages */
86  const char *cur_attval; /* current att value for error messages */
87  bool relname_only; /* don't output line number, att, etc. */
88 
89  /*
90  * Working state
91  */
92  MemoryContext copycontext; /* per-copy execution context */
93 
94  AttrNumber num_defaults; /* count of att that are missing and have
95  * default value */
96  FmgrInfo *in_functions; /* array of input functions for each attrs */
97  Oid *typioparams; /* array of element types for in_functions */
98  ErrorSaveContext *escontext; /* soft error trapper during in_functions
99  * execution */
100  uint64 num_errors; /* total number of rows which contained soft
101  * errors */
102  int *defmap; /* array of default att numbers related to
103  * missing att */
104  ExprState **defexprs; /* array of default att expressions for all
105  * att */
106  bool *defaults; /* if DEFAULT marker was found for
107  * corresponding att */
108  bool volatile_defexprs; /* is any of defexprs volatile? */
109  List *range_table; /* single element list of RangeTblEntry */
110  List *rteperminfos; /* single element list of RTEPermissionInfo */
112 
114 
115  /*
116  * These variables are used to reduce overhead in COPY FROM.
117  *
118  * attribute_buf holds the separated, de-escaped text for each field of
119  * the current line. The CopyReadAttributes functions return arrays of
120  * pointers into this buffer. We avoid palloc/pfree overhead by re-using
121  * the buffer on each cycle.
122  *
123  * In binary COPY FROM, attribute_buf holds the binary data for the
124  * current field, but the usage is otherwise similar.
125  */
127 
128  /* field raw data pointers found by COPY FROM */
129 
131  char **raw_fields;
132 
133  /*
134  * Similarly, line_buf holds the whole input line being processed. The
135  * input cycle is first to read the whole line into line_buf, and then
136  * extract the individual attribute fields into attribute_buf. line_buf
137  * is preserved unmodified so that we can display it in error messages if
138  * appropriate. (In binary mode, line_buf is not used.)
139  */
141  bool line_buf_valid; /* contains the row being processed? */
142 
143  /*
144  * input_buf holds input data, already converted to database encoding.
145  *
146  * In text mode, CopyReadLine parses this data sufficiently to locate line
147  * boundaries, then transfers the data to line_buf. We guarantee that
148  * there is a \0 at input_buf[input_buf_len] at all times. (In binary
149  * mode, input_buf is not used.)
150  *
151  * If encoding conversion is not required, input_buf is not a separate
152  * buffer but points directly to raw_buf. In that case, input_buf_len
153  * tracks the number of bytes that have been verified as valid in the
154  * database encoding, and raw_buf_len is the total number of bytes stored
155  * in the buffer.
156  */
157 #define INPUT_BUF_SIZE 65536 /* we palloc INPUT_BUF_SIZE+1 bytes */
158  char *input_buf;
159  int input_buf_index; /* next byte to process */
160  int input_buf_len; /* total # of bytes stored */
161  bool input_reached_eof; /* true if we reached EOF */
162  bool input_reached_error; /* true if a conversion error happened */
163  /* Shorthand for number of unconsumed bytes available in input_buf */
164 #define INPUT_BUF_BYTES(cstate) ((cstate)->input_buf_len - (cstate)->input_buf_index)
165 
166  /*
167  * raw_buf holds raw input data read from the data source (file or client
168  * connection), not yet converted to the database encoding. Like with
169  * 'input_buf', we guarantee that there is a \0 at raw_buf[raw_buf_len].
170  */
171 #define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
172  char *raw_buf;
173  int raw_buf_index; /* next byte to process */
174  int raw_buf_len; /* total # of bytes stored */
175  bool raw_reached_eof; /* true if we reached EOF */
176 
177  /* Shorthand for number of unconsumed bytes available in raw_buf */
178 #define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
179 
180  uint64 bytes_processed; /* number of bytes processed so far */
182 
183 extern void ReceiveCopyBegin(CopyFromState cstate);
184 extern void ReceiveCopyBinaryHeader(CopyFromState cstate);
185 
186 #endif /* COPYFROM_INTERNAL_H */
int16 AttrNumber
Definition: attnum.h:21
struct CopyFromStateData CopyFromStateData
CopySource
@ COPY_FILE
@ COPY_CALLBACK
@ COPY_FRONTEND
CopyInsertMethod
@ CIM_SINGLE
@ CIM_MULTI_CONDITIONAL
@ CIM_MULTI
void ReceiveCopyBinaryHeader(CopyFromState cstate)
void ReceiveCopyBegin(CopyFromState cstate)
@ EOL_CR
@ EOL_CRNL
@ EOL_UNKNOWN
@ EOL_NL
int(* copy_data_source_cb)(void *outbuf, int minread, int maxread)
Definition: copy.h:93
unsigned int Oid
Definition: postgres_ext.h:31
copy_data_source_cb data_source_cb
StringInfoData line_buf
CopyFormatOptions opts
StringInfoData attribute_buf
TransitionCaptureState * transition_capture
MemoryContext copycontext
const char * cur_attval
const char * cur_attname
const char * cur_relname
ErrorSaveContext * escontext
Definition: fmgr.h:57
Definition: pg_list.h:54
Definition: nodes.h:129