PostgreSQL Source Code  git master
compress_io.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * compress_io.c
4  * Routines for archivers to write an uncompressed or compressed data
5  * stream.
6  *
7  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * This file includes two APIs for dealing with compressed data. The first
11  * provides more flexibility, using callbacks to read/write data from the
12  * underlying stream. The second API is a wrapper around fopen and
13  * friends, providing an interface similar to those, but abstracts away
14  * the possible compression. The second API is designed so that the resulting
15  * files can easily be manipulated with an external compression utility
16  * program.
17  *
18  * Compressor API
19  * --------------
20  *
21  * The interface for writing to an archive consists of three functions:
22  * AllocateCompressor, writeData, and EndCompressor. First you call
23  * AllocateCompressor, then write all the data by calling writeData as many
24  * times as needed, and finally EndCompressor. writeData will call the
25  * WriteFunc that was provided to AllocateCompressor for each chunk of
26  * compressed data.
27  *
28  * The interface for reading an archive consists of the same three functions:
29  * AllocateCompressor, readData, and EndCompressor. First you call
30  * AllocateCompressor, then read all the data by calling readData to read the
31  * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
32  * returns the compressed data one chunk at a time. Then readData decompresses
33  * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
34  * to signal EOF. The interface is the same for compressed and uncompressed
35  * streams.
36  *
37  * Compressed stream API
38  * ----------------------
39  *
40  * The compressed stream API provides a set of function pointers for
41  * opening, reading, writing, and finally closing files. The implemented
42  * function pointers are documented in the corresponding header file and are
43  * common for all streams. It allows the caller to use the same functions for
44  * both compressed and uncompressed streams.
45  *
46  * The interface consists of three functions, InitCompressFileHandle,
47  * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
48  * compression is known, then start by calling InitCompressFileHandle,
49  * otherwise discover it by using InitDiscoverCompressFileHandle. Then call
50  * the function pointers as required for the read/write operations. Finally
51  * call EndCompressFileHandle to end the stream.
52  *
53  * InitDiscoverCompressFileHandle tries to infer the compression by the
54  * filename suffix. If the suffix is not yet known then it tries to simply
55  * open the file and if it fails, it tries to open the same file with the .gz
56  * suffix, and then again with the .lz4 suffix.
57  *
58  * IDENTIFICATION
59  * src/bin/pg_dump/compress_io.c
60  *
61  *-------------------------------------------------------------------------
62  */
63 #include "postgres_fe.h"
64 
65 #include <sys/stat.h>
66 #include <unistd.h>
67 
68 #include "compress_gzip.h"
69 #include "compress_io.h"
70 #include "compress_lz4.h"
71 #include "compress_none.h"
72 #include "pg_backup_utils.h"
73 
74 /*----------------------
75  * Generic functions
76  *----------------------
77  */
78 
79 /*
80  * Checks whether a compression algorithm is supported.
81  *
82  * On success returns NULL, otherwise returns a malloc'ed string which can be
83  * used by the caller in an error message.
84  */
85 char *
87 {
88  const pg_compress_algorithm algorithm = compression_spec.algorithm;
89  bool supported = false;
90 
91  if (algorithm == PG_COMPRESSION_NONE)
92  supported = true;
93 #ifdef HAVE_LIBZ
94  if (algorithm == PG_COMPRESSION_GZIP)
95  supported = true;
96 #endif
97 #ifdef USE_LZ4
98  if (algorithm == PG_COMPRESSION_LZ4)
99  supported = true;
100 #endif
101 
102  if (!supported)
103  return psprintf("this build does not support compression with %s",
104  get_compress_algorithm_name(algorithm));
105 
106  return NULL;
107 }
108 
109 /*----------------------
110  * Compressor API
111  *----------------------
112  */
113 
114 /*
115  * Allocate a new compressor.
116  */
119  ReadFunc readF, WriteFunc writeF)
120 {
121  CompressorState *cs;
122 
123  cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
124  cs->readF = readF;
125  cs->writeF = writeF;
126 
127  if (compression_spec.algorithm == PG_COMPRESSION_NONE)
128  InitCompressorNone(cs, compression_spec);
129  else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
130  InitCompressorGzip(cs, compression_spec);
131  else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
132  InitCompressorLZ4(cs, compression_spec);
133 
134  return cs;
135 }
136 
137 /*
138  * Terminate compression library context and flush its buffers.
139  */
140 void
142 {
143  cs->end(AH, cs);
144  pg_free(cs);
145 }
146 
147 /*----------------------
148  * Compressed stream API
149  *----------------------
150  */
151 
152 /*
153  * Private routines
154  */
/*
 * Returns 1 if 'filename' ends with 'suffix', and 0 otherwise.
 *
 * The comparison is bytewise, hence case-sensitive.  An empty suffix matches
 * every filename.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	/* size_t matches what strlen() returns and what memcmp() expects */
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	/* A suffix longer than the name itself can never match */
	if (filenamelen < suffixlen)
		return 0;

	return memcmp(filename + (filenamelen - suffixlen),
				  suffix,
				  suffixlen) == 0;
}
168 
/*
 * free() without changing errno; useful in several places below.
 *
 * errno is saved before the free() call and restored afterwards, so that an
 * error code set by an earlier failure is still there for the caller.
 */
static void
free_keep_errno(void *p)
{
	int			save_errno = errno;

	free(p);
	errno = save_errno;
}
178 
179 /*
180  * Public interface
181  */
182 
183 /*
184  * Initialize a compress file handle for the specified compression algorithm.
185  */
188 {
189  CompressFileHandle *CFH;
190 
191  CFH = pg_malloc0(sizeof(CompressFileHandle));
192 
193  if (compression_spec.algorithm == PG_COMPRESSION_NONE)
194  InitCompressFileHandleNone(CFH, compression_spec);
195  else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
196  InitCompressFileHandleGzip(CFH, compression_spec);
197  else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
198  InitCompressFileHandleLZ4(CFH, compression_spec);
199 
200  return CFH;
201 }
202 
203 /*
204  * Open a file for reading. 'path' is the file to open, and 'mode' should
205  * be either "r" or "rb".
206  *
207  * If the file at 'path' contains the suffix of a supported compression method,
208  * currently this includes ".gz" and ".lz4", then this compression will be used
209  * throughout. Otherwise the compression will be inferred by iteratively trying
210  * to open the file at 'path', first as is, then by appending known compression
211  * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
212  * "foo.gz" or "foo.lz4", trying in that order.
213  *
214  * On failure, return NULL with an error code in errno.
215  */
217 InitDiscoverCompressFileHandle(const char *path, const char *mode)
218 {
219  CompressFileHandle *CFH = NULL;
220  struct stat st;
221  char *fname;
222  pg_compress_specification compression_spec = {0};
223 
224  compression_spec.algorithm = PG_COMPRESSION_NONE;
225 
226  Assert(strcmp(mode, PG_BINARY_R) == 0);
227 
228  fname = strdup(path);
229 
230  if (hasSuffix(fname, ".gz"))
231  compression_spec.algorithm = PG_COMPRESSION_GZIP;
232  else
233  {
234  bool exists;
235 
236  exists = (stat(path, &st) == 0);
237  /* avoid unused warning if it is not built with compression */
238  if (exists)
239  compression_spec.algorithm = PG_COMPRESSION_NONE;
240 #ifdef HAVE_LIBZ
241  if (!exists)
242  {
243  free_keep_errno(fname);
244  fname = psprintf("%s.gz", path);
245  exists = (stat(fname, &st) == 0);
246 
247  if (exists)
248  compression_spec.algorithm = PG_COMPRESSION_GZIP;
249  }
250 #endif
251 #ifdef USE_LZ4
252  if (!exists)
253  {
254  free_keep_errno(fname);
255  fname = psprintf("%s.lz4", path);
256  exists = (stat(fname, &st) == 0);
257 
258  if (exists)
259  compression_spec.algorithm = PG_COMPRESSION_LZ4;
260  }
261 #endif
262  }
263 
264  CFH = InitCompressFileHandle(compression_spec);
265  if (!CFH->open_func(fname, -1, mode, CFH))
266  {
267  free_keep_errno(CFH);
268  CFH = NULL;
269  }
270  free_keep_errno(fname);
271 
272  return CFH;
273 }
274 
275 /*
276  * Close an open file handle and release its memory.
277  *
278  * On failure, returns false and sets errno appropriately.
279  */
280 bool
282 {
283  bool ret = false;
284 
285  if (CFH->private_data)
286  ret = CFH->close_func(CFH);
287 
288  free_keep_errno(CFH);
289 
290  return ret;
291 }
#define PG_BINARY_R
Definition: c.h:1262
void InitCompressFileHandleGzip(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
void InitCompressorGzip(CompressorState *cs, const pg_compress_specification compression_spec)
static void free_keep_errno(void *p)
Definition: compress_io.c:171
static int hasSuffix(const char *filename, const char *suffix)
Definition: compress_io.c:156
bool EndCompressFileHandle(CompressFileHandle *CFH)
Definition: compress_io.c:281
char * supports_compression(const pg_compress_specification compression_spec)
Definition: compress_io.c:86
CompressorState * AllocateCompressor(const pg_compress_specification compression_spec, ReadFunc readF, WriteFunc writeF)
Definition: compress_io.c:118
void EndCompressor(ArchiveHandle *AH, CompressorState *cs)
Definition: compress_io.c:141
CompressFileHandle * InitDiscoverCompressFileHandle(const char *path, const char *mode)
Definition: compress_io.c:217
CompressFileHandle * InitCompressFileHandle(const pg_compress_specification compression_spec)
Definition: compress_io.c:187
size_t(* ReadFunc)(ArchiveHandle *AH, char **buf, size_t *buflen)
Definition: compress_io.h:41
void(* WriteFunc)(ArchiveHandle *AH, const char *buf, size_t len)
Definition: compress_io.h:28
void InitCompressFileHandleLZ4(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
Definition: compress_lz4.c:640
void InitCompressorLZ4(CompressorState *cs, const pg_compress_specification compression_spec)
Definition: compress_lz4.c:633
void InitCompressorNone(CompressorState *cs, const pg_compress_specification compression_spec)
Definition: compress_none.c:66
void InitCompressFileHandleNone(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
const char * get_compress_algorithm_name(pg_compress_algorithm algorithm)
Definition: compression.c:67
pg_compress_algorithm
Definition: compression.h:22
@ PG_COMPRESSION_GZIP
Definition: compression.h:24
@ PG_COMPRESSION_LZ4
Definition: compression.h:25
@ PG_COMPRESSION_NONE
Definition: compression.h:23
void * pg_malloc0(size_t size)
Definition: fe_memutils.c:53
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define free(a)
Definition: header.h:65
Assert(fmt[strlen(fmt) - 1] !='\n')
static PgChecksumMode mode
Definition: pg_checksums.c:65
static char * filename
Definition: pg_dumpall.c:119
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
bool(* open_func)(const char *path, int fd, const char *mode, CompressFileHandle *CFH)
Definition: compress_io.h:105
bool(* close_func)(CompressFileHandle *CFH)
Definition: compress_io.h:169
void(* end)(ArchiveHandle *AH, CompressorState *cs)
Definition: compress_io.h:61
ReadFunc readF
Definition: compress_io.h:66
WriteFunc writeF
Definition: compress_io.h:71
pg_compress_algorithm algorithm
Definition: compression.h:33
#define stat
Definition: win32_port.h:286