PostgreSQL Source Code  git master
compress_io.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * compress_io.c
4  * Routines for archivers to write an uncompressed or compressed data
5  * stream.
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * This file includes two APIs for dealing with compressed data. The first
11  * provides more flexibility, using callbacks to read/write data from the
12  * underlying stream. The second API is a wrapper around fopen and
13  * friends, providing an interface similar to those, but abstracts away
14  * the possible compression. The second API is designed so that the
15  * resulting files can be easily manipulated with an external compression
16  * utility program.
17  *
18  * Compressor API
19  * --------------
20  *
21  * The interface for writing to an archive consists of three functions:
22  * AllocateCompressor, writeData, and EndCompressor. First you call
23  * AllocateCompressor, then write all the data by calling writeData as many
24  * times as needed, and finally EndCompressor. writeData will call the
25  * WriteFunc that was provided to AllocateCompressor for each chunk of
26  * compressed data.
27  *
28  * The interface for reading an archive consists of three analogous functions:
29  * AllocateCompressor, readData, and EndCompressor. First you call
30  * AllocateCompressor, then read all the data by calling readData to read the
31  * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
32  * returns the compressed data one chunk at a time. Then readData decompresses
33  * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
34  * to signal EOF. The interface is the same for compressed and uncompressed
35  * streams.
36  *
37  * Compressed stream API
38  * ----------------------
39  *
40  * The compressed stream API provides a set of function pointers for
41  * opening, reading, writing, and finally closing files. The implemented
42  * function pointers are documented in the corresponding header file and are
43  * common for all streams. It allows the caller to use the same functions for
44  * both compressed and uncompressed streams.
45  *
46  * The interface consists of three functions, InitCompressFileHandle,
47  * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
48  * compression is known, then start by calling InitCompressFileHandle,
49  * otherwise discover it by using InitDiscoverCompressFileHandle. Then call
50  * the function pointers as required for the read/write operations. Finally
51  * call EndCompressFileHandle to end the stream.
52  *
53  * InitDiscoverCompressFileHandle tries to infer the compression by the
54  * filename suffix. If the suffix is not yet known then it tries to simply
55  * open the file and if it fails, it tries to open the same file with
56  * compressed suffixes (.gz, .lz4 and .zst, in this order).
57  *
58  * IDENTIFICATION
59  * src/bin/pg_dump/compress_io.c
60  *
61  *-------------------------------------------------------------------------
62  */
63 #include "postgres_fe.h"
64 
65 #include <sys/stat.h>
66 #include <unistd.h>
67 
68 #include "compress_gzip.h"
69 #include "compress_io.h"
70 #include "compress_lz4.h"
71 #include "compress_none.h"
72 #include "compress_zstd.h"
73 #include "pg_backup_utils.h"
74 
75 /*----------------------
76  * Generic functions
77  *----------------------
78  */
79 
80 /*
81  * Checks whether support for a compression algorithm is implemented in
82  * pg_dump/restore.
83  *
84  * On success returns NULL, otherwise returns a malloc'ed string which can be
85  * used by the caller in an error message.
86  */
87 char *
89 {
90  const pg_compress_algorithm algorithm = compression_spec.algorithm;
91  bool supported = false;
92 
93  if (algorithm == PG_COMPRESSION_NONE)
94  supported = true;
95 #ifdef HAVE_LIBZ
96  if (algorithm == PG_COMPRESSION_GZIP)
97  supported = true;
98 #endif
99 #ifdef USE_LZ4
100  if (algorithm == PG_COMPRESSION_LZ4)
101  supported = true;
102 #endif
103 #ifdef USE_ZSTD
104  if (algorithm == PG_COMPRESSION_ZSTD)
105  supported = true;
106 #endif
107 
108  if (!supported)
109  return psprintf(_("this build does not support compression with %s"),
110  get_compress_algorithm_name(algorithm));
111 
112  return NULL;
113 }
114 
115 /*----------------------
116  * Compressor API
117  *----------------------
118  */
119 
120 /*
121  * Allocate a new compressor.
122  */
125  ReadFunc readF, WriteFunc writeF)
126 {
127  CompressorState *cs;
128 
129  cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
130  cs->readF = readF;
131  cs->writeF = writeF;
132 
133  if (compression_spec.algorithm == PG_COMPRESSION_NONE)
134  InitCompressorNone(cs, compression_spec);
135  else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
136  InitCompressorGzip(cs, compression_spec);
137  else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
138  InitCompressorLZ4(cs, compression_spec);
139  else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
140  InitCompressorZstd(cs, compression_spec);
141 
142  return cs;
143 }
144 
145 /*
146  * Terminate compression library context and flush its buffers.
147  */
148 void
150 {
151  cs->end(AH, cs);
152  pg_free(cs);
153 }
154 
155 /*----------------------
156  * Compressed stream API
157  *----------------------
158  */
159 
160 /*
161  * Private routines
162  */
/*
 * Report whether 'filename' ends with 'suffix'.
 *
 * Returns 1 if the last strlen(suffix) bytes of filename equal suffix
 * (an empty suffix always matches), 0 otherwise.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	/* Keep lengths as size_t; narrowing strlen() results to int is lossy. */
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	/* A name shorter than the suffix cannot end with it. */
	if (filenamelen < suffixlen)
		return 0;

	return memcmp(filename + (filenamelen - suffixlen),
				  suffix,
				  suffixlen) == 0;
}
176 
/*
 * free() without changing errno; useful in several places below.
 *
 * errno is saved before free(p) and restored afterwards, so a caller can
 * release memory on an error path and still report the original error code.
 */
static void
free_keep_errno(void *p)
{
	int			save_errno = errno;

	free(p);
	errno = save_errno;
}
186 
187 /*
188  * Public interface
189  */
190 
191 /*
192  * Initialize a compress file handle for the specified compression algorithm.
193  */
196 {
197  CompressFileHandle *CFH;
198 
199  CFH = pg_malloc0(sizeof(CompressFileHandle));
200 
201  if (compression_spec.algorithm == PG_COMPRESSION_NONE)
202  InitCompressFileHandleNone(CFH, compression_spec);
203  else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
204  InitCompressFileHandleGzip(CFH, compression_spec);
205  else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
206  InitCompressFileHandleLZ4(CFH, compression_spec);
207  else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
208  InitCompressFileHandleZstd(CFH, compression_spec);
209 
210  return CFH;
211 }
212 
/*
 * Checks if a compressed file (with the specified extension) exists.
 *
 * path:  base file name, without the compression extension.
 * fname: in/out; the buffer currently pointed to is freed and replaced by a
 *        newly allocated "<path>.<ext>" string, whether or not that file
 *        exists.  The caller owns the new buffer.
 * ext:   extension to append, without the leading dot (e.g. "gz").
 *
 * Returns true if access(F_OK) succeeds on the constructed name.
 */
static bool
check_compressed_file(const char *path, char **fname, const char *ext)
{
	free_keep_errno(*fname);
	*fname = psprintf("%s.%s", path, ext);
	return (access(*fname, F_OK) == 0);
}
226 
227 /*
228  * Open a file for reading. 'path' is the file to open, and 'mode' should
229  * be either "r" or "rb".
230  *
231  * If the file at 'path' contains the suffix of a supported compression method,
232  * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
233  * throughout. Otherwise the compression will be inferred by iteratively trying
234  * to open the file at 'path', first as is, then by appending known compression
235  * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
236  * "foo.{gz,lz4,zst}", trying in that order.
237  *
238  * On failure, return NULL with an error code in errno.
239  */
241 InitDiscoverCompressFileHandle(const char *path, const char *mode)
242 {
243  CompressFileHandle *CFH = NULL;
244  struct stat st;
245  char *fname;
246  pg_compress_specification compression_spec = {0};
247 
248  compression_spec.algorithm = PG_COMPRESSION_NONE;
249 
250  Assert(strcmp(mode, PG_BINARY_R) == 0);
251 
252  fname = pg_strdup(path);
253 
254  if (hasSuffix(fname, ".gz"))
255  compression_spec.algorithm = PG_COMPRESSION_GZIP;
256  else if (hasSuffix(fname, ".lz4"))
257  compression_spec.algorithm = PG_COMPRESSION_LZ4;
258  else if (hasSuffix(fname, ".zst"))
259  compression_spec.algorithm = PG_COMPRESSION_ZSTD;
260  else
261  {
262  if (stat(path, &st) == 0)
263  compression_spec.algorithm = PG_COMPRESSION_NONE;
264  else if (check_compressed_file(path, &fname, "gz"))
265  compression_spec.algorithm = PG_COMPRESSION_GZIP;
266  else if (check_compressed_file(path, &fname, "lz4"))
267  compression_spec.algorithm = PG_COMPRESSION_LZ4;
268  else if (check_compressed_file(path, &fname, "zst"))
269  compression_spec.algorithm = PG_COMPRESSION_ZSTD;
270  }
271 
272  CFH = InitCompressFileHandle(compression_spec);
273  if (!CFH->open_func(fname, -1, mode, CFH))
274  {
275  free_keep_errno(CFH);
276  CFH = NULL;
277  }
278  free_keep_errno(fname);
279 
280  return CFH;
281 }
282 
283 /*
284  * Close an open file handle and release its memory.
285  *
286  * On failure, returns false and sets errno appropriately.
287  */
288 bool
290 {
291  bool ret = false;
292 
293  if (CFH->private_data)
294  ret = CFH->close_func(CFH);
295 
296  free_keep_errno(CFH);
297 
298  return ret;
299 }
#define PG_BINARY_R
Definition: c.h:1262
void InitCompressFileHandleGzip(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
void InitCompressorGzip(CompressorState *cs, const pg_compress_specification compression_spec)
static void free_keep_errno(void *p)
Definition: compress_io.c:179
static int hasSuffix(const char *filename, const char *suffix)
Definition: compress_io.c:164
bool EndCompressFileHandle(CompressFileHandle *CFH)
Definition: compress_io.c:289
char * supports_compression(const pg_compress_specification compression_spec)
Definition: compress_io.c:88
CompressorState * AllocateCompressor(const pg_compress_specification compression_spec, ReadFunc readF, WriteFunc writeF)
Definition: compress_io.c:124
void EndCompressor(ArchiveHandle *AH, CompressorState *cs)
Definition: compress_io.c:149
CompressFileHandle * InitDiscoverCompressFileHandle(const char *path, const char *mode)
Definition: compress_io.c:241
CompressFileHandle * InitCompressFileHandle(const pg_compress_specification compression_spec)
Definition: compress_io.c:195
static bool check_compressed_file(const char *path, char **fname, char *ext)
Definition: compress_io.c:220
size_t(* ReadFunc)(ArchiveHandle *AH, char **buf, size_t *buflen)
Definition: compress_io.h:47
void(* WriteFunc)(ArchiveHandle *AH, const char *buf, size_t len)
Definition: compress_io.h:34
void InitCompressFileHandleLZ4(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
Definition: compress_lz4.c:784
void InitCompressorLZ4(CompressorState *cs, const pg_compress_specification compression_spec)
Definition: compress_lz4.c:777
void InitCompressorNone(CompressorState *cs, const pg_compress_specification compression_spec)
Definition: compress_none.c:66
void InitCompressFileHandleNone(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
void InitCompressorZstd(CompressorState *cs, const pg_compress_specification compression_spec)
Definition: compress_zstd.c:23
void InitCompressFileHandleZstd(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
Definition: compress_zstd.c:29
const char * get_compress_algorithm_name(pg_compress_algorithm algorithm)
Definition: compression.c:69
pg_compress_algorithm
Definition: compression.h:22
@ PG_COMPRESSION_GZIP
Definition: compression.h:24
@ PG_COMPRESSION_LZ4
Definition: compression.h:25
@ PG_COMPRESSION_NONE
Definition: compression.h:23
@ PG_COMPRESSION_ZSTD
Definition: compression.h:26
#define _(x)
Definition: elog.c:90
void * pg_malloc0(size_t size)
Definition: fe_memutils.c:53
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define free(a)
Definition: header.h:65
Assert(fmt[strlen(fmt) - 1] !='\n')
static PgChecksumMode mode
Definition: pg_checksums.c:56
static char * filename
Definition: pg_dumpall.c:121
short access
Definition: preproc-type.c:36
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
bool(* open_func)(const char *path, int fd, const char *mode, CompressFileHandle *CFH)
Definition: compress_io.h:111
bool(* close_func)(CompressFileHandle *CFH)
Definition: compress_io.h:175
void(* end)(ArchiveHandle *AH, CompressorState *cs)
Definition: compress_io.h:67
ReadFunc readF
Definition: compress_io.h:72
WriteFunc writeF
Definition: compress_io.h:77
pg_compress_algorithm algorithm
Definition: compression.h:34
#define stat
Definition: win32_port.h:284