PostgreSQL Source Code git master
compress_io.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * compress_io.c
4 * Routines for archivers to write an uncompressed or compressed data
5 * stream.
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * This file includes two APIs for dealing with compressed data. The first
11 * provides more flexibility, using callbacks to read/write data from the
12 * underlying stream. The second API is a wrapper around fopen and
13 * friends, providing a similar interface while abstracting away
14 * the possible compression. The second API is intended to produce
15 * files that can easily be manipulated with an external compression
16 * utility program.
17 *
18 * Compressor API
19 * --------------
20 *
21 * The interface for writing to an archive consists of three functions:
22 * AllocateCompressor, writeData, and EndCompressor. First you call
23 * AllocateCompressor, then write all the data by calling writeData as many
24 * times as needed, and finally EndCompressor. writeData will call the
25 * WriteFunc that was provided to AllocateCompressor for each chunk of
26 * compressed data.
27 *
28 * The interface for reading an archive consists of the same three functions:
29 * AllocateCompressor, readData, and EndCompressor. First you call
30 * AllocateCompressor, then read all the data by calling readData to read the
31 * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
32 * returns the compressed data one chunk at a time. Then readData decompresses
33 * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
34 * to signal EOF. The interface is the same for compressed and uncompressed
35 * streams.
36 *
37 * Compressed stream API
38 * ----------------------
39 *
40 * The compressed stream API provides a set of function pointers for
41 * opening, reading, writing, and finally closing files. The implemented
42 * function pointers are documented in the corresponding header file and are
43 * common for all streams. It allows the caller to use the same functions for
44 * both compressed and uncompressed streams.
45 *
46 * The interface consists of three functions, InitCompressFileHandle,
47 * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
48 * compression is known, then start by calling InitCompressFileHandle,
49 * otherwise discover it by using InitDiscoverCompressFileHandle. Then call
50 * the function pointers as required for the read/write operations. Finally
51 * call EndCompressFileHandle to end the stream.
52 *
53 * InitDiscoverCompressFileHandle tries to infer the compression by the
54 * filename suffix. If the suffix is not yet known then it tries to simply
55 * open the file and if it fails, it tries to open the same file with
56 * compressed suffixes (.gz, .lz4 and .zst, in this order).
57 *
58 * IDENTIFICATION
59 * src/bin/pg_dump/compress_io.c
60 *
61 *-------------------------------------------------------------------------
62 */
63#include "postgres_fe.h"
64
65#include <sys/stat.h>
66#include <unistd.h>
67
68#include "compress_gzip.h"
69#include "compress_io.h"
70#include "compress_lz4.h"
71#include "compress_none.h"
72#include "compress_zstd.h"
73
74/*----------------------
75 * Generic functions
76 *----------------------
77 */
78
79/*
80 * Checks whether support for a compression algorithm is implemented in
81 * pg_dump/restore.
82 *
83 * On success returns NULL, otherwise returns a malloc'ed string which can be
84 * used by the caller in an error message.
85 */
86char *
88{
89 const pg_compress_algorithm algorithm = compression_spec.algorithm;
90 bool supported = false;
91
92 if (algorithm == PG_COMPRESSION_NONE)
93 supported = true;
94#ifdef HAVE_LIBZ
95 if (algorithm == PG_COMPRESSION_GZIP)
96 supported = true;
97#endif
98#ifdef USE_LZ4
99 if (algorithm == PG_COMPRESSION_LZ4)
100 supported = true;
101#endif
102#ifdef USE_ZSTD
103 if (algorithm == PG_COMPRESSION_ZSTD)
104 supported = true;
105#endif
106
107 if (!supported)
108 return psprintf(_("this build does not support compression with %s"),
109 get_compress_algorithm_name(algorithm));
110
111 return NULL;
112}
113
114/*----------------------
115 * Compressor API
116 *----------------------
117 */
118
119/*
120 * Allocate a new compressor.
121 */
124 ReadFunc readF, WriteFunc writeF)
125{
126 CompressorState *cs;
127
129 cs->readF = readF;
130 cs->writeF = writeF;
131
132 if (compression_spec.algorithm == PG_COMPRESSION_NONE)
133 InitCompressorNone(cs, compression_spec);
134 else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
135 InitCompressorGzip(cs, compression_spec);
136 else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
137 InitCompressorLZ4(cs, compression_spec);
138 else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
139 InitCompressorZstd(cs, compression_spec);
140
141 return cs;
142}
143
144/*
145 * Terminate compression library context and flush its buffers.
146 */
147void
149{
150 cs->end(AH, cs);
151 pg_free(cs);
152}
153
154/*----------------------
155 * Compressed stream API
156 *----------------------
157 */
158
159/*
160 * Private routines
161 */
/*
 * Does 'filename' end with 'suffix'?
 *
 * Returns 1 if the last strlen(suffix) bytes of filename equal suffix, and
 * 0 otherwise (including when filename is shorter than suffix).  The
 * comparison is byte-wise and case-sensitive.  An empty suffix matches any
 * filename.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	/* strlen() yields size_t; keep that type to avoid narrowing to int */
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	if (filenamelen < suffixlen)
		return 0;

	return memcmp(filename + (filenamelen - suffixlen),
				  suffix,
				  suffixlen) == 0;
}
175
/*
 * free() without changing errno; useful in several places below.
 *
 * errno is saved before the free() call and restored afterwards, so a
 * caller can release memory on an error path and still report the
 * original error code.
 */
static void
free_keep_errno(void *p)
{
	int			save_errno = errno;

	free(p);
	errno = save_errno;
}
185
186/*
187 * Public interface
188 */
189
190/*
191 * Initialize a compress file handle for the specified compression algorithm.
192 */
195{
197
198 CFH = pg_malloc0(sizeof(CompressFileHandle));
199
200 if (compression_spec.algorithm == PG_COMPRESSION_NONE)
201 InitCompressFileHandleNone(CFH, compression_spec);
202 else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
203 InitCompressFileHandleGzip(CFH, compression_spec);
204 else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
205 InitCompressFileHandleLZ4(CFH, compression_spec);
206 else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
207 InitCompressFileHandleZstd(CFH, compression_spec);
208
209 return CFH;
210}
211
/*
 * Checks if a compressed file (with the specified extension) exists.
 *
 * 'path' is the base file name and 'ext' the extension without the leading
 * dot; the name tested is "<path>.<ext>".
 *
 * The filename of the tested file is stored to fname buffer (the existing
 * buffer is freed, new buffer is allocated and returned through the pointer).
 * This happens whether or not the file exists.
 *
 * Returns true if access() with F_OK succeeds on the constructed name.
 */
static bool
check_compressed_file(const char *path, char **fname, const char *ext)
{
	/* release the previous candidate name without disturbing errno */
	free_keep_errno(*fname);
	*fname = psprintf("%s.%s", path, ext);
	return (access(*fname, F_OK) == 0);
}
225
226/*
227 * Open a file for reading. 'path' is the file to open, and 'mode' should
228 * be either "r" or "rb".
229 *
230 * If the file at 'path' contains the suffix of a supported compression method,
231 * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
232 * throughout. Otherwise the compression will be inferred by iteratively trying
233 * to open the file at 'path', first as is, then by appending known compression
234 * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
235 * "foo.{gz,lz4,zst}", trying in that order.
236 *
237 * On failure, return NULL with an error code in errno.
238 */
240InitDiscoverCompressFileHandle(const char *path, const char *mode)
241{
242 CompressFileHandle *CFH = NULL;
243 struct stat st;
244 char *fname;
245 pg_compress_specification compression_spec = {0};
246
247 compression_spec.algorithm = PG_COMPRESSION_NONE;
248
249 Assert(strcmp(mode, PG_BINARY_R) == 0);
250
251 fname = pg_strdup(path);
252
253 if (hasSuffix(fname, ".gz"))
254 compression_spec.algorithm = PG_COMPRESSION_GZIP;
255 else if (hasSuffix(fname, ".lz4"))
256 compression_spec.algorithm = PG_COMPRESSION_LZ4;
257 else if (hasSuffix(fname, ".zst"))
258 compression_spec.algorithm = PG_COMPRESSION_ZSTD;
259 else
260 {
261 if (stat(path, &st) == 0)
262 compression_spec.algorithm = PG_COMPRESSION_NONE;
263 else if (check_compressed_file(path, &fname, "gz"))
264 compression_spec.algorithm = PG_COMPRESSION_GZIP;
265 else if (check_compressed_file(path, &fname, "lz4"))
266 compression_spec.algorithm = PG_COMPRESSION_LZ4;
267 else if (check_compressed_file(path, &fname, "zst"))
268 compression_spec.algorithm = PG_COMPRESSION_ZSTD;
269 }
270
271 CFH = InitCompressFileHandle(compression_spec);
272 if (!CFH->open_func(fname, -1, mode, CFH))
273 {
274 free_keep_errno(CFH);
275 CFH = NULL;
276 }
277 free_keep_errno(fname);
278
279 return CFH;
280}
281
282/*
283 * Close an open file handle and release its memory.
284 *
285 * On failure, returns false and sets errno appropriately.
286 */
287bool
289{
290 bool ret = false;
291
292 if (CFH->private_data)
293 ret = CFH->close_func(CFH);
294
295 free_keep_errno(CFH);
296
297 return ret;
298}
#define PG_BINARY_R
Definition: c.h:1246
void InitCompressFileHandleGzip(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
void InitCompressorGzip(CompressorState *cs, const pg_compress_specification compression_spec)
static void free_keep_errno(void *p)
Definition: compress_io.c:178
static int hasSuffix(const char *filename, const char *suffix)
Definition: compress_io.c:163
bool EndCompressFileHandle(CompressFileHandle *CFH)
Definition: compress_io.c:288
CompressorState * AllocateCompressor(const pg_compress_specification compression_spec, ReadFunc readF, WriteFunc writeF)
Definition: compress_io.c:123
CompressFileHandle * InitDiscoverCompressFileHandle(const char *path, const char *mode)
Definition: compress_io.c:240
char * supports_compression(const pg_compress_specification compression_spec)
Definition: compress_io.c:87
void EndCompressor(ArchiveHandle *AH, CompressorState *cs)
Definition: compress_io.c:148
CompressFileHandle * InitCompressFileHandle(const pg_compress_specification compression_spec)
Definition: compress_io.c:194
static bool check_compressed_file(const char *path, char **fname, char *ext)
Definition: compress_io.c:219
size_t(* ReadFunc)(ArchiveHandle *AH, char **buf, size_t *buflen)
Definition: compress_io.h:47
void(* WriteFunc)(ArchiveHandle *AH, const char *buf, size_t len)
Definition: compress_io.h:34
void InitCompressFileHandleLZ4(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
Definition: compress_lz4.c:784
void InitCompressorLZ4(CompressorState *cs, const pg_compress_specification compression_spec)
Definition: compress_lz4.c:777
void InitCompressorNone(CompressorState *cs, const pg_compress_specification compression_spec)
Definition: compress_none.c:66
void InitCompressFileHandleNone(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
void InitCompressorZstd(CompressorState *cs, const pg_compress_specification compression_spec)
Definition: compress_zstd.c:23
void InitCompressFileHandleZstd(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
Definition: compress_zstd.c:29
const char * get_compress_algorithm_name(pg_compress_algorithm algorithm)
Definition: compression.c:69
pg_compress_algorithm
Definition: compression.h:22
@ PG_COMPRESSION_GZIP
Definition: compression.h:24
@ PG_COMPRESSION_LZ4
Definition: compression.h:25
@ PG_COMPRESSION_NONE
Definition: compression.h:23
@ PG_COMPRESSION_ZSTD
Definition: compression.h:26
#define _(x)
Definition: elog.c:90
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
void * pg_malloc0(size_t size)
Definition: fe_memutils.c:53
void pg_free(void *ptr)
Definition: fe_memutils.c:105
Assert(PointerIsAligned(start, uint64))
#define free(a)
Definition: header.h:65
static PgChecksumMode mode
Definition: pg_checksums.c:55
static char * filename
Definition: pg_dumpall.c:124
short access
Definition: preproc-type.c:36
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
bool(* open_func)(const char *path, int fd, const char *mode, CompressFileHandle *CFH)
Definition: compress_io.h:111
bool(* close_func)(CompressFileHandle *CFH)
Definition: compress_io.h:175
void(* end)(ArchiveHandle *AH, CompressorState *cs)
Definition: compress_io.h:67
ReadFunc readF
Definition: compress_io.h:72
WriteFunc writeF
Definition: compress_io.h:77
pg_compress_algorithm algorithm
Definition: compression.h:34
#define stat
Definition: win32_port.h:274