PostgreSQL Source Code  git master
compress_gzip.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * compress_gzip.c
4  * Routines for archivers to read or write a gzip compressed data stream.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/bin/pg_dump/compress_gzip.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres_fe.h"
15 #include <unistd.h>
16 
17 #include "compress_gzip.h"
18 #include "pg_backup_utils.h"
19 
20 #ifdef HAVE_LIBZ
21 #include "zlib.h"
22 
23 /*----------------------
24  * Compressor API
25  *----------------------
26  */
27 typedef struct GzipCompressorState
28 {
29  z_streamp zp;
30 
31  void *outbuf;
32  size_t outsize;
33 } GzipCompressorState;
34 
35 /* Private routines that support gzip compressed data I/O */
36 static void DeflateCompressorInit(CompressorState *cs);
37 static void DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs);
38 static void DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs,
39  bool flush);
40 static void EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs);
41 static void WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
42  const void *data, size_t dLen);
43 static void ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs);
44 
45 static void
46 DeflateCompressorInit(CompressorState *cs)
47 {
48  GzipCompressorState *gzipcs;
49  z_streamp zp;
50 
51  gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState));
52  zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
53  zp->zalloc = Z_NULL;
54  zp->zfree = Z_NULL;
55  zp->opaque = Z_NULL;
56 
57  /*
58  * outsize is the buffer size we tell zlib it can output to. We actually
59  * allocate one extra byte because some routines want to append a trailing
60  * zero byte to the zlib output.
61  */
62  gzipcs->outsize = DEFAULT_IO_BUFFER_SIZE;
63  gzipcs->outbuf = pg_malloc(gzipcs->outsize + 1);
64 
65  /* -Z 0 uses the "None" compressor -- not zlib with no compression */
66  Assert(cs->compression_spec.level != 0);
67 
68  if (deflateInit(zp, cs->compression_spec.level) != Z_OK)
69  pg_fatal("could not initialize compression library: %s", zp->msg);
70 
71  /* Just be paranoid - maybe End is called after Start, with no Write */
72  zp->next_out = gzipcs->outbuf;
73  zp->avail_out = gzipcs->outsize;
74 
75  /* Keep track of gzipcs */
76  cs->private_data = gzipcs;
77 }
78 
79 static void
80 DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs)
81 {
82  GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
83  z_streamp zp;
84 
85  zp = gzipcs->zp;
86  zp->next_in = NULL;
87  zp->avail_in = 0;
88 
89  /* Flush any remaining data from zlib buffer */
90  DeflateCompressorCommon(AH, cs, true);
91 
92  if (deflateEnd(zp) != Z_OK)
93  pg_fatal("could not close compression stream: %s", zp->msg);
94 
95  pg_free(gzipcs->outbuf);
96  pg_free(gzipcs->zp);
97  pg_free(gzipcs);
98  cs->private_data = NULL;
99 }
100 
101 static void
102 DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs, bool flush)
103 {
104  GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
105  z_streamp zp = gzipcs->zp;
106  void *out = gzipcs->outbuf;
107  int res = Z_OK;
108 
109  while (gzipcs->zp->avail_in != 0 || flush)
110  {
111  res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
112  if (res == Z_STREAM_ERROR)
113  pg_fatal("could not compress data: %s", zp->msg);
114  if ((flush && (zp->avail_out < gzipcs->outsize))
115  || (zp->avail_out == 0)
116  || (zp->avail_in != 0)
117  )
118  {
119  /*
120  * Extra paranoia: avoid zero-length chunks, since a zero length
121  * chunk is the EOF marker in the custom format. This should never
122  * happen but ...
123  */
124  if (zp->avail_out < gzipcs->outsize)
125  {
126  /*
127  * Any write function should do its own error checking but to
128  * make sure we do a check here as well ...
129  */
130  size_t len = gzipcs->outsize - zp->avail_out;
131 
132  cs->writeF(AH, (char *) out, len);
133  }
134  zp->next_out = out;
135  zp->avail_out = gzipcs->outsize;
136  }
137 
138  if (res == Z_STREAM_END)
139  break;
140  }
141 }
142 
143 static void
144 EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs)
145 {
146  /* If deflation was initialized, finalize it */
147  if (cs->private_data)
148  DeflateCompressorEnd(AH, cs);
149 }
150 
151 static void
152 WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
153  const void *data, size_t dLen)
154 {
155  GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
156 
157  gzipcs->zp->next_in = (void *) unconstify(void *, data);
158  gzipcs->zp->avail_in = dLen;
159  DeflateCompressorCommon(AH, cs, false);
160 }
161 
162 static void
163 ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs)
164 {
165  z_streamp zp;
166  char *out;
167  int res = Z_OK;
168  size_t cnt;
169  char *buf;
170  size_t buflen;
171 
172  zp = (z_streamp) pg_malloc(sizeof(z_stream));
173  zp->zalloc = Z_NULL;
174  zp->zfree = Z_NULL;
175  zp->opaque = Z_NULL;
176 
177  buflen = DEFAULT_IO_BUFFER_SIZE;
178  buf = pg_malloc(buflen);
179 
181 
182  if (inflateInit(zp) != Z_OK)
183  pg_fatal("could not initialize compression library: %s",
184  zp->msg);
185 
186  /* no minimal chunk size for zlib */
187  while ((cnt = cs->readF(AH, &buf, &buflen)))
188  {
189  zp->next_in = (void *) buf;
190  zp->avail_in = cnt;
191 
192  while (zp->avail_in > 0)
193  {
194  zp->next_out = (void *) out;
195  zp->avail_out = DEFAULT_IO_BUFFER_SIZE;
196 
197  res = inflate(zp, 0);
198  if (res != Z_OK && res != Z_STREAM_END)
199  pg_fatal("could not uncompress data: %s", zp->msg);
200 
201  out[DEFAULT_IO_BUFFER_SIZE - zp->avail_out] = '\0';
202  ahwrite(out, 1, DEFAULT_IO_BUFFER_SIZE - zp->avail_out, AH);
203  }
204  }
205 
206  zp->next_in = NULL;
207  zp->avail_in = 0;
208  while (res != Z_STREAM_END)
209  {
210  zp->next_out = (void *) out;
211  zp->avail_out = DEFAULT_IO_BUFFER_SIZE;
212  res = inflate(zp, 0);
213  if (res != Z_OK && res != Z_STREAM_END)
214  pg_fatal("could not uncompress data: %s", zp->msg);
215 
216  out[DEFAULT_IO_BUFFER_SIZE - zp->avail_out] = '\0';
217  ahwrite(out, 1, DEFAULT_IO_BUFFER_SIZE - zp->avail_out, AH);
218  }
219 
220  if (inflateEnd(zp) != Z_OK)
221  pg_fatal("could not close compression library: %s", zp->msg);
222 
223  free(buf);
224  free(out);
225  free(zp);
226 }
227 
228 /* Public routines that support gzip compressed data I/O */
229 void
231  const pg_compress_specification compression_spec)
232 {
233  cs->readData = ReadDataFromArchiveGzip;
234  cs->writeData = WriteDataToArchiveGzip;
235  cs->end = EndCompressorGzip;
236 
237  cs->compression_spec = compression_spec;
238 
239  /*
240  * If the caller has defined a write function, prepare the necessary
241  * state. Note that if the data is empty, End may be called immediately
242  * after Init, without ever calling Write.
243  */
244  if (cs->writeF)
245  DeflateCompressorInit(cs);
246 }
247 
248 
249 /*----------------------
250  * Compress File API
251  *----------------------
252  */
253 
254 static bool
255 Gzip_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH)
256 {
257  gzFile gzfp = (gzFile) CFH->private_data;
258  int gzret;
259 
260  gzret = gzread(gzfp, ptr, size);
261  if (gzret <= 0 && !gzeof(gzfp))
262  {
263  int errnum;
264  const char *errmsg = gzerror(gzfp, &errnum);
265 
266  pg_fatal("could not read from input file: %s",
267  errnum == Z_ERRNO ? strerror(errno) : errmsg);
268  }
269 
270  if (rsize)
271  *rsize = (size_t) gzret;
272 
273  return true;
274 }
275 
276 static bool
277 Gzip_write(const void *ptr, size_t size, CompressFileHandle *CFH)
278 {
279  gzFile gzfp = (gzFile) CFH->private_data;
280 
281  return gzwrite(gzfp, ptr, size) > 0;
282 }
283 
284 static int
285 Gzip_getc(CompressFileHandle *CFH)
286 {
287  gzFile gzfp = (gzFile) CFH->private_data;
288  int ret;
289 
290  errno = 0;
291  ret = gzgetc(gzfp);
292  if (ret == EOF)
293  {
294  if (!gzeof(gzfp))
295  pg_fatal("could not read from input file: %m");
296  else
297  pg_fatal("could not read from input file: end of file");
298  }
299 
300  return ret;
301 }
302 
303 static char *
304 Gzip_gets(char *ptr, int size, CompressFileHandle *CFH)
305 {
306  gzFile gzfp = (gzFile) CFH->private_data;
307 
308  return gzgets(gzfp, ptr, size);
309 }
310 
311 static bool
312 Gzip_close(CompressFileHandle *CFH)
313 {
314  gzFile gzfp = (gzFile) CFH->private_data;
315 
316  CFH->private_data = NULL;
317 
318  return gzclose(gzfp) == Z_OK;
319 }
320 
321 static bool
322 Gzip_eof(CompressFileHandle *CFH)
323 {
324  gzFile gzfp = (gzFile) CFH->private_data;
325 
326  return gzeof(gzfp) == 1;
327 }
328 
329 static const char *
330 Gzip_get_error(CompressFileHandle *CFH)
331 {
332  gzFile gzfp = (gzFile) CFH->private_data;
333  const char *errmsg;
334  int errnum;
335 
336  errmsg = gzerror(gzfp, &errnum);
337  if (errnum == Z_ERRNO)
338  errmsg = strerror(errno);
339 
340  return errmsg;
341 }
342 
343 static bool
344 Gzip_open(const char *path, int fd, const char *mode, CompressFileHandle *CFH)
345 {
346  gzFile gzfp;
347  char mode_compression[32];
348 
349  if (CFH->compression_spec.level != Z_DEFAULT_COMPRESSION)
350  {
351  /*
352  * user has specified a compression level, so tell zlib to use it
353  */
354  snprintf(mode_compression, sizeof(mode_compression), "%s%d",
355  mode, CFH->compression_spec.level);
356  }
357  else
358  strcpy(mode_compression, mode);
359 
360  if (fd >= 0)
361  gzfp = gzdopen(dup(fd), mode_compression);
362  else
363  gzfp = gzopen(path, mode_compression);
364 
365  if (gzfp == NULL)
366  return false;
367 
368  CFH->private_data = gzfp;
369 
370  return true;
371 }
372 
373 static bool
374 Gzip_open_write(const char *path, const char *mode, CompressFileHandle *CFH)
375 {
376  char *fname;
377  bool ret;
378  int save_errno;
379 
380  fname = psprintf("%s.gz", path);
381  ret = CFH->open_func(fname, -1, mode, CFH);
382 
383  save_errno = errno;
384  pg_free(fname);
385  errno = save_errno;
386 
387  return ret;
388 }
389 
390 void
392  const pg_compress_specification compression_spec)
393 {
394  CFH->open_func = Gzip_open;
395  CFH->open_write_func = Gzip_open_write;
396  CFH->read_func = Gzip_read;
397  CFH->write_func = Gzip_write;
398  CFH->gets_func = Gzip_gets;
399  CFH->getc_func = Gzip_getc;
400  CFH->close_func = Gzip_close;
401  CFH->eof_func = Gzip_eof;
402  CFH->get_error_func = Gzip_get_error;
403 
404  CFH->compression_spec = compression_spec;
405 
406  CFH->private_data = NULL;
407 }
408 #else /* HAVE_LIBZ */
409 void
411  const pg_compress_specification compression_spec)
412 {
413  pg_fatal("this build does not support compression with %s", "gzip");
414 }
415 
416 void
418  const pg_compress_specification compression_spec)
419 {
420  pg_fatal("this build does not support compression with %s", "gzip");
421 }
422 #endif /* HAVE_LIBZ */
#define unconstify(underlying_type, expr)
Definition: c.h:1245
#define Assert(condition)
Definition: c.h:858
void InitCompressFileHandleGzip(CompressFileHandle *CFH, const pg_compress_specification compression_spec)
void InitCompressorGzip(CompressorState *cs, const pg_compress_specification compression_spec)
#define DEFAULT_IO_BUFFER_SIZE
Definition: compress_io.h:27
int errmsg(const char *fmt,...)
Definition: elog.c:1072
void * pg_malloc0(size_t size)
Definition: fe_memutils.c:53
void pg_free(void *ptr)
Definition: fe_memutils.c:105
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
#define free(a)
Definition: header.h:65
void ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH)
#define pg_fatal(...)
static PgChecksumMode mode
Definition: pg_checksums.c:56
const void size_t len
const void * data
while(p+4<=pend)
static char * buf
Definition: pg_test_fsync.c:73
#define strerror
Definition: port.h:251
#define snprintf
Definition: port.h:238
static int fd(const char *x, int i)
Definition: preproc-init.c:105
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
static pg_noinline void Size size
Definition: slab.c:607
bool(* open_write_func)(const char *path, const char *mode, CompressFileHandle *CFH)
Definition: compress_io.h:122
const char *(* get_error_func)(CompressFileHandle *CFH)
Definition: compress_io.h:181
bool(* write_func)(const void *ptr, size_t size, struct CompressFileHandle *CFH)
Definition: compress_io.h:139
int(* getc_func)(CompressFileHandle *CFH)
Definition: compress_io.h:161
char *(* gets_func)(char *s, int size, CompressFileHandle *CFH)
Definition: compress_io.h:152
bool(* eof_func)(CompressFileHandle *CFH)
Definition: compress_io.h:168
bool(* open_func)(const char *path, int fd, const char *mode, CompressFileHandle *CFH)
Definition: compress_io.h:111
pg_compress_specification compression_spec
Definition: compress_io.h:186
bool(* close_func)(CompressFileHandle *CFH)
Definition: compress_io.h:175
bool(* read_func)(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH)
Definition: compress_io.h:131
void * private_data
Definition: compress_io.h:87
void(* readData)(ArchiveHandle *AH, CompressorState *cs)
Definition: compress_io.h:56
pg_compress_specification compression_spec
Definition: compress_io.h:82
void(* end)(ArchiveHandle *AH, CompressorState *cs)
Definition: compress_io.h:67
ReadFunc readF
Definition: compress_io.h:72
void(* writeData)(ArchiveHandle *AH, CompressorState *cs, const void *data, size_t dLen)
Definition: compress_io.h:61
WriteFunc writeF
Definition: compress_io.h:77