PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
buffile.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * buffile.c
4 * Management of large buffered temporary files.
5 *
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/storage/file/buffile.c
11 *
12 * NOTES:
13 *
14 * BufFiles provide a very incomplete emulation of stdio atop virtual Files
15 * (as managed by fd.c). Currently, we only support the buffered-I/O
16 * aspect of stdio: a read or write of the low-level File occurs only
17 * when the buffer is filled or emptied. This is an even bigger win
18 * for virtual Files than for ordinary kernel files, since reducing the
19 * frequency with which a virtual File is touched reduces "thrashing"
20 * of opening/closing file descriptors.
21 *
22 * Note that BufFile structs are allocated with palloc(), and therefore
23 * will go away automatically at query/transaction end. Since the underlying
24 * virtual Files are made with OpenTemporaryFile, all resources for
25 * the file are certain to be cleaned up even if processing is aborted
26 * by ereport(ERROR). The data structures required are made in the
27 * palloc context that was current when the BufFile was created, and
28 * any external resources such as temp files are owned by the ResourceOwner
29 * that was current at that time.
30 *
31 * BufFile also supports temporary files that exceed the OS file size limit
32 * (by opening multiple fd.c temporary files). This is an essential feature
33 * for sorts and hashjoins on large amounts of data.
34 *
35 * BufFile supports temporary files that can be shared with other backends, as
36 * infrastructure for parallel execution. Such files need to be created as a
37 * member of a SharedFileSet that all participants are attached to.
38 *
39 * BufFile also supports temporary files that are used by a single backend
40 * when the corresponding files need to survive across transactions and
41 * need to be opened and closed multiple times. Such files need to be created
42 * as a member of a FileSet.
43 *-------------------------------------------------------------------------
44 */
45
46#include "postgres.h"
47
48#include "commands/tablespace.h"
49#include "executor/instrument.h"
50#include "miscadmin.h"
51#include "pgstat.h"
52#include "storage/buffile.h"
53#include "storage/bufmgr.h"
54#include "storage/fd.h"
55#include "utils/resowner.h"
56
/*
 * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
 * The reason is that we'd like large BufFiles to be spread across multiple
 * tablespaces when available.
 *
 * MAX_PHYSICAL_FILESIZE is the per-segment size limit in bytes (0x40000000,
 * i.e. 1 GB); BUFFILE_SEG_SIZE is the same limit expressed in BLCKSZ-sized
 * blocks.
 */
#define MAX_PHYSICAL_FILESIZE	0x40000000
#define BUFFILE_SEG_SIZE		(MAX_PHYSICAL_FILESIZE / BLCKSZ)
64
65/*
66 * This data structure represents a buffered file that consists of one or
67 * more physical files (each accessed through a virtual file descriptor
68 * managed by fd.c).
69 */
70struct BufFile
71{
72 int numFiles; /* number of physical files in set */
73 /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
74 File *files; /* palloc'd array with numFiles entries */
75
76 bool isInterXact; /* keep open over transactions? */
77 bool dirty; /* does buffer need to be written? */
78 bool readOnly; /* has the file been set to read only? */
79
80 FileSet *fileset; /* space for fileset based segment files */
81 const char *name; /* name of fileset based BufFile */
82
83 /*
84 * resowner is the ResourceOwner to use for underlying temp files. (We
85 * don't need to remember the memory context we're using explicitly,
86 * because after creation we only repalloc our arrays larger.)
87 */
89
90 /*
91 * "current pos" is position of start of buffer within the logical file.
92 * Position as seen by user of BufFile is (curFile, curOffset + pos).
93 */
94 int curFile; /* file index (0..n) part of current pos */
95 off_t curOffset; /* offset part of current pos */
96 int pos; /* next read/write position in buffer */
97 int nbytes; /* total # of valid bytes in buffer */
98
99 /*
100 * XXX Should ideally us PGIOAlignedBlock, but might need a way to avoid
101 * wasting per-file alignment padding when some users create many files.
102 */
104};
105
106static BufFile *makeBufFileCommon(int nfiles);
107static BufFile *makeBufFile(File firstfile);
108static void extendBufFile(BufFile *file);
109static void BufFileLoadBuffer(BufFile *file);
110static void BufFileDumpBuffer(BufFile *file);
111static void BufFileFlush(BufFile *file);
112static File MakeNewFileSetSegment(BufFile *buffile, int segment);
113
114/*
115 * Create BufFile and perform the common initialization.
116 */
117static BufFile *
119{
120 BufFile *file = (BufFile *) palloc(sizeof(BufFile));
121
122 file->numFiles = nfiles;
123 file->isInterXact = false;
124 file->dirty = false;
126 file->curFile = 0;
127 file->curOffset = 0;
128 file->pos = 0;
129 file->nbytes = 0;
130
131 return file;
132}
133
134/*
135 * Create a BufFile given the first underlying physical file.
136 * NOTE: caller must set isInterXact if appropriate.
137 */
138static BufFile *
140{
141 BufFile *file = makeBufFileCommon(1);
142
143 file->files = (File *) palloc(sizeof(File));
144 file->files[0] = firstfile;
145 file->readOnly = false;
146 file->fileset = NULL;
147 file->name = NULL;
148
149 return file;
150}
151
152/*
153 * Add another component temp file.
154 */
155static void
157{
158 File pfile;
159 ResourceOwner oldowner;
160
161 /* Be sure to associate the file with the BufFile's resource owner */
162 oldowner = CurrentResourceOwner;
164
165 if (file->fileset == NULL)
166 pfile = OpenTemporaryFile(file->isInterXact);
167 else
168 pfile = MakeNewFileSetSegment(file, file->numFiles);
169
170 Assert(pfile >= 0);
171
172 CurrentResourceOwner = oldowner;
173
174 file->files = (File *) repalloc(file->files,
175 (file->numFiles + 1) * sizeof(File));
176 file->files[file->numFiles] = pfile;
177 file->numFiles++;
178}
179
180/*
181 * Create a BufFile for a new temporary file (which will expand to become
182 * multiple temporary files if more than MAX_PHYSICAL_FILESIZE bytes are
183 * written to it).
184 *
185 * If interXact is true, the temp file will not be automatically deleted
186 * at end of transaction.
187 *
188 * Note: if interXact is true, the caller had better be calling us in a
189 * memory context, and with a resource owner, that will survive across
190 * transaction boundaries.
191 */
192BufFile *
193BufFileCreateTemp(bool interXact)
194{
195 BufFile *file;
196 File pfile;
197
198 /*
199 * Ensure that temp tablespaces are set up for OpenTemporaryFile to use.
200 * Possibly the caller will have done this already, but it seems useful to
201 * double-check here. Failure to do this at all would result in the temp
202 * files always getting placed in the default tablespace, which is a
203 * pretty hard-to-detect bug. Callers may prefer to do it earlier if they
204 * want to be sure that any required catalog access is done in some other
205 * resource context.
206 */
208
209 pfile = OpenTemporaryFile(interXact);
210 Assert(pfile >= 0);
211
212 file = makeBufFile(pfile);
213 file->isInterXact = interXact;
214
215 return file;
216}
217
218/*
219 * Build the name for a given segment of a given BufFile.
220 */
221static void
222FileSetSegmentName(char *name, const char *buffile_name, int segment)
223{
224 snprintf(name, MAXPGPATH, "%s.%d", buffile_name, segment);
225}
226
227/*
228 * Create a new segment file backing a fileset based BufFile.
229 */
230static File
231MakeNewFileSetSegment(BufFile *buffile, int segment)
232{
233 char name[MAXPGPATH];
234 File file;
235
236 /*
237 * It is possible that there are files left over from before a crash
238 * restart with the same name. In order for BufFileOpenFileSet() not to
239 * get confused about how many segments there are, we'll unlink the next
240 * segment number if it already exists.
241 */
242 FileSetSegmentName(name, buffile->name, segment + 1);
243 FileSetDelete(buffile->fileset, name, true);
244
245 /* Create the new segment. */
246 FileSetSegmentName(name, buffile->name, segment);
247 file = FileSetCreate(buffile->fileset, name);
248
249 /* FileSetCreate would've errored out */
250 Assert(file > 0);
251
252 return file;
253}
254
255/*
256 * Create a BufFile that can be discovered and opened read-only by other
257 * backends that are attached to the same SharedFileSet using the same name.
258 *
259 * The naming scheme for fileset based BufFiles is left up to the calling code.
260 * The name will appear as part of one or more filenames on disk, and might
261 * provide clues to administrators about which subsystem is generating
262 * temporary file data. Since each SharedFileSet object is backed by one or
263 * more uniquely named temporary directory, names don't conflict with
264 * unrelated SharedFileSet objects.
265 */
266BufFile *
267BufFileCreateFileSet(FileSet *fileset, const char *name)
268{
269 BufFile *file;
270
271 file = makeBufFileCommon(1);
272 file->fileset = fileset;
273 file->name = pstrdup(name);
274 file->files = (File *) palloc(sizeof(File));
275 file->files[0] = MakeNewFileSetSegment(file, 0);
276 file->readOnly = false;
277
278 return file;
279}
280
281/*
282 * Open a file that was previously created in another backend (or this one)
283 * with BufFileCreateFileSet in the same FileSet using the same name.
284 * The backend that created the file must have called BufFileClose() or
285 * BufFileExportFileSet() to make sure that it is ready to be opened by other
286 * backends and render it read-only. If missing_ok is true, which indicates
287 * that missing files can be safely ignored, then return NULL if the BufFile
288 * with the given name is not found, otherwise, throw an error.
289 */
290BufFile *
291BufFileOpenFileSet(FileSet *fileset, const char *name, int mode,
292 bool missing_ok)
293{
294 BufFile *file;
295 char segment_name[MAXPGPATH];
296 Size capacity = 16;
297 File *files;
298 int nfiles = 0;
299
300 files = palloc(sizeof(File) * capacity);
301
302 /*
303 * We don't know how many segments there are, so we'll probe the
304 * filesystem to find out.
305 */
306 for (;;)
307 {
308 /* See if we need to expand our file segment array. */
309 if (nfiles + 1 > capacity)
310 {
311 capacity *= 2;
312 files = repalloc(files, sizeof(File) * capacity);
313 }
314 /* Try to load a segment. */
315 FileSetSegmentName(segment_name, name, nfiles);
316 files[nfiles] = FileSetOpen(fileset, segment_name, mode);
317 if (files[nfiles] <= 0)
318 break;
319 ++nfiles;
320
322 }
323
324 /*
325 * If we didn't find any files at all, then no BufFile exists with this
326 * name.
327 */
328 if (nfiles == 0)
329 {
330 /* free the memory */
331 pfree(files);
332
333 if (missing_ok)
334 return NULL;
335
338 errmsg("could not open temporary file \"%s\" from BufFile \"%s\": %m",
339 segment_name, name)));
340 }
341
342 file = makeBufFileCommon(nfiles);
343 file->files = files;
344 file->readOnly = (mode == O_RDONLY);
345 file->fileset = fileset;
346 file->name = pstrdup(name);
347
348 return file;
349}
350
351/*
352 * Delete a BufFile that was created by BufFileCreateFileSet in the given
353 * FileSet using the given name.
354 *
355 * It is not necessary to delete files explicitly with this function. It is
356 * provided only as a way to delete files proactively, rather than waiting for
357 * the FileSet to be cleaned up.
358 *
359 * Only one backend should attempt to delete a given name, and should know
360 * that it exists and has been exported or closed otherwise missing_ok should
361 * be passed true.
362 */
363void
364BufFileDeleteFileSet(FileSet *fileset, const char *name, bool missing_ok)
365{
366 char segment_name[MAXPGPATH];
367 int segment = 0;
368 bool found = false;
369
370 /*
371 * We don't know how many segments the file has. We'll keep deleting
372 * until we run out. If we don't manage to find even an initial segment,
373 * raise an error.
374 */
375 for (;;)
376 {
377 FileSetSegmentName(segment_name, name, segment);
378 if (!FileSetDelete(fileset, segment_name, true))
379 break;
380 found = true;
381 ++segment;
382
384 }
385
386 if (!found && !missing_ok)
387 elog(ERROR, "could not delete unknown BufFile \"%s\"", name);
388}
389
390/*
391 * BufFileExportFileSet --- flush and make read-only, in preparation for sharing.
392 */
393void
395{
396 /* Must be a file belonging to a FileSet. */
397 Assert(file->fileset != NULL);
398
399 /* It's probably a bug if someone calls this twice. */
400 Assert(!file->readOnly);
401
402 BufFileFlush(file);
403 file->readOnly = true;
404}
405
406/*
407 * Close a BufFile
408 *
409 * Like fclose(), this also implicitly FileCloses the underlying File.
410 */
411void
413{
414 int i;
415
416 /* flush any unwritten data */
417 BufFileFlush(file);
418 /* close and delete the underlying file(s) */
419 for (i = 0; i < file->numFiles; i++)
420 FileClose(file->files[i]);
421 /* release the buffer space */
422 pfree(file->files);
423 pfree(file);
424}
425
426/*
427 * BufFileLoadBuffer
428 *
429 * Load some data into buffer, if possible, starting from curOffset.
430 * At call, must have dirty = false, pos and nbytes = 0.
431 * On exit, nbytes is number of bytes loaded.
432 */
433static void
435{
436 File thisfile;
437 instr_time io_start;
438 instr_time io_time;
439
440 /*
441 * Advance to next component file if necessary and possible.
442 */
443 if (file->curOffset >= MAX_PHYSICAL_FILESIZE &&
444 file->curFile + 1 < file->numFiles)
445 {
446 file->curFile++;
447 file->curOffset = 0;
448 }
449
450 thisfile = file->files[file->curFile];
451
452 if (track_io_timing)
453 INSTR_TIME_SET_CURRENT(io_start);
454 else
455 INSTR_TIME_SET_ZERO(io_start);
456
457 /*
458 * Read whatever we can get, up to a full bufferload.
459 */
460 file->nbytes = FileRead(thisfile,
461 file->buffer.data,
462 sizeof(file->buffer),
463 file->curOffset,
464 WAIT_EVENT_BUFFILE_READ);
465 if (file->nbytes < 0)
466 {
467 file->nbytes = 0;
470 errmsg("could not read file \"%s\": %m",
471 FilePathName(thisfile))));
472 }
473
474 if (track_io_timing)
475 {
476 INSTR_TIME_SET_CURRENT(io_time);
478 }
479
480 /* we choose not to advance curOffset here */
481
482 if (file->nbytes > 0)
484}
485
486/*
487 * BufFileDumpBuffer
488 *
489 * Dump buffer contents starting at curOffset.
490 * At call, should have dirty = true, nbytes > 0.
491 * On exit, dirty is cleared if successful write, and curOffset is advanced.
492 */
493static void
495{
496 int wpos = 0;
497 int bytestowrite;
498 File thisfile;
499
500 /*
501 * Unlike BufFileLoadBuffer, we must dump the whole buffer even if it
502 * crosses a component-file boundary; so we need a loop.
503 */
504 while (wpos < file->nbytes)
505 {
506 off_t availbytes;
507 instr_time io_start;
508 instr_time io_time;
509
510 /*
511 * Advance to next component file if necessary and possible.
512 */
513 if (file->curOffset >= MAX_PHYSICAL_FILESIZE)
514 {
515 while (file->curFile + 1 >= file->numFiles)
516 extendBufFile(file);
517 file->curFile++;
518 file->curOffset = 0;
519 }
520
521 /*
522 * Determine how much we need to write into this file.
523 */
524 bytestowrite = file->nbytes - wpos;
525 availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset;
526
527 if ((off_t) bytestowrite > availbytes)
528 bytestowrite = (int) availbytes;
529
530 thisfile = file->files[file->curFile];
531
532 if (track_io_timing)
533 INSTR_TIME_SET_CURRENT(io_start);
534 else
535 INSTR_TIME_SET_ZERO(io_start);
536
537 bytestowrite = FileWrite(thisfile,
538 file->buffer.data + wpos,
539 bytestowrite,
540 file->curOffset,
541 WAIT_EVENT_BUFFILE_WRITE);
542 if (bytestowrite <= 0)
545 errmsg("could not write to file \"%s\": %m",
546 FilePathName(thisfile))));
547
548 if (track_io_timing)
549 {
550 INSTR_TIME_SET_CURRENT(io_time);
552 }
553
554 file->curOffset += bytestowrite;
555 wpos += bytestowrite;
556
558 }
559 file->dirty = false;
560
561 /*
562 * At this point, curOffset has been advanced to the end of the buffer,
563 * ie, its original value + nbytes. We need to make it point to the
564 * logical file position, ie, original value + pos, in case that is less
565 * (as could happen due to a small backwards seek in a dirty buffer!)
566 */
567 file->curOffset -= (file->nbytes - file->pos);
568 if (file->curOffset < 0) /* handle possible segment crossing */
569 {
570 file->curFile--;
571 Assert(file->curFile >= 0);
573 }
574
575 /*
576 * Now we can set the buffer empty without changing the logical position
577 */
578 file->pos = 0;
579 file->nbytes = 0;
580}
581
582/*
583 * BufFileRead variants
584 *
585 * Like fread() except we assume 1-byte element size and report I/O errors via
586 * ereport().
587 *
588 * If 'exact' is true, then an error is also raised if the number of bytes
589 * read is not exactly 'size' (no short reads). If 'exact' and 'eofOK' are
590 * true, then reading zero bytes is ok.
591 */
592static size_t
593BufFileReadCommon(BufFile *file, void *ptr, size_t size, bool exact, bool eofOK)
594{
595 size_t start_size = size;
596 size_t nread = 0;
597 size_t nthistime;
598
599 BufFileFlush(file);
600
601 while (size > 0)
602 {
603 if (file->pos >= file->nbytes)
604 {
605 /* Try to load more data into buffer. */
606 file->curOffset += file->pos;
607 file->pos = 0;
608 file->nbytes = 0;
609 BufFileLoadBuffer(file);
610 if (file->nbytes <= 0)
611 break; /* no more data available */
612 }
613
614 nthistime = file->nbytes - file->pos;
615 if (nthistime > size)
616 nthistime = size;
617 Assert(nthistime > 0);
618
619 memcpy(ptr, file->buffer.data + file->pos, nthistime);
620
621 file->pos += nthistime;
622 ptr = (char *) ptr + nthistime;
623 size -= nthistime;
624 nread += nthistime;
625 }
626
627 if (exact &&
628 (nread != start_size && !(nread == 0 && eofOK)))
631 file->name ?
632 errmsg("could not read from file set \"%s\": read only %zu of %zu bytes",
633 file->name, nread, start_size) :
634 errmsg("could not read from temporary file: read only %zu of %zu bytes",
635 nread, start_size));
636
637 return nread;
638}
639
640/*
641 * Legacy interface where the caller needs to check for end of file or short
642 * reads.
643 */
644size_t
645BufFileRead(BufFile *file, void *ptr, size_t size)
646{
647 return BufFileReadCommon(file, ptr, size, false, false);
648}
649
650/*
651 * Require read of exactly the specified size.
652 */
653void
654BufFileReadExact(BufFile *file, void *ptr, size_t size)
655{
656 BufFileReadCommon(file, ptr, size, true, false);
657}
658
659/*
660 * Require read of exactly the specified size, but optionally allow end of
661 * file (in which case 0 is returned).
662 */
663size_t
664BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
665{
666 return BufFileReadCommon(file, ptr, size, true, eofOK);
667}
668
669/*
670 * BufFileWrite
671 *
672 * Like fwrite() except we assume 1-byte element size and report errors via
673 * ereport().
674 */
675void
676BufFileWrite(BufFile *file, const void *ptr, size_t size)
677{
678 size_t nthistime;
679
680 Assert(!file->readOnly);
681
682 while (size > 0)
683 {
684 if (file->pos >= BLCKSZ)
685 {
686 /* Buffer full, dump it out */
687 if (file->dirty)
688 BufFileDumpBuffer(file);
689 else
690 {
691 /* Hmm, went directly from reading to writing? */
692 file->curOffset += file->pos;
693 file->pos = 0;
694 file->nbytes = 0;
695 }
696 }
697
698 nthistime = BLCKSZ - file->pos;
699 if (nthistime > size)
700 nthistime = size;
701 Assert(nthistime > 0);
702
703 memcpy(file->buffer.data + file->pos, ptr, nthistime);
704
705 file->dirty = true;
706 file->pos += nthistime;
707 if (file->nbytes < file->pos)
708 file->nbytes = file->pos;
709 ptr = (const char *) ptr + nthistime;
710 size -= nthistime;
711 }
712}
713
714/*
715 * BufFileFlush
716 *
717 * Like fflush(), except that I/O errors are reported with ereport().
718 */
719static void
721{
722 if (file->dirty)
723 BufFileDumpBuffer(file);
724
725 Assert(!file->dirty);
726}
727
728/*
729 * BufFileSeek
730 *
731 * Like fseek(), except that target position needs two values in order to
732 * work when logical filesize exceeds maximum value representable by off_t.
733 * We do not support relative seeks across more than that, however.
734 * I/O errors are reported by ereport().
735 *
736 * Result is 0 if OK, EOF if not. Logical position is not moved if an
737 * impossible seek is attempted.
738 */
739int
740BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
741{
742 int newFile;
743 off_t newOffset;
744
745 switch (whence)
746 {
747 case SEEK_SET:
748 if (fileno < 0)
749 return EOF;
750 newFile = fileno;
751 newOffset = offset;
752 break;
753 case SEEK_CUR:
754
755 /*
756 * Relative seek considers only the signed offset, ignoring
757 * fileno. Note that large offsets (> 1 GB) risk overflow in this
758 * add, unless we have 64-bit off_t.
759 */
760 newFile = file->curFile;
761 newOffset = (file->curOffset + file->pos) + offset;
762 break;
763 case SEEK_END:
764
765 /*
766 * The file size of the last file gives us the end offset of that
767 * file.
768 */
769 newFile = file->numFiles - 1;
770 newOffset = FileSize(file->files[file->numFiles - 1]);
771 if (newOffset < 0)
774 errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
775 FilePathName(file->files[file->numFiles - 1]),
776 file->name)));
777 break;
778 default:
779 elog(ERROR, "invalid whence: %d", whence);
780 return EOF;
781 }
782 while (newOffset < 0)
783 {
784 if (--newFile < 0)
785 return EOF;
786 newOffset += MAX_PHYSICAL_FILESIZE;
787 }
788 if (newFile == file->curFile &&
789 newOffset >= file->curOffset &&
790 newOffset <= file->curOffset + file->nbytes)
791 {
792 /*
793 * Seek is to a point within existing buffer; we can just adjust
794 * pos-within-buffer, without flushing buffer. Note this is OK
795 * whether reading or writing, but buffer remains dirty if we were
796 * writing.
797 */
798 file->pos = (int) (newOffset - file->curOffset);
799 return 0;
800 }
801 /* Otherwise, must reposition buffer, so flush any dirty data */
802 BufFileFlush(file);
803
804 /*
805 * At this point and no sooner, check for seek past last segment. The
806 * above flush could have created a new segment, so checking sooner would
807 * not work (at least not with this code).
808 */
809
810 /* convert seek to "start of next seg" to "end of last seg" */
811 if (newFile == file->numFiles && newOffset == 0)
812 {
813 newFile--;
814 newOffset = MAX_PHYSICAL_FILESIZE;
815 }
816 while (newOffset > MAX_PHYSICAL_FILESIZE)
817 {
818 if (++newFile >= file->numFiles)
819 return EOF;
820 newOffset -= MAX_PHYSICAL_FILESIZE;
821 }
822 if (newFile >= file->numFiles)
823 return EOF;
824 /* Seek is OK! */
825 file->curFile = newFile;
826 file->curOffset = newOffset;
827 file->pos = 0;
828 file->nbytes = 0;
829 return 0;
830}
831
832void
833BufFileTell(BufFile *file, int *fileno, off_t *offset)
834{
835 *fileno = file->curFile;
836 *offset = file->curOffset + file->pos;
837}
838
839/*
840 * BufFileSeekBlock --- block-oriented seek
841 *
842 * Performs absolute seek to the start of the n'th BLCKSZ-sized block of
843 * the file. Note that users of this interface will fail if their files
844 * exceed BLCKSZ * PG_INT64_MAX bytes, but that is quite a lot; we don't
845 * work with tables bigger than that, either...
846 *
847 * Result is 0 if OK, EOF if not. Logical position is not moved if an
848 * impossible seek is attempted.
849 */
850int
852{
853 return BufFileSeek(file,
854 (int) (blknum / BUFFILE_SEG_SIZE),
855 (off_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ,
856 SEEK_SET);
857}
858
859/*
860 * Returns the amount of data in the given BufFile, in bytes.
861 *
862 * Returned value includes the size of any holes left behind by BufFileAppend.
863 * ereport()s on failure.
864 */
865int64
867{
868 int64 lastFileSize;
869
870 /* Get the size of the last physical file. */
871 lastFileSize = FileSize(file->files[file->numFiles - 1]);
872 if (lastFileSize < 0)
875 errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
876 FilePathName(file->files[file->numFiles - 1]),
877 file->name)));
878
879 return ((file->numFiles - 1) * (int64) MAX_PHYSICAL_FILESIZE) +
880 lastFileSize;
881}
882
883/*
884 * Append the contents of the source file to the end of the target file.
885 *
886 * Note that operation subsumes ownership of underlying resources from
887 * "source". Caller should never call BufFileClose against source having
888 * called here first. Resource owners for source and target must match,
889 * too.
890 *
891 * This operation works by manipulating lists of segment files, so the
892 * file content is always appended at a MAX_PHYSICAL_FILESIZE-aligned
893 * boundary, typically creating empty holes before the boundary. These
894 * areas do not contain any interesting data, and cannot be read from by
895 * caller.
896 *
897 * Returns the block number within target where the contents of source
898 * begins. Caller should apply this as an offset when working off block
899 * positions that are in terms of the original BufFile space.
900 */
901int64
903{
904 int64 startBlock = (int64) target->numFiles * BUFFILE_SEG_SIZE;
905 int newNumFiles = target->numFiles + source->numFiles;
906 int i;
907
908 Assert(source->readOnly);
909 Assert(!source->dirty);
910
911 if (target->resowner != source->resowner)
912 elog(ERROR, "could not append BufFile with non-matching resource owner");
913
914 target->files = (File *)
915 repalloc(target->files, sizeof(File) * newNumFiles);
916 for (i = target->numFiles; i < newNumFiles; i++)
917 target->files[i] = source->files[i - target->numFiles];
918 target->numFiles = newNumFiles;
919
920 return startBlock;
921}
922
923/*
924 * Truncate a BufFile created by BufFileCreateFileSet up to the given fileno
925 * and the offset.
926 */
927void
928BufFileTruncateFileSet(BufFile *file, int fileno, off_t offset)
929{
930 int numFiles = file->numFiles;
931 int newFile = fileno;
932 off_t newOffset = file->curOffset;
933 char segment_name[MAXPGPATH];
934 int i;
935
936 /*
937 * Loop over all the files up to the given fileno and remove the files
938 * that are greater than the fileno and truncate the given file up to the
939 * offset. Note that we also remove the given fileno if the offset is 0
940 * provided it is not the first file in which we truncate it.
941 */
942 for (i = file->numFiles - 1; i >= fileno; i--)
943 {
944 if ((i != fileno || offset == 0) && i != 0)
945 {
946 FileSetSegmentName(segment_name, file->name, i);
947 FileClose(file->files[i]);
948 if (!FileSetDelete(file->fileset, segment_name, true))
951 errmsg("could not delete fileset \"%s\": %m",
952 segment_name)));
953 numFiles--;
954 newOffset = MAX_PHYSICAL_FILESIZE;
955
956 /*
957 * This is required to indicate that we have deleted the given
958 * fileno.
959 */
960 if (i == fileno)
961 newFile--;
962 }
963 else
964 {
965 if (FileTruncate(file->files[i], offset,
966 WAIT_EVENT_BUFFILE_TRUNCATE) < 0)
969 errmsg("could not truncate file \"%s\": %m",
970 FilePathName(file->files[i]))));
971 newOffset = offset;
972 }
973 }
974
975 file->numFiles = numFiles;
976
977 /*
978 * If the truncate point is within existing buffer then we can just adjust
979 * pos within buffer.
980 */
981 if (newFile == file->curFile &&
982 newOffset >= file->curOffset &&
983 newOffset <= file->curOffset + file->nbytes)
984 {
985 /* No need to reset the current pos if the new pos is greater. */
986 if (newOffset <= file->curOffset + file->pos)
987 file->pos = (int) (newOffset - file->curOffset);
988
989 /* Adjust the nbytes for the current buffer. */
990 file->nbytes = (int) (newOffset - file->curOffset);
991 }
992 else if (newFile == file->curFile &&
993 newOffset < file->curOffset)
994 {
995 /*
996 * The truncate point is within the existing file but prior to the
997 * current position, so we can forget the current buffer and reset the
998 * current position.
999 */
1000 file->curOffset = newOffset;
1001 file->pos = 0;
1002 file->nbytes = 0;
1003 }
1004 else if (newFile < file->curFile)
1005 {
1006 /*
1007 * The truncate point is prior to the current file, so need to reset
1008 * the current position accordingly.
1009 */
1010 file->curFile = newFile;
1011 file->curOffset = newOffset;
1012 file->pos = 0;
1013 file->nbytes = 0;
1014 }
1015 /* Nothing to do, if the truncate point is beyond current file. */
1016}
void PrepareTempTablespaces(void)
Definition: tablespace.c:1331
BufFile * BufFileOpenFileSet(FileSet *fileset, const char *name, int mode, bool missing_ok)
Definition: buffile.c:291
int BufFileSeekBlock(BufFile *file, int64 blknum)
Definition: buffile.c:851
void BufFileExportFileSet(BufFile *file)
Definition: buffile.c:394
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:645
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:654
static void FileSetSegmentName(char *name, const char *buffile_name, int segment)
Definition: buffile.c:222
static BufFile * makeBufFileCommon(int nfiles)
Definition: buffile.c:118
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:193
#define BUFFILE_SEG_SIZE
Definition: buffile.c:63
static void BufFileLoadBuffer(BufFile *file)
Definition: buffile.c:434
static File MakeNewFileSetSegment(BufFile *buffile, int segment)
Definition: buffile.c:231
void BufFileTell(BufFile *file, int *fileno, off_t *offset)
Definition: buffile.c:833
static void extendBufFile(BufFile *file)
Definition: buffile.c:156
#define MAX_PHYSICAL_FILESIZE
Definition: buffile.c:62
static void BufFileFlush(BufFile *file)
Definition: buffile.c:720
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition: buffile.c:676
size_t BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
Definition: buffile.c:664
void BufFileTruncateFileSet(BufFile *file, int fileno, off_t offset)
Definition: buffile.c:928
BufFile * BufFileCreateFileSet(FileSet *fileset, const char *name)
Definition: buffile.c:267
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:740
int64 BufFileSize(BufFile *file)
Definition: buffile.c:866
static BufFile * makeBufFile(File firstfile)
Definition: buffile.c:139
static size_t BufFileReadCommon(BufFile *file, void *ptr, size_t size, bool exact, bool eofOK)
Definition: buffile.c:593
void BufFileClose(BufFile *file)
Definition: buffile.c:412
int64 BufFileAppend(BufFile *target, BufFile *source)
Definition: buffile.c:902
void BufFileDeleteFileSet(FileSet *fileset, const char *name, bool missing_ok)
Definition: buffile.c:364
static void BufFileDumpBuffer(BufFile *file)
Definition: buffile.c:494
bool track_io_timing
Definition: bufmgr.c:143
#define Assert(condition)
Definition: c.h:812
int64_t int64
Definition: c.h:482
size_t Size
Definition: c.h:559
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
char * FilePathName(File file)
Definition: fd.c:2483
void FileClose(File file)
Definition: fd.c:1977
File OpenTemporaryFile(bool interXact)
Definition: fd.c:1723
off_t FileSize(File file)
Definition: fd.c:2431
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2448
static ssize_t FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.h:196
int File
Definition: fd.h:51
static ssize_t FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.h:208
File FileSetOpen(FileSet *fileset, const char *name, int mode)
Definition: fileset.c:119
bool FileSetDelete(FileSet *fileset, const char *name, bool error_on_failure)
Definition: fileset.c:136
File FileSetCreate(FileSet *fileset, const char *name)
Definition: fileset.c:92
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
#define INSTR_TIME_ACCUM_DIFF(x, y, z)
Definition: instr_time.h:184
BufferUsage pgBufferUsage
Definition: instrument.c:20
int i
Definition: isn.c:72
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static PgChecksumMode mode
Definition: pg_checksums.c:55
#define MAXPGPATH
static rewind_source * source
Definition: pg_rewind.c:89
#define snprintf
Definition: port.h:238
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
static pg_noinline void Size size
Definition: slab.c:607
int nbytes
Definition: buffile.c:97
PGAlignedBlock buffer
Definition: buffile.c:103
int numFiles
Definition: buffile.c:72
bool dirty
Definition: buffile.c:77
FileSet * fileset
Definition: buffile.c:80
File * files
Definition: buffile.c:74
ResourceOwner resowner
Definition: buffile.c:88
bool isInterXact
Definition: buffile.c:76
bool readOnly
Definition: buffile.c:78
int pos
Definition: buffile.c:96
int curFile
Definition: buffile.c:94
off_t curOffset
Definition: buffile.c:95
const char * name
Definition: buffile.c:81
instr_time temp_blk_write_time
Definition: instrument.h:41
instr_time temp_blk_read_time
Definition: instrument.h:40
int64 temp_blks_read
Definition: instrument.h:34
int64 temp_blks_written
Definition: instrument.h:35
#define wpos(wep)
Definition: tsrank.c:27
char data[BLCKSZ]
Definition: c.h:1073
const char * name