PostgreSQL Source Code  git master
logtape.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * logtape.c
4  * Management of "logical tapes" within temporary files.
5  *
6  * This module exists to support sorting via multiple merge passes (see
7  * tuplesort.c). Merging is an ideal algorithm for tape devices, but if
8  * we implement it on disk by creating a separate file for each "tape",
9  * there is an annoying problem: the peak space usage is at least twice
10  * the volume of actual data to be sorted. (This must be so because each
11  * datum will appear in both the input and output tapes of the final
12  * merge pass. For seven-tape polyphase merge, which is otherwise a
13  * pretty good algorithm, peak usage is more like 4x actual data volume.)
14  *
15  * We can work around this problem by recognizing that any one tape
16  * dataset (with the possible exception of the final output) is written
17  * and read exactly once in a perfectly sequential manner. Therefore,
18  * a datum once read will not be required again, and we can recycle its
19  * space for use by the new tape dataset(s) being generated. In this way,
20  * the total space usage is essentially just the actual data volume, plus
21  * insignificant bookkeeping and start/stop overhead.
22  *
23  * Few OSes allow arbitrary parts of a file to be released back to the OS,
24  * so we have to implement this space-recycling ourselves within a single
25  * logical file. logtape.c exists to perform this bookkeeping and provide
26  * the illusion of N independent tape devices to tuplesort.c. Note that
27  * logtape.c itself depends on buffile.c to provide a "logical file" of
28  * larger size than the underlying OS may support.
29  *
30  * For simplicity, we allocate and release space in the underlying file
31  * in BLCKSZ-size blocks. Space allocation boils down to keeping track
32  * of which blocks in the underlying file belong to which logical tape,
33  * plus any blocks that are free (recycled and not yet reused).
34  * The blocks in each logical tape form a chain, with a prev- and next-
35  * pointer in each block.
36  *
37  * The initial write pass is guaranteed to fill the underlying file
38  * perfectly sequentially, no matter how data is divided into logical tapes.
39  * Once we begin merge passes, the access pattern becomes considerably
40  * less predictable --- but the seeking involved should be comparable to
41  * what would happen if we kept each logical tape in a separate file,
42  * so there's no serious performance penalty paid to obtain the space
43  * savings of recycling. We try to localize the write accesses by always
44  * writing to the lowest-numbered free block when we have a choice; it's
45  * not clear this helps much, but it can't hurt. (XXX perhaps a LIFO
46  * policy for free blocks would be better?)
47  *
48  * To further make the I/Os more sequential, we can use a larger buffer
49  * when reading, and read multiple blocks from the same tape in one go,
50  * whenever the buffer becomes empty.
51  *
52  * To support the above policy of writing to the lowest free block, the
53  * freelist is a min heap.
54  *
55  * Since all the bookkeeping and buffer memory is allocated with palloc(),
56  * and the underlying file(s) are made with OpenTemporaryFile, all resources
57  * for a logical tape set are certain to be cleaned up even if processing
58  * is aborted by ereport(ERROR). To avoid confusion, the caller should take
59  * care that all calls for a single LogicalTapeSet are made in the same
60  * palloc context.
61  *
62  * To support parallel sort operations involving coordinated callers to
63  * tuplesort.c routines across multiple workers, it is necessary to
64  * concatenate each worker BufFile/tapeset into one single logical tapeset
65  * managed by the leader. Workers should have produced one final
66  * materialized tape (their entire output) when this happens in leader.
67  * There will always be the same number of runs as input tapes, and the same
68  * number of input tapes as participants (worker Tuplesortstates).
69  *
70  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
71  * Portions Copyright (c) 1994, Regents of the University of California
72  *
73  * IDENTIFICATION
74  * src/backend/utils/sort/logtape.c
75  *
76  *-------------------------------------------------------------------------
77  */
78 
79 #include "postgres.h"
80 
81 #include "storage/buffile.h"
82 #include "utils/builtins.h"
83 #include "utils/logtape.h"
84 #include "utils/memdebug.h"
85 #include "utils/memutils.h"
86 
87 /*
88  * A TapeBlockTrailer is stored at the end of each BLCKSZ block.
89  *
90  * The first block of a tape has prev == -1. The last block of a tape
91  * stores the number of valid bytes on the block, negated, in 'next'.
92  * Therefore next < 0 indicates the last block.
93  */
94 typedef struct TapeBlockTrailer
95 {
96  long prev; /* previous block on this tape, or -1 on first
97  * block */
98  long next; /* next block on this tape, or # of valid
99  * bytes on last block (if < 0) */
 /*
  * NOTE(review): the line that closes this typedef (listing line 100) is
  * missing from this copy. The name TapeBlockTrailer is used as a type by
  * the macros that follow, so restore the closing line from upstream
  * before compiling.
  */
101 
/*
 * Accessors for the trailer at the end of a BLCKSZ-sized block image.
 *
 * 'buf' is a pointer to the first byte of the block.  The argument is
 * parenthesized inside TapeBlockGetTrailer so that the (char *) cast applies
 * to the whole argument expression, not just to its first operand.
 *
 * Note that TapeBlockGetNBytes evaluates 'buf' more than once, so do not
 * pass an expression with side effects.
 */
#define TapeBlockPayloadSize  (BLCKSZ - sizeof(TapeBlockTrailer))
#define TapeBlockGetTrailer(buf) \
	((TapeBlockTrailer *) ((char *) (buf) + TapeBlockPayloadSize))

/* A negative 'next' marks the last block of a tape. */
#define TapeBlockIsLast(buf) (TapeBlockGetTrailer(buf)->next < 0)
/* Valid payload bytes: negated 'next' on the last block, else a full payload. */
#define TapeBlockGetNBytes(buf) \
	(TapeBlockIsLast(buf) ? \
	 (- TapeBlockGetTrailer(buf)->next) : TapeBlockPayloadSize)
/* Mark the block as last by storing the negated byte count in 'next'. */
#define TapeBlockSetNBytes(buf, nbytes) \
	(TapeBlockGetTrailer(buf)->next = -(nbytes))
112 
113 /*
114  * When multiple tapes are being written to concurrently (as in HashAgg),
115  * avoid excessive fragmentation by preallocating block numbers to individual
116  * tapes. Each preallocation doubles in size starting at
117  * TAPE_WRITE_PREALLOC_MIN blocks up to TAPE_WRITE_PREALLOC_MAX blocks.
118  *
119  * No filesystem operations are performed for preallocation; only the block
120  * numbers are reserved. This may lead to sparse writes, which will cause
121  * ltsWriteBlock() to fill in holes with zeros.
122  */
123 #define TAPE_WRITE_PREALLOC_MIN 8
124 #define TAPE_WRITE_PREALLOC_MAX 128
125 
126 /*
127  * This data structure represents a single "logical tape" within the set
128  * of logical tapes stored in the same file.
129  *
130  * While writing, we hold the current partially-written data block in the
131  * buffer. While reading, we can hold multiple blocks in the buffer. Note
132  * that we don't retain the trailers of a block when it's read into the
133  * buffer. The buffer therefore contains one large contiguous chunk of data
134  * from the tape.
135  */
136 typedef struct LogicalTape
137 {
138  bool writing; /* T while in write phase */
139  bool frozen; /* T if blocks should not be freed when read */
140  bool dirty; /* does buffer need to be written? */
141 
142  /*
143  * Block numbers of the first, current, and next block of the tape.
144  *
145  * The "current" block number is only valid when writing, or reading from
146  * a frozen tape. (When reading from an unfrozen tape, we use a larger
147  * read buffer that holds multiple blocks, so the "current" block is
148  * ambiguous.)
149  *
150  * When concatenation of worker tape BufFiles is performed, an offset to
151  * the first block in the unified BufFile space is applied during reads.
152  */
 /*
  * NOTE(review): listing lines 153-156 are missing from this copy. The
  * rest of the file reads and writes lt->firstBlockNumber,
  * lt->curBlockNumber, lt->nextBlockNumber and lt->offsetBlockNumber
  * (all assigned long constants in ltsInitTape), so those four member
  * declarations presumably belong here -- restore them from upstream.
  */
157 
158  /*
159  * Buffer for current data block(s).
160  */
161  char *buffer; /* physical buffer (separately palloc'd) */
162  int buffer_size; /* allocated size of the buffer */
163  int max_size; /* highest useful, safe buffer_size */
164  int pos; /* next read/write position in buffer */
165  int nbytes; /* total # of valid bytes in buffer */
166 
167  /*
168  * Preallocated block numbers are held in an array sorted in descending
169  * order; blocks are consumed from the end of the array (lowest block
170  * numbers first).
171  */
172  long *prealloc;
173  int nprealloc; /* number of elements in list */
174  int prealloc_size; /* number of elements list can hold */
175 } LogicalTape;
176 
177 /*
178  * This data structure represents a set of related "logical tapes" sharing
179  * space in a single underlying file. (But that "file" may be multiple files
180  * if needed to escape OS limits on file size; buffile.c handles that for us.)
181  * The number of tapes is fixed at creation.
182  */
/*
 * NOTE(review): the line that opens this definition (listing line 183) is
 * missing from this copy. The members below are accessed through
 * "LogicalTapeSet *" everywhere in this file, so this is the body of the
 * LogicalTapeSet structure -- restore the opening line from upstream.
 */
184 {
185  BufFile *pfile; /* underlying file for whole tape set */
186 
187  /*
188  * File size tracking. nBlocksWritten is the size of the underlying file,
189  * in BLCKSZ blocks. nBlocksAllocated counts the block numbers handed out
190  * by ltsGetFreeBlock() by extending the file (recycled blocks do not
191  * change it), and it is always greater than or equal to nBlocksWritten.
192  * Blocks between nBlocksWritten and nBlocksAllocated are blocks that have
193  * been allocated for a tape, but have not been written to the underlying
194  * file yet. nHoleBlocks tracks the total number of blocks that are in
195  * unused holes between worker spaces following BufFile concatenation.
196  */
197  long nBlocksAllocated; /* # of blocks allocated */
198  long nBlocksWritten; /* # of blocks used in underlying file */
199  long nHoleBlocks; /* # of "hole" blocks left */
200 
201  /*
202  * We store the numbers of recycled-and-available blocks in freeBlocks[].
203  * When there are no such blocks, we extend the underlying file.
204  *
205  * If forgetFreeSpace is true then any freed blocks are simply forgotten
206  * rather than being remembered in freeBlocks[]. See notes for
207  * LogicalTapeSetForgetFreeSpace().
208  */
209  bool forgetFreeSpace; /* are we remembering free blocks? */
210  long *freeBlocks; /* resizable array holding minheap */
211  long nFreeBlocks; /* # of currently free blocks */
212  Size freeBlocksLen; /* current allocated length of freeBlocks[] */
213 
214  /* The array of logical tapes. */
215  int nTapes; /* # of logical tapes in set */
216  LogicalTape *tapes; /* has nTapes entries */
217 };
218 
219 static void ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer);
220 static void ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer);
221 static long ltsGetFreeBlock(LogicalTapeSet *lts);
222 static long ltsGetPreallocBlock(LogicalTapeSet *lts, LogicalTape *lt);
223 static void ltsReleaseBlock(LogicalTapeSet *lts, long blocknum);
224 static void ltsConcatWorkerTapes(LogicalTapeSet *lts, TapeShare *shared,
225  SharedFileSet *fileset);
226 static void ltsInitTape(LogicalTape *lt);
227 static void ltsInitReadBuffer(LogicalTapeSet *lts, LogicalTape *lt);
228 
229 
230 /*
231  * Write a block-sized buffer to the specified block of the underlying file.
232  *
233  * No need for an error return convention; we ereport() on any error.
234  */
235 static void
236 ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer)
237 {
238  /*
239  * BufFile does not support "holes", so if we're about to write a block
240  * that's past the current end of file, fill the space between the current
241  * end of file and the target block with zeros.
242  *
243  * This should happen rarely, otherwise you are not writing very
244  * sequentially. In current use, this only happens when the sort ends
245  * writing a run, and switches to another tape. The last block of the
246  * previous tape isn't flushed to disk until the end of the sort, so you
247  * get a one-block hole, where the last block of the previous tape will
248  * later go.
249  *
250  * Note that BufFile concatenation can leave "holes" in BufFile between
251  * worker-owned block ranges. These are tracked for reporting purposes
252  * only. We never read from nor write to these hole blocks, and so they
253  * are not considered here.
254  */
255  while (blocknum > lts->nBlocksWritten)
256  {
257  PGAlignedBlock zerobuf;
258 
259  MemSet(zerobuf.data, 0, sizeof(zerobuf));
260 
 /*
  * The recursive call targets block nBlocksWritten itself, so it skips
  * this loop, writes one zero block at the end of the file and bumps
  * nBlocksWritten; that increment is what terminates the loop.
  */
261  ltsWriteBlock(lts, lts->nBlocksWritten, zerobuf.data);
262  }
263 
264  /* Write the requested block */
265  if (BufFileSeekBlock(lts->pfile, blocknum) != 0)
266  ereport(ERROR,
 /*
  * NOTE(review): listing line 267 is missing from this copy, which leaves
  * the ereport() call with one more ')' than '('. Presumably it carried
  * the opening parenthesis and an error-code argument -- restore it from
  * upstream.
  */
268  errmsg("could not seek to block %ld of temporary file",
269  blocknum)));
270  BufFileWrite(lts->pfile, buffer, BLCKSZ);
271 
272  /* Update nBlocksWritten, if we extended the file */
273  if (blocknum == lts->nBlocksWritten)
274  lts->nBlocksWritten++;
275 }
276 
277 /*
278  * Read a block-sized buffer from the specified block of the underlying file.
279  *
280  * No need for an error return convention; we ereport() on any error. This
281  * module should never attempt to read a block it doesn't know is there.
282  */
283 static void
284 ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer)
285 {
286  size_t nread;
287 
 /* Position the underlying BufFile at the requested block. */
288  if (BufFileSeekBlock(lts->pfile, blocknum) != 0)
289  ereport(ERROR,
 /*
  * NOTE(review): listing line 290 is missing from this copy (the
  * parentheses of this ereport() call do not balance without it) --
  * restore it from upstream.
  */
291  errmsg("could not seek to block %ld of temporary file",
292  blocknum)));
 /* A short read is treated as an error: a whole BLCKSZ block is required. */
293  nread = BufFileRead(lts->pfile, buffer, BLCKSZ);
294  if (nread != BLCKSZ)
295  ereport(ERROR,
 /*
  * NOTE(review): listing line 296 is missing from this copy, same
  * symptom as above -- restore it from upstream.
  */
297  errmsg("could not read block %ld of temporary file: read only %zu of %zu bytes",
298  blocknum, nread, (size_t) BLCKSZ)));
299 }
300 
301 /*
302  * Read as many blocks as we can into the per-tape buffer.
303  *
304  * Returns true if anything was read, 'false' on EOF.
305  */
306 static bool
 /*
  * NOTE(review): listing line 307 (the function name and parameter list)
  * is missing from this copy. The body uses 'lts' and 'lt', and callers in
  * this file invoke ltsReadFillBuffer(lts, lt) -- restore the line from
  * upstream.
  */
308 {
309  lt->pos = 0;
310  lt->nbytes = 0;
311 
312  do
313  {
 /*
  * Each block is read in at the current end of valid data. Only the
  * payload bytes are added to nbytes below, so the trailer of this block
  * is overwritten by the next block read, keeping the payloads contiguous.
  */
314  char *thisbuf = lt->buffer + lt->nbytes;
315  long datablocknum = lt->nextBlockNumber;
316 
317  /* Fetch next block number */
318  if (datablocknum == -1L)
319  break; /* EOF */
320  /* Apply worker offset, needed for leader tapesets */
321  datablocknum += lt->offsetBlockNumber;
322 
323  /* Read the block */
324  ltsReadBlock(lts, datablocknum, (void *) thisbuf);
 /* Unless the tape is frozen, the block's space is recycled once read. */
325  if (!lt->frozen)
326  ltsReleaseBlock(lts, datablocknum);
 /*
  * NOTE(review): listing line 327 is missing from this copy; a statement
  * may have been lost here -- restore it from upstream.
  */
328 
329  lt->nbytes += TapeBlockGetNBytes(thisbuf);
330  if (TapeBlockIsLast(thisbuf))
331  {
332  lt->nextBlockNumber = -1L;
333  /* EOF */
334  break;
335  }
336  else
337  lt->nextBlockNumber = TapeBlockGetTrailer(thisbuf)->next;
338 
339  /* Advance to next block, if we have buffer space left */
 /*
  * ltsReadBlock always transfers a full BLCKSZ bytes, so another
  * iteration is attempted only while more than BLCKSZ bytes remain free.
  */
340  } while (lt->buffer_size - lt->nbytes > BLCKSZ);
341 
342  return (lt->nbytes > 0);
343 }
344 
/*
 * Exchange the heap entries at indexes 'a' and 'b'.
 *
 * The temporary has the same type as the heap elements (long), so a value
 * is never routed through an unsigned type and converted back.
 */
static inline void
swap_nodes(long *heap, unsigned long a, unsigned long b)
{
	long		held = heap[a];

	heap[a] = heap[b];
	heap[b] = held;
}
354 
/*
 * Index of the left child of heap node 'i' in the array-backed binary heap.
 */
static inline unsigned long
left_offset(unsigned long i)
{
	unsigned long child = (i << 1) + 1;

	return child;
}
360 
/*
 * Index of the right child of heap node 'i' in the array-backed binary heap.
 *
 * The parameter is unsigned long, like left_offset() and parent_offset(), so
 * that the index is neither truncated on the way in nor computed at the
 * narrower width of plain unsigned.
 */
static inline unsigned long
right_offset(unsigned long i)
{
	return 2 * i + 2;
}
366 
/*
 * Index of the parent of heap node 'i' in the array-backed binary heap.
 * Callers are expected to pass i > 0; the root has no parent.
 */
static inline unsigned long
parent_offset(unsigned long i)
{
	unsigned long above = i - 1;

	return above >> 1;
}
372 
373 /*
374  * Select the lowest currently unused block by taking the first element from
375  * the freelist min heap.
376  */
377 static long
379 {
380  long *heap = lts->freeBlocks;
381  long blocknum;
382  int heapsize;
383  unsigned long pos;
384 
385  /* freelist empty; allocate a new block */
386  if (lts->nFreeBlocks == 0)
387  return lts->nBlocksAllocated++;
388 
389  if (lts->nFreeBlocks == 1)
390  {
391  lts->nFreeBlocks--;
392  return lts->freeBlocks[0];
393  }
394 
395  /* take top of minheap */
396  blocknum = heap[0];
397 
398  /* replace with end of minheap array */
399  heap[0] = heap[--lts->nFreeBlocks];
400 
401  /* sift down */
402  pos = 0;
403  heapsize = lts->nFreeBlocks;
404  while (true)
405  {
406  unsigned long left = left_offset(pos);
407  unsigned long right = right_offset(pos);
408  unsigned long min_child;
409 
410  if (left < heapsize && right < heapsize)
411  min_child = (heap[left] < heap[right]) ? left : right;
412  else if (left < heapsize)
413  min_child = left;
414  else if (right < heapsize)
415  min_child = right;
416  else
417  break;
418 
419  if (heap[min_child] >= heap[pos])
420  break;
421 
422  swap_nodes(heap, min_child, pos);
423  pos = min_child;
424  }
425 
426  return blocknum;
427 }
428 
429 /*
430  * Return the lowest free block number from the tape's preallocation list.
431  * Refill the preallocation list if necessary.
432  */
433 static long
 /*
  * NOTE(review): listing line 434 (the function name and parameter list)
  * is missing from this copy. The prototype near the top of the file
  * declares ltsGetPreallocBlock(LogicalTapeSet *lts, LogicalTape *lt) --
  * restore the line from upstream.
  */
435 {
436  /* sorted in descending order, so return the last element */
437  if (lt->nprealloc > 0)
438  return lt->prealloc[--lt->nprealloc];
439 
440  if (lt->prealloc == NULL)
441  {
 /*
  * NOTE(review): listing line 442 is missing from this copy. As shown,
  * prealloc_size is still 0 when palloc() is called for a fresh tape;
  * presumably the lost line sets the initial size (the file documents
  * TAPE_WRITE_PREALLOC_MIN as the starting size) -- restore it from
  * upstream.
  */
443  lt->prealloc = (long *) palloc(sizeof(long) * lt->prealloc_size);
444  }
445  else if (lt->prealloc_size < TAPE_WRITE_PREALLOC_MAX)
446  {
447  /* when the preallocation list runs out, double the size */
448  lt->prealloc_size *= 2;
 /*
  * NOTE(review): listing lines 449-450 are missing from this copy;
  * presumably they bound the doubled size -- confirm against upstream.
  */
451  lt->prealloc = (long *) repalloc(lt->prealloc,
452  sizeof(long) * lt->prealloc_size);
453  }
454 
455  /* refill preallocation list */
 /*
  * The array is filled from the last slot backwards. The Assert below
  * expects each block obtained to be larger than the one obtained just
  * before it, which yields the descending order.
  */
456  lt->nprealloc = lt->prealloc_size;
457  for (int i = lt->nprealloc; i > 0; i--)
458  {
459  lt->prealloc[i - 1] = ltsGetFreeBlock(lts);
460 
461  /* verify descending order */
462  Assert(i == lt->nprealloc || lt->prealloc[i - 1] > lt->prealloc[i]);
463  }
464 
465  return lt->prealloc[--lt->nprealloc];
466 }
467 
468 /*
469  * Return a block# to the freelist.
470  */
471 static void
472 ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
473 {
474  long *heap;
475  unsigned long pos;
476 
477  /*
478  * Do nothing if we're no longer interested in remembering free space.
479  */
480  if (lts->forgetFreeSpace)
481  return;
482 
483  /*
484  * Enlarge freeBlocks array if full.
485  */
486  if (lts->nFreeBlocks >= lts->freeBlocksLen)
487  {
488  /*
489  * If the freelist becomes very large, just return and leak this free
490  * block.
491  */
492  if (lts->freeBlocksLen * 2 > MaxAllocSize)
493  return;
494 
495  lts->freeBlocksLen *= 2;
496  lts->freeBlocks = (long *) repalloc(lts->freeBlocks,
497  lts->freeBlocksLen * sizeof(long));
498  }
499 
500  heap = lts->freeBlocks;
501  pos = lts->nFreeBlocks;
502 
503  /* place entry at end of minheap array */
504  heap[pos] = blocknum;
505  lts->nFreeBlocks++;
506 
507  /* sift up */
508  while (pos != 0)
509  {
510  unsigned long parent = parent_offset(pos);
511 
512  if (heap[parent] < heap[pos])
513  break;
514 
515  swap_nodes(heap, parent, pos);
516  pos = parent;
517  }
518 }
519 
520 /*
521  * Claim ownership of a set of logical tapes from existing shared BufFiles.
522  *
523  * Caller should be leader process. Though tapes are marked as frozen in
524  * workers, they are not frozen when opened within leader, since unfrozen tapes
525  * use a larger read buffer. (Frozen tapes have smaller read buffer, optimized
526  * for random access.)
527  */
528 static void
 /*
  * NOTE(review): listing line 529 (the function name and first
  * parameters) is missing from this copy. The prototype near the top of
  * the file declares ltsConcatWorkerTapes(LogicalTapeSet *lts,
  * TapeShare *shared, SharedFileSet *fileset) -- restore the line from
  * upstream.
  */
530  SharedFileSet *fileset)
531 {
532  LogicalTape *lt = NULL;
533  long tapeblocks = 0L;
534  long nphysicalblocks = 0L;
535  int i;
536 
537  /* Should have at least one worker tape, plus leader's tape */
538  Assert(lts->nTapes >= 2);
539 
540  /*
541  * Build concatenated view of all BufFiles, remembering the block number
542  * where each source file begins. No changes are needed for leader/last
543  * tape.
544  */
545  for (i = 0; i < lts->nTapes - 1; i++)
546  {
547  char filename[MAXPGPATH];
548  BufFile *file;
549  int64 filesize;
550 
551  lt = &lts->tapes[i];
552 
 /* The shared file for tape i is named by the decimal text of i. */
553  pg_itoa(i, filename);
554  file = BufFileOpenShared(fileset, filename);
555  filesize = BufFileSize(file);
556 
557  /*
558  * Stash first BufFile, and concatenate subsequent BufFiles to that.
559  * Store block offset into each tape as we go.
560  */
561  lt->firstBlockNumber = shared[i].firstblocknumber;
562  if (i == 0)
563  {
564  lts->pfile = file;
565  lt->offsetBlockNumber = 0L;
566  }
567  else
568  {
569  lt->offsetBlockNumber = BufFileAppend(lts->pfile, file);
570  }
571  /* Don't allocate more for read buffer than could possibly help */
572  lt->max_size = Min(MaxAllocSize, filesize);
573  tapeblocks = filesize / BLCKSZ;
574  nphysicalblocks += tapeblocks;
575  }
576 
577  /*
578  * Set # of allocated blocks, as well as # blocks written. Use extent of
579  * new BufFile space (from 0 to end of last worker's tape space) for this.
580  * Allocated/written blocks should include space used by holes left
581  * between concatenated BufFiles.
582  */
 /*
  * After the loop, 'lt' and 'tapeblocks' still describe the last worker
  * tape processed (index nTapes - 2), so the sum below is the end of that
  * tape's range in the concatenated file.
  */
583  lts->nBlocksAllocated = lt->offsetBlockNumber + tapeblocks;
584  lts->nBlocksWritten = lts->nBlocksAllocated;
585 
586  /*
587  * Compute number of hole blocks so that we can later work backwards, and
588  * instrument number of physical blocks. We don't simply use physical
589  * blocks directly for instrumentation because this would break if we ever
590  * subsequently wrote to the leader tape.
591  *
592  * Working backwards like this keeps our options open. If shared BufFiles
593  * ever support being written to post-export, logtape.c can automatically
594  * take advantage of that. We'd then support writing to the leader tape
595  * while recycling space from worker tapes, because the leader tape has a
596  * zero offset (write routines won't need to have extra logic to apply an
597  * offset).
598  *
599  * The only thing that currently prevents writing to the leader tape from
600  * working is the fact that BufFiles opened using BufFileOpenShared() are
601  * read-only by definition, but that could be changed if it seemed
602  * worthwhile. For now, writing to the leader tape will raise a "Bad file
603  * descriptor" error, so tuplesort must avoid writing to the leader tape
604  * altogether.
605  */
606  lts->nHoleBlocks = lts->nBlocksAllocated - nphysicalblocks;
607 }
608 
609 /*
610  * Initialize per-tape struct. Note we allocate the I/O buffer lazily.
611  */
612 static void
614 {
615  lt->writing = true;
616  lt->frozen = false;
617  lt->dirty = false;
618  lt->firstBlockNumber = -1L;
619  lt->curBlockNumber = -1L;
620  lt->nextBlockNumber = -1L;
621  lt->offsetBlockNumber = 0L;
622  lt->buffer = NULL;
623  lt->buffer_size = 0;
624  /* palloc() larger than MaxAllocSize would fail */
625  lt->max_size = MaxAllocSize;
626  lt->pos = 0;
627  lt->nbytes = 0;
628  lt->prealloc = NULL;
629  lt->nprealloc = 0;
630  lt->prealloc_size = 0;
631 }
632 
633 /*
634  * Lazily allocate and initialize the read buffer. This avoids waste when many
635  * tapes are open at once, but not all are active between rewinding and
636  * reading.
637  */
638 static void
 /*
  * NOTE(review): listing line 639 (the function name and parameter list)
  * is missing from this copy. The prototype near the top of the file
  * declares ltsInitReadBuffer(LogicalTapeSet *lts, LogicalTape *lt) --
  * restore the line from upstream.
  */
640 {
 /* buffer_size must already have been chosen by the caller's rewind. */
641  Assert(lt->buffer_size > 0);
642  lt->buffer = palloc(lt->buffer_size);
643 
644  /* Read the first block, or reset if tape is empty */
 /*
  * NOTE(review): listing line 645 is missing from this copy. Presumably
  * it positions the tape at its first block before the fill below --
  * restore it from upstream.
  */
646  lt->pos = 0;
647  lt->nbytes = 0;
648  ltsReadFillBuffer(lts, lt);
649 }
650 
651 /*
652  * Create a set of logical tapes in a temporary underlying file.
653  *
654  * Each tape is initialized in write state. Serial callers pass ntapes,
655  * NULL argument for shared, and -1 for worker. Parallel worker callers
656  * pass ntapes, a shared file handle, NULL shared argument, and their own
657  * worker number. Leader callers, which claim shared worker tapes here,
658  * must supply non-sentinel values for all arguments except worker number,
659  * which should be -1.
660  *
661  * Leader caller is passing back an array of metadata each worker captured
662  * when LogicalTapeFreeze() was called for their final result tapes. Passed
663  * tapes array is actually sized ntapes - 1, because it includes only
664  * worker tapes, whereas leader requires its own leader tape. Note that we
665  * rely on the assumption that reclaimed worker tapes will only be read
666  * from once by leader, and never written to again (tapes are initialized
667  * for writing, but that's only to be consistent). Leader may not write to
668  * its own tape purely due to a restriction in the shared buffile
669  * infrastructure that may be lifted in the future.
670  */
/*
 * NOTE(review): listing line 671 (the return type) is missing from this
 * copy. The function returns 'lts', a LogicalTapeSet pointer -- restore the
 * line from upstream.
 */
672 LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
673  int worker)
674 {
675  LogicalTapeSet *lts;
676  int i;
677 
678  /*
679  * Create top-level struct including per-tape LogicalTape structs.
680  */
681  Assert(ntapes > 0);
682  lts = (LogicalTapeSet *) palloc(sizeof(LogicalTapeSet));
683  lts->nBlocksAllocated = 0L;
684  lts->nBlocksWritten = 0L;
685  lts->nHoleBlocks = 0L;
686  lts->forgetFreeSpace = false;
687  lts->freeBlocksLen = 32; /* reasonable initial guess */
688  lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
689  lts->nFreeBlocks = 0;
690  lts->nTapes = ntapes;
691  lts->tapes = (LogicalTape *) palloc(ntapes * sizeof(LogicalTape));
692 
693  for (i = 0; i < ntapes; i++)
694  ltsInitTape(&lts->tapes[i]);
695 
696  /*
697  * Create temp BufFile storage as required.
698  *
699  * Leader concatenates worker tapes, which requires special adjustment to
700  * final tapeset data. Things are simpler for the worker case and the
701  * serial case, though. They are generally very similar -- workers use a
702  * shared fileset, whereas serial sorts use a conventional serial BufFile.
703  */
 /*
  * Three cases: 'shared' given means leader (adopt the workers' files);
  * only 'fileset' given means worker (create a shared file named by the
  * decimal text of 'worker'); neither means a serial sort (private temp
  * file).
  */
704  if (shared)
705  ltsConcatWorkerTapes(lts, shared, fileset);
706  else if (fileset)
707  {
708  char filename[MAXPGPATH];
709 
710  pg_itoa(worker, filename);
711  lts->pfile = BufFileCreateShared(fileset, filename);
712  }
713  else
714  lts->pfile = BufFileCreateTemp(false);
715 
716  return lts;
717 }
718 
719 /*
720  * Close a logical tape set and release all resources.
721  */
722 void
 /*
  * NOTE(review): listing line 723 (the function name and parameter list)
  * is missing from this copy. The body takes the tape set as 'lts' --
  * restore the line from upstream.
  */
724 {
725  LogicalTape *lt;
726  int i;
727 
 /* Close the underlying file, then free each tape's buffer if it has one. */
728  BufFileClose(lts->pfile);
729  for (i = 0; i < lts->nTapes; i++)
730  {
731  lt = &lts->tapes[i];
732  if (lt->buffer)
733  pfree(lt->buffer);
734  }
 /*
  * NOTE(review): the per-tape prealloc arrays are not freed in the lines
  * shown here; confirm whether that is intentional.
  */
735  pfree(lts->tapes);
736  pfree(lts->freeBlocks);
737  pfree(lts);
738 }
739 
740 /*
741  * Mark a logical tape set as not needing management of free space anymore.
742  *
743  * This should be called if the caller does not intend to write any more data
744  * into the tape set, but is reading from un-frozen tapes. Since no more
745  * writes are planned, remembering free blocks is no longer useful. Setting
746  * this flag lets us avoid wasting time and space in ltsReleaseBlock(), which
747  * is not designed to handle large numbers of free blocks.
748  */
749 void
 /*
  * NOTE(review): listing line 750 (the function name and parameter list)
  * is missing from this copy. The LogicalTapeSet comments refer to this
  * routine as LogicalTapeSetForgetFreeSpace(), and the body takes the tape
  * set as 'lts' -- restore the line from upstream.
  */
751 {
 /* From now on ltsReleaseBlock() returns without recording freed blocks. */
752  lts->forgetFreeSpace = true;
753 }
754 
755 /*
756  * Write to a logical tape.
757  *
758  * There are no error returns; we ereport() on failure.
759  */
760 void
 /*
  * NOTE(review): listing line 761 (the function name and first
  * parameters) is missing from this copy. The body uses 'lts' and
  * 'tapenum' in addition to 'ptr' and 'size' -- restore the line from
  * upstream.
  */
762  void *ptr, size_t size)
763 {
764  LogicalTape *lt;
765  size_t nthistime;
766 
767  Assert(tapenum >= 0 && tapenum < lts->nTapes);
768  lt = &lts->tapes[tapenum];
769  Assert(lt->writing);
770  Assert(lt->offsetBlockNumber == 0L);
771 
772  /* Allocate data buffer and first block on first write */
773  if (lt->buffer == NULL)
774  {
775  lt->buffer = (char *) palloc(BLCKSZ);
776  lt->buffer_size = BLCKSZ;
777  }
778  if (lt->curBlockNumber == -1)
779  {
780  Assert(lt->firstBlockNumber == -1);
781  Assert(lt->pos == 0);
782 
783  lt->curBlockNumber = ltsGetPreallocBlock(lts, lt);
 /*
  * NOTE(review): listing line 784 is missing from this copy. Nothing
  * shown here records the tape's first block number, so presumably that
  * assignment was lost -- restore it from upstream.
  */
785 
786  TapeBlockGetTrailer(lt->buffer)->prev = -1L;
787  }
788 
789  Assert(lt->buffer_size == BLCKSZ);
790  while (size > 0)
791  {
792  if (lt->pos >= (int) TapeBlockPayloadSize)
793  {
794  /* Buffer full, dump it out */
795  long nextBlockNumber;
796 
797  if (!lt->dirty)
798  {
799  /* Hmm, went directly from reading to writing? */
800  elog(ERROR, "invalid logtape state: should be dirty");
801  }
802 
803  /*
804  * First allocate the next block, so that we can store it in the
805  * 'next' pointer of this block.
806  */
807  nextBlockNumber = ltsGetPreallocBlock(lts, lt);
808 
809  /* set the next-pointer and dump the current block. */
810  TapeBlockGetTrailer(lt->buffer)->next = nextBlockNumber;
811  ltsWriteBlock(lts, lt->curBlockNumber, (void *) lt->buffer);
812 
813  /* initialize the prev-pointer of the next block */
 /*
  * The same buffer is reused for the next block, so its trailer's prev
  * field is overwritten in place after the old block has been written.
  */
814  TapeBlockGetTrailer(lt->buffer)->prev = lt->curBlockNumber;
815  lt->curBlockNumber = nextBlockNumber;
816  lt->pos = 0;
817  lt->nbytes = 0;
818  }
819 
 /* Copy as much as fits in the payload area of the current block. */
820  nthistime = TapeBlockPayloadSize - lt->pos;
821  if (nthistime > size)
822  nthistime = size;
823  Assert(nthistime > 0);
824 
825  memcpy(lt->buffer + lt->pos, ptr, nthistime);
826 
827  lt->dirty = true;
828  lt->pos += nthistime;
829  if (lt->nbytes < lt->pos)
830  lt->nbytes = lt->pos;
831  ptr = (void *) ((char *) ptr + nthistime);
832  size -= nthistime;
833  }
834 }
835 
836 /*
837  * Rewind logical tape and switch from writing to reading.
838  *
839  * The tape must currently be in writing state, or "frozen" in read state.
840  *
841  * 'buffer_size' specifies how much memory to use for the read buffer.
842  * Regardless of the argument, the actual amount of memory used is between
843  * BLCKSZ and MaxAllocSize, and is a multiple of BLCKSZ. The given value is
844  * rounded down and truncated to fit those constraints, if necessary. If the
845  * tape is frozen, the 'buffer_size' argument is ignored, and a small BLCKSZ
846  * byte buffer is used.
847  */
848 void
849 LogicalTapeRewindForRead(LogicalTapeSet *lts, int tapenum, size_t buffer_size)
850 {
851  LogicalTape *lt;
852 
853  Assert(tapenum >= 0 && tapenum < lts->nTapes);
854  lt = &lts->tapes[tapenum];
855 
856  /*
857  * Round and cap buffer_size if needed.
858  */
859  if (lt->frozen)
860  buffer_size = BLCKSZ;
861  else
862  {
863  /* need at least one block */
864  if (buffer_size < BLCKSZ)
865  buffer_size = BLCKSZ;
866 
867  /* palloc() larger than max_size is unlikely to be helpful */
868  if (buffer_size > lt->max_size)
869  buffer_size = lt->max_size;
870 
871  /* round down to BLCKSZ boundary */
872  buffer_size -= buffer_size % BLCKSZ;
873  }
874 
875  if (lt->writing)
876  {
877  /*
878  * Completion of a write phase. Flush last partial data block, and
879  * rewind for normal (destructive) read.
880  */
881  if (lt->dirty)
882  {
883  /*
884  * As long as we've filled the buffer at least once, its contents
885  * are entirely defined from valgrind's point of view, even though
886  * contents beyond the current end point may be stale. But it's
887  * possible - at least in the case of a parallel sort - to sort
888  * such small amount of data that we do not fill the buffer even
889  * once. Tell valgrind that its contents are defined, so it
890  * doesn't bleat.
891  */
 /*
  * NOTE(review): listing line 892 is missing from this copy. The next
  * line is only the tail of a call's argument list; per the comment
  * above, the lost line presumably starts the Valgrind "memory defined"
  * request for the unused part of the buffer -- restore it from upstream.
  */
893  lt->buffer_size - lt->nbytes);
894 
 /* Storing the negated byte count in 'next' marks this as the last block. */
895  TapeBlockSetNBytes(lt->buffer, lt->nbytes);
896  ltsWriteBlock(lts, lt->curBlockNumber, (void *) lt->buffer);
897  }
898  lt->writing = false;
899  }
900  else
901  {
902  /*
903  * This is only OK if tape is frozen; we rewind for (another) read
904  * pass.
905  */
906  Assert(lt->frozen);
907  }
908 
909  if (lt->buffer)
910  pfree(lt->buffer);
911 
912  /* the buffer is lazily allocated, but set the size here */
913  lt->buffer = NULL;
914  lt->buffer_size = buffer_size;
915 
916  /* free the preallocation list, and return unused block numbers */
917  if (lt->prealloc != NULL)
918  {
919  for (int i = lt->nprealloc; i > 0; i--)
920  ltsReleaseBlock(lts, lt->prealloc[i - 1]);
921  pfree(lt->prealloc);
922  lt->prealloc = NULL;
923  lt->nprealloc = 0;
924  lt->prealloc_size = 0;
925  }
926 }
927 
928 /*
929  * Rewind logical tape and switch from reading to writing.
930  *
931  * NOTE: we assume the caller has read the tape to the end; otherwise
932  * untouched data will not have been freed. We could add more code to free
933  * any unread blocks, but in current usage of this module it'd be useless
934  * code.
935  */
936 void
 /*
  * NOTE(review): listing line 937 (the function name and parameter list)
  * is missing from this copy. The body uses 'lts' and 'tapenum' --
  * restore the line from upstream.
  */
938 {
939  LogicalTape *lt;
940 
941  Assert(tapenum >= 0 && tapenum < lts->nTapes);
942  lt = &lts->tapes[tapenum];
943 
 /* Only a tape in read state that was never frozen may be reused for writing. */
944  Assert(!lt->writing && !lt->frozen);
945  lt->writing = true;
946  lt->dirty = false;
947  lt->firstBlockNumber = -1L;
948  lt->curBlockNumber = -1L;
949  lt->pos = 0;
950  lt->nbytes = 0;
 /* Drop the read buffer; the write path allocates a fresh buffer when it finds none. */
951  if (lt->buffer)
952  pfree(lt->buffer);
953  lt->buffer = NULL;
954  lt->buffer_size = 0;
955 }
956 
957 /*
958  * Read from a logical tape.
959  *
960  * Early EOF is indicated by return value less than #bytes requested.
961  */
962 size_t
963 LogicalTapeRead(LogicalTapeSet *lts, int tapenum,
964  void *ptr, size_t size)
965 {
966  LogicalTape *lt;
967  size_t nread = 0;
968  size_t nthistime;
969 
970  Assert(tapenum >= 0 && tapenum < lts->nTapes);
971  lt = &lts->tapes[tapenum];
972  Assert(!lt->writing);
973 
974  if (lt->buffer == NULL)
975  ltsInitReadBuffer(lts, lt);
976 
977  while (size > 0)
978  {
979  if (lt->pos >= lt->nbytes)
980  {
981  /* Try to load more data into buffer. */
982  if (!ltsReadFillBuffer(lts, lt))
983  break; /* EOF */
984  }
985 
986  nthistime = lt->nbytes - lt->pos;
987  if (nthistime > size)
988  nthistime = size;
989  Assert(nthistime > 0);
990 
991  memcpy(ptr, lt->buffer + lt->pos, nthistime);
992 
993  lt->pos += nthistime;
994  ptr = (void *) ((char *) ptr + nthistime);
995  size -= nthistime;
996  nread += nthistime;
997  }
998 
999  return nread;
1000 }
1001 
1002 /*
1003  * "Freeze" the contents of a tape so that it can be read multiple times
1004  * and/or read backwards. Once a tape is frozen, its contents will not
1005  * be released until the LogicalTapeSet is destroyed. This is expected
1006  * to be used only for the final output pass of a merge.
1007  *
1008  * This *must* be called just at the end of a write pass, before the
1009  * tape is rewound (after rewind is too late!). It performs a rewind
1010  * and switch to read mode "for free". An immediately following rewind-
1011  * for-read call is OK but not necessary.
1012  *
1013  * share output argument is set with details of storage used for tape after
1014  * freezing, which may be passed to LogicalTapeSetCreate within leader
1015  * process later. This metadata is only of interest to worker callers
1016  * freezing their final output for leader (single materialized tape).
1017  * Serial sorts should set share to NULL.
1018  */
1019 void
1020 LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum, TapeShare *share)
1021 {
1022  LogicalTape *lt;
1023 
1024  Assert(tapenum >= 0 && tapenum < lts->nTapes);
1025  lt = &lts->tapes[tapenum];
1026  Assert(lt->writing);
1027  Assert(lt->offsetBlockNumber == 0L);
1028 
1029  /*
1030  * Completion of a write phase. Flush last partial data block, and rewind
1031  * for nondestructive read.
1032  */
1033  if (lt->dirty)
1034  {
1035  /*
1036  * As long as we've filled the buffer at least once, its contents are
1037  * entirely defined from valgrind's point of view, even though
1038  * contents beyond the current end point may be stale. But it's
1039  * possible - at least in the case of a parallel sort - to sort such
1040  * small amount of data that we do not fill the buffer even once. Tell
1041  * valgrind that its contents are defined, so it doesn't bleat.
1042  */
1044  lt->buffer_size - lt->nbytes);
1045 
1046  TapeBlockSetNBytes(lt->buffer, lt->nbytes);
1047  ltsWriteBlock(lts, lt->curBlockNumber, (void *) lt->buffer);
1048  lt->writing = false;
1049  }
1050  lt->writing = false;
1051  lt->frozen = true;
1052 
1053  /*
1054  * The seek and backspace functions assume a single block read buffer.
1055  * That's OK with current usage. A larger buffer is helpful to make the
1056  * read pattern of the backing file look more sequential to the OS, when
1057  * we're reading from multiple tapes. But at the end of a sort, when a
1058  * tape is frozen, we only read from a single tape anyway.
1059  */
1060  if (!lt->buffer || lt->buffer_size != BLCKSZ)
1061  {
1062  if (lt->buffer)
1063  pfree(lt->buffer);
1064  lt->buffer = palloc(BLCKSZ);
1065  lt->buffer_size = BLCKSZ;
1066  }
1067 
1068  /* Read the first block, or reset if tape is empty */
1069  lt->curBlockNumber = lt->firstBlockNumber;
1070  lt->pos = 0;
1071  lt->nbytes = 0;
1072 
1073  if (lt->firstBlockNumber == -1L)
1074  lt->nextBlockNumber = -1L;
1075  ltsReadBlock(lts, lt->curBlockNumber, (void *) lt->buffer);
1076  if (TapeBlockIsLast(lt->buffer))
1077  lt->nextBlockNumber = -1L;
1078  else
1079  lt->nextBlockNumber = TapeBlockGetTrailer(lt->buffer)->next;
1080  lt->nbytes = TapeBlockGetNBytes(lt->buffer);
1081 
1082  /* Handle extra steps when caller is to share its tapeset */
1083  if (share)
1084  {
1085  BufFileExportShared(lts->pfile);
1086  share->firstblocknumber = lt->firstBlockNumber;
1087  }
1088 }
1089 
1090 /*
1091  * Add additional tapes to this tape set. Not intended to be used when any
1092  * tapes are frozen.
1093  */
1094 void
1095 LogicalTapeSetExtend(LogicalTapeSet *lts, int nAdditional)
1096 {
1097  int i;
1098  int nTapesOrig = lts->nTapes;
1099 
1100  lts->nTapes += nAdditional;
1101 
1102  lts->tapes = (LogicalTape *) repalloc(lts->tapes,
1103  lts->nTapes * sizeof(LogicalTape));
1104 
1105  for (i = nTapesOrig; i < lts->nTapes; i++)
1106  ltsInitTape(&lts->tapes[i]);
1107 }
1108 
1109 /*
1110  * Backspace the tape a given number of bytes. (We also support a more
1111  * general seek interface, see below.)
1112  *
1113  * *Only* a frozen-for-read tape can be backed up; we don't support
1114  * random access during write, and an unfrozen read tape may have
1115  * already discarded the desired data!
1116  *
1117  * Returns the number of bytes backed up. It can be less than the
1118  * requested amount, if there isn't that much data before the current
1119  * position. The tape is positioned to the beginning of the tape in
1120  * that case.
1121  */
1122 size_t
1123 LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum, size_t size)
1124 {
1125  LogicalTape *lt;
1126  size_t seekpos = 0;
1127 
1128  Assert(tapenum >= 0 && tapenum < lts->nTapes);
1129  lt = &lts->tapes[tapenum];
1130  Assert(lt->frozen);
1131  Assert(lt->buffer_size == BLCKSZ);
1132 
1133  if (lt->buffer == NULL)
1134  ltsInitReadBuffer(lts, lt);
1135 
1136  /*
1137  * Easy case for seek within current block.
1138  */
1139  if (size <= (size_t) lt->pos)
1140  {
1141  lt->pos -= (int) size;
1142  return size;
1143  }
1144 
1145  /*
1146  * Not-so-easy case, have to walk back the chain of blocks. This
1147  * implementation would be pretty inefficient for long seeks, but we
1148  * really aren't doing that (a seek over one tuple is typical).
1149  */
1150  seekpos = (size_t) lt->pos; /* part within this block */
1151  while (size > seekpos)
1152  {
1153  long prev = TapeBlockGetTrailer(lt->buffer)->prev;
1154 
1155  if (prev == -1L)
1156  {
1157  /* Tried to back up beyond the beginning of tape. */
1158  if (lt->curBlockNumber != lt->firstBlockNumber)
1159  elog(ERROR, "unexpected end of tape");
1160  lt->pos = 0;
1161  return seekpos;
1162  }
1163 
1164  ltsReadBlock(lts, prev, (void *) lt->buffer);
1165 
1166  if (TapeBlockGetTrailer(lt->buffer)->next != lt->curBlockNumber)
1167  elog(ERROR, "broken tape, next of block %ld is %ld, expected %ld",
1168  prev,
1169  TapeBlockGetTrailer(lt->buffer)->next,
1170  lt->curBlockNumber);
1171 
1173  lt->curBlockNumber = prev;
1174  lt->nextBlockNumber = TapeBlockGetTrailer(lt->buffer)->next;
1175 
1176  seekpos += TapeBlockPayloadSize;
1177  }
1178 
1179  /*
1180  * 'seekpos' can now be greater than 'size', because it points to the
1181  * beginning the target block. The difference is the position within the
1182  * page.
1183  */
1184  lt->pos = seekpos - size;
1185  return size;
1186 }
1187 
1188 /*
1189  * Seek to an arbitrary position in a logical tape.
1190  *
1191  * *Only* a frozen-for-read tape can be seeked.
1192  *
1193  * Must be called with a block/offset previously returned by
1194  * LogicalTapeTell().
1195  */
1196 void
1198  long blocknum, int offset)
1199 {
1200  LogicalTape *lt;
1201 
1202  Assert(tapenum >= 0 && tapenum < lts->nTapes);
1203  lt = &lts->tapes[tapenum];
1204  Assert(lt->frozen);
1205  Assert(offset >= 0 && offset <= TapeBlockPayloadSize);
1206  Assert(lt->buffer_size == BLCKSZ);
1207 
1208  if (lt->buffer == NULL)
1209  ltsInitReadBuffer(lts, lt);
1210 
1211  if (blocknum != lt->curBlockNumber)
1212  {
1213  ltsReadBlock(lts, blocknum, (void *) lt->buffer);
1214  lt->curBlockNumber = blocknum;
1216  lt->nextBlockNumber = TapeBlockGetTrailer(lt->buffer)->next;
1217  }
1218 
1219  if (offset > lt->nbytes)
1220  elog(ERROR, "invalid tape seek position");
1221  lt->pos = offset;
1222 }
1223 
1224 /*
1225  * Obtain current position in a form suitable for a later LogicalTapeSeek.
1226  *
1227  * NOTE: it'd be OK to do this during write phase with intention of using
1228  * the position for a seek after freezing. Not clear if anyone needs that.
1229  */
1230 void
1232  long *blocknum, int *offset)
1233 {
1234  LogicalTape *lt;
1235 
1236  Assert(tapenum >= 0 && tapenum < lts->nTapes);
1237  lt = &lts->tapes[tapenum];
1238 
1239  if (lt->buffer == NULL)
1240  ltsInitReadBuffer(lts, lt);
1241 
1242  Assert(lt->offsetBlockNumber == 0L);
1243 
1244  /* With a larger buffer, 'pos' wouldn't be the same as offset within page */
1245  Assert(lt->buffer_size == BLCKSZ);
1246 
1247  *blocknum = lt->curBlockNumber;
1248  *offset = lt->pos;
1249 }
1250 
1251 /*
1252  * Obtain total disk space currently used by a LogicalTapeSet, in blocks.
1253  */
1254 long
1256 {
1257  return lts->nBlocksAllocated - lts->nHoleBlocks;
1258 }
int max_size
Definition: logtape.c:163
size_t LogicalTapeRead(LogicalTapeSet *lts, int tapenum, void *ptr, size_t size)
Definition: logtape.c:963
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26
#define swap(a, b)
Definition: qsort.c:94
#define TapeBlockIsLast(buf)
Definition: logtape.c:106
#define TapeBlockPayloadSize
Definition: logtape.c:102
long offsetBlockNumber
Definition: logtape.c:156
long nBlocksWritten
Definition: logtape.c:198
BufFile * BufFileOpenShared(SharedFileSet *fileset, const char *name)
Definition: buffile.c:280
int64 BufFileSize(BufFile *file)
Definition: buffile.c:780
static bool ltsReadFillBuffer(LogicalTapeSet *lts, LogicalTape *lt)
Definition: logtape.c:307
long * prealloc
Definition: logtape.c:172
#define Min(x, y)
Definition: c.h:927
static long ltsGetFreeBlock(LogicalTapeSet *lts)
Definition: logtape.c:378
BufFile * pfile
Definition: logtape.c:185
#define MemSet(start, val, len)
Definition: c.h:949
int prealloc_size
Definition: logtape.c:174
static void ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
Definition: logtape.c:472
long firstblocknumber
Definition: logtape.h:50
bool frozen
Definition: logtape.c:139
long nextBlockNumber
Definition: logtape.c:155
void BufFileClose(BufFile *file)
Definition: buffile.c:391
void LogicalTapeRewindForWrite(LogicalTapeSet *lts, int tapenum)
Definition: logtape.c:937
static void swap_nodes(long *heap, unsigned long a, unsigned long b)
Definition: logtape.c:346
bool writing
Definition: logtape.c:138
long * freeBlocks
Definition: logtape.c:210
bool dirty
Definition: logtape.c:140
char data[BLCKSZ]
Definition: c.h:1082
void pfree(void *pointer)
Definition: mcxt.c:1056
void LogicalTapeWrite(LogicalTapeSet *lts, int tapenum, void *ptr, size_t size)
Definition: logtape.c:761
#define ERROR
Definition: elog.h:43
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:184
#define MAXPGPATH
LogicalTapeSet * LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset, int worker)
Definition: logtape.c:672
BufFile * BufFileCreateShared(SharedFileSet *fileset, const char *name)
Definition: buffile.c:258
size_t LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum, size_t size)
Definition: logtape.c:1123
int errcode_for_file_access(void)
Definition: elog.c:633
long nHoleBlocks
Definition: logtape.c:199
int nbytes
Definition: logtape.c:165
static unsigned long right_offset(unsigned i)
Definition: logtape.c:362
void LogicalTapeTell(LogicalTapeSet *lts, int tapenum, long *blocknum, int *offset)
Definition: logtape.c:1231
int nprealloc
Definition: logtape.c:173
static unsigned long left_offset(unsigned long i)
Definition: logtape.c:356
#define MaxAllocSize
Definition: memutils.h:40
static void ltsInitTape(LogicalTape *lt)
Definition: logtape.c:613
long firstBlockNumber
Definition: logtape.c:153
long nFreeBlocks
Definition: logtape.c:211
#define TAPE_WRITE_PREALLOC_MAX
Definition: logtape.c:124
int BufFileSeekBlock(BufFile *file, long blknum)
Definition: buffile.c:747
#define TAPE_WRITE_PREALLOC_MIN
Definition: logtape.c:123
#define TapeBlockSetNBytes(buf, nbytes)
Definition: logtape.c:110
static void ltsInitReadBuffer(LogicalTapeSet *lts, LogicalTape *lt)
Definition: logtape.c:639
static void ltsConcatWorkerTapes(LogicalTapeSet *lts, TapeShare *shared, SharedFileSet *fileset)
Definition: logtape.c:529
#define ereport(elevel,...)
Definition: elog.h:144
static long ltsGetPreallocBlock(LogicalTapeSet *lts, LogicalTape *lt)
Definition: logtape.c:434
long curBlockNumber
Definition: logtape.c:154
#define Assert(condition)
Definition: c.h:745
struct TapeBlockTrailer TapeBlockTrailer
size_t Size
Definition: c.h:473
void BufFileExportShared(BufFile *file)
Definition: buffile.c:373
LogicalTape * tapes
Definition: logtape.c:216
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
void LogicalTapeRewindForRead(LogicalTapeSet *lts, int tapenum, size_t buffer_size)
Definition: logtape.c:849
#define TapeBlockGetNBytes(buf)
Definition: logtape.c:107
void LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum, TapeShare *share)
Definition: logtape.c:1020
char * buffer
Definition: logtape.c:161
static char * filename
Definition: pg_dumpall.c:90
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define elog(elevel,...)
Definition: elog.h:214
bool forgetFreeSpace
Definition: logtape.c:209
int i
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:539
void LogicalTapeSetClose(LogicalTapeSet *lts)
Definition: logtape.c:723
void LogicalTapeSeek(LogicalTapeSet *lts, int tapenum, long blocknum, int offset)
Definition: logtape.c:1197
void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts)
Definition: logtape.c:750
Size freeBlocksLen
Definition: logtape.c:212
long LogicalTapeSetBlocks(LogicalTapeSet *lts)
Definition: logtape.c:1255
void BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:582
struct LogicalTape LogicalTape
static unsigned long parent_offset(unsigned long i)
Definition: logtape.c:368
long BufFileAppend(BufFile *target, BufFile *source)
Definition: buffile.c:819
long nBlocksAllocated
Definition: logtape.c:197
void LogicalTapeSetExtend(LogicalTapeSet *lts, int nAdditional)
Definition: logtape.c:1095
int buffer_size
Definition: logtape.c:162
static void ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer)
Definition: logtape.c:236
#define TapeBlockGetTrailer(buf)
Definition: logtape.c:103
static void ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer)
Definition: logtape.c:284
int pg_itoa(int16 i, char *a)
Definition: numutils.c:338