/* PostgreSQL source code (git master) -- tuplestore.c */
1/*-------------------------------------------------------------------------
2 *
3 * tuplestore.c
4 * Generalized routines for temporary tuple storage.
5 *
6 * This module handles temporary storage of tuples for purposes such
7 * as Materialize nodes, hashjoin batch files, etc. It is essentially
8 * a dumbed-down version of tuplesort.c; it does no sorting of tuples
9 * but can only store and regurgitate a sequence of tuples. However,
10 * because no sort is required, it is allowed to start reading the sequence
11 * before it has all been written. This is particularly useful for cursors,
12 * because it allows random access within the already-scanned portion of
13 * a query without having to process the underlying scan to completion.
14 * Also, it is possible to support multiple independent read pointers.
15 *
16 * A temporary file is used to handle the data if it exceeds the
17 * space limit specified by the caller.
18 *
19 * The (approximate) amount of memory allowed to the tuplestore is specified
20 * in kilobytes by the caller. We absorb tuples and simply store them in an
21 * in-memory array as long as we haven't exceeded maxKBytes. If we do exceed
22 * maxKBytes, we dump all the tuples into a temp file and then read from that
23 * when needed.
24 *
25 * Upon creation, a tuplestore supports a single read pointer, numbered 0.
26 * Additional read pointers can be created using tuplestore_alloc_read_pointer.
27 * Mark/restore behavior is supported by copying read pointers.
28 *
29 * When the caller requests backward-scan capability, we write the temp file
30 * in a format that allows either forward or backward scan. Otherwise, only
31 * forward scan is allowed. A request for backward scan must be made before
32 * putting any tuples into the tuplestore. Rewind is normally allowed but
33 * can be turned off via tuplestore_set_eflags; turning off rewind for all
34 * read pointers enables truncation of the tuplestore at the oldest read point
35 * for minimal memory usage. (The caller must explicitly call tuplestore_trim
36 * at appropriate times for truncation to actually happen.)
37 *
38 * Note: in TSS_WRITEFILE state, the temp file's seek position is the
39 * current write position, and the write-position variables in the tuplestore
40 * aren't kept up to date. Similarly, in TSS_READFILE state the temp file's
41 * seek position is the active read pointer's position, and that read pointer
42 * isn't kept up to date. We update the appropriate variables using ftell()
43 * before switching to the other state or activating a different read pointer.
44 *
45 *
46 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
47 * Portions Copyright (c) 1994, Regents of the University of California
48 *
49 * IDENTIFICATION
50 * src/backend/utils/sort/tuplestore.c
51 *
52 *-------------------------------------------------------------------------
53 */
54
55#include "postgres.h"
56
57#include <limits.h>
58
59#include "access/htup_details.h"
60#include "commands/tablespace.h"
61#include "executor/executor.h"
62#include "miscadmin.h"
63#include "storage/buffile.h"
64#include "utils/memutils.h"
65#include "utils/resowner.h"
66
67
/*
 * Possible states of a Tuplestore object.  These denote the states that
 * persist between calls of Tuplestore routines.
 */
typedef enum
{
	TSS_INMEM,					/* Tuples still fit in memory */
	TSS_WRITEFILE,				/* Writing to temp file */
	TSS_READFILE,				/* Reading from temp file */
} TupStoreStatus;
79/*
80 * State for a single read pointer. If we are in state INMEM then all the
81 * read pointers' "current" fields denote the read positions. In state
82 * WRITEFILE, the file/offset fields denote the read positions. In state
83 * READFILE, inactive read pointers have valid file/offset, but the active
84 * read pointer implicitly has position equal to the temp file's seek position.
85 *
86 * Special case: if eof_reached is true, then the pointer's read position is
87 * implicitly equal to the write position, and current/file/offset aren't
88 * maintained. This way we need not update all the read pointers each time
89 * we write.
90 */
91typedef struct
92{
93 int eflags; /* capability flags */
94 bool eof_reached; /* read has reached EOF */
95 int current; /* next array index to read */
96 int file; /* temp file# */
97 off_t offset; /* byte offset in file */
99
100/*
101 * Private state of a Tuplestore operation.
102 */
104{
105 TupStoreStatus status; /* enumerated value as shown above */
106 int eflags; /* capability flags (OR of pointers' flags) */
107 bool backward; /* store extra length words in file? */
108 bool interXact; /* keep open through transactions? */
109 bool truncated; /* tuplestore_trim has removed tuples? */
110 bool usedDisk; /* used by tuplestore_get_stats() */
111 int64 maxSpace; /* used by tuplestore_get_stats() */
112 int64 availMem; /* remaining memory available, in bytes */
113 int64 allowedMem; /* total memory allowed, in bytes */
114 int64 tuples; /* number of tuples added */
115 BufFile *myfile; /* underlying file, or NULL if none */
116 MemoryContext context; /* memory context for holding tuples */
117 ResourceOwner resowner; /* resowner for holding temp files */
118
119 /*
120 * These function pointers decouple the routines that must know what kind
121 * of tuple we are handling from the routines that don't need to know it.
122 * They are set up by the tuplestore_begin_xxx routines.
123 *
124 * (Although tuplestore.c currently only supports heap tuples, I've copied
125 * this part of tuplesort.c so that extension to other kinds of objects
126 * will be easy if it's ever needed.)
127 *
128 * Function to copy a supplied input tuple into palloc'd space. (NB: we
129 * assume that a single pfree() is enough to release the tuple later, so
130 * the representation must be "flat" in one palloc chunk.) state->availMem
131 * must be decreased by the amount of space used.
132 */
133 void *(*copytup) (Tuplestorestate *state, void *tup);
134
135 /*
136 * Function to write a stored tuple onto tape. The representation of the
137 * tuple on tape need not be the same as it is in memory; requirements on
138 * the tape representation are given below. After writing the tuple,
139 * pfree() it, and increase state->availMem by the amount of memory space
140 * thereby released.
141 */
142 void (*writetup) (Tuplestorestate *state, void *tup);
143
144 /*
145 * Function to read a stored tuple from tape back into memory. 'len' is
146 * the already-read length of the stored tuple. Create and return a
147 * palloc'd copy, and decrease state->availMem by the amount of memory
148 * space consumed.
149 */
150 void *(*readtup) (Tuplestorestate *state, unsigned int len);
151
152 /*
153 * This array holds pointers to tuples in memory if we are in state INMEM.
154 * In states WRITEFILE and READFILE it's not used.
155 *
156 * When memtupdeleted > 0, the first memtupdeleted pointers are already
157 * released due to a tuplestore_trim() operation, but we haven't expended
158 * the effort to slide the remaining pointers down. These unused pointers
159 * are set to NULL to catch any invalid accesses. Note that memtupcount
160 * includes the deleted pointers.
161 */
162 void **memtuples; /* array of pointers to palloc'd tuples */
163 int memtupdeleted; /* the first N slots are currently unused */
164 int memtupcount; /* number of tuples currently present */
165 int memtupsize; /* allocated length of memtuples array */
166 bool growmemtuples; /* memtuples' growth still underway? */
167
168 /*
169 * These variables are used to keep track of the current positions.
170 *
171 * In state WRITEFILE, the current file seek position is the write point;
172 * in state READFILE, the write position is remembered in writepos_xxx.
173 * (The write position is the same as EOF, but since BufFileSeek doesn't
174 * currently implement SEEK_END, we have to remember it explicitly.)
175 */
176 TSReadPointer *readptrs; /* array of read pointers */
177 int activeptr; /* index of the active read pointer */
178 int readptrcount; /* number of pointers currently valid */
179 int readptrsize; /* allocated length of readptrs array */
180
181 int writepos_file; /* file# (valid if READFILE state) */
182 off_t writepos_offset; /* offset (valid if READFILE state) */
183};
184
185#define COPYTUP(state,tup) ((*(state)->copytup) (state, tup))
186#define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
187#define READTUP(state,len) ((*(state)->readtup) (state, len))
188#define LACKMEM(state) ((state)->availMem < 0)
189#define USEMEM(state,amt) ((state)->availMem -= (amt))
190#define FREEMEM(state,amt) ((state)->availMem += (amt))
191
192/*--------------------
193 *
194 * NOTES about on-tape representation of tuples:
195 *
196 * We require the first "unsigned int" of a stored tuple to be the total size
197 * on-tape of the tuple, including itself (so it is never zero).
198 * The remainder of the stored tuple
199 * may or may not match the in-memory representation of the tuple ---
200 * any conversion needed is the job of the writetup and readtup routines.
201 *
202 * If state->backward is true, then the stored representation of
203 * the tuple must be followed by another "unsigned int" that is a copy of the
204 * length --- so the total tape space used is actually sizeof(unsigned int)
205 * more than the stored length value. This allows read-backwards. When
206 * state->backward is not set, the write/read routines may omit the extra
207 * length word.
208 *
209 * writetup is expected to write both length words as well as the tuple
210 * data. When readtup is called, the tape is positioned just after the
211 * front length word; readtup must read the tuple data and advance past
212 * the back length word (if present).
213 *
214 * The write/read routines can make use of the tuple description data
215 * stored in the Tuplestorestate record, if needed. They are also expected
216 * to adjust state->availMem by the amount of memory space (not tape space!)
217 * released or consumed. There is no error return from either writetup
218 * or readtup; they should ereport() on failure.
219 *
220 *
221 * NOTES about memory consumption calculations:
222 *
223 * We count space allocated for tuples against the maxKBytes limit,
224 * plus the space used by the variable-size array memtuples.
225 * Fixed-size space (primarily the BufFile I/O buffer) is not counted.
226 * We don't worry about the size of the read pointer array, either.
227 *
228 * Note that we count actual space used (as shown by GetMemoryChunkSpace)
229 * rather than the originally-requested size. This is important since
230 * palloc can add substantial overhead. It's not a complete answer since
231 * we won't count any wasted space in palloc allocation blocks, but it's
232 * a lot better than what we were doing before 7.3.
233 *
234 *--------------------
235 */
236
237
239 bool interXact,
240 int maxKBytes);
241static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
242static void dumptuples(Tuplestorestate *state);
244static unsigned int getlen(Tuplestorestate *state, bool eofOK);
245static void *copytup_heap(Tuplestorestate *state, void *tup);
246static void writetup_heap(Tuplestorestate *state, void *tup);
247static void *readtup_heap(Tuplestorestate *state, unsigned int len);
248
249
250/*
251 * tuplestore_begin_xxx
252 *
253 * Initialize for a tuple store operation.
254 */
255static Tuplestorestate *
256tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
257{
259
261
262 state->status = TSS_INMEM;
263 state->eflags = eflags;
264 state->interXact = interXact;
265 state->truncated = false;
266 state->usedDisk = false;
267 state->maxSpace = 0;
268 state->allowedMem = maxKBytes * (int64) 1024;
269 state->availMem = state->allowedMem;
270 state->myfile = NULL;
271
272 /*
273 * The palloc/pfree pattern for tuple memory is in a FIFO pattern. A
274 * generation context is perfectly suited for this.
275 */
277 "tuplestore tuples",
279 state->resowner = CurrentResourceOwner;
280
281 state->memtupdeleted = 0;
282 state->memtupcount = 0;
283 state->tuples = 0;
284
285 /*
286 * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
287 * see comments in grow_memtuples().
288 */
289 state->memtupsize = Max(16384 / sizeof(void *),
290 ALLOCSET_SEPARATE_THRESHOLD / sizeof(void *) + 1);
291
292 state->growmemtuples = true;
293 state->memtuples = (void **) palloc(state->memtupsize * sizeof(void *));
294
296
297 state->activeptr = 0;
298 state->readptrcount = 1;
299 state->readptrsize = 8; /* arbitrary */
300 state->readptrs = (TSReadPointer *)
301 palloc(state->readptrsize * sizeof(TSReadPointer));
302
303 state->readptrs[0].eflags = eflags;
304 state->readptrs[0].eof_reached = false;
305 state->readptrs[0].current = 0;
306
307 return state;
308}
309
310/*
311 * tuplestore_begin_heap
312 *
313 * Create a new tuplestore; other types of tuple stores (other than
314 * "heap" tuple stores, for heap tuples) are possible, but not presently
315 * implemented.
316 *
317 * randomAccess: if true, both forward and backward accesses to the
318 * tuple store are allowed.
319 *
320 * interXact: if true, the files used for on-disk storage persist beyond the
321 * end of the current transaction. NOTE: It's the caller's responsibility to
322 * create such a tuplestore in a memory context and resource owner that will
323 * also survive transaction boundaries, and to ensure the tuplestore is closed
324 * when it's no longer wanted.
325 *
326 * maxKBytes: how much data to store in memory (any data beyond this
327 * amount is paged to disk). When in doubt, use work_mem.
328 */
330tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
331{
333 int eflags;
334
335 /*
336 * This interpretation of the meaning of randomAccess is compatible with
337 * the pre-8.3 behavior of tuplestores.
338 */
339 eflags = randomAccess ?
342
343 state = tuplestore_begin_common(eflags, interXact, maxKBytes);
344
345 state->copytup = copytup_heap;
346 state->writetup = writetup_heap;
347 state->readtup = readtup_heap;
348
349 return state;
350}
351
352/*
353 * tuplestore_set_eflags
354 *
355 * Set the capability flags for read pointer 0 at a finer grain than is
356 * allowed by tuplestore_begin_xxx. This must be called before inserting
357 * any data into the tuplestore.
358 *
359 * eflags is a bitmask following the meanings used for executor node
360 * startup flags (see executor.h). tuplestore pays attention to these bits:
361 * EXEC_FLAG_REWIND need rewind to start
362 * EXEC_FLAG_BACKWARD need backward fetch
363 * If tuplestore_set_eflags is not called, REWIND is allowed, and BACKWARD
364 * is set per "randomAccess" in the tuplestore_begin_xxx call.
365 *
366 * NOTE: setting BACKWARD without REWIND means the pointer can read backwards,
367 * but not further than the truncation point (the furthest-back read pointer
368 * position at the time of the last tuplestore_trim call).
369 */
370void
372{
373 int i;
374
375 if (state->status != TSS_INMEM || state->memtupcount != 0)
376 elog(ERROR, "too late to call tuplestore_set_eflags");
377
378 state->readptrs[0].eflags = eflags;
379 for (i = 1; i < state->readptrcount; i++)
380 eflags |= state->readptrs[i].eflags;
381 state->eflags = eflags;
382}
383
384/*
385 * tuplestore_alloc_read_pointer - allocate another read pointer.
386 *
387 * Returns the pointer's index.
388 *
389 * The new pointer initially copies the position of read pointer 0.
390 * It can have its own eflags, but if any data has been inserted into
391 * the tuplestore, these eflags must not represent an increase in
392 * requirements.
393 */
394int
396{
397 /* Check for possible increase of requirements */
398 if (state->status != TSS_INMEM || state->memtupcount != 0)
399 {
400 if ((state->eflags | eflags) != state->eflags)
401 elog(ERROR, "too late to require new tuplestore eflags");
402 }
403
404 /* Make room for another read pointer if needed */
405 if (state->readptrcount >= state->readptrsize)
406 {
407 int newcnt = state->readptrsize * 2;
408
409 state->readptrs = (TSReadPointer *)
410 repalloc(state->readptrs, newcnt * sizeof(TSReadPointer));
411 state->readptrsize = newcnt;
412 }
413
414 /* And set it up */
415 state->readptrs[state->readptrcount] = state->readptrs[0];
416 state->readptrs[state->readptrcount].eflags = eflags;
417
418 state->eflags |= eflags;
419
420 return state->readptrcount++;
421}
422
423/*
424 * tuplestore_clear
425 *
426 * Delete all the contents of a tuplestore, and reset its read pointers
427 * to the start.
428 */
429void
431{
432 int i;
433 TSReadPointer *readptr;
434
435 /* update the maxSpace before doing any USEMEM/FREEMEM adjustments */
437
438 if (state->myfile)
439 BufFileClose(state->myfile);
440 state->myfile = NULL;
441
442#ifdef USE_ASSERT_CHECKING
443 {
444 int64 availMem = state->availMem;
445
446 /*
447 * Below, we reset the memory context for storing tuples. To save
448 * from having to always call GetMemoryChunkSpace() on all stored
449 * tuples, we adjust the availMem to forget all the tuples and just
450 * recall USEMEM for the space used by the memtuples array. Here we
451 * just Assert that's correct and the memory tracking hasn't gone
452 * wrong anywhere.
453 */
454 for (i = state->memtupdeleted; i < state->memtupcount; i++)
455 availMem += GetMemoryChunkSpace(state->memtuples[i]);
456
457 availMem += GetMemoryChunkSpace(state->memtuples);
458
459 Assert(availMem == state->allowedMem);
460 }
461#endif
462
463 /* clear the memory consumed by the memory tuples */
464 MemoryContextReset(state->context);
465
466 /*
467 * Zero the used memory and re-consume the space for the memtuples array.
468 * This saves having to FREEMEM for each stored tuple.
469 */
470 state->availMem = state->allowedMem;
472
473 state->status = TSS_INMEM;
474 state->truncated = false;
475 state->memtupdeleted = 0;
476 state->memtupcount = 0;
477 state->tuples = 0;
478 readptr = state->readptrs;
479 for (i = 0; i < state->readptrcount; readptr++, i++)
480 {
481 readptr->eof_reached = false;
482 readptr->current = 0;
483 }
484}
485
486/*
487 * tuplestore_end
488 *
489 * Release resources and clean up.
490 */
491void
493{
494 if (state->myfile)
495 BufFileClose(state->myfile);
496
497 MemoryContextDelete(state->context);
498 pfree(state->memtuples);
499 pfree(state->readptrs);
500 pfree(state);
501}
502
503/*
504 * tuplestore_select_read_pointer - make the specified read pointer active
505 */
506void
508{
509 TSReadPointer *readptr;
510 TSReadPointer *oldptr;
511
512 Assert(ptr >= 0 && ptr < state->readptrcount);
513
514 /* No work if already active */
515 if (ptr == state->activeptr)
516 return;
517
518 readptr = &state->readptrs[ptr];
519 oldptr = &state->readptrs[state->activeptr];
520
521 switch (state->status)
522 {
523 case TSS_INMEM:
524 case TSS_WRITEFILE:
525 /* no work */
526 break;
527 case TSS_READFILE:
528
529 /*
530 * First, save the current read position in the pointer about to
531 * become inactive.
532 */
533 if (!oldptr->eof_reached)
534 BufFileTell(state->myfile,
535 &oldptr->file,
536 &oldptr->offset);
537
538 /*
539 * We have to make the temp file's seek position equal to the
540 * logical position of the new read pointer. In eof_reached
541 * state, that's the EOF, which we have available from the saved
542 * write position.
543 */
544 if (readptr->eof_reached)
545 {
546 if (BufFileSeek(state->myfile,
547 state->writepos_file,
548 state->writepos_offset,
549 SEEK_SET) != 0)
552 errmsg("could not seek in tuplestore temporary file")));
553 }
554 else
555 {
556 if (BufFileSeek(state->myfile,
557 readptr->file,
558 readptr->offset,
559 SEEK_SET) != 0)
562 errmsg("could not seek in tuplestore temporary file")));
563 }
564 break;
565 default:
566 elog(ERROR, "invalid tuplestore state");
567 break;
568 }
569
570 state->activeptr = ptr;
571}
572
573/*
574 * tuplestore_tuple_count
575 *
576 * Returns the number of tuples added since creation or the last
577 * tuplestore_clear().
578 */
579int64
581{
582 return state->tuples;
583}
584
585/*
586 * tuplestore_ateof
587 *
588 * Returns the active read pointer's eof_reached state.
589 */
590bool
592{
593 return state->readptrs[state->activeptr].eof_reached;
594}
595
596/*
597 * Grow the memtuples[] array, if possible within our memory constraint. We
598 * must not exceed INT_MAX tuples in memory or the caller-provided memory
599 * limit. Return true if we were able to enlarge the array, false if not.
600 *
601 * Normally, at each increment we double the size of the array. When doing
602 * that would exceed a limit, we attempt one last, smaller increase (and then
603 * clear the growmemtuples flag so we don't try any more). That allows us to
604 * use memory as fully as permitted; sticking to the pure doubling rule could
605 * result in almost half going unused. Because availMem moves around with
606 * tuple addition/removal, we need some rule to prevent making repeated small
607 * increases in memtupsize, which would just be useless thrashing. The
608 * growmemtuples flag accomplishes that and also prevents useless
609 * recalculations in this function.
610 */
611static bool
613{
614 int newmemtupsize;
615 int memtupsize = state->memtupsize;
616 int64 memNowUsed = state->allowedMem - state->availMem;
617
618 /* Forget it if we've already maxed out memtuples, per comment above */
619 if (!state->growmemtuples)
620 return false;
621
622 /* Select new value of memtupsize */
623 if (memNowUsed <= state->availMem)
624 {
625 /*
626 * We've used no more than half of allowedMem; double our usage,
627 * clamping at INT_MAX tuples.
628 */
629 if (memtupsize < INT_MAX / 2)
630 newmemtupsize = memtupsize * 2;
631 else
632 {
633 newmemtupsize = INT_MAX;
634 state->growmemtuples = false;
635 }
636 }
637 else
638 {
639 /*
640 * This will be the last increment of memtupsize. Abandon doubling
641 * strategy and instead increase as much as we safely can.
642 *
643 * To stay within allowedMem, we can't increase memtupsize by more
644 * than availMem / sizeof(void *) elements. In practice, we want to
645 * increase it by considerably less, because we need to leave some
646 * space for the tuples to which the new array slots will refer. We
647 * assume the new tuples will be about the same size as the tuples
648 * we've already seen, and thus we can extrapolate from the space
649 * consumption so far to estimate an appropriate new size for the
650 * memtuples array. The optimal value might be higher or lower than
651 * this estimate, but it's hard to know that in advance. We again
652 * clamp at INT_MAX tuples.
653 *
654 * This calculation is safe against enlarging the array so much that
655 * LACKMEM becomes true, because the memory currently used includes
656 * the present array; thus, there would be enough allowedMem for the
657 * new array elements even if no other memory were currently used.
658 *
659 * We do the arithmetic in float8, because otherwise the product of
660 * memtupsize and allowedMem could overflow. Any inaccuracy in the
661 * result should be insignificant; but even if we computed a
662 * completely insane result, the checks below will prevent anything
663 * really bad from happening.
664 */
665 double grow_ratio;
666
667 grow_ratio = (double) state->allowedMem / (double) memNowUsed;
668 if (memtupsize * grow_ratio < INT_MAX)
669 newmemtupsize = (int) (memtupsize * grow_ratio);
670 else
671 newmemtupsize = INT_MAX;
672
673 /* We won't make any further enlargement attempts */
674 state->growmemtuples = false;
675 }
676
677 /* Must enlarge array by at least one element, else report failure */
678 if (newmemtupsize <= memtupsize)
679 goto noalloc;
680
681 /*
682 * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp
683 * to ensure our request won't be rejected. Note that we can easily
684 * exhaust address space before facing this outcome. (This is presently
685 * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but
686 * don't rely on that at this distance.)
687 */
688 if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(void *))
689 {
690 newmemtupsize = (int) (MaxAllocHugeSize / sizeof(void *));
691 state->growmemtuples = false; /* can't grow any more */
692 }
693
694 /*
695 * We need to be sure that we do not cause LACKMEM to become true, else
696 * the space management algorithm will go nuts. The code above should
697 * never generate a dangerous request, but to be safe, check explicitly
698 * that the array growth fits within availMem. (We could still cause
699 * LACKMEM if the memory chunk overhead associated with the memtuples
700 * array were to increase. That shouldn't happen because we chose the
701 * initial array size large enough to ensure that palloc will be treating
702 * both old and new arrays as separate chunks. But we'll check LACKMEM
703 * explicitly below just in case.)
704 */
705 if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(void *)))
706 goto noalloc;
707
708 /* OK, do it */
710 state->memtupsize = newmemtupsize;
711 state->memtuples = (void **)
712 repalloc_huge(state->memtuples,
713 state->memtupsize * sizeof(void *));
715 if (LACKMEM(state))
716 elog(ERROR, "unexpected out-of-memory situation in tuplestore");
717 return true;
718
719noalloc:
720 /* If for any reason we didn't realloc, shut off future attempts */
721 state->growmemtuples = false;
722 return false;
723}
724
725/*
726 * Accept one tuple and append it to the tuplestore.
727 *
728 * Note that the input tuple is always copied; the caller need not save it.
729 *
730 * If the active read pointer is currently "at EOF", it remains so (the read
731 * pointer implicitly advances along with the write pointer); otherwise the
732 * read pointer is unchanged. Non-active read pointers do not move, which
733 * means they are certain to not be "at EOF" immediately after puttuple.
734 * This curious-seeming behavior is for the convenience of nodeMaterial.c and
735 * nodeCtescan.c, which would otherwise need to do extra pointer repositioning
736 * steps.
737 *
738 * tuplestore_puttupleslot() is a convenience routine to collect data from
739 * a TupleTableSlot without an extra copy operation.
740 */
741void
743 TupleTableSlot *slot)
744{
745 MinimalTuple tuple;
746 MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
747
748 /*
749 * Form a MinimalTuple in working memory
750 */
751 tuple = ExecCopySlotMinimalTuple(slot);
753
755
756 MemoryContextSwitchTo(oldcxt);
757}
758
759/*
760 * "Standard" case to copy from a HeapTuple. This is actually now somewhat
761 * deprecated, but not worth getting rid of in view of the number of callers.
762 */
763void
765{
766 MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
767
768 /*
769 * Copy the tuple. (Must do this even in WRITEFILE case. Note that
770 * COPYTUP includes USEMEM, so we needn't do that here.)
771 */
772 tuple = COPYTUP(state, tuple);
773
775
776 MemoryContextSwitchTo(oldcxt);
777}
778
779/*
780 * Similar to tuplestore_puttuple(), but work from values + nulls arrays.
781 * This avoids an extra tuple-construction operation.
782 */
783void
785 const Datum *values, const bool *isnull)
786{
787 MinimalTuple tuple;
788 MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
789
790 tuple = heap_form_minimal_tuple(tdesc, values, isnull);
792
794
795 MemoryContextSwitchTo(oldcxt);
796}
797
798static void
800{
801 TSReadPointer *readptr;
802 int i;
803 ResourceOwner oldowner;
804 MemoryContext oldcxt;
805
806 state->tuples++;
807
808 switch (state->status)
809 {
810 case TSS_INMEM:
811
812 /*
813 * Update read pointers as needed; see API spec above.
814 */
815 readptr = state->readptrs;
816 for (i = 0; i < state->readptrcount; readptr++, i++)
817 {
818 if (readptr->eof_reached && i != state->activeptr)
819 {
820 readptr->eof_reached = false;
821 readptr->current = state->memtupcount;
822 }
823 }
824
825 /*
826 * Grow the array as needed. Note that we try to grow the array
827 * when there is still one free slot remaining --- if we fail,
828 * there'll still be room to store the incoming tuple, and then
829 * we'll switch to tape-based operation.
830 */
831 if (state->memtupcount >= state->memtupsize - 1)
832 {
833 (void) grow_memtuples(state);
834 Assert(state->memtupcount < state->memtupsize);
835 }
836
837 /* Stash the tuple in the in-memory array */
838 state->memtuples[state->memtupcount++] = tuple;
839
840 /*
841 * Done if we still fit in available memory and have array slots.
842 */
843 if (state->memtupcount < state->memtupsize && !LACKMEM(state))
844 return;
845
846 /*
847 * Nope; time to switch to tape-based operation. Make sure that
848 * the temp file(s) are created in suitable temp tablespaces.
849 */
851
852 /* associate the file with the store's resource owner */
853 oldowner = CurrentResourceOwner;
854 CurrentResourceOwner = state->resowner;
855
856 /*
857 * We switch out of the state->context as this is a generation
858 * context, which isn't ideal for allocations relating to the
859 * BufFile.
860 */
861 oldcxt = MemoryContextSwitchTo(state->context->parent);
862
863 state->myfile = BufFileCreateTemp(state->interXact);
864
865 MemoryContextSwitchTo(oldcxt);
866
867 CurrentResourceOwner = oldowner;
868
869 /*
870 * Freeze the decision about whether trailing length words will be
871 * used. We can't change this choice once data is on tape, even
872 * though callers might drop the requirement.
873 */
874 state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
875
876 /*
877 * Update the maximum space used before dumping the tuples. It's
878 * possible that more space will be used by the tuples in memory
879 * than the space that will be used on disk.
880 */
882
883 state->status = TSS_WRITEFILE;
885 break;
886 case TSS_WRITEFILE:
887
888 /*
889 * Update read pointers as needed; see API spec above. Note:
890 * BufFileTell is quite cheap, so not worth trying to avoid
891 * multiple calls.
892 */
893 readptr = state->readptrs;
894 for (i = 0; i < state->readptrcount; readptr++, i++)
895 {
896 if (readptr->eof_reached && i != state->activeptr)
897 {
898 readptr->eof_reached = false;
899 BufFileTell(state->myfile,
900 &readptr->file,
901 &readptr->offset);
902 }
903 }
904
905 WRITETUP(state, tuple);
906 break;
907 case TSS_READFILE:
908
909 /*
910 * Switch from reading to writing.
911 */
912 if (!state->readptrs[state->activeptr].eof_reached)
913 BufFileTell(state->myfile,
914 &state->readptrs[state->activeptr].file,
915 &state->readptrs[state->activeptr].offset);
916 if (BufFileSeek(state->myfile,
917 state->writepos_file, state->writepos_offset,
918 SEEK_SET) != 0)
921 errmsg("could not seek in tuplestore temporary file")));
922 state->status = TSS_WRITEFILE;
923
924 /*
925 * Update read pointers as needed; see API spec above.
926 */
927 readptr = state->readptrs;
928 for (i = 0; i < state->readptrcount; readptr++, i++)
929 {
930 if (readptr->eof_reached && i != state->activeptr)
931 {
932 readptr->eof_reached = false;
933 readptr->file = state->writepos_file;
934 readptr->offset = state->writepos_offset;
935 }
936 }
937
938 WRITETUP(state, tuple);
939 break;
940 default:
941 elog(ERROR, "invalid tuplestore state");
942 break;
943 }
944}
945
946/*
947 * Fetch the next tuple in either forward or back direction.
948 * Returns NULL if no more tuples. If should_free is set, the
949 * caller must pfree the returned tuple when done with it.
950 *
951 * Backward scan is only allowed if randomAccess was set true or
952 * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags().
953 */
954static void *
956 bool *should_free)
957{
958 TSReadPointer *readptr = &state->readptrs[state->activeptr];
959 unsigned int tuplen;
960 void *tup;
961
962 Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));
963
964 switch (state->status)
965 {
966 case TSS_INMEM:
967 *should_free = false;
968 if (forward)
969 {
970 if (readptr->eof_reached)
971 return NULL;
972 if (readptr->current < state->memtupcount)
973 {
974 /* We have another tuple, so return it */
975 return state->memtuples[readptr->current++];
976 }
977 readptr->eof_reached = true;
978 return NULL;
979 }
980 else
981 {
982 /*
983 * if all tuples are fetched already then we return last
984 * tuple, else tuple before last returned.
985 */
986 if (readptr->eof_reached)
987 {
988 readptr->current = state->memtupcount;
989 readptr->eof_reached = false;
990 }
991 else
992 {
993 if (readptr->current <= state->memtupdeleted)
994 {
995 Assert(!state->truncated);
996 return NULL;
997 }
998 readptr->current--; /* last returned tuple */
999 }
1000 if (readptr->current <= state->memtupdeleted)
1001 {
1002 Assert(!state->truncated);
1003 return NULL;
1004 }
1005 return state->memtuples[readptr->current - 1];
1006 }
1007 break;
1008
1009 case TSS_WRITEFILE:
1010 /* Skip state change if we'll just return NULL */
1011 if (readptr->eof_reached && forward)
1012 return NULL;
1013
1014 /*
1015 * Switch from writing to reading.
1016 */
1017 BufFileTell(state->myfile,
1018 &state->writepos_file, &state->writepos_offset);
1019 if (!readptr->eof_reached)
1020 if (BufFileSeek(state->myfile,
1021 readptr->file, readptr->offset,
1022 SEEK_SET) != 0)
1023 ereport(ERROR,
1025 errmsg("could not seek in tuplestore temporary file")));
1026 state->status = TSS_READFILE;
1027 /* FALLTHROUGH */
1028
1029 case TSS_READFILE:
1030 *should_free = true;
1031 if (forward)
1032 {
1033 if ((tuplen = getlen(state, true)) != 0)
1034 {
1035 tup = READTUP(state, tuplen);
1036 return tup;
1037 }
1038 else
1039 {
1040 readptr->eof_reached = true;
1041 return NULL;
1042 }
1043 }
1044
1045 /*
1046 * Backward.
1047 *
1048 * if all tuples are fetched already then we return last tuple,
1049 * else tuple before last returned.
1050 *
1051 * Back up to fetch previously-returned tuple's ending length
1052 * word. If seek fails, assume we are at start of file.
1053 */
1054 if (BufFileSeek(state->myfile, 0, -(long) sizeof(unsigned int),
1055 SEEK_CUR) != 0)
1056 {
1057 /* even a failed backwards fetch gets you out of eof state */
1058 readptr->eof_reached = false;
1059 Assert(!state->truncated);
1060 return NULL;
1061 }
1062 tuplen = getlen(state, false);
1063
1064 if (readptr->eof_reached)
1065 {
1066 readptr->eof_reached = false;
1067 /* We will return the tuple returned before returning NULL */
1068 }
1069 else
1070 {
1071 /*
1072 * Back up to get ending length word of tuple before it.
1073 */
1074 if (BufFileSeek(state->myfile, 0,
1075 -(long) (tuplen + 2 * sizeof(unsigned int)),
1076 SEEK_CUR) != 0)
1077 {
1078 /*
1079 * If that fails, presumably the prev tuple is the first
1080 * in the file. Back up so that it becomes next to read
1081 * in forward direction (not obviously right, but that is
1082 * what in-memory case does).
1083 */
1084 if (BufFileSeek(state->myfile, 0,
1085 -(long) (tuplen + sizeof(unsigned int)),
1086 SEEK_CUR) != 0)
1087 ereport(ERROR,
1089 errmsg("could not seek in tuplestore temporary file")));
1090 Assert(!state->truncated);
1091 return NULL;
1092 }
1093 tuplen = getlen(state, false);
1094 }
1095
1096 /*
1097 * Now we have the length of the prior tuple, back up and read it.
1098 * Note: READTUP expects we are positioned after the initial
1099 * length word of the tuple, so back up to that point.
1100 */
1101 if (BufFileSeek(state->myfile, 0,
1102 -(long) tuplen,
1103 SEEK_CUR) != 0)
1104 ereport(ERROR,
1106 errmsg("could not seek in tuplestore temporary file")));
1107 tup = READTUP(state, tuplen);
1108 return tup;
1109
1110 default:
1111 elog(ERROR, "invalid tuplestore state");
1112 return NULL; /* keep compiler quiet */
1113 }
1114}
1115
1116/*
1117 * tuplestore_gettupleslot - exported function to fetch a MinimalTuple
1118 *
1119 * If successful, put tuple in slot and return true; else, clear the slot
1120 * and return false.
1121 *
1122 * If copy is true, the slot receives a copied tuple (allocated in current
1123 * memory context) that will stay valid regardless of future manipulations of
1124 * the tuplestore's state. If copy is false, the slot may just receive a
1125 * pointer to a tuple held within the tuplestore. The latter is more
1126 * efficient but the slot contents may be corrupted if additional writes to
1127 * the tuplestore occur. (If using tuplestore_trim, see comments therein.)
1128 */
1129bool
1131 bool copy, TupleTableSlot *slot)
1132{
1133 MinimalTuple tuple;
1134 bool should_free;
1135
1136 tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free);
1137
1138 if (tuple)
1139 {
1140 if (copy && !should_free)
1141 {
1142 tuple = heap_copy_minimal_tuple(tuple);
1143 should_free = true;
1144 }
1145 ExecStoreMinimalTuple(tuple, slot, should_free);
1146 return true;
1147 }
1148 else
1149 {
1150 ExecClearTuple(slot);
1151 return false;
1152 }
1153}
1154
1155/*
1156 * tuplestore_advance - exported function to adjust position without fetching
1157 *
1158 * We could optimize this case to avoid palloc/pfree overhead, but for the
1159 * moment it doesn't seem worthwhile.
1160 */
1161bool
1163{
1164 void *tuple;
1165 bool should_free;
1166
1167 tuple = tuplestore_gettuple(state, forward, &should_free);
1168
1169 if (tuple)
1170 {
1171 if (should_free)
1172 pfree(tuple);
1173 return true;
1174 }
1175 else
1176 {
1177 return false;
1178 }
1179}
1180
1181/*
1182 * Advance over N tuples in either forward or back direction,
1183 * without returning any data. N<=0 is a no-op.
1184 * Returns true if successful, false if ran out of tuples.
1185 */
1186bool
1188{
1189 TSReadPointer *readptr = &state->readptrs[state->activeptr];
1190
1191 Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));
1192
1193 if (ntuples <= 0)
1194 return true;
1195
1196 switch (state->status)
1197 {
1198 case TSS_INMEM:
1199 if (forward)
1200 {
1201 if (readptr->eof_reached)
1202 return false;
1203 if (state->memtupcount - readptr->current >= ntuples)
1204 {
1205 readptr->current += ntuples;
1206 return true;
1207 }
1208 readptr->current = state->memtupcount;
1209 readptr->eof_reached = true;
1210 return false;
1211 }
1212 else
1213 {
1214 if (readptr->eof_reached)
1215 {
1216 readptr->current = state->memtupcount;
1217 readptr->eof_reached = false;
1218 ntuples--;
1219 }
1220 if (readptr->current - state->memtupdeleted > ntuples)
1221 {
1222 readptr->current -= ntuples;
1223 return true;
1224 }
1225 Assert(!state->truncated);
1226 readptr->current = state->memtupdeleted;
1227 return false;
1228 }
1229 break;
1230
1231 default:
1232 /* We don't currently try hard to optimize other cases */
1233 while (ntuples-- > 0)
1234 {
1235 void *tuple;
1236 bool should_free;
1237
1238 tuple = tuplestore_gettuple(state, forward, &should_free);
1239
1240 if (tuple == NULL)
1241 return false;
1242 if (should_free)
1243 pfree(tuple);
1245 }
1246 return true;
1247 }
1248}
1249
1250/*
1251 * dumptuples - remove tuples from memory and write to tape
1252 *
1253 * As a side effect, we must convert each read pointer's position from
1254 * "current" to file/offset format. But eof_reached pointers don't
1255 * need to change state.
1256 */
1257static void
1259{
1260 int i;
1261
1262 for (i = state->memtupdeleted;; i++)
1263 {
1264 TSReadPointer *readptr = state->readptrs;
1265 int j;
1266
1267 for (j = 0; j < state->readptrcount; readptr++, j++)
1268 {
1269 if (i == readptr->current && !readptr->eof_reached)
1270 BufFileTell(state->myfile,
1271 &readptr->file, &readptr->offset);
1272 }
1273 if (i >= state->memtupcount)
1274 break;
1275 WRITETUP(state, state->memtuples[i]);
1276 }
1277 state->memtupdeleted = 0;
1278 state->memtupcount = 0;
1279}
1280
1281/*
1282 * tuplestore_rescan - rewind the active read pointer to start
1283 */
1284void
1286{
1287 TSReadPointer *readptr = &state->readptrs[state->activeptr];
1288
1289 Assert(readptr->eflags & EXEC_FLAG_REWIND);
1290 Assert(!state->truncated);
1291
1292 switch (state->status)
1293 {
1294 case TSS_INMEM:
1295 readptr->eof_reached = false;
1296 readptr->current = 0;
1297 break;
1298 case TSS_WRITEFILE:
1299 readptr->eof_reached = false;
1300 readptr->file = 0;
1301 readptr->offset = 0;
1302 break;
1303 case TSS_READFILE:
1304 readptr->eof_reached = false;
1305 if (BufFileSeek(state->myfile, 0, 0, SEEK_SET) != 0)
1306 ereport(ERROR,
1308 errmsg("could not seek in tuplestore temporary file")));
1309 break;
1310 default:
1311 elog(ERROR, "invalid tuplestore state");
1312 break;
1313 }
1314}
1315
1316/*
1317 * tuplestore_copy_read_pointer - copy a read pointer's state to another
1318 */
1319void
1321 int srcptr, int destptr)
1322{
1323 TSReadPointer *sptr = &state->readptrs[srcptr];
1324 TSReadPointer *dptr = &state->readptrs[destptr];
1325
1326 Assert(srcptr >= 0 && srcptr < state->readptrcount);
1327 Assert(destptr >= 0 && destptr < state->readptrcount);
1328
1329 /* Assigning to self is a no-op */
1330 if (srcptr == destptr)
1331 return;
1332
1333 if (dptr->eflags != sptr->eflags)
1334 {
1335 /* Possible change of overall eflags, so copy and then recompute */
1336 int eflags;
1337 int i;
1338
1339 *dptr = *sptr;
1340 eflags = state->readptrs[0].eflags;
1341 for (i = 1; i < state->readptrcount; i++)
1342 eflags |= state->readptrs[i].eflags;
1343 state->eflags = eflags;
1344 }
1345 else
1346 *dptr = *sptr;
1347
1348 switch (state->status)
1349 {
1350 case TSS_INMEM:
1351 case TSS_WRITEFILE:
1352 /* no work */
1353 break;
1354 case TSS_READFILE:
1355
1356 /*
1357 * This case is a bit tricky since the active read pointer's
1358 * position corresponds to the seek point, not what is in its
1359 * variables. Assigning to the active requires a seek, and
1360 * assigning from the active requires a tell, except when
1361 * eof_reached.
1362 */
1363 if (destptr == state->activeptr)
1364 {
1365 if (dptr->eof_reached)
1366 {
1367 if (BufFileSeek(state->myfile,
1368 state->writepos_file,
1369 state->writepos_offset,
1370 SEEK_SET) != 0)
1371 ereport(ERROR,
1373 errmsg("could not seek in tuplestore temporary file")));
1374 }
1375 else
1376 {
1377 if (BufFileSeek(state->myfile,
1378 dptr->file, dptr->offset,
1379 SEEK_SET) != 0)
1380 ereport(ERROR,
1382 errmsg("could not seek in tuplestore temporary file")));
1383 }
1384 }
1385 else if (srcptr == state->activeptr)
1386 {
1387 if (!dptr->eof_reached)
1388 BufFileTell(state->myfile,
1389 &dptr->file,
1390 &dptr->offset);
1391 }
1392 break;
1393 default:
1394 elog(ERROR, "invalid tuplestore state");
1395 break;
1396 }
1397}
1398
1399/*
1400 * tuplestore_trim - remove all no-longer-needed tuples
1401 *
1402 * Calling this function authorizes the tuplestore to delete all tuples
1403 * before the oldest read pointer, if no read pointer is marked as requiring
1404 * REWIND capability.
1405 *
1406 * Note: this is obviously safe if no pointer has BACKWARD capability either.
1407 * If a pointer is marked as BACKWARD but not REWIND capable, it means that
1408 * the pointer can be moved backward but not before the oldest other read
1409 * pointer.
1410 */
1411void
1413{
1414 int oldest;
1415 int nremove;
1416 int i;
1417
1418 /*
1419 * Truncation is disallowed if any read pointer requires rewind
1420 * capability.
1421 */
1422 if (state->eflags & EXEC_FLAG_REWIND)
1423 return;
1424
1425 /*
1426 * We don't bother trimming temp files since it usually would mean more
1427 * work than just letting them sit in kernel buffers until they age out.
1428 */
1429 if (state->status != TSS_INMEM)
1430 return;
1431
1432 /* Find the oldest read pointer */
1433 oldest = state->memtupcount;
1434 for (i = 0; i < state->readptrcount; i++)
1435 {
1436 if (!state->readptrs[i].eof_reached)
1437 oldest = Min(oldest, state->readptrs[i].current);
1438 }
1439
1440 /*
1441 * Note: you might think we could remove all the tuples before the oldest
1442 * "current", since that one is the next to be returned. However, since
1443 * tuplestore_gettuple returns a direct pointer to our internal copy of
1444 * the tuple, it's likely that the caller has still got the tuple just
1445 * before "current" referenced in a slot. So we keep one extra tuple
1446 * before the oldest "current". (Strictly speaking, we could require such
1447 * callers to use the "copy" flag to tuplestore_gettupleslot, but for
1448 * efficiency we allow this one case to not use "copy".)
1449 */
1450 nremove = oldest - 1;
1451 if (nremove <= 0)
1452 return; /* nothing to do */
1453
1454 Assert(nremove >= state->memtupdeleted);
1455 Assert(nremove <= state->memtupcount);
1456
1457 /* before freeing any memory, update the statistics */
1459
1460 /* Release no-longer-needed tuples */
1461 for (i = state->memtupdeleted; i < nremove; i++)
1462 {
1463 FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
1464 pfree(state->memtuples[i]);
1465 state->memtuples[i] = NULL;
1466 }
1467 state->memtupdeleted = nremove;
1468
1469 /* mark tuplestore as truncated (used for Assert crosschecks only) */
1470 state->truncated = true;
1471
1472 /*
1473 * If nremove is less than 1/8th memtupcount, just stop here, leaving the
1474 * "deleted" slots as NULL. This prevents us from expending O(N^2) time
1475 * repeatedly memmove-ing a large pointer array. The worst case space
1476 * wastage is pretty small, since it's just pointers and not whole tuples.
1477 */
1478 if (nremove < state->memtupcount / 8)
1479 return;
1480
1481 /*
1482 * Slide the array down and readjust pointers.
1483 *
1484 * In mergejoin's current usage, it's demonstrable that there will always
1485 * be exactly one non-removed tuple; so optimize that case.
1486 */
1487 if (nremove + 1 == state->memtupcount)
1488 state->memtuples[0] = state->memtuples[nremove];
1489 else
1490 memmove(state->memtuples, state->memtuples + nremove,
1491 (state->memtupcount - nremove) * sizeof(void *));
1492
1493 state->memtupdeleted = 0;
1494 state->memtupcount -= nremove;
1495 for (i = 0; i < state->readptrcount; i++)
1496 {
1497 if (!state->readptrs[i].eof_reached)
1498 state->readptrs[i].current -= nremove;
1499 }
1500}
1501
1502/*
1503 * tuplestore_updatemax
1504 * Update the maximum space used by this tuplestore and the method used
1505 * for storage.
1506 */
1507static void
1509{
1510 if (state->status == TSS_INMEM)
1511 state->maxSpace = Max(state->maxSpace,
1512 state->allowedMem - state->availMem);
1513 else
1514 {
1515 state->maxSpace = Max(state->maxSpace,
1516 BufFileSize(state->myfile));
1517
1518 /*
1519 * usedDisk never gets set to false again after spilling to disk, even
1520 * if tuplestore_clear() is called and new tuples go to memory again.
1521 */
1522 state->usedDisk = true;
1523 }
1524}
1525
1526/*
1527 * tuplestore_get_stats
1528 * Obtain statistics about the maximum space used by the tuplestore.
1529 * These statistics are the maximums and are not reset by calls to
1530 * tuplestore_trim() or tuplestore_clear().
1531 */
1532void
1534 int64 *max_space)
1535{
1537
1538 if (state->usedDisk)
1539 *max_storage_type = "Disk";
1540 else
1541 *max_storage_type = "Memory";
1542
1543 *max_space = state->maxSpace;
1544}
1545
1546/*
1547 * tuplestore_in_memory
1548 *
1549 * Returns true if the tuplestore has not spilled to disk.
1550 *
1551 * XXX exposing this is a violation of modularity ... should get rid of it.
1552 */
1553bool
1555{
1556 return (state->status == TSS_INMEM);
1557}
1558
1559
1560/*
1561 * Tape interface routines
1562 */
1563
1564static unsigned int
1566{
1567 unsigned int len;
1568 size_t nbytes;
1569
1570 nbytes = BufFileReadMaybeEOF(state->myfile, &len, sizeof(len), eofOK);
1571 if (nbytes == 0)
1572 return 0;
1573 else
1574 return len;
1575}
1576
1577
1578/*
1579 * Routines specialized for HeapTuple case
1580 *
1581 * The stored form is actually a MinimalTuple, but for largely historical
1582 * reasons we allow COPYTUP to work from a HeapTuple.
1583 *
1584 * Since MinimalTuple already has length in its first word, we don't need
1585 * to write that separately.
1586 */
1587
1588static void *
1590{
1591 MinimalTuple tuple;
1592
1595 return tuple;
1596}
1597
1598static void
1600{
1601 MinimalTuple tuple = (MinimalTuple) tup;
1602
1603 /* the part of the MinimalTuple we'll write: */
1604 char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
1605 unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
1606
1607 /* total on-disk footprint: */
1608 unsigned int tuplen = tupbodylen + sizeof(int);
1609
1610 BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
1611 BufFileWrite(state->myfile, tupbody, tupbodylen);
1612 if (state->backward) /* need trailing length word? */
1613 BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
1614
1617}
1618
1619static void *
1621{
1622 unsigned int tupbodylen = len - sizeof(int);
1623 unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
1624 MinimalTuple tuple = (MinimalTuple) palloc(tuplen);
1625 char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
1626
1627 /* read in the tuple proper */
1628 tuple->t_len = tuplen;
1629 BufFileReadExact(state->myfile, tupbody, tupbodylen);
1630 if (state->backward) /* need trailing length word? */
1631 BufFileReadExact(state->myfile, &tuplen, sizeof(tuplen));
1632 return tuple;
1633}
void PrepareTempTablespaces(void)
Definition: tablespace.c:1331
static Datum values[MAXATTR]
Definition: bootstrap.c:151
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:654
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:193
void BufFileTell(BufFile *file, int *fileno, off_t *offset)
Definition: buffile.c:833
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition: buffile.c:676
size_t BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
Definition: buffile.c:664
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:740
int64 BufFileSize(BufFile *file)
Definition: buffile.c:866
void BufFileClose(BufFile *file)
Definition: buffile.c:412
#define Min(x, y)
Definition: c.h:975
#define Max(x, y)
Definition: c.h:969
int64_t int64
Definition: c.h:499
size_t Size
Definition: c.h:576
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1633
#define EXEC_FLAG_BACKWARD
Definition: executor.h:69
#define EXEC_FLAG_REWIND
Definition: executor.h:68
MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: generation.c:160
Assert(PointerIsAligned(start, uint64))
MinimalTuple minimal_tuple_from_heap_tuple(HeapTuple htup)
Definition: heaptuple.c:1577
MinimalTuple heap_form_minimal_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1453
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1524
MinimalTuple heap_copy_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1536
MinimalTupleData * MinimalTuple
Definition: htup.h:27
#define MINIMAL_TUPLE_DATA_OFFSET
Definition: htup_details.h:673
int j
Definition: isn.c:75
int i
Definition: isn.c:74
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1544
void pfree(void *pointer)
Definition: mcxt.c:1524
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:721
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
void * repalloc_huge(void *pointer, Size size)
Definition: mcxt.c:1675
#define MaxAllocHugeSize
Definition: memutils.h:45
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
#define ALLOCSET_SEPARATE_THRESHOLD
Definition: memutils.h:187
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
const void size_t len
uintptr_t Datum
Definition: postgres.h:69
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173
bool eof_reached
Definition: tuplestore.c:94
off_t offset
Definition: tuplestore.c:97
ResourceOwner resowner
Definition: tuplestore.c:117
TupStoreStatus status
Definition: tuplestore.c:105
MemoryContext context
Definition: tuplestore.c:116
TSReadPointer * readptrs
Definition: tuplestore.c:176
BufFile * myfile
Definition: tuplestore.c:115
off_t writepos_offset
Definition: tuplestore.c:182
void ** memtuples
Definition: tuplestore.c:162
void(* writetup)(Tuplestorestate *state, void *tup)
Definition: tuplestore.c:142
Definition: regguts.h:323
#define WRITETUP(state, tup)
Definition: tuplestore.c:186
void tuplestore_get_stats(Tuplestorestate *state, char **max_storage_type, int64 *max_space)
Definition: tuplestore.c:1533
#define READTUP(state, len)
Definition: tuplestore.c:187
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
Definition: tuplestore.c:1130
#define LACKMEM(state)
Definition: tuplestore.c:188
#define USEMEM(state, amt)
Definition: tuplestore.c:189
static void writetup_heap(Tuplestorestate *state, void *tup)
Definition: tuplestore.c:1599
void tuplestore_puttupleslot(Tuplestorestate *state, TupleTableSlot *slot)
Definition: tuplestore.c:742
static void * copytup_heap(Tuplestorestate *state, void *tup)
Definition: tuplestore.c:1589
void tuplestore_select_read_pointer(Tuplestorestate *state, int ptr)
Definition: tuplestore.c:507
#define COPYTUP(state, tup)
Definition: tuplestore.c:185
void tuplestore_clear(Tuplestorestate *state)
Definition: tuplestore.c:430
static bool grow_memtuples(Tuplestorestate *state)
Definition: tuplestore.c:612
#define FREEMEM(state, amt)
Definition: tuplestore.c:190
static Tuplestorestate * tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
Definition: tuplestore.c:256
static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
Definition: tuplestore.c:799
TupStoreStatus
Definition: tuplestore.c:73
@ TSS_READFILE
Definition: tuplestore.c:76
@ TSS_INMEM
Definition: tuplestore.c:74
@ TSS_WRITEFILE
Definition: tuplestore.c:75
int64 tuplestore_tuple_count(Tuplestorestate *state)
Definition: tuplestore.c:580
void tuplestore_rescan(Tuplestorestate *state)
Definition: tuplestore.c:1285
int tuplestore_alloc_read_pointer(Tuplestorestate *state, int eflags)
Definition: tuplestore.c:395
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:330
void tuplestore_trim(Tuplestorestate *state)
Definition: tuplestore.c:1412
void tuplestore_copy_read_pointer(Tuplestorestate *state, int srcptr, int destptr)
Definition: tuplestore.c:1320
bool tuplestore_advance(Tuplestorestate *state, bool forward)
Definition: tuplestore.c:1162
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
static void * readtup_heap(Tuplestorestate *state, unsigned int len)
Definition: tuplestore.c:1620
bool tuplestore_in_memory(Tuplestorestate *state)
Definition: tuplestore.c:1554
void tuplestore_end(Tuplestorestate *state)
Definition: tuplestore.c:492
static void * tuplestore_gettuple(Tuplestorestate *state, bool forward, bool *should_free)
Definition: tuplestore.c:955
void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple)
Definition: tuplestore.c:764
bool tuplestore_ateof(Tuplestorestate *state)
Definition: tuplestore.c:591
static void dumptuples(Tuplestorestate *state)
Definition: tuplestore.c:1258
static void tuplestore_updatemax(Tuplestorestate *state)
Definition: tuplestore.c:1508
void tuplestore_set_eflags(Tuplestorestate *state, int eflags)
Definition: tuplestore.c:371
bool tuplestore_skiptuples(Tuplestorestate *state, int64 ntuples, bool forward)
Definition: tuplestore.c:1187
static unsigned int getlen(Tuplestorestate *state, bool eofOK)
Definition: tuplestore.c:1565
static MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot)
Definition: tuptable.h:492
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454