PostgreSQL Source Code git master
Loading...
Searching...
No Matches
tuplestore.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * tuplestore.c
4 * Generalized routines for temporary tuple storage.
5 *
6 * This module handles temporary storage of tuples for purposes such
7 * as Materialize nodes, hashjoin batch files, etc. It is essentially
8 * a dumbed-down version of tuplesort.c; it does no sorting of tuples
9 * but can only store and regurgitate a sequence of tuples. However,
10 * because no sort is required, it is allowed to start reading the sequence
11 * before it has all been written. This is particularly useful for cursors,
12 * because it allows random access within the already-scanned portion of
13 * a query without having to process the underlying scan to completion.
14 * Also, it is possible to support multiple independent read pointers.
15 *
16 * A temporary file is used to handle the data if it exceeds the
17 * space limit specified by the caller.
18 *
19 * The (approximate) amount of memory allowed to the tuplestore is specified
20 * in kilobytes by the caller. We absorb tuples and simply store them in an
21 * in-memory array as long as we haven't exceeded maxKBytes. If we do exceed
22 * maxKBytes, we dump all the tuples into a temp file and then read from that
23 * when needed.
24 *
25 * Upon creation, a tuplestore supports a single read pointer, numbered 0.
26 * Additional read pointers can be created using tuplestore_alloc_read_pointer.
27 * Mark/restore behavior is supported by copying read pointers.
28 *
29 * When the caller requests backward-scan capability, we write the temp file
30 * in a format that allows either forward or backward scan. Otherwise, only
31 * forward scan is allowed. A request for backward scan must be made before
32 * putting any tuples into the tuplestore. Rewind is normally allowed but
33 * can be turned off via tuplestore_set_eflags; turning off rewind for all
34 * read pointers enables truncation of the tuplestore at the oldest read point
35 * for minimal memory usage. (The caller must explicitly call tuplestore_trim
36 * at appropriate times for truncation to actually happen.)
37 *
38 * Note: in TSS_WRITEFILE state, the temp file's seek position is the
39 * current write position, and the write-position variables in the tuplestore
40 * aren't kept up to date. Similarly, in TSS_READFILE state the temp file's
41 * seek position is the active read pointer's position, and that read pointer
42 * isn't kept up to date. We update the appropriate variables using ftell()
43 * before switching to the other state or activating a different read pointer.
44 *
45 *
46 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
47 * Portions Copyright (c) 1994, Regents of the University of California
48 *
49 * IDENTIFICATION
50 * src/backend/utils/sort/tuplestore.c
51 *
52 *-------------------------------------------------------------------------
53 */
54
55#include "postgres.h"
56
57#include <limits.h>
58
59#include "access/htup_details.h"
60#include "commands/tablespace.h"
61#include "executor/executor.h"
62#include "miscadmin.h"
63#include "storage/buffile.h"
64#include "utils/memutils.h"
65#include "utils/resowner.h"
66#include "utils/tuplestore.h"
67
68
69/*
70 * Possible states of a Tuplestore object. These denote the states that
71 * persist between calls of Tuplestore routines.
72 */
typedef enum
{
	TSS_INMEM,					/* Tuples still fit in memory */
	TSS_WRITEFILE,				/* Writing to temp file */
	TSS_READFILE,				/* Reading from temp file */
} TupStoreStatus;
79
80/*
81 * State for a single read pointer. If we are in state INMEM then all the
82 * read pointers' "current" fields denote the read positions. In state
83 * WRITEFILE, the file/offset fields denote the read positions. In state
84 * READFILE, inactive read pointers have valid file/offset, but the active
85 * read pointer implicitly has position equal to the temp file's seek position.
86 *
87 * Special case: if eof_reached is true, then the pointer's read position is
88 * implicitly equal to the write position, and current/file/offset aren't
89 * maintained. This way we need not update all the read pointers each time
90 * we write.
91 */
92typedef struct
93{
94 int eflags; /* capability flags */
95 bool eof_reached; /* read has reached EOF */
96 int current; /* next array index to read */
97 int file; /* temp file# */
98 pgoff_t offset; /* byte offset in file */
100
101/*
102 * Private state of a Tuplestore operation.
103 */
105{
106 TupStoreStatus status; /* enumerated value as shown above */
107 int eflags; /* capability flags (OR of pointers' flags) */
108 bool backward; /* store extra length words in file? */
109 bool interXact; /* keep open through transactions? */
110 bool truncated; /* tuplestore_trim has removed tuples? */
111 bool usedDisk; /* used by tuplestore_get_stats() */
112 int64 maxSpace; /* used by tuplestore_get_stats() */
113 int64 availMem; /* remaining memory available, in bytes */
114 int64 allowedMem; /* total memory allowed, in bytes */
115 int64 tuples; /* number of tuples added */
116 BufFile *myfile; /* underlying file, or NULL if none */
117 MemoryContext context; /* memory context for holding tuples */
118 ResourceOwner resowner; /* resowner for holding temp files */
119
120 /*
121 * These function pointers decouple the routines that must know what kind
122 * of tuple we are handling from the routines that don't need to know it.
123 * They are set up by the tuplestore_begin_xxx routines.
124 *
125 * (Although tuplestore.c currently only supports heap tuples, I've copied
126 * this part of tuplesort.c so that extension to other kinds of objects
127 * will be easy if it's ever needed.)
128 *
129 * Function to copy a supplied input tuple into palloc'd space. (NB: we
130 * assume that a single pfree() is enough to release the tuple later, so
131 * the representation must be "flat" in one palloc chunk.) state->availMem
132 * must be decreased by the amount of space used.
133 */
134 void *(*copytup) (Tuplestorestate *state, void *tup);
135
136 /*
137 * Function to write a stored tuple onto tape. The representation of the
138 * tuple on tape need not be the same as it is in memory; requirements on
139 * the tape representation are given below. After writing the tuple,
140 * pfree() it, and increase state->availMem by the amount of memory space
141 * thereby released.
142 */
144
145 /*
146 * Function to read a stored tuple from tape back into memory. 'len' is
147 * the already-read length of the stored tuple. Create and return a
148 * palloc'd copy, and decrease state->availMem by the amount of memory
149 * space consumed.
150 */
151 void *(*readtup) (Tuplestorestate *state, unsigned int len);
152
153 /*
154 * This array holds pointers to tuples in memory if we are in state INMEM.
155 * In states WRITEFILE and READFILE it's not used.
156 *
157 * When memtupdeleted > 0, the first memtupdeleted pointers are already
158 * released due to a tuplestore_trim() operation, but we haven't expended
159 * the effort to slide the remaining pointers down. These unused pointers
160 * are set to NULL to catch any invalid accesses. Note that memtupcount
161 * includes the deleted pointers.
162 */
163 void **memtuples; /* array of pointers to palloc'd tuples */
164 int memtupdeleted; /* the first N slots are currently unused */
165 int memtupcount; /* number of tuples currently present */
166 int memtupsize; /* allocated length of memtuples array */
167 bool growmemtuples; /* memtuples' growth still underway? */
168
169 /*
170 * These variables are used to keep track of the current positions.
171 *
172 * In state WRITEFILE, the current file seek position is the write point;
173 * in state READFILE, the write position is remembered in writepos_xxx.
174 * (The write position is the same as EOF, but since BufFileSeek doesn't
175 * currently implement SEEK_END, we have to remember it explicitly.)
176 */
177 TSReadPointer *readptrs; /* array of read pointers */
178 int activeptr; /* index of the active read pointer */
179 int readptrcount; /* number of pointers currently valid */
180 int readptrsize; /* allocated length of readptrs array */
181
182 int writepos_file; /* file# (valid if READFILE state) */
183 pgoff_t writepos_offset; /* offset (valid if READFILE state) */
184};
185
/* Invoke the per-tuple-kind handlers installed by tuplestore_begin_xxx */
#define COPYTUP(state,tup)	((*(state)->copytup) (state, tup))
#define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
#define READTUP(state,len)	((*(state)->readtup) (state, len))
/* Memory accounting: availMem may go negative, which LACKMEM detects */
#define LACKMEM(state)		((state)->availMem < 0)
#define USEMEM(state,amt)	((state)->availMem -= (amt))
#define FREEMEM(state,amt)	((state)->availMem += (amt))
192
193/*--------------------
194 *
195 * NOTES about on-tape representation of tuples:
196 *
197 * We require the first "unsigned int" of a stored tuple to be the total size
198 * on-tape of the tuple, including itself (so it is never zero).
199 * The remainder of the stored tuple
200 * may or may not match the in-memory representation of the tuple ---
201 * any conversion needed is the job of the writetup and readtup routines.
202 *
203 * If state->backward is true, then the stored representation of
204 * the tuple must be followed by another "unsigned int" that is a copy of the
205 * length --- so the total tape space used is actually sizeof(unsigned int)
206 * more than the stored length value. This allows read-backwards. When
207 * state->backward is not set, the write/read routines may omit the extra
208 * length word.
209 *
210 * writetup is expected to write both length words as well as the tuple
211 * data. When readtup is called, the tape is positioned just after the
212 * front length word; readtup must read the tuple data and advance past
213 * the back length word (if present).
214 *
215 * The write/read routines can make use of the tuple description data
216 * stored in the Tuplestorestate record, if needed. They are also expected
217 * to adjust state->availMem by the amount of memory space (not tape space!)
218 * released or consumed. There is no error return from either writetup
219 * or readtup; they should ereport() on failure.
220 *
221 *
222 * NOTES about memory consumption calculations:
223 *
224 * We count space allocated for tuples against the maxKBytes limit,
225 * plus the space used by the variable-size array memtuples.
226 * Fixed-size space (primarily the BufFile I/O buffer) is not counted.
227 * We don't worry about the size of the read pointer array, either.
228 *
229 * Note that we count actual space used (as shown by GetMemoryChunkSpace)
230 * rather than the originally-requested size. This is important since
231 * palloc can add substantial overhead. It's not a complete answer since
232 * we won't count any wasted space in palloc allocation blocks, but it's
233 * a lot better than what we were doing before 7.3.
234 *
235 *--------------------
236 */
237
238
240 bool interXact,
241 int maxKBytes);
242static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
243static void dumptuples(Tuplestorestate *state);
245static unsigned int getlen(Tuplestorestate *state, bool eofOK);
246static void *copytup_heap(Tuplestorestate *state, void *tup);
247static void writetup_heap(Tuplestorestate *state, void *tup);
248static void *readtup_heap(Tuplestorestate *state, unsigned int len);
249
250
251/*
252 * tuplestore_begin_xxx
253 *
254 * Initialize for a tuple store operation.
255 */
256static Tuplestorestate *
257tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
258{
260
262
263 state->status = TSS_INMEM;
264 state->eflags = eflags;
265 state->interXact = interXact;
266 state->truncated = false;
267 state->usedDisk = false;
268 state->maxSpace = 0;
269 state->allowedMem = maxKBytes * (int64) 1024;
270 state->availMem = state->allowedMem;
271 state->myfile = NULL;
272
273 /*
274 * The palloc/pfree pattern for tuple memory is in a FIFO pattern. A
275 * generation context is perfectly suited for this.
276 */
278 "tuplestore tuples",
280 state->resowner = CurrentResourceOwner;
281
282 state->memtupdeleted = 0;
283 state->memtupcount = 0;
284 state->tuples = 0;
285
286 /*
287 * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
288 * see comments in grow_memtuples().
289 */
290 state->memtupsize = Max(16384 / sizeof(void *),
291 ALLOCSET_SEPARATE_THRESHOLD / sizeof(void *) + 1);
292
293 state->growmemtuples = true;
294 state->memtuples = (void **) palloc(state->memtupsize * sizeof(void *));
295
297
298 state->activeptr = 0;
299 state->readptrcount = 1;
300 state->readptrsize = 8; /* arbitrary */
301 state->readptrs = (TSReadPointer *)
302 palloc(state->readptrsize * sizeof(TSReadPointer));
303
304 state->readptrs[0].eflags = eflags;
305 state->readptrs[0].eof_reached = false;
306 state->readptrs[0].current = 0;
307
308 return state;
309}
310
311/*
312 * tuplestore_begin_heap
313 *
314 * Create a new tuplestore; other types of tuple stores (other than
315 * "heap" tuple stores, for heap tuples) are possible, but not presently
316 * implemented.
317 *
318 * randomAccess: if true, both forward and backward accesses to the
319 * tuple store are allowed.
320 *
321 * interXact: if true, the files used for on-disk storage persist beyond the
322 * end of the current transaction. NOTE: It's the caller's responsibility to
323 * create such a tuplestore in a memory context and resource owner that will
324 * also survive transaction boundaries, and to ensure the tuplestore is closed
325 * when it's no longer wanted.
326 *
327 * maxKBytes: how much data to store in memory (any data beyond this
328 * amount is paged to disk). When in doubt, use work_mem.
329 */
331tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
332{
334 int eflags;
335
336 /*
337 * This interpretation of the meaning of randomAccess is compatible with
338 * the pre-8.3 behavior of tuplestores.
339 */
340 eflags = randomAccess ?
343
344 state = tuplestore_begin_common(eflags, interXact, maxKBytes);
345
346 state->copytup = copytup_heap;
347 state->writetup = writetup_heap;
348 state->readtup = readtup_heap;
349
350 return state;
351}
352
353/*
354 * tuplestore_set_eflags
355 *
356 * Set the capability flags for read pointer 0 at a finer grain than is
357 * allowed by tuplestore_begin_xxx. This must be called before inserting
358 * any data into the tuplestore.
359 *
360 * eflags is a bitmask following the meanings used for executor node
361 * startup flags (see executor.h). tuplestore pays attention to these bits:
362 * EXEC_FLAG_REWIND need rewind to start
363 * EXEC_FLAG_BACKWARD need backward fetch
364 * If tuplestore_set_eflags is not called, REWIND is allowed, and BACKWARD
365 * is set per "randomAccess" in the tuplestore_begin_xxx call.
366 *
367 * NOTE: setting BACKWARD without REWIND means the pointer can read backwards,
368 * but not further than the truncation point (the furthest-back read pointer
369 * position at the time of the last tuplestore_trim call).
370 */
371void
373{
374 int i;
375
376 if (state->status != TSS_INMEM || state->memtupcount != 0)
377 elog(ERROR, "too late to call tuplestore_set_eflags");
378
379 state->readptrs[0].eflags = eflags;
380 for (i = 1; i < state->readptrcount; i++)
381 eflags |= state->readptrs[i].eflags;
382 state->eflags = eflags;
383}
384
385/*
386 * tuplestore_alloc_read_pointer - allocate another read pointer.
387 *
388 * Returns the pointer's index.
389 *
390 * The new pointer initially copies the position of read pointer 0.
391 * It can have its own eflags, but if any data has been inserted into
392 * the tuplestore, these eflags must not represent an increase in
393 * requirements.
394 */
395int
397{
398 /* Check for possible increase of requirements */
399 if (state->status != TSS_INMEM || state->memtupcount != 0)
400 {
401 if ((state->eflags | eflags) != state->eflags)
402 elog(ERROR, "too late to require new tuplestore eflags");
403 }
404
405 /* Make room for another read pointer if needed */
406 if (state->readptrcount >= state->readptrsize)
407 {
408 int newcnt = state->readptrsize * 2;
409
410 state->readptrs = (TSReadPointer *)
411 repalloc(state->readptrs, newcnt * sizeof(TSReadPointer));
412 state->readptrsize = newcnt;
413 }
414
415 /* And set it up */
416 state->readptrs[state->readptrcount] = state->readptrs[0];
417 state->readptrs[state->readptrcount].eflags = eflags;
418
419 state->eflags |= eflags;
420
421 return state->readptrcount++;
422}
423
424/*
425 * tuplestore_clear
426 *
427 * Delete all the contents of a tuplestore, and reset its read pointers
428 * to the start.
429 */
430void
432{
433 int i;
434 TSReadPointer *readptr;
435
436 /* update the maxSpace before doing any USEMEM/FREEMEM adjustments */
438
439 if (state->myfile)
440 BufFileClose(state->myfile);
441 state->myfile = NULL;
442
443#ifdef USE_ASSERT_CHECKING
444 {
445 int64 availMem = state->availMem;
446
447 /*
448 * Below, we reset the memory context for storing tuples. To save
449 * from having to always call GetMemoryChunkSpace() on all stored
450 * tuples, we adjust the availMem to forget all the tuples and just
451 * recall USEMEM for the space used by the memtuples array. Here we
452 * just Assert that's correct and the memory tracking hasn't gone
453 * wrong anywhere.
454 */
455 for (i = state->memtupdeleted; i < state->memtupcount; i++)
456 availMem += GetMemoryChunkSpace(state->memtuples[i]);
457
458 availMem += GetMemoryChunkSpace(state->memtuples);
459
460 Assert(availMem == state->allowedMem);
461 }
462#endif
463
464 /* clear the memory consumed by the memory tuples */
465 MemoryContextReset(state->context);
466
467 /*
468 * Zero the used memory and re-consume the space for the memtuples array.
469 * This saves having to FREEMEM for each stored tuple.
470 */
471 state->availMem = state->allowedMem;
473
474 state->status = TSS_INMEM;
475 state->truncated = false;
476 state->memtupdeleted = 0;
477 state->memtupcount = 0;
478 state->tuples = 0;
479 readptr = state->readptrs;
480 for (i = 0; i < state->readptrcount; readptr++, i++)
481 {
482 readptr->eof_reached = false;
483 readptr->current = 0;
484 }
485}
486
487/*
488 * tuplestore_end
489 *
490 * Release resources and clean up.
491 */
492void
494{
495 if (state->myfile)
496 BufFileClose(state->myfile);
497
498 MemoryContextDelete(state->context);
499 pfree(state->memtuples);
500 pfree(state->readptrs);
501 pfree(state);
502}
503
504/*
505 * tuplestore_select_read_pointer - make the specified read pointer active
506 */
507void
509{
510 TSReadPointer *readptr;
512
513 Assert(ptr >= 0 && ptr < state->readptrcount);
514
515 /* No work if already active */
516 if (ptr == state->activeptr)
517 return;
518
519 readptr = &state->readptrs[ptr];
520 oldptr = &state->readptrs[state->activeptr];
521
522 switch (state->status)
523 {
524 case TSS_INMEM:
525 case TSS_WRITEFILE:
526 /* no work */
527 break;
528 case TSS_READFILE:
529
530 /*
531 * First, save the current read position in the pointer about to
532 * become inactive.
533 */
534 if (!oldptr->eof_reached)
535 BufFileTell(state->myfile,
536 &oldptr->file,
537 &oldptr->offset);
538
539 /*
540 * We have to make the temp file's seek position equal to the
541 * logical position of the new read pointer. In eof_reached
542 * state, that's the EOF, which we have available from the saved
543 * write position.
544 */
545 if (readptr->eof_reached)
546 {
547 if (BufFileSeek(state->myfile,
548 state->writepos_file,
549 state->writepos_offset,
550 SEEK_SET) != 0)
553 errmsg("could not seek in tuplestore temporary file")));
554 }
555 else
556 {
557 if (BufFileSeek(state->myfile,
558 readptr->file,
559 readptr->offset,
560 SEEK_SET) != 0)
563 errmsg("could not seek in tuplestore temporary file")));
564 }
565 break;
566 default:
567 elog(ERROR, "invalid tuplestore state");
568 break;
569 }
570
571 state->activeptr = ptr;
572}
573
574/*
575 * tuplestore_tuple_count
576 *
577 * Returns the number of tuples added since creation or the last
578 * tuplestore_clear().
579 */
580int64
582{
583 return state->tuples;
584}
585
586/*
587 * tuplestore_ateof
588 *
589 * Returns the active read pointer's eof_reached state.
590 */
591bool
593{
594 return state->readptrs[state->activeptr].eof_reached;
595}
596
597/*
598 * Grow the memtuples[] array, if possible within our memory constraint. We
599 * must not exceed INT_MAX tuples in memory or the caller-provided memory
600 * limit. Return true if we were able to enlarge the array, false if not.
601 *
602 * Normally, at each increment we double the size of the array. When doing
603 * that would exceed a limit, we attempt one last, smaller increase (and then
604 * clear the growmemtuples flag so we don't try any more). That allows us to
605 * use memory as fully as permitted; sticking to the pure doubling rule could
606 * result in almost half going unused. Because availMem moves around with
607 * tuple addition/removal, we need some rule to prevent making repeated small
608 * increases in memtupsize, which would just be useless thrashing. The
609 * growmemtuples flag accomplishes that and also prevents useless
610 * recalculations in this function.
611 */
612static bool
614{
615 int newmemtupsize;
616 int memtupsize = state->memtupsize;
617 int64 memNowUsed = state->allowedMem - state->availMem;
618
619 /* Forget it if we've already maxed out memtuples, per comment above */
620 if (!state->growmemtuples)
621 return false;
622
623 /* Select new value of memtupsize */
624 if (memNowUsed <= state->availMem)
625 {
626 /*
627 * We've used no more than half of allowedMem; double our usage,
628 * clamping at INT_MAX tuples.
629 */
630 if (memtupsize < INT_MAX / 2)
631 newmemtupsize = memtupsize * 2;
632 else
633 {
635 state->growmemtuples = false;
636 }
637 }
638 else
639 {
640 /*
641 * This will be the last increment of memtupsize. Abandon doubling
642 * strategy and instead increase as much as we safely can.
643 *
644 * To stay within allowedMem, we can't increase memtupsize by more
645 * than availMem / sizeof(void *) elements. In practice, we want to
646 * increase it by considerably less, because we need to leave some
647 * space for the tuples to which the new array slots will refer. We
648 * assume the new tuples will be about the same size as the tuples
649 * we've already seen, and thus we can extrapolate from the space
650 * consumption so far to estimate an appropriate new size for the
651 * memtuples array. The optimal value might be higher or lower than
652 * this estimate, but it's hard to know that in advance. We again
653 * clamp at INT_MAX tuples.
654 *
655 * This calculation is safe against enlarging the array so much that
656 * LACKMEM becomes true, because the memory currently used includes
657 * the present array; thus, there would be enough allowedMem for the
658 * new array elements even if no other memory were currently used.
659 *
660 * We do the arithmetic in float8, because otherwise the product of
661 * memtupsize and allowedMem could overflow. Any inaccuracy in the
662 * result should be insignificant; but even if we computed a
663 * completely insane result, the checks below will prevent anything
664 * really bad from happening.
665 */
666 double grow_ratio;
667
668 grow_ratio = (double) state->allowedMem / (double) memNowUsed;
669 if (memtupsize * grow_ratio < INT_MAX)
670 newmemtupsize = (int) (memtupsize * grow_ratio);
671 else
673
674 /* We won't make any further enlargement attempts */
675 state->growmemtuples = false;
676 }
677
678 /* Must enlarge array by at least one element, else report failure */
679 if (newmemtupsize <= memtupsize)
680 goto noalloc;
681
682 /*
683 * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp
684 * to ensure our request won't be rejected. Note that we can easily
685 * exhaust address space before facing this outcome. (This is presently
686 * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but
687 * don't rely on that at this distance.)
688 */
689 if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(void *))
690 {
691 newmemtupsize = (int) (MaxAllocHugeSize / sizeof(void *));
692 state->growmemtuples = false; /* can't grow any more */
693 }
694
695 /*
696 * We need to be sure that we do not cause LACKMEM to become true, else
697 * the space management algorithm will go nuts. The code above should
698 * never generate a dangerous request, but to be safe, check explicitly
699 * that the array growth fits within availMem. (We could still cause
700 * LACKMEM if the memory chunk overhead associated with the memtuples
701 * array were to increase. That shouldn't happen because we chose the
702 * initial array size large enough to ensure that palloc will be treating
703 * both old and new arrays as separate chunks. But we'll check LACKMEM
704 * explicitly below just in case.)
705 */
706 if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(void *)))
707 goto noalloc;
708
709 /* OK, do it */
711 state->memtupsize = newmemtupsize;
712 state->memtuples = (void **)
713 repalloc_huge(state->memtuples,
714 state->memtupsize * sizeof(void *));
716 if (LACKMEM(state))
717 elog(ERROR, "unexpected out-of-memory situation in tuplestore");
718 return true;
719
720noalloc:
721 /* If for any reason we didn't realloc, shut off future attempts */
722 state->growmemtuples = false;
723 return false;
724}
725
726/*
727 * Accept one tuple and append it to the tuplestore.
728 *
729 * Note that the input tuple is always copied; the caller need not save it.
730 *
731 * If the active read pointer is currently "at EOF", it remains so (the read
732 * pointer implicitly advances along with the write pointer); otherwise the
733 * read pointer is unchanged. Non-active read pointers do not move, which
734 * means they are certain to not be "at EOF" immediately after puttuple.
735 * This curious-seeming behavior is for the convenience of nodeMaterial.c and
736 * nodeCtescan.c, which would otherwise need to do extra pointer repositioning
737 * steps.
738 *
739 * tuplestore_puttupleslot() is a convenience routine to collect data from
740 * a TupleTableSlot without an extra copy operation.
741 */
742void
744 TupleTableSlot *slot)
745{
746 MinimalTuple tuple;
748
749 /*
750 * Form a MinimalTuple in working memory
751 */
752 tuple = ExecCopySlotMinimalTuple(slot);
754
756
758}
759
760/*
761 * "Standard" case to copy from a HeapTuple. This is actually now somewhat
762 * deprecated, but not worth getting rid of in view of the number of callers.
763 */
764void
766{
768
769 /*
770 * Copy the tuple. (Must do this even in WRITEFILE case. Note that
771 * COPYTUP includes USEMEM, so we needn't do that here.)
772 */
773 tuple = COPYTUP(state, tuple);
774
776
778}
779
780/*
781 * Similar to tuplestore_puttuple(), but work from values + nulls arrays.
782 * This avoids an extra tuple-construction operation.
783 */
784void
786 const Datum *values, const bool *isnull)
787{
788 MinimalTuple tuple;
790
791 tuple = heap_form_minimal_tuple(tdesc, values, isnull, 0);
793
795
797}
798
799static void
801{
802 TSReadPointer *readptr;
803 int i;
804 ResourceOwner oldowner;
806
807 state->tuples++;
808
809 switch (state->status)
810 {
811 case TSS_INMEM:
812
813 /*
814 * Update read pointers as needed; see API spec above.
815 */
816 readptr = state->readptrs;
817 for (i = 0; i < state->readptrcount; readptr++, i++)
818 {
819 if (readptr->eof_reached && i != state->activeptr)
820 {
821 readptr->eof_reached = false;
822 readptr->current = state->memtupcount;
823 }
824 }
825
826 /*
827 * Grow the array as needed. Note that we try to grow the array
828 * when there is still one free slot remaining --- if we fail,
829 * there'll still be room to store the incoming tuple, and then
830 * we'll switch to tape-based operation.
831 */
832 if (state->memtupcount >= state->memtupsize - 1)
833 {
835 Assert(state->memtupcount < state->memtupsize);
836 }
837
838 /* Stash the tuple in the in-memory array */
839 state->memtuples[state->memtupcount++] = tuple;
840
841 /*
842 * Done if we still fit in available memory and have array slots.
843 */
844 if (state->memtupcount < state->memtupsize && !LACKMEM(state))
845 return;
846
847 /*
848 * Nope; time to switch to tape-based operation. Make sure that
849 * the temp file(s) are created in suitable temp tablespaces.
850 */
852
853 /* associate the file with the store's resource owner */
854 oldowner = CurrentResourceOwner;
855 CurrentResourceOwner = state->resowner;
856
857 /*
858 * We switch out of the state->context as this is a generation
859 * context, which isn't ideal for allocations relating to the
860 * BufFile.
861 */
862 oldcxt = MemoryContextSwitchTo(state->context->parent);
863
864 state->myfile = BufFileCreateTemp(state->interXact);
865
867
868 CurrentResourceOwner = oldowner;
869
870 /*
871 * Freeze the decision about whether trailing length words will be
872 * used. We can't change this choice once data is on tape, even
873 * though callers might drop the requirement.
874 */
875 state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
876
877 /*
878 * Update the maximum space used before dumping the tuples. It's
879 * possible that more space will be used by the tuples in memory
880 * than the space that will be used on disk.
881 */
883
884 state->status = TSS_WRITEFILE;
886 break;
887 case TSS_WRITEFILE:
888
889 /*
890 * Update read pointers as needed; see API spec above. Note:
891 * BufFileTell is quite cheap, so not worth trying to avoid
892 * multiple calls.
893 */
894 readptr = state->readptrs;
895 for (i = 0; i < state->readptrcount; readptr++, i++)
896 {
897 if (readptr->eof_reached && i != state->activeptr)
898 {
899 readptr->eof_reached = false;
900 BufFileTell(state->myfile,
901 &readptr->file,
902 &readptr->offset);
903 }
904 }
905
906 WRITETUP(state, tuple);
907 break;
908 case TSS_READFILE:
909
910 /*
911 * Switch from reading to writing.
912 */
913 if (!state->readptrs[state->activeptr].eof_reached)
914 BufFileTell(state->myfile,
915 &state->readptrs[state->activeptr].file,
916 &state->readptrs[state->activeptr].offset);
917 if (BufFileSeek(state->myfile,
918 state->writepos_file, state->writepos_offset,
919 SEEK_SET) != 0)
922 errmsg("could not seek in tuplestore temporary file")));
923 state->status = TSS_WRITEFILE;
924
925 /*
926 * Update read pointers as needed; see API spec above.
927 */
928 readptr = state->readptrs;
929 for (i = 0; i < state->readptrcount; readptr++, i++)
930 {
931 if (readptr->eof_reached && i != state->activeptr)
932 {
933 readptr->eof_reached = false;
934 readptr->file = state->writepos_file;
935 readptr->offset = state->writepos_offset;
936 }
937 }
938
939 WRITETUP(state, tuple);
940 break;
941 default:
942 elog(ERROR, "invalid tuplestore state");
943 break;
944 }
945}
946
947/*
948 * Fetch the next tuple in either forward or back direction.
949 * Returns NULL if no more tuples. If should_free is set, the
950 * caller must pfree the returned tuple when done with it.
951 *
952 * Backward scan is only allowed if randomAccess was set true or
953 * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags().
954 */
955static void *
957 bool *should_free)
958{
959 TSReadPointer *readptr = &state->readptrs[state->activeptr];
960 unsigned int tuplen;
961 void *tup;
962
963 Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));
964
965 switch (state->status)
966 {
967 case TSS_INMEM:
968 *should_free = false;
969 if (forward)
970 {
971 if (readptr->eof_reached)
972 return NULL;
973 if (readptr->current < state->memtupcount)
974 {
975 /* We have another tuple, so return it */
976 return state->memtuples[readptr->current++];
977 }
978 readptr->eof_reached = true;
979 return NULL;
980 }
981 else
982 {
983 /*
984 * if all tuples are fetched already then we return last
985 * tuple, else tuple before last returned.
986 */
987 if (readptr->eof_reached)
988 {
989 readptr->current = state->memtupcount;
990 readptr->eof_reached = false;
991 }
992 else
993 {
994 if (readptr->current <= state->memtupdeleted)
995 {
996 Assert(!state->truncated);
997 return NULL;
998 }
999 readptr->current--; /* last returned tuple */
1000 }
1001 if (readptr->current <= state->memtupdeleted)
1002 {
1003 Assert(!state->truncated);
1004 return NULL;
1005 }
1006 return state->memtuples[readptr->current - 1];
1007 }
1008 break;
1009
1010 case TSS_WRITEFILE:
1011 /* Skip state change if we'll just return NULL */
1012 if (readptr->eof_reached && forward)
1013 return NULL;
1014
1015 /*
1016 * Switch from writing to reading.
1017 */
1018 BufFileTell(state->myfile,
1019 &state->writepos_file, &state->writepos_offset);
1020 if (!readptr->eof_reached)
1021 if (BufFileSeek(state->myfile,
1022 readptr->file, readptr->offset,
1023 SEEK_SET) != 0)
1024 ereport(ERROR,
1026 errmsg("could not seek in tuplestore temporary file")));
1027 state->status = TSS_READFILE;
1029
1030 case TSS_READFILE:
1031 *should_free = true;
1032 if (forward)
1033 {
1034 if ((tuplen = getlen(state, true)) != 0)
1035 {
1036 tup = READTUP(state, tuplen);
1037 return tup;
1038 }
1039 else
1040 {
1041 readptr->eof_reached = true;
1042 return NULL;
1043 }
1044 }
1045
1046 /*
1047 * Backward.
1048 *
1049 * if all tuples are fetched already then we return last tuple,
1050 * else tuple before last returned.
1051 *
1052 * Back up to fetch previously-returned tuple's ending length
1053 * word. If seek fails, assume we are at start of file.
1054 */
1055 if (BufFileSeek(state->myfile, 0, -(pgoff_t) sizeof(unsigned int),
1056 SEEK_CUR) != 0)
1057 {
1058 /* even a failed backwards fetch gets you out of eof state */
1059 readptr->eof_reached = false;
1060 Assert(!state->truncated);
1061 return NULL;
1062 }
1063 tuplen = getlen(state, false);
1064
1065 if (readptr->eof_reached)
1066 {
1067 readptr->eof_reached = false;
1068 /* We will return the tuple returned before returning NULL */
1069 }
1070 else
1071 {
1072 /*
1073 * Back up to get ending length word of tuple before it.
1074 */
1075 if (BufFileSeek(state->myfile, 0,
1076 -(pgoff_t) (tuplen + 2 * sizeof(unsigned int)),
1077 SEEK_CUR) != 0)
1078 {
1079 /*
1080 * If that fails, presumably the prev tuple is the first
1081 * in the file. Back up so that it becomes next to read
1082 * in forward direction (not obviously right, but that is
1083 * what in-memory case does).
1084 */
1085 if (BufFileSeek(state->myfile, 0,
1086 -(pgoff_t) (tuplen + sizeof(unsigned int)),
1087 SEEK_CUR) != 0)
1088 ereport(ERROR,
1090 errmsg("could not seek in tuplestore temporary file")));
1091 Assert(!state->truncated);
1092 return NULL;
1093 }
1094 tuplen = getlen(state, false);
1095 }
1096
1097 /*
1098 * Now we have the length of the prior tuple, back up and read it.
1099 * Note: READTUP expects we are positioned after the initial
1100 * length word of the tuple, so back up to that point.
1101 */
1102 if (BufFileSeek(state->myfile, 0,
1103 -(pgoff_t) tuplen,
1104 SEEK_CUR) != 0)
1105 ereport(ERROR,
1107 errmsg("could not seek in tuplestore temporary file")));
1108 tup = READTUP(state, tuplen);
1109 return tup;
1110
1111 default:
1112 elog(ERROR, "invalid tuplestore state");
1113 return NULL; /* keep compiler quiet */
1114 }
1115}
1116
1117/*
1118 * tuplestore_gettupleslot - exported function to fetch a MinimalTuple
1119 *
1120 * If successful, put tuple in slot and return true; else, clear the slot
1121 * and return false.
1122 *
1123 * If copy is true, the slot receives a copied tuple (allocated in current
1124 * memory context) that will stay valid regardless of future manipulations of
1125 * the tuplestore's state. If copy is false, the slot may just receive a
1126 * pointer to a tuple held within the tuplestore. The latter is more
1127 * efficient but the slot contents may be corrupted if additional writes to
1128 * the tuplestore occur. (If using tuplestore_trim, see comments therein.)
1129 */
1130bool
1132 bool copy, TupleTableSlot *slot)
1133{
1134 MinimalTuple tuple;
1135 bool should_free;
1136
1138
1139 if (tuple)
1140 {
1141 if (copy && !should_free)
1142 {
1143 tuple = heap_copy_minimal_tuple(tuple, 0);
1144 should_free = true;
1145 }
1146 ExecStoreMinimalTuple(tuple, slot, should_free);
1147 return true;
1148 }
1149 else
1150 {
1151 ExecClearTuple(slot);
1152 return false;
1153 }
1154}
1155
1156/*
1157 * tuplestore_gettupleslot_force - exported function to fetch a tuple
1158 *
1159 * This is identical to tuplestore_gettupleslot except the given slot can be
1160 * any kind of slot; it need not be one that will accept a MinimalTuple.
1161 */
1162bool
1164 bool copy, TupleTableSlot *slot)
1165{
1166 MinimalTuple tuple;
1167 bool should_free;
1168
1170
1171 if (tuple)
1172 {
1173 if (copy && !should_free)
1174 {
1175 tuple = heap_copy_minimal_tuple(tuple, 0);
1176 should_free = true;
1177 }
1179 return true;
1180 }
1181 else
1182 {
1183 ExecClearTuple(slot);
1184 return false;
1185 }
1186}
1187
1188/*
1189 * tuplestore_advance - exported function to adjust position without fetching
1190 *
1191 * We could optimize this case to avoid palloc/pfree overhead, but for the
1192 * moment it doesn't seem worthwhile.
1193 */
1194bool
1196{
1197 void *tuple;
1198 bool should_free;
1199
1201
1202 if (tuple)
1203 {
1204 if (should_free)
1205 pfree(tuple);
1206 return true;
1207 }
1208 else
1209 {
1210 return false;
1211 }
1212}
1213
1214/*
1215 * Advance over N tuples in either forward or back direction,
1216 * without returning any data. N<=0 is a no-op.
1217 * Returns true if successful, false if ran out of tuples.
1218 */
1219bool
1221{
1222 TSReadPointer *readptr = &state->readptrs[state->activeptr];
1223
1224 Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));
1225
1226 if (ntuples <= 0)
1227 return true;
1228
1229 switch (state->status)
1230 {
1231 case TSS_INMEM:
1232 if (forward)
1233 {
1234 if (readptr->eof_reached)
1235 return false;
1236 if (state->memtupcount - readptr->current >= ntuples)
1237 {
1238 readptr->current += ntuples;
1239 return true;
1240 }
1241 readptr->current = state->memtupcount;
1242 readptr->eof_reached = true;
1243 return false;
1244 }
1245 else
1246 {
1247 if (readptr->eof_reached)
1248 {
1249 readptr->current = state->memtupcount;
1250 readptr->eof_reached = false;
1251 ntuples--;
1252 }
1253 if (readptr->current - state->memtupdeleted > ntuples)
1254 {
1255 readptr->current -= ntuples;
1256 return true;
1257 }
1258 Assert(!state->truncated);
1259 readptr->current = state->memtupdeleted;
1260 return false;
1261 }
1262 break;
1263
1264 default:
1265 /* We don't currently try hard to optimize other cases */
1266 while (ntuples-- > 0)
1267 {
1268 void *tuple;
1269 bool should_free;
1270
1272
1273 if (tuple == NULL)
1274 return false;
1275 if (should_free)
1276 pfree(tuple);
1278 }
1279 return true;
1280 }
1281}
1282
1283/*
1284 * dumptuples - remove tuples from memory and write to tape
1285 *
1286 * As a side effect, we must convert each read pointer's position from
1287 * "current" to file/offset format. But eof_reached pointers don't
1288 * need to change state.
1289 */
1290static void
1292{
1293 int i;
1294
1295 for (i = state->memtupdeleted;; i++)
1296 {
1297 TSReadPointer *readptr = state->readptrs;
1298 int j;
1299
1300 for (j = 0; j < state->readptrcount; readptr++, j++)
1301 {
1302 if (i == readptr->current && !readptr->eof_reached)
1303 BufFileTell(state->myfile,
1304 &readptr->file, &readptr->offset);
1305 }
1306 if (i >= state->memtupcount)
1307 break;
1308 WRITETUP(state, state->memtuples[i]);
1309 }
1310 state->memtupdeleted = 0;
1311 state->memtupcount = 0;
1312}
1313
1314/*
1315 * tuplestore_rescan - rewind the active read pointer to start
1316 */
1317void
1319{
1320 TSReadPointer *readptr = &state->readptrs[state->activeptr];
1321
1322 Assert(readptr->eflags & EXEC_FLAG_REWIND);
1323 Assert(!state->truncated);
1324
1325 switch (state->status)
1326 {
1327 case TSS_INMEM:
1328 readptr->eof_reached = false;
1329 readptr->current = 0;
1330 break;
1331 case TSS_WRITEFILE:
1332 readptr->eof_reached = false;
1333 readptr->file = 0;
1334 readptr->offset = 0;
1335 break;
1336 case TSS_READFILE:
1337 readptr->eof_reached = false;
1338 if (BufFileSeek(state->myfile, 0, 0, SEEK_SET) != 0)
1339 ereport(ERROR,
1341 errmsg("could not seek in tuplestore temporary file")));
1342 break;
1343 default:
1344 elog(ERROR, "invalid tuplestore state");
1345 break;
1346 }
1347}
1348
1349/*
1350 * tuplestore_copy_read_pointer - copy a read pointer's state to another
1351 */
1352void
1354 int srcptr, int destptr)
1355{
1356 TSReadPointer *sptr = &state->readptrs[srcptr];
1357 TSReadPointer *dptr = &state->readptrs[destptr];
1358
1359 Assert(srcptr >= 0 && srcptr < state->readptrcount);
1360 Assert(destptr >= 0 && destptr < state->readptrcount);
1361
1362 /* Assigning to self is a no-op */
1363 if (srcptr == destptr)
1364 return;
1365
1366 if (dptr->eflags != sptr->eflags)
1367 {
1368 /* Possible change of overall eflags, so copy and then recompute */
1369 int eflags;
1370 int i;
1371
1372 *dptr = *sptr;
1373 eflags = state->readptrs[0].eflags;
1374 for (i = 1; i < state->readptrcount; i++)
1375 eflags |= state->readptrs[i].eflags;
1376 state->eflags = eflags;
1377 }
1378 else
1379 *dptr = *sptr;
1380
1381 switch (state->status)
1382 {
1383 case TSS_INMEM:
1384 case TSS_WRITEFILE:
1385 /* no work */
1386 break;
1387 case TSS_READFILE:
1388
1389 /*
1390 * This case is a bit tricky since the active read pointer's
1391 * position corresponds to the seek point, not what is in its
1392 * variables. Assigning to the active requires a seek, and
1393 * assigning from the active requires a tell, except when
1394 * eof_reached.
1395 */
1396 if (destptr == state->activeptr)
1397 {
1398 if (dptr->eof_reached)
1399 {
1400 if (BufFileSeek(state->myfile,
1401 state->writepos_file,
1402 state->writepos_offset,
1403 SEEK_SET) != 0)
1404 ereport(ERROR,
1406 errmsg("could not seek in tuplestore temporary file")));
1407 }
1408 else
1409 {
1410 if (BufFileSeek(state->myfile,
1411 dptr->file, dptr->offset,
1412 SEEK_SET) != 0)
1413 ereport(ERROR,
1415 errmsg("could not seek in tuplestore temporary file")));
1416 }
1417 }
1418 else if (srcptr == state->activeptr)
1419 {
1420 if (!dptr->eof_reached)
1421 BufFileTell(state->myfile,
1422 &dptr->file,
1423 &dptr->offset);
1424 }
1425 break;
1426 default:
1427 elog(ERROR, "invalid tuplestore state");
1428 break;
1429 }
1430}
1431
1432/*
1433 * tuplestore_trim - remove all no-longer-needed tuples
1434 *
1435 * Calling this function authorizes the tuplestore to delete all tuples
1436 * before the oldest read pointer, if no read pointer is marked as requiring
1437 * REWIND capability.
1438 *
1439 * Note: this is obviously safe if no pointer has BACKWARD capability either.
1440 * If a pointer is marked as BACKWARD but not REWIND capable, it means that
1441 * the pointer can be moved backward but not before the oldest other read
1442 * pointer.
1443 */
1444void
1446{
1447 int oldest;
1448 int nremove;
1449 int i;
1450
1451 /*
1452 * Truncation is disallowed if any read pointer requires rewind
1453 * capability.
1454 */
1455 if (state->eflags & EXEC_FLAG_REWIND)
1456 return;
1457
1458 /*
1459 * We don't bother trimming temp files since it usually would mean more
1460 * work than just letting them sit in kernel buffers until they age out.
1461 */
1462 if (state->status != TSS_INMEM)
1463 return;
1464
1465 /* Find the oldest read pointer */
1466 oldest = state->memtupcount;
1467 for (i = 0; i < state->readptrcount; i++)
1468 {
1469 if (!state->readptrs[i].eof_reached)
1470 oldest = Min(oldest, state->readptrs[i].current);
1471 }
1472
1473 /*
1474 * Note: you might think we could remove all the tuples before the oldest
1475 * "current", since that one is the next to be returned. However, since
1476 * tuplestore_gettuple returns a direct pointer to our internal copy of
1477 * the tuple, it's likely that the caller has still got the tuple just
1478 * before "current" referenced in a slot. So we keep one extra tuple
1479 * before the oldest "current". (Strictly speaking, we could require such
1480 * callers to use the "copy" flag to tuplestore_gettupleslot, but for
1481 * efficiency we allow this one case to not use "copy".)
1482 */
1483 nremove = oldest - 1;
1484 if (nremove <= 0)
1485 return; /* nothing to do */
1486
1487 Assert(nremove >= state->memtupdeleted);
1488 Assert(nremove <= state->memtupcount);
1489
1490 /* before freeing any memory, update the statistics */
1492
1493 /* Release no-longer-needed tuples */
1494 for (i = state->memtupdeleted; i < nremove; i++)
1495 {
1496 FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
1497 pfree(state->memtuples[i]);
1498 state->memtuples[i] = NULL;
1499 }
1500 state->memtupdeleted = nremove;
1501
1502 /* mark tuplestore as truncated (used for Assert crosschecks only) */
1503 state->truncated = true;
1504
1505 /*
1506 * If nremove is less than 1/8th memtupcount, just stop here, leaving the
1507 * "deleted" slots as NULL. This prevents us from expending O(N^2) time
1508 * repeatedly memmove-ing a large pointer array. The worst case space
1509 * wastage is pretty small, since it's just pointers and not whole tuples.
1510 */
1511 if (nremove < state->memtupcount / 8)
1512 return;
1513
1514 /*
1515 * Slide the array down and readjust pointers.
1516 *
1517 * In mergejoin's current usage, it's demonstrable that there will always
1518 * be exactly one non-removed tuple; so optimize that case.
1519 */
1520 if (nremove + 1 == state->memtupcount)
1521 state->memtuples[0] = state->memtuples[nremove];
1522 else
1523 memmove(state->memtuples, state->memtuples + nremove,
1524 (state->memtupcount - nremove) * sizeof(void *));
1525
1526 state->memtupdeleted = 0;
1527 state->memtupcount -= nremove;
1528 for (i = 0; i < state->readptrcount; i++)
1529 {
1530 if (!state->readptrs[i].eof_reached)
1531 state->readptrs[i].current -= nremove;
1532 }
1533}
1534
1535/*
1536 * tuplestore_updatemax
1537 * Update the maximum space used by this tuplestore and the method used
1538 * for storage.
1539 */
1540static void
1542{
1543 if (state->status == TSS_INMEM)
1544 state->maxSpace = Max(state->maxSpace,
1545 state->allowedMem - state->availMem);
1546 else
1547 {
1548 state->maxSpace = Max(state->maxSpace,
1549 BufFileSize(state->myfile));
1550
1551 /*
1552 * usedDisk never gets set to false again after spilling to disk, even
1553 * if tuplestore_clear() is called and new tuples go to memory again.
1554 */
1555 state->usedDisk = true;
1556 }
1557}
1558
1559/*
1560 * tuplestore_get_stats
1561 * Obtain statistics about the maximum space used by the tuplestore.
1562 * These statistics are the maximums and are not reset by calls to
1563 * tuplestore_trim() or tuplestore_clear().
1564 */
1565void
1568{
1570
1571 if (state->usedDisk)
1572 *max_storage_type = "Disk";
1573 else
1574 *max_storage_type = "Memory";
1575
1576 *max_space = state->maxSpace;
1577}
1578
1579/*
1580 * tuplestore_in_memory
1581 *
1582 * Returns true if the tuplestore has not spilled to disk.
1583 *
1584 * XXX exposing this is a violation of modularity ... should get rid of it.
1585 */
1586bool
1588{
1589 return (state->status == TSS_INMEM);
1590}
1591
1592
1593/*
1594 * Tape interface routines
1595 */
1596
1597static unsigned int
1599{
1600 unsigned int len;
1601 size_t nbytes;
1602
1603 nbytes = BufFileReadMaybeEOF(state->myfile, &len, sizeof(len), eofOK);
1604 if (nbytes == 0)
1605 return 0;
1606 else
1607 return len;
1608}
1609
1610
1611/*
1612 * Routines specialized for HeapTuple case
1613 *
1614 * The stored form is actually a MinimalTuple, but for largely historical
1615 * reasons we allow COPYTUP to work from a HeapTuple.
1616 *
1617 * Since MinimalTuple already has length in its first word, we don't need
1618 * to write that separately.
1619 */
1620
1621static void *
1623{
1624 MinimalTuple tuple;
1625
1628 return tuple;
1629}
1630
1631static void
1633{
1634 MinimalTuple tuple = (MinimalTuple) tup;
1635
1636 /* the part of the MinimalTuple we'll write: */
1637 char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
1638 unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
1639
1640 /* total on-disk footprint: */
1641 unsigned int tuplen = tupbodylen + sizeof(int);
1642
1643 BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
1645 if (state->backward) /* need trailing length word? */
1646 BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
1647
1650}
1651
1652static void *
1654{
1655 unsigned int tupbodylen = len - sizeof(int);
1656 unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
1657 MinimalTuple tuple = (MinimalTuple) palloc(tuplen);
1658 char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
1659
1660 /* read in the tuple proper */
1661 tuple->t_len = tuplen;
1663 if (state->backward) /* need trailing length word? */
1664 BufFileReadExact(state->myfile, &tuplen, sizeof(tuplen));
1665 return tuple;
1666}
void PrepareTempTablespaces(void)
static Datum values[MAXATTR]
Definition bootstrap.c:188
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition buffile.c:655
BufFile * BufFileCreateTemp(bool interXact)
Definition buffile.c:194
int BufFileSeek(BufFile *file, int fileno, pgoff_t offset, int whence)
Definition buffile.c:741
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition buffile.c:677
size_t BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
Definition buffile.c:665
void BufFileTell(BufFile *file, int *fileno, pgoff_t *offset)
Definition buffile.c:833
int64 BufFileSize(BufFile *file)
Definition buffile.c:866
void BufFileClose(BufFile *file)
Definition buffile.c:413
#define Min(x, y)
Definition c.h:1093
#define Max(x, y)
Definition c.h:1087
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
#define pg_fallthrough
Definition c.h:152
size_t Size
Definition c.h:691
int errcode_for_file_access(void)
Definition elog.c:897
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
void ExecForceStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
#define EXEC_FLAG_BACKWARD
Definition executor.h:70
#define EXEC_FLAG_REWIND
Definition executor.h:69
#define palloc0_object(type)
Definition fe_memutils.h:75
MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition generation.c:162
MinimalTuple heap_copy_minimal_tuple(MinimalTuple mtup, Size extra)
Definition heaptuple.c:1490
MinimalTuple heap_form_minimal_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull, Size extra)
Definition heaptuple.c:1402
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition heaptuple.c:1478
MinimalTuple minimal_tuple_from_heap_tuple(HeapTuple htup, Size extra)
Definition heaptuple.c:1535
MinimalTupleData * MinimalTuple
Definition htup.h:27
#define MINIMAL_TUPLE_DATA_OFFSET
int j
Definition isn.c:78
int i
Definition isn.c:77
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
Size GetMemoryChunkSpace(void *pointer)
Definition mcxt.c:770
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
void * repalloc_huge(void *pointer, Size size)
Definition mcxt.c:1757
#define MaxAllocHugeSize
Definition memutils.h:45
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define ALLOCSET_SEPARATE_THRESHOLD
Definition memutils.h:187
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
static char * errmsg
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
const void size_t len
off_t pgoff_t
Definition port.h:421
uint64_t Datum
Definition postgres.h:70
static int fb(int x)
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
pgoff_t offset
Definition tuplestore.c:98
ResourceOwner resowner
Definition tuplestore.c:118
TupStoreStatus status
Definition tuplestore.c:106
MemoryContext context
Definition tuplestore.c:117
TSReadPointer * readptrs
Definition tuplestore.c:177
BufFile * myfile
Definition tuplestore.c:116
void ** memtuples
Definition tuplestore.c:163
pgoff_t writepos_offset
Definition tuplestore.c:183
void(* writetup)(Tuplestorestate *state, void *tup)
Definition tuplestore.c:143
#define WRITETUP(state, tup)
Definition tuplestore.c:187
void tuplestore_get_stats(Tuplestorestate *state, char **max_storage_type, int64 *max_space)
#define READTUP(state, len)
Definition tuplestore.c:188
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
#define LACKMEM(state)
Definition tuplestore.c:189
#define USEMEM(state, amt)
Definition tuplestore.c:190
static void writetup_heap(Tuplestorestate *state, void *tup)
void tuplestore_puttupleslot(Tuplestorestate *state, TupleTableSlot *slot)
Definition tuplestore.c:743
static void * copytup_heap(Tuplestorestate *state, void *tup)
void tuplestore_select_read_pointer(Tuplestorestate *state, int ptr)
Definition tuplestore.c:508
#define COPYTUP(state, tup)
Definition tuplestore.c:186
void tuplestore_clear(Tuplestorestate *state)
Definition tuplestore.c:431
static bool grow_memtuples(Tuplestorestate *state)
Definition tuplestore.c:613
#define FREEMEM(state, amt)
Definition tuplestore.c:191
bool tuplestore_gettupleslot_force(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
static Tuplestorestate * tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
Definition tuplestore.c:257
static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
Definition tuplestore.c:800
TupStoreStatus
Definition tuplestore.c:74
@ TSS_READFILE
Definition tuplestore.c:77
@ TSS_INMEM
Definition tuplestore.c:75
@ TSS_WRITEFILE
Definition tuplestore.c:76
int64 tuplestore_tuple_count(Tuplestorestate *state)
Definition tuplestore.c:581
void tuplestore_rescan(Tuplestorestate *state)
int tuplestore_alloc_read_pointer(Tuplestorestate *state, int eflags)
Definition tuplestore.c:396
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition tuplestore.c:331
void tuplestore_trim(Tuplestorestate *state)
void tuplestore_copy_read_pointer(Tuplestorestate *state, int srcptr, int destptr)
bool tuplestore_advance(Tuplestorestate *state, bool forward)
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:785
static void * readtup_heap(Tuplestorestate *state, unsigned int len)
bool tuplestore_in_memory(Tuplestorestate *state)
void tuplestore_end(Tuplestorestate *state)
Definition tuplestore.c:493
static void * tuplestore_gettuple(Tuplestorestate *state, bool forward, bool *should_free)
Definition tuplestore.c:956
void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple)
Definition tuplestore.c:765
bool tuplestore_ateof(Tuplestorestate *state)
Definition tuplestore.c:592
static void dumptuples(Tuplestorestate *state)
static void tuplestore_updatemax(Tuplestorestate *state)
void tuplestore_set_eflags(Tuplestorestate *state, int eflags)
Definition tuplestore.c:372
bool tuplestore_skiptuples(Tuplestorestate *state, int64 ntuples, bool forward)
static unsigned int getlen(Tuplestorestate *state, bool eofOK)
static MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot)
Definition tuptable.h:514
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476