/*
 * tuplestore.c — PostgreSQL source (git master), captured from the doxygen
 * code listing; page-navigation chrome converted to this comment.
 */
1 /*-------------------------------------------------------------------------
2  *
3  * tuplestore.c
4  * Generalized routines for temporary tuple storage.
5  *
6  * This module handles temporary storage of tuples for purposes such
7  * as Materialize nodes, hashjoin batch files, etc. It is essentially
8  * a dumbed-down version of tuplesort.c; it does no sorting of tuples
9  * but can only store and regurgitate a sequence of tuples. However,
10  * because no sort is required, it is allowed to start reading the sequence
11  * before it has all been written. This is particularly useful for cursors,
12  * because it allows random access within the already-scanned portion of
13  * a query without having to process the underlying scan to completion.
14  * Also, it is possible to support multiple independent read pointers.
15  *
16  * A temporary file is used to handle the data if it exceeds the
17  * space limit specified by the caller.
18  *
19  * The (approximate) amount of memory allowed to the tuplestore is specified
20  * in kilobytes by the caller. We absorb tuples and simply store them in an
21  * in-memory array as long as we haven't exceeded maxKBytes. If we do exceed
22  * maxKBytes, we dump all the tuples into a temp file and then read from that
23  * when needed.
24  *
25  * Upon creation, a tuplestore supports a single read pointer, numbered 0.
26  * Additional read pointers can be created using tuplestore_alloc_read_pointer.
27  * Mark/restore behavior is supported by copying read pointers.
28  *
29  * When the caller requests backward-scan capability, we write the temp file
30  * in a format that allows either forward or backward scan. Otherwise, only
31  * forward scan is allowed. A request for backward scan must be made before
32  * putting any tuples into the tuplestore. Rewind is normally allowed but
33  * can be turned off via tuplestore_set_eflags; turning off rewind for all
34  * read pointers enables truncation of the tuplestore at the oldest read point
35  * for minimal memory usage. (The caller must explicitly call tuplestore_trim
36  * at appropriate times for truncation to actually happen.)
37  *
38  * Note: in TSS_WRITEFILE state, the temp file's seek position is the
39  * current write position, and the write-position variables in the tuplestore
40  * aren't kept up to date. Similarly, in TSS_READFILE state the temp file's
41  * seek position is the active read pointer's position, and that read pointer
42  * isn't kept up to date. We update the appropriate variables using ftell()
43  * before switching to the other state or activating a different read pointer.
44  *
45  *
46  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
47  * Portions Copyright (c) 1994, Regents of the University of California
48  *
49  * IDENTIFICATION
50  * src/backend/utils/sort/tuplestore.c
51  *
52  *-------------------------------------------------------------------------
53  */
54 
55 #include "postgres.h"
56 
57 #include <limits.h>
58 
59 #include "access/htup_details.h"
60 #include "commands/tablespace.h"
61 #include "executor/executor.h"
62 #include "miscadmin.h"
63 #include "storage/buffile.h"
64 #include "utils/memutils.h"
65 #include "utils/resowner.h"
66 
67 
68 /*
69  * Possible states of a Tuplestore object. These denote the states that
70  * persist between calls of Tuplestore routines.
71  */
/*
 * Possible states of a Tuplestore object.  These denote the states that
 * persist between calls of Tuplestore routines.  (The closing typedef name
 * was dropped by the scrape; restored here.)
 */
typedef enum
{
	TSS_INMEM,					/* Tuples still fit in memory */
	TSS_WRITEFILE,				/* Writing to temp file */
	TSS_READFILE,				/* Reading from temp file */
} TupStoreStatus;
79 /*
80  * State for a single read pointer. If we are in state INMEM then all the
81  * read pointers' "current" fields denote the read positions. In state
82  * WRITEFILE, the file/offset fields denote the read positions. In state
83  * READFILE, inactive read pointers have valid file/offset, but the active
84  * read pointer implicitly has position equal to the temp file's seek position.
85  *
86  * Special case: if eof_reached is true, then the pointer's read position is
87  * implicitly equal to the write position, and current/file/offset aren't
88  * maintained. This way we need not update all the read pointers each time
89  * we write.
90  */
91 typedef struct
92 {
93  int eflags; /* capability flags */
94  bool eof_reached; /* read has reached EOF */
95  int current; /* next array index to read */
96  int file; /* temp file# */
97  off_t offset; /* byte offset in file */
99 
100 /*
101  * Private state of a Tuplestore operation.
102  */
104 {
105  TupStoreStatus status; /* enumerated value as shown above */
106  int eflags; /* capability flags (OR of pointers' flags) */
107  bool backward; /* store extra length words in file? */
108  bool interXact; /* keep open through transactions? */
109  bool truncated; /* tuplestore_trim has removed tuples? */
110  int64 availMem; /* remaining memory available, in bytes */
111  int64 allowedMem; /* total memory allowed, in bytes */
112  int64 maxSpace; /* maximum space used in memory */
113  int64 tuples; /* number of tuples added */
114  BufFile *myfile; /* underlying file, or NULL if none */
115  MemoryContext context; /* memory context for holding tuples */
116  ResourceOwner resowner; /* resowner for holding temp files */
117 
118  /*
119  * These function pointers decouple the routines that must know what kind
120  * of tuple we are handling from the routines that don't need to know it.
121  * They are set up by the tuplestore_begin_xxx routines.
122  *
123  * (Although tuplestore.c currently only supports heap tuples, I've copied
124  * this part of tuplesort.c so that extension to other kinds of objects
125  * will be easy if it's ever needed.)
126  *
127  * Function to copy a supplied input tuple into palloc'd space. (NB: we
128  * assume that a single pfree() is enough to release the tuple later, so
129  * the representation must be "flat" in one palloc chunk.) state->availMem
130  * must be decreased by the amount of space used.
131  */
132  void *(*copytup) (Tuplestorestate *state, void *tup);
133 
134  /*
135  * Function to write a stored tuple onto tape. The representation of the
136  * tuple on tape need not be the same as it is in memory; requirements on
137  * the tape representation are given below. After writing the tuple,
138  * pfree() it, and increase state->availMem by the amount of memory space
139  * thereby released.
140  */
141  void (*writetup) (Tuplestorestate *state, void *tup);
142 
143  /*
144  * Function to read a stored tuple from tape back into memory. 'len' is
145  * the already-read length of the stored tuple. Create and return a
146  * palloc'd copy, and decrease state->availMem by the amount of memory
147  * space consumed.
148  */
149  void *(*readtup) (Tuplestorestate *state, unsigned int len);
150 
151  /*
152  * This array holds pointers to tuples in memory if we are in state INMEM.
153  * In states WRITEFILE and READFILE it's not used.
154  *
155  * When memtupdeleted > 0, the first memtupdeleted pointers are already
156  * released due to a tuplestore_trim() operation, but we haven't expended
157  * the effort to slide the remaining pointers down. These unused pointers
158  * are set to NULL to catch any invalid accesses. Note that memtupcount
159  * includes the deleted pointers.
160  */
161  void **memtuples; /* array of pointers to palloc'd tuples */
162  int memtupdeleted; /* the first N slots are currently unused */
163  int memtupcount; /* number of tuples currently present */
164  int memtupsize; /* allocated length of memtuples array */
165  bool growmemtuples; /* memtuples' growth still underway? */
166 
167  /*
168  * These variables are used to keep track of the current positions.
169  *
170  * In state WRITEFILE, the current file seek position is the write point;
171  * in state READFILE, the write position is remembered in writepos_xxx.
172  * (The write position is the same as EOF, but since BufFileSeek doesn't
173  * currently implement SEEK_END, we have to remember it explicitly.)
174  */
175  TSReadPointer *readptrs; /* array of read pointers */
176  int activeptr; /* index of the active read pointer */
177  int readptrcount; /* number of pointers currently valid */
178  int readptrsize; /* allocated length of readptrs array */
179 
180  int writepos_file; /* file# (valid if READFILE state) */
181  off_t writepos_offset; /* offset (valid if READFILE state) */
182 };
183 
/* Convenience wrappers around the per-tuple-kind function pointers */
#define COPYTUP(state,tup)	((*(state)->copytup) (state, tup))
#define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
#define READTUP(state,len)	((*(state)->readtup) (state, len))
/* Memory accounting: LACKMEM is true once we've exceeded allowedMem */
#define LACKMEM(state)		((state)->availMem < 0)
#define USEMEM(state,amt)	((state)->availMem -= (amt))
#define FREEMEM(state,amt)	((state)->availMem += (amt))
190 
191 /*--------------------
192  *
193  * NOTES about on-tape representation of tuples:
194  *
195  * We require the first "unsigned int" of a stored tuple to be the total size
196  * on-tape of the tuple, including itself (so it is never zero).
197  * The remainder of the stored tuple
198  * may or may not match the in-memory representation of the tuple ---
199  * any conversion needed is the job of the writetup and readtup routines.
200  *
201  * If state->backward is true, then the stored representation of
202  * the tuple must be followed by another "unsigned int" that is a copy of the
203  * length --- so the total tape space used is actually sizeof(unsigned int)
204  * more than the stored length value. This allows read-backwards. When
205  * state->backward is not set, the write/read routines may omit the extra
206  * length word.
207  *
208  * writetup is expected to write both length words as well as the tuple
209  * data. When readtup is called, the tape is positioned just after the
210  * front length word; readtup must read the tuple data and advance past
211  * the back length word (if present).
212  *
213  * The write/read routines can make use of the tuple description data
214  * stored in the Tuplestorestate record, if needed. They are also expected
215  * to adjust state->availMem by the amount of memory space (not tape space!)
216  * released or consumed. There is no error return from either writetup
217  * or readtup; they should ereport() on failure.
218  *
219  *
220  * NOTES about memory consumption calculations:
221  *
222  * We count space allocated for tuples against the maxKBytes limit,
223  * plus the space used by the variable-size array memtuples.
224  * Fixed-size space (primarily the BufFile I/O buffer) is not counted.
225  * We don't worry about the size of the read pointer array, either.
226  *
227  * Note that we count actual space used (as shown by GetMemoryChunkSpace)
228  * rather than the originally-requested size. This is important since
229  * palloc can add substantial overhead. It's not a complete answer since
230  * we won't count any wasted space in palloc allocation blocks, but it's
231  * a lot better than what we were doing before 7.3.
232  *
233  *--------------------
234  */
235 
236 
237 static Tuplestorestate *tuplestore_begin_common(int eflags,
238  bool interXact,
239  int maxKBytes);
240 static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
241 static void dumptuples(Tuplestorestate *state);
243 static unsigned int getlen(Tuplestorestate *state, bool eofOK);
244 static void *copytup_heap(Tuplestorestate *state, void *tup);
245 static void writetup_heap(Tuplestorestate *state, void *tup);
246 static void *readtup_heap(Tuplestorestate *state, unsigned int len);
247 
248 
249 /*
250  * tuplestore_begin_xxx
251  *
252  * Initialize for a tuple store operation.
253  */
254 static Tuplestorestate *
255 tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
256 {
258 
260 
261  state->status = TSS_INMEM;
262  state->eflags = eflags;
263  state->interXact = interXact;
264  state->truncated = false;
265  state->allowedMem = maxKBytes * 1024L;
266  state->availMem = state->allowedMem;
267  state->maxSpace = 0;
268  state->myfile = NULL;
269 
270  /*
271  * The palloc/pfree pattern for tuple memory is in a FIFO pattern. A
272  * generation context is perfectly suited for this.
273  */
275  "tuplestore tuples",
277  state->resowner = CurrentResourceOwner;
278 
279  state->memtupdeleted = 0;
280  state->memtupcount = 0;
281  state->tuples = 0;
282 
283  /*
284  * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
285  * see comments in grow_memtuples().
286  */
287  state->memtupsize = Max(16384 / sizeof(void *),
288  ALLOCSET_SEPARATE_THRESHOLD / sizeof(void *) + 1);
289 
290  state->growmemtuples = true;
291  state->memtuples = (void **) palloc(state->memtupsize * sizeof(void *));
292 
293  USEMEM(state, GetMemoryChunkSpace(state->memtuples));
294 
295  state->activeptr = 0;
296  state->readptrcount = 1;
297  state->readptrsize = 8; /* arbitrary */
298  state->readptrs = (TSReadPointer *)
299  palloc(state->readptrsize * sizeof(TSReadPointer));
300 
301  state->readptrs[0].eflags = eflags;
302  state->readptrs[0].eof_reached = false;
303  state->readptrs[0].current = 0;
304 
305  return state;
306 }
307 
308 /*
309  * tuplestore_begin_heap
310  *
311  * Create a new tuplestore; other types of tuple stores (other than
312  * "heap" tuple stores, for heap tuples) are possible, but not presently
313  * implemented.
314  *
315  * randomAccess: if true, both forward and backward accesses to the
316  * tuple store are allowed.
317  *
318  * interXact: if true, the files used for on-disk storage persist beyond the
319  * end of the current transaction. NOTE: It's the caller's responsibility to
320  * create such a tuplestore in a memory context and resource owner that will
321  * also survive transaction boundaries, and to ensure the tuplestore is closed
322  * when it's no longer wanted.
323  *
324  * maxKBytes: how much data to store in memory (any data beyond this
325  * amount is paged to disk). When in doubt, use work_mem.
326  */
328 tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
329 {
331  int eflags;
332 
333  /*
334  * This interpretation of the meaning of randomAccess is compatible with
335  * the pre-8.3 behavior of tuplestores.
336  */
337  eflags = randomAccess ?
340 
341  state = tuplestore_begin_common(eflags, interXact, maxKBytes);
342 
343  state->copytup = copytup_heap;
344  state->writetup = writetup_heap;
345  state->readtup = readtup_heap;
346 
347  return state;
348 }
349 
350 /*
351  * tuplestore_set_eflags
352  *
353  * Set the capability flags for read pointer 0 at a finer grain than is
354  * allowed by tuplestore_begin_xxx. This must be called before inserting
355  * any data into the tuplestore.
356  *
357  * eflags is a bitmask following the meanings used for executor node
358  * startup flags (see executor.h). tuplestore pays attention to these bits:
359  * EXEC_FLAG_REWIND need rewind to start
360  * EXEC_FLAG_BACKWARD need backward fetch
361  * If tuplestore_set_eflags is not called, REWIND is allowed, and BACKWARD
362  * is set per "randomAccess" in the tuplestore_begin_xxx call.
363  *
364  * NOTE: setting BACKWARD without REWIND means the pointer can read backwards,
365  * but not further than the truncation point (the furthest-back read pointer
366  * position at the time of the last tuplestore_trim call).
367  */
368 void
370 {
371  int i;
372 
373  if (state->status != TSS_INMEM || state->memtupcount != 0)
374  elog(ERROR, "too late to call tuplestore_set_eflags");
375 
376  state->readptrs[0].eflags = eflags;
377  for (i = 1; i < state->readptrcount; i++)
378  eflags |= state->readptrs[i].eflags;
379  state->eflags = eflags;
380 }
381 
382 /*
383  * tuplestore_alloc_read_pointer - allocate another read pointer.
384  *
385  * Returns the pointer's index.
386  *
387  * The new pointer initially copies the position of read pointer 0.
388  * It can have its own eflags, but if any data has been inserted into
389  * the tuplestore, these eflags must not represent an increase in
390  * requirements.
391  */
392 int
394 {
395  /* Check for possible increase of requirements */
396  if (state->status != TSS_INMEM || state->memtupcount != 0)
397  {
398  if ((state->eflags | eflags) != state->eflags)
399  elog(ERROR, "too late to require new tuplestore eflags");
400  }
401 
402  /* Make room for another read pointer if needed */
403  if (state->readptrcount >= state->readptrsize)
404  {
405  int newcnt = state->readptrsize * 2;
406 
407  state->readptrs = (TSReadPointer *)
408  repalloc(state->readptrs, newcnt * sizeof(TSReadPointer));
409  state->readptrsize = newcnt;
410  }
411 
412  /* And set it up */
413  state->readptrs[state->readptrcount] = state->readptrs[0];
414  state->readptrs[state->readptrcount].eflags = eflags;
415 
416  state->eflags |= eflags;
417 
418  return state->readptrcount++;
419 }
420 
421 /*
422  * tuplestore_clear
423  *
424  * Delete all the contents of a tuplestore, and reset its read pointers
425  * to the start.
426  */
427 void
429 {
430  int i;
431  TSReadPointer *readptr;
432 
433  /* update the maxSpace before doing any USEMEM/FREEMEM adjustments */
435 
436  if (state->myfile)
437  BufFileClose(state->myfile);
438  state->myfile = NULL;
439 
440 #ifdef USE_ASSERT_CHECKING
441  {
442  int64 availMem = state->availMem;
443 
444  /*
445  * Below, we reset the memory context for storing tuples. To save
446  * from having to always call GetMemoryChunkSpace() on all stored
447  * tuples, we adjust the availMem to forget all the tuples and just
448  * recall USEMEM for the space used by the memtuples array. Here we
449  * just Assert that's correct and the memory tracking hasn't gone
450  * wrong anywhere.
451  */
452  for (i = state->memtupdeleted; i < state->memtupcount; i++)
453  availMem += GetMemoryChunkSpace(state->memtuples[i]);
454 
455  availMem += GetMemoryChunkSpace(state->memtuples);
456 
457  Assert(availMem == state->allowedMem);
458  }
459 #endif
460 
461  /* clear the memory consumed by the memory tuples */
462  MemoryContextReset(state->context);
463 
464  /*
465  * Zero the used memory and re-consume the space for the memtuples array.
466  * This saves having to FREEMEM for each stored tuple.
467  */
468  state->availMem = state->allowedMem;
469  USEMEM(state, GetMemoryChunkSpace(state->memtuples));
470 
471  state->status = TSS_INMEM;
472  state->truncated = false;
473  state->memtupdeleted = 0;
474  state->memtupcount = 0;
475  state->tuples = 0;
476  readptr = state->readptrs;
477  for (i = 0; i < state->readptrcount; readptr++, i++)
478  {
479  readptr->eof_reached = false;
480  readptr->current = 0;
481  }
482 }
483 
484 /*
485  * tuplestore_end
486  *
487  * Release resources and clean up.
488  */
489 void
491 {
492  if (state->myfile)
493  BufFileClose(state->myfile);
494 
495  MemoryContextDelete(state->context);
496  pfree(state->memtuples);
497  pfree(state->readptrs);
498  pfree(state);
499 }
500 
501 /*
502  * tuplestore_select_read_pointer - make the specified read pointer active
503  */
504 void
506 {
507  TSReadPointer *readptr;
508  TSReadPointer *oldptr;
509 
510  Assert(ptr >= 0 && ptr < state->readptrcount);
511 
512  /* No work if already active */
513  if (ptr == state->activeptr)
514  return;
515 
516  readptr = &state->readptrs[ptr];
517  oldptr = &state->readptrs[state->activeptr];
518 
519  switch (state->status)
520  {
521  case TSS_INMEM:
522  case TSS_WRITEFILE:
523  /* no work */
524  break;
525  case TSS_READFILE:
526 
527  /*
528  * First, save the current read position in the pointer about to
529  * become inactive.
530  */
531  if (!oldptr->eof_reached)
532  BufFileTell(state->myfile,
533  &oldptr->file,
534  &oldptr->offset);
535 
536  /*
537  * We have to make the temp file's seek position equal to the
538  * logical position of the new read pointer. In eof_reached
539  * state, that's the EOF, which we have available from the saved
540  * write position.
541  */
542  if (readptr->eof_reached)
543  {
544  if (BufFileSeek(state->myfile,
545  state->writepos_file,
546  state->writepos_offset,
547  SEEK_SET) != 0)
548  ereport(ERROR,
550  errmsg("could not seek in tuplestore temporary file")));
551  }
552  else
553  {
554  if (BufFileSeek(state->myfile,
555  readptr->file,
556  readptr->offset,
557  SEEK_SET) != 0)
558  ereport(ERROR,
560  errmsg("could not seek in tuplestore temporary file")));
561  }
562  break;
563  default:
564  elog(ERROR, "invalid tuplestore state");
565  break;
566  }
567 
568  state->activeptr = ptr;
569 }
570 
571 /*
572  * tuplestore_tuple_count
573  *
574  * Returns the number of tuples added since creation or the last
575  * tuplestore_clear().
576  */
577 int64
579 {
580  return state->tuples;
581 }
582 
583 /*
584  * tuplestore_ateof
585  *
586  * Returns the active read pointer's eof_reached state.
587  */
588 bool
590 {
591  return state->readptrs[state->activeptr].eof_reached;
592 }
593 
594 /*
595  * Grow the memtuples[] array, if possible within our memory constraint. We
596  * must not exceed INT_MAX tuples in memory or the caller-provided memory
597  * limit. Return true if we were able to enlarge the array, false if not.
598  *
599  * Normally, at each increment we double the size of the array. When doing
600  * that would exceed a limit, we attempt one last, smaller increase (and then
601  * clear the growmemtuples flag so we don't try any more). That allows us to
602  * use memory as fully as permitted; sticking to the pure doubling rule could
603  * result in almost half going unused. Because availMem moves around with
604  * tuple addition/removal, we need some rule to prevent making repeated small
605  * increases in memtupsize, which would just be useless thrashing. The
606  * growmemtuples flag accomplishes that and also prevents useless
607  * recalculations in this function.
608  */
609 static bool
611 {
612  int newmemtupsize;
613  int memtupsize = state->memtupsize;
614  int64 memNowUsed = state->allowedMem - state->availMem;
615 
616  /* Forget it if we've already maxed out memtuples, per comment above */
617  if (!state->growmemtuples)
618  return false;
619 
620  /* Select new value of memtupsize */
621  if (memNowUsed <= state->availMem)
622  {
623  /*
624  * We've used no more than half of allowedMem; double our usage,
625  * clamping at INT_MAX tuples.
626  */
627  if (memtupsize < INT_MAX / 2)
628  newmemtupsize = memtupsize * 2;
629  else
630  {
631  newmemtupsize = INT_MAX;
632  state->growmemtuples = false;
633  }
634  }
635  else
636  {
637  /*
638  * This will be the last increment of memtupsize. Abandon doubling
639  * strategy and instead increase as much as we safely can.
640  *
641  * To stay within allowedMem, we can't increase memtupsize by more
642  * than availMem / sizeof(void *) elements. In practice, we want to
643  * increase it by considerably less, because we need to leave some
644  * space for the tuples to which the new array slots will refer. We
645  * assume the new tuples will be about the same size as the tuples
646  * we've already seen, and thus we can extrapolate from the space
647  * consumption so far to estimate an appropriate new size for the
648  * memtuples array. The optimal value might be higher or lower than
649  * this estimate, but it's hard to know that in advance. We again
650  * clamp at INT_MAX tuples.
651  *
652  * This calculation is safe against enlarging the array so much that
653  * LACKMEM becomes true, because the memory currently used includes
654  * the present array; thus, there would be enough allowedMem for the
655  * new array elements even if no other memory were currently used.
656  *
657  * We do the arithmetic in float8, because otherwise the product of
658  * memtupsize and allowedMem could overflow. Any inaccuracy in the
659  * result should be insignificant; but even if we computed a
660  * completely insane result, the checks below will prevent anything
661  * really bad from happening.
662  */
663  double grow_ratio;
664 
665  grow_ratio = (double) state->allowedMem / (double) memNowUsed;
666  if (memtupsize * grow_ratio < INT_MAX)
667  newmemtupsize = (int) (memtupsize * grow_ratio);
668  else
669  newmemtupsize = INT_MAX;
670 
671  /* We won't make any further enlargement attempts */
672  state->growmemtuples = false;
673  }
674 
675  /* Must enlarge array by at least one element, else report failure */
676  if (newmemtupsize <= memtupsize)
677  goto noalloc;
678 
679  /*
680  * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp
681  * to ensure our request won't be rejected. Note that we can easily
682  * exhaust address space before facing this outcome. (This is presently
683  * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but
684  * don't rely on that at this distance.)
685  */
686  if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(void *))
687  {
688  newmemtupsize = (int) (MaxAllocHugeSize / sizeof(void *));
689  state->growmemtuples = false; /* can't grow any more */
690  }
691 
692  /*
693  * We need to be sure that we do not cause LACKMEM to become true, else
694  * the space management algorithm will go nuts. The code above should
695  * never generate a dangerous request, but to be safe, check explicitly
696  * that the array growth fits within availMem. (We could still cause
697  * LACKMEM if the memory chunk overhead associated with the memtuples
698  * array were to increase. That shouldn't happen because we chose the
699  * initial array size large enough to ensure that palloc will be treating
700  * both old and new arrays as separate chunks. But we'll check LACKMEM
701  * explicitly below just in case.)
702  */
703  if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(void *)))
704  goto noalloc;
705 
706  /* OK, do it */
707  FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
708  state->memtupsize = newmemtupsize;
709  state->memtuples = (void **)
710  repalloc_huge(state->memtuples,
711  state->memtupsize * sizeof(void *));
712  USEMEM(state, GetMemoryChunkSpace(state->memtuples));
713  if (LACKMEM(state))
714  elog(ERROR, "unexpected out-of-memory situation in tuplestore");
715  return true;
716 
717 noalloc:
718  /* If for any reason we didn't realloc, shut off future attempts */
719  state->growmemtuples = false;
720  return false;
721 }
722 
723 /*
724  * Accept one tuple and append it to the tuplestore.
725  *
726  * Note that the input tuple is always copied; the caller need not save it.
727  *
728  * If the active read pointer is currently "at EOF", it remains so (the read
729  * pointer implicitly advances along with the write pointer); otherwise the
730  * read pointer is unchanged. Non-active read pointers do not move, which
731  * means they are certain to not be "at EOF" immediately after puttuple.
732  * This curious-seeming behavior is for the convenience of nodeMaterial.c and
733  * nodeCtescan.c, which would otherwise need to do extra pointer repositioning
734  * steps.
735  *
736  * tuplestore_puttupleslot() is a convenience routine to collect data from
737  * a TupleTableSlot without an extra copy operation.
738  */
739 void
741  TupleTableSlot *slot)
742 {
743  MinimalTuple tuple;
744  MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
745 
746  /*
747  * Form a MinimalTuple in working memory
748  */
749  tuple = ExecCopySlotMinimalTuple(slot);
751 
752  tuplestore_puttuple_common(state, (void *) tuple);
753 
754  MemoryContextSwitchTo(oldcxt);
755 }
756 
757 /*
758  * "Standard" case to copy from a HeapTuple. This is actually now somewhat
759  * deprecated, but not worth getting rid of in view of the number of callers.
760  */
761 void
763 {
764  MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
765 
766  /*
767  * Copy the tuple. (Must do this even in WRITEFILE case. Note that
768  * COPYTUP includes USEMEM, so we needn't do that here.)
769  */
770  tuple = COPYTUP(state, tuple);
771 
772  tuplestore_puttuple_common(state, (void *) tuple);
773 
774  MemoryContextSwitchTo(oldcxt);
775 }
776 
777 /*
778  * Similar to tuplestore_puttuple(), but work from values + nulls arrays.
779  * This avoids an extra tuple-construction operation.
780  */
781 void
783  const Datum *values, const bool *isnull)
784 {
785  MinimalTuple tuple;
786  MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
787 
788  tuple = heap_form_minimal_tuple(tdesc, values, isnull);
790 
791  tuplestore_puttuple_common(state, (void *) tuple);
792 
793  MemoryContextSwitchTo(oldcxt);
794 }
795 
796 static void
798 {
799  TSReadPointer *readptr;
800  int i;
801  ResourceOwner oldowner;
802  MemoryContext oldcxt;
803 
804  state->tuples++;
805 
806  switch (state->status)
807  {
808  case TSS_INMEM:
809 
810  /*
811  * Update read pointers as needed; see API spec above.
812  */
813  readptr = state->readptrs;
814  for (i = 0; i < state->readptrcount; readptr++, i++)
815  {
816  if (readptr->eof_reached && i != state->activeptr)
817  {
818  readptr->eof_reached = false;
819  readptr->current = state->memtupcount;
820  }
821  }
822 
823  /*
824  * Grow the array as needed. Note that we try to grow the array
825  * when there is still one free slot remaining --- if we fail,
826  * there'll still be room to store the incoming tuple, and then
827  * we'll switch to tape-based operation.
828  */
829  if (state->memtupcount >= state->memtupsize - 1)
830  {
831  (void) grow_memtuples(state);
832  Assert(state->memtupcount < state->memtupsize);
833  }
834 
835  /* Stash the tuple in the in-memory array */
836  state->memtuples[state->memtupcount++] = tuple;
837 
838  /*
839  * Done if we still fit in available memory and have array slots.
840  */
841  if (state->memtupcount < state->memtupsize && !LACKMEM(state))
842  return;
843 
844  /*
845  * Nope; time to switch to tape-based operation. Make sure that
846  * the temp file(s) are created in suitable temp tablespaces.
847  */
849 
850  /* associate the file with the store's resource owner */
851  oldowner = CurrentResourceOwner;
852  CurrentResourceOwner = state->resowner;
853 
854  /*
855  * We switch out of the state->context as this is a generation
856  * context, which isn't ideal for allocations relating to the
857  * BufFile.
858  */
859  oldcxt = MemoryContextSwitchTo(state->context->parent);
860 
861  state->myfile = BufFileCreateTemp(state->interXact);
862 
863  MemoryContextSwitchTo(oldcxt);
864 
865  CurrentResourceOwner = oldowner;
866 
867  /*
868  * Freeze the decision about whether trailing length words will be
869  * used. We can't change this choice once data is on tape, even
870  * though callers might drop the requirement.
871  */
872  state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
873  state->status = TSS_WRITEFILE;
874  dumptuples(state);
875  break;
876  case TSS_WRITEFILE:
877 
878  /*
879  * Update read pointers as needed; see API spec above. Note:
880  * BufFileTell is quite cheap, so not worth trying to avoid
881  * multiple calls.
882  */
883  readptr = state->readptrs;
884  for (i = 0; i < state->readptrcount; readptr++, i++)
885  {
886  if (readptr->eof_reached && i != state->activeptr)
887  {
888  readptr->eof_reached = false;
889  BufFileTell(state->myfile,
890  &readptr->file,
891  &readptr->offset);
892  }
893  }
894 
895  WRITETUP(state, tuple);
896  break;
897  case TSS_READFILE:
898 
899  /*
900  * Switch from reading to writing.
901  */
902  if (!state->readptrs[state->activeptr].eof_reached)
903  BufFileTell(state->myfile,
904  &state->readptrs[state->activeptr].file,
905  &state->readptrs[state->activeptr].offset);
906  if (BufFileSeek(state->myfile,
907  state->writepos_file, state->writepos_offset,
908  SEEK_SET) != 0)
909  ereport(ERROR,
911  errmsg("could not seek in tuplestore temporary file")));
912  state->status = TSS_WRITEFILE;
913 
914  /*
915  * Update read pointers as needed; see API spec above.
916  */
917  readptr = state->readptrs;
918  for (i = 0; i < state->readptrcount; readptr++, i++)
919  {
920  if (readptr->eof_reached && i != state->activeptr)
921  {
922  readptr->eof_reached = false;
923  readptr->file = state->writepos_file;
924  readptr->offset = state->writepos_offset;
925  }
926  }
927 
928  WRITETUP(state, tuple);
929  break;
930  default:
931  elog(ERROR, "invalid tuplestore state");
932  break;
933  }
934 }
935 
936 /*
937  * Fetch the next tuple in either forward or back direction.
938  * Returns NULL if no more tuples. If should_free is set, the
939  * caller must pfree the returned tuple when done with it.
940  *
941  * Backward scan is only allowed if randomAccess was set true or
942  * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags().
943  */
944 static void *
946  bool *should_free)
947 {
948  TSReadPointer *readptr = &state->readptrs[state->activeptr];
949  unsigned int tuplen;
950  void *tup;
951 
952  Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));
953 
954  switch (state->status)
955  {
956  case TSS_INMEM:
957  *should_free = false;
958  if (forward)
959  {
960  if (readptr->eof_reached)
961  return NULL;
962  if (readptr->current < state->memtupcount)
963  {
964  /* We have another tuple, so return it */
965  return state->memtuples[readptr->current++];
966  }
967  readptr->eof_reached = true;
968  return NULL;
969  }
970  else
971  {
972  /*
973  * if all tuples are fetched already then we return last
974  * tuple, else tuple before last returned.
975  */
976  if (readptr->eof_reached)
977  {
978  readptr->current = state->memtupcount;
979  readptr->eof_reached = false;
980  }
981  else
982  {
983  if (readptr->current <= state->memtupdeleted)
984  {
985  Assert(!state->truncated);
986  return NULL;
987  }
988  readptr->current--; /* last returned tuple */
989  }
990  if (readptr->current <= state->memtupdeleted)
991  {
992  Assert(!state->truncated);
993  return NULL;
994  }
995  return state->memtuples[readptr->current - 1];
996  }
997  break;
998 
999  case TSS_WRITEFILE:
1000  /* Skip state change if we'll just return NULL */
1001  if (readptr->eof_reached && forward)
1002  return NULL;
1003 
1004  /*
1005  * Switch from writing to reading.
1006  */
1007  BufFileTell(state->myfile,
1008  &state->writepos_file, &state->writepos_offset);
1009  if (!readptr->eof_reached)
1010  if (BufFileSeek(state->myfile,
1011  readptr->file, readptr->offset,
1012  SEEK_SET) != 0)
1013  ereport(ERROR,
1015  errmsg("could not seek in tuplestore temporary file")));
1016  state->status = TSS_READFILE;
1017  /* FALLTHROUGH */
1018 
1019  case TSS_READFILE:
1020  *should_free = true;
1021  if (forward)
1022  {
1023  if ((tuplen = getlen(state, true)) != 0)
1024  {
1025  tup = READTUP(state, tuplen);
1026  return tup;
1027  }
1028  else
1029  {
1030  readptr->eof_reached = true;
1031  return NULL;
1032  }
1033  }
1034 
1035  /*
1036  * Backward.
1037  *
1038  * if all tuples are fetched already then we return last tuple,
1039  * else tuple before last returned.
1040  *
1041  * Back up to fetch previously-returned tuple's ending length
1042  * word. If seek fails, assume we are at start of file.
1043  */
1044  if (BufFileSeek(state->myfile, 0, -(long) sizeof(unsigned int),
1045  SEEK_CUR) != 0)
1046  {
1047  /* even a failed backwards fetch gets you out of eof state */
1048  readptr->eof_reached = false;
1049  Assert(!state->truncated);
1050  return NULL;
1051  }
1052  tuplen = getlen(state, false);
1053 
1054  if (readptr->eof_reached)
1055  {
1056  readptr->eof_reached = false;
1057  /* We will return the tuple returned before returning NULL */
1058  }
1059  else
1060  {
1061  /*
1062  * Back up to get ending length word of tuple before it.
1063  */
1064  if (BufFileSeek(state->myfile, 0,
1065  -(long) (tuplen + 2 * sizeof(unsigned int)),
1066  SEEK_CUR) != 0)
1067  {
1068  /*
1069  * If that fails, presumably the prev tuple is the first
1070  * in the file. Back up so that it becomes next to read
1071  * in forward direction (not obviously right, but that is
1072  * what in-memory case does).
1073  */
1074  if (BufFileSeek(state->myfile, 0,
1075  -(long) (tuplen + sizeof(unsigned int)),
1076  SEEK_CUR) != 0)
1077  ereport(ERROR,
1079  errmsg("could not seek in tuplestore temporary file")));
1080  Assert(!state->truncated);
1081  return NULL;
1082  }
1083  tuplen = getlen(state, false);
1084  }
1085 
1086  /*
1087  * Now we have the length of the prior tuple, back up and read it.
1088  * Note: READTUP expects we are positioned after the initial
1089  * length word of the tuple, so back up to that point.
1090  */
1091  if (BufFileSeek(state->myfile, 0,
1092  -(long) tuplen,
1093  SEEK_CUR) != 0)
1094  ereport(ERROR,
1096  errmsg("could not seek in tuplestore temporary file")));
1097  tup = READTUP(state, tuplen);
1098  return tup;
1099 
1100  default:
1101  elog(ERROR, "invalid tuplestore state");
1102  return NULL; /* keep compiler quiet */
1103  }
1104 }
1105 
1106 /*
1107  * tuplestore_gettupleslot - exported function to fetch a MinimalTuple
1108  *
1109  * If successful, put tuple in slot and return true; else, clear the slot
1110  * and return false.
1111  *
1112  * If copy is true, the slot receives a copied tuple (allocated in current
1113  * memory context) that will stay valid regardless of future manipulations of
1114  * the tuplestore's state. If copy is false, the slot may just receive a
1115  * pointer to a tuple held within the tuplestore. The latter is more
1116  * efficient but the slot contents may be corrupted if additional writes to
1117  * the tuplestore occur. (If using tuplestore_trim, see comments therein.)
1118  */
1119 bool
1121  bool copy, TupleTableSlot *slot)
1122 {
1123  MinimalTuple tuple;
1124  bool should_free;
1125 
1126  tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free);
1127 
1128  if (tuple)
1129  {
1130  if (copy && !should_free)
1131  {
1132  tuple = heap_copy_minimal_tuple(tuple);
1133  should_free = true;
1134  }
1135  ExecStoreMinimalTuple(tuple, slot, should_free);
1136  return true;
1137  }
1138  else
1139  {
1140  ExecClearTuple(slot);
1141  return false;
1142  }
1143 }
1144 
1145 /*
1146  * tuplestore_advance - exported function to adjust position without fetching
1147  *
1148  * We could optimize this case to avoid palloc/pfree overhead, but for the
1149  * moment it doesn't seem worthwhile.
1150  */
1151 bool
1153 {
1154  void *tuple;
1155  bool should_free;
1156 
1157  tuple = tuplestore_gettuple(state, forward, &should_free);
1158 
1159  if (tuple)
1160  {
1161  if (should_free)
1162  pfree(tuple);
1163  return true;
1164  }
1165  else
1166  {
1167  return false;
1168  }
1169 }
1170 
1171 /*
1172  * Advance over N tuples in either forward or back direction,
1173  * without returning any data. N<=0 is a no-op.
1174  * Returns true if successful, false if ran out of tuples.
1175  */
1176 bool
1177 tuplestore_skiptuples(Tuplestorestate *state, int64 ntuples, bool forward)
1178 {
1179  TSReadPointer *readptr = &state->readptrs[state->activeptr];
1180 
1181  Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));
1182 
1183  if (ntuples <= 0)
1184  return true;
1185 
1186  switch (state->status)
1187  {
1188  case TSS_INMEM:
1189  if (forward)
1190  {
1191  if (readptr->eof_reached)
1192  return false;
1193  if (state->memtupcount - readptr->current >= ntuples)
1194  {
1195  readptr->current += ntuples;
1196  return true;
1197  }
1198  readptr->current = state->memtupcount;
1199  readptr->eof_reached = true;
1200  return false;
1201  }
1202  else
1203  {
1204  if (readptr->eof_reached)
1205  {
1206  readptr->current = state->memtupcount;
1207  readptr->eof_reached = false;
1208  ntuples--;
1209  }
1210  if (readptr->current - state->memtupdeleted > ntuples)
1211  {
1212  readptr->current -= ntuples;
1213  return true;
1214  }
1215  Assert(!state->truncated);
1216  readptr->current = state->memtupdeleted;
1217  return false;
1218  }
1219  break;
1220 
1221  default:
1222  /* We don't currently try hard to optimize other cases */
1223  while (ntuples-- > 0)
1224  {
1225  void *tuple;
1226  bool should_free;
1227 
1228  tuple = tuplestore_gettuple(state, forward, &should_free);
1229 
1230  if (tuple == NULL)
1231  return false;
1232  if (should_free)
1233  pfree(tuple);
1235  }
1236  return true;
1237  }
1238 }
1239 
1240 /*
1241  * dumptuples - remove tuples from memory and write to tape
1242  *
1243  * As a side effect, we must convert each read pointer's position from
1244  * "current" to file/offset format. But eof_reached pointers don't
1245  * need to change state.
1246  */
1247 static void
1249 {
1250  int i;
1251 
1252  for (i = state->memtupdeleted;; i++)
1253  {
1254  TSReadPointer *readptr = state->readptrs;
1255  int j;
1256 
1257  for (j = 0; j < state->readptrcount; readptr++, j++)
1258  {
1259  if (i == readptr->current && !readptr->eof_reached)
1260  BufFileTell(state->myfile,
1261  &readptr->file, &readptr->offset);
1262  }
1263  if (i >= state->memtupcount)
1264  break;
1265  WRITETUP(state, state->memtuples[i]);
1266  }
1267  state->memtupdeleted = 0;
1268  state->memtupcount = 0;
1269 }
1270 
1271 /*
1272  * tuplestore_rescan - rewind the active read pointer to start
1273  */
1274 void
1276 {
1277  TSReadPointer *readptr = &state->readptrs[state->activeptr];
1278 
1279  Assert(readptr->eflags & EXEC_FLAG_REWIND);
1280  Assert(!state->truncated);
1281 
1282  switch (state->status)
1283  {
1284  case TSS_INMEM:
1285  readptr->eof_reached = false;
1286  readptr->current = 0;
1287  break;
1288  case TSS_WRITEFILE:
1289  readptr->eof_reached = false;
1290  readptr->file = 0;
1291  readptr->offset = 0;
1292  break;
1293  case TSS_READFILE:
1294  readptr->eof_reached = false;
1295  if (BufFileSeek(state->myfile, 0, 0, SEEK_SET) != 0)
1296  ereport(ERROR,
1298  errmsg("could not seek in tuplestore temporary file")));
1299  break;
1300  default:
1301  elog(ERROR, "invalid tuplestore state");
1302  break;
1303  }
1304 }
1305 
1306 /*
1307  * tuplestore_copy_read_pointer - copy a read pointer's state to another
1308  */
1309 void
1311  int srcptr, int destptr)
1312 {
1313  TSReadPointer *sptr = &state->readptrs[srcptr];
1314  TSReadPointer *dptr = &state->readptrs[destptr];
1315 
1316  Assert(srcptr >= 0 && srcptr < state->readptrcount);
1317  Assert(destptr >= 0 && destptr < state->readptrcount);
1318 
1319  /* Assigning to self is a no-op */
1320  if (srcptr == destptr)
1321  return;
1322 
1323  if (dptr->eflags != sptr->eflags)
1324  {
1325  /* Possible change of overall eflags, so copy and then recompute */
1326  int eflags;
1327  int i;
1328 
1329  *dptr = *sptr;
1330  eflags = state->readptrs[0].eflags;
1331  for (i = 1; i < state->readptrcount; i++)
1332  eflags |= state->readptrs[i].eflags;
1333  state->eflags = eflags;
1334  }
1335  else
1336  *dptr = *sptr;
1337 
1338  switch (state->status)
1339  {
1340  case TSS_INMEM:
1341  case TSS_WRITEFILE:
1342  /* no work */
1343  break;
1344  case TSS_READFILE:
1345 
1346  /*
1347  * This case is a bit tricky since the active read pointer's
1348  * position corresponds to the seek point, not what is in its
1349  * variables. Assigning to the active requires a seek, and
1350  * assigning from the active requires a tell, except when
1351  * eof_reached.
1352  */
1353  if (destptr == state->activeptr)
1354  {
1355  if (dptr->eof_reached)
1356  {
1357  if (BufFileSeek(state->myfile,
1358  state->writepos_file,
1359  state->writepos_offset,
1360  SEEK_SET) != 0)
1361  ereport(ERROR,
1363  errmsg("could not seek in tuplestore temporary file")));
1364  }
1365  else
1366  {
1367  if (BufFileSeek(state->myfile,
1368  dptr->file, dptr->offset,
1369  SEEK_SET) != 0)
1370  ereport(ERROR,
1372  errmsg("could not seek in tuplestore temporary file")));
1373  }
1374  }
1375  else if (srcptr == state->activeptr)
1376  {
1377  if (!dptr->eof_reached)
1378  BufFileTell(state->myfile,
1379  &dptr->file,
1380  &dptr->offset);
1381  }
1382  break;
1383  default:
1384  elog(ERROR, "invalid tuplestore state");
1385  break;
1386  }
1387 }
1388 
1389 /*
1390  * tuplestore_trim - remove all no-longer-needed tuples
1391  *
1392  * Calling this function authorizes the tuplestore to delete all tuples
1393  * before the oldest read pointer, if no read pointer is marked as requiring
1394  * REWIND capability.
1395  *
1396  * Note: this is obviously safe if no pointer has BACKWARD capability either.
1397  * If a pointer is marked as BACKWARD but not REWIND capable, it means that
1398  * the pointer can be moved backward but not before the oldest other read
1399  * pointer.
1400  */
1401 void
1403 {
1404  int oldest;
1405  int nremove;
1406  int i;
1407 
1408  /*
1409  * Truncation is disallowed if any read pointer requires rewind
1410  * capability.
1411  */
1412  if (state->eflags & EXEC_FLAG_REWIND)
1413  return;
1414 
1415  /*
1416  * We don't bother trimming temp files since it usually would mean more
1417  * work than just letting them sit in kernel buffers until they age out.
1418  */
1419  if (state->status != TSS_INMEM)
1420  return;
1421 
1422  /* Find the oldest read pointer */
1423  oldest = state->memtupcount;
1424  for (i = 0; i < state->readptrcount; i++)
1425  {
1426  if (!state->readptrs[i].eof_reached)
1427  oldest = Min(oldest, state->readptrs[i].current);
1428  }
1429 
1430  /*
1431  * Note: you might think we could remove all the tuples before the oldest
1432  * "current", since that one is the next to be returned. However, since
1433  * tuplestore_gettuple returns a direct pointer to our internal copy of
1434  * the tuple, it's likely that the caller has still got the tuple just
1435  * before "current" referenced in a slot. So we keep one extra tuple
1436  * before the oldest "current". (Strictly speaking, we could require such
1437  * callers to use the "copy" flag to tuplestore_gettupleslot, but for
1438  * efficiency we allow this one case to not use "copy".)
1439  */
1440  nremove = oldest - 1;
1441  if (nremove <= 0)
1442  return; /* nothing to do */
1443 
1444  Assert(nremove >= state->memtupdeleted);
1445  Assert(nremove <= state->memtupcount);
1446 
1447  /* before freeing any memory, update maxSpace */
1449 
1450  /* Release no-longer-needed tuples */
1451  for (i = state->memtupdeleted; i < nremove; i++)
1452  {
1453  FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
1454  pfree(state->memtuples[i]);
1455  state->memtuples[i] = NULL;
1456  }
1457  state->memtupdeleted = nremove;
1458 
1459  /* mark tuplestore as truncated (used for Assert crosschecks only) */
1460  state->truncated = true;
1461 
1462  /*
1463  * If nremove is less than 1/8th memtupcount, just stop here, leaving the
1464  * "deleted" slots as NULL. This prevents us from expending O(N^2) time
1465  * repeatedly memmove-ing a large pointer array. The worst case space
1466  * wastage is pretty small, since it's just pointers and not whole tuples.
1467  */
1468  if (nremove < state->memtupcount / 8)
1469  return;
1470 
1471  /*
1472  * Slide the array down and readjust pointers.
1473  *
1474  * In mergejoin's current usage, it's demonstrable that there will always
1475  * be exactly one non-removed tuple; so optimize that case.
1476  */
1477  if (nremove + 1 == state->memtupcount)
1478  state->memtuples[0] = state->memtuples[nremove];
1479  else
1480  memmove(state->memtuples, state->memtuples + nremove,
1481  (state->memtupcount - nremove) * sizeof(void *));
1482 
1483  state->memtupdeleted = 0;
1484  state->memtupcount -= nremove;
1485  for (i = 0; i < state->readptrcount; i++)
1486  {
1487  if (!state->readptrs[i].eof_reached)
1488  state->readptrs[i].current -= nremove;
1489  }
1490 }
1491 
1492 /*
1493  * tuplestore_updatemax
1494  * Update maxSpace field
1495  */
1496 static void
1498 {
1499  if (state->status == TSS_INMEM)
1500  state->maxSpace = Max(state->maxSpace,
1501  state->allowedMem - state->availMem);
1502 }
1503 
1504 /*
1505  * tuplestore_storage_type_name
1506  * Return a string description of the storage method used to store the
1507  * tuples.
1508  */
1509 const char *
1511 {
1512  if (state->status == TSS_INMEM)
1513  return "Memory";
1514  else
1515  return "Disk";
1516 }
1517 
1518 /*
1519  * tuplestore_space_used
1520  * Return the maximum space used in memory unless the tuplestore has spilled
1521  * to disk, in which case, return the disk space used.
1522  */
1523 int64
1525 {
1526  /* First, update the maxSpace field */
1528 
1529  if (state->status == TSS_INMEM)
1530  return state->maxSpace;
1531  else
1532  return BufFileSize(state->myfile);
1533 }
1534 
1535 /*
1536  * tuplestore_in_memory
1537  *
1538  * Returns true if the tuplestore has not spilled to disk.
1539  *
1540  * XXX exposing this is a violation of modularity ... should get rid of it.
1541  */
1542 bool
1544 {
1545  return (state->status == TSS_INMEM);
1546 }
1547 
1548 
1549 /*
1550  * Tape interface routines
1551  */
1552 
1553 static unsigned int
1555 {
1556  unsigned int len;
1557  size_t nbytes;
1558 
1559  nbytes = BufFileReadMaybeEOF(state->myfile, &len, sizeof(len), eofOK);
1560  if (nbytes == 0)
1561  return 0;
1562  else
1563  return len;
1564 }
1565 
1566 
1567 /*
1568  * Routines specialized for HeapTuple case
1569  *
1570  * The stored form is actually a MinimalTuple, but for largely historical
1571  * reasons we allow COPYTUP to work from a HeapTuple.
1572  *
1573  * Since MinimalTuple already has length in its first word, we don't need
1574  * to write that separately.
1575  */
1576 
1577 static void *
1579 {
1580  MinimalTuple tuple;
1581 
1583  USEMEM(state, GetMemoryChunkSpace(tuple));
1584  return (void *) tuple;
1585 }
1586 
1587 static void
1589 {
1590  MinimalTuple tuple = (MinimalTuple) tup;
1591 
1592  /* the part of the MinimalTuple we'll write: */
1593  char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
1594  unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
1595 
1596  /* total on-disk footprint: */
1597  unsigned int tuplen = tupbodylen + sizeof(int);
1598 
1599  BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
1600  BufFileWrite(state->myfile, tupbody, tupbodylen);
1601  if (state->backward) /* need trailing length word? */
1602  BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
1603 
1604  /* no need to call tuplestore_updatemax() when not in TSS_INMEM */
1606  heap_free_minimal_tuple(tuple);
1607 }
1608 
1609 static void *
1611 {
1612  unsigned int tupbodylen = len - sizeof(int);
1613  unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
1614  MinimalTuple tuple = (MinimalTuple) palloc(tuplen);
1615  char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
1616 
1617  /* read in the tuple proper */
1618  tuple->t_len = tuplen;
1619  BufFileReadExact(state->myfile, tupbody, tupbodylen);
1620  if (state->backward) /* need trailing length word? */
1621  BufFileReadExact(state->myfile, &tuplen, sizeof(tuplen));
1622  return (void *) tuple;
1623 }
void PrepareTempTablespaces(void)
Definition: tablespace.c:1331
static Datum values[MAXATTR]
Definition: bootstrap.c:150
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:654
void BufFileTell(BufFile *file, int *fileno, off_t *offset)
Definition: buffile.c:833
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:193
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition: buffile.c:676
size_t BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
Definition: buffile.c:664
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:740
int64 BufFileSize(BufFile *file)
Definition: buffile.c:866
void BufFileClose(BufFile *file)
Definition: buffile.c:412
#define Min(x, y)
Definition: c.h:1004
#define Max(x, y)
Definition: c.h:998
#define Assert(condition)
Definition: c.h:858
size_t Size
Definition: c.h:605
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1533
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
#define EXEC_FLAG_REWIND
Definition: executor.h:67
MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: generation.c:160
MinimalTuple minimal_tuple_from_heap_tuple(HeapTuple htup)
Definition: heaptuple.c:1576
MinimalTuple heap_form_minimal_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1452
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1523
MinimalTuple heap_copy_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1535
MinimalTupleData * MinimalTuple
Definition: htup.h:27
#define MINIMAL_TUPLE_DATA_OFFSET
Definition: htup_details.h:621
int j
Definition: isn.c:74
int i
Definition: isn.c:73
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void pfree(void *pointer)
Definition: mcxt.c:1521
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:721
void * palloc0(Size size)
Definition: mcxt.c:1347
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
void * palloc(Size size)
Definition: mcxt.c:1317
void * repalloc_huge(void *pointer, Size size)
Definition: mcxt.c:1672
#define MaxAllocHugeSize
Definition: memutils.h:45
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
#define ALLOCSET_SEPARATE_THRESHOLD
Definition: memutils.h:187
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
const void size_t len
uintptr_t Datum
Definition: postgres.h:64
MemoryContextSwitchTo(old_ctx)
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
bool eof_reached
Definition: tuplestore.c:94
off_t offset
Definition: tuplestore.c:97
ResourceOwner resowner
Definition: tuplestore.c:116
TupStoreStatus status
Definition: tuplestore.c:105
MemoryContext context
Definition: tuplestore.c:115
TSReadPointer * readptrs
Definition: tuplestore.c:175
BufFile * myfile
Definition: tuplestore.c:114
off_t writepos_offset
Definition: tuplestore.c:181
void ** memtuples
Definition: tuplestore.c:161
void(* writetup)(Tuplestorestate *state, void *tup)
Definition: tuplestore.c:141
Definition: regguts.h:323
#define WRITETUP(state, tup)
Definition: tuplestore.c:185
#define READTUP(state, len)
Definition: tuplestore.c:186
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
Definition: tuplestore.c:1120
#define LACKMEM(state)
Definition: tuplestore.c:187
#define USEMEM(state, amt)
Definition: tuplestore.c:188
static void writetup_heap(Tuplestorestate *state, void *tup)
Definition: tuplestore.c:1588
void tuplestore_puttupleslot(Tuplestorestate *state, TupleTableSlot *slot)
Definition: tuplestore.c:740
static void * copytup_heap(Tuplestorestate *state, void *tup)
Definition: tuplestore.c:1578
void tuplestore_select_read_pointer(Tuplestorestate *state, int ptr)
Definition: tuplestore.c:505
#define COPYTUP(state, tup)
Definition: tuplestore.c:184
void tuplestore_clear(Tuplestorestate *state)
Definition: tuplestore.c:428
static bool grow_memtuples(Tuplestorestate *state)
Definition: tuplestore.c:610
static void * tuplestore_gettuple(Tuplestorestate *state, bool forward, bool *should_free)
Definition: tuplestore.c:945
#define FREEMEM(state, amt)
Definition: tuplestore.c:189
static Tuplestorestate * tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
Definition: tuplestore.c:255
static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
Definition: tuplestore.c:797
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:328
int64 tuplestore_space_used(Tuplestorestate *state)
Definition: tuplestore.c:1524
TupStoreStatus
Definition: tuplestore.c:73
@ TSS_READFILE
Definition: tuplestore.c:76
@ TSS_INMEM
Definition: tuplestore.c:74
@ TSS_WRITEFILE
Definition: tuplestore.c:75
int64 tuplestore_tuple_count(Tuplestorestate *state)
Definition: tuplestore.c:578
void tuplestore_rescan(Tuplestorestate *state)
Definition: tuplestore.c:1275
int tuplestore_alloc_read_pointer(Tuplestorestate *state, int eflags)
Definition: tuplestore.c:393
void tuplestore_trim(Tuplestorestate *state)
Definition: tuplestore.c:1402
void tuplestore_copy_read_pointer(Tuplestorestate *state, int srcptr, int destptr)
Definition: tuplestore.c:1310
bool tuplestore_advance(Tuplestorestate *state, bool forward)
Definition: tuplestore.c:1152
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:782
static void * readtup_heap(Tuplestorestate *state, unsigned int len)
Definition: tuplestore.c:1610
bool tuplestore_in_memory(Tuplestorestate *state)
Definition: tuplestore.c:1543
void tuplestore_end(Tuplestorestate *state)
Definition: tuplestore.c:490
const char * tuplestore_storage_type_name(Tuplestorestate *state)
Definition: tuplestore.c:1510
void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple)
Definition: tuplestore.c:762
bool tuplestore_ateof(Tuplestorestate *state)
Definition: tuplestore.c:589
static void dumptuples(Tuplestorestate *state)
Definition: tuplestore.c:1248
static void tuplestore_updatemax(Tuplestorestate *state)
Definition: tuplestore.c:1497
void tuplestore_set_eflags(Tuplestorestate *state, int eflags)
Definition: tuplestore.c:369
bool tuplestore_skiptuples(Tuplestorestate *state, int64 ntuples, bool forward)
Definition: tuplestore.c:1177
static unsigned int getlen(Tuplestorestate *state, bool eofOK)
Definition: tuplestore.c:1554
static MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot)
Definition: tuptable.h:492
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454