PostgreSQL Source Code  git master
windowfuncs.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * windowfuncs.c
4  * Standard window functions defined in SQL spec.
5  *
6  * Portions Copyright (c) 2000-2024, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/windowfuncs.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "nodes/parsenodes.h"
17 #include "nodes/supportnodes.h"
18 #include "utils/fmgrprotos.h"
19 #include "windowapi.h"
20 
21 /*
22  * ranking process information
23  */
24 typedef struct rank_context
25 {
26  int64 rank; /* current rank */
28 
29 /*
30  * ntile process information
31  */
32 typedef struct
33 {
34  int32 ntile; /* current result */
35  int64 rows_per_bucket; /* row number of current bucket */
36  int64 boundary; /* how many rows should be in the bucket */
37  int64 remainder; /* (total rows) % (bucket num) */
39 
40 static bool rank_up(WindowObject winobj);
42  bool forward, bool withoffset, bool withdefault);
43 
44 
45 /*
46  * utility routine for *_rank functions.
47  */
48 static bool
50 {
51  bool up = false; /* should rank increase? */
52  int64 curpos = WinGetCurrentPosition(winobj);
54 
55  context = (rank_context *)
57 
58  if (context->rank == 0)
59  {
60  /* first call: rank of first row is always 1 */
61  Assert(curpos == 0);
62  context->rank = 1;
63  }
64  else
65  {
66  Assert(curpos > 0);
67  /* do current and prior tuples match by ORDER BY clause? */
68  if (!WinRowsArePeers(winobj, curpos - 1, curpos))
69  up = true;
70  }
71 
72  /* We can advance the mark, but only *after* access to prior row */
73  WinSetMarkPosition(winobj, curpos);
74 
75  return up;
76 }
77 
78 
79 /*
80  * row_number
81  * just increment up from 1 until current partition finishes.
82  */
83 Datum
85 {
86  WindowObject winobj = PG_WINDOW_OBJECT();
87  int64 curpos = WinGetCurrentPosition(winobj);
88 
89  WinSetMarkPosition(winobj, curpos);
90  PG_RETURN_INT64(curpos + 1);
91 }
92 
93 /*
94  * window_row_number_support
95  * prosupport function for window_row_number()
96  */
97 Datum
99 {
100  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
101 
102  if (IsA(rawreq, SupportRequestWFuncMonotonic))
103  {
105 
106  /* row_number() is monotonically increasing */
108  PG_RETURN_POINTER(req);
109  }
110 
112  {
114 
115  /*
116  * The frame options can always become "ROWS BETWEEN UNBOUNDED
117  * PRECEDING AND CURRENT ROW". row_number() always just increments by
118  * 1 with each row in the partition. Using ROWS instead of RANGE
119  * saves effort checking peer rows during execution.
120  */
125 
126  PG_RETURN_POINTER(req);
127  }
128 
129  PG_RETURN_POINTER(NULL);
130 }
131 
132 /*
133  * rank
134  * Rank changes when key columns change.
135  * The new rank number is the current row number.
136  */
137 Datum
139 {
140  WindowObject winobj = PG_WINDOW_OBJECT();
142  bool up;
143 
144  up = rank_up(winobj);
145  context = (rank_context *)
146  WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
147  if (up)
148  context->rank = WinGetCurrentPosition(winobj) + 1;
149 
150  PG_RETURN_INT64(context->rank);
151 }
152 
153 /*
154  * window_rank_support
155  * prosupport function for window_rank()
156  */
157 Datum
159 {
160  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
161 
162  if (IsA(rawreq, SupportRequestWFuncMonotonic))
163  {
165 
166  /* rank() is monotonically increasing */
168  PG_RETURN_POINTER(req);
169  }
170 
172  {
174 
175  /*
176  * rank() is coded in such a way that it returns "(COUNT (*) OVER
177  * (<opt> RANGE UNBOUNDED PRECEDING) - COUNT (*) OVER (<opt> RANGE
178  * CURRENT ROW) + 1)" regardless of the frame options. We'll set the
179  * frame options to "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW"
180  * so they agree with what window_row_number_support() optimized the
181  * frame options to be. Using ROWS instead of RANGE saves from doing
182  * peer row checks during execution.
183  */
188 
189  PG_RETURN_POINTER(req);
190  }
191 
192  PG_RETURN_POINTER(NULL);
193 }
194 
195 /*
196  * dense_rank
197  * Rank increases by 1 when key columns change.
198  */
199 Datum
201 {
202  WindowObject winobj = PG_WINDOW_OBJECT();
204  bool up;
205 
206  up = rank_up(winobj);
207  context = (rank_context *)
208  WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
209  if (up)
210  context->rank++;
211 
212  PG_RETURN_INT64(context->rank);
213 }
214 
215 /*
216  * window_dense_rank_support
217  * prosupport function for window_dense_rank()
218  */
219 Datum
221 {
222  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
223 
224  if (IsA(rawreq, SupportRequestWFuncMonotonic))
225  {
227 
228  /* dense_rank() is monotonically increasing */
230  PG_RETURN_POINTER(req);
231  }
232 
234  {
236 
237  /*
238  * dense_rank() is unaffected by the frame options. Here we set the
239  * frame options to match what's done in row_number's support
240  * function. Using ROWS instead of RANGE (the default) saves the
241  * executor from having to check for peer rows.
242  */
247 
248  PG_RETURN_POINTER(req);
249  }
250 
251  PG_RETURN_POINTER(NULL);
252 }
253 
254 /*
255  * percent_rank
256  * return fraction between 0 and 1 inclusive,
257  * which is described as (RK - 1) / (NR - 1), where RK is the current row's
258  * rank and NR is the total number of rows, per spec.
259  */
260 Datum
262 {
263  WindowObject winobj = PG_WINDOW_OBJECT();
265  bool up;
266  int64 totalrows = WinGetPartitionRowCount(winobj);
267 
268  Assert(totalrows > 0);
269 
270  up = rank_up(winobj);
271  context = (rank_context *)
272  WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
273  if (up)
274  context->rank = WinGetCurrentPosition(winobj) + 1;
275 
276  /* return zero if there's only one row, per spec */
277  if (totalrows <= 1)
278  PG_RETURN_FLOAT8(0.0);
279 
280  PG_RETURN_FLOAT8((float8) (context->rank - 1) / (float8) (totalrows - 1));
281 }
282 
283 /*
284  * window_percent_rank_support
285  * prosupport function for window_percent_rank()
286  */
287 Datum
289 {
290  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
291 
292  if (IsA(rawreq, SupportRequestWFuncMonotonic))
293  {
295 
296  /* percent_rank() is monotonically increasing */
298  PG_RETURN_POINTER(req);
299  }
300 
302  {
304 
305  /*
306  * percent_rank() is unaffected by the frame options. Here we set the
307  * frame options to match what's done in row_number's support
308  * function. Using ROWS instead of RANGE (the default) saves the
309  * executor from having to check for peer rows.
310  */
315 
316  PG_RETURN_POINTER(req);
317  }
318 
319  PG_RETURN_POINTER(NULL);
320 }
321 
322 
323 /*
324  * cume_dist
325  * return fraction between 0 and 1 inclusive,
326  * which is described as NP / NR, where NP is the number of rows preceding or
327  * peers to the current row, and NR is the total number of rows, per spec.
328  */
329 Datum
331 {
332  WindowObject winobj = PG_WINDOW_OBJECT();
334  bool up;
335  int64 totalrows = WinGetPartitionRowCount(winobj);
336 
337  Assert(totalrows > 0);
338 
339  up = rank_up(winobj);
340  context = (rank_context *)
341  WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
342  if (up || context->rank == 1)
343  {
344  /*
345  * The current row is not peer to prior row or is just the first, so
346  * count up the number of rows that are peer to the current.
347  */
348  int64 row;
349 
350  context->rank = WinGetCurrentPosition(winobj) + 1;
351 
352  /*
353  * start from current + 1
354  */
355  for (row = context->rank; row < totalrows; row++)
356  {
357  if (!WinRowsArePeers(winobj, row - 1, row))
358  break;
359  context->rank++;
360  }
361  }
362 
363  PG_RETURN_FLOAT8((float8) context->rank / (float8) totalrows);
364 }
365 
366 /*
367  * window_cume_dist_support
368  * prosupport function for window_cume_dist()
369  */
370 Datum
372 {
373  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
374 
375  if (IsA(rawreq, SupportRequestWFuncMonotonic))
376  {
378 
379  /* cume_dist() is monotonically increasing */
381  PG_RETURN_POINTER(req);
382  }
383 
385  {
387 
388  /*
389  * cume_dist() is unaffected by the frame options. Here we set the
390  * frame options to match what's done in row_number's support
391  * function. Using ROWS instead of RANGE (the default) saves the
392  * executor from having to check for peer rows.
393  */
398 
399  PG_RETURN_POINTER(req);
400  }
401 
402  PG_RETURN_POINTER(NULL);
403 }
404 
405 /*
406  * ntile
407  * compute an exact numeric value with scale 0 (zero),
408  * ranging from 1 (one) to n, per spec.
409  */
410 Datum
412 {
413  WindowObject winobj = PG_WINDOW_OBJECT();
415 
416  context = (ntile_context *)
417  WinGetPartitionLocalMemory(winobj, sizeof(ntile_context));
418 
419  if (context->ntile == 0)
420  {
421  /* first call */
422  int64 total;
423  int32 nbuckets;
424  bool isnull;
425 
426  total = WinGetPartitionRowCount(winobj);
427  nbuckets = DatumGetInt32(WinGetFuncArgCurrent(winobj, 0, &isnull));
428 
429  /*
430  * per spec: If NT is the null value, then the result is the null
431  * value.
432  */
433  if (isnull)
434  PG_RETURN_NULL();
435 
436  /*
437  * per spec: If NT is less than or equal to 0 (zero), then an
438  * exception condition is raised.
439  */
440  if (nbuckets <= 0)
441  ereport(ERROR,
442  (errcode(ERRCODE_INVALID_ARGUMENT_FOR_NTILE),
443  errmsg("argument of ntile must be greater than zero")));
444 
445  context->ntile = 1;
446  context->rows_per_bucket = 0;
447  context->boundary = total / nbuckets;
448  if (context->boundary <= 0)
449  context->boundary = 1;
450  else
451  {
452  /*
453  * If the total number is not divisible, add 1 row to leading
454  * buckets.
455  */
456  context->remainder = total % nbuckets;
457  if (context->remainder != 0)
458  context->boundary++;
459  }
460  }
461 
462  context->rows_per_bucket++;
463  if (context->boundary < context->rows_per_bucket)
464  {
465  /* ntile up */
466  if (context->remainder != 0 && context->ntile == context->remainder)
467  {
468  context->remainder = 0;
469  context->boundary -= 1;
470  }
471  context->ntile += 1;
472  context->rows_per_bucket = 1;
473  }
474 
475  PG_RETURN_INT32(context->ntile);
476 }
477 
478 /*
479  * window_ntile_support
480  * prosupport function for window_ntile()
481  */
482 Datum
484 {
485  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
486 
487  if (IsA(rawreq, SupportRequestWFuncMonotonic))
488  {
490 
491  /*
492  * ntile() is monotonically increasing as the number of buckets cannot
493  * change after the first call
494  */
496  PG_RETURN_POINTER(req);
497  }
498 
500  {
502 
503  /*
504  * ntile() is unaffected by the frame options. Here we set the frame
505  * options to match what's done in row_number's support function.
506  * Using ROWS instead of RANGE (the default) saves the executor from
507  * having to check for peer rows.
508  */
513 
514  PG_RETURN_POINTER(req);
515  }
516 
517  PG_RETURN_POINTER(NULL);
518 }
519 
520 /*
521  * leadlag_common
522  * common operation of lead() and lag()
523  * For lead() forward is true, whereas for lag() it is false.
524  * withoffset indicates we have an offset second argument.
525  * withdefault indicates we have a default third argument.
526  */
527 static Datum
529  bool forward, bool withoffset, bool withdefault)
530 {
531  WindowObject winobj = PG_WINDOW_OBJECT();
532  int32 offset;
533  bool const_offset;
534  Datum result;
535  bool isnull;
536  bool isout;
537 
538  if (withoffset)
539  {
540  offset = DatumGetInt32(WinGetFuncArgCurrent(winobj, 1, &isnull));
541  if (isnull)
542  PG_RETURN_NULL();
543  const_offset = get_fn_expr_arg_stable(fcinfo->flinfo, 1);
544  }
545  else
546  {
547  offset = 1;
548  const_offset = true;
549  }
550 
551  result = WinGetFuncArgInPartition(winobj, 0,
552  (forward ? offset : -offset),
554  const_offset,
555  &isnull, &isout);
556 
557  if (isout)
558  {
559  /*
560  * target row is out of the partition; supply default value if
561  * provided. otherwise it'll stay NULL
562  */
563  if (withdefault)
564  result = WinGetFuncArgCurrent(winobj, 2, &isnull);
565  }
566 
567  if (isnull)
568  PG_RETURN_NULL();
569 
570  PG_RETURN_DATUM(result);
571 }
572 
573 /*
574  * lag
575  * returns the value of VE evaluated on a row that is 1
576  * row before the current row within a partition,
577  * per spec.
578  */
579 Datum
581 {
582  return leadlag_common(fcinfo, false, false, false);
583 }
584 
585 /*
586  * lag_with_offset
587  * returns the value of VE evaluated on a row that is OFFSET
588  * rows before the current row within a partition,
589  * per spec.
590  */
591 Datum
593 {
594  return leadlag_common(fcinfo, false, true, false);
595 }
596 
597 /*
598  * lag_with_offset_and_default
599  * same as lag_with_offset but accepts default value
600  * as its third argument.
601  */
602 Datum
604 {
605  return leadlag_common(fcinfo, false, true, true);
606 }
607 
608 /*
609  * lead
610  * returns the value of VE evaluated on a row that is 1
611  * row after the current row within a partition,
612  * per spec.
613  */
614 Datum
616 {
617  return leadlag_common(fcinfo, true, false, false);
618 }
619 
620 /*
621  * lead_with_offset
622  * returns the value of VE evaluated on a row that is OFFSET
623  * number of rows after the current row within a partition,
624  * per spec.
625  */
626 Datum
628 {
629  return leadlag_common(fcinfo, true, true, false);
630 }
631 
632 /*
633  * lead_with_offset_and_default
634  * same as lead_with_offset but accepts default value
635  * as its third argument.
636  */
637 Datum
639 {
640  return leadlag_common(fcinfo, true, true, true);
641 }
642 
643 /*
644  * first_value
645  * return the value of VE evaluated on the first row of the
646  * window frame, per spec.
647  */
648 Datum
650 {
651  WindowObject winobj = PG_WINDOW_OBJECT();
652  Datum result;
653  bool isnull;
654 
655  result = WinGetFuncArgInFrame(winobj, 0,
656  0, WINDOW_SEEK_HEAD, true,
657  &isnull, NULL);
658  if (isnull)
659  PG_RETURN_NULL();
660 
661  PG_RETURN_DATUM(result);
662 }
663 
664 /*
665  * last_value
666  * return the value of VE evaluated on the last row of the
667  * window frame, per spec.
668  */
669 Datum
671 {
672  WindowObject winobj = PG_WINDOW_OBJECT();
673  Datum result;
674  bool isnull;
675 
676  result = WinGetFuncArgInFrame(winobj, 0,
677  0, WINDOW_SEEK_TAIL, true,
678  &isnull, NULL);
679  if (isnull)
680  PG_RETURN_NULL();
681 
682  PG_RETURN_DATUM(result);
683 }
684 
685 /*
686  * nth_value
687  * return the value of VE evaluated on the n-th row from the first
688  * row of the window frame, per spec.
689  */
690 Datum
692 {
693  WindowObject winobj = PG_WINDOW_OBJECT();
694  bool const_offset;
695  Datum result;
696  bool isnull;
697  int32 nth;
698 
699  nth = DatumGetInt32(WinGetFuncArgCurrent(winobj, 1, &isnull));
700  if (isnull)
701  PG_RETURN_NULL();
702  const_offset = get_fn_expr_arg_stable(fcinfo->flinfo, 1);
703 
704  if (nth <= 0)
705  ereport(ERROR,
706  (errcode(ERRCODE_INVALID_ARGUMENT_FOR_NTH_VALUE),
707  errmsg("argument of nth_value must be greater than zero")));
708 
709  result = WinGetFuncArgInFrame(winobj, 0,
710  nth - 1, WINDOW_SEEK_HEAD, const_offset,
711  &isnull, NULL);
712  if (isnull)
713  PG_RETURN_NULL();
714 
715  PG_RETURN_DATUM(result);
716 }
signed int int32
Definition: c.h:494
#define Assert(condition)
Definition: c.h:858
double float8
Definition: c.h:630
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
bool get_fn_expr_arg_stable(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:1975
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_INT64(x)
Definition: fmgr.h:368
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Datum WinGetFuncArgInPartition(WindowObject winobj, int argno, int relpos, int seektype, bool set_mark, bool *isnull, bool *isout)
Datum WinGetFuncArgInFrame(WindowObject winobj, int argno, int relpos, int seektype, bool set_mark, bool *isnull, bool *isout)
int64 WinGetCurrentPosition(WindowObject winobj)
bool WinRowsArePeers(WindowObject winobj, int64 pos1, int64 pos2)
void WinSetMarkPosition(WindowObject winobj, int64 markpos)
void * WinGetPartitionLocalMemory(WindowObject winobj, Size sz)
Datum WinGetFuncArgCurrent(WindowObject winobj, int argno, bool *isnull)
int64 WinGetPartitionRowCount(WindowObject winobj)
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define FRAMEOPTION_END_CURRENT_ROW
Definition: parsenodes.h:591
#define FRAMEOPTION_START_UNBOUNDED_PRECEDING
Definition: parsenodes.h:586
#define FRAMEOPTION_NONDEFAULT
Definition: parsenodes.h:581
#define FRAMEOPTION_ROWS
Definition: parsenodes.h:583
@ MONOTONICFUNC_INCREASING
Definition: plannodes.h:1588
uintptr_t Datum
Definition: postgres.h:64
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202
tree context
Definition: radixtree.h:1833
FmgrInfo * flinfo
Definition: fmgr.h:87
Definition: nodes.h:129
MonotonicFunction monotonic
Definition: supportnodes.h:299
int64 rows_per_bucket
Definition: windowfuncs.c:35
int64 remainder
Definition: windowfuncs.c:37
int64 boundary
Definition: windowfuncs.c:36
#define WINDOW_SEEK_TAIL
Definition: windowapi.h:34
#define WINDOW_SEEK_HEAD
Definition: windowapi.h:33
#define PG_WINDOW_OBJECT()
Definition: windowapi.h:39
#define WINDOW_SEEK_CURRENT
Definition: windowapi.h:32
Datum window_lag_with_offset(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:592
static bool rank_up(WindowObject winobj)
Definition: windowfuncs.c:49
Datum window_dense_rank(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:200
Datum window_cume_dist_support(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:371
Datum window_last_value(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:670
Datum window_percent_rank(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:261
static Datum leadlag_common(FunctionCallInfo fcinfo, bool forward, bool withoffset, bool withdefault)
Definition: windowfuncs.c:528
Datum window_ntile(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:411
Datum window_row_number(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:84
Datum window_percent_rank_support(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:288
Datum window_first_value(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:649
Datum window_lead(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:615
Datum window_lead_with_offset(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:627
Datum window_lag_with_offset_and_default(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:603
Datum window_lag(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:580
Datum window_row_number_support(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:98
Datum window_lead_with_offset_and_default(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:638
Datum window_cume_dist(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:330
Datum window_rank(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:138
Datum window_ntile_support(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:483
struct rank_context rank_context
Datum window_dense_rank_support(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:220
Datum window_nth_value(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:691
Datum window_rank_support(PG_FUNCTION_ARGS)
Definition: windowfuncs.c:158