PostgreSQL Source Code  git master
queryjumblefuncs.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * queryjumblefuncs.c
4  * Query normalization and fingerprinting.
5  *
6  * Normalization is a process whereby similar queries, typically differing only
7  * in their constants (though the exact rules are somewhat more subtle than
8  * that) are recognized as equivalent, and are tracked as a single entry. This
9  * is particularly useful for non-prepared queries.
10  *
11  * Normalization is implemented by fingerprinting queries, selectively
12  * serializing those fields of each query tree's nodes that are judged to be
13  * essential to the query. This is referred to as a query jumble. This is
14  * distinct from a regular serialization in that various extraneous
15  * information is ignored as irrelevant or not essential to the query, such
16  * as the collations of Vars and, most notably, the values of constants.
17  *
18  * This jumble is acquired at the end of parse analysis of each query, and
19  * a 64-bit hash of it is stored into the query's Query.queryId field.
20  * The server then copies this value around, making it available in plan
21  * tree(s) generated from the query. The executor can then use this value
22  * to blame query costs on the proper queryId.
23  *
24  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  * src/backend/nodes/queryjumblefuncs.c
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34 
35 #include "common/hashfn.h"
36 #include "miscadmin.h"
37 #include "nodes/queryjumble.h"
38 #include "parser/scansup.h"
39 
40 #define JUMBLE_SIZE 1024 /* query serialization buffer size */
41 
42 /* GUC parameters */
44 
45 /* True when compute_query_id is ON, or AUTO and a module requests them */
46 bool query_id_enabled = false;
47 
48 static void AppendJumble(JumbleState *jstate,
49  const unsigned char *item, Size size);
50 static void RecordConstLocation(JumbleState *jstate, int location);
51 static void _jumbleNode(JumbleState *jstate, Node *node);
52 static void _jumbleA_Const(JumbleState *jstate, Node *node);
53 static void _jumbleList(JumbleState *jstate, Node *node);
54 static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
55 
/*
 * CleanQuerytext
 *		Narrow a possibly multi-statement source string down to the single
 *		statement of interest.
 *
 * On entry, *location is the offset of the statement within "query" (negative
 * if unknown) and *len is its length (zero or negative meaning "the rest of
 * the string").  On exit both have been updated to describe the statement
 * with leading and trailing whitespace removed.
 *
 * The return value points into the caller's string at the first character of
 * the trimmed statement; nothing is copied, and the result is not
 * null-terminated at *len.
 */
const char *
CleanQuerytext(const char *query, int *location, int *len)
{
	int			start = *location;
	int			nbytes = *len;

	if (start < 0)
	{
		/* Location unknown: distrust the length too and take everything */
		start = 0;
		nbytes = strlen(query);
	}
	else
	{
		/* Step forward to where the statement begins */
		Assert(start <= strlen(query));
		query += start;

		/* A length of 0 (or -1) stands for "rest of string" */
		if (nbytes > 0)
			Assert(nbytes <= strlen(query));
		else
			nbytes = strlen(query);
	}

	/*
	 * Trim whitespace at both ends.  scanner_isspace() is used rather than
	 * libc's isspace() so that the result agrees with the lexer's notion of
	 * whitespace.
	 */
	while (nbytes > 0 && scanner_isspace(*query))
	{
		query++;
		start++;
		nbytes--;
	}
	while (nbytes > 0 && scanner_isspace(query[nbytes - 1]))
		nbytes--;

	/* Hand the adjusted position and length back to the caller */
	*location = start;
	*len = nbytes;

	return query;
}
98 
101 {
102  JumbleState *jstate = NULL;
103 
105 
106  jstate = (JumbleState *) palloc(sizeof(JumbleState));
107 
108  /* Set up workspace for query jumbling */
109  jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
110  jstate->jumble_len = 0;
111  jstate->clocations_buf_size = 32;
112  jstate->clocations = (LocationLen *)
113  palloc(jstate->clocations_buf_size * sizeof(LocationLen));
114  jstate->clocations_count = 0;
115  jstate->highest_extern_param_id = 0;
116 
117  /* Compute query ID and mark the Query node with it */
118  _jumbleNode(jstate, (Node *) query);
119  query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
120  jstate->jumble_len,
121  0));
122 
123  /*
124  * If we are unlucky enough to get a hash of zero, use 1 instead for
125  * normal statements and 2 for utility queries.
126  */
127  if (query->queryId == UINT64CONST(0))
128  {
129  if (query->utilityStmt)
130  query->queryId = UINT64CONST(2);
131  else
132  query->queryId = UINT64CONST(1);
133  }
134 
135  return jstate;
136 }
137 
138 /*
139  * Enables query identifier computation.
140  *
141  * Third-party plugins can use this function to inform core that they require
142  * a query identifier to be computed.
143  */
144 void
146 {
148  query_id_enabled = true;
149 }
150 
151 /*
152  * AppendJumble: Append a value that is substantive in a given query to
153  * the current jumble.
154  */
155 static void
156 AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
157 {
158  unsigned char *jumble = jstate->jumble;
159  Size jumble_len = jstate->jumble_len;
160 
161  /*
162  * Whenever the jumble buffer is full, we hash the current contents and
163  * reset the buffer to contain just that hash value, thus relying on the
164  * hash to summarize everything so far.
165  */
166  while (size > 0)
167  {
168  Size part_size;
169 
170  if (jumble_len >= JUMBLE_SIZE)
171  {
172  uint64 start_hash;
173 
174  start_hash = DatumGetUInt64(hash_any_extended(jumble,
175  JUMBLE_SIZE, 0));
176  memcpy(jumble, &start_hash, sizeof(start_hash));
177  jumble_len = sizeof(start_hash);
178  }
179  part_size = Min(size, JUMBLE_SIZE - jumble_len);
180  memcpy(jumble + jumble_len, item, part_size);
181  jumble_len += part_size;
182  item += part_size;
183  size -= part_size;
184  }
185  jstate->jumble_len = jumble_len;
186 }
187 
188 /*
189  * Record location of constant within query string of query tree
190  * that is currently being walked.
191  */
192 static void
193 RecordConstLocation(JumbleState *jstate, int location)
194 {
195  /* -1 indicates unknown or undefined location */
196  if (location >= 0)
197  {
198  /* enlarge array if needed */
199  if (jstate->clocations_count >= jstate->clocations_buf_size)
200  {
201  jstate->clocations_buf_size *= 2;
202  jstate->clocations = (LocationLen *)
203  repalloc(jstate->clocations,
204  jstate->clocations_buf_size *
205  sizeof(LocationLen));
206  }
207  jstate->clocations[jstate->clocations_count].location = location;
208  /* initialize lengths to -1 to simplify third-party module usage */
209  jstate->clocations[jstate->clocations_count].length = -1;
210  jstate->clocations_count++;
211  }
212 }
213 
214 #define JUMBLE_NODE(item) \
215  _jumbleNode(jstate, (Node *) expr->item)
216 #define JUMBLE_LOCATION(location) \
217  RecordConstLocation(jstate, expr->location)
218 #define JUMBLE_FIELD(item) \
219  AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
220 #define JUMBLE_FIELD_SINGLE(item) \
221  AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
222 #define JUMBLE_STRING(str) \
223 do { \
224  if (expr->str) \
225  AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
226 } while(0)
227 
228 #include "queryjumblefuncs.funcs.c"
229 
230 static void
231 _jumbleNode(JumbleState *jstate, Node *node)
232 {
233  Node *expr = node;
234 
235  if (expr == NULL)
236  return;
237 
238  /* Guard against stack overflow due to overly complex expressions */
240 
241  /*
242  * We always emit the node's NodeTag, then any additional fields that are
243  * considered significant, and then we recurse to any child nodes.
244  */
246 
247  switch (nodeTag(expr))
248  {
249 #include "queryjumblefuncs.switch.c"
250 
251  case T_List:
252  case T_IntList:
253  case T_OidList:
254  case T_XidList:
255  _jumbleList(jstate, expr);
256  break;
257 
258  default:
259  /* Only a warning, since we can stumble along anyway */
260  elog(WARNING, "unrecognized node type: %d",
261  (int) nodeTag(expr));
262  break;
263  }
264 
265  /* Special cases to handle outside the automated code */
266  switch (nodeTag(expr))
267  {
268  case T_Param:
269  {
270  Param *p = (Param *) node;
271 
272  /*
273  * Update the highest Param id seen, in order to start
274  * normalization correctly.
275  */
276  if (p->paramkind == PARAM_EXTERN &&
277  p->paramid > jstate->highest_extern_param_id)
278  jstate->highest_extern_param_id = p->paramid;
279  }
280  break;
281  default:
282  break;
283  }
284 }
285 
286 static void
287 _jumbleList(JumbleState *jstate, Node *node)
288 {
289  List *expr = (List *) node;
290  ListCell *l;
291 
292  switch (expr->type)
293  {
294  case T_List:
295  foreach(l, expr)
296  _jumbleNode(jstate, lfirst(l));
297  break;
298  case T_IntList:
299  foreach(l, expr)
301  break;
302  case T_OidList:
303  foreach(l, expr)
305  break;
306  case T_XidList:
307  foreach(l, expr)
309  break;
310  default:
311  elog(ERROR, "unrecognized list node type: %d",
312  (int) expr->type);
313  return;
314  }
315 }
316 
317 static void
319 {
320  A_Const *expr = (A_Const *) node;
321 
322  JUMBLE_FIELD(isnull);
323  if (!expr->isnull)
324  {
325  JUMBLE_FIELD(val.node.type);
326  switch (nodeTag(&expr->val))
327  {
328  case T_Integer:
329  JUMBLE_FIELD(val.ival.ival);
330  break;
331  case T_Float:
332  JUMBLE_STRING(val.fval.fval);
333  break;
334  case T_Boolean:
335  JUMBLE_FIELD(val.boolval.boolval);
336  break;
337  case T_String:
338  JUMBLE_STRING(val.sval.sval);
339  break;
340  case T_BitString:
341  JUMBLE_STRING(val.bsval.bsval);
342  break;
343  default:
344  elog(ERROR, "unrecognized node type: %d",
345  (int) nodeTag(&expr->val));
346  break;
347  }
348  }
349 }
350 
351 static void
353 {
354  RangeTblEntry *expr = (RangeTblEntry *) node;
355 
356  JUMBLE_FIELD(rtekind);
357  switch (expr->rtekind)
358  {
359  case RTE_RELATION:
360  JUMBLE_FIELD(relid);
361  JUMBLE_NODE(tablesample);
362  JUMBLE_FIELD(inh);
363  break;
364  case RTE_SUBQUERY:
365  JUMBLE_NODE(subquery);
366  break;
367  case RTE_JOIN:
368  JUMBLE_FIELD(jointype);
369  break;
370  case RTE_FUNCTION:
372  break;
373  case RTE_TABLEFUNC:
374  JUMBLE_NODE(tablefunc);
375  break;
376  case RTE_VALUES:
377  JUMBLE_NODE(values_lists);
378  break;
379  case RTE_CTE:
380 
381  /*
382  * Depending on the CTE name here isn't ideal, but it's the only
383  * info we have to identify the referenced WITH item.
384  */
385  JUMBLE_STRING(ctename);
386  JUMBLE_FIELD(ctelevelsup);
387  break;
388  case RTE_NAMEDTUPLESTORE:
389  JUMBLE_STRING(enrname);
390  break;
391  case RTE_RESULT:
392  break;
393  default:
394  elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind);
395  break;
396  }
397 }
#define Min(x, y)
Definition: c.h:993
size_t Size
Definition: c.h:594
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
long val
Definition: informix.c:664
Assert(fmt[strlen(fmt) - 1] !='\n')
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1476
void * palloc(Size size)
Definition: mcxt.c:1226
#define nodeTag(nodeptr)
Definition: nodes.h:133
@ RTE_JOIN
Definition: parsenodes.h:1015
@ RTE_CTE
Definition: parsenodes.h:1019
@ RTE_NAMEDTUPLESTORE
Definition: parsenodes.h:1020
@ RTE_VALUES
Definition: parsenodes.h:1018
@ RTE_SUBQUERY
Definition: parsenodes.h:1014
@ RTE_RESULT
Definition: parsenodes.h:1021
@ RTE_FUNCTION
Definition: parsenodes.h:1016
@ RTE_TABLEFUNC
Definition: parsenodes.h:1017
@ RTE_RELATION
Definition: parsenodes.h:1013
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_int(lc)
Definition: pg_list.h:173
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define lfirst_xid(lc)
Definition: pg_list.h:175
void check_stack_depth(void)
Definition: postgres.c:3523
static uint64 DatumGetUInt64(Datum X)
Definition: postgres.h:419
@ PARAM_EXTERN
Definition: primnodes.h:345
@ COMPUTE_QUERY_ID_AUTO
Definition: queryjumble.h:58
@ COMPUTE_QUERY_ID_OFF
Definition: queryjumble.h:56
static bool IsQueryIdEnabled(void)
Definition: queryjumble.h:77
JumbleState * JumbleQuery(Query *query)
#define JUMBLE_NODE(item)
bool query_id_enabled
static void _jumbleNode(JumbleState *jstate, Node *node)
static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
#define JUMBLE_SIZE
int compute_query_id
static void RecordConstLocation(JumbleState *jstate, int location)
static void _jumbleList(JumbleState *jstate, Node *node)
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node)
#define JUMBLE_STRING(str)
const char * CleanQuerytext(const char *query, int *location, int *len)
#define JUMBLE_FIELD_SINGLE(item)
static void _jumbleA_Const(JumbleState *jstate, Node *node)
void EnableQueryId(void)
#define JUMBLE_FIELD(item)
static const struct fns functions
Definition: regcomp.c:356
bool scanner_isspace(char ch)
Definition: scansup.c:117
bool isnull
Definition: parsenodes.h:360
union ValUnion val
Definition: parsenodes.h:359
unsigned char * jumble
Definition: queryjumble.h:35
int clocations_buf_size
Definition: queryjumble.h:44
Size jumble_len
Definition: queryjumble.h:38
int highest_extern_param_id
Definition: queryjumble.h:50
LocationLen * clocations
Definition: queryjumble.h:41
int clocations_count
Definition: queryjumble.h:47
Definition: pg_list.h:54
NodeTag type
Definition: pg_list.h:55
Definition: nodes.h:129
int paramid
Definition: primnodes.h:355
ParamKind paramkind
Definition: primnodes.h:354
Node * utilityStmt
Definition: parsenodes.h:142
RTEKind rtekind
Definition: parsenodes.h:1032
const char * type