PostgreSQL Source Code  git master
ts_utils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ts_utils.c
4  * various support functions
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/tsearch/ts_utils.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 
19 #include "miscadmin.h"
20 #include "tsearch/ts_locale.h"
21 #include "tsearch/ts_public.h"
22 
23 
24 /*
25  * Given the base name and extension of a tsearch config file, return
26  * its full path name. The base name is assumed to be user-supplied,
27  * and is checked to prevent pathname attacks. The extension is assumed
28  * to be safe.
29  *
30  * The result is a palloc'd string.
31  */
32 char *
33 get_tsearch_config_filename(const char *basename,
34  const char *extension)
35 {
36  char sharepath[MAXPGPATH];
37  char *result;
38 
39  /*
40  * We limit the basename to contain a-z, 0-9, and underscores. This may
41  * be overly restrictive, but we don't want to allow access to anything
42  * outside the tsearch_data directory, so for instance '/' *must* be
43  * rejected, and on some platforms '\' and ':' are risky as well. Allowing
44  * uppercase might result in incompatible behavior between case-sensitive
45  * and case-insensitive filesystems, and non-ASCII characters create other
46  * interesting risks, so on the whole a tight policy seems best.
47  */
48  if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
49  ereport(ERROR,
50  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
51  errmsg("invalid text search configuration file name \"%s\"",
52  basename)));
53 
54  get_share_path(my_exec_path, sharepath);
55  result = palloc(MAXPGPATH);
56  snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
57  sharepath, basename, extension);
58 
59  return result;
60 }
61 
62 /*
63  * Reads a stop-word file. Each word is run through 'wordop'
64  * function, if given. wordop may either modify the input in-place,
65  * or palloc a new version.
66  */
67 void
68 readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
69 {
70  char **stop = NULL;
71 
72  s->len = 0;
73  if (fname && *fname)
74  {
75  char *filename = get_tsearch_config_filename(fname, "stop");
77  char *line;
78  int reallen = 0;
79 
80  if (!tsearch_readline_begin(&trst, filename))
81  ereport(ERROR,
82  (errcode(ERRCODE_CONFIG_FILE_ERROR),
83  errmsg("could not open stop-word file \"%s\": %m",
84  filename)));
85 
86  while ((line = tsearch_readline(&trst)) != NULL)
87  {
88  char *pbuf = line;
89 
90  /* Trim trailing space */
91  while (*pbuf && !t_isspace(pbuf))
92  pbuf += pg_mblen(pbuf);
93  *pbuf = '\0';
94 
95  /* Skip empty lines */
96  if (*line == '\0')
97  {
98  pfree(line);
99  continue;
100  }
101 
102  if (s->len >= reallen)
103  {
104  if (reallen == 0)
105  {
106  reallen = 64;
107  stop = (char **) palloc(sizeof(char *) * reallen);
108  }
109  else
110  {
111  reallen *= 2;
112  stop = (char **) repalloc(stop, sizeof(char *) * reallen);
113  }
114  }
115 
116  if (wordop)
117  {
118  stop[s->len] = wordop(line);
119  if (stop[s->len] != line)
120  pfree(line);
121  }
122  else
123  stop[s->len] = line;
124 
125  (s->len)++;
126  }
127 
128  tsearch_readline_end(&trst);
129  pfree(filename);
130  }
131 
132  s->stop = stop;
133 
134  /* Sort to allow binary searching */
135  if (s->stop && s->len > 0)
136  qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
137 }
138 
139 bool
141 {
142  return (s->stop && s->len > 0 &&
143  bsearch(&key, s->stop, s->len,
144  sizeof(char *), pg_qsort_strcmp));
145 }
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
char my_exec_path[MAXPGPATH]
Definition: globals.c:80
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void pfree(void *pointer)
Definition: mcxt.c:1521
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define MAXPGPATH
static char * filename
Definition: pg_dumpall.c:119
void get_share_path(const char *my_exec_path, char *ret_path)
Definition: path.c:825
int pg_qsort_strcmp(const void *a, const void *b)
Definition: qsort.c:19
#define snprintf
Definition: port.h:238
#define qsort(a, b, c, d)
Definition: port.h:447
int len
Definition: ts_public.h:102
char ** stop
Definition: ts_public.h:103
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:134
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:157
int t_isspace(const char *ptr)
Definition: ts_locale.c:50
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:202
void readstoplist(const char *fname, StopList *s, char *(*wordop)(const char *))
Definition: ts_utils.c:68
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:33
bool searchstoplist(StopList *s, char *key)
Definition: ts_utils.c:140