PostgreSQL Source Code  git master
spell.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * spell.h
4  *
5  * Declarations for ISpell dictionary
6  *
7  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8  *
9  * src/include/tsearch/dicts/spell.h
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #ifndef __SPELL_H__
15 #define __SPELL_H__
16 
17 #include "regex/regex.h"
18 #include "tsearch/dicts/regis.h"
19 #include "tsearch/ts_public.h"
20 
21 /*
22  * SPNode and SPNodeData represent a prefix tree (trie) that stores
23  * a list of words.
24  */
25 struct SPNode;
26 
27 typedef struct
28 {
30  isword:1,
31  /* Stores compound flags listed below */
33  /* Reference to an entry of the AffixData field */
34  affix:19;
35  struct SPNode *node;
36 } SPNodeData;
37 
38 /*
39  * Names of FF_ are correlated with Hunspell options in affix file
40  * https://hunspell.github.io/
41  */
/* Individual compound bits; each occupies its own bit of the low nibble */
#define FF_COMPOUNDONLY		0x01
#define FF_COMPOUNDBEGIN	0x02
#define FF_COMPOUNDMIDDLE	0x04
#define FF_COMPOUNDLAST		0x08
/* Union of the three positional bits (begin, middle, last) */
#define FF_COMPOUNDFLAG		( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
							FF_COMPOUNDLAST )
/* Mask covering all four compound bits above */
#define FF_COMPOUNDFLAGMASK		0x0f
49 
50 typedef struct SPNode
51 {
55 
56 #define SPNHDRSZ (offsetof(SPNode,data))
57 
58 /*
59  * Represents an entry in a words list.
60  */
61 typedef struct spell_struct
62 {
63  union
64  {
65  /*
66  * flag is filled in by NIImportDictionary(). After
67  * NISortDictionary(), d is used instead of flag.
68  */
69  char *flag;
70  /* d is used in mkSPNode() */
71  struct
72  {
73  /* Reference to an entry of the AffixData field */
74  int affix;
75  /* Length of the word */
76  int len;
77  } d;
78  } p;
81 
82 #define SPELLHDRSZ (offsetof(SPELL, word))
83 
84 /*
85  * If an affix uses a regex, we have to store that separately in a struct
86  * that won't move around when arrays of affixes are enlarged or sorted.
87  * This is so that it can be found to be cleaned up at context destruction.
88  */
89 typedef struct aff_regex_struct
90 {
94 
95 /*
96  * Represents an entry in an affix list.
97  */
98 typedef struct aff_struct
99 {
100  char *flag;
101  /* FF_SUFFIX or FF_PREFIX */
106  replen:14;
107  char *find;
108  char *repl;
109  union
110  {
113  } reg;
115 
116 /*
117  * affixes use dictionary flags too
118  */
/* These continue the bit sequence above the 0x0f compound-flag nibble */
#define FF_COMPOUNDPERMITFLAG	0x10
#define FF_COMPOUNDFORBIDFLAG	0x20
#define FF_CROSSPRODUCT			0x40
122 
123 /*
124  * Don't change the order of these. Initialization sorts by these,
125  * and expects prefixes to come first after sorting.
126  */
/* FF_PREFIX must stay numerically smaller than FF_SUFFIX (sort order) */
#define FF_SUFFIX				1
#define FF_PREFIX				0
129 
130 /*
131  * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
132  * an affix list.
133  */
134 struct AffixNode;
135 
136 typedef struct
137 {
139  naff:24;
141  struct AffixNode *node;
142 } AffixNodeData;
143 
144 typedef struct AffixNode
145 {
147  length:31;
150 
151 #define ANHRDSZ (offsetof(AffixNode, data))
152 
/*
 * An affix string with its length and a suffix/prefix marker.  IspellDict
 * holds a pointer to these in its CompoundAffix field.
 */
typedef struct
{
	char	   *affix;			/* the affix text */
	int			len;			/* presumably the length of affix */
	bool		issuffix;		/* true for a suffix, false otherwise */
} CMPDAffix;
159 
160 /*
161  * Type of encoding affix flags in Hunspell dictionaries
162  */
typedef enum
{
	FM_CHAR,					/* one character (like ispell) */
	FM_LONG,					/* two characters */
	FM_NUM,						/* number, >= 0 and < 65536 */
} FlagMode;
169 
170 /*
171  * Structure to store Hunspell options. Flag representation depends on flag
172  * type. These flags are about support of compound words.
173  */
174 typedef struct CompoundAffixFlag
175 {
176  union
177  {
178  /* Flag name if flagMode is FM_CHAR or FM_LONG */
179  char *s;
180  /* Flag name if flagMode is FM_NUM */
182  } flag;
183  /* we don't have a bsearch_arg version, so, copy FlagMode */
187 
/* Exclusive upper bound for FM_NUM flag values (65536) */
#define FLAGNUM_MAXSIZE		(1 << 16)
189 
190 typedef struct
191 {
192  int maffixes;
193  int naffixes;
195 
198 
200  /* Array of sets of affixes */
201  char **AffixData;
205 
207 
210 
211  /*
212  * All follow fields are actually needed only for initialization
213  */
214 
215  /* Array of Hunspell options in affix file */
217  /* number of entries in CompoundAffixFlags array */
219  /* allocated length of CompoundAffixFlags array */
221 
222  /*
223  * Remaining fields are only used during dictionary construction; they are
224  * set up by NIStartBuild and cleared by NIFinishBuild.
225  */
226  MemoryContext buildCxt; /* temp context for construction */
227 
228  /* Temporary array of all words in the dict file */
230  int nspell; /* number of valid entries in Spell array */
231  int mspell; /* allocated length of Spell array */
232 
233  /* These are used to allocate "compact" data without palloc overhead */
234  char *firstfree; /* first free address (always maxaligned) */
235  size_t avail; /* free space remaining at firstfree */
236 } IspellDict;
237 
238 extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
239 
240 extern void NIStartBuild(IspellDict *Conf);
241 extern void NIImportAffixes(IspellDict *Conf, const char *filename);
242 extern void NIImportDictionary(IspellDict *Conf, const char *filename);
243 extern void NISortDictionary(IspellDict *Conf);
244 extern void NISortAffixes(IspellDict *Conf);
245 extern void NIFinishBuild(IspellDict *Conf);
246 
247 #endif
unsigned int uint32
Definition: c.h:490
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:382
static char * filename
Definition: pg_dumpall.c:119
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
Definition: regcomp.c:1476
void NIStartBuild(IspellDict *Conf)
Definition: spell.c:88
void NIFinishBuild(IspellDict *Conf)
Definition: spell.c:103
void NIImportAffixes(IspellDict *Conf, const char *filename)
Definition: spell.c:1445
void NISortDictionary(IspellDict *Conf)
Definition: spell.c:1738
struct CompoundAffixFlag CompoundAffixFlag
struct SPNode SPNode
void NISortAffixes(IspellDict *Conf)
Definition: spell.c:1992
struct aff_regex_struct aff_regex_struct
struct spell_struct SPELL
void NIImportDictionary(IspellDict *Conf, const char *filename)
Definition: spell.c:517
TSLexeme * NINormalizeWord(IspellDict *Conf, char *word)
Definition: spell.c:2556
FlagMode
Definition: spell.h:164
@ FM_LONG
Definition: spell.h:166
@ FM_CHAR
Definition: spell.h:165
@ FM_NUM
Definition: spell.h:167
struct AffixNode AffixNode
struct aff_struct AFFIX
uint32 naff
Definition: spell.h:139
AFFIX ** aff
Definition: spell.h:140
uint32 val
Definition: spell.h:138
struct AffixNode * node
Definition: spell.h:141
uint32 isvoid
Definition: spell.h:146
AffixNodeData data[FLEXIBLE_ARRAY_MEMBER]
Definition: spell.h:148
uint32 length
Definition: spell.h:147
int len
Definition: spell.h:156
bool issuffix
Definition: spell.h:157
char * affix
Definition: spell.h:155
union CompoundAffixFlag::@118 flag
uint32 value
Definition: spell.h:185
FlagMode flagMode
Definition: spell.h:184
int maffixes
Definition: spell.h:192
int lenAffixData
Definition: spell.h:202
MemoryContext buildCxt
Definition: spell.h:226
int mspell
Definition: spell.h:231
AffixNode * Suffix
Definition: spell.h:196
char ** AffixData
Definition: spell.h:201
int naffixes
Definition: spell.h:193
bool usecompound
Definition: spell.h:208
CompoundAffixFlag * CompoundAffixFlags
Definition: spell.h:216
AFFIX * Affix
Definition: spell.h:194
int nAffixData
Definition: spell.h:203
int nCompoundAffixFlag
Definition: spell.h:218
CMPDAffix * CompoundAffix
Definition: spell.h:206
bool useFlagAliases
Definition: spell.h:204
SPNode * Dictionary
Definition: spell.h:199
int mCompoundAffixFlag
Definition: spell.h:220
int nspell
Definition: spell.h:230
char * firstfree
Definition: spell.h:234
FlagMode flagMode
Definition: spell.h:209
size_t avail
Definition: spell.h:235
AffixNode * Prefix
Definition: spell.h:197
SPELL ** Spell
Definition: spell.h:229
Definition: regis.h:33
struct SPNode * node
Definition: spell.h:35
uint32 val
Definition: spell.h:29
uint32 compoundflag
Definition: spell.h:32
uint32 isword
Definition: spell.h:30
uint32 affix
Definition: spell.h:34
Definition: spell.h:51
SPNodeData data[FLEXIBLE_ARRAY_MEMBER]
Definition: spell.h:53
uint32 length
Definition: spell.h:52
regex_t regex
Definition: spell.h:91
MemoryContextCallback mcallback
Definition: spell.h:92
aff_regex_struct * pregex
Definition: spell.h:111
uint32 isregis
Definition: spell.h:105
char * flag
Definition: spell.h:100
uint32 type
Definition: spell.h:102
Regis regis
Definition: spell.h:112
uint32 replen
Definition: spell.h:106
union aff_struct::@117 reg
char * repl
Definition: spell.h:108
uint32 flagflags
Definition: spell.h:103
uint32 issimple
Definition: spell.h:104
char * find
Definition: spell.h:107
Definition: regex.h:56
int len
Definition: spell.h:76
char * flag
Definition: spell.h:69
union spell_struct::@115 p
char word[FLEXIBLE_ARRAY_MEMBER]
Definition: spell.h:79
int affix
Definition: spell.h:74
struct spell_struct::@115::@116 d