PostgreSQL Source Code  git master
spell.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * spell.h
4  *
5  * Declarations for ISpell dictionary
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  *
9  * src/include/tsearch/dicts/spell.h
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #ifndef __SPELL_H__
15 #define __SPELL_H__
16 
17 #include "regex/regex.h"
18 #include "tsearch/dicts/regis.h"
19 #include "tsearch/ts_public.h"
20 
21 /*
22  * SPNode and SPNodeData represent a prefix tree (trie) that stores the
23  * word list.
24  */
25 struct SPNode;
26 
27 typedef struct
28 {
30  isword:1,
31  /* Stores compound flags listed below */
33  /* Reference to an entry of the AffixData field */
34  affix:19;
35  struct SPNode *node;
36 } SPNodeData;
37 
38 /*
39  * The FF_ names correspond to Hunspell options in the affix file; see
40  * https://hunspell.github.io/
41  */
42 #define FF_COMPOUNDONLY 0x01 /* bit 0 */
43 #define FF_COMPOUNDBEGIN 0x02 /* bit 1 */
44 #define FF_COMPOUNDMIDDLE 0x04 /* bit 2 */
45 #define FF_COMPOUNDLAST 0x08 /* bit 3 */
46 #define FF_COMPOUNDFLAG ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
47  FF_COMPOUNDLAST ) /* 0x0e: begin/middle/last bits, without FF_COMPOUNDONLY */
48 #define FF_COMPOUNDFLAGMASK 0x0f /* all four bits defined above */
49 
50 typedef struct SPNode
51 {
55 
56 #define SPNHDRSZ (offsetof(SPNode,data)) /* bytes in SPNode before its flexible data[] array */
57 
58 /*
59  * Represents an entry in the word list.
60  */
61 typedef struct spell_struct
62 {
63  union
64  {
65  /*
66  * flag is filled in by NIImportDictionary(). After
67  * NISortDictionary(), d is used instead of flag.
68  */
69  char *flag;
70  /* d is used in mkSPNode() */
71  struct
72  {
73  /* Reference to an entry of the AffixData field */
74  int affix;
75  /* Length of the word */
76  int len;
77  } d;
78  } p;
81 
82 #define SPELLHDRSZ (offsetof(SPELL, word)) /* bytes in SPELL before its flexible word[] array */
83 
84 /*
85  * Represents an entry in an affix list.
86  */
87 typedef struct aff_struct
88 {
89  char *flag;
90  /* FF_SUFFIX or FF_PREFIX */
95  replen:14;
96  char *find;
97  char *repl;
98  union
99  {
100  /*
101  * Arrays of AFFIX are moved and sorted. We'll use a pointer to
102  * regex_t to keep this struct small, and avoid assuming that regex_t
103  * is movable.
104  */
107  } reg;
109 
110 /*
111  * affixes use dictionary flags too
112  */
113 #define FF_COMPOUNDPERMITFLAG 0x10 /* bit 4; lies above FF_COMPOUNDFLAGMASK (0x0f) */
114 #define FF_COMPOUNDFORBIDFLAG 0x20 /* bit 5 */
115 #define FF_CROSSPRODUCT 0x40 /* bit 6 */
116 
117 /*
118  * Don't change the order of these. Initialization sorts by these,
119  * and expects prefixes to come first after sorting.
120  */
121 #define FF_SUFFIX 1 /* greater than FF_PREFIX, so suffixes sort after prefixes */
122 #define FF_PREFIX 0 /* smallest value, so prefixes come first after sorting */
123 
124 /*
125  * AffixNode and AffixNodeData represent a prefix tree (trie) that stores
126  * the affix list.
127  */
128 struct AffixNode;
129 
130 typedef struct
131 {
133  naff:24;
135  struct AffixNode *node;
136 } AffixNodeData;
137 
138 typedef struct AffixNode
139 {
141  length:31;
144 
145 #define ANHRDSZ (offsetof(AffixNode, data)) /* bytes in AffixNode before its flexible data[] array */
146 
147 typedef struct /* element type of IspellDict.CompoundAffix; NOTE(review): presumably one affix usable inside compound words -- confirm in spell.c */
148 {
149  char *affix; /* affix text */
150  int len; /* NOTE(review): presumably the length of affix -- confirm */
151  bool issuffix; /* NOTE(review): presumably true for a suffix, false for a prefix -- confirm */
152 } CMPDAffix;
153 
154 /*
155  * How affix flags are encoded in a Hunspell dictionary.  The mode is held in
156  * IspellDict.flagMode and copied into CompoundAffixFlag.flagMode. */
157 typedef enum
158 {
159  FM_CHAR, /* one character (like ispell) */
160  FM_LONG, /* two characters */
161  FM_NUM, /* number, >= 0 and < 65536 (65536 == FLAGNUM_MAXSIZE) */
162 } FlagMode;
163 
164 /*
165  * Structure to store Hunspell options. Flag representation depends on flag
166  * type. These flags are about support of compound words.
167  */
168 typedef struct CompoundAffixFlag
169 {
170  union
171  {
172  /* Flag name if flagMode is FM_CHAR or FM_LONG */
173  char *s;
174  /* Flag name if flagMode is FM_NUM */
176  } flag;
177  /* we don't have a bsearch_arg version, so copy FlagMode */
181 
182 #define FLAGNUM_MAXSIZE (1 << 16) /* 65536, the same value as the exclusive FM_NUM bound noted in FlagMode */
183 
184 typedef struct
185 {
186  int maffixes;
187  int naffixes;
189 
192 
194  /* Array of sets of affixes */
195  char **AffixData;
199 
201 
204 
205  /*
206  * All following fields are actually needed only for initialization
207  */
208 
209  /* Array of Hunspell options in affix file */
211  /* number of entries in CompoundAffixFlags array */
213  /* allocated length of CompoundAffixFlags array */
215 
216  /*
217  * Remaining fields are only used during dictionary construction; they are
218  * set up by NIStartBuild and cleared by NIFinishBuild.
219  */
220  MemoryContext buildCxt; /* temp context for construction */
221 
222  /* Temporary array of all words in the dict file */
224  int nspell; /* number of valid entries in Spell array */
225  int mspell; /* allocated length of Spell array */
226 
227  /* These are used to allocate "compact" data without palloc overhead */
228  char *firstfree; /* first free address (always maxaligned) */
229  size_t avail; /* free space remaining at firstfree */
230 } IspellDict;
231 
232 extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); /* NOTE(review): presumably returns the normalized forms of word -- confirm in spell.c */
233 
234 extern void NIStartBuild(IspellDict *Conf); /* sets up the construction-only fields of IspellDict */
235 extern void NIImportAffixes(IspellDict *Conf, const char *filename); /* NOTE(review): presumably loads the affix file named by filename -- confirm */
236 extern void NIImportDictionary(IspellDict *Conf, const char *filename); /* fills in SPELL.p.flag for the imported entries */
237 extern void NISortDictionary(IspellDict *Conf); /* afterwards SPELL.p.d is used instead of SPELL.p.flag */
238 extern void NISortAffixes(IspellDict *Conf); /* NOTE(review): presumably the sort that relies on FF_PREFIX < FF_SUFFIX -- confirm */
239 extern void NIFinishBuild(IspellDict *Conf); /* clears the construction-only fields of IspellDict */
240 
241 #endif
unsigned int uint32
Definition: c.h:506
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:398
static char * filename
Definition: pg_dumpall.c:119
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
Definition: regcomp.c:1474
void NIStartBuild(IspellDict *Conf)
Definition: spell.c:88
void NIFinishBuild(IspellDict *Conf)
Definition: spell.c:103
void NIImportAffixes(IspellDict *Conf, const char *filename)
Definition: spell.c:1425
void NISortDictionary(IspellDict *Conf)
Definition: spell.c:1718
struct CompoundAffixFlag CompoundAffixFlag
struct SPNode SPNode
void NISortAffixes(IspellDict *Conf)
Definition: spell.c:1972
struct spell_struct SPELL
void NIImportDictionary(IspellDict *Conf, const char *filename)
Definition: spell.c:517
TSLexeme * NINormalizeWord(IspellDict *Conf, char *word)
Definition: spell.c:2536
FlagMode
Definition: spell.h:158
@ FM_LONG
Definition: spell.h:160
@ FM_CHAR
Definition: spell.h:159
@ FM_NUM
Definition: spell.h:161
struct AffixNode AffixNode
struct aff_struct AFFIX
uint32 naff
Definition: spell.h:133
AFFIX ** aff
Definition: spell.h:134
uint32 val
Definition: spell.h:132
struct AffixNode * node
Definition: spell.h:135
uint32 isvoid
Definition: spell.h:140
AffixNodeData data[FLEXIBLE_ARRAY_MEMBER]
Definition: spell.h:142
uint32 length
Definition: spell.h:141
int len
Definition: spell.h:150
bool issuffix
Definition: spell.h:151
char * affix
Definition: spell.h:149
uint32 value
Definition: spell.h:179
FlagMode flagMode
Definition: spell.h:178
union CompoundAffixFlag::@129 flag
int maffixes
Definition: spell.h:186
int lenAffixData
Definition: spell.h:196
MemoryContext buildCxt
Definition: spell.h:220
int mspell
Definition: spell.h:225
AffixNode * Suffix
Definition: spell.h:190
char ** AffixData
Definition: spell.h:195
int naffixes
Definition: spell.h:187
bool usecompound
Definition: spell.h:202
CompoundAffixFlag * CompoundAffixFlags
Definition: spell.h:210
AFFIX * Affix
Definition: spell.h:188
int nAffixData
Definition: spell.h:197
int nCompoundAffixFlag
Definition: spell.h:212
CMPDAffix * CompoundAffix
Definition: spell.h:200
bool useFlagAliases
Definition: spell.h:198
SPNode * Dictionary
Definition: spell.h:193
int mCompoundAffixFlag
Definition: spell.h:214
int nspell
Definition: spell.h:224
char * firstfree
Definition: spell.h:228
FlagMode flagMode
Definition: spell.h:203
size_t avail
Definition: spell.h:229
AffixNode * Prefix
Definition: spell.h:191
SPELL ** Spell
Definition: spell.h:223
Definition: regis.h:33
struct SPNode * node
Definition: spell.h:35
uint32 val
Definition: spell.h:29
uint32 compoundflag
Definition: spell.h:32
uint32 isword
Definition: spell.h:30
uint32 affix
Definition: spell.h:34
Definition: spell.h:51
SPNodeData data[FLEXIBLE_ARRAY_MEMBER]
Definition: spell.h:53
uint32 length
Definition: spell.h:52
uint32 isregis
Definition: spell.h:94
char * flag
Definition: spell.h:89
uint32 type
Definition: spell.h:91
Regis regis
Definition: spell.h:106
uint32 replen
Definition: spell.h:95
regex_t * pregex
Definition: spell.h:105
char * repl
Definition: spell.h:97
uint32 flagflags
Definition: spell.h:92
union aff_struct::@128 reg
uint32 issimple
Definition: spell.h:93
char * find
Definition: spell.h:96
Definition: regex.h:56
int len
Definition: spell.h:76
char * flag
Definition: spell.h:69
union spell_struct::@126 p
char word[FLEXIBLE_ARRAY_MEMBER]
Definition: spell.h:79
struct spell_struct::@126::@127 d
int affix
Definition: spell.h:74