tesseract  4.1.1
trie.h
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: trie.h
5  * Description: Functions to build a trie data structure.
6  * Author: Mark Seaman, SW Productivity
7  *
8  * (c) Copyright 1987, Hewlett-Packard Company.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  *
19  *********************************************************************************/
20 #ifndef TRIE_H
21 #define TRIE_H
22 
23 #include "dawg.h"
24 #include "genericvector.h"
25 
26 class UNICHARSET;
27 
28 // Note: if we consider either NODE_REF or EDGE_INDEX to ever exceed
29 // max int32, we will need to change GenericVector to use int64 for size
30 // and address indices. This does not seem to be needed immediately,
31 // since currently the largest number of edges limit used by tesseract
32 // (kMaxNumEdges in wordlist2dawg.cpp) is far less than max int32.
33 // There are also int casts below to satisfy the WIN32 compiler that would
34 // need to be changed.
35 // It might be cleanest to change the types of most of the Trie/Dawg related
36 // typedefs to int and restrict the casts to extracting these values from
37 // the 64 bit EDGE_RECORD.
38 using EDGE_INDEX = int64_t ; // index of an edge in a given node
39 using NODE_MARKER = bool *;
41 
45 };
47 
48 namespace tesseract {
49 
56 class Trie : public Dawg {
57  public:
62  };
63 
64  // Minimum number of concrete characters at the beginning of user patterns.
65  static const int kSaneNumConcreteChars = 0;
66  // Various unicode whitespace characters are used to denote unichar patterns,
67  // (character classifier would never produce these whitespace characters as a
68  // valid classification).
69  static const char kAlphaPatternUnicode[];
70  static const char kDigitPatternUnicode[];
71  static const char kAlphanumPatternUnicode[];
72  static const char kPuncPatternUnicode[];
73  static const char kLowerPatternUnicode[];
74  static const char kUpperPatternUnicode[];
75 
76  static const char *get_reverse_policy_name(
77  RTLReversePolicy reverse_policy);
78 
79  // max_num_edges argument allows limiting the amount of memory this
80  // Trie can consume (if a new word insert would cause the Trie to
81  // contain more edges than max_num_edges, all the edges are cleared
82  // so that new inserts can proceed).
84  int unicharset_size, int debug_level)
85  : Dawg(type, lang, perm, debug_level) {
86  init(unicharset_size);
87  num_edges_ = 0;
89  new_dawg_node(); // need to allocate node 0
90  initialized_patterns_ = false;
91  }
92  ~Trie() override { nodes_.delete_data_pointers(); }
93 
94  // Reset the Trie to empty.
95  void clear();
96 
98  EDGE_REF edge_char_of(NODE_REF node_ref, UNICHAR_ID unichar_id,
99  bool word_end) const override {
100  EDGE_RECORD *edge_ptr;
101  EDGE_INDEX edge_index;
102  if (!edge_char_of(node_ref, NO_EDGE, FORWARD_EDGE, word_end, unichar_id,
103  &edge_ptr, &edge_index)) return NO_EDGE;
104  return make_edge_ref(node_ref, edge_index);
105  }
106 
112  bool word_end) const override {
113  const EDGE_VECTOR &forward_edges =
114  nodes_[static_cast<int>(node)]->forward_edges;
115  for (int i = 0; i < forward_edges.size(); ++i) {
116  if (!word_end || end_of_word_from_edge_rec(forward_edges[i])) {
117  vec->push_back(NodeChild(unichar_id_from_edge_rec(forward_edges[i]),
118  make_edge_ref(node, i)));
119  }
120  }
121  }
122 
127  NODE_REF next_node(EDGE_REF edge_ref) const override {
128  if (edge_ref == NO_EDGE || num_edges_ == 0) return NO_EDGE;
129  return next_node_from_edge_rec(*deref_edge_ref(edge_ref));
130  }
131 
136  bool end_of_word(EDGE_REF edge_ref) const override {
137  if (edge_ref == NO_EDGE || num_edges_ == 0) return false;
138  return end_of_word_from_edge_rec(*deref_edge_ref(edge_ref));
139  }
140 
142  UNICHAR_ID edge_letter(EDGE_REF edge_ref) const override {
143  if (edge_ref == NO_EDGE || num_edges_ == 0) return INVALID_UNICHAR_ID;
144  return unichar_id_from_edge_rec(*deref_edge_ref(edge_ref));
145  }
146  // Sets the UNICHAR_ID in the given edge_rec to unicharset_size_, marking
147  // the edge dead.
148  void KillEdge(EDGE_RECORD* edge_rec) const {
149  *edge_rec &= ~letter_mask_;
150  *edge_rec |= (unicharset_size_ << LETTER_START_BIT);
151  }
152  bool DeadEdge(const EDGE_RECORD& edge_rec) const {
153  return unichar_id_from_edge_rec(edge_rec) == unicharset_size_;
154  }
155 
156  // Prints the contents of the node indicated by the given NODE_REF.
157  // At most max_num_edges will be printed.
158  void print_node(NODE_REF node, int max_num_edges) const override;
159 
160  // Writes edges from nodes_ to an EDGE_ARRAY and creates a SquishedDawg.
161  // Eliminates redundant edges and returns the pointer to the SquishedDawg.
162  // Note: the caller is responsible for deallocating memory associated
163  // with the returned SquishedDawg pointer.
165 
166  // Reads a list of words from the given file and adds into the Trie.
167  // Calls WERD_CHOICE::reverse_unichar_ids_if_rtl() according to the reverse
168  // policy and information in the unicharset.
169  // Returns false on error.
170  bool read_and_add_word_list(const char *filename,
171  const UNICHARSET &unicharset,
173 
174  // Reads a list of words from the given file.
175  // Returns false on error.
176  bool read_word_list(const char *filename,
177  GenericVector<STRING>* words);
178  // Adds a list of words previously read using read_word_list to the trie
179  // using the given unicharset and reverse_policy to convert to unichar-ids.
180  // Returns false on error.
181  bool add_word_list(const GenericVector<STRING> &words,
182  const UNICHARSET &unicharset,
183  Trie::RTLReversePolicy reverse_policy);
184 
185  // Inserts the list of patterns from the given file into the Trie.
186  // The pattern list file should contain one pattern per line in UTF-8 format.
187  //
188  // Each pattern can contain any non-whitespace characters, however only the
189  // patterns that contain characters from the unicharset of the corresponding
190  // language will be useful.
191  // The only meta character is '\'. To be used in a pattern as an ordinary
192  // string it should be escaped with '\' (e.g. string "C:\Documents" should
193  // be written in the patterns file as "C:\\Documents").
194  // This function supports a very limited regular expression syntax. One can
195  // express a character, a certain character class and a number of times the
196  // entity should be repeated in the pattern.
197  //
198  // To denote a character class use one of:
199  // \c - unichar for which UNICHARSET::get_isalpha() is true (character)
200  // \d - unichar for which UNICHARSET::get_isdigit() is true
201  // \n - unichar for which UNICHARSET::get_isdigit() and
202  // UNICHARSET::isalpha() are true
203  // \p - unichar for which UNICHARSET::get_ispunct() is true
204  // \a - unichar for which UNICHARSET::get_islower() is true
205  // \A - unichar for which UNICHARSET::get_isupper() is true
206  //
207  // \* could be specified after each character or pattern to indicate that
208  // the character/pattern can be repeated any number of times before the next
209  // character/pattern occurs.
210  //
211  // Examples:
212  // 1-8\d\d-GOOG-411 will be expanded to strings:
213  // 1-800-GOOG-411, 1-801-GOOG-411, ... 1-899-GOOG-411.
214  //
215  // http://www.\n\*.com will be expanded to strings like:
216  // http://www.a.com http://www.a123.com ... http://www.ABCDefgHIJKLMNop.com
217  //
218  // Note: In choosing which patterns to include please be aware of the fact
219  // providing very generic patterns will make tesseract run slower.
220  // For example \n\* at the beginning of the pattern will make Tesseract
221  // consider all the combinations of proposed character choices for each
222  // of the segmentations, which will be unacceptably slow.
223  // Because of potential problems with speed that could be difficult to
224  // identify, each user pattern has to have at least kSaneNumConcreteChars
225  // concrete characters from the unicharset at the beginning.
226  bool read_pattern_list(const char *filename, const UNICHARSET &unicharset);
227 
228  // Initializes the values of *_pattern_ unichar ids.
229  // This function should be called before calling read_pattern_list().
230  void initialize_patterns(UNICHARSET *unicharset);
231 
232  // Fills in the given unichar id vector with the unichar ids that represent
233  // the patterns of the character classes of the given unichar_id.
234  void unichar_id_to_patterns(UNICHAR_ID unichar_id,
235  const UNICHARSET &unicharset,
236  GenericVector<UNICHAR_ID> *vec) const override;
237 
238  // Returns the given EDGE_REF if the EDGE_RECORD that it points to has
239  // a self loop and the given unichar_id matches the unichar_id stored in the
240  // EDGE_RECORD, returns NO_EDGE otherwise.
242  UNICHAR_ID unichar_id,
243  bool word_end) const override {
244  if (edge_ref == NO_EDGE) return NO_EDGE;
245  EDGE_RECORD *edge_rec = deref_edge_ref(edge_ref);
246  return (marker_flag_from_edge_rec(*edge_rec) &&
247  unichar_id == unichar_id_from_edge_rec(*edge_rec) &&
248  word_end == end_of_word_from_edge_rec(*edge_rec)) ?
249  edge_ref : NO_EDGE;
250  }
251 
252  // Adds a word to the Trie (creates the necessary nodes and edges).
253  //
254  // If repetitions vector is not nullptr, each entry in the vector indicates
255  // whether the unichar id with the corresponding index in the word is allowed
256  // to repeat an unlimited number of times. For each entry that is true, MARKER
257  // flag of the corresponding edge created for this unichar id is set to true).
258  //
259  // Return true if add succeeded, false otherwise (e.g. when a word contained
260  // an invalid unichar id or the trie was getting too large and was cleared).
261  bool add_word_to_dawg(const WERD_CHOICE &word,
262  const GenericVector<bool> *repetitions);
263  bool add_word_to_dawg(const WERD_CHOICE &word) {
264  return add_word_to_dawg(word, nullptr);
265  }
266 
267  protected:
268  // The structure of an EDGE_REF for Trie edges is as follows:
269  // [LETTER_START_BIT, flag_start_bit_):
270  // edge index in *_edges in a TRIE_NODE_RECORD
271  // [flag_start_bit, 30th bit]: node index in nodes (TRIE_NODES vector)
272  //
273  // With this arrangement there are enough bits to represent edge indices
274  // (each node can have at most unicharset_size_ forward edges and
275  // the position of flag_start_bit is set to be log2(unicharset_size_)).
276  // It is also possible to accommodate a maximum number of nodes that is at
277  // least as large as that of the SquishedDawg representation (in SquishedDawg
278  // each EDGE_RECORD has 32-(flag_start_bit+NUM_FLAG_BITS) bits to represent
279  // the next node index).
280  //
281 
282  // Returns the pointer to EDGE_RECORD after decoding the location
283  // of the edge from the information in the given EDGE_REF.
284  // This function assumes that EDGE_REF holds valid node/edge indices.
285  inline EDGE_RECORD *deref_edge_ref(EDGE_REF edge_ref) const {
286  int edge_index = static_cast<int>(
287  (edge_ref & letter_mask_) >> LETTER_START_BIT);
288  int node_index = static_cast<int>(
289  (edge_ref & deref_node_index_mask_) >> flag_start_bit_);
290  TRIE_NODE_RECORD *node_rec = nodes_[node_index];
291  return &(node_rec->forward_edges[edge_index]);
292  }
294  inline EDGE_REF make_edge_ref(NODE_REF node_index,
295  EDGE_INDEX edge_index) const {
296  return ((node_index << flag_start_bit_) |
297  (edge_index << LETTER_START_BIT));
298  }
300  inline void link_edge(EDGE_RECORD *edge, NODE_REF nxt, bool repeats,
301  int direction, bool word_end, UNICHAR_ID unichar_id) {
302  EDGE_RECORD flags = 0;
303  if (repeats) flags |= MARKER_FLAG;
304  if (word_end) flags |= WERD_END_FLAG;
305  if (direction == BACKWARD_EDGE) flags |= DIRECTION_FLAG;
306  *edge = ((nxt << next_node_start_bit_) |
307  (static_cast<EDGE_RECORD>(flags) << flag_start_bit_) |
308  (static_cast<EDGE_RECORD>(unichar_id) << LETTER_START_BIT));
309  }
311  inline void print_edge_rec(const EDGE_RECORD &edge_rec) const {
312  tprintf("|" REFFORMAT "|%s%s%s|%d|", next_node_from_edge_rec(edge_rec),
313  marker_flag_from_edge_rec(edge_rec) ? "R," : "",
314  (direction_from_edge_rec(edge_rec) == FORWARD_EDGE) ? "F" : "B",
315  end_of_word_from_edge_rec(edge_rec) ? ",E" : "",
316  unichar_id_from_edge_rec(edge_rec));
317  }
318  // Returns true if the next node in recorded the given EDGE_RECORD
319  // has exactly one forward edge.
320  inline bool can_be_eliminated(const EDGE_RECORD &edge_rec) {
321  NODE_REF node_ref = next_node_from_edge_rec(edge_rec);
322  return (node_ref != NO_EDGE &&
323  nodes_[static_cast<int>(node_ref)]->forward_edges.size() == 1);
324  }
325 
326  // Prints the contents of the Trie.
327  // At most max_num_edges will be printed for each node.
328  void print_all(const char* msg, int max_num_edges) {
329  tprintf("\n__________________________\n%s\n", msg);
330  for (int i = 0; i < nodes_.size(); ++i) print_node(i, max_num_edges);
331  tprintf("__________________________\n");
332  }
333 
334  // Finds the edge with the given direction, word_end and unichar_id
335  // in the node indicated by node_ref. Fills in the pointer to the
336  // EDGE_RECORD and the index of the edge with the the values
337  // corresponding to the edge found. Returns true if an edge was found.
338  bool edge_char_of(NODE_REF node_ref, NODE_REF next_node,
339  int direction, bool word_end, UNICHAR_ID unichar_id,
340  EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const;
341 
342  // Adds an single edge linkage between node1 and node2 in the direction
343  // indicated by direction argument.
344  bool add_edge_linkage(NODE_REF node1, NODE_REF node2, bool repeats,
345  int direction, bool word_end,
346  UNICHAR_ID unichar_id);
347 
348  // Adds forward edge linkage from node1 to node2 and the corresponding
349  // backward edge linkage in the other direction.
350  bool add_new_edge(NODE_REF node1, NODE_REF node2,
351  bool repeats, bool word_end, UNICHAR_ID unichar_id) {
352  return (add_edge_linkage(node1, node2, repeats, FORWARD_EDGE,
353  word_end, unichar_id) &&
354  add_edge_linkage(node2, node1, repeats, BACKWARD_EDGE,
355  word_end, unichar_id));
356  }
357 
358  // Sets the word ending flags in an already existing edge pair.
359  // Returns true on success.
360  void add_word_ending(EDGE_RECORD *edge,
361  NODE_REF the_next_node,
362  bool repeats,
363  UNICHAR_ID unichar_id);
364 
365  // Allocates space for a new node in the Trie.
367 
368  // Removes a single edge linkage to between node1 and node2 in the
369  // direction indicated by direction argument.
370  void remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction,
371  bool word_end, UNICHAR_ID unichar_id);
372 
373  // Removes forward edge linkage from node1 to node2 and the corresponding
374  // backward edge linkage in the other direction.
375  void remove_edge(NODE_REF node1, NODE_REF node2,
376  bool word_end, UNICHAR_ID unichar_id) {
377  remove_edge_linkage(node1, node2, FORWARD_EDGE, word_end, unichar_id);
378  remove_edge_linkage(node2, node1, BACKWARD_EDGE, word_end, unichar_id);
379  }
380 
381  // Compares edge1 and edge2 in the given node to see if they point to two
382  // next nodes that could be collapsed. If they do, performs the reduction
383  // and returns true.
384  bool eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1,
385  const EDGE_RECORD &edge2);
386 
387  // Assuming that edge_index indicates the first edge in a group of edges
388  // in this node with a particular letter value, looks through these edges
389  // to see if any of them can be collapsed. If so does it. Returns to the
390  // caller when all edges with this letter have been reduced.
391  // Returns true if further reduction is possible with this same letter.
392  bool reduce_lettered_edges(EDGE_INDEX edge_index,
393  UNICHAR_ID unichar_id,
394  NODE_REF node,
395  EDGE_VECTOR* backward_edges,
396  NODE_MARKER reduced_nodes);
397 
404  void sort_edges(EDGE_VECTOR *edges);
405 
407  void reduce_node_input(NODE_REF node, NODE_MARKER reduced_nodes);
408 
409  // Returns the pattern unichar id for the given character class code.
411 
412  // Member variables
413  TRIE_NODES nodes_; // vector of nodes in the Trie
414  // Freelist of edges in the root backwards node that were previously zeroed.
416  uint64_t num_edges_; // sum of all edges (forward and backward)
417  uint64_t deref_direction_mask_; // mask for EDGE_REF to extract direction
418  uint64_t deref_node_index_mask_; // mask for EDGE_REF to extract node index
419  // Variables for translating character class codes denoted in user patterns
420  // file to the unichar ids used to represent them in a Trie.
428 };
429 } // namespace tesseract
430 
431 #endif
int UNICHAR_ID
Definition: unichar.h:34
void reduce_node_input(NODE_REF node, NODE_MARKER reduced_nodes)
Definition: trie.cpp:660
#define FORWARD_EDGE
Definition: dawg.h:81
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void clear()
Definition: trie.cpp:57
PermuterType
Definition: ratngs.h:232
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override
Definition: trie.h:111
bool * NODE_MARKER
Definition: trie.h:39
TRIE_NODES nodes_
Definition: trie.h:413
EDGE_VECTOR forward_edges
Definition: trie.h:43
NODE_REF new_dawg_node()
Definition: trie.cpp:268
~Trie() override
Definition: trie.h:92
const STRING & lang() const
Definition: dawg.h:125
uint64_t num_edges_
Definition: trie.h:416
int next_node_start_bit_
Definition: dawg.h:310
static const char kDigitPatternUnicode[]
Definition: trie.h:70
void delete_data_pointers()
void KillEdge(EDGE_RECORD *edge_rec) const
Definition: trie.h:148
EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const override
Definition: trie.h:241
static const int kSaneNumConcreteChars
Definition: trie.h:65
void link_edge(EDGE_RECORD *edge, NODE_REF nxt, bool repeats, int direction, bool word_end, UNICHAR_ID unichar_id)
Definition: trie.h:300
UNICHAR_ID lower_pattern_
Definition: trie.h:425
bool read_word_list(const char *filename, GenericVector< STRING > *words)
Definition: trie.cpp:290
void init(int unicharset_size)
Definition: dawg.cpp:176
bool end_of_word(EDGE_REF edge_ref) const override
Definition: trie.h:136
int64_t EDGE_REF
Definition: dawg.h:51
bool add_word_list(const GenericVector< STRING > &words, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse_policy)
Definition: trie.cpp:313
static const char kUpperPatternUnicode[]
Definition: trie.h:74
DawgType type() const
Definition: dawg.h:124
Trie(DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: trie.h:83
bool read_and_add_word_list(const char *filename, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse)
Definition: trie.cpp:281
EDGE_REF edge_char_of(NODE_REF node_ref, UNICHAR_ID unichar_id, bool word_end) const override
Definition: trie.h:98
int unicharset_size_
Definition: dawg.h:308
NODE_REF next_node(EDGE_REF edge_ref) const override
Definition: trie.h:127
void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const override
Definition: trie.cpp:354
#define DIRECTION_FLAG
Definition: dawg.h:85
int flag_start_bit_
Definition: dawg.h:309
bool add_word_to_dawg(const WERD_CHOICE &word, const GenericVector< bool > *repetitions)
Definition: trie.cpp:169
EDGE_RECORD * deref_edge_ref(EDGE_REF edge_ref) const
Definition: trie.h:285
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:226
static const char kAlphaPatternUnicode[]
Definition: trie.h:69
uint64_t letter_mask_
Definition: dawg.h:307
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const override
Definition: trie.h:142
bool initialized_patterns_
Definition: trie.h:427
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:209
#define WERD_END_FLAG
Definition: dawg.h:86
RTLReversePolicy
Definition: trie.h:58
void initialize_patterns(UNICHARSET *unicharset)
Definition: trie.cpp:337
static const char kAlphanumPatternUnicode[]
Definition: trie.h:71
EDGE_VECTOR backward_edges
Definition: trie.h:44
void print_node(NODE_REF node, int max_num_edges) const override
Definition: trie.cpp:697
UNICHAR_ID upper_pattern_
Definition: trie.h:426
int64_t EDGE_INDEX
Definition: trie.h:38
void print_all(const char *msg, int max_num_edges)
Definition: trie.h:328
bool add_new_edge(NODE_REF node1, NODE_REF node2, bool repeats, bool word_end, UNICHAR_ID unichar_id)
Definition: trie.h:350
void add_word_ending(EDGE_RECORD *edge, NODE_REF the_next_node, bool repeats, UNICHAR_ID unichar_id)
Definition: trie.cpp:152
bool read_pattern_list(const char *filename, const UNICHARSET &unicharset)
Definition: trie.cpp:394
void remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id)
Definition: trie.cpp:476
bool add_edge_linkage(NODE_REF node1, NODE_REF node2, bool repeats, int direction, bool word_end, UNICHAR_ID unichar_id)
Definition: trie.cpp:116
bool eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1, const EDGE_RECORD &edge2)
Definition: trie.cpp:558
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
Definition: dawg.h:213
GenericVector< EDGE_INDEX > root_back_freelist_
Definition: trie.h:415
SquishedDawg * trie_to_dawg()
Definition: trie.cpp:511
int64_t NODE_REF
Definition: dawg.h:52
UNICHAR_ID character_class_to_pattern(char ch)
Definition: trie.cpp:376
UNICHAR_ID digit_pattern_
Definition: trie.h:422
Definition: strngs.h:45
bool reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, NODE_REF node, EDGE_VECTOR *backward_edges, NODE_MARKER reduced_nodes)
Definition: trie.cpp:605
void sort_edges(EDGE_VECTOR *edges)
Definition: trie.cpp:646
LIST reverse(LIST list)
Definition: oldlist.cpp:244
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:222
void print_edge_rec(const EDGE_RECORD &edge_rec) const
Definition: trie.h:311
void remove_edge(NODE_REF node1, NODE_REF node2, bool word_end, UNICHAR_ID unichar_id)
Definition: trie.h:375
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
Definition: dawg.h:217
static const char kLowerPatternUnicode[]
Definition: trie.h:73
static const char * get_reverse_policy_name(RTLReversePolicy reverse_policy)
Definition: trie.cpp:52
bool DeadEdge(const EDGE_RECORD &edge_rec) const
Definition: trie.h:152
int push_back(T object)
bool can_be_eliminated(const EDGE_RECORD &edge_rec)
Definition: trie.h:320
#define MARKER_FLAG
Definition: dawg.h:84
int size() const
Definition: genericvector.h:72
uint64_t deref_direction_mask_
Definition: trie.h:417
UNICHAR_ID punc_pattern_
Definition: trie.h:424
EDGE_REF make_edge_ref(NODE_REF node_index, EDGE_INDEX edge_index) const
Definition: trie.h:294
#define REFFORMAT
Definition: dawg.h:89
#define LETTER_START_BIT
Definition: dawg.h:87
static const char kPuncPatternUnicode[]
Definition: trie.h:72
uint64_t deref_node_index_mask_
Definition: trie.h:418
DawgType
Definition: dawg.h:68
UNICHAR_ID alphanum_pattern_
Definition: trie.h:423
uint64_t EDGE_RECORD
Definition: dawg.h:49
UNICHAR_ID alpha_pattern_
Definition: trie.h:421
bool add_word_to_dawg(const WERD_CHOICE &word)
Definition: trie.h:263
#define BACKWARD_EDGE
Definition: dawg.h:82