36 5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
// Human-readable names for the node-continuation kinds, one string per kind.
// NOTE(review): presumably indexed by NodeContinuation (entry count expected
// to match NC_COUNT) and used only for debug output -- confirm against the
// enum declaration.
static const char* kNodeContNames[] = {
    "Anything",
    "OnlyDup",
    "NoDup",
};
44 if (
code == null_char) {
53 if (depth > 0 &&
prev !=
nullptr) {
55 prev->
Print(null_char, unicharset, depth - 1);
63 int null_char,
bool simple_text,
Dict* dict)
69 space_delimited_(true),
70 is_simple_text_(simple_text),
71 null_char_(null_char) {
77 double cert_offset,
double worst_dict_cert,
78 const UNICHARSET* charset,
int lstm_choice_mode) {
80 int width = output.
Width();
83 for (
int t = 0; t < width; ++t) {
84 ComputeTopN(output.
f(t), output.
NumFeatures(), kBeamWidths[0]);
85 DecodeStep(output.
f(t), t, dict_ratio, cert_offset, worst_dict_cert,
87 if (lstm_choice_mode) {
88 SaveMostCertainChoices(output.
f(t), output.
NumFeatures(), charset, t);
93 double dict_ratio,
double cert_offset,
94 double worst_dict_cert,
97 int width = output.
dim1();
98 for (
int t = 0; t < width; ++t) {
99 ComputeTopN(output[t], output.
dim2(), kBeamWidths[0]);
100 DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
104 void RecodeBeamSearch::SaveMostCertainChoices(
const float* outputs,
108 std::vector<std::pair<const char*, float>> choices;
109 for (
int i = 0; i < num_outputs; ++i) {
110 if (outputs[i] >= 0.01f) {
112 if (i + 2 >= num_outputs) {
122 while (choices.size() > pos && choices[pos].second > outputs[i]) {
125 choices.insert(choices.begin() + pos,
126 std::pair<const char*, float>(
character, outputs[i]));
138 ExtractBestPaths(&best_nodes,
nullptr);
141 int width = best_nodes.
size();
143 int label = best_nodes[t]->code;
144 if (label != null_char_) {
148 while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
161 ExtractBestPaths(&best_nodes,
nullptr);
162 ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
164 DebugPath(unicharset, best_nodes);
165 DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
172 float scale_factor,
bool debug,
175 int lstm_choice_mode) {
183 std::deque<std::tuple<int, int>> best_choices;
184 ExtractBestPaths(&best_nodes, &second_nodes);
186 DebugPath(unicharset, best_nodes);
187 ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
189 tprintf(
"\nSecond choice path:\n");
190 DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
196 if (lstm_choice_mode == 2) {
197 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
198 &xcoords, &best_choices);
199 if (best_choices.size() > 0) {
200 timestepEnd = std::get<1>(best_choices.front());
201 best_choices.pop_front();
204 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
207 int num_ids = unichar_ids.
size();
209 DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
214 float prev_space_cert = 0.0f;
215 for (
int word_start = 0; word_start < num_ids; word_start = word_end) {
216 for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
221 int index = xcoords[word_end];
222 if (best_nodes[index]->start_of_word)
break;
228 float space_cert = 0.0f;
229 if (word_end < num_ids && unichar_ids[word_end] ==
UNICHAR_SPACE)
230 space_cert = certs[word_end];
232 word_start > 0 && unichar_ids[word_start - 1] ==
UNICHAR_SPACE;
234 WERD_RES* word_res = InitializeWord(
235 leading_space, line_box, word_start, word_end,
236 std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
237 if (lstm_choice_mode == 1) {
238 for (
size_t i = timestepEnd; i < xcoords[word_end]; i++) {
241 timestepEnd = xcoords[word_end];
242 }
else if (lstm_choice_mode == 2){
245 std::vector<std::pair<const char*, float>> choice_pairs;
246 for (
size_t i = timestepEnd; i < xcoords[word_end]; i++) {
247 for (std::pair<const char*, float> choice :
timesteps[i]) {
248 if (std::strcmp(choice.first,
"")) {
249 sum += choice.second;
250 choice_pairs.push_back(choice);
253 if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
254 || i == xcoords[word_end]-1) {
255 std::map<const char*, float> summed_propabilities;
256 for (
auto & choice_pair : choice_pairs) {
257 summed_propabilities[choice_pair.first] += choice_pair.second;
259 std::vector<std::pair<const char*, float>> accumulated_timestep;
260 for (
auto& summed_propability : summed_propabilities) {
262 summed_propability.second/=sum;
264 while (accumulated_timestep.size() > pos
265 && accumulated_timestep[pos].second > summed_propability.second) {
268 accumulated_timestep.insert(accumulated_timestep.begin() + pos,
269 std::pair<const char*,float>(summed_propability.first,
270 summed_propability.second));
272 if (best_choices.size() > 0) {
273 best_choices.pop_front();
275 choice_pairs.clear();
276 word_res->
timesteps.push_back(accumulated_timestep);
280 timestepEnd = xcoords[word_end];
282 for (
int i = word_start; i < word_end; ++i) {
283 auto* choices =
new BLOB_CHOICE_LIST;
284 BLOB_CHOICE_IT bc_it(choices);
286 unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
288 int col = i - word_start;
289 choice->set_matrix_cell(col, col);
290 bc_it.add_after_then_move(choice);
293 int index = xcoords[word_end - 1];
296 prev_space_cert = space_cert;
297 if (word_end < num_ids && unichar_ids[word_end] ==
UNICHAR_SPACE)
304 for (
int p = 0; p < beam_size_; ++p) {
305 for (
int d = 0; d < 2; ++d) {
306 for (
int c = 0; c <
NC_COUNT; ++c) {
309 if (beam_[p]->beams_[index].empty())
continue;
311 tprintf(
"Position %d: %s+%s beam\n", p, d ?
"Dict" :
"Non-Dict",
313 DebugBeamPos(unicharset, beam_[p]->beams_[index]);
320 void RecodeBeamSearch::DebugBeamPos(
const UNICHARSET& unicharset,
325 int heap_size = heap.
size();
326 for (
int i = 0; i < heap_size; ++i) {
329 if (null_best ==
nullptr || null_best->
score < node->
score) null_best = node;
331 if (unichar_bests[node->
unichar_id] ==
nullptr ||
337 for (
int u = 0; u < unichar_bests.
size(); ++u) {
338 if (unichar_bests[u] !=
nullptr) {
339 const RecodeNode& node = *unichar_bests[u];
340 node.Print(null_char_, unicharset, 1);
343 if (null_best !=
nullptr) {
344 null_best->
Print(null_char_, unicharset, 1);
351 void RecodeBeamSearch::ExtractPathAsUnicharIds(
355 std::deque<std::tuple<int, int>>* best_choices) {
362 int width = best_nodes.
size();
366 double certainty = 0.0;
368 while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
369 double cert = best_nodes[t++]->certainty;
370 if (cert < certainty) certainty = cert;
374 int unichar_id = best_nodes[t]->unichar_id;
376 best_nodes[t]->permuter !=
NO_PERM) {
379 if (certainty < certs->back()) certs->
back() = certainty;
380 ratings->
back() += rating;
386 if (best_choices !=
nullptr) {
391 double cert = best_nodes[t++]->certainty;
395 best_nodes[t - 1]->permuter ==
NO_PERM)) {
399 }
while (t < width && best_nodes[t]->duplicate);
402 }
else if (!certs->
empty()) {
403 if (certainty < certs->back()) certs->
back() = certainty;
404 ratings->
back() += rating;
406 if (best_choices !=
nullptr) {
407 best_choices->push_back(
408 std::tuple<int, int>(
id, tposition));
416 WERD_RES* RecodeBeamSearch::InitializeWord(
bool leading_space,
417 const TBOX& line_box,
int word_start,
418 int word_end,
float space_certainty,
421 float scale_factor) {
424 C_BLOB_IT b_it(&blobs);
425 for (
int i = word_start; i < word_end; ++i) {
426 int min_half_width = xcoords[i + 1] - xcoords[i];
427 if (i > 0 && xcoords[i] - xcoords[i - 1] < min_half_width)
428 min_half_width = xcoords[i] - xcoords[i - 1];
429 if (min_half_width < 1) min_half_width = 1;
431 TBOX box(xcoords[i] - min_half_width, 0, xcoords[i] + min_half_width,
433 box.
scale(scale_factor);
435 box.set_top(line_box.
top());
439 WERD* word =
new WERD(&blobs, leading_space,
nullptr);
441 auto* word_res =
new WERD_RES(word);
442 word_res->uch_set = unicharset;
443 word_res->combination =
true;
444 word_res->space_certainty = space_certainty;
445 word_res->ratings =
new MATRIX(word_end - word_start, 1);
451 void RecodeBeamSearch::ComputeTopN(
const float* outputs,
int num_outputs,
457 for (
int i = 0; i < num_outputs; ++i) {
458 if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key) {
459 TopPair entry(outputs[i], i);
460 top_heap_.Push(&entry);
461 if (top_heap_.size() > top_n) top_heap_.Pop(&entry);
464 while (!top_heap_.empty()) {
466 top_heap_.Pop(&entry);
467 if (top_heap_.size() > 1) {
468 top_n_flags_[entry.data] =
TN_TOPN;
470 top_n_flags_[entry.data] =
TN_TOP2;
471 if (top_heap_.empty())
472 top_code_ = entry.data;
474 second_code_ = entry.data;
477 top_n_flags_[null_char_] =
TN_TOP2;
483 void RecodeBeamSearch::DecodeStep(
const float* outputs,
int t,
484 double dict_ratio,
double cert_offset,
485 double worst_dict_cert,
488 RecodeBeam* step = beam_[t];
494 charset, dict_ratio, cert_offset, worst_dict_cert, step);
495 if (dict_ !=
nullptr) {
497 charset, dict_ratio, cert_offset, worst_dict_cert, step);
500 RecodeBeam* prev = beam_[t - 1];
503 for (
int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
505 ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
506 tprintf(
"Step %d: Dawg beam %d:\n", t, i);
507 DebugPath(charset, path);
510 for (
int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
512 ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
513 tprintf(
"Step %d: Non-Dawg beam %d:\n", t, i);
514 DebugPath(charset, path);
522 for (
int tn = 0; tn <
TN_COUNT && total_beam == 0; ++tn) {
524 for (
int index = 0; index <
kNumBeams; ++index) {
528 for (
int i = prev->beams_[index].size() - 1; i >= 0; --i) {
529 ContinueContext(&prev->beams_[index].get(i).data, index, outputs, top_n,
530 charset, dict_ratio, cert_offset, worst_dict_cert, step);
533 for (
int index = 0; index <
kNumBeams; ++index) {
535 total_beam += step->beams_[index].size();
540 for (
int c = 0; c <
NC_COUNT; ++c) {
541 if (step->best_initial_dawgs_[c].code >= 0) {
542 int index =
BeamIndex(
true, static_cast<NodeContinuation>(c), 0);
544 PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
555 void RecodeBeamSearch::ContinueContext(
const RecodeNode* prev,
int index,
556 const float* outputs,
561 double worst_dict_cert,
563 RecodedCharID prefix;
564 RecodedCharID full_code;
565 const RecodeNode* previous = prev;
569 for (
int p = length - 1; p >= 0; --p, previous = previous->prev) {
570 while (previous !=
nullptr &&
571 (previous->duplicate || previous->code == null_char_)) {
572 previous = previous->prev;
574 if (previous !=
nullptr) {
575 prefix.Set(p, previous->code);
576 full_code.Set(p, previous->code);
579 if (prev !=
nullptr && !is_simple_text_) {
580 if (top_n_flags_[prev->code] == top_n_flag) {
584 PushDupOrNoDawgIfBetter(length,
true, prev->code, prev->unichar_id,
585 cert, worst_dict_cert, dict_ratio, use_dawgs,
589 prev->code != null_char_) {
591 outputs[null_char_]) +
593 PushDupOrNoDawgIfBetter(length,
true, prev->code, prev->unichar_id,
594 cert, worst_dict_cert, dict_ratio, use_dawgs,
599 if (prev->code != null_char_ && length > 0 &&
600 top_n_flags_[null_char_] == top_n_flag) {
605 PushDupOrNoDawgIfBetter(length,
false, null_char_, INVALID_UNICHAR_ID,
606 cert, worst_dict_cert, dict_ratio, use_dawgs,
611 if (final_codes !=
nullptr) {
612 for (
int i = 0; i < final_codes->
size(); ++i) {
613 int code = (*final_codes)[i];
614 if (top_n_flags_[code] != top_n_flag)
continue;
615 if (prev !=
nullptr && prev->code == code && !is_simple_text_)
continue;
618 full_code.Set(length, code);
621 if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID;
622 if (unichar_id != INVALID_UNICHAR_ID &&
623 charset !=
nullptr &&
626 ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
628 if (top_n_flag ==
TN_TOP2 && code != null_char_) {
629 float prob = outputs[code] + outputs[null_char_];
631 prev->code != null_char_ &&
632 ((prev->code == top_code_ && code == second_code_) ||
633 (code == top_code_ && prev->code == second_code_))) {
634 prob += outputs[prev->code];
637 ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
643 if (next_codes !=
nullptr) {
644 for (
int i = 0; i < next_codes->
size(); ++i) {
645 int code = (*next_codes)[i];
646 if (top_n_flags_[code] != top_n_flag)
continue;
647 if (prev !=
nullptr && prev->code == code && !is_simple_text_)
continue;
649 PushDupOrNoDawgIfBetter(length + 1,
false, code, INVALID_UNICHAR_ID, cert,
650 worst_dict_cert, dict_ratio, use_dawgs,
652 if (top_n_flag ==
TN_TOP2 && code != null_char_) {
653 float prob = outputs[code] + outputs[null_char_];
655 prev->code != null_char_ &&
656 ((prev->code == top_code_ && code == second_code_) ||
657 (code == top_code_ && prev->code == second_code_))) {
658 prob += outputs[prev->code];
661 PushDupOrNoDawgIfBetter(length + 1,
false, code, INVALID_UNICHAR_ID,
662 cert, worst_dict_cert, dict_ratio, use_dawgs,
670 void RecodeBeamSearch::ContinueUnichar(
int code,
int unichar_id,
float cert,
671 float worst_dict_cert,
float dict_ratio,
673 const RecodeNode* prev,
676 if (cert > worst_dict_cert) {
677 ContinueDawg(code, unichar_id, cert, cont, prev, step);
681 PushHeapIfBetter(kBeamWidths[0], code, unichar_id,
TOP_CHOICE_PERM,
false,
682 false,
false,
false, cert * dict_ratio, prev,
nullptr,
684 if (dict_ !=
nullptr &&
690 float dawg_cert = cert;
704 dawg_cert *= dict_ratio;
705 PushInitialDawgIfBetter(code, unichar_id, permuter,
false,
false,
706 dawg_cert, cont, prev, step);
714 void RecodeBeamSearch::ContinueDawg(
int code,
int unichar_id,
float cert,
716 const RecodeNode* prev, RecodeBeam* step) {
719 if (unichar_id == INVALID_UNICHAR_ID) {
720 PushHeapIfBetter(kBeamWidths[0], code, unichar_id,
NO_PERM,
false,
false,
721 false,
false, cert, prev,
nullptr, dawg_heap);
726 if (prev !=
nullptr) score += prev->score;
727 if (dawg_heap->size() >= kBeamWidths[0] &&
728 score <= dawg_heap->PeekTop().data.score &&
729 nodawg_heap->size() >= kBeamWidths[0] &&
730 score <= nodawg_heap->PeekTop().data.score) {
733 const RecodeNode* uni_prev = prev;
736 while (uni_prev !=
nullptr &&
737 (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate))
738 uni_prev = uni_prev->prev;
740 if (uni_prev !=
nullptr && uni_prev->end_of_word) {
743 PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter,
false,
744 false, cert, cont, prev, step);
745 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter,
746 false,
false,
false,
false, cert, prev,
nullptr,
750 }
else if (uni_prev !=
nullptr && uni_prev->start_of_dawg &&
756 DawgPositionVector initial_dawgs;
757 auto* updated_dawgs =
new DawgPositionVector;
758 DawgArgs dawg_args(&initial_dawgs, updated_dawgs,
NO_PERM);
759 bool word_start =
false;
760 if (uni_prev ==
nullptr) {
764 }
else if (uni_prev->dawgs !=
nullptr) {
766 dawg_args.active_dawgs = uni_prev->dawgs;
767 word_start = uni_prev->start_of_dawg;
775 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter,
false,
776 word_start, dawg_args.valid_end,
false, cert, prev,
777 dawg_args.updated_dawgs, dawg_heap);
778 if (dawg_args.valid_end && !space_delimited_) {
782 PushInitialDawgIfBetter(code, unichar_id, permuter, word_start,
true,
783 cert, cont, prev, step);
784 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter,
false,
785 word_start,
true,
false, cert, prev,
nullptr, nodawg_heap);
788 delete updated_dawgs;
795 void RecodeBeamSearch::PushInitialDawgIfBetter(
int code,
int unichar_id,
797 bool start,
bool end,
float cert,
799 const RecodeNode* prev,
801 RecodeNode* best_initial_dawg = &step->best_initial_dawgs_[cont];
803 if (prev !=
nullptr) score += prev->score;
804 if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
805 auto* initial_dawgs =
new DawgPositionVector;
807 RecodeNode node(code, unichar_id, permuter,
true, start, end,
false, cert,
808 score, prev, initial_dawgs,
809 ComputeCodeHash(code,
false, prev));
810 *best_initial_dawg = node;
818 void RecodeBeamSearch::PushDupOrNoDawgIfBetter(
819 int length,
bool dup,
int code,
int unichar_id,
float cert,
820 float worst_dict_cert,
float dict_ratio,
bool use_dawgs,
822 int index =
BeamIndex(use_dawgs, cont, length);
824 if (cert > worst_dict_cert) {
825 PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
826 prev ? prev->permuter :
NO_PERM,
false,
false,
false,
827 dup, cert, prev,
nullptr, &step->beams_[index]);
832 PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
834 false, dup, cert, prev,
nullptr, &step->beams_[index]);
842 void RecodeBeamSearch::PushHeapIfBetter(
int max_size,
int code,
int unichar_id,
844 bool word_start,
bool end,
bool dup,
845 float cert,
const RecodeNode* prev,
846 DawgPositionVector* d,
849 if (prev !=
nullptr) score += prev->score;
850 if (heap->size() < max_size || score > heap->PeekTop().data.score) {
851 uint64_t hash = ComputeCodeHash(code, dup, prev);
852 RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
853 dup, cert, score, prev, d, hash);
854 if (UpdateHeapIfMatched(&node, heap))
return;
858 if (heap->size() > max_size) heap->Pop(&entry);
866 void RecodeBeamSearch::PushHeapIfBetter(
int max_size, RecodeNode* node,
868 if (heap->size() < max_size || node->score > heap->PeekTop().data.score) {
869 if (UpdateHeapIfMatched(node, heap)) {
875 if (heap->size() > max_size) heap->Pop(&entry);
881 bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
887 for (
int i = 0; i < nodes->
size(); ++i) {
888 RecodeNode& node = (*nodes)[i].data;
889 if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
890 node.permuter == new_node->permuter &&
891 node.start_of_dawg == new_node->start_of_dawg) {
892 if (new_node->score > node.score) {
896 (*nodes)[i].key = node.score;
897 heap->Reshuffle(&(*nodes)[i]);
906 uint64_t RecodeBeamSearch::ComputeCodeHash(
int code,
bool dup,
907 const RecodeNode* prev)
const {
908 uint64_t hash = prev ==
nullptr ? 0 : prev->code_hash;
909 if (!dup && code != null_char_) {
911 uint64_t carry = (((hash >> 32) * num_classes) >> 32);
923 void RecodeBeamSearch::ExtractBestPaths(
927 const RecodeNode* best_node =
nullptr;
928 const RecodeNode* second_best_node =
nullptr;
929 const RecodeBeam* last_beam = beam_[beam_size_ - 1];
930 for (
int c = 0; c <
NC_COUNT; ++c) {
933 for (
int is_dawg = 0; is_dawg < 2; ++is_dawg) {
934 int beam_index =
BeamIndex(is_dawg, cont, 0);
935 int heap_size = last_beam->beams_[beam_index].size();
936 for (
int h = 0; h < heap_size; ++h) {
937 const RecodeNode* node = &last_beam->beams_[beam_index].get(h).data;
941 const RecodeNode* dawg_node = node;
942 while (dawg_node !=
nullptr &&
943 (dawg_node->unichar_id == INVALID_UNICHAR_ID ||
944 dawg_node->duplicate))
945 dawg_node = dawg_node->prev;
946 if (dawg_node ==
nullptr || (!dawg_node->end_of_word &&
952 if (best_node ==
nullptr || node->score > best_node->score) {
953 second_best_node = best_node;
955 }
else if (second_best_node ==
nullptr ||
956 node->score > second_best_node->score) {
957 second_best_node = node;
962 if (second_nodes !=
nullptr) ExtractPath(second_best_node, second_nodes);
963 ExtractPath(best_node, best_nodes);
968 void RecodeBeamSearch::ExtractPath(
971 while (node !=
nullptr) {
979 void RecodeBeamSearch::DebugPath(
982 for (
int c = 0; c < path.
size(); ++c) {
983 const RecodeNode& node = *path[c];
985 node.Print(null_char_, *unicharset, 1);
990 void RecodeBeamSearch::DebugUnicharPath(
995 int num_ids = unichar_ids.
size();
996 double total_rating = 0.0;
997 for (
int c = 0; c < num_ids; ++c) {
998 int coord = xcoords[c];
999 tprintf(
"%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
1001 certs[c], path[coord]->start_of_word, path[coord]->end_of_word,
1002 path[coord]->permuter);
1003 total_rating += ratings[c];
1005 tprintf(
"Path total rating = %g\n", total_rating);
DLLSYM void tprintf(const char *format,...)
void scale(const float f)
void init_to_size(int size, const T &t)
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
std::vector< std::vector< std::pair< const char *, float > > > timesteps
static constexpr float kMinCertainty
void Print(int null_char, const UNICHARSET &unicharset, int depth) const
int DecodeUnichar(const RecodedCharID &code) const
static const int kMaxCodeLen
const Pair & get(int index) const
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
const char * string() const
static int LengthFromBeamsIndex(int index)
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
static float ProbToCertainty(float prob)
GenericHeap< RecodePair > RecodeHeap
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (not CJ, or T).
KDPairInc< double, RecodeNode > RecodePair
void default_dawgs(DawgPositionVector *anylength_dawgs, bool suppress_patterns) const
static const int kNumBeams
STRING debug_str(UNICHAR_ID id) const
const char * id_to_unichar_ext(UNICHAR_ID id) const
int def_letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
const UNICHARSET & getUnicharset() const
static C_BLOB * FakeBlob(const TBOX &box)
std::vector< std::vector< std::pair< const char *, float > > > timesteps
static NodeContinuation ContinuationFromBeamsIndex(int index)
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
void DebugBeams(const UNICHARSET &unicharset) const
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
bool get_enabled(UNICHAR_ID unichar_id) const
void put(ICOORD pos, const T &thing)
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
void FakeWordFromRatings(PermuterType permuter)
void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
static bool IsDawgFromBeamsIndex(int index)