tesseract  4.1.1
blamer.cpp
Go to the documentation of this file.
1 // File: blamer.cpp
3 // Description: Module allowing precise error causes to be allocated.
4 // Author: Rike Antonova
5 // Refactored: Ray Smith
6 // Created: Mon Feb 04 14:37:01 PST 2013
7 //
8 // (C) Copyright 2013, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #include "blamer.h"
22 #include <cmath> // for abs
23 #include <cstdlib> // for abs
24 #include "blobs.h" // for TPOINT, TWERD, TBLOB
25 #include "errcode.h" // for ASSERT_HOST
26 #if !defined(DISABLED_LEGACY_ENGINE)
27 #include "lm_pain_points.h" // for LMPainPoints
28 #endif
29 #include "matrix.h" // for MATRIX
30 #include "normalis.h" // for DENORM
31 #include "pageres.h" // for WERD_RES
32 #include "tesscallback.h" // for TessResultCallback2
33 #include "unicharset.h" // for UNICHARSET
34 
// Names for each value of IncorrectResultReason enum. Keep in sync.
const char kBlameCorrect[] = "corr";
const char kBlameClassifier[] = "cl";
const char kBlameChopper[] = "chop";
const char kBlameClassLMTradeoff[] = "cl/LM";
const char kBlamePageLayout[] = "pglt";
const char kBlameSegsearchHeur[] = "ss_heur";
const char kBlameSegsearchPP[] = "ss_pp";
const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
const char kBlameAdaption[] = "adapt";
const char kBlameNoTruthSplit[] = "no_tr_spl";
const char kBlameNoTruth[] = "no_tr";
const char kBlameUnknown[] = "unkn";

// Lookup table from an IncorrectResultReason value to its short name.
// Entries are listed in the same order as the kBlame* constants above.
// NOTE(review): that order is assumed to match the declaration order of the
// IncorrectResultReason enum in blamer.h - confirm when editing either.
const char * const kIncorrectResultReasonNames[] = {
    kBlameCorrect,
    kBlameClassifier,
    kBlameChopper,
    kBlameClassLMTradeoff,
    kBlamePageLayout,
    kBlameSegsearchHeur,
    kBlameSegsearchPP,
    kBlameClassOldLMTradeoff,
    kBlameAdaption,
    kBlameNoTruthSplit,
    kBlameNoTruth,
    kBlameUnknown
};
63 
65  return kIncorrectResultReasonNames[irr];
66 }
67 
68 const char *BlamerBundle::IncorrectReason() const {
69  return kIncorrectResultReasonNames[incorrect_result_reason_];
70 }
71 
72 // Functions to setup the blamer.
73 // Whole word string, whole word bounding box.
74 void BlamerBundle::SetWordTruth(const UNICHARSET& unicharset,
75  const char* truth_str, const TBOX& word_box) {
76  truth_word_.InsertBox(0, word_box);
77  truth_has_char_boxes_ = false;
78  // Encode the string as UNICHAR_IDs.
80  GenericVector<char> lengths;
81  unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
82  int total_length = 0;
83  for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
84  STRING uch(truth_str + total_length);
85  uch.truncate_at(lengths[i] - total_length);
86  UNICHAR_ID id = encoding[i];
87  if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
88  truth_text_.push_back(uch);
89  }
90 }
91 
92 // Single "character" string, "character" bounding box.
93 // May be called multiple times to indicate the characters in a word.
95  const char* char_str, const TBOX& char_box) {
96  STRING symbol_str(char_str);
97  UNICHAR_ID id = unicharset.unichar_to_id(char_str);
98  if (id != INVALID_UNICHAR_ID) {
99  STRING normed_uch(unicharset.get_normed_unichar(id));
100  if (normed_uch.length() > 0) symbol_str = normed_uch;
101  }
102  int length = truth_word_.length();
103  truth_text_.push_back(symbol_str);
104  truth_word_.InsertBox(length, char_box);
105  if (length == 0)
106  truth_has_char_boxes_ = true;
107  else if (truth_word_.BlobBox(length - 1) == char_box)
108  truth_has_char_boxes_ = false;
109 }
110 
111 // Marks that there is something wrong with the truth text, like it contains
112 // reject characters.
114  incorrect_result_reason_ = IRR_NO_TRUTH;
115  truth_has_char_boxes_ = false;
116 }
117 
118 // Returns true if the provided word_choice is correct.
119 bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE* word_choice) const {
120  if (word_choice == nullptr) return false;
121  const UNICHARSET* uni_set = word_choice->unicharset();
122  STRING normed_choice_str;
123  for (int i = 0; i < word_choice->length(); ++i) {
124  normed_choice_str +=
125  uni_set->get_normed_unichar(word_choice->unichar_id(i));
126  }
127  STRING truth_str = TruthString();
128  return truth_str == normed_choice_str;
129 }
130 
132  const WERD_CHOICE *choice,
133  STRING *debug) {
134  (*debug) += "Truth ";
135  for (int i = 0; i < this->truth_text_.length(); ++i) {
136  (*debug) += this->truth_text_[i];
137  }
138  if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)";
139  if (choice != nullptr) {
140  (*debug) += " Choice ";
141  STRING choice_str;
142  choice->string_and_lengths(&choice_str, nullptr);
143  (*debug) += choice_str;
144  }
145  if (msg.length() > 0) {
146  (*debug) += "\n";
147  (*debug) += msg;
148  }
149  (*debug) += "\n";
150 }
151 
152 // Sets up the norm_truth_word from truth_word using the given DENORM.
154  // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
155  norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
156  TPOINT topleft;
157  TPOINT botright;
158  TPOINT norm_topleft;
159  TPOINT norm_botright;
160  for (int b = 0; b < truth_word_.length(); ++b) {
161  const TBOX &box = truth_word_.BlobBox(b);
162  topleft.x = box.left();
163  topleft.y = box.top();
164  botright.x = box.right();
165  botright.y = box.bottom();
166  denorm.NormTransform(nullptr, topleft, &norm_topleft);
167  denorm.NormTransform(nullptr, botright, &norm_botright);
168  TBOX norm_box(norm_topleft.x, norm_botright.y,
169  norm_botright.x, norm_topleft.y);
170  norm_truth_word_.InsertBox(b, norm_box);
171  }
172 }
173 
174 // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
175 // bundles) where the right edge/ of the left-hand word is word1_right,
176 // and the left edge of the right-hand word is word2_left.
177 void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug,
178  BlamerBundle* bundle1,
179  BlamerBundle* bundle2) const {
180  STRING debug_str;
181  // Find truth boxes that correspond to the split in the blobs.
182  int b;
183  int begin2_truth_index = -1;
184  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
185  truth_has_char_boxes_) {
186  debug_str = "Looking for truth split at";
187  debug_str.add_str_int(" end1_x ", word1_right);
188  debug_str.add_str_int(" begin2_x ", word2_left);
189  debug_str += "\nnorm_truth_word boxes:\n";
190  if (norm_truth_word_.length() > 1) {
191  norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
192  for (b = 1; b < norm_truth_word_.length(); ++b) {
193  norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
194  if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
195  norm_box_tolerance_) &&
196  (abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
197  norm_box_tolerance_)) {
198  begin2_truth_index = b;
199  debug_str += "Split found";
200  break;
201  }
202  }
203  debug_str += '\n';
204  }
205  }
206  // Populate truth information in word and word2 with the first and second
207  // part of the original truth.
208  if (begin2_truth_index > 0) {
209  bundle1->truth_has_char_boxes_ = true;
210  bundle1->norm_box_tolerance_ = norm_box_tolerance_;
211  bundle2->truth_has_char_boxes_ = true;
212  bundle2->norm_box_tolerance_ = norm_box_tolerance_;
213  BlamerBundle *curr_bb = bundle1;
214  for (b = 0; b < norm_truth_word_.length(); ++b) {
215  if (b == begin2_truth_index) curr_bb = bundle2;
216  curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
217  curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
218  curr_bb->truth_text_.push_back(truth_text_[b]);
219  }
220  } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
221  bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
222  bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
223  } else {
224  debug_str += "Truth split not found";
225  debug_str += truth_has_char_boxes_ ?
226  "\n" : " (no truth char boxes)\n";
227  bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
228  bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
229  }
230 }
231 
232 // "Joins" the blames from bundle1 and bundle2 into *this.
234  const BlamerBundle& bundle2, bool debug) {
235  STRING debug_str;
236  IncorrectResultReason irr = incorrect_result_reason_;
237  if (irr != IRR_NO_TRUTH_SPLIT) debug_str = "";
238  if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
239  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
240  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
241  debug_str += "Blame from part 1: ";
242  debug_str += bundle1.debug_;
243  irr = bundle1.incorrect_result_reason_;
244  }
245  if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
246  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
247  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
248  debug_str += "Blame from part 2: ";
249  debug_str += bundle2.debug_;
250  if (irr == IRR_CORRECT) {
251  irr = bundle2.incorrect_result_reason_;
252  } else if (irr != bundle2.incorrect_result_reason_) {
253  irr = IRR_UNKNOWN;
254  }
255  }
256  incorrect_result_reason_ = irr;
257  if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
258  SetBlame(irr, debug_str, nullptr, debug);
259  }
260 }
261 
262 // If a blob with the same bounding box as one of the truth character
263 // bounding boxes is not classified as the corresponding truth character
264 // blames character classifier for incorrect answer.
266  const TBOX& blob_box,
267  const BLOB_CHOICE_LIST& choices,
268  bool debug) {
269  if (!truth_has_char_boxes_ ||
270  incorrect_result_reason_ != IRR_CORRECT)
271  return; // Nothing to do here.
272 
273  for (int b = 0; b < norm_truth_word_.length(); ++b) {
274  const TBOX &truth_box = norm_truth_word_.BlobBox(b);
275  // Note that we are more strict on the bounding box boundaries here
276  // than in other places (chopper, segmentation search), since we do
277  // not have the ability to check the previous and next bounding box.
278  if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_/2)) {
279  bool found = false;
280  bool incorrect_adapted = false;
281  UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
282  const char *truth_str = truth_text_[b].string();
283  // We promise not to modify the list or its contents, using a
284  // const BLOB_CHOICE* below.
285  BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
286  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
287  choices_it.forward()) {
288  const BLOB_CHOICE* choice = choices_it.data();
289  if (strcmp(truth_str, unicharset.get_normed_unichar(
290  choice->unichar_id())) == 0) {
291  found = true;
292  break;
293  } else if (choice->IsAdapted()) {
294  incorrect_adapted = true;
295  incorrect_adapted_id = choice->unichar_id();
296  }
297  } // end choices_it for loop
298  if (!found) {
299  STRING debug_str = "unichar ";
300  debug_str += truth_str;
301  debug_str += " not found in classification list";
302  SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
303  } else if (incorrect_adapted) {
304  STRING debug_str = "better rating for adapted ";
305  debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
306  debug_str += " than for correct ";
307  debug_str += truth_str;
308  SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
309  }
310  break;
311  }
312  } // end iterating over blamer_bundle->norm_truth_word
313 }
314 
315 // Checks whether chops were made at all the character bounding box
316 // boundaries in word->truth_word. If not - blames the chopper for an
317 // incorrect answer.
318 void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) {
319  if (NoTruth() || !truth_has_char_boxes_ ||
321  return;
322  }
323  STRING debug_str;
324  bool missing_chop = false;
325  int num_blobs = word->chopped_word->blobs.size();
326  int box_index = 0;
327  int blob_index = 0;
328  int16_t truth_x = -1;
329  while (box_index < truth_word_.length() && blob_index < num_blobs) {
330  truth_x = norm_truth_word_.BlobBox(box_index).right();
331  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
332  if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
333  ++blob_index;
334  continue; // encountered an extra chop, keep looking
335  } else if (curr_blob->bounding_box().right() >
336  truth_x + norm_box_tolerance_) {
337  missing_chop = true;
338  break;
339  } else {
340  ++blob_index;
341  }
342  }
343  if (missing_chop || box_index < norm_truth_word_.length()) {
344  STRING debug_str;
345  if (missing_chop) {
346  debug_str.add_str_int("Detected missing chop (tolerance=",
347  norm_box_tolerance_);
348  debug_str += ") at Bounding Box=";
349  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
350  curr_blob->bounding_box().print_to_str(&debug_str);
351  debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
352  } else {
353  debug_str.add_str_int("Missing chops for last ",
354  norm_truth_word_.length() - box_index);
355  debug_str += " truth box(es)";
356  }
357  debug_str += "\nMaximally chopped word boxes:\n";
358  for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
359  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
360  curr_blob->bounding_box().print_to_str(&debug_str);
361  debug_str += '\n';
362  }
363  debug_str += "Truth bounding boxes:\n";
364  for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
365  norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
366  debug_str += '\n';
367  }
368  SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
369  }
370 }
371 
372 // Blames the classifier or the language model if, after running only the
373 // chopper, best_choice is incorrect and no blame has been yet set.
374 // Blames the classifier if best_choice is classifier's top choice and is a
375 // dictionary word (i.e. language model could not have helped).
376 // Otherwise, blames the language model (formerly permuter word adjustment).
378  const WERD_RES* word,
379  const UNICHARSET& unicharset, bool valid_permuter, bool debug) {
380  if (valid_permuter) {
381  // Find out whether best choice is a top choice.
382  best_choice_is_dict_and_top_choice_ = true;
383  for (int i = 0; i < word->best_choice->length(); ++i) {
384  BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
385  ASSERT_HOST(!blob_choice_it.empty());
386  BLOB_CHOICE *first_choice = nullptr;
387  for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
388  blob_choice_it.forward()) { // find first non-fragment choice
389  if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
390  first_choice = blob_choice_it.data();
391  break;
392  }
393  }
394  ASSERT_HOST(first_choice != nullptr);
395  if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
396  best_choice_is_dict_and_top_choice_ = false;
397  break;
398  }
399  }
400  }
401  STRING debug_str;
402  if (best_choice_is_dict_and_top_choice_) {
403  debug_str = "Best choice is: incorrect, top choice, dictionary word";
404  debug_str += " with permuter ";
405  debug_str += word->best_choice->permuter_name();
406  } else {
407  debug_str = "Classifier/Old LM tradeoff is to blame";
408  }
409  SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER
411  debug_str, word->best_choice, debug);
412 }
413 
414 // Sets up the correct_segmentation_* to mark the correct bounding boxes.
416 #ifndef DISABLED_LEGACY_ENGINE
417  params_training_bundle_.StartHypothesisList();
418 #endif // ndef DISABLED_LEGACY_ENGINE
419  if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
420  return; // Nothing to do here.
421 
422  STRING debug_str;
423  debug_str += "Blamer computing correct_segmentation_cols\n";
424  int curr_box_col = 0;
425  int next_box_col = 0;
426  int num_blobs = word->NumBlobs();
427  if (num_blobs == 0) return; // No blobs to play with.
428  int blob_index = 0;
429  int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
430  for (int truth_idx = 0; blob_index < num_blobs &&
431  truth_idx < norm_truth_word_.length();
432  ++blob_index) {
433  ++next_box_col;
434  int16_t curr_box_x = next_box_x;
435  if (blob_index + 1 < num_blobs)
436  next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
437  int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
438  debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
439  debug_str.add_str_int(" ", truth_x);
440  debug_str += "\n";
441  if (curr_box_x > (truth_x + norm_box_tolerance_)) {
442  break; // failed to find a matching box
443  } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
444  (blob_index + 1 >= num_blobs || // next box can't be included
445  next_box_x > truth_x + norm_box_tolerance_)) {
446  correct_segmentation_cols_.push_back(curr_box_col);
447  correct_segmentation_rows_.push_back(next_box_col-1);
448  ++truth_idx;
449  debug_str.add_str_int("col=", curr_box_col);
450  debug_str.add_str_int(" row=", next_box_col-1);
451  debug_str += "\n";
452  curr_box_col = next_box_col;
453  }
454  }
455  if (blob_index < num_blobs || // trailing blobs
456  correct_segmentation_cols_.length() != norm_truth_word_.length()) {
457  debug_str.add_str_int("Blamer failed to find correct segmentation"
458  " (tolerance=", norm_box_tolerance_);
459  if (blob_index >= num_blobs) debug_str += " blob == nullptr";
460  debug_str += ")\n";
461  debug_str.add_str_int(" path length ", correct_segmentation_cols_.length());
462  debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
463  debug_str += "\n";
464  SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
465  correct_segmentation_cols_.clear();
466  correct_segmentation_rows_.clear();
467  }
468 }
469 
470 // Returns true if a guided segmentation search is needed.
471 bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
472  return incorrect_result_reason_ == IRR_CORRECT &&
473  !segsearch_is_looking_for_blame_ &&
474  truth_has_char_boxes_ &&
475  !ChoiceIsCorrect(best_choice);
476 }
477 
478 #if !defined(DISABLED_LEGACY_ENGINE)
479 // Setup ready to guide the segmentation search to the correct segmentation.
480 // The callback pp_cb is used to avoid a cyclic dependency.
481 // It calls into LMPainPoints::GenerateForBlamer by pre-binding the
482 // WERD_RES, and the LMPainPoints itself.
483 // pp_cb must be a permanent callback, and should be deleted by the caller.
485  MATRIX* ratings, UNICHAR_ID wildcard_id,
486  bool debug, STRING *debug_str,
488  segsearch_is_looking_for_blame_ = true;
489  if (debug) {
490  tprintf("segsearch starting to look for blame\n");
491  }
492  // Fill pain points for any unclassifed blob corresponding to the
493  // correct segmentation state.
494  *debug_str += "Correct segmentation:\n";
495  for (int idx = 0; idx < correct_segmentation_cols_.length(); ++idx) {
496  debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
497  debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
498  *debug_str += "\n";
499  if (!ratings->Classified(correct_segmentation_cols_[idx],
500  correct_segmentation_rows_[idx],
501  wildcard_id) &&
502  !cb->Run(correct_segmentation_cols_[idx],
503  correct_segmentation_rows_[idx])) {
504  segsearch_is_looking_for_blame_ = false;
505  *debug_str += "\nFailed to insert pain point\n";
506  SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
507  break;
508  }
509  } // end for blamer_bundle->correct_segmentation_cols/rows
510 }
511 #endif // !defined(DISABLED_LEGACY_ENGINE)
512 
513 // Returns true if the guided segsearch is in progress.
515  return segsearch_is_looking_for_blame_;
516 }
517 
518 // The segmentation search has ended. Sets the blame appropriately.
520  bool debug, STRING *debug_str) {
521  // If we are still looking for blame (i.e. best_choice is incorrect, but a
522  // path representing the correct segmentation could be constructed), we can
523  // blame segmentation search pain point prioritization if the rating of the
524  // path corresponding to the correct segmentation is better than that of
525  // best_choice (i.e. language model would have done the correct thing, but
526  // because of poor pain point prioritization the correct segmentation was
527  // never explored). Otherwise we blame the tradeoff between the language model
528  // and the classifier, since even after exploring the path corresponding to
529  // the correct segmentation incorrect best_choice would have been chosen.
530  // One special case when we blame the classifier instead is when best choice
531  // is incorrect, but it is a dictionary word and it classifier's top choice.
532  if (segsearch_is_looking_for_blame_) {
533  segsearch_is_looking_for_blame_ = false;
534  if (best_choice_is_dict_and_top_choice_) {
535  *debug_str = "Best choice is: incorrect, top choice, dictionary word";
536  *debug_str += " with permuter ";
537  *debug_str += best_choice->permuter_name();
538  SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
539  } else if (best_correctly_segmented_rating_ <
540  best_choice->rating()) {
541  *debug_str += "Correct segmentation state was not explored";
542  SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
543  } else {
544  if (best_correctly_segmented_rating_ >=
546  *debug_str += "Correct segmentation paths were pruned by LM\n";
547  } else {
548  debug_str->add_str_double("Best correct segmentation rating ",
549  best_correctly_segmented_rating_);
550  debug_str->add_str_double(" vs. best choice rating ",
551  best_choice->rating());
552  }
553  SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
554  }
555  }
556 }
557 
558 // If the bundle is null or still does not indicate the correct result,
559 // fix it and use some backup reason for the blame.
561  if (word->blamer_bundle == nullptr) {
563  word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
565  } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
566  word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
568  } else {
570  IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
571  if (irr == IRR_CORRECT && !correct) {
572  STRING debug_str = "Choice is incorrect after recognition";
573  word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice,
574  debug);
575  } else if (irr != IRR_CORRECT && correct) {
576  if (debug) {
577  tprintf("Corrected %s\n", word->blamer_bundle->debug_.string());
578  }
579  word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
580  word->blamer_bundle->debug_ = "";
581  }
582  }
583 }
584 
585 // Sets the misadaption debug if this word is incorrect, as this word is
586 // being adapted to.
588  bool debug) {
589  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
590  !ChoiceIsCorrect(best_choice)) {
591  misadaption_debug_ ="misadapt to word (";
592  misadaption_debug_ += best_choice->permuter_name();
593  misadaption_debug_ += "): ";
594  FillDebugString("", best_choice, &misadaption_debug_);
595  if (debug) {
596  tprintf("%s\n", misadaption_debug_.string());
597  }
598  }
599 }
int UNICHAR_ID
Definition: unichar.h:34
bool empty() const
Definition: genericvector.h:91
const char kBlameSegsearchHeur[]
Definition: blamer.cpp:41
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
static const char * permuter_name(uint8_t permuter)
Definition: ratngs.cpp:198
Definition: blobs.h:418
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:49
static const char * IncorrectReasonName(IncorrectResultReason irr)
Definition: blamer.cpp:64
int length() const
Definition: ratngs.h:293
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
bool IsAdapted() const
Definition: ratngs.h:132
float rating() const
Definition: ratngs.h:317
const char kBlameClassifier[]
Definition: blamer.cpp:37
bool NoTruth() const
Definition: blamer.h:123
bool x_almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:253
int16_t x
Definition: blobs.h:93
int16_t y
Definition: blobs.h:94
Definition: matrix.h:578
Definition: blobs.h:284
bool GuidedSegsearchStillGoing() const
Definition: blamer.cpp:514
BlamerBundle * blamer_bundle
Definition: pageres.h:252
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:759
static void LastChanceBlame(bool debug, WERD_RES *word)
Definition: blamer.cpp:560
const char * IncorrectReason() const
Definition: blamer.cpp:68
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
Definition: blamer.cpp:519
virtual R Run(A1, A2)=0
const char * string() const
Definition: strngs.cpp:194
const UNICHARSET * unicharset() const
Definition: ratngs.h:290
int length() const
Definition: genericvector.h:86
void SetRejectedTruth()
Definition: blamer.cpp:113
int16_t left() const
Definition: rect.h:72
const char kBlameChopper[]
Definition: blamer.cpp:38
BlamerBundle()
Definition: blamer.h:104
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
int32_t length() const
Definition: strngs.cpp:189
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const
Definition: blamer.cpp:471
TWERD * chopped_word
Definition: pageres.h:212
float x_scale() const
Definition: normalis.h:267
IncorrectResultReason
Definition: blamer.h:51
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, TessResultCallback2< bool, int, int > *pp_cb)
Definition: blamer.cpp:484
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:335
void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box)
Definition: blamer.cpp:74
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:453
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
int16_t bottom() const
Definition: rect.h:65
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:119
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:734
void print_to_str(STRING *str) const
Definition: rect.cpp:175
void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug)
Definition: blamer.cpp:233
void truncate_at(int32_t index)
Definition: strngs.cpp:265
const char kBlameUnknown[]
Definition: blamer.cpp:47
const char kBlameNoTruth[]
Definition: blamer.cpp:46
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
const char kBlameAdaption[]
Definition: blamer.cpp:44
void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
Definition: blamer.cpp:265
const char kBlameNoTruthSplit[]
Definition: blamer.cpp:45
Definition: blobs.h:51
const char kBlameClassOldLMTradeoff[]
Definition: blamer.cpp:43
Definition: strngs.h:45
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
Definition: blamer.cpp:131
void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const
Definition: blamer.cpp:177
static const float kBadRating
Definition: ratngs.h:265
Definition: rect.h:34
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
const char kBlameCorrect[]
Definition: blamer.cpp:36
const char kBlamePageLayout[]
Definition: blamer.cpp:40
void SetupCorrectSegmentation(const TWERD *word, bool debug)
Definition: blamer.cpp:415
int length() const
Definition: boxword.h:83
int push_back(T object)
void SetChopperBlame(const WERD_RES *word, bool debug)
Definition: blamer.cpp:318
void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug)
Definition: blamer.cpp:587
int size() const
Definition: genericvector.h:72
TBOX bounding_box() const
Definition: blobs.cpp:468
WERD_CHOICE * best_choice
Definition: pageres.h:241
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
Definition: unicharset.cpp:259
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:153
const char kBlameSegsearchPP[]
Definition: blamer.cpp:42
WERD_RES * word() const
Definition: pageres.h:754
STRING TruthString() const
Definition: blamer.h:114
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:377
void add_str_double(const char *str, double number)
Definition: strngs.cpp:387
int16_t right() const
Definition: rect.h:79
#define ASSERT_HOST(x)
Definition: errcode.h:88
int16_t top() const
Definition: rect.h:58
void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box)
Definition: blamer.cpp:94
const char kBlameClassLMTradeoff[]
Definition: blamer.cpp:39
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:828
const STRING & debug() const
Definition: blamer.h:130
bool Classified(int col, int row, int wildcard_id) const
Definition: matrix.cpp:36