tesseract  4.1.1
fixspace.cpp File Reference
#include "fixspace.h"
#include <cstdint>
#include "blobs.h"
#include "boxword.h"
#include "errcode.h"
#include "normalis.h"
#include "ocrclass.h"
#include "pageres.h"
#include "params.h"
#include "ratngs.h"
#include "rect.h"
#include "stepblob.h"
#include "strngs.h"
#include "tesseractclass.h"
#include "tessvars.h"
#include "tprintf.h"
#include "unichar.h"
#include "unicharset.h"
#include "werd.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define PERFECT_WERDS   999
 

Functions

void initialise_search (WERD_RES_LIST &src_list, WERD_RES_LIST &new_list)
 
transform_to_next_perm()

Examines the current word list to find the smallest word gap size. Then walks the word list closing any gaps of this size by either inserting new combination words or extending existing ones.

The routine COULD be limited to stop it building words longer than N blobs.

If there are no more gaps then it DELETES the entire list and returns the empty list to cause termination.

void transform_to_next_perm (WERD_RES_LIST &words)
 
void fixspace_dbg (WERD_RES *word)
 

Macro Definition Documentation

◆ PERFECT_WERDS

#define PERFECT_WERDS   999

Definition at line 44 of file fixspace.cpp.

Function Documentation

◆ fixspace_dbg()

void fixspace_dbg ( WERD_RES *  word)

Definition at line 822 of file fixspace.cpp.

822  {  // Debug dump of one WERD_RES: bounding box, best-choice text, blob counts, reject map and status flags.
823  TBOX box = word->word->bounding_box();  // NOTE(review): word and its members are dereferenced without null checks - confirm callers guarantee them.
824  const bool show_map_detail = false;  // Hard-wired off, so the per-character dump below never runs as written.
825  int16_t i;
826 
827  box.print();
828  tprintf(" \"%s\" ", word->best_choice->unichar_string().string());
829  tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
830  word->word->cblob_list()->length(),  // first %d: length of the WERD's cblob list
831  word->rebuild_word->NumBlobs(),  // second %d: blob count of rebuild_word
832  word->box_word->length());  // third %d: length of box_word
833  word->reject_map.print(debug_fp);  // Reject map goes to debug_fp rather than through tprintf.
834  tprintf("\n");
835  if (show_map_detail) {
836  tprintf("\"%s\"\n", word->best_choice->unichar_string().string());
837  for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {  // Walks the string one char at a time until the terminator.
838  tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
839  word->reject_map[i].full_print(debug_fp);  // NOTE(review): the same index i is used for the string and the reject map - presumably one map entry per char; confirm.
840  }
841  }
842 
843  tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
844  tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
845 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int NumBlobs() const
Definition: blobs.h:448
REJMAP reject_map
Definition: pageres.h:294
void print(FILE *fp)
Definition: rejctmap.cpp:321
TWERD * rebuild_word
Definition: pageres.h:266
const char * string() const
Definition: strngs.cpp:194
bool tess_accepted
Definition: pageres.h:303
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
TBOX bounding_box() const
Definition: werd.cpp:148
const STRING & unichar_string() const
Definition: ratngs.h:531
void print() const
Definition: rect.h:278
Definition: rect.h:34
WERD * word
Definition: pageres.h:186
void full_print(FILE *fp)
Definition: rejctmap.cpp:333
int length() const
Definition: boxword.h:83
WERD_CHOICE * best_choice
Definition: pageres.h:241
bool done
Definition: pageres.h:305
tesseract::BoxWord * box_word
Definition: pageres.h:272
FILE * debug_fp
Definition: tessvars.cpp:24

◆ initialise_search()

void initialise_search ( WERD_RES_LIST &  src_list,
WERD_RES_LIST &  new_list 
)

Definition at line 204 of file fixspace.cpp.

204  {  // Fills new_list with deep copies of every word in src_list that is not itself a combination.
205  WERD_RES_IT src_it(&src_list);
206  WERD_RES_IT new_it(&new_list);  // NOTE(review): new_list is only appended to here - presumably callers pass it empty; confirm.
207  WERD_RES *src_wd;
208  WERD_RES *new_wd;
209 
210  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {  // One full pass over src_list.
211  src_wd = src_it.data();
212  if (!src_wd->combination) {  // Combination words are skipped; only the other words are copied.
213  new_wd = WERD_RES::deep_copy(src_wd);
214  new_wd->combination = false;
215  new_wd->part_of_combo = false;  // Both combo flags are reset explicitly on the copy, whatever deep_copy carried over.
216  new_it.add_after_then_move(new_wd);  // Appends after the current position and advances, so copies keep source order.
217  }
218  }
219 }
bool combination
Definition: pageres.h:339
bool part_of_combo
Definition: pageres.h:340
static WERD_RES * deep_copy(const WERD_RES *src)
Definition: pageres.h:649

◆ transform_to_next_perm()

void transform_to_next_perm ( WERD_RES_LIST &  words)

Definition at line 399 of file fixspace.cpp.

399  {  // Pass 1 finds the smallest gap between adjacent words; pass 2 joins every pair whose gap is <= that value, or clears the list if no gap exists.
400  WERD_RES_IT word_it(&words);
401  WERD_RES_IT prev_word_it(&words);  // Trails word_it; its data() is used as the left-hand word when a gap is closed.
402  WERD_RES *word;
403  WERD_RES *prev_word;
404  WERD_RES *combo;
405  WERD *copy_word;
406  int16_t prev_right = -INT16_MAX;  // -INT16_MAX is the sentinel for "no previous word seen yet".
407  TBOX box;
408  int16_t gap;
409  int16_t min_gap = INT16_MAX;  // INT16_MAX is the sentinel for "no gap measured".
410 
411  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {  // Pass 1: measure gaps.
412  word = word_it.data();
413  if (!word->part_of_combo) {  // Words flagged part_of_combo are ignored when measuring.
414  box = word->word->bounding_box();
415  if (prev_right > -INT16_MAX) {
416  gap = box.left() - prev_right;  // Distance from the previous counted word's right edge to this word's left edge.
417  if (gap < min_gap)
418  min_gap = gap;
419  }
420  prev_right = box.right();
421  }
422  }
423  if (min_gap < INT16_MAX) {  // At least one gap was measured, so there is something to join.
424  prev_right = -INT16_MAX; // back to start
425  word_it.set_to_list(&words);
426  // Note: we can't use cycle_pt due to inserted combos at start of list.
427  for (; (prev_right == -INT16_MAX) || !word_it.at_first();  // Runs until word_it is back at the first element after at least one word has been counted.
428  word_it.forward()) {
429  word = word_it.data();
430  if (!word->part_of_combo) {
431  box = word->word->bounding_box();
432  if (prev_right > -INT16_MAX) {
433  gap = box.left() - prev_right;
434  if (gap <= min_gap) {  // This gap is one of the smallest: close it.
435  prev_word = prev_word_it.data();
436  if (prev_word->combination) {
437  combo = prev_word;  // The left-hand word is already a combination, so extend it.
438  } else {
439  /* Make a new combination and insert before
440  * the first word being joined. */
441  copy_word = new WERD;
442  *copy_word = *(prev_word->word);
443  // deep copy
444  combo = new WERD_RES(copy_word);
445  combo->combination = true;
446  combo->x_height = prev_word->x_height;
447  prev_word->part_of_combo = true;  // The original left-hand word stays in the list, flagged as absorbed.
448  prev_word_it.add_before_then_move(combo);  // NOTE(review): presumably leaves prev_word_it on combo so later joins extend it; confirm against the iterator API.
449  }
450  combo->word->set_flag(W_EOL, word->word->flag(W_EOL));  // The combo takes its end-of-line flag from the right-hand word.
451  if (word->combination) {
452  combo->word->join_on(word->word);
453  // Move blobs to combo
454  // old combo no longer needed
455  delete word_it.extract();  // Removes the right-hand combination from the list and deletes it.
456  } else {
457  // Copy current wd to combo
458  combo->copy_on(word);
459  word->part_of_combo = true;  // The right-hand word also stays in the list, flagged as absorbed.
460  }
461  combo->done = false;
462  combo->ClearResults();  // The combo's content changed, so done is reset and its results are cleared.
463  } else {
464  prev_word_it = word_it; // catch up
465  }
466  }
467  prev_right = box.right();
468  }
469  }
470  } else {
471  words.clear(); // signal termination
472  }
473 }
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
bool combination
Definition: pageres.h:339
float x_height
Definition: pageres.h:316
bool part_of_combo
Definition: pageres.h:340
void copy_on(WERD_RES *word_res)
Definition: pageres.h:660
int16_t left() const
Definition: rect.h:72
end of line
Definition: werd.h:33
TBOX bounding_box() const
Definition: werd.cpp:148
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
void join_on(WERD *other)
Definition: werd.cpp:199
Definition: rect.h:34
WERD * word
Definition: pageres.h:186
void ClearResults()
Definition: pageres.cpp:1104
bool done
Definition: pageres.h:305
int16_t right() const
Definition: rect.h:79
Definition: werd.h:56