tesseract  4.1.1
seam.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: seam.cpp (Formerly seam.c)
5  * Author: Mark Seaman, OCR Technology
6  *
7  * (c) Copyright 1987, Hewlett-Packard Company.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  *********************************************************************************/
19 /*----------------------------------------------------------------------
20  I n c l u d e s
21 ----------------------------------------------------------------------*/
22 #include "seam.h"
23 #include "blobs.h"
24 #include "tprintf.h"
25 
26 /*----------------------------------------------------------------------
27  Public Function Code
28 ----------------------------------------------------------------------*/
29 
30 // Returns the bounding box of all the points in the seam.
32  TBOX box(location_.x, location_.y, location_.x, location_.y);
33  for (int s = 0; s < num_splits_; ++s) {
34  box += splits_[s].bounding_box();
35  }
36  return box;
37 }
38 
39 // Returns true if other can be combined into *this.
40 bool SEAM::CombineableWith(const SEAM& other, int max_x_dist,
41  float max_total_priority) const {
42  int dist = location_.x - other.location_.x;
43  if (-max_x_dist < dist && dist < max_x_dist &&
44  num_splits_ + other.num_splits_ <= kMaxNumSplits &&
45  priority_ + other.priority_ < max_total_priority &&
46  !OverlappingSplits(other) && !SharesPosition(other)) {
47  return true;
48  } else {
49  return false;
50  }
51 }
52 
53 // Combines other into *this. Only works if CombinableWith returned true.
54 void SEAM::CombineWith(const SEAM& other) {
55  priority_ += other.priority_;
56  location_ += other.location_;
57  location_ /= 2;
58 
59  for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
60  splits_[num_splits_++] = other.splits_[s];
61 }
62 
63 // Returns true if the splits in *this SEAM appear OK in the sense that they
64 // do not cross any outlines and do not chop off any ridiculously small
65 // pieces.
66 bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
67  // TODO(rays) Try testing all the splits. Duplicating original code for now,
68  // which tested only the first.
69  return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
70 }
71 
72 // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
73 // seam, which is about to be inserted at insert_index. Returns false if
74 // any of the computations fails, as this indicates an invalid chop.
75 // widthn_/widthp_ are only changed if modify is true.
77  const GenericVector<TBLOB*>& blobs,
78  int insert_index, bool modify) {
79  for (int s = 0; s < insert_index; ++s) {
80  if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false;
81  }
82  if (!FindBlobWidth(blobs, insert_index, modify)) return false;
83  for (int s = insert_index; s < seams.size(); ++s) {
84  if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false;
85  }
86  return true;
87 }
88 
89 // Computes the widthp_/widthn_ range. Returns false if not all the splits
90 // are accounted for. widthn_/widthp_ are only changed if modify is true.
91 bool SEAM::FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
92  bool modify) {
93  int num_found = 0;
94  if (modify) {
95  widthp_ = 0;
96  widthn_ = 0;
97  }
98  for (int s = 0; s < num_splits_; ++s) {
99  const SPLIT& split = splits_[s];
100  bool found_split = split.ContainedByBlob(*blobs[index]);
101  // Look right.
102  for (int b = index + 1; !found_split && b < blobs.size(); ++b) {
103  found_split = split.ContainedByBlob(*blobs[b]);
104  if (found_split && b - index > widthp_ && modify) widthp_ = b - index;
105  }
106  // Look left.
107  for (int b = index - 1; !found_split && b >= 0; --b) {
108  found_split = split.ContainedByBlob(*blobs[b]);
109  if (found_split && index - b > widthn_ && modify) widthn_ = index - b;
110  }
111  if (found_split) ++num_found;
112  }
113  return num_found == num_splits_;
114 }
115 
116 // Splits this blob into two blobs by applying the splits included in
117 // *this SEAM
118 void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const {
119  for (int s = 0; s < num_splits_; ++s) {
120  splits_[s].SplitOutlineList(blob->outlines);
121  }
122  blob->ComputeBoundingBoxes();
123 
124  divide_blobs(blob, other_blob, italic_blob, location_);
125 
127  other_blob->EliminateDuplicateOutlines();
128 
129  blob->CorrectBlobOrder(other_blob);
130 }
131 
132 // Undoes ApplySeam by removing the seam between these two blobs.
133 // Produces one blob as a result, and deletes other_blob.
134 void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const {
135  if (blob->outlines == nullptr) {
136  blob->outlines = other_blob->outlines;
137  other_blob->outlines = nullptr;
138  }
139 
140  TESSLINE* outline = blob->outlines;
141  while (outline->next) outline = outline->next;
142  outline->next = other_blob->outlines;
143  other_blob->outlines = nullptr;
144  delete other_blob;
145 
146  for (int s = 0; s < num_splits_; ++s) {
147  splits_[s].UnsplitOutlineList(blob);
148  }
149  blob->ComputeBoundingBoxes();
151 }
152 
153 // Prints everything in *this SEAM.
154 void SEAM::Print(const char* label) const {
155  tprintf(label);
156  tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y,
157  widthp_, widthn_);
158  for (int s = 0; s < num_splits_; ++s) {
159  splits_[s].Print();
160  if (s + 1 < num_splits_) tprintf(", ");
161  }
162  tprintf("\n");
163 }
164 
165 // Prints a collection of SEAMs.
166 /* static */
167 void SEAM::PrintSeams(const char* label, const GenericVector<SEAM*>& seams) {
168  if (!seams.empty()) {
169  tprintf("%s\n", label);
170  for (int x = 0; x < seams.size(); ++x) {
171  tprintf("%2d: ", x);
172  seams[x]->Print("");
173  }
174  tprintf("\n");
175  }
176 }
177 
178 #ifndef GRAPHICS_DISABLED
179 // Draws the seam in the given window.
180 void SEAM::Mark(ScrollView* window) const {
181  for (int s = 0; s < num_splits_; ++s) splits_[s].Mark(window);
182 }
183 #endif
184 
185 // Break up the blobs in this chain so that they are all independent.
186 // This operation should undo the affect of join_pieces.
187 /* static */
189  const GenericVector<TBLOB*>& blobs, int first,
190  int last) {
191  for (int x = first; x < last; ++x) seams[x]->Reveal();
192 
193  TESSLINE* outline = blobs[first]->outlines;
194  int next_blob = first + 1;
195 
196  while (outline != nullptr && next_blob <= last) {
197  if (outline->next == blobs[next_blob]->outlines) {
198  outline->next = nullptr;
199  outline = blobs[next_blob]->outlines;
200  ++next_blob;
201  } else {
202  outline = outline->next;
203  }
204  }
205 }
206 
207 // Join a group of base level pieces into a single blob that can then
208 // be classified.
209 /* static */
211  const GenericVector<TBLOB*>& blobs, int first, int last) {
212  TESSLINE* outline = blobs[first]->outlines;
213  if (!outline)
214  return;
215 
216  for (int x = first; x < last; ++x) {
217  SEAM *seam = seams[x];
218  if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide();
219  while (outline->next) outline = outline->next;
220  outline->next = blobs[x + 1]->outlines;
221  }
222 }
223 
224 // Hides the seam so the outlines appear not to be cut by it.
225 void SEAM::Hide() const {
226  for (int s = 0; s < num_splits_; ++s) {
227  splits_[s].Hide();
228  }
229 }
230 
231 // Undoes hide, so the outlines are cut by the seam.
232 void SEAM::Reveal() const {
233  for (int s = 0; s < num_splits_; ++s) {
234  splits_[s].Reveal();
235  }
236 }
237 
238 // Computes and returns, but does not set, the full priority of *this SEAM.
239 float SEAM::FullPriority(int xmin, int xmax, double overlap_knob,
240  int centered_maxwidth, double center_knob,
241  double width_change_knob) const {
242  if (num_splits_ == 0) return 0.0f;
243  for (int s = 1; s < num_splits_; ++s) {
244  splits_[s].SplitOutline();
245  }
246  float full_priority =
247  priority_ +
248  splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth,
249  center_knob, width_change_knob);
250  for (int s = num_splits_ - 1; s >= 1; --s) {
251  splits_[s].UnsplitOutlines();
252  }
253  return full_priority;
254 }
255 
263 void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array) {
264  seam_array->truncate(0);
265  TPOINT location;
266 
267  for (int b = 1; b < word->NumBlobs(); ++b) {
268  TBOX bbox = word->blobs[b - 1]->bounding_box();
269  TBOX nbox = word->blobs[b]->bounding_box();
270  location.x = (bbox.right() + nbox.left()) / 2;
271  location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
272  seam_array->push_back(new SEAM(0.0f, location));
273  }
274 }
void Hide() const
Definition: seam.cpp:225
bool empty() const
Definition: genericvector.h:91
void EliminateDuplicateOutlines()
Definition: blobs.cpp:480
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void Mark(ScrollView *window) const
Definition: seam.cpp:180
int NumBlobs() const
Definition: blobs.h:448
bool FindBlobWidth(const GenericVector< TBLOB *> &blobs, int index, bool modify)
Definition: seam.cpp:91
Definition: blobs.h:418
bool OverlappingSplits(const SEAM &other) const
Definition: seam.h:97
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
void Print(const char *label) const
Definition: seam.cpp:154
int16_t x
Definition: blobs.h:93
void Reveal() const
Definition: split.cpp:65
int16_t y
Definition: blobs.h:94
Definition: blobs.h:284
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:263
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:239
TESSLINE * outlines
Definition: blobs.h:400
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:134
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:210
int16_t left() const
Definition: rect.h:72
Definition: split.h:37
TBOX bounding_box() const
Definition: seam.cpp:31
void truncate(int size)
TESSLINE * next
Definition: blobs.h:281
static void PrintSeams(const char *label, const GenericVector< SEAM *> &seams)
Definition: seam.cpp:167
bool ContainedByBlob(const TBLOB &blob) const
Definition: split.h:65
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:81
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location)
Definition: blobs.cpp:962
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:279
void Reveal() const
Definition: seam.cpp:232
bool SharesPosition(const SEAM &other) const
Definition: seam.h:89
LIST last(LIST var_list)
Definition: oldlist.cpp:190
void SplitOutline() const
Definition: split.cpp:254
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.cpp:40
int16_t bottom() const
Definition: rect.h:65
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:66
void UnsplitOutlines() const
Definition: split.cpp:295
void CorrectBlobOrder(TBLOB *next)
Definition: blobs.cpp:501
void Hide() const
Definition: split.cpp:51
Definition: blobs.h:51
Definition: seam.h:38
Definition: rect.h:34
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:115
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:118
void Print() const
Definition: split.cpp:219
int push_back(T object)
int size() const
Definition: genericvector.h:72
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:235
TBOX bounding_box() const
Definition: split.cpp:44
bool PrepareToInsertSeam(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int insert_index, bool modify)
Definition: seam.cpp:76
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:188
void ComputeBoundingBoxes()
Definition: blobs.cpp:446
int16_t right() const
Definition: rect.h:79
int16_t top() const
Definition: rect.h:58
void CombineWith(const SEAM &other)
Definition: seam.cpp:54