Skip to content

Commit 38c4da2

Browse files
committed
🐛 fix potential problem where the words can be re-arranged by the user when calculating lines.
1 parent c047050 commit 38c4da2

File tree

2 files changed

+15
-12
lines changed

2 files changed

+15
-12
lines changed

mindee/fields/ocr.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,17 @@ def __str__(self) -> str:
4141
class OcrPage:
4242
"""OCR extraction for a single page."""
4343

44-
all_words: List[OcrWord]
45-
"""All the words on the page, in semi-random order."""
44+
_all_words: List[OcrWord]
4645
_lines: List[OcrLine]
4746

4847
def __init__(self, prediction: TypeApiPrediction):
49-
self.all_words = [
48+
self._all_words = [
5049
OcrWord(word_prediction) for word_prediction in prediction["all_words"]
5150
]
51+
# make sure words are sorted from top to bottom
52+
self._all_words.sort(
53+
key=lambda item: get_min_max_y(item.polygon).min, reverse=False
54+
)
5255
self._lines = []
5356

5457
@staticmethod
@@ -70,14 +73,9 @@ def _to_lines(self) -> List[OcrLine]:
7073
indexes: List[int] = []
7174
lines: List[OcrLine] = []
7275

73-
# make sure words are sorted from top to bottom
74-
self.all_words.sort(
75-
key=lambda item: get_min_max_y(item.polygon).min, reverse=False
76-
)
77-
78-
for _ in self.all_words:
76+
for _ in self._all_words:
7977
line: OcrLine = OcrLine()
80-
for idx, word in enumerate(self.all_words):
78+
for idx, word in enumerate(self._all_words):
8179
if idx in indexes:
8280
continue
8381
if current is None:
@@ -102,6 +100,11 @@ def all_lines(self) -> List[OcrLine]:
102100
self._lines = self._to_lines()
103101
return self._lines
104102

103+
@property
104+
def all_words(self) -> List[OcrWord]:
105+
"""All the words on the page, sorted from top to bottom by their polygon's minimum Y."""
106+
return self._all_words
107+
105108
def __str__(self) -> str:
106109
return "\n".join(str(line) for line in self.all_lines) + "\n"
107110

mindee/geometry.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def get_min_max_x(points: Points) -> MinMax:
208208

209209
def is_point_in_polygon_x(point: Point, polygon: Polygon) -> bool:
210210
"""
211-
Determine if the Point is in the Polygon's Y-axis.
211+
Determine if the Point is in the Polygon's X-axis.
212212
213213
:param point: Point to compare
214214
:param polygon: Polygon to look into
@@ -219,7 +219,7 @@ def is_point_in_polygon_x(point: Point, polygon: Polygon) -> bool:
219219

220220
def is_point_in_x(point: Point, min_x: float, max_x: float) -> bool:
221221
"""
222-
Determine if the Point is in the Polygon's Y-axis.
222+
Determine if the Point is within the X-axis interval.
223223
224224
:param point: Point to compare
225225
:param min_x: Minimum X-axis value

0 commit comments

Comments
 (0)