File tree Expand file tree Collapse file tree 2 files changed +38
-0
lines changed Expand file tree Collapse file tree 2 files changed +38
-0
lines changed Original file line number Diff line number Diff line change 1+ """
2+ Test for issue #4503 in pymupdf:
3+ Correct recognition of strikeout and underline styles in text spans.
4+ """
5+
6+ import os
7+ import pymupdf
8+ from pymupdf import mupdf
9+
# Style flag values taken from the MuPDF bindings. The test below uses them
# as bit masks against a span's "char_flags" value.
STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT
UNDERLINE = mupdf.FZ_STEXT_UNDERLINE
12+
13+
def test_4503():
    """Check strikeout/underline detection for one known span (issue #4503).

    Opens ``resources/test-4503.pdf`` located next to this file, extracts the
    first page as a "dict" with style collection enabled, and looks for a span
    whose text equals the expected sentence fragment exactly. That span must
    have the strikeout bit set and the underline bit clear in "char_flags".

    Previously, the text was broken into multiple spans with span breaks at
    every space, and some parts were not detected as strikeout at all.
    """
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    text = "the right to request the state to review and, if appropriate,"
    filename = os.path.join(scriptdir, "resources", "test-4503.pdf")
    flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES

    # Open the document as a context manager so it is closed even when an
    # assertion below fails. Only the extracted Python data is used after
    # the document has been closed.
    with pymupdf.open(filename) as doc:
        page = doc[0]
        blocks = page.get_text("dict", flags=flags)["blocks"]

    # An exact text match is intentional: if the text were split into
    # several spans again, no span would match and the test would fail here.
    spans = [
        span
        for block in blocks
        for line in block["lines"]
        for span in line["spans"]
        if span["text"] == text
    ]
    assert spans, "No spans found with the specified text"
    span = spans[0]

    char_flags = span["char_flags"]
    assert char_flags & STRIKEOUT, f"strikeout flag not set: {char_flags=}"
    assert not char_flags & UNDERLINE, f"underline flag set: {char_flags=}"
You can’t perform that action at this time.
0 commit comments