@@ -11,7 +11,9 @@ def do_tokenize(self, text, result):
1111 def test_phone (self ):
1212 return self .do_tokenize (
1313 "Phone:855-349-1914" ,
14- [TextToken (chars = 'Phone:855-349-1914' , position = 0 , length = 18 )]
14+ [TextToken (chars = 'Phone' , position = 0 , length = 5 ),
15+ TextToken (chars = ':' , position = 5 , length = 1 ),
16+ TextToken (chars = '855-349-1914' , position = 6 , length = 12 )]
1517 )
1618
1719 @pytest .mark .xfail
@@ -21,22 +23,26 @@ def test_hyphen_mid(self):
2123 [TextToken (chars = 'Powai' , position = 0 , length = 5 ),
2224 TextToken (chars = 'Campus' , position = 6 , length = 6 ),
2325 TextToken (chars = ',' , position = 12 , length = 1 ),
24- TextToken (chars = 'Mumbai-400077' , position = 14 , length = 13 )]
26+ TextToken (chars = 'Mumbai' , position = 14 , length = 6 ),
27+ TextToken (chars = '-' , position = 20 , length = 1 ),
28+ TextToken (chars = '400077' , position = 21 , length = 6 )]
2529 )
2630
2731 @pytest .mark .xfail
2832 def test_hyphen_end (self ):
2933 return self .do_tokenize (
3034 "Saudi Arabia-" ,
3135 [TextToken (chars = 'Saudi' , position = 0 , length = 5 ),
32- TextToken (chars = 'Arabia-' , position = 6 , length = 7 )]
36+ TextToken (chars = 'Arabia' , position = 6 , length = 6 ),
37+ TextToken (chars = '-' , position = 12 , length = 1 )]
3338 )
3439
3540 @pytest .mark .xfail
36- def test_hyphen_end (self ):
41+ def test_slash_end (self ):
3742 return self .do_tokenize (
3843 "1 5858/ 1800" ,
3944 [TextToken (chars = '1' , position = 0 , length = 1 ),
40- TextToken (chars = '5858/' , position = 2 , length = 5 ),
45+ TextToken (chars = '5858' , position = 2 , length = 4 ),
46+ TextToken (chars = '/' , position = 6 , length = 1 ),
4147 TextToken (chars = '1800' , position = 8 , length = 4 )]
4248 )
0 commit comments