66
77
88def main ():
9- cmdline = argparse .ArgumentParser (description = ('utility to convert annotations'
10- 'from GATE format to WebAnnotator format' ))
11- cmdline .add_argument ('--input' ,
12- help = 'path to source annotated file' ,
9+ cmdline = argparse .ArgumentParser (description = ('utility '
10+ 'to convert annotations '
11+ 'from GATE format to '
12+ 'WebAnnotator format' ))
13+ cmdline .add_argument ('--GATE' ,
14+ help = 'path to file annotated in GATE format' ,
1315 type = str ,
1416 required = True )
1517 cmdline .add_argument ('--sample' ,
16- help = 'path to already marked html' ,
18+ help = ('path to file annotated in WebAnnotator format '
19+ 'for colors and entities transfer' ),
1720 type = str ,
1821 required = True )
19- cmdline .add_argument ('--output ' ,
20- help = 'path to result annotated file' ,
22+ cmdline .add_argument ('--WebAnnotator ' ,
23+ help = 'path to result file in WebAnnotator format ' ,
2124 type = str ,
2225 required = True )
2326 cmdline .add_argument ('--loglevel' ,
@@ -38,12 +41,12 @@ def main():
3841
3942 gate = webstruct .loaders .GateLoader (known_entities = entities )
4043 tokenizer = webstruct .HtmlTokenizer (tagset = entities )
41- with open (args .input , 'rb' ) as reader :
44+ with open (args .GATE , 'rb' ) as reader :
4245 data = reader .read ()
4346 tree = gate .loadbytes (data )
4447 tokens , annotations = tokenizer .tokenize_single (tree )
4548 tree = to_webannotator (tree , entity_colors = colors )
46- with open (args .output , 'wb' ) as writer :
49+ with open (args .WebAnnotator , 'wb' ) as writer :
4750 tree .write (writer , method = 'html' , pretty_print = True )
4851
4952if __name__ == "__main__" :
0 commit comments