11use regex:: Regex ;
2+ use std:: collections:: HashMap ;
23use std:: fs:: File ;
34use std:: io:: { BufReader , Read } ;
45use std:: path:: Path ;
5- use xml_dom:: level2:: { Attribute , Node , RefNode } ;
6+ use xml_dom:: level2:: { Attribute , Node , RefNode , Element } ;
67use xml_dom:: parser:: read_reader;
78use unicode_bom:: Bom ;
89use walkdir:: WalkDir ;
@@ -29,21 +30,43 @@ impl XMLUtil {
2930 }
3031
3132 pub fn replace_xml ( dir : & str , src_file : & str , pattern : & str , replace : & str , output_file : & Option < & str > ) {
33+ let ( _, files) = Self :: get_files_with_content_type ( dir,
34+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" ) ;
35+
3236 let out_file = match output_file {
3337 Some ( of) => of,
3438 None => src_file
3539 } ;
3640
37- Self :: snr_xml ( Mode :: Value , dir, src_file, Some ( vec ! ( "word/document(\\ d*).xml" ) ) , Some ( pattern) , Some ( replace) , Some ( out_file) ) ;
41+ let fref = files. iter ( ) . map ( AsRef :: as_ref) . collect ( ) ;
42+ Self :: snr_xml ( Mode :: Value , dir, src_file, Some ( fref) , Some ( pattern) , Some ( replace) , Some ( out_file) ) ;
3843 }
3944
4045 pub fn replace_attr ( dir : & str , src_file : & str , pattern : & str , replace : & str , output_file : & Option < & str > ) {
46+ let ( defaults, files) = Self :: get_files_with_content_type ( dir,
47+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" ) ;
48+ let rels_extension = & defaults[ "application/vnd.openxmlformats-package.relationships+xml" ] ;
49+
50+ let mut rels_files = vec ! ( ) ;
51+ for f in files {
52+ let last_slash = f. rfind ( '/' ) . unwrap ( ) ;
53+ let mut new_fn = String :: new ( ) ;
54+ new_fn. push_str ( & f[ ..last_slash] ) ;
55+ new_fn. push_str ( "/_" ) ;
56+ new_fn. push_str ( rels_extension) ;
57+ new_fn. push_str ( & f[ last_slash..] ) ;
58+ new_fn. push ( '.' ) ;
59+ new_fn. push_str ( rels_extension) ;
60+ rels_files. push ( new_fn) ;
61+ }
62+
4163 let out_file = match output_file {
4264 Some ( of) => of,
4365 None => src_file
4466 } ;
4567
46- Self :: snr_xml ( Mode :: Attribute , dir, src_file, Some ( vec ! ( "word/_rels/document(\\ d*).xml.rels" ) ) , Some ( pattern) , Some ( replace) , Some ( out_file) ) ;
68+ let fref = rels_files. iter ( ) . map ( AsRef :: as_ref) . collect ( ) ;
69+ Self :: snr_xml ( Mode :: Attribute , dir, src_file, Some ( fref) , Some ( pattern) , Some ( replace) , Some ( out_file) ) ;
4770 }
4871
4972 fn snr_xml ( mode : Mode , dir : & str , src_file : & str , files : Option < Vec < & str > > , pattern : Option < & str > , replace : Option < & str > , output_file : Option < & str > ) {
@@ -64,7 +87,7 @@ impl XMLUtil {
6487 let sub_path = FileUtil :: get_sub_path ( entry. path ( ) , & base_dir) ;
6588
6689 if let Some ( file_list) = & files {
67- if !Self :: list_matches ( & file_list, & sub_path. as_str ( ) ) {
90+ if !file_list. contains ( & sub_path. as_str ( ) ) {
6891 continue ;
6992 }
7093 } else {
@@ -120,8 +143,6 @@ impl XMLUtil {
120143
121144 for n in node. child_nodes ( ) {
122145 for ( _, mut attr) in n. attributes ( ) {
123- // let v = av.value();
124- // println!("Name: {} = {:?}", an, v);
125146 if let Some ( v) = attr. value ( ) {
126147 if v. len ( ) == 0 {
127148 continue ;
@@ -187,15 +208,71 @@ impl XMLUtil {
187208 Bom :: from ( & mut file)
188209 }
189210
190- fn list_matches ( file_list : & [ & str ] , name : & str ) -> bool {
191- for file_pat in file_list {
192- let regex = Regex :: new ( * file_pat) . unwrap ( ) ;
193- if regex. is_match ( name) {
194- return true ;
211+ fn get_content_types ( dir : & str ) -> ( HashMap < String , String > , HashMap < String , String > ) {
212+ let mut defaults = HashMap :: new ( ) ;
213+ let mut mappings = HashMap :: new ( ) ;
214+
215+ let path = Path :: new ( dir) . join ( "[Content_Types].xml" ) ;
216+
217+ let bom = Self :: get_bom ( & path) ;
218+ let f = File :: open ( path) . unwrap ( ) ; // TODO
219+ let mut r = BufReader :: new ( f) ;
220+
221+ if bom != Bom :: Null {
222+ // Remove the BOM bytes from the stream as they will cause the XML parsing to fail
223+ let len = bom. len ( ) ;
224+ let mut bom_prefix = vec ! [ 0 ; len] ;
225+ r. read_exact ( & mut bom_prefix) . unwrap ( ) ;
226+ }
227+
228+ let dom_res = read_reader ( r) . unwrap ( ) ;
229+ for n in dom_res. child_nodes ( ) {
230+ if n. local_name ( ) == "Types" {
231+ for m in n. child_nodes ( ) {
232+ match m. local_name ( ) . as_str ( ) {
233+ "Default" => {
234+ let en = m. get_attribute ( "Extension" ) ;
235+ let ct = m. get_attribute ( "ContentType" ) ;
236+
237+ if en. is_some ( ) && ct. is_some ( ) {
238+ defaults. insert ( ct. unwrap ( ) , en. unwrap ( ) ) ;
239+ }
240+ } ,
241+ "Override" => {
242+ let pn = m. get_attribute ( "PartName" ) ;
243+ let ct = m. get_attribute ( "ContentType" ) ;
244+
245+ if pn. is_some ( ) && ct. is_some ( ) {
246+ let pns = pn. unwrap ( ) ;
247+ let rel_pn;
248+ if pns. starts_with ( '/' ) {
249+ rel_pn = & pns[ 1 ..] ;
250+ } else {
251+ rel_pn = & pns;
252+ }
253+
254+ mappings. insert ( rel_pn. to_owned ( ) , ct. unwrap ( ) ) ;
255+ }
256+ } ,
257+ _ => { }
258+ }
259+ }
195260 }
196261 }
197262
198- false
263+ ( defaults, mappings)
264+ }
265+
266+ fn get_files_with_content_type ( dir : & str , content_type : & str ) -> ( HashMap < String , String > , Vec < String > ) {
267+ let ( defaults, mappings) = Self :: get_content_types ( dir) ;
268+
269+ let mut result = vec ! ( ) ;
270+ for ( file, ct) in & mappings {
271+ if ct == content_type {
272+ result. push ( file. to_owned ( ) ) ;
273+ }
274+ }
275+ ( defaults, result)
199276 }
200277}
201278
@@ -306,7 +383,8 @@ mod tests {
306383 #[ test]
307384 fn test_replace_both ( ) -> io:: Result < ( ) > {
308385 let orgdir = "./src/test/test_tree3" ;
309- let testdir = testdir ! ( ) ;
386+ let testroot = testdir ! ( ) ;
387+ let testdir = testroot. join ( "subdir" ) ;
310388
311389 copy_dir_all ( orgdir, & testdir) ?;
312390
@@ -319,15 +397,15 @@ mod tests {
319397 assert ! ( before. contains( ">www.example.com<" ) , "Precondition" ) ;
320398 assert ! ( !before. contains( "zzz" ) , "Precondition" ) ;
321399
322- let before_rels = fs:: read_to_string ( "./src/test/test_tree3/word/_rels/document3 .xml.rels" ) ?;
400+ let before_rels = fs:: read_to_string ( "./src/test/test_tree3/word/_rels/document2 .xml.rels" ) ?;
323401 assert ! ( before_rels. contains( "Target=\" http://www.example.com/\" " ) , "Precondition" ) ;
324402
325403 XMLUtil :: replace_xml ( & testdir. to_string_lossy ( ) , "my-source.docx" ,
326404 "[Ss]ome" , "zzz" ,
327- & Some ( & testdir . join ( "output.docx" ) . to_string_lossy ( ) ) ) ;
405+ & Some ( & testroot . join ( "output.docx" ) . to_string_lossy ( ) ) ) ;
328406 XMLUtil :: replace_attr ( & testdir. to_string_lossy ( ) , "my-source.docx" ,
329407 "www.example.com" , "foobar.org" ,
330- & Some ( & testdir . join ( "output-2.docx" ) . to_string_lossy ( ) ) ) ;
408+ & Some ( & testroot . join ( "output-2.docx" ) . to_string_lossy ( ) ) ) ;
331409
332410 // Check that the replacement worked as expected
333411 let after = fs:: read_to_string ( testdir. join ( "word/document2.xml" ) ) ?;
@@ -339,7 +417,7 @@ mod tests {
339417 assert ! ( !after. contains( "some" ) ) ;
340418 assert ! ( !after. contains( "Some" ) ) ;
341419
342- let after_rels = fs:: read_to_string ( testdir. join ( "word/_rels/document3 .xml.rels" ) ) ?;
420+ let after_rels = fs:: read_to_string ( testdir. join ( "word/_rels/document2 .xml.rels" ) ) ?;
343421 assert ! ( after_rels. contains( "Target=\" http://foobar.org/\" " ) ) ;
344422
345423 Ok ( ( ) )
0 commit comments