1111//! }
1212
/// Build-script entry point: load the entity table once, then emit each
/// generated source file that the enabled Cargo features ask for.
fn main() {
    // The entity table is only loaded when at least one generator below is
    // compiled in; otherwise the binding does not exist at all.
    #[cfg(any(
        feature = "unescape_fast",
        feature = "unescape",
        feature = "entities"
    ))]
    let entities = load_entities("entities.json");

    // Generated matcher for the `unescape_fast` feature.
    #[cfg(feature = "unescape_fast")]
    generate_matcher_rs(&entities);

    // Entity expansion function for the `unescape` feature.
    #[cfg(feature = "unescape")]
    generate_unescape_entity_rs(&entities);

    // Length constants are shared by the `unescape` and `entities` features.
    #[cfg(any(feature = "unescape", feature = "entities"))]
    generate_entities_length_rs(&entities);

    // Entity map and documentation table for the `entities` feature.
    #[cfg(feature = "entities")]
    generate_entities_rs(&entities);
}
2333
2434/// Generate entities.rs file containing all valid HTML entities in a
25- /// [`phf::Map`] along with a few useful constants. It also generates
26- /// documentation with all entities in a table .
35+ /// [`phf::Map`]. It also generates documentation with a table of all the
36+ /// entities and their expansions .
2737#[ cfg( feature = "entities" ) ]
2838fn generate_entities_rs ( entities : & [ ( String , String ) ] ) {
29- use std:: cmp:: { max, min} ;
3039 use std:: env;
3140 use std:: fs:: File ;
3241 use std:: io:: { BufWriter , Write } ;
@@ -51,28 +60,9 @@ fn generate_entities_rs(entities: &[(String, String)]) {
5160 /// Entity | Codepoints | Glyph\n \
5261 /// -------------------------------|--------------------|------") . unwrap ( ) ;
5362
54- let mut hashify = String :: new ( ) ;
55-
5663 let mut map_builder = phf_codegen:: Map :: < & [ u8 ] > :: new ( ) ;
57- let mut max_len: usize = 0 ;
58- let mut min_len: usize = usize:: MAX ;
59- let mut bare_max_len: usize = 0 ;
6064 for ( name, glyph) in entities {
6165 map_builder. entry ( name. as_bytes ( ) , & format ! ( "&{:?}" , glyph. as_bytes( ) ) ) ;
62- max_len = max ( max_len, name. len ( ) ) ;
63- min_len = min ( min_len, name. len ( ) ) ;
64- if !name. ends_with ( ';' ) {
65- bare_max_len = max ( bare_max_len, name. len ( ) ) ;
66- }
67-
68- {
69- use std:: fmt:: Write ;
70- write ! ( & mut hashify, "\n b\" {name}\" => &[" ) . unwrap ( ) ;
71- for & byte in glyph. as_bytes ( ) {
72- write ! ( & mut hashify, "{byte}," ) . unwrap ( ) ;
73- }
74- write ! ( & mut hashify, "]," ) . unwrap ( ) ;
75- }
7666
7767 // `{:28}` would pad the output inside the backticks.
7868 let name = format ! ( "`{name}`" ) ;
@@ -95,29 +85,98 @@ fn generate_entities_rs(entities: &[(String, String)]) {
9585 writeln ! ( out, "/// {name:30} | {codepoints:18} | {glyph}" , ) . unwrap ( ) ;
9686 }
9787
98- let map = map_builder. build ( ) ;
88+ writeln ! ( out, "#[allow(clippy::unreadable_literal)]" ) . unwrap ( ) ;
89+ writeln ! (
90+ out,
91+ "pub static ENTITIES: phf::Map<&[u8], &[u8]> = {};" ,
92+ map_builder. build( )
93+ )
94+ . unwrap ( ) ;
95+ }
96+
/// Generate `entities_length.rs` file containing constants with the minimum
/// and maximum entity lengths.
///
/// Writes `ENTITY_MAX_LENGTH`, `ENTITY_MIN_LENGTH`, and
/// `BARE_ENTITY_MAX_LENGTH` to `$OUT_DIR/entities_length.rs`. Each length is
/// the byte length of an entity name exactly as it appears in `entities`;
/// "bare" entities are the names that do not end with `;`.
///
/// If `entities` is empty the initial values are emitted unchanged (`0` for
/// both maximums and `usize::MAX` for the minimum).
///
/// # Panics
///
/// Panics if `OUT_DIR` is not set or if the output file cannot be created,
/// written, or flushed.
#[cfg(any(feature = "unescape", feature = "entities"))]
fn generate_entities_length_rs(entities: &[(String, String)]) {
    use std::cmp::{max, min};
    use std::env;
    use std::fs::File;
    use std::io::{BufWriter, Write};
    use std::path::Path;

    let out_path =
        Path::new(&env::var("OUT_DIR").unwrap()).join("entities_length.rs");
    let mut out = BufWriter::new(File::create(out_path).unwrap());

    let mut max_len: usize = 0;
    let mut min_len: usize = usize::MAX;
    let mut bare_max_len: usize = 0;
    for (name, _) in entities {
        max_len = max(max_len, name.len());
        min_len = min(min_len, name.len());
        // Only semicolon-less names count toward the bare maximum.
        if !name.ends_with(';') {
            bare_max_len = max(bare_max_len, name.len());
        }
    }

    writeln!(
        out,
        "\
        /// Length of longest entity including ‘&’ and possibly ‘;’.\n\
        pub const ENTITY_MAX_LENGTH: usize = {max_len};\n\
        \n\
        /// Length of shortest entity including ‘&’ and possibly ‘;’.\n\
        pub const ENTITY_MIN_LENGTH: usize = {min_len};\n\
        \n\
        /// Length of longest semicolon-less entity including ‘&’.\n\
        pub const BARE_ENTITY_MAX_LENGTH: usize = {bare_max_len};"
    )
    .unwrap();

    // Dropping a `BufWriter` ignores errors from its final flush, which could
    // leave a truncated file behind without failing the build. Flush
    // explicitly so such an error panics here instead.
    out.flush().unwrap();
}
135+
/// Generate `expand_entity.rs` file containing a function that maps entity byte
/// strings to their expansions.
///
/// The emitted file defines `fn expand_entity(candidate: &[u8]) ->
/// Option<&[u8]>`, whose body is a single `hashify::map!` invocation with one
/// `b"<name>" => &[<bytes>,]` arm per `(name, glyph)` pair in `entities`. The
/// glyph is written as a list of its UTF-8 bytes in decimal.
///
/// # Panics
///
/// Panics if `OUT_DIR` is not set or if the output file cannot be created,
/// written, or flushed.
#[cfg(feature = "unescape")]
fn generate_unescape_entity_rs(entities: &[(String, String)]) {
    use std::env;
    use std::fs::File;
    use std::io::{BufWriter, Write};
    use std::path::Path;

    let out_path =
        Path::new(&env::var("OUT_DIR").unwrap()).join("expand_entity.rs");
    let mut out = BufWriter::new(File::create(out_path).unwrap());

    // Function header and the fixed leading arguments of `hashify::map!`.
    writeln!(
        out,
        "\
        /// Get expansion or `None` for a candidate HTML entity byte string.\n\
        #[must_use]\n\
        #[allow(clippy::too_many_lines)]\n\
        fn expand_entity(candidate: &[u8]) -> Option<&[u8]> {{\n\
        hashify::map! {{\n\
        candidate,\n\
        &[u8],"
    )
    .unwrap();

    // One map arm per entity: the name as a byte-string key, the expansion as
    // a byte-slice literal.
    for (name, glyph) in entities {
        write!(
            out,
            "\n\
            b\"{name}\" => &["
        )
        .unwrap();
        for &byte in glyph.as_bytes() {
            write!(out, "{byte},").unwrap();
        }
        write!(out, "],").unwrap();
    }

    // Close the macro invocation and the function body.
    writeln!(
        out,
        "\n\
        }}\n\
        }}"
    )
    .unwrap();

    // Dropping a `BufWriter` ignores errors from its final flush, which could
    // leave a truncated file behind without failing the build. Flush
    // explicitly so such an error panics here instead.
    out.flush().unwrap();
}
@@ -146,7 +205,11 @@ fn generate_matcher_rs(entities: &[(String, String)]) {
146205}
147206
148207/// Load HTML entities as `vec![...("&gt;", ">")...]`.
149- #[ cfg( any( feature = "unescape_fast" , feature = "entities" ) ) ]
208+ #[ cfg( any(
209+ feature = "unescape_fast" ,
210+ feature = "unescape" ,
211+ feature = "entities"
212+ ) ) ]
150213fn load_entities < P : AsRef < std:: path:: Path > > ( path : P ) -> Vec < ( String , String ) > {
151214 let input = std:: fs:: read ( path. as_ref ( ) ) . unwrap ( ) ;
152215 let input: serde_json:: Map < String , serde_json:: Value > =
0 commit comments