From 5f1862c71c64fa99b7e5d7928b6898a49bdf7d22 Mon Sep 17 00:00:00 2001 From: Andrey Vityuk Date: Wed, 29 Apr 2026 19:58:48 -0700 Subject: [PATCH 1/2] Cache generated parser tables. --- lrpar/src/lib/ctbuilder.rs | 42 +++++++++++++++++++++++++++++++------- lrpar/src/lib/mod.rs | 2 +- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/lrpar/src/lib/ctbuilder.rs b/lrpar/src/lib/ctbuilder.rs index 4f8ac648b..897cfa1ef 100644 --- a/lrpar/src/lib/ctbuilder.rs +++ b/lrpar/src/lib/ctbuilder.rs @@ -1093,7 +1093,7 @@ where let run_parser = match self.yacckind.unwrap() { YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => { quote! { - ::lrpar::RTParserBuilder::new(&grm, &stable) + ::lrpar::RTParserBuilder::new(grm, stable) .recoverer(#recoverer) .parse_map( lexer, @@ -1104,7 +1104,7 @@ where } YaccKind::Original(YaccOriginalActionKind::NoAction) => { quote! { - ::lrpar::RTParserBuilder::new(&grm, &stable) + ::lrpar::RTParserBuilder::new(grm, stable) .recoverer(#recoverer) .parse_map(lexer, &|_| (), &|_, _| ()).1 } @@ -1145,7 +1145,7 @@ where #action_fn_parse_param_ty ) -> #actionskind #type_generics > = ::std::vec![#(&#wrappers,)*]; - match ::lrpar::RTParserBuilder::new(&grm, &stable) + match ::lrpar::RTParserBuilder::new(grm, stable) .recoverer(#recoverer) .parse_actions(lexer, &actions, #action_fn_parse_param) { (Some(#actionskind::#action_ident(x)), y) => (Some(x), y), @@ -1205,6 +1205,14 @@ where const __GRM_DATA: &[u8] = &[#(#grm_data,)*]; const __STABLE_DATA: &[u8] = &[#(#stable_data,)*]; + fn __lrpar_parser_data() -> &'static ::lrpar::ParserData<#storaget> { + static DATA: ::std::sync::OnceLock<::lrpar::ParserData<#storaget>> + = ::std::sync::OnceLock::new(); + DATA.get_or_init( + || ::lrpar::ctbuilder::_reconstitute(__GRM_DATA, __STABLE_DATA) + ) + } + #[allow(dead_code)] pub fn parse #generics ( lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>, @@ -1212,7 +1220,9 @@ where ) -> #parse_fn_return_ty #where_clause { - let (grm, stable) = ::lrpar::ctbuilder::_reconstitute(__GRM_DATA, __STABLE_DATA); + let __data = __lrpar_parser_data(); + let grm = __data.grm(); + let stable = __data.stable(); #run_parser } }) @@ -1601,16 +1611,34 @@ where } } +/// Bundles `YaccGrammar` + `StateTable` so that generated parsers can hold +/// them in a `OnceLock` without naming `lrtable` directly. +#[doc(hidden)] +pub struct ParserData { + grm: YaccGrammar, + stable: StateTable, +} + +impl ParserData { + pub fn grm(&self) -> &YaccGrammar { + &self.grm + } + + pub fn stable(&self) -> &StateTable { + &self.stable + } +} + /// This function is called by generated files; it exists so that generated files don't require a /// direct dependency on bincode. #[doc(hidden)] -pub fn _reconstitute + Hash + PrimInt + Unsigned + 'static>( +pub fn _reconstitute + Eq + Hash + PrimInt + Unsigned + 'static>( grm_buf: &[u8], stable_buf: &[u8], -) -> (YaccGrammar, StateTable) { +) -> ParserData { let (grm, _) = decode_from_slice(grm_buf, bincode::config::standard()).unwrap(); let (stable, _) = decode_from_slice(stable_buf, bincode::config::standard()).unwrap(); - (grm, stable) + ParserData { grm, stable } } /// An interface to the result of [CTParserBuilder::build()]. diff --git a/lrpar/src/lib/mod.rs b/lrpar/src/lib/mod.rs index ad99982c3..ef85fedbd 100644 --- a/lrpar/src/lib/mod.rs +++ b/lrpar/src/lib/mod.rs @@ -206,7 +206,7 @@ pub mod parser; pub mod test_utils; pub use crate::{ - ctbuilder::{CTParser, CTParserBuilder, RustEdition, Visibility}, + ctbuilder::{CTParser, CTParserBuilder, ParserData, RustEdition, Visibility}, lex_api::{LexError, Lexeme, Lexer, LexerTypes, NonStreamingLexer}, parser::{LexParseError, ParseError, ParseRepair, RTParserBuilder, RecoveryKind}, }; From bf1870b7feaab38ae870c655bd094093da30a80f Mon Sep 17 00:00:00 2001 From: Andrey Vityuk Date: Wed, 29 Apr 2026 20:00:06 -0700 Subject: [PATCH 2/2] Reduce parser setup overhead. --- lrpar/src/lib/parser.rs | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/lrpar/src/lib/parser.rs b/lrpar/src/lib/parser.rs index 2055402d8..9ff9f6934 100644 --- a/lrpar/src/lib/parser.rs +++ b/lrpar/src/lib/parser.rs @@ -112,7 +112,7 @@ pub(super) struct Parser< { rcvry_kind: RecoveryKind, pub(super) grm: &'a YaccGrammar, - pub(super) token_cost: Box>, + pub(super) token_cost: TokenCostFn<'a, StorageT>, pub(super) stable: &'a StateTable, lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>, // In the long term, we should remove the `lexemes` field entirely, as the `NonStreamingLexer` API is @@ -176,7 +176,7 @@ where let psr = Parser { rcvry_kind, grm, - token_cost: Box::new(token_cost), + token_cost, stable, lexer, lexemes, @@ -274,7 +274,7 @@ where let psr = Parser { rcvry_kind, grm, - token_cost: Box::new(token_cost), + token_cost, stable, lexer, lexemes, @@ -970,13 +970,10 @@ where fterm: &dyn Fn(LexerTypesT::LexemeT) -> Node, fnonterm: &dyn Fn(RIdx, Vec) -> Node, ) -> (Option, Vec>) { - let mut lexemes = vec![]; - for e in lexer.iter().collect::>() { - match e { - Ok(l) => lexemes.push(l), - Err(e) => return (None, vec![e.into()]), - } - } + let lexemes = match lexer.iter().collect() { + Ok(lexemes) => lexemes, + Err(e) => return (None, vec![e.into()]), + }; Parser::< StorageT, LexerTypesT, @@ -1019,13 +1016,10 @@ where actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>], param: ParamT, ) -> (Option, Vec>) { - let mut lexemes = vec![]; - for e in lexer.iter().collect::>() { - match e { - Ok(l) => lexemes.push(l), - Err(e) => return (None, vec![e.into()]), - } - } + let lexemes = match lexer.iter().collect() { + Ok(lexemes) => lexemes, + Err(e) => return (None, vec![e.into()]), + }; Parser::parse_actions( self.recoverer, self.grm,