From fa09fa44db25923131e5e6a2e0005e2ccb32a492 Mon Sep 17 00:00:00 2001 From: Jackson Riley Date: Sun, 29 Oct 2023 14:58:21 +0000 Subject: [PATCH 1/2] Don't recompile the sentinel regexes every Note::new - this saves quite a chunk of time --- src/builders/field.rs | 4 ++++ src/model.rs | 37 +++++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/builders/field.rs b/src/builders/field.rs index cb0ec48..e3ba607 100644 --- a/src/builders/field.rs +++ b/src/builders/field.rs @@ -62,6 +62,10 @@ impl Field { self.size = Some(value); self } + + pub fn name(&self) -> &str { + &self.name + } } impl Into for Field { diff --git a/src/model.rs b/src/model.rs index acb29ee..23d8fa3 100644 --- a/src/model.rs +++ b/src/model.rs @@ -5,6 +5,7 @@ use crate::{Error, Field}; use fancy_regex::Regex; use ramhorns::Template as RamTemplate; use std::collections::HashMap; +use std::rc::Rc; const DEFAULT_LATEX_PRE: &str = r#" \documentclass[12pt]{article} @@ -17,6 +18,7 @@ const DEFAULT_LATEX_PRE: &str = r#" "#; const DEFAULT_LATEX_POST: &str = r"\end{document}"; +const SENTINEL: &str = "SeNtInEl"; /// `FrontBack` or `Cloze` to determine the type of a Model. /// @@ -39,6 +41,7 @@ pub struct Model { latex_pre: String, latex_post: String, sort_field_index: i64, + sentinel_regexes: Rc>, // Rc<_> so this can be clone } impl Model { @@ -68,6 +71,7 @@ impl Model { latex_pre: DEFAULT_LATEX_PRE.to_string(), latex_post: DEFAULT_LATEX_POST.to_string(), sort_field_index: 0, + sentinel_regexes: compile_sentinel_regexes(&fields), } } @@ -99,6 +103,7 @@ impl Model { latex_pre: latex_pre.unwrap_or(DEFAULT_LATEX_PRE).to_string(), latex_post: latex_post.unwrap_or(DEFAULT_LATEX_POST).to_string(), sort_field_index: sort_field_index.unwrap_or(0), + sentinel_regexes: compile_sentinel_regexes(&fields), } } @@ -152,11 +157,10 @@ impl Model { } pub(super) fn req(&self) -> Result)>, Error> { - let sentinel = "SeNtInEl".to_string(); let field_names: Vec = self.fields.iter().map(|field| field.name.clone()).collect(); let field_values = field_names .iter() - .map(|field| (field.as_str(), format!("{}{}", &field, &sentinel))); + .map(|field| (field.as_str(), format!("{}{}", &field, &SENTINEL))); let mut req = Vec::new(); for (template_ord, template) in self.templates.iter().enumerate() { let rendered = RamTemplate::new(template.qfmt.clone()) @@ -165,7 +169,7 @@ impl Model { let required_fields = field_values .clone() .enumerate() - .filter(|(_, (_, field))| !contains_other_fields(&rendered, field, &sentinel)) + .filter(|(field_ord, _)| !self.contains_other_fields(&rendered, *field_ord)) .map(|(field_ord, _)| field_ord) .collect::>(); if !required_fields.is_empty() { @@ -239,17 +243,26 @@ impl Model { .map_err(json_error)?, ) } + + fn contains_other_fields(&self, rendered: &str, field_ord: usize) -> bool { + self.sentinel_regexes[field_ord].is_match(rendered).unwrap() + } } -fn contains_other_fields(rendered: &str, current_field: &str, sentinel: &str) -> bool { - Regex::new(&format!( - "(?!{field}\\b)\\b(\\w)*{sentinel}+", - field = current_field, - sentinel = sentinel - )) - .unwrap() - .is_match(rendered) - .unwrap() +fn compile_sentinel_regexes(fields: &[Field]) -> Rc> { + Rc::new( + fields + .iter() + .map(|f| { + Regex::new(&format!( + "(?!{field}{sentinel}\\b)\\b(\\w)*{sentinel}+", + field = f.name(), + sentinel = SENTINEL + )) + .unwrap() + }) + .collect(), + ) } #[cfg(test)] From 6cf6c46aedef846561d6212dcf2ffd924e3615c3 Mon Sep 17 00:00:00 2001 From: Jackson Riley Date: Sun, 29 Oct 2023 15:45:17 +0000 Subject: [PATCH 2/2] Don't recompile the regexes used in Note::write_to_db every time --- Cargo.toml | 1 + src/note.rs | 29 +++++++++++++++++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6fd0cc0..ea91254 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ fancy-regex = "0.5.0" serde = { version = "1.0", features = ["derive"] } ramhorns = "0.10.2" thiserror = "1.0.32" +once_cell = "1.18.0" [dev-dependencies] anyhow = "1.0.62" diff --git a/src/note.rs b/src/note.rs index 7462a00..ebceced 100644 --- a/src/note.rs +++ b/src/note.rs @@ -9,6 +9,20 @@ use std::collections::HashSet; use std::ops::RangeFrom; use std::str::FromStr; +use once_cell::sync::Lazy; + +static CLOZE_REGEX: Lazy = + Lazy::new(|| Regex::new(r"{{[^}]*?cloze:(?:[^}]?:)*(.+?)}}").expect("static regex")); + +static CLOZE2_REGEX: Lazy = + Lazy::new(|| Regex::new("<%cloze:(.+?)%>").expect("static regex")); + +static UPDATES_REGEX: Lazy = + Lazy::new(|| Regex::new(r"(?s){{c(\d+)::.+?}}").expect("static regex")); + +static INVALID_HTML_REGEX: Lazy = + Lazy::new(|| Regex::new(r"<(?!/?[a-z0-9]+(?: .*|/?)>)(?:.|\n)*?>").expect("static regex")); + /// Note (Flashcard) to be added to a `Deck` #[derive(Clone)] pub struct Note { @@ -194,11 +208,8 @@ impl Note { fn cloze_cards(model: &Model, self_fields: &Vec) -> Vec { let mut card_ords: HashSet = HashSet::new(); let mut cloze_replacements: HashSet = HashSet::new(); - cloze_replacements.extend(re_findall( - r"{{[^}]*?cloze:(?:[^}]?:)*(.+?)}}", - &model.templates()[0].qfmt, - )); - cloze_replacements.extend(re_findall("<%cloze:(.+?)%>", &model.templates()[0].qfmt)); + cloze_replacements.extend(re_findall(&CLOZE_REGEX, &model.templates()[0].qfmt)); + cloze_replacements.extend(re_findall(&CLOZE2_REGEX, &model.templates()[0].qfmt)); for field_name in cloze_replacements { let fields = model.fields(); let mut field_index_iter = fields @@ -211,7 +222,7 @@ fn cloze_cards(model: &Model, self_fields: &Vec) -> Vec { } else { "".to_string() }; - let updates_str = re_findall(r"(?s){{c(\d+)::.+?}}", &field_value); + let updates_str = re_findall(&UPDATES_REGEX, &field_value); let updates = updates_str .iter() .map(|m| i64::from_str(m).expect("parsed from regex") - 1) @@ -243,8 +254,7 @@ fn front_back_cards(model: &Model, self_fields: &Vec) -> Result Vec { - let regex = Regex::new(regex_str).expect("static regex"); +fn re_findall(regex: &Regex, to_match: &str) -> Vec { regex .captures_iter(to_match) .filter_map(|m| m.ok()) @@ -268,8 +278,7 @@ fn validate_tags(tags: &Vec) -> Result<(), Error> { } fn find_invalid_html_tags_in_field(field: &str) -> Vec { - let regex = Regex::new(r"<(?!/?[a-z0-9]+(?: .*|/?)>)(?:.|\n)*?>").unwrap(); - regex + INVALID_HTML_REGEX .find_iter(field) .map(|m| m.unwrap().as_str().to_string()) .collect()