NOTE(review): this patch was rebuilt from a copy in which line breaks, blank
lines and indentation had been collapsed and generic arguments in angle
brackets had been stripped. Hunk line counts were used to restore blank lines;
indentation of context lines, the return type of `resolve_arithmetic_op`, and
the whitespace-only change on the `Err(e) => {` line in src/elements/word.rs
are best guesses. Apply with whitespace tolerance and confirm against the tree.

The new file src/regex.rs differs from the submitted version (its `index` line
is therefore omitted):
  * matching tracks every reachable end offset, so groups, alternations inside
    groups and patterns such as `a*a` or `.*c` now match;
  * start offsets are taken from char boundaries, so multi-byte text no longer
    panics;
  * an unanchored pattern may match anywhere, `^` / `$` pin it to the ends;
  * `[a-c-e]`, `[a-]`, unclosed `[abc` and a stray `)` are parsed or rejected
    explicitly instead of being silently mis-read.

diff --git a/Cargo.toml b/Cargo.toml
index 5869e81a..8754a798 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,6 @@ signal-hook = "0.3.17"
 rev_lines = "0.3.0"
 faccess = "0.2.4"
 io-streams = "0.16.3"
-regex = "1.11.1"
 rand = "0.9"
 rand_chacha = { version = "0.9.0", features = [ "os_rng" ]}
 time = "0.3"
diff --git a/src/core.rs b/src/core.rs
index f34243f0..89cb4271 100644
--- a/src/core.rs
+++ b/src/core.rs
@@ -89,7 +89,7 @@ impl ShellCore {
 
         if unistd::isatty(0) == Ok(true) && self.script_name == "-" {
             self.db.flags += "himH";
-            let _ = self.db.set_param("PS1", "🍣 ", None);
+            let _ = self.db.set_param("PS1", "🍣\\[\x1b[1;32m\\]\\u@\\h:\\[\x1b[1;34m\\]\\w\\[\x1b[0m\\]$ ", None);
             let _ = self.db.set_param("PS2", "> ", None);
             let fd = fcntl::fcntl(0, fcntl::F_DUPFD_CLOEXEC(255))
                 .expect("sush(fatal): Can't allocate fd for tty FD");
diff --git a/src/elements/expr/conditional.rs b/src/elements/expr/conditional.rs
index 10169039..2075b509 100644
--- a/src/elements/expr/conditional.rs
+++ b/src/elements/expr/conditional.rs
@@ -12,7 +12,6 @@ use crate::error::exec::ExecError;
 use crate::utils::{file_check, glob};
 use crate::elements::word::Word;
 use crate::elements::substitution::variable::Variable;
-use regex::Regex;
 use self::elem::CondElem;
 use super::arithmetic::elem::ArithElem;
 use std::env;
@@ -226,27 +225,16 @@ impl ConditionalExpr {
             None => return Err(ExecError::Other("Invalid operand".to_string())),
         };
 
-        let re = match Regex::new(&right_eval) {
-            Ok(regex) => regex,
-            Err(e) => return Err(ExecError::Other(e.to_string())),
-        };
+        let ast = crate::regex::parse_regex(&right_eval)?;
+        let matched = crate::regex::match_here(&ast, &left);
 
         core.db.set_array("BASH_REMATCH", Some(vec![]), None)?;
-        if let Some(res) = re.captures(&left) {
-            for i in 0.. {
-                if let Some(e) = res.get(i) {
-                    let s = e.as_str().to_string();
-                    core.db.set_array_elem("BASH_REMATCH", &s, i as isize, None)?;
-                }else{
-                    break;
-                }
-            }
-            stack.push( CondElem::Ans(true) );
-        }else{
-            stack.push( CondElem::Ans(false) );
+        if matched {
+            core.db.set_array_elem("BASH_REMATCH", &left, 0, None)?;
         }
 
-        return Ok(());
+        stack.push(CondElem::Ans(matched));
+        Ok(())
     }
 
     fn resolve_arithmetic_op(name: &str, core: &mut ShellCore) -> Result<String, ExecError> {
diff --git a/src/elements/expr/conditional/parser.rs b/src/elements/expr/conditional/parser.rs
index 4515375a..139728cd 100644
--- a/src/elements/expr/conditional/parser.rs
+++ b/src/elements/expr/conditional/parser.rs
@@ -208,6 +208,6 @@ impl ConditionalExpr {
             break;
         }
 
-        Ok(None)
+        Ok(Some(ans))
     }
 }
diff --git a/src/elements/subword.rs b/src/elements/subword.rs
index 1fa556b6..b2eb6b05 100644
--- a/src/elements/subword.rs
+++ b/src/elements/subword.rs
@@ -21,6 +21,7 @@ mod process_sub;
 use crate::{ShellCore, Feeder};
 use crate::error::{exec::ExecError, parse::ParseError};
 use crate::elements::word::WordMode;
+use crate::regex;
 use crate::utils::splitter;
 use self::ansi_c_quoted::AnsiCQuoted;
 use self::arithmetic::Arithmetic;
@@ -95,7 +96,7 @@ pub trait Subword {
     fn make_regex(&mut self) -> Option<String> {
         match self.get_text() {
             "" => None,
-            s => Some(s.to_string()),
+            s => Some(regex::glob_to_regex(s)),
         }
     }
 
diff --git a/src/elements/word.rs b/src/elements/word.rs
index bb647f46..64ba1b5b 100644
--- a/src/elements/word.rs
+++ b/src/elements/word.rs
@@ -113,21 +113,21 @@ impl Word {
     }
 
     pub fn eval_for_regex(&self, core: &mut ShellCore) -> Option<String> {
-        let quoted = self.text.starts_with("\"") && self.text.ends_with("\"");
+        let quoted = self.text.starts_with('"') && self.text.ends_with('"');
 
         match self.tilde_and_dollar_expansion(core) {
             Ok(mut w) => {
-                let mut re = w.make_regex()?;
+                let text = w.make_unquoted_word()?;
+                let mut re = crate::regex::glob_to_regex(&text);
                 if quoted {
                     re.insert(0, '"');
-                    re += "\"";
+                    re.push('"');
                 }
-
                 Some(re)
             },
-            Err(e) => {
+            Err(e) => {
                 e.print(core);
-                return None;
+                None
             },
         }
     }
@@ -190,19 +190,6 @@ impl Word {
         Some(sw.into_iter().map(|s| s.unwrap()).collect::<String>())
     }
 
-    pub fn make_regex(&mut self) -> Option<String> {
-        let sw: Vec<Option<String>> = self.subwords.iter_mut()
-            .map(|s| s.make_regex())
-            .filter(|s| *s != None)
-            .collect();
-
-        if sw.is_empty() {
-            return None;
-        }
-
-        Some(sw.into_iter().map(|s| s.unwrap()).collect::<String>())
-    }
-
     fn make_glob_string(&mut self) -> String {
         self.subwords.iter_mut()
             .map(|s| s.make_glob_string())
diff --git a/src/error/parse.rs b/src/error/parse.rs
index c7963022..1474ade4 100644
--- a/src/error/parse.rs
+++ b/src/error/parse.rs
@@ -9,6 +9,7 @@ pub enum ParseError {
     UnexpectedSymbol(String),
     Input(InputError),
     WrongAlias(String),
+    Regex(String),
 }
 
 //expected for conditional expression
@@ -18,7 +19,8 @@ impl From<&ParseError> for String {
             //ParseError::UnexpectedSymbol(s) => format!("Unexpected token: {}", s),
             ParseError::UnexpectedSymbol(s) => format!("syntax error near unexpected token: {}", s),
             ParseError::Input(e) => From::from(e),
-            ParseError::WrongAlias(msg) => format!("Someting wrong alias: {}", msg),
+            ParseError::WrongAlias(msg) => format!("Something wrong alias: {}", msg),
+            ParseError::Regex(msg) => format!("regex error: {}", msg),
         }
     }
 }
diff --git a/src/main.rs b/src/main.rs
index b5a04d8c..744f302b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,6 +7,7 @@ mod error;
 mod feeder;
 mod elements;
 mod main_c_option;
+mod regex;
 mod signal;
 mod proc_ctrl;
 mod utils;
diff --git a/src/regex.rs b/src/regex.rs
new file mode 100644
--- /dev/null
+++ b/src/regex.rs
@@ -0,0 +1,294 @@
+//SPDX-FileCopyrightText: 2025 Hugo Fortin
+//SPDX-License-Identifier: BSD-3-Clause
+
+///// Custom and ultra-light Regex implementation /////
+
+use std::iter::Peekable;
+use std::str::Chars;
+use crate::error::parse::ParseError;
+use crate::error::exec::ExecError;
+
+/// Syntax tree of a parsed pattern, as produced by [`parse_regex`].
+#[derive(Debug, Clone)]
+pub enum RegexAst {
+    Literal(char),
+    AnyChar,
+    StartAnchor,
+    EndAnchor,
+    ZeroOrMore(Box<RegexAst>),
+    OneOrMore(Box<RegexAst>),
+    ZeroOrOne(Box<RegexAst>),
+    Sequence(Vec<RegexAst>),
+    Alternate(Vec<RegexAst>),
+    Group(Box<RegexAst>),
+    CharClass(Vec<CharSetItem>),
+    NegatedCharClass(Vec<CharSetItem>),
+}
+
+/// One member of a bracket expression: a single character or an inclusive range.
+#[derive(Debug, Clone)]
+pub enum CharSetItem {
+    Single(char),
+    Range(char, char),
+}
+
+/// Wraps a pattern-syntax message in the error type returned by the parser.
+fn regex_error(msg: &str) -> ExecError {
+    ExecError::from(ParseError::Regex(msg.to_string()))
+}
+
+/// Parses `pattern` and searches `text` for it; an invalid pattern never matches.
+fn regex_match(pattern: &str, text: &str) -> bool {
+    parse(pattern).map_or(false, |ast| match_here(&ast, text))
+}
+
+/// Parses a complete pattern and rejects anything left over after the expression.
+fn parse(pattern: &str) -> Result<RegexAst, ExecError> {
+    let mut chars = pattern.chars().peekable();
+    let ast = parse_expr(&mut chars)?;
+    // parse_expr stops at ')' so groups can close; at top level no group is open.
+    if chars.peek().is_some() {
+        return Err(regex_error("Unmatched ) in pattern"));
+    }
+    Ok(ast)
+}
+
+/// Parses `pattern` into a [`RegexAst`], or returns a regex parse error.
+pub fn parse_regex(pattern: &str) -> Result<RegexAst, ExecError> {
+    parse(pattern)
+}
+
+/// Parses `|`-separated alternatives up to the end of input or an unconsumed `)`.
+fn parse_expr(chars: &mut Peekable<Chars>) -> Result<RegexAst, ExecError> {
+    let mut terms = Vec::new();
+    let mut branches = Vec::new();
+
+    while let Some(&c) = chars.peek() {
+        match c {
+            ')' => break,
+            '|' => {
+                chars.next();
+                // An empty branch is kept as an empty sequence.
+                branches.push(RegexAst::Sequence(std::mem::take(&mut terms)));
+            }
+            _ => terms.push(parse_term(chars)?),
+        }
+    }
+
+    if branches.is_empty() {
+        return Ok(RegexAst::Sequence(terms));
+    }
+    branches.push(RegexAst::Sequence(terms));
+    Ok(RegexAst::Alternate(branches))
+}
+
+/// Parses one atom followed by any number of `*`, `+` or `?` quantifiers.
+fn parse_term(chars: &mut Peekable<Chars>) -> Result<RegexAst, ExecError> {
+    let mut atom = match chars.next() {
+        Some('^') => RegexAst::StartAnchor,
+        Some('$') => RegexAst::EndAnchor,
+        Some('.') => RegexAst::AnyChar,
+        Some('(') => {
+            let inner = parse_expr(chars)?;
+            if chars.next() != Some(')') {
+                return Err(regex_error("Unclosed ( group"));
+            }
+            RegexAst::Group(Box::new(inner))
+        }
+        Some('[') => parse_char_class(chars)?,
+        Some('\\') => {
+            let c = chars.next().ok_or_else(|| regex_error("Backslash without character"))?;
+            RegexAst::Literal(c)
+        }
+        Some(c) => RegexAst::Literal(c),
+        None => return Err(regex_error("Unexpected end of pattern")),
+    };
+
+    while let Some(&next) = chars.peek() {
+        atom = match next {
+            '*' => RegexAst::ZeroOrMore(Box::new(atom)),
+            '+' => RegexAst::OneOrMore(Box::new(atom)),
+            '?' => RegexAst::ZeroOrOne(Box::new(atom)),
+            _ => break,
+        };
+        chars.next();
+    }
+
+    Ok(atom)
+}
+
+/// Converts a shell glob into an anchored pattern for [`parse_regex`].
+///
+/// `*` becomes `.*`, `?` becomes `.`, and the characters listed in the match below
+/// are backslash-escaped so that they are parsed as literals.
+pub fn glob_to_regex(glob: &str) -> String {
+    let mut regex = String::with_capacity(glob.len() + 2);
+    regex.push('^');
+    for c in glob.chars() {
+        match c {
+            '*' => regex.push_str(".*"),
+            '?' => regex.push('.'),
+            '.' | '+' | '(' | ')' | '|' | '^' | '$' | '{' | '}' | '[' | ']' | '\\' | '#' | ' ' => {
+                regex.push('\\');
+                regex.push(c);
+            }
+            _ => regex.push(c),
+        }
+    }
+    regex.push('$');
+    regex
+}
+
+/// Parses a bracket expression; the opening `[` has already been consumed.
+///
+/// A leading `^` negates the class, a `]` in first position is literal, and a `-` is
+/// literal unless it sits between a single character and a character other than `]`.
+fn parse_char_class(chars: &mut Peekable<Chars>) -> Result<RegexAst, ExecError> {
+    let negate = chars.next_if_eq(&'^').is_some();
+    let mut items: Vec<CharSetItem> = Vec::new();
+    let mut closed = false;
+
+    while let Some(c) = chars.next() {
+        if c == ']' && !items.is_empty() {
+            closed = true;
+            break;
+        }
+
+        // Inspect the previous item without popping it, so a finished range is never lost.
+        let range_start = match items.last() {
+            Some(CharSetItem::Single(start)) if c == '-' => Some(*start),
+            _ => None,
+        };
+        match (range_start, chars.peek().copied()) {
+            (Some(start), Some(end)) if end != ']' => {
+                chars.next();
+                items.pop();
+                items.push(CharSetItem::Range(start, end));
+            }
+            _ => items.push(CharSetItem::Single(c)),
+        }
+    }
+
+    if !closed {
+        return Err(regex_error("Unclosed [ character class"));
+    }
+
+    Ok(if negate {
+        RegexAst::NegatedCharClass(items)
+    } else {
+        RegexAst::CharClass(items)
+    })
+}
+
+/// Reports whether `ast` matches starting at any position of `text`.
+///
+/// The match may end anywhere; use `^` and `$` in the pattern to pin it to the ends.
+pub fn match_here(ast: &RegexAst, text: &str) -> bool {
+    // Start offsets are the char boundaries plus the end, never a mid-char byte index.
+    text.char_indices()
+        .map(|(start, _)| start)
+        .chain(std::iter::once(text.len()))
+        .any(|start| !match_ends(ast, text, start).is_empty())
+}
+
+/// Sorts `offsets` and removes duplicates.
+fn unique(mut offsets: Vec<usize>) -> Vec<usize> {
+    offsets.sort_unstable();
+    offsets.dedup();
+    offsets
+}
+
+/// Returns the offset after the next char of `text` at `start` when `accept` allows it.
+fn step_if(text: &str, start: usize, accept: impl Fn(char) -> bool) -> Vec<usize> {
+    match text[start..].chars().next() {
+        Some(c) if accept(c) => vec![start + c.len_utf8()],
+        _ => Vec::new(),
+    }
+}
+
+/// Applies `node` at every offset in `starts` and merges the resulting end offsets.
+fn advance_all(node: &RegexAst, text: &str, starts: &[usize]) -> Vec<usize> {
+    unique(starts.iter().flat_map(|&start| match_ends(node, text, start)).collect())
+}
+
+/// Extends `seed` with every offset reachable by applying `inner` repeatedly.
+fn repeat_ends(inner: &RegexAst, text: &str, seed: Vec<usize>) -> Vec<usize> {
+    let mut reached = unique(seed);
+    let mut frontier = reached.clone();
+    while !frontier.is_empty() {
+        frontier = advance_all(inner, text, &frontier);
+        // Dropping offsets already seen also ends loops over empty matches like `(a*)*`.
+        frontier.retain(|end| !reached.contains(end));
+        reached.extend_from_slice(&frontier);
+    }
+    unique(reached)
+}
+
+/// Returns every byte offset at which `ast` can stop when it starts matching at `start`.
+///
+/// `start` must be a char boundary of `text`. Keeping all end offsets rather than only
+/// the greedy one lets later nodes fall back to shorter repetitions (`a*a`, `.*c`).
+fn match_ends(ast: &RegexAst, text: &str, start: usize) -> Vec<usize> {
+    match ast {
+        RegexAst::Literal(c) => step_if(text, start, |t| t == *c),
+        RegexAst::AnyChar => step_if(text, start, |_| true),
+        RegexAst::StartAnchor if start == 0 => vec![start],
+        RegexAst::EndAnchor if start == text.len() => vec![start],
+        RegexAst::StartAnchor | RegexAst::EndAnchor => Vec::new(),
+        RegexAst::CharClass(set) => step_if(text, start, |t| char_class_match(t, set)),
+        RegexAst::NegatedCharClass(set) => step_if(text, start, |t| !char_class_match(t, set)),
+        // Groups, sequences and alternations recurse, so `(ab|c)+` works as a unit.
+        RegexAst::Group(inner) => match_ends(inner, text, start),
+        RegexAst::Sequence(seq) => seq
+            .iter()
+            .fold(vec![start], |ends, node| advance_all(node, text, &ends)),
+        RegexAst::Alternate(choices) => {
+            unique(choices.iter().flat_map(|alt| match_ends(alt, text, start)).collect())
+        }
+        RegexAst::ZeroOrMore(inner) => repeat_ends(inner, text, vec![start]),
+        RegexAst::OneOrMore(inner) => repeat_ends(inner, text, match_ends(inner, text, start)),
+        RegexAst::ZeroOrOne(inner) => {
+            let mut ends = match_ends(inner, text, start);
+            ends.push(start);
+            unique(ends)
+        }
+    }
+}
+
+/// Returns true when `c` equals a single item of `set` or falls inside one of its ranges.
+fn char_class_match(c: char, set: &[CharSetItem]) -> bool {
+    set.iter().any(|item| match item {
+        CharSetItem::Single(ch) => *ch == c,
+        CharSetItem::Range(start, end) => (*start..=*end).contains(&c),
+    })
+}
+
+/// Evaluates a textual `NAME =~ PATTERN` condition against `context`.
+///
+/// NOTE(review): nothing in this patch calls this function or constructs
+/// `ShellContext`; both look like leftovers — confirm before merging.
+fn eval_double_bracket_condition(cond: &str, context: &ShellContext) -> bool {
+    let parts: Vec<&str> = cond.splitn(2, "=~").map(str::trim).collect();
+    if parts.len() != 2 {
+        return false;
+    }
+
+    let var_name = parts[0].trim_start_matches('$');
+    let pattern = parts[1];
+
+    let var_value = context.get_var(var_name).unwrap_or_default();
+
+    regex_match(pattern, &var_value)
+}
+
+/// Variable store read by `eval_double_bracket_condition`.
+pub struct ShellContext {
+    vars: std::collections::HashMap<String, String>,
+}
+
+impl ShellContext {
+    /// Returns a copy of the value stored under `name`, if any.
+    pub fn get_var(&self, name: &str) -> Option<String> {
+        self.vars.get(name).cloned()
+    }
+}