From 874e4a584f28b4c1c2290206437953949dd8a429 Mon Sep 17 00:00:00 2001
From: LesterEvSe
Date: Fri, 3 Apr 2026 16:42:02 +0300
Subject: [PATCH] feat: implement `ProjectGraph` inside `driver.rs` file and change parse trait to the `SourceFile`

---
Reviewer notes (this section is ignored when the patch is applied):
- `ProjectGraph::new` now pushes import-resolution errors to the `ErrorCollector`;
  they used to be collected into a local vector that was never reported.
- `parse_and_get_program` no longer clones the file content and the importer source.
- The blob ids on the `index` lines were not regenerated after these edits.

 src/driver.rs     | 205 ++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs        |   6 +-
 src/parse.rs      |  20 +++--
 src/resolution.rs |   7 +-
 4 files changed, 226 insertions(+), 12 deletions(-)
 create mode 100644 src/driver.rs

diff --git a/src/driver.rs b/src/driver.rs
new file mode 100644
index 0000000..6d130b7
--- /dev/null
+++ b/src/driver.rs
@@ -0,0 +1,205 @@
+use std::collections::{HashMap, VecDeque};
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use crate::error::{Error, ErrorCollector, RichError, Span};
+use crate::parse::{self, ParseFromStrWithErrors};
+use crate::resolution::{CanonPath, DependencyMap, SourceFile};
+
+/// Represents a single, isolated file in the SimplicityHL project.
+/// In this architecture, a file and a module are the exact same thing.
+#[derive(Debug, Clone)]
+pub struct Module {
+    pub source: SourceFile,
+    /// The completely parsed program for this specific file.
+    /// It contains all the functions, aliases, and imports defined inside the file.
+    pub parsed_program: parse::Program,
+}
+
+/// The Dependency Graph itself.
+pub struct ProjectGraph {
+    /// Arena Pattern: the data itself lives here.
+    /// A flat vector guarantees that module data is stored contiguously in memory.
+    #[expect(dead_code)]
+    pub(self) modules: Vec<Module>,
+
+    /// The configuration environment.
+    /// Used to resolve external library dependencies and invoke their associated functions.
+    pub dependency_map: Arc<DependencyMap>,
+
+    /// Fast lookup: `CanonPath` -> Module ID.
+    /// A reverse index mapping absolute file paths to their internal IDs.
+    /// This solves the duplication problem, ensuring each file is only parsed once.
+    pub lookup: HashMap<CanonPath, usize>,
+
+    /// Fast lookup: Module ID -> `CanonPath`.
+    /// A direct index mapping internal IDs back to their absolute file paths.
+    /// This serves as the exact inverse of the `lookup` map.
+    pub paths: Arc<[CanonPath]>,
+
+    /// The Adjacency List: Defines the Directed Acyclic Graph (DAG) of imports.
+    ///
+    /// The Key (`usize`) is the ID of a "Parent" module (the file doing the importing).
+    /// The Value (`Vec`) is a list of IDs of the "Child" modules it relies on.
+    ///
+    /// Example: If `main.simf` (ID: 0) has `use lib::math;` (ID: 1) and `use lib::io;` (ID: 2),
+    /// this map will contain: `{ 0: [1, 2] }`.
+    pub dependencies: HashMap<usize, Vec<usize>>,
+}
+
+impl ProjectGraph {
+    /// This helper cleanly encapsulates the process of loading source text, parsing it
+    /// into a `parse::Program`, and combining them so the compiler can easily work with the file.
+    /// If the file cannot be read or contains syntax errors, it logs the diagnostic to the
+    /// `ErrorCollector` and safely returns `None`.
+    fn parse_and_get_program(
+        path: &CanonPath,
+        importer_source: SourceFile,
+        span: Span,
+        handler: &mut ErrorCollector,
+    ) -> Option<Module> {
+        let Ok(content) = std::fs::read_to_string(path.as_path()) else {
+            let err = RichError::new(Error::FileNotFound(PathBuf::from(path.as_path())), span)
+                .with_source(importer_source);
+
+            handler.push(err);
+            return None;
+        };
+
+        let dep_source_file = SourceFile::new(path.as_path(), Arc::from(content));
+
+        parse::Program::parse_from_str_with_errors(&dep_source_file, handler).map(
+            |parsed_program| Module {
+                source: dep_source_file,
+                parsed_program,
+            },
+        )
+    }
+
+    /// Initializes a new `ProjectGraph` by parsing the root program and discovering all dependencies.
+    ///
+    /// Performs a BFS to recursively parse `use` statements,
+    /// building a DAG of the project's modules.
+    ///
+    /// # Arguments
+    ///
+    /// * `root_source` - The `SourceFile` representing the entry point of the project.
+    /// * `dependency_map` - The context-aware mapping rules used to resolve external imports.
+    /// * `root_program` - A reference to the already-parsed AST of the root file.
+    /// * `handler` - The diagnostics collector used to record resolution and parsing errors.
+    ///
+    /// # Returns
+    ///
+    /// * `Ok(Some(Self))` - If the entire project graph was successfully resolved and parsed.
+    /// * `Ok(None)` - If the graph traversal completed, but one or more modules contained
+    ///   errors (which have been safely logged into the `handler`).
+    ///
+    /// # Errors
+    ///
+    /// This function will return an `Err(String)` only for critical internal compiler errors
+    /// (e.g., if a provided `SourceFile` is unexpectedly missing its underlying file path).
+    pub fn new(
+        root_source: SourceFile,
+        dependency_map: Arc<DependencyMap>,
+        root_program: &parse::Program,
+        handler: &mut ErrorCollector,
+    ) -> Result<Option<Self>, String> {
+        let root_name = if let Some(root_name) = root_source.name() {
+            CanonPath::canonicalize(root_name)?
+        } else {
+            return Err(
+                "The root_source variable inside the ProjectGraph::new() function has no name"
+                    .to_string(),
+            );
+        };
+
+        let mut modules: Vec<Module> = vec![Module {
+            source: root_source,
+            parsed_program: root_program.clone(),
+        }];
+
+        let mut lookup: HashMap<CanonPath, usize> = HashMap::new();
+        let mut paths: Vec<CanonPath> = vec![root_name.clone()];
+        let mut dependencies: HashMap<usize, Vec<usize>> = HashMap::new();
+
+        let root_id = 0;
+        lookup.insert(root_name, root_id);
+        dependencies.insert(root_id, Vec::new());
+
+        // Implementation of the standard BFS algorithm with memoization and queue
+        let mut queue = VecDeque::new();
+        queue.push_back(root_id);
+
+        while let Some(curr_id) = queue.pop_front() {
+            // We need this to report errors inside THIS file.
+            let importer_source = modules[curr_id].source.clone();
+            let importer_source_name = if let Some(name) = importer_source.name() {
+                CanonPath::canonicalize(name)?
+            } else {
+                return Err(format!(
+                    "The {:?} variable inside the ProjectGraph::new() function has no name",
+                    importer_source
+                ));
+            };
+
+            let current_program = &modules[curr_id].parsed_program;
+
+            // Resolved imports are buffered because PHASE 2 pushes into `modules`.
+            let mut valid_imports: Vec<(CanonPath, Span)> = Vec::new();
+
+            // PHASE 1: Resolve Imports
+            for elem in current_program.items() {
+                if let parse::Item::Use(use_decl) = elem {
+                    match dependency_map.resolve_path(importer_source_name.clone(), use_decl) {
+                        Ok(path) => valid_imports.push((path, *use_decl.span())),
+                        Err(err) => {
+                            // Report the failure; otherwise `has_errors()` below would miss it.
+                            handler.push(err.with_source(importer_source.clone()));
+                        }
+                    }
+                }
+            }
+
+            // PHASE 2: Load and Parse Dependencies
+            for (path, import_span) in valid_imports {
+                if let Some(&existing_id) = lookup.get(&path) {
+                    let deps = dependencies.entry(curr_id).or_default();
+                    if !deps.contains(&existing_id) {
+                        deps.push(existing_id);
+                    }
+                    continue;
+                }
+
+                let Some(module) = ProjectGraph::parse_and_get_program(
+                    &path,
+                    importer_source.clone(),
+                    import_span,
+                    handler,
+                ) else {
+                    continue;
+                };
+
+                let last_ind = modules.len();
+                modules.push(module);
+
+                lookup.insert(path.clone(), last_ind);
+                paths.push(path);
+                dependencies.entry(curr_id).or_default().push(last_ind);
+
+                queue.push_back(last_ind);
+            }
+        }
+
+        Ok(if handler.has_errors() {
+            None
+        } else {
+            Some(Self {
+                modules,
+                dependency_map,
+                lookup,
+                paths: paths.into(),
+                dependencies,
+            })
+        })
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index f551ba1..dcda041 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,6 +6,7 @@ pub mod compile;
 pub mod debug;
 #[cfg(feature = "docs")]
 pub mod docs;
+pub mod driver;
 pub mod dummy_env;
 pub mod error;
 pub mod jet;
@@ -37,6 +38,7 @@ pub use simplicity::elements;
 use crate::debug::DebugSymbols;
 use crate::error::{ErrorCollector, WithContent};
 use crate::parse::ParseFromStrWithErrors;
+use crate::resolution::SourceFile;
 pub use crate::types::ResolvedType;
 pub use crate::value::Value;
 pub use crate::witness::{Arguments, Parameters, WitnessTypes, WitnessValues};
@@ -58,8 +60,10 @@ impl TemplateProgram {
     /// The string is not a valid SimplicityHL program.
     pub fn new<Str: Into<Arc<str>>>(s: Str) -> Result<Self, String> {
         let file = s.into();
+        let source = SourceFile::anonymous(file.clone());
         let mut error_handler = ErrorCollector::new();
-        let parse_program = parse::Program::parse_from_str_with_errors(&file, &mut error_handler);
+        let parse_program = parse::Program::parse_from_str_with_errors(&source, &mut error_handler);
+
         if let Some(program) = parse_program {
             let ast_program = ast::Program::analyze(&program).with_content(Arc::clone(&file))?;
             Ok(Self {
diff --git a/src/parse.rs b/src/parse.rs
index 5b90612..59a4d34 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -1033,7 +1033,10 @@ pub trait ParseFromStr: Sized {
 /// Trait for parsing with collection of errors.
 pub trait ParseFromStrWithErrors: Sized {
     /// Parse a value from the string `s` with Errors.
-    fn parse_from_str_with_errors(s: &str, handler: &mut ErrorCollector) -> Option<Self>;
+    fn parse_from_str_with_errors(
+        source: &SourceFile,
+        handler: &mut ErrorCollector,
+    ) -> Option<Self>;
 }
 
 /// Trait for generating parsers of themselves.
@@ -1077,10 +1080,13 @@ impl<A: ChumskyParse + std::fmt::Debug> ParseFromStr for A {
 }
 
 impl<A: ChumskyParse + std::fmt::Debug> ParseFromStrWithErrors for A {
-    fn parse_from_str_with_errors(s: &str, handler: &mut ErrorCollector) -> Option<Self> {
+    fn parse_from_str_with_errors(
+        source: &SourceFile,
+        handler: &mut ErrorCollector,
+    ) -> Option<Self> {
+        let s = &source.content().to_string();
         let (tokens, lex_errs) = crate::lexer::lex(s);
 
-        let source = SourceFile::anonymous(Arc::from(s));
         handler.extend(source.clone(), lex_errs);
         let tokens = tokens?;
 
@@ -1093,7 +1099,7 @@ impl<A: ChumskyParse + std::fmt::Debug> ParseFromStrWithErrors for A {
             )
             .into_output_errors();
 
-        handler.extend(source, parse_errs);
+        handler.extend(source.clone(), parse_errs);
 
         // TODO: We should return parsed result if we found errors, but because analyzing in `ast` module
         // is not handling poisoned tree right now, we don't return parsed result
@@ -2428,8 +2434,9 @@ mod test {
     #[test]
     fn test_double_colon() {
         let input = "fn main() { let ab: u8 = <(u4, u4)> : :into((0b1011, 0b1101)); }";
+        let source = SourceFile::anonymous(Arc::from(input));
         let mut error_handler = ErrorCollector::new();
-        let parse_program = Program::parse_from_str_with_errors(input, &mut error_handler);
+        let parse_program = Program::parse_from_str_with_errors(&source, &mut error_handler);
 
         assert!(parse_program.is_none());
         assert!(ErrorCollector::to_string(&error_handler).contains("Expected '::', found ':'"));
@@ -2438,8 +2445,9 @@ mod test {
     #[test]
     fn test_double_double_colon() {
         let input = "fn main() { let pk: Pubkey = witnes::::PK; }";
+        let source = SourceFile::anonymous(Arc::from(input));
         let mut error_handler = ErrorCollector::new();
-        let parse_program = Program::parse_from_str_with_errors(input, &mut error_handler);
+        let parse_program = Program::parse_from_str_with_errors(&source, &mut error_handler);
 
         assert!(parse_program.is_none());
         assert!(ErrorCollector::to_string(&error_handler).contains("Expected ';', found '::'"));
diff --git a/src/resolution.rs b/src/resolution.rs
index ee310a5..ce6e353 100644
--- a/src/resolution.rs
+++ b/src/resolution.rs
@@ -8,7 +8,7 @@ use crate::parse::UseDecl;
 /// Powers error reporting by mapping compiler diagnostics to the specific file.
 #[derive(Debug, Clone, Eq, PartialEq, Hash)]
 pub struct SourceFile {
-    /// The name or path of the source file (e.g., "./simf/main.simf").
+    /// The path of the source file (e.g., "./src/main.simf").
     name: Option<Arc<Path>>,
     /// The actual text content of the source file.
     content: Arc<str>,
@@ -16,10 +16,7 @@ pub struct SourceFile {
 
 impl From<(&Path, &str)> for SourceFile {
     fn from((name, content): (&Path, &str)) -> Self {
-        Self {
-            name: Some(Arc::from(name)),
-            content: Arc::from(content),
-        }
+        Self::new(name, Arc::from(content))
     }
 }
 