diff --git a/crates/my-lang/Cargo.toml b/crates/my-lang/Cargo.toml
index 13deb3f..f4012c0 100644
--- a/crates/my-lang/Cargo.toml
+++ b/crates/my-lang/Cargo.toml
@@ -19,7 +19,9 @@ path = "src/main.rs"
 thiserror.workspace = true
 anyhow.workspace = true
 serde = { workspace = true, optional = true }
-serde_json = { workspace = true, optional = true }
+# Non-optional: json_parse / json_stringify stdlib builtins are always available
+# to Solo programs (my-lang#47). serde derive stays behind the `serde` feature.
+serde_json.workspace = true
 tracing.workspace = true
 clap.workspace = true
 my-ai = { path = "../my-ai" }
@@ -29,7 +31,7 @@ dhat = { version = "0.3", optional = true }
 
 [features]
 default = []
-serde = ["dep:serde", "dep:serde_json"]
+serde = ["dep:serde"]
 # Heap-profiling support for the #14 allocation investigation. Enables the
 # `dhat_checker_profile` example, which runs the checker under the dhat
 # allocator and writes a per-call-site `dhat-heap.json` (view at
diff --git a/crates/my-lang/src/parser.rs b/crates/my-lang/src/parser.rs
index d891036..7012d8a 100644
--- a/crates/my-lang/src/parser.rs
+++ b/crates/my-lang/src/parser.rs
@@ -1425,7 +1425,8 @@ impl Parser {
 
     fn parse_string_literal(&mut self) -> ParseResult {
         let token = self.advance().ok_or(ParseError::UnexpectedEof)?;
-        Ok(Expr::Literal(Literal::String(token.literal.clone(), token.span)))
+        let value = unescape_string_literal(&token.literal);
+        Ok(Expr::Literal(Literal::String(value, token.span)))
     }
 
     fn parse_bool_literal(&mut self) -> ParseResult {
@@ -2109,6 +2110,84 @@ enum Attribute {
     Custom(String),
 }
 
+/// Decode the escape sequences in a string-literal body.
+///
+/// The lexer stores the *raw* slice between the quotes (it only consumes `\x`
+/// pairs so it can find the closing quote). Without this, `"a\"b"` would yield
+/// the four characters `a \ " b` and JSON literals such as
+/// `json_parse("{\"k\":1}")` would be unparseable. See hyperpolymath/my-lang#47.
+///
+/// Supported escapes:
+/// - `\" \\ \/` yield the escaped character itself;
+/// - `\n \t \r \0` yield the corresponding control character;
+/// - `\uXXXX` (exactly four hex digits) and `\u{...}` (hex digits in braces)
+///   yield the Unicode scalar value with that code point.
+///
+/// Decoding is lenient and never fails:
+/// - an unknown escape such as `\q` drops the backslash and keeps the
+///   following character;
+/// - a malformed `\u` escape (non-hex digits, missing `}`, surrogate or
+///   out-of-range code point) keeps its `\u` as written and decoding resumes
+///   right after it, so the mistake stays visible in the value;
+/// - a lone trailing backslash is kept.
+fn unescape_string_literal(raw: &str) -> String {
+    // Fast path: nothing to decode.
+    if !raw.contains('\\') {
+        return raw.to_string();
+    }
+    let mut out = String::with_capacity(raw.len());
+    let mut chars = raw.chars();
+    while let Some(c) = chars.next() {
+        if c != '\\' {
+            out.push(c);
+            continue;
+        }
+        match chars.next() {
+            Some('n') => out.push('\n'),
+            Some('t') => out.push('\t'),
+            Some('r') => out.push('\r'),
+            Some('0') => out.push('\0'),
+            Some('u') => match decode_unicode_escape(chars.as_str()) {
+                Some((ch, consumed)) => {
+                    out.push(ch);
+                    // The payload is ASCII, so `consumed` is a char boundary.
+                    chars = chars.as_str()[consumed..].chars();
+                }
+                // Malformed: keep `\u`; the loop then handles what follows.
+                None => out.push_str("\\u"),
+            },
+            // `\"`, `\\`, `\/` and unknown escapes all keep the character.
+            Some(other) => out.push(other),
+            // Trailing backslash with nothing after it.
+            None => out.push('\\'),
+        }
+    }
+    out
+}
+
+/// Decode the payload that follows `\u`, where `rest` starts right after the `u`.
+///
+/// Accepts `{H...}` (Rust style) or exactly four hex digits (JSON style).
+/// Returns the decoded character and the number of bytes of `rest` the payload
+/// occupied, or `None` when the payload is malformed or does not name a
+/// Unicode scalar value.
+fn decode_unicode_escape(rest: &str) -> Option<(char, usize)> {
+    let (hex, consumed) = match rest.strip_prefix('{') {
+        Some(body) => {
+            let close = body.find('}')?;
+            // +2 for the enclosing `{` and `}`.
+            (&body[..close], close + 2)
+        }
+        None => (rest.get(..4)?, 4),
+    };
+    // `from_str_radix` would also accept a leading `+`; require pure hex digits.
+    if hex.is_empty() || !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
+        return None;
+    }
+    let code = u32::from_str_radix(hex, 16).ok()?;
+    char::from_u32(code).map(|ch| (ch, consumed))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/my-lang/src/stdlib.rs b/crates/my-lang/src/stdlib.rs
index 88c86ec..dd0555e 100644
--- a/crates/my-lang/src/stdlib.rs
+++ b/crates/my-lang/src/stdlib.rs
@@ -47,6 +47,9 @@ pub fn register_stdlib(define: &mut impl FnMut(String, Value)) {
 
     // Map / Dict Functions (string-keyed maps over Value::Record; my-lang#46)
     register_map_functions(define);
+
+    // JSON Functions (json_parse / json_stringify; my-lang#47)
+    register_json_functions(define);
 }
 
 // ============================================================================
@@ -1626,6 +1629,128 @@ fn register_map_functions(define: &mut impl FnMut(String, Value)) {
     );
 }
 
+// ============================================================================
+// JSON FUNCTIONS
+// ============================================================================
+//
+// json_parse(s) -> Value and json_stringify(v) -> String, backed by serde_json
+// (a non-optional dependency of this crate). See hyperpolymath/my-lang#47 / #45.
+//
+// Representation mapping (paired with the Map/dict builtins, #46):
+//   JSON object <-> Value::Record     JSON array <-> Value::Array
+//   JSON string <-> Value::String     JSON bool  <-> Value::Bool
+//   JSON null   <-> Value::Unit
+//   JSON number  -> Value::Int when written as an integer that fits in i64,
+//                   else Value::Float (so `1.0` and `1e3` stay Float)
+//               <-  Int as integer, Float as fractional
+// Object keys serialize in sorted order, matching the deterministic ordering
+// of `map_keys`. `value_to_json` sorts the entries itself, so this holds
+// whether serde_json's Map is a BTreeMap (the default) or insertion-ordered
+// (`preserve_order`, which Cargo feature unification can switch on).
+
+/// serde_json::Value -> My Value (total; never fails).
+fn json_to_value(j: serde_json::Value) -> Value {
+    match j {
+        serde_json::Value::Null => Value::Unit,
+        serde_json::Value::Bool(b) => Value::Bool(b),
+        serde_json::Value::Number(n) => {
+            if let Some(i) = n.as_i64() {
+                Value::Int(i)
+            } else {
+                // u64 too large for i64, or a real: fall back to float.
+                Value::Float(n.as_f64().unwrap_or(f64::NAN))
+            }
+        }
+        serde_json::Value::String(s) => Value::String(s),
+        serde_json::Value::Array(arr) => Value::Array(arr.into_iter().map(json_to_value).collect()),
+        serde_json::Value::Object(obj) => {
+            let mut map = HashMap::new();
+            for (k, v) in obj {
+                map.insert(k, json_to_value(v));
+            }
+            Value::Record(map)
+        }
+    }
+}
+
+/// My Value -> serde_json::Value. Errors on values with no JSON analogue
+/// (functions, native functions, AI results) and on NaN/Infinity floats.
+fn value_to_json(v: &Value) -> Result<serde_json::Value, RuntimeError> {
+    match v {
+        Value::Unit => Ok(serde_json::Value::Null),
+        Value::Bool(b) => Ok(serde_json::Value::Bool(*b)),
+        Value::Int(i) => Ok(serde_json::Value::Number((*i).into())),
+        Value::Float(f) => serde_json::Number::from_f64(*f)
+            .map(serde_json::Value::Number)
+            .ok_or_else(|| {
+                RuntimeError::Custom(format!(
+                    "json_stringify: {} has no JSON representation (NaN/Infinity)",
+                    f
+                ))
+            }),
+        Value::String(s) => Ok(serde_json::Value::String(s.clone())),
+        Value::Array(arr) => {
+            let mut out = Vec::with_capacity(arr.len());
+            for item in arr {
+                out.push(value_to_json(item)?);
+            }
+            Ok(serde_json::Value::Array(out))
+        }
+        Value::Record(map) => {
+            // HashMap iteration order is arbitrary; insert in key order so the
+            // output stays deterministic even if serde_json::Map preserves
+            // insertion order.
+            let mut entries: Vec<_> = map.iter().collect();
+            entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
+            let mut obj = serde_json::Map::new();
+            for (k, val) in entries {
+                obj.insert(k.clone(), value_to_json(val)?);
+            }
+            Ok(serde_json::Value::Object(obj))
+        }
+        other => Err(RuntimeError::TypeError {
+            expected: "JSON-representable value".to_string(),
+            got: format!("{:?}", other),
+        }),
+    }
+}
+
+fn register_json_functions(define: &mut impl FnMut(String, Value)) {
+    // json_parse(s) -> Value - parse a JSON document into a My value.
+    define(
+        "json_parse".to_string(),
+        Value::NativeFunction(NativeFunction {
+            name: "json_parse".to_string(),
+            arity: 1,
+            func: |args| match &args[0] {
+                Value::String(s) => serde_json::from_str::<serde_json::Value>(s)
+                    .map(json_to_value)
+                    .map_err(|e| RuntimeError::Custom(format!("json_parse: invalid JSON: {}", e))),
+                _ => Err(RuntimeError::TypeError {
+                    expected: "string".to_string(),
+                    got: format!("{:?}", args[0]),
+                }),
+            },
+        }),
+    );
+
+    // json_stringify(v) -> String - serialize a My value to a JSON string
+    // (compact, object keys sorted for deterministic output).
+    define(
+        "json_stringify".to_string(),
+        Value::NativeFunction(NativeFunction {
+            name: "json_stringify".to_string(),
+            arity: 1,
+            func: |args| {
+                let j = value_to_json(&args[0])?;
+                serde_json::to_string(&j)
+                    .map(Value::String)
+                    .map_err(|e| RuntimeError::Custom(format!("json_stringify: {}", e)))
+            },
+        }),
+    );
+}
+
 /// Get a list of all stdlib function names
 pub fn stdlib_functions() -> Vec<&'static str> {
     vec![
@@ -1723,5 +1848,8 @@ pub fn stdlib_functions() -> Vec<&'static str> {
         "map_keys",
         "map_remove",
         "map_len",
+        // JSON
+        "json_parse",
+        "json_stringify",
     ]
 }
diff --git a/examples/json.my b/examples/json.my
new file mode 100644
index 0000000..4e7a41b
--- /dev/null
+++ b/examples/json.my
@@ -0,0 +1,24 @@
+// Solo JSON builtins (hyperpolymath/my-lang#47): json_parse / json_stringify.
+//
+//   my run examples/json.my
+//
+// Object keys serialize sorted (deterministic), pairing with map_keys (#46).
+// Note: string literals support standard escapes (\" \\ \n \t \uXXXX), so a
+// JSON document can be written inline.
+//
+// Expected output:
+//   {"a":[1,2,3],"b":2,"f":1.5,"n":null,"s":"hi","t":true}
+//   [1,2,3]
+//   42
+//   "plain"
+//   null
+
+fn main() -> Int {
+    let doc = json_parse("{\"b\": 2, \"a\": [1, 2, 3], \"n\": null, \"f\": 1.5, \"s\": \"hi\", \"t\": true}");
+    println(json_stringify(doc));
+    println(json_stringify(json_parse("[1, 2, 3]")));
+    println(json_stringify(json_parse("42")));
+    println(json_stringify(json_parse("\"plain\"")));
+    println(json_stringify(json_parse("null")));
+    return 0;
+}
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index a5e5e4f..759aa29 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -226,6 +226,56 @@ fn test_eval_map_builtins() {
     }
 }
 
+// String-literal escape decoding (hyperpolymath/my-lang#47). The lexer stores
+// the raw slice; parse_string_literal must decode \" \\ \n etc. Regression for
+// the latent bug where `"a\"b"` yielded the literal 4 chars a \ " b.
+#[test]
+fn test_eval_string_escapes() {
+    let source = r#"
+        fn main() -> String {
+            return "a\"b\\c\nd\tend";
+        }
+    "#;
+    match eval(source) {
+        Ok(Value::String(s)) => assert_eq!(s, "a\"b\\c\nd\tend"),
+        other => panic!("expected decoded escapes, got {:?}", other),
+    }
+}
+
+// `\u` escapes: JSON-style `\uXXXX` and Rust-style `\u{...}` decode to the
+// named code point; an unknown escape (`\q`) keeps the following character.
+#[test]
+fn test_eval_string_unicode_and_unknown_escapes() {
+    let source = r#"
+        fn main() -> String {
+            return "\u0041\u{1F600}\q";
+        }
+    "#;
+    match eval(source) {
+        Ok(Value::String(s)) => assert_eq!(s, "A\u{1F600}q"),
+        other => panic!("expected decoded escapes, got {:?}", other),
+    }
+}
+
+// JSON stdlib builtins (hyperpolymath/my-lang#47): json_parse / json_stringify.
+// Round-trips an object literal (requires escape decoding) and asserts the
+// deterministic sorted-key serialization.
+#[test]
+fn test_eval_json_roundtrip() {
+    let source = r#"
+        fn main() -> String {
+            let v = json_parse("{\"b\": 2, \"a\": [1, 2, 3], \"n\": null, \"t\": true}");
+            return json_stringify(v);
+        }
+    "#;
+    match eval(source) {
+        Ok(Value::String(s)) => {
+            assert_eq!(s, r#"{"a":[1,2,3],"b":2,"n":null,"t":true}"#)
+        }
+        other => panic!("expected sorted-key JSON string, got {:?}", other),
+    }
+}
+
 // AI Runtime tests (require API keys, so just test initialization)
 #[test]
 fn test_ai_runtime_creation() {