diff --git a/README.md b/README.md index 1e599b3..829164d 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@

- Convert, inspect, normalize, translate, annotate, and sync localization assets across Apple, Android, CSV, TSV, and Tolgee-backed pipelines. + Convert, inspect, normalize, translate, annotate, and sync localization assets across Apple, XLIFF, Android, CSV, TSV, and Tolgee-backed pipelines.

@@ -47,7 +47,7 @@ Most localization workflows are a pile of one-off scripts, format-specific tools ## Highlights - Unified data model for singular and plural translations -- Read and write support for Apple `.strings`, Apple `.xcstrings`, Android `strings.xml`, CSV, and TSV +- Read and write support for Apple `.strings`, Apple `.xcstrings`, Apple/Xcode `.xliff`, Android `strings.xml`, CSV, and TSV - CLI commands for convert, diff, merge, sync, edit, normalize, view, stats, debug, translate, annotate, and Tolgee sync - Config-driven AI workflows with `langcodec.toml` - Rust library API for teams building custom localization pipelines @@ -73,6 +73,12 @@ Try the workflow: # Convert Apple strings to Android XML langcodec convert -i Localizable.strings -o values/strings.xml +# Export an Apple/Xcode translation exchange file +langcodec convert -i Localizable.xcstrings -o Localizable.xliff --output-lang fr + +# Import XLIFF back into an Xcode string catalog +langcodec convert -i Localizable.xliff -o Localizable.xcstrings + # Inspect work that still needs attention langcodec view -i Localizable.xcstrings --status new,needs_review --keys-only @@ -125,6 +131,7 @@ langcodec annotate \ | --------------------- | :---: | :---: | :-----: | :---: | :-----: | :------: | | Apple `.strings` | yes | yes | yes | yes | no | yes | | Apple `.xcstrings` | yes | yes | yes | yes | yes | yes | +| Apple `.xliff` | yes | yes | yes | no | no | yes | | Android `strings.xml` | yes | yes | yes | yes | yes | yes | | CSV | yes | yes | yes | yes | no | no | | TSV | yes | yes | yes | yes | no | no | diff --git a/langcodec-cli/README.md b/langcodec-cli/README.md index 22a31d0..17c76ad 100644 --- a/langcodec-cli/README.md +++ b/langcodec-cli/README.md @@ -8,6 +8,7 @@ Supported formats: - Apple `.strings` - Apple `.xcstrings` +- Apple/Xcode `.xliff` - Android `strings.xml` - CSV - TSV @@ -100,8 +101,12 @@ langcodec tolgee --help ```sh langcodec convert -i Localizable.xcstrings -o translations.csv langcodec convert -i translations.csv -o values/strings.xml +langcodec convert -i Localizable.xcstrings -o Localizable.xliff --output-lang fr +langcodec convert -i Localizable.xliff -o Localizable.xcstrings ``` +For `.xliff` output, pass `--output-lang` to choose the target language. Use `--source-language` when the source language is ambiguous. + ### Find strings that still need work ```sh @@ -122,6 +127,8 @@ langcodec edit set -i values/strings.xml -k welcome_title -v "Welcome" langcodec normalize -i 'locales/**/*.{strings,xml,csv,tsv,xcstrings}' --check ``` +`normalize`, `edit`, and `sync` intentionally do not operate on `.xliff` in v1; convert XLIFF into a project format first. + ### Sync or merge existing translation assets ```sh diff --git a/langcodec-cli/src/convert.rs b/langcodec-cli/src/convert.rs index 61fd6c4..17c9be0 100644 --- a/langcodec-cli/src/convert.rs +++ b/langcodec-cli/src/convert.rs @@ -23,15 +23,117 @@ fn parse_standard_output_format(format: &str) -> Result { "strings" => Ok(FormatType::Strings(None)), "android" | "androidstrings" => Ok(FormatType::AndroidStrings(None)), "xcstrings" => Ok(FormatType::Xcstrings), + "xliff" => Ok(FormatType::Xliff(None)), "csv" => Ok(FormatType::CSV), "tsv" => Ok(FormatType::TSV), _ => Err(format!( - "Unsupported output format: '{}'. Supported formats: strings, android, xcstrings, csv, tsv", + "Unsupported output format: '{}'. Supported formats: strings, android, xcstrings, xliff, csv, tsv", format )), } } +fn wants_named_output( + output: &str, + output_format_hint: Option<&String>, + extension: &str, + format_name: &str, +) -> bool { + output.ends_with(extension) + || output_format_hint.is_some_and(|hint| hint.eq_ignore_ascii_case(format_name)) +} + +fn wants_xcstrings_output(output: &str, output_format_hint: Option<&String>) -> bool { + wants_named_output(output, output_format_hint, ".xcstrings", "xcstrings") +} + +fn wants_xliff_output(output: &str, output_format_hint: Option<&String>) -> bool { + wants_named_output(output, output_format_hint, ".xliff", "xliff") +} + +fn resolve_xliff_source_language( + resources: &[langcodec::Resource], + explicit_source_language: Option<&String>, + target_language: &str, +) -> Result { + if let Some(explicit_source_language) = explicit_source_language { + let trimmed = explicit_source_language.trim(); + if trimmed.is_empty() { + return Err("--source-language cannot be empty for .xliff output".to_string()); + } + return Ok(trimmed.to_string()); + } + + let metadata_source_languages = resources + .iter() + .filter_map(|resource| resource.metadata.custom.get("source_language")) + .map(|value| value.trim()) + .filter(|value| !value.is_empty()) + .collect::>(); + + let available_languages = resources + .iter() + .map(|resource| resource.metadata.language.trim()) + .filter(|language| !language.is_empty()) + .collect::>(); + + if metadata_source_languages.len() > 1 { + return Err(format!( + "Conflicting source_language metadata found for .xliff output: {}. Pass --source-language.", + metadata_source_languages + .into_iter() + .collect::>() + .join(", ") + )); + } + if let Some(source_language) = metadata_source_languages.iter().next() { + let extras = available_languages + .iter() + .filter(|language| **language != *source_language && **language != target_language) + .cloned() + .collect::>(); + + if *source_language != target_language && extras.is_empty() { + return Ok((*source_language).to_string()); + } + + return Err(format!( + "source_language metadata '{}' is ambiguous for .xliff output with available languages ({}). Pass --source-language.", + source_language, + available_languages + .iter() + .cloned() + .collect::>() + .join(", ") + )); + } + + if available_languages.is_empty() { + return Err("XLIFF output requires language metadata on the input resources".to_string()); + } + + if available_languages.len() == 1 { + return Ok(available_languages.iter().next().unwrap().to_string()); + } + + let non_target_languages = available_languages + .iter() + .filter(|language| **language != target_language) + .cloned() + .collect::>(); + + match non_target_languages.as_slice() { + [source_language] => Ok((*source_language).to_string()), + _ => Err(format!( + "Could not infer the XLIFF source language from available languages ({}). Pass --source-language.", + available_languages + .into_iter() + .collect::>() + .join(", ") + )), + } +} + fn infer_output_path_language(path: &str) -> Option { match langcodec::infer_format_from_path(path) { Some(FormatType::Strings(Some(lang))) | Some(FormatType::AndroidStrings(Some(lang))) => { @@ -73,10 +175,21 @@ fn resolve_convert_output_format( } Ok(output_format) } + FormatType::Xliff(_) => { + if let Some(language) = output_lang { + output_format = output_format.with_language(Some(language.clone())); + Ok(output_format) + } else { + Err( + ".xliff output requires --output-lang to select the target language" + .to_string(), + ) + } + } FormatType::Xcstrings | FormatType::CSV | FormatType::TSV => { if let Some(language) = output_lang { Err(format!( - "--output-lang '{}' is only supported for single-language outputs (.strings, strings.xml)", + "--output-lang '{}' is only supported for .strings, strings.xml, or .xliff output", language )) } else { @@ -92,6 +205,65 @@ pub fn run_unified_convert_command( options: ConvertOptions, strict: bool, ) { + let wants_xliff = wants_xliff_output(&output, options.output_format.as_ref()); + if wants_xliff { + println!( + "{}", + ui::status_line_stdout( + ui::Tone::Info, + "Converting to XLIFF 1.2 with explicit source/target language selection...", + ) + ); + match read_resources_from_any_input(&input, options.input_format.as_ref(), strict).and_then( + |mut resources| { + let output_format = resolve_convert_output_format( + &output, + options.output_format.as_ref(), + options.output_lang.as_ref(), + )?; + let target_language = + match &output_format { + FormatType::Xliff(Some(target_language)) => target_language.clone(), + _ => return Err( + ".xliff output requires --output-lang to select the target language" + .to_string(), + ), + }; + let source_language = resolve_xliff_source_language( + &resources, + options.source_language.as_ref(), + &target_language, + )?; + + for resource in &mut resources { + resource + .metadata + .custom + .insert("source_language".to_string(), source_language.clone()); + } + + convert_resources_to_format(resources, &output, output_format) + .map_err(|e| format!("Error converting to xliff: {}", e)) + }, + ) { + Ok(()) => { + println!( + "{}", + ui::status_line_stdout(ui::Tone::Success, "Successfully converted to xliff",) + ); + return; + } + Err(e) => { + println!( + "{}", + ui::status_line_stdout(ui::Tone::Error, "Conversion to xliff failed") + ); + eprintln!("Error: {}", e); + std::process::exit(1); + } + } + } + if let Some(output_lang) = options.output_lang.as_ref() { if output.ends_with(".langcodec") { eprintln!( @@ -142,11 +314,7 @@ pub fn run_unified_convert_command( // Special handling: when targeting xcstrings, ensure required metadata exists. // If source_language/version are missing, default to en/1.0 respectively. - let wants_xcstrings = output.ends_with(".xcstrings") - || options - .output_format - .as_deref() - .is_some_and(|s| s.eq_ignore_ascii_case("xcstrings")); + let wants_xcstrings = wants_xcstrings_output(&output, options.output_format.as_ref()); if wants_xcstrings { println!( "{}", @@ -624,6 +792,7 @@ fn print_conversion_error(input: &str, output: &str) { eprintln!("- .strings (Apple strings files)"); eprintln!("- .xml (Android strings files)"); eprintln!("- .xcstrings (Apple xcstrings files)"); + eprintln!("- .xliff (Apple/Xcode XLIFF 1.2 files)"); eprintln!("- .csv (CSV files)"); eprintln!("- .tsv (TSV files)"); eprintln!("- .langcodec (Resource JSON array)"); @@ -634,6 +803,7 @@ fn print_conversion_error(input: &str, output: &str) { eprintln!("- .strings (Apple strings files)"); eprintln!("- .xml (Android strings files)"); eprintln!("- .xcstrings (Apple xcstrings files)"); + eprintln!("- .xliff (Apple/Xcode XLIFF 1.2 files)"); eprintln!("- .csv (CSV files)"); eprintln!("- .tsv (TSV files)"); eprintln!("- .langcodec (Resource JSON array)"); @@ -676,11 +846,12 @@ fn try_explicit_format_conversion( "strings" => langcodec::formats::FormatType::Strings(None), "android" | "androidstrings" => langcodec::formats::FormatType::AndroidStrings(None), "xcstrings" => langcodec::formats::FormatType::Xcstrings, + "xliff" => langcodec::formats::FormatType::Xliff(None), "csv" => langcodec::formats::FormatType::CSV, "tsv" => langcodec::formats::FormatType::TSV, _ => { return Err(format!( - "Unsupported input format: '{}'. Supported formats: strings, android, xcstrings, csv, tsv", + "Unsupported input format: '{}'. Supported formats: strings, android, xcstrings, xliff, csv, tsv", input_format )); } @@ -758,6 +929,7 @@ pub fn read_resources_from_any_input( Some(langcodec::formats::FormatType::AndroidStrings(None)) } "xcstrings" => Some(langcodec::formats::FormatType::Xcstrings), + "xliff" => Some(langcodec::formats::FormatType::Xliff(None)), "csv" => Some(langcodec::formats::FormatType::CSV), "tsv" => Some(langcodec::formats::FormatType::TSV), _ => None, @@ -783,6 +955,7 @@ pub fn read_resources_from_any_input( if input.ends_with(".strings") || input.ends_with(".xml") || input.ends_with(".xcstrings") + || input.ends_with(".xliff") || input.ends_with(".csv") || input.ends_with(".tsv") { @@ -807,7 +980,7 @@ pub fn read_resources_from_any_input( } return Err(format!( - "Unsupported input format or file extension: '{}'. Supported formats: .strings, .xml, .xcstrings, .csv, .tsv, .json, .yaml, .yml, .langcodec", + "Unsupported input format or file extension: '{}'. Supported formats: .strings, .xml, .xcstrings, .xliff, .csv, .tsv, .json, .yaml, .yml, .langcodec", input )); } @@ -821,6 +994,7 @@ pub fn read_resources_from_any_input( Some(langcodec::formats::FormatType::AndroidStrings(None)) } "xcstrings" => Some(langcodec::formats::FormatType::Xcstrings), + "xliff" => Some(langcodec::formats::FormatType::Xliff(None)), "csv" => Some(langcodec::formats::FormatType::CSV), "tsv" => Some(langcodec::formats::FormatType::TSV), _ => None, @@ -895,6 +1069,8 @@ pub fn read_resources_from_any_input( Some(langcodec::formats::FormatType::AndroidStrings(Some(lang))) } else if input.ends_with(".xcstrings") { Some(langcodec::formats::FormatType::Xcstrings) + } else if input.ends_with(".xliff") { + Some(langcodec::formats::FormatType::Xliff(None)) } else if input.ends_with(".csv") { Some(langcodec::formats::FormatType::CSV) } else if input.ends_with(".tsv") { @@ -908,6 +1084,7 @@ pub fn read_resources_from_any_input( codec .read_file_by_type(input, format_type) .map_err(|e2| format!("{err_prefix}{e2}"))?; + return Ok(codec.resources); } } else { eprintln!("Standard format detection failed: {}", e); @@ -937,7 +1114,7 @@ pub fn read_resources_from_any_input( } Err(format!( - "Unsupported input format or file extension: '{}'. Supported formats: .strings, .xml, .xcstrings, .csv, .tsv, .json, .yaml, .yml, .langcodec", + "Unsupported input format or file extension: '{}'. Supported formats: .strings, .xml, .xcstrings, .xliff, .csv, .tsv, .json, .yaml, .yml, .langcodec", input )) } diff --git a/langcodec-cli/src/edit.rs b/langcodec-cli/src/edit.rs index d77ca34..111d000 100644 --- a/langcodec-cli/src/edit.rs +++ b/langcodec-cli/src/edit.rs @@ -23,6 +23,21 @@ fn infer_output_format_from_path(path: &str) -> Result { .ok_or_else(|| format!("Cannot infer format from path: {}", path)) } +fn reject_xliff_paths(input_path: &str, output_path: Option<&String>) -> Result<(), String> { + if input_path.ends_with(".xliff") { + return Err( + ".xliff is not supported by `edit` in v1. Use `convert`, `view`, or `debug` instead." + .to_string(), + ); + } + if output_path.is_some_and(|path| path.ends_with(".xliff")) { + return Err( + ".xliff is not supported as an `edit` output in v1. Use `convert` instead.".to_string(), + ); + } + Ok(()) +} + fn pick_single_resource<'a>( codec: &'a Codec, lang: &Option, @@ -76,6 +91,9 @@ fn write_back( langcodec::converter::convert_resources_to_format(resources, out, fmt) .map_err(|e| format!("Error writing output: {}", e)) } + FormatType::Xliff(_) => Err( + ".xliff is not supported as an `edit` output in v1. Use `convert` instead.".to_string(), + ), } } @@ -224,6 +242,8 @@ fn apply_set_to_file( output: Option<&String>, dry_run: bool, ) -> Result<(), String> { + reject_xliff_paths(input, output)?; + let mut codec = Codec::new(); if let Err(e) = codec.read_file_by_extension(input, lang.clone()) { let ext = Path::new(input) diff --git a/langcodec-cli/src/main.rs b/langcodec-cli/src/main.rs index 1832cf9..7174640 100644 --- a/langcodec-cli/src/main.rs +++ b/langcodec-cli/src/main.rs @@ -66,19 +66,19 @@ enum Commands { /// The output file to write the results to #[arg(short, long)] output: String, - /// Optional input format hint (e.g., "json-language-map", "json-array-language-map", "yaml-language-map", "strings", "android") + /// Optional input format hint (e.g., "json-language-map", "json-array-language-map", "yaml-language-map", "strings", "android", "xliff") #[arg(long)] input_format: Option, - /// Optional output format hint (e.g., "xcstrings", "strings", "android") + /// Optional output format hint (e.g., "xcstrings", "xliff", "strings", "android") #[arg(long)] output_format: Option, - /// For xcstrings output: override source language (default: en) + /// For xcstrings or xliff output: override source language (default: inferred or en for xcstrings) #[arg(long)] source_language: Option, /// For xcstrings output: override version (default: 1.0) #[arg(long)] version: Option, - /// Select the output language for single-language outputs like `.strings` or `strings.xml` + /// Select the output language for `.strings`, `strings.xml`, or `.xliff` output #[arg(long, value_name = "LANG")] output_lang: Option, /// Language codes to exclude from output (e.g., "en", "fr"). Can be specified multiple times or as comma-separated values (e.g., "--exclude-lang en,fr,zh-hans"). Only affects .langcodec output format. diff --git a/langcodec-cli/src/merge.rs b/langcodec-cli/src/merge.rs index 2c9267b..cfd9071 100644 --- a/langcodec-cli/src/merge.rs +++ b/langcodec-cli/src/merge.rs @@ -16,7 +16,10 @@ pub enum ConflictStrategy { Skip, } -fn resolve_merge_output_format(output: &str, lang: Option<&String>) -> Result { +fn resolve_merge_output_format( + output: &str, + lang: Option<&String>, +) -> Result { let mut output_format = converter::infer_format_from_path(output) .ok_or_else(|| format!("Cannot infer format from output path: {}", output))?; @@ -41,6 +44,10 @@ fn resolve_merge_output_format(output: &str, lang: Option<&String>) -> Result Err( + ".xliff is not supported by `merge` in v1. Use `convert` for XLIFF generation." + .to_string(), + ), langcodec::FormatType::Xcstrings | langcodec::FormatType::CSV | langcodec::FormatType::TSV => Ok(output_format), diff --git a/langcodec-cli/src/normalize.rs b/langcodec-cli/src/normalize.rs index 0e03016..1642f9c 100644 --- a/langcodec-cli/src/normalize.rs +++ b/langcodec-cli/src/normalize.rs @@ -37,6 +37,16 @@ fn infer_output_format_from_path(path: &str) -> Result { .ok_or_else(|| format!("Cannot infer format from path: {}", path)) } +fn reject_xliff_normalize_paths(input: &str, output: Option<&String>) -> Result<(), String> { + if input.ends_with(".xliff") || output.is_some_and(|path| path.ends_with(".xliff")) { + return Err( + ".xliff is not supported by `normalize` in v1. Use `convert`, `view`, or `debug` instead." + .to_string(), + ); + } + Ok(()) +} + fn pick_single_resource(codec: &Codec) -> Result<&langcodec::Resource, String> { if codec.resources.len() == 1 { Ok(&codec.resources[0]) @@ -63,6 +73,10 @@ fn write_back(codec: &Codec, input_path: &str, output_path: &Option) -> langcodec::converter::convert_resources_to_format(codec.resources.clone(), out, fmt) .map_err(|e| format!("Error writing output: {}", e)) } + FormatType::Xliff(_) => Err( + ".xliff is not supported by `normalize` in v1. Use `convert`, `view`, or `debug` instead." + .to_string(), + ), } } @@ -87,6 +101,8 @@ fn run_normalize_for_file( key_style: &KeyStyle, strict: bool, ) -> Result { + reject_xliff_normalize_paths(input, output.as_ref())?; + validate_file_path(input)?; let mut codec = Codec::new(); diff --git a/langcodec-cli/src/sync.rs b/langcodec-cli/src/sync.rs index 98aee1a..22955e1 100644 --- a/langcodec-cli/src/sync.rs +++ b/langcodec-cli/src/sync.rs @@ -40,6 +40,23 @@ fn infer_output_format_from_path(path: &str) -> Result { .ok_or_else(|| format!("Cannot infer format from path: {}", path)) } +fn reject_xliff_sync_paths( + source: &str, + target: &str, + output: Option<&String>, +) -> Result<(), String> { + if source.ends_with(".xliff") + || target.ends_with(".xliff") + || output.is_some_and(|path| path.ends_with(".xliff")) + { + return Err( + ".xliff is not supported by `sync` in v1. Convert XLIFF into a standard project format first." + .to_string(), + ); + } + Ok(()) +} + fn pick_single_resource<'a>( codec: &'a Codec, lang: &Option, @@ -77,6 +94,10 @@ fn write_back( langcodec::converter::convert_resources_to_format(codec.resources.clone(), out, fmt) .map_err(|e| format!("Error writing output: {}", e)) } + FormatType::Xliff(_) => Err( + ".xliff is not supported by `sync` in v1. Convert XLIFF into a standard project format first." + .to_string(), + ), } } @@ -110,6 +131,8 @@ fn write_report(path: &str, options: &SyncOptions, report: &SyncReport) -> Resul } pub fn run_sync_command(opts: SyncOptions) -> Result<(), String> { + reject_xliff_sync_paths(&opts.source, &opts.target, opts.output.as_ref())?; + validate_file_path(&opts.source)?; validate_file_path(&opts.target)?; if let Some(output) = &opts.output { diff --git a/langcodec-cli/src/translate.rs b/langcodec-cli/src/translate.rs index ec1b42f..76d9e47 100644 --- a/langcodec-cli/src/translate.rs +++ b/langcodec-cli/src/translate.rs @@ -1025,6 +1025,10 @@ fn validate_output_serialization( .to_writer(&mut out) .map_err(|e| format!("Error serializing Xcstrings output: {}", e)) } + FormatType::Xliff(_) => Err( + "XLIFF output is not supported by `translate` in v1. Translate into .xcstrings, .strings, or strings.xml first." + .to_string(), + ), FormatType::CSV => { let format = CSVFormat::try_from(codec.resources.clone()) .map_err(|e| format!("Error building CSV output: {}", e))?; @@ -1529,6 +1533,10 @@ fn write_back( convert_resources_to_format(codec.resources.clone(), output_path, output_format.clone()) .map_err(|e| format!("Error writing output: {}", e)) } + FormatType::Xliff(_) => Err( + "XLIFF output is not supported by `translate` in v1. Translate into .xcstrings, .strings, or strings.xml first." + .to_string(), + ), } } diff --git a/langcodec-cli/src/validation.rs b/langcodec-cli/src/validation.rs index d7d2fae..2c405b6 100644 --- a/langcodec-cli/src/validation.rs +++ b/langcodec-cli/src/validation.rs @@ -150,10 +150,11 @@ pub fn validate_standard_format(format: &str) -> Result<(), String> { "android" | "androidstrings" | "xml" => Ok(()), "strings" => Ok(()), "xcstrings" => Ok(()), + "xliff" => Ok(()), "csv" => Ok(()), "tsv" => Ok(()), _ => Err(format!( - "Unsupported standard format: {}. Supported formats: android, strings, xcstrings, csv, tsv", + "Unsupported standard format: {}. Supported formats: android, strings, xcstrings, xliff, csv, tsv", format )), } diff --git a/langcodec-cli/tests/merge_tests.rs b/langcodec-cli/tests/merge_tests.rs index 00db3c6..afd718c 100644 --- a/langcodec-cli/tests/merge_tests.rs +++ b/langcodec-cli/tests/merge_tests.rs @@ -207,11 +207,9 @@ fn test_merge_with_conflicts_skip_strategy_removes_triply_duplicated_key() { }], }; - let merged = converter::merge_resources( - &[resource1, resource2, resource3], - &ConflictStrategy::Skip, - ) - .unwrap(); + let merged = + converter::merge_resources(&[resource1, resource2, resource3], &ConflictStrategy::Skip) + .unwrap(); assert!(merged.entries.is_empty()); } diff --git a/langcodec-cli/tests/xliff_cli_tests.rs b/langcodec-cli/tests/xliff_cli_tests.rs new file mode 100644 index 0000000..13ddc74 --- /dev/null +++ b/langcodec-cli/tests/xliff_cli_tests.rs @@ -0,0 +1,401 @@ +use std::fs; +use std::process::Command; +use tempfile::TempDir; + +fn langcodec_cmd() -> Command { + Command::new(assert_cmd::cargo::cargo_bin!("langcodec")) +} + +fn write_xcstrings_fixture(path: &std::path::Path) { + let xcstrings = r#"{ + "sourceLanguage": "en", + "version": "1.0", + "strings": { + "greeting": { + "comment": "Shown on the home screen.", + "localizations": { + "en": { + "stringUnit": { + "state": "translated", + "value": "Hello" + } + }, + "fr": { + "stringUnit": { + "state": "translated", + "value": "Bonjour" + } + } + } + }, + "pending": { + "localizations": { + "en": { + "stringUnit": { + "state": "translated", + "value": "Pending" + } + } + } + } + } +} +"#; + + fs::write(path, xcstrings).unwrap(); +} + +fn write_strings_fixture(path: &std::path::Path) { + let strings = r#"/* Greeting */ +"greeting" = "Hello"; +"pending" = "Pending"; +"#; + fs::write(path, strings).unwrap(); +} + +fn write_xliff_fixture(path: &std::path::Path) { + let xliff = r#" + + + + + Hello + Bonjour + Shown on the home screen. + + + Pending + + + + +"#; + fs::write(path, xliff).unwrap(); +} + +#[test] +fn test_convert_xcstrings_to_xliff() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("Localizable.xcstrings"); + let output = temp_dir.path().join("Localizable.xliff"); + write_xcstrings_fixture(&input); + + let out = langcodec_cmd() + .args([ + "convert", + "--input", + input.to_str().unwrap(), + "--output", + output.to_str().unwrap(), + "--output-lang", + "fr", + ]) + .output() + .unwrap(); + + assert!( + out.status.success(), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + let xml = fs::read_to_string(&output).unwrap(); + assert!(xml.contains(r#"target-language="fr""#)); + assert!(xml.contains("Bonjour")); + assert!(xml.contains("")); +} + +#[test] +fn test_convert_xliff_to_xcstrings() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("Localizable.xliff"); + let output = temp_dir.path().join("Localizable.xcstrings"); + write_xliff_fixture(&input); + + let out = langcodec_cmd() + .args([ + "convert", + "--input", + input.to_str().unwrap(), + "--output", + output.to_str().unwrap(), + ]) + .output() + .unwrap(); + + assert!( + out.status.success(), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + let content = fs::read_to_string(&output).unwrap(); + assert!(content.contains(r#""sourceLanguage": "en""#)); + assert!(content.contains(r#""fr""#)); + assert!(content.contains(r#""pending""#)); +} + +#[test] +fn test_convert_strings_to_xliff_with_empty_target_language() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("en.strings"); + let output = temp_dir.path().join("Localizable.xliff"); + write_strings_fixture(&input); + + let out = langcodec_cmd() + .args([ + "convert", + "--input", + input.to_str().unwrap(), + "--output", + output.to_str().unwrap(), + "--source-language", + "en", + "--output-lang", + "fr", + ]) + .output() + .unwrap(); + + assert!( + out.status.success(), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + let xml = fs::read_to_string(&output).unwrap(); + assert!(xml.contains(r#"source-language="en""#)); + assert!(xml.contains(r#"target-language="fr""#)); + assert!(xml.contains("")); +} + +#[test] +fn test_convert_xliff_to_android_strings() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("Localizable.xliff"); + let output = temp_dir.path().join("values-fr").join("strings.xml"); + write_xliff_fixture(&input); + + let out = langcodec_cmd() + .args([ + "convert", + "--input", + input.to_str().unwrap(), + "--output", + output.to_str().unwrap(), + "--output-lang", + "fr", + ]) + .output() + .unwrap(); + + assert!( + out.status.success(), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + let xml = fs::read_to_string(&output).unwrap(); + assert!(xml.contains(r#"name="greeting""#)); + assert!(xml.contains("Bonjour")); + assert!(!xml.contains(r#""#)); +} + +#[test] +fn test_convert_xliff_requires_output_lang() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("Localizable.xcstrings"); + let output = temp_dir.path().join("Localizable.xliff"); + write_xcstrings_fixture(&input); + + let out = langcodec_cmd() + .args([ + "convert", + "--input", + input.to_str().unwrap(), + "--output", + output.to_str().unwrap(), + ]) + .output() + .unwrap(); + + assert!(!out.status.success()); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!(combined.contains("--output-lang")); +} + +#[test] +fn test_convert_xliff_surfaces_ambiguous_source_language() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("translations.csv"); + let output = temp_dir.path().join("Localizable.xliff"); + fs::write( + &input, + "key,en,fr,de\nwelcome,Welcome,Bienvenue,Willkommen\n", + ) + .unwrap(); + + let out = langcodec_cmd() + .args([ + "convert", + "--input", + input.to_str().unwrap(), + "--output", + output.to_str().unwrap(), + "--output-lang", + "fr", + ]) + .output() + .unwrap(); + + assert!(!out.status.success()); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!(combined.contains("--source-language") || combined.contains("Could not infer")); +} + +#[test] +fn test_view_reads_xliff_input() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("Localizable.xliff"); + write_xliff_fixture(&input); + + let out = langcodec_cmd() + .args([ + "view", + "--input", + input.to_str().unwrap(), + "--lang", + "fr", + "--keys-only", + ]) + .output() + .unwrap(); + + assert!( + out.status.success(), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("greeting")); + assert!(stdout.contains("pending")); +} + +#[test] +fn test_debug_reads_xliff_input() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("Localizable.xliff"); + let output = temp_dir.path().join("debug.json"); + write_xliff_fixture(&input); + + let out = langcodec_cmd() + .args([ + "debug", + "--input", + input.to_str().unwrap(), + "--output", + output.to_str().unwrap(), + ]) + .output() + .unwrap(); + + assert!( + out.status.success(), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + let json = fs::read_to_string(&output).unwrap(); + assert!(json.contains(r#""language": "en""#)); + assert!(json.contains(r#""language": "fr""#)); + assert!(json.contains(r#""greeting""#)); +} + +#[test] +fn test_edit_rejects_xliff() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("Localizable.xliff"); + write_xliff_fixture(&input); + + let out = langcodec_cmd() + .args([ + "edit", + "set", + "--inputs", + input.to_str().unwrap(), + "--key", + "greeting", + "--value", + "Bonjour!", + ]) + .output() + .unwrap(); + + assert!(!out.status.success()); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!(combined.contains("not supported by `edit`")); +} + +#[test] +fn test_sync_rejects_xliff() { + let temp_dir = TempDir::new().unwrap(); + let source = temp_dir.path().join("source.xliff"); + let target = temp_dir.path().join("target.csv"); + write_xliff_fixture(&source); + fs::write(&target, "key,en,fr\ngreeting,Hello,Bonjour\n").unwrap(); + + let out = langcodec_cmd() + .args([ + "sync", + "--source", + source.to_str().unwrap(), + "--target", + target.to_str().unwrap(), + "--dry-run", + ]) + .output() + .unwrap(); + + assert!(!out.status.success()); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!(combined.contains("not supported by `sync`")); +} + +#[test] +fn test_normalize_rejects_xliff() { + let temp_dir = TempDir::new().unwrap(); + let input = temp_dir.path().join("Localizable.xliff"); + write_xliff_fixture(&input); + + let out = langcodec_cmd() + .args(["normalize", "--inputs", input.to_str().unwrap()]) + .output() + .unwrap(); + + assert!(!out.status.success()); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!(combined.contains("not supported by `normalize`")); +} diff --git a/langcodec/README.md b/langcodec/README.md index e04931f..644e78a 100644 --- a/langcodec/README.md +++ b/langcodec/README.md @@ -2,7 +2,7 @@ Universal localization file toolkit for Rust. Parse, write, convert, merge. -- Formats: Apple `.strings`, `.xcstrings`, Android `strings.xml`, CSV, TSV +- Formats: Apple `.strings`, `.xcstrings`, `.xliff`, Android `strings.xml`, CSV, TSV - Unified model: `Resource` with `Entry`, `Translation::Singular|Plural` - Robust error type, utilities to infer format/language, merge, cache @@ -23,6 +23,23 @@ use langcodec::{Codec, convert_auto}; // Convert between formats automatically convert_auto("Localizable.strings", "strings.xml")?; +// Build an XLIFF exchange file (target language must be selected explicitly) +langcodec::convert_resources_to_format( + vec![langcodec::Resource { + metadata: langcodec::Metadata { + language: "en".into(), + domain: "Localizable".into(), + custom: std::collections::HashMap::from([( + "source_language".into(), + "en".into(), + )]), + }, + entries: vec![], + }], + "Localizable.xliff", + langcodec::FormatType::Xliff(Some("fr".into())), +)?; + // Load, inspect, and write let mut codec = Codec::new(); codec.read_file_by_extension("en.lproj/Localizable.strings", None)?; @@ -61,6 +78,7 @@ codec.update_translation("welcome", "en", Translation::Singular("Hello!".into()) ## Plurals - `.xcstrings`: plural variations supported via CLDR categories +- `.xliff`: Apple/Xcode XLIFF 1.2 bilingual exchange files - Android `strings.xml`: `` supported (one/two/few/many/other/zero) ## Error Handling diff --git a/langcodec/src/builder.rs b/langcodec/src/builder.rs index 1e0429b..a1585a9 100644 --- a/langcodec/src/builder.rs +++ b/langcodec/src/builder.rs @@ -16,7 +16,7 @@ /// .build(); /// # Ok::<(), langcodec::Error>(()) /// ``` -use crate::formats::{CSVFormat, TSVFormat}; +use crate::formats::{CSVFormat, TSVFormat, XliffFormat}; use crate::{error::Error, formats::*, traits::Parser, types::Resource}; use std::path::Path; @@ -68,6 +68,7 @@ impl CodecBuilder { vec![Resource::from(AndroidStringsFormat::read_from(path)?)] } FormatType::Xcstrings => Vec::::try_from(XcstringsFormat::read_from(path)?)?, + FormatType::Xliff(_) => Vec::::try_from(XliffFormat::read_from(path)?)?, FormatType::CSV => { // Parse CSV format and convert to resources let csv_format = CSVFormat::read_from(path)?; @@ -80,8 +81,13 @@ impl CodecBuilder { } }; + let should_override_language = matches!( + format_type, + FormatType::Strings(_) | FormatType::AndroidStrings(_) + ); + for new_resource in &mut new_resources { - if let Some(ref lang) = language { + if should_override_language && let Some(ref lang) = language { new_resource.metadata.language = lang.clone(); } new_resource.metadata.domain = domain.clone(); @@ -134,6 +140,7 @@ impl CodecBuilder { vec![Resource::from(AndroidStringsFormat::read_from(path)?)] } FormatType::Xcstrings => Vec::::try_from(XcstringsFormat::read_from(path)?)?, + FormatType::Xliff(_) => Vec::::try_from(XliffFormat::read_from(path)?)?, FormatType::CSV => { // Parse CSV format and convert to resources let csv_format = CSVFormat::read_from(path)?; @@ -146,8 +153,13 @@ impl CodecBuilder { } }; + let should_override_language = matches!( + format_type, + FormatType::Strings(_) | FormatType::AndroidStrings(_) + ); + for new_resource in &mut new_resources { - if let Some(ref lang) = language { + if should_override_language && let Some(ref lang) = language { new_resource.metadata.language = lang.clone(); } new_resource.metadata.domain = domain.clone(); @@ -183,6 +195,7 @@ impl CodecBuilder { Some("xml") => FormatType::AndroidStrings(lang), Some("strings") => FormatType::Strings(lang), Some("xcstrings") => FormatType::Xcstrings, + Some("xliff") => FormatType::Xliff(lang), Some("csv") => FormatType::CSV, Some("tsv") => FormatType::TSV, extension => { diff --git a/langcodec/src/codec.rs b/langcodec/src/codec.rs index 6246fb2..553b1f3 100644 --- a/langcodec/src/codec.rs +++ b/langcodec/src/codec.rs @@ -987,6 +987,9 @@ impl Codec { ) }) } + crate::formats::FormatType::Xliff(_) => Err(Error::InvalidResource( + "XLIFF output requires both source and target resources; use convert_resources_to_format instead of write_resource_to_file".to_string(), + )), crate::formats::FormatType::CSV => CSVFormat::try_from(vec![resource.clone()]) .and_then(|f| f.write_to(Path::new(output_path))) .map_err(|e| { @@ -1030,6 +1033,7 @@ impl Codec { FormatType::Strings(lang_opt) | FormatType::AndroidStrings(lang_opt) => { lang_opt.clone() } + FormatType::Xliff(lang_opt) => lang_opt.clone(), _ => None, }; @@ -1068,6 +1072,7 @@ impl Codec { vec![Resource::from(AndroidStringsFormat::read_from(path)?)] } FormatType::Xcstrings => Vec::::try_from(XcstringsFormat::read_from(path)?)?, + FormatType::Xliff(_) => Vec::::try_from(XliffFormat::read_from(path)?)?, FormatType::CSV => { // Parse CSV format and convert to resources let csv_format = CSVFormat::read_from(path)?; @@ -1136,6 +1141,7 @@ impl Codec { Some("xml") => FormatType::AndroidStrings(options.language_hint.clone()), Some("strings") => FormatType::Strings(options.language_hint.clone()), Some("xcstrings") => FormatType::Xcstrings, + Some("xliff") => FormatType::Xliff(None), Some("csv") => FormatType::CSV, Some("tsv") => FormatType::TSV, extension => { diff --git a/langcodec/src/converter.rs b/langcodec/src/converter.rs index e49a072..57aa892 100644 --- a/langcodec/src/converter.rs +++ b/langcodec/src/converter.rs @@ -8,6 +8,7 @@ use crate::{ error::Error, formats::{ AndroidStringsFormat, CSVFormat, FormatType, StringsFormat, TSVFormat, XcstringsFormat, + XliffFormat, }, placeholder::normalize_placeholders, traits::Parser, @@ -41,10 +42,22 @@ fn single_language_output_label(output_format: &FormatType) -> &'static str { match output_format { FormatType::AndroidStrings(_) => "Android strings.xml", FormatType::Strings(_) => "Apple .strings", - FormatType::Xcstrings | FormatType::CSV | FormatType::TSV => "single-language", + FormatType::Xcstrings | FormatType::Xliff(_) | FormatType::CSV | FormatType::TSV => { + "single-language" + } } } +fn should_propagate_input_language(input_format: &FormatType, output_format: &FormatType) -> bool { + matches!( + (input_format, output_format), + (FormatType::AndroidStrings(_), FormatType::AndroidStrings(_)) + | (FormatType::AndroidStrings(_), FormatType::Strings(_)) + | (FormatType::Strings(_), FormatType::AndroidStrings(_)) + | (FormatType::Strings(_), FormatType::Strings(_)) + ) +} + fn describe_resource_languages(resources: &[Resource]) -> String { let languages = resources .iter() @@ -70,7 +83,9 @@ fn select_single_language_resource( output_format: &FormatType, ) -> Result { if resources.is_empty() { - return Err(Error::InvalidResource("No resources to convert".to_string())); + return Err(Error::InvalidResource( + "No resources to convert".to_string(), + )); } let output_label = single_language_output_label(output_format); @@ -101,10 +116,12 @@ fn select_single_language_resource( describe_resource_languages(resources) ))), }, - FormatType::Xcstrings | FormatType::CSV | FormatType::TSV => Err(Error::InvalidResource( - "single-language resource selection requires a single-language output format" - .to_string(), - )), + FormatType::Xcstrings | FormatType::Xliff(_) | FormatType::CSV | FormatType::TSV => { + Err(Error::InvalidResource( + "single-language resource selection requires a single-language output format" + .to_string(), + )) + } } } @@ -173,6 +190,13 @@ pub fn convert_resources_to_format( Error::conversion_error(format!("Error writing Xcstrings output: {}", e), None) }) } + FormatType::Xliff(target_language) => { + XliffFormat::from_resources(resources, None, target_language.as_deref()) + .and_then(|f| f.write_to(Path::new(output_path))) + .map_err(|e| { + Error::conversion_error(format!("Error writing XLIFF output: {}", e), None) + }) + } FormatType::CSV => CSVFormat::try_from(resources) .and_then(|f| f.write_to(Path::new(output_path))) .map_err(|e| Error::conversion_error(format!("Error writing CSV output: {}", e), None)), @@ -214,8 +238,12 @@ pub fn convert>( output_format: FormatType, ) -> Result<(), Error> { // Propagate language code from input to output format if not specified - let output_format = if let Some(lang) = input_format.language() { - output_format.with_language(Some(lang.clone())) + let output_format = if should_propagate_input_language(&input_format, &output_format) { + input_format + .language() + .cloned() + .map(|lang| output_format.with_language(Some(lang))) + .unwrap_or(output_format) } else { output_format }; @@ -231,6 +259,7 @@ pub fn convert>( FormatType::AndroidStrings(_) => vec![AndroidStringsFormat::read_from(input)?.into()], FormatType::Strings(_) => vec![StringsFormat::read_from(input)?.into()], FormatType::Xcstrings => Vec::::try_from(XcstringsFormat::read_from(input)?)?, + FormatType::Xliff(_) => Vec::::try_from(XliffFormat::read_from(input)?)?, FormatType::CSV => Vec::::try_from(CSVFormat::read_from(input)?)?, FormatType::TSV => Vec::::try_from(TSVFormat::read_from(input)?)?, }; @@ -258,6 +287,10 @@ pub fn convert>( StringsFormat::try_from(resource)?.write_to(output) } FormatType::Xcstrings => XcstringsFormat::try_from(resources)?.write_to(output), + FormatType::Xliff(target_language) => { + XliffFormat::from_resources(resources, None, target_language.as_deref())? + .write_to(output) + } FormatType::CSV => CSVFormat::try_from(resources)?.write_to(output), FormatType::TSV => TSVFormat::try_from(resources)?.write_to(output), } @@ -293,8 +326,12 @@ pub fn convert_with_normalization>( let output = output.as_ref(); // Carry language between single-language formats - let output_format = if let Some(lang) = input_format.language() { - output_format.with_language(Some(lang.clone())) + let output_format = if should_propagate_input_language(&input_format, &output_format) { + input_format + .language() + .cloned() + .map(|lang| output_format.with_language(Some(lang))) + .unwrap_or(output_format) } else { output_format }; @@ -310,6 +347,7 @@ pub fn convert_with_normalization>( FormatType::AndroidStrings(_) => vec![AndroidStringsFormat::read_from(input)?.into()], FormatType::Strings(_) => vec![StringsFormat::read_from(input)?.into()], FormatType::Xcstrings => Vec::::try_from(XcstringsFormat::read_from(input)?)?, + FormatType::Xliff(_) => Vec::::try_from(XliffFormat::read_from(input)?)?, FormatType::CSV => Vec::::try_from(CSVFormat::read_from(input)?)?, FormatType::TSV => Vec::::try_from(TSVFormat::read_from(input)?)?, }; @@ -357,6 +395,10 @@ pub fn convert_with_normalization>( StringsFormat::try_from(resource)?.write_to(output) } FormatType::Xcstrings => XcstringsFormat::try_from(resources)?.write_to(output), + FormatType::Xliff(target_language) => { + XliffFormat::from_resources(resources, None, target_language.as_deref())? + .write_to(output) + } FormatType::CSV => CSVFormat::try_from(resources)?.write_to(output), FormatType::TSV => TSVFormat::try_from(resources)?.write_to(output), } @@ -514,6 +556,7 @@ pub fn infer_format_from_extension>(path: P) -> Option Some(FormatType::Strings(None)), "xml" => Some(FormatType::AndroidStrings(None)), "xcstrings" => Some(FormatType::Xcstrings), + "xliff" => Some(FormatType::Xliff(None)), "csv" => Some(FormatType::CSV), "tsv" => Some(FormatType::TSV), _ => None, @@ -555,7 +598,9 @@ pub fn infer_format_from_path>(path: P) -> Option { match infer_format_from_extension(&path) { Some(format) => match format { // Multi-language formats, no language inference needed - FormatType::Xcstrings | FormatType::CSV | FormatType::TSV => Some(format), + FormatType::Xcstrings | FormatType::Xliff(_) | FormatType::CSV | FormatType::TSV => { + Some(format) + } FormatType::AndroidStrings(_) | FormatType::Strings(_) => { let lang = infer_language_from_path(&path, &format).ok().flatten(); Some(format.with_language(lang)) @@ -738,6 +783,9 @@ pub fn write_resources_to_file(resources: &[Resource], file_path: &String) -> Re Some("AndroidStrings") => AndroidStringsFormat::from(first.clone()).write_to(path)?, Some("Strings") => StringsFormat::try_from(first.clone())?.write_to(path)?, Some("Xcstrings") => XcstringsFormat::try_from(resources.to_vec())?.write_to(path)?, + Some("xliff") | Some("Xliff") => { + XliffFormat::try_from(resources.to_vec())?.write_to(path)? + } Some("CSV") => CSVFormat::try_from(resources.to_vec())?.write_to(path)?, Some("TSV") => TSVFormat::try_from(resources.to_vec())?.write_to(path)?, _ => Err(Error::UnsupportedFormat(format!( diff --git a/langcodec/src/formats.rs b/langcodec/src/formats.rs index c0d8256..9abc291 100644 --- a/langcodec/src/formats.rs +++ b/langcodec/src/formats.rs @@ -8,6 +8,7 @@ pub mod csv; pub mod strings; pub mod tsv; pub mod xcstrings; +pub mod xliff; use std::{ fmt::{Display, Formatter}, @@ -20,6 +21,7 @@ pub use csv::{Format as CSVFormat, MultiLanguageCSVRecord}; pub use strings::Format as StringsFormat; pub use tsv::{Format as TSVFormat, MultiLanguageTSVRecord}; pub use xcstrings::Format as XcstringsFormat; +pub use xliff::Format as XliffFormat; use crate::Error; @@ -34,6 +36,8 @@ pub enum FormatType { Strings(Option), /// Apple `.xcstrings` format (no language code). Xcstrings, + /// Apple/Xcode `.xliff` format, with optional target language hint. + Xliff(Option), /// CSV format (multi-language support built-in). CSV, /// TSV format (multi-language support built-in). @@ -61,6 +65,7 @@ impl Display for FormatType { FormatType::AndroidStrings(_) => write!(f, "android"), FormatType::Strings(_) => write!(f, "strings"), FormatType::Xcstrings => write!(f, "xcstrings"), + FormatType::Xliff(_) => write!(f, "xliff"), FormatType::CSV => write!(f, "csv"), FormatType::TSV => write!(f, "tsv"), } @@ -93,6 +98,7 @@ impl FromStr for FormatType { "android" | "androidstrings" | "xml" => Ok(FormatType::AndroidStrings(None)), "strings" => Ok(FormatType::Strings(None)), "xcstrings" => Ok(FormatType::Xcstrings), + "xliff" => Ok(FormatType::Xliff(None)), "csv" => Ok(FormatType::CSV), "tsv" => Ok(FormatType::TSV), other => Err(Error::UnknownFormat(other.to_string())), @@ -107,6 +113,7 @@ impl FormatType { FormatType::AndroidStrings(_) => "xml", FormatType::Strings(_) => "strings", FormatType::Xcstrings => "xcstrings", + FormatType::Xliff(_) => "xliff", FormatType::CSV => "csv", FormatType::TSV => "tsv", } @@ -118,6 +125,7 @@ impl FormatType { FormatType::AndroidStrings(lang) => lang.as_ref(), FormatType::Strings(lang) => lang.as_ref(), FormatType::Xcstrings => None, + FormatType::Xliff(lang) => lang.as_ref(), FormatType::CSV => None, FormatType::TSV => None, } @@ -129,6 +137,7 @@ impl FormatType { FormatType::AndroidStrings(_) => FormatType::AndroidStrings(lang), FormatType::Strings(_) => FormatType::Strings(lang), FormatType::Xcstrings => FormatType::Xcstrings, + FormatType::Xliff(_) => FormatType::Xliff(lang), FormatType::CSV => FormatType::CSV, FormatType::TSV => FormatType::TSV, } @@ -162,6 +171,7 @@ impl FormatType { match (self, other) { // Multi-language containers match anything (both directions) (FormatType::Xcstrings, _) | (_, FormatType::Xcstrings) => true, + (FormatType::Xliff(_), _) | (_, FormatType::Xliff(_)) => true, (FormatType::CSV, _) | (_, FormatType::CSV) => true, (FormatType::TSV, _) | (_, FormatType::TSV) => true, _ => self.language() == other.language(), @@ -178,6 +188,7 @@ mod tests { assert_eq!(FormatType::AndroidStrings(None).to_string(), "android"); assert_eq!(FormatType::Strings(None).to_string(), "strings"); assert_eq!(FormatType::Xcstrings.to_string(), "xcstrings"); + assert_eq!(FormatType::Xliff(None).to_string(), "xliff"); assert_eq!(FormatType::CSV.to_string(), "csv"); assert_eq!(FormatType::TSV.to_string(), "tsv"); } @@ -222,6 +233,15 @@ mod tests { FormatType::Xcstrings ); + assert_eq!( + FormatType::from_str("xliff").unwrap(), + FormatType::Xliff(None) + ); + assert_eq!( + FormatType::from_str("XLIFF").unwrap(), + FormatType::Xliff(None) + ); + // CSV format assert_eq!(FormatType::from_str("csv").unwrap(), FormatType::CSV); assert_eq!(FormatType::from_str("CSV").unwrap(), FormatType::CSV); diff --git a/langcodec/src/formats/xliff.rs b/langcodec/src/formats/xliff.rs new file mode 100644 index 0000000..caaec01 --- /dev/null +++ b/langcodec/src/formats/xliff.rs @@ -0,0 +1,1205 @@ +//! Support for Apple/Xcode-flavored XLIFF 1.2 localization exchange files. +//! +//! This implementation intentionally targets the narrow Xcode-style subset used +//! for localization export/import: +//! - root `` +//! - `` groups with `source-language` and optional `target-language` +//! - `` entries with plain-text ``, optional ``, +//! and optional `` translator comments +//! +//! More advanced XLIFF constructs such as nested inline markup, `` +//! plural payloads, or XLIFF 2.0 are rejected explicitly. + +use quick_xml::{ + Reader, Writer, + events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}, +}; +use std::{ + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, + io::{BufRead, Write}, + path::Path, +}; + +use crate::{ + error::Error, + traits::Parser, + types::{Entry, EntryStatus, Metadata, Resource, Translation}, +}; + +pub const XLIFF_ORIGINAL_KEY: &str = "xliff.original"; +pub const XLIFF_DATATYPE_KEY: &str = "xliff.datatype"; +pub const XLIFF_RESNAME_KEY: &str = "xliff.resname"; + +const DEFAULT_VERSION: &str = "1.2"; +const DEFAULT_DATATYPE: &str = "plaintext"; +const XLIFF_NAMESPACE: &str = "urn:oasis:names:tc:xliff:document:1.2"; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Format { + pub files: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FileGroup { + pub original: Option, + pub source_language: String, + pub target_language: Option, + pub datatype: String, + pub units: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TransUnit { + pub id: String, + pub resname: Option, + pub source: String, + pub target: Option, + pub notes: Vec, +} + +impl Parser for Format { + fn from_reader(reader: R) -> Result { + let mut xml_reader = Reader::from_reader(reader); + xml_reader.config_mut().trim_text(false); + + let mut buf = Vec::new(); + let mut files = Vec::new(); + let mut saw_root = false; + let mut current_file: Option = None; + + loop { + match xml_reader.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) if e.name().as_ref() == b"xliff" => { + saw_root = true; + let version = required_attr(e, b"version", "")?; + if version != DEFAULT_VERSION { + return Err(Error::UnsupportedFormat(format!( + "Unsupported XLIFF version '{}'. Only XLIFF 1.2 is supported.", + version + ))); + } + } + Ok(Event::Start(ref e)) if e.name().as_ref() == b"file" => { + if current_file.is_some() { + return Err(Error::InvalidResource( + "Nested elements are not supported".to_string(), + )); + } + current_file = Some(parse_file_group_start(e)?); + } + Ok(Event::Start(ref e)) if e.name().as_ref() == b"body" => {} + Ok(Event::Start(ref e)) if e.name().as_ref() == b"trans-unit" => { + let file = current_file.as_mut().ok_or_else(|| { + Error::InvalidResource( + " encountered outside of a group".to_string(), + ) + })?; + let unit = parse_trans_unit(e, &mut xml_reader, &file.target_language)?; + file.units.push(unit); + } + Ok(Event::Start(ref e)) if e.name().as_ref() == b"group" => { + return Err(Error::UnsupportedFormat( + "Plural/group XLIFF payloads are not supported in v1".to_string(), + )); + } + Ok(Event::Start(ref e)) if e.name().as_ref() == b"bin-unit" => { + return Err(Error::UnsupportedFormat( + "Binary XLIFF units are not supported in v1".to_string(), + )); + } + Ok(Event::End(ref e)) if e.name().as_ref() == b"file" => { + let file = current_file.take().ok_or_else(|| { + Error::InvalidResource("Unexpected without matching ".into()) + })?; + validate_file_group(&file)?; + files.push(file); + } + Ok(Event::Eof) => break, + Ok(_) => {} + Err(e) => return Err(Error::XmlParse(e)), + } + buf.clear(); + } + + if !saw_root { + return Err(Error::InvalidResource( + "Missing root element".to_string(), + )); + } + + if current_file.is_some() { + return Err(Error::InvalidResource( + "Unexpected EOF before closing ".to_string(), + )); + } + + Ok(Self { files }) + } + + fn to_writer(&self, mut writer: W) -> Result<(), Error> { + let mut xml_writer = Writer::new(&mut writer); + + xml_writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("utf-8"), None)))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + + let mut root = BytesStart::new("xliff"); + root.push_attribute(("xmlns", XLIFF_NAMESPACE)); + root.push_attribute(("version", DEFAULT_VERSION)); + xml_writer.write_event(Event::Start(root))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + + let mut files = self.files.clone(); + files.sort_by(|a, b| { + let left = a.original.as_deref().unwrap_or(""); + let right = b.original.as_deref().unwrap_or(""); + left.cmp(right) + .then_with(|| a.source_language.cmp(&b.source_language)) + .then_with(|| a.target_language.cmp(&b.target_language)) + }); + + for file in &files { + write_indent(&mut xml_writer, 1)?; + let mut file_start = BytesStart::new("file"); + file_start.push_attribute(("source-language", file.source_language.as_str())); + if let Some(target_language) = &file.target_language { + file_start.push_attribute(("target-language", target_language.as_str())); + } + if let Some(original) = &file.original { + file_start.push_attribute(("original", original.as_str())); + } + file_start.push_attribute(("datatype", file.datatype.as_str())); + xml_writer.write_event(Event::Start(file_start))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + + write_indent(&mut xml_writer, 2)?; + xml_writer.write_event(Event::Start(BytesStart::new("body")))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + + let mut units = file.units.clone(); + units.sort_by(|a, b| a.id.cmp(&b.id)); + for unit in &units { + write_indent(&mut xml_writer, 3)?; + let mut unit_start = BytesStart::new("trans-unit"); + unit_start.push_attribute(("id", unit.id.as_str())); + unit_start.push_attribute(("xml:space", "preserve")); + if let Some(resname) = &unit.resname { + unit_start.push_attribute(("resname", resname.as_str())); + } + xml_writer.write_event(Event::Start(unit_start))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + + write_text_element(&mut xml_writer, 4, "source", &unit.source)?; + if file.target_language.is_some() { + write_optional_text_element( + &mut xml_writer, + 4, + "target", + unit.target.as_deref(), + )?; + } + for note in &unit.notes { + write_text_element(&mut xml_writer, 4, "note", note)?; + } + + write_indent(&mut xml_writer, 3)?; + xml_writer.write_event(Event::End(BytesEnd::new("trans-unit")))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + } + + write_indent(&mut xml_writer, 2)?; + xml_writer.write_event(Event::End(BytesEnd::new("body")))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + + write_indent(&mut xml_writer, 1)?; + xml_writer.write_event(Event::End(BytesEnd::new("file")))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + } + + xml_writer.write_event(Event::End(BytesEnd::new("xliff")))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + Ok(()) + } +} + +impl Format { + pub fn from_resources( + resources: Vec, + source_language_hint: Option<&str>, + target_language_hint: Option<&str>, + ) -> Result { + if resources.is_empty() { + return Err(Error::InvalidResource( + "No resources provided for XLIFF output".to_string(), + )); + } + + let languages = collect_languages(&resources)?; + let source_language = resolve_source_language( + &resources, + &languages, + source_language_hint, + target_language_hint, + )?; + let target_language = + resolve_target_language(&languages, &source_language, target_language_hint)?; + + let mut group_map: BTreeMap = BTreeMap::new(); + + for resource in resources { + if resource.metadata.language != source_language + && resource.metadata.language != target_language + { + return Err(Error::InvalidResource(format!( + "XLIFF output requires exactly one source language ('{}') and one target language ('{}'), but found extra language '{}'", + source_language, target_language, resource.metadata.language + ))); + } + + let group_key = resource + .metadata + .custom + .get(XLIFF_ORIGINAL_KEY) + .cloned() + .or_else(|| { + let trimmed = resource.metadata.domain.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + } + }) + .unwrap_or_else(|| "Localizable".to_string()); + + let datatype = resource + .metadata + .custom + .get(XLIFF_DATATYPE_KEY) + .cloned() + .unwrap_or_else(|| DEFAULT_DATATYPE.to_string()); + + reject_plural_like_metadata( + resource + .metadata + .custom + .get(XLIFF_ORIGINAL_KEY) + .map(String::as_str), + Some(datatype.as_str()), + )?; + + let group = group_map + .entry(group_key.clone()) + .or_insert_with(|| GroupAccumulator { + original: Some(group_key.clone()), + datatype: datatype.clone(), + source_entries: BTreeMap::new(), + target_entries: BTreeMap::new(), + }); + + if group.datatype != datatype { + return Err(Error::DataMismatch(format!( + "Conflicting XLIFF datatype metadata for group '{}': '{}' vs '{}'", + group_key, group.datatype, datatype + ))); + } + + for entry in resource.entries { + let prepared = PreparedEntry::from_entry(entry)?; + let prepared_id = prepared.id.clone(); + let destination = if resource.metadata.language == source_language { + &mut group.source_entries + } else { + &mut group.target_entries + }; + + if destination.insert(prepared_id.clone(), prepared).is_some() { + return Err(Error::InvalidResource(format!( + "Duplicate XLIFF entry id '{}' in group '{}' for language '{}'", + prepared_id, group_key, resource.metadata.language + ))); + } + } + } + + let mut files = Vec::new(); + for group in group_map.into_values() { + let mut units = Vec::new(); + for (id, source_entry) in group.source_entries { + let target_entry = group.target_entries.get(&id); + let comment = source_entry + .comment + .clone() + .or_else(|| target_entry.and_then(|entry| entry.comment.clone())); + let target = target_entry.map(|entry| entry.value.clone()); + + units.push(TransUnit { + id, + resname: source_entry.resname.clone(), + source: source_entry.value, + target, + notes: split_comment_into_notes(comment.as_deref()), + }); + } + + for extra_target_id in group.target_entries.keys() { + if !units.iter().any(|unit| &unit.id == extra_target_id) { + return Err(Error::InvalidResource(format!( + "Target XLIFF entry '{}' is missing a matching source entry in group '{}'", + extra_target_id, + group.original.as_deref().unwrap_or(""), + ))); + } + } + + files.push(FileGroup { + original: group.original, + source_language: source_language.clone(), + target_language: Some(target_language.clone()), + datatype: group.datatype, + units, + }); + } + + Ok(Self { files }) + } +} + +impl TryFrom> for Format { + type Error = Error; + + fn try_from(resources: Vec) -> Result { + Self::from_resources(resources, None, None) + } +} + +impl TryFrom for Vec { + type Error = Error; + + fn try_from(format: Format) -> Result { + let mut resources = Vec::new(); + + for file in format.files { + validate_file_group(&file)?; + + let domain = domain_from_original(file.original.as_deref()); + let mut base_custom = HashMap::new(); + base_custom.insert("source_language".to_string(), file.source_language.clone()); + if let Some(original) = &file.original { + base_custom.insert(XLIFF_ORIGINAL_KEY.to_string(), original.clone()); + } + base_custom.insert(XLIFF_DATATYPE_KEY.to_string(), file.datatype.clone()); + + let mut source_entries = Vec::new(); + let mut target_entries = Vec::new(); + let mut seen_ids = HashSet::new(); + + for unit in file.units { + if !seen_ids.insert(unit.id.clone()) { + return Err(Error::InvalidResource(format!( + "Duplicate trans-unit id '{}' within XLIFF file group '{}'", + unit.id, + file.original.as_deref().unwrap_or(""), + ))); + } + + let comment = notes_to_comment(&unit.notes); + + let mut source_custom = HashMap::new(); + if let Some(resname) = unit.resname.as_ref().filter(|resname| *resname != &unit.id) + { + source_custom.insert(XLIFF_RESNAME_KEY.to_string(), resname.clone()); + } + + source_entries.push(Entry { + id: unit.id.clone(), + value: Translation::Singular(unit.source), + comment: comment.clone(), + status: EntryStatus::Translated, + custom: source_custom.clone(), + }); + + if let Some(target_language) = &file.target_language { + let target_value = unit.target.unwrap_or_default(); + let has_target_value = !target_value.is_empty(); + target_entries.push(Entry { + id: unit.id, + value: if has_target_value { + Translation::Singular(target_value) + } else { + Translation::Empty + }, + comment, + status: if has_target_value { + EntryStatus::Translated + } else { + EntryStatus::New + }, + custom: source_custom, + }); + + if target_language.trim().is_empty() { + return Err(Error::InvalidResource( + "XLIFF target-language attribute cannot be empty".to_string(), + )); + } + } + } + + resources.push(Resource { + metadata: Metadata { + language: file.source_language.clone(), + domain: domain.clone(), + custom: base_custom.clone(), + }, + entries: source_entries, + }); + + if let Some(target_language) = file.target_language { + resources.push(Resource { + metadata: Metadata { + language: target_language, + domain, + custom: base_custom, + }, + entries: target_entries, + }); + } + } + + Ok(resources) + } +} + +#[derive(Debug, Clone)] +struct PreparedEntry { + id: String, + value: String, + comment: Option, + resname: Option, +} + +impl PreparedEntry { + fn from_entry(entry: Entry) -> Result { + let value = match entry.value { + Translation::Empty => String::new(), + Translation::Singular(value) => value, + Translation::Plural(_) => { + return Err(Error::UnsupportedFormat(format!( + "Plural entry '{}' cannot be represented in XLIFF v1 output", + entry.id + ))); + } + }; + + let resname = entry + .custom + .get(XLIFF_RESNAME_KEY) + .cloned() + .filter(|resname| resname != &entry.id); + + Ok(Self { + id: entry.id, + value, + comment: entry.comment, + resname, + }) + } +} + +#[derive(Debug, Default)] +struct GroupAccumulator { + original: Option, + datatype: String, + source_entries: BTreeMap, + target_entries: BTreeMap, +} + +fn parse_file_group_start(e: &BytesStart<'_>) -> Result { + let source_language = required_attr(e, b"source-language", "")?; + let target_language = optional_attr(e, b"target-language")?; + let original = optional_attr(e, b"original")?; + let datatype = optional_attr(e, b"datatype")?.unwrap_or_else(|| DEFAULT_DATATYPE.to_string()); + + reject_plural_like_metadata(original.as_deref(), Some(datatype.as_str()))?; + + Ok(FileGroup { + original, + source_language, + target_language, + datatype, + units: Vec::new(), + }) +} + +fn parse_trans_unit( + e: &BytesStart<'_>, + xml_reader: &mut Reader, + target_language: &Option, +) -> Result { + let id = required_attr(e, b"id", "")?; + let resname = optional_attr(e, b"resname")?; + + let mut buf = Vec::new(); + let mut source = None; + let mut target = None; + let mut notes = Vec::new(); + + loop { + match xml_reader.read_event_into(&mut buf) { + Ok(Event::Start(ref child)) if child.name().as_ref() == b"source" => { + source = Some(read_plain_text_element(xml_reader, b"source")?); + } + Ok(Event::Empty(ref child)) if child.name().as_ref() == b"source" => { + source = Some(String::new()); + } + Ok(Event::Start(ref child)) if child.name().as_ref() == b"target" => { + if target_language.is_none() { + return Err(Error::InvalidResource(format!( + "Found for trans-unit '{}' but the enclosing is missing target-language", + id + ))); + } + target = Some(read_plain_text_element(xml_reader, b"target")?); + } + Ok(Event::Empty(ref child)) if child.name().as_ref() == b"target" => { + if target_language.is_none() { + return Err(Error::InvalidResource(format!( + "Found for trans-unit '{}' but the enclosing is missing target-language", + id + ))); + } + target = Some(String::new()); + } + Ok(Event::Start(ref child)) if child.name().as_ref() == b"note" => { + notes.push(read_plain_text_element(xml_reader, b"note")?); + } + Ok(Event::Empty(ref child)) if child.name().as_ref() == b"note" => { + notes.push(String::new()); + } + Ok(Event::Start(ref child)) if child.name().as_ref() == b"group" => { + return Err(Error::UnsupportedFormat(format!( + "Plural/group XLIFF payloads are not supported in trans-unit '{}'", + id + ))); + } + Ok(Event::End(ref child)) if child.name().as_ref() == b"trans-unit" => break, + Ok(Event::Eof) => { + return Err(Error::InvalidResource(format!( + "Unexpected EOF inside trans-unit '{}'", + id + ))); + } + Ok(_) => {} + Err(e) => return Err(Error::XmlParse(e)), + } + buf.clear(); + } + + let source = source.ok_or_else(|| { + Error::InvalidResource(format!( + "trans-unit '{}' is missing a required element", + id + )) + })?; + + Ok(TransUnit { + id, + resname, + source, + target, + notes, + }) +} + +fn read_plain_text_element( + xml_reader: &mut Reader, + element_name: &[u8], +) -> Result { + let mut buf = Vec::new(); + let mut text = String::new(); + + loop { + match xml_reader.read_event_into(&mut buf) { + Ok(Event::Text(e)) => { + text.push_str(e.unescape().map_err(Error::XmlParse)?.as_ref()); + } + Ok(Event::CData(e)) => { + let cdata = std::str::from_utf8(e.as_ref()).map_err(|_| { + Error::InvalidResource(format!( + "Invalid UTF-8 inside <{}> CDATA section", + String::from_utf8_lossy(element_name) + )) + })?; + text.push_str(cdata); + } + Ok(Event::Start(_)) | Ok(Event::Empty(_)) => { + return Err(Error::UnsupportedFormat(format!( + "Inline markup inside <{}> is not supported in XLIFF v1", + String::from_utf8_lossy(element_name) + ))); + } + Ok(Event::End(ref e)) if e.name().as_ref() == element_name => break, + Ok(Event::Eof) => { + return Err(Error::InvalidResource(format!( + "Unexpected EOF inside <{}>", + String::from_utf8_lossy(element_name) + ))); + } + Ok(_) => {} + Err(e) => return Err(Error::XmlParse(e)), + } + buf.clear(); + } + + Ok(text) +} + +fn validate_file_group(file: &FileGroup) -> Result<(), Error> { + if file.source_language.trim().is_empty() { + return Err(Error::InvalidResource( + "XLIFF file group is missing source-language metadata".to_string(), + )); + } + if let Some(target_language) = &file.target_language + && target_language.trim().is_empty() + { + return Err(Error::InvalidResource( + "XLIFF target-language attribute cannot be empty".to_string(), + )); + } + + reject_plural_like_metadata(file.original.as_deref(), Some(file.datatype.as_str()))?; + + let mut seen_ids = HashSet::new(); + for unit in &file.units { + if !seen_ids.insert(unit.id.clone()) { + return Err(Error::InvalidResource(format!( + "Duplicate trans-unit id '{}' within XLIFF file group '{}'", + unit.id, + file.original.as_deref().unwrap_or(""), + ))); + } + } + + Ok(()) +} + +fn collect_languages(resources: &[Resource]) -> Result, Error> { + let mut languages = BTreeSet::new(); + for resource in resources { + let language = resource.metadata.language.trim(); + if language.is_empty() { + return Err(Error::InvalidResource( + "XLIFF output requires every resource to have a language".to_string(), + )); + } + languages.insert(language.to_string()); + } + Ok(languages) +} + +fn resolve_source_language( + resources: &[Resource], + languages: &BTreeSet, + source_language_hint: Option<&str>, + target_language_hint: Option<&str>, +) -> Result { + if let Some(source_language_hint) = source_language_hint { + let source_language = source_language_hint.trim(); + if source_language.is_empty() { + return Err(Error::InvalidResource( + "--source-language cannot be empty for XLIFF output".to_string(), + )); + } + if !languages.contains(source_language) { + return Err(Error::InvalidResource(format!( + "XLIFF source language '{}' was requested, but available resource languages are: {}", + source_language, + languages.iter().cloned().collect::>().join(", ") + ))); + } + return Ok(source_language.to_string()); + } + + let metadata_source_languages = resources + .iter() + .filter_map(|resource| resource.metadata.custom.get("source_language")) + .filter(|value| !value.trim().is_empty()) + .cloned() + .collect::>(); + + if metadata_source_languages.len() == 1 { + let source_language = metadata_source_languages.iter().next().cloned().unwrap(); + if !languages.contains(&source_language) { + return Err(Error::InvalidResource(format!( + "XLIFF source language metadata '{}' does not match any available resource languages ({})", + source_language, + languages.iter().cloned().collect::>().join(", ") + ))); + } + return Ok(source_language); + } + + if let Some(target_language_hint) = target_language_hint { + let candidates = languages + .iter() + .filter(|language| language.as_str() != target_language_hint) + .cloned() + .collect::>(); + + if candidates.len() == 1 { + return Ok(candidates[0].clone()); + } + } + + Err(Error::InvalidResource( + "XLIFF output requires a resolvable source language. Pass --source-language or provide consistent source_language metadata.".to_string(), + )) +} + +fn resolve_target_language( + languages: &BTreeSet, + source_language: &str, + target_language_hint: Option<&str>, +) -> Result { + if let Some(target_language_hint) = target_language_hint { + let target_language = target_language_hint.trim(); + if target_language.is_empty() { + return Err(Error::InvalidResource( + "--output-lang cannot be empty for XLIFF output".to_string(), + )); + } + if target_language == source_language { + return Err(Error::InvalidResource(format!( + "XLIFF target language '{}' must differ from source language '{}'", + target_language, source_language + ))); + } + + let extras = languages + .iter() + .filter(|language| { + language.as_str() != source_language && language.as_str() != target_language + }) + .cloned() + .collect::>(); + if !extras.is_empty() { + return Err(Error::InvalidResource(format!( + "XLIFF output requires exactly one target language. Extra languages present: {}", + extras.join(", ") + ))); + } + + return Ok(target_language.to_string()); + } + + let targets = languages + .iter() + .filter(|language| language.as_str() != source_language) + .cloned() + .collect::>(); + + match targets.as_slice() { + [target_language] => Ok(target_language.clone()), + [] => Err(Error::InvalidResource( + "XLIFF output requires a target language. Pass --output-lang and include the target resource.".to_string(), + )), + _ => Err(Error::InvalidResource(format!( + "XLIFF output requires exactly one target language, but found: {}. Pass --output-lang.", + targets.join(", ") + ))), + } +} + +fn required_attr(e: &BytesStart<'_>, attr_name: &[u8], element: &str) -> Result { + optional_attr(e, attr_name)?.ok_or_else(|| { + Error::InvalidResource(format!( + "{} is missing required attribute '{}'", + element, + String::from_utf8_lossy(attr_name) + )) + }) +} + +fn optional_attr(e: &BytesStart<'_>, attr_name: &[u8]) -> Result, Error> { + for attr in e.attributes().with_checks(false) { + let attr = attr.map_err(|e| Error::DataMismatch(e.to_string()))?; + if attr.key.as_ref() == attr_name { + return Ok(Some(attr.unescape_value()?.to_string())); + } + } + Ok(None) +} + +fn reject_plural_like_metadata( + original: Option<&str>, + datatype: Option<&str>, +) -> Result<(), Error> { + let looks_plural = |value: &str| { + let lower = value.to_ascii_lowercase(); + lower.contains("stringsdict") || lower.contains("plural") + }; + + if let Some(original) = original + && looks_plural(original) + { + return Err(Error::UnsupportedFormat(format!( + "Plural-like XLIFF source '{}' is not supported in v1", + original + ))); + } + if let Some(datatype) = datatype + && looks_plural(datatype) + { + return Err(Error::UnsupportedFormat(format!( + "Plural-like XLIFF datatype '{}' is not supported in v1", + datatype + ))); + } + Ok(()) +} + +fn domain_from_original(original: Option<&str>) -> String { + original + .and_then(|original| { + Path::new(original) + .file_name() + .and_then(|file| Path::new(file).file_stem()) + }) + .and_then(|stem| stem.to_str()) + .unwrap_or_default() + .to_string() +} + +fn notes_to_comment(notes: &[String]) -> Option { + let notes = notes + .iter() + .map(|note| note.trim().to_string()) + .filter(|note| !note.is_empty()) + .collect::>(); + if notes.is_empty() { + None + } else { + Some(notes.join("\n\n")) + } +} + +fn split_comment_into_notes(comment: Option<&str>) -> Vec { + let Some(comment) = comment else { + return Vec::new(); + }; + + let normalized = comment.replace("\r\n", "\n"); + normalized + .split("\n\n") + .map(str::trim) + .filter(|segment| !segment.is_empty()) + .map(|segment| segment.to_string()) + .collect() +} + +fn write_indent(writer: &mut Writer, depth: usize) -> Result<(), Error> { + let indent = " ".repeat(depth); + writer.write_event(Event::Text(BytesText::new(&indent)))?; + Ok(()) +} + +fn write_text_element( + writer: &mut Writer, + depth: usize, + name: &str, + value: &str, +) -> Result<(), Error> { + write_indent(writer, depth)?; + writer.write_event(Event::Start(BytesStart::new(name)))?; + if !value.is_empty() { + writer.write_event(Event::Text(BytesText::new(value)))?; + } + writer.write_event(Event::End(BytesEnd::new(name)))?; + writer.write_event(Event::Text(BytesText::new("\n")))?; + Ok(()) +} + +fn write_optional_text_element( + writer: &mut Writer, + depth: usize, + name: &str, + value: Option<&str>, +) -> Result<(), Error> { + if let Some(value) = value { + write_text_element(writer, depth, name, value) + } else { + write_indent(writer, depth)?; + writer.write_event(Event::Empty(BytesStart::new(name)))?; + writer.write_event(Event::Text(BytesText::new("\n")))?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_xliff(input: &str) -> Format { + Format::from_str(input).unwrap() + } + + #[test] + fn parses_bilingual_xcode_xliff_with_notes_and_missing_target() { + let xliff = r#" + + + + + Hello + Bonjour + Shown on the home screen. + Keep it short. + + + Pending + + + + +"#; + + let resources = Vec::::try_from(parse_xliff(xliff)).unwrap(); + assert_eq!(resources.len(), 2); + + let source = resources + .iter() + .find(|r| r.metadata.language == "en") + .unwrap(); + let target = resources + .iter() + .find(|r| r.metadata.language == "fr") + .unwrap(); + + assert_eq!(source.metadata.domain, "Localizable"); + assert_eq!( + source + .metadata + .custom + .get(XLIFF_ORIGINAL_KEY) + .map(String::as_str), + Some("Base.lproj/Localizable.strings") + ); + assert_eq!( + source.find_entry("greeting").unwrap().comment.as_deref(), + Some("Shown on the home screen.\n\nKeep it short.") + ); + assert_eq!( + source + .find_entry("greeting") + .unwrap() + .custom + .get(XLIFF_RESNAME_KEY) + .map(String::as_str), + Some("GREETING") + ); + assert_eq!( + target.find_entry("pending").unwrap().value, + Translation::Empty + ); + assert_eq!( + target.find_entry("pending").unwrap().status, + EntryStatus::New + ); + } + + #[test] + fn rejects_missing_trans_unit_id() { + let xliff = r#" + + + + + Hello + + + + +"#; + + let err = Format::from_str(xliff).unwrap_err(); + assert!(err.to_string().contains("trans-unit")); + assert!(err.to_string().contains("id")); + } + + #[test] + fn rejects_duplicate_ids_within_file_group() { + let xliff = r#" + + + + Hello + Hi + + + +"#; + + let err = Format::from_str(xliff).unwrap_err(); + assert!(err.to_string().contains("Duplicate trans-unit id")); + } + + #[test] + fn rejects_missing_target_language_when_target_elements_exist() { + let xliff = r#" + + + + + Hello + Bonjour + + + + +"#; + + let err = Format::from_str(xliff).unwrap_err(); + assert!(err.to_string().contains("target-language")); + } + + #[test] + fn rejects_non_12_version() { + let xliff = r#" + +"#; + + let err = Format::from_str(xliff).unwrap_err(); + assert!(err.to_string().contains("Only XLIFF 1.2 is supported")); + } + + #[test] + fn rejects_plural_like_payloads() { + let xliff = r#" + + + + Hello + + + +"#; + + let err = Format::from_str(xliff).unwrap_err(); + assert!(err.to_string().contains("not supported")); + } + + #[test] + fn writes_stable_bilingual_xliff() { + let mut source_custom = HashMap::new(); + source_custom.insert("source_language".to_string(), "en".to_string()); + source_custom.insert( + XLIFF_ORIGINAL_KEY.to_string(), + "Base.lproj/Localizable.strings".to_string(), + ); + source_custom.insert(XLIFF_DATATYPE_KEY.to_string(), "plaintext".to_string()); + + let source = Resource { + metadata: Metadata { + language: "en".to_string(), + domain: "Localizable".to_string(), + custom: source_custom.clone(), + }, + entries: vec![Entry { + id: "greeting".to_string(), + value: Translation::Singular("Hello".to_string()), + comment: Some("Shown on the home screen.\n\nKeep it short.".to_string()), + status: EntryStatus::Translated, + custom: HashMap::from([(XLIFF_RESNAME_KEY.to_string(), "GREETING".to_string())]), + }], + }; + + let target = Resource { + metadata: Metadata { + language: "fr".to_string(), + domain: "Localizable".to_string(), + custom: source_custom, + }, + entries: vec![Entry { + id: "greeting".to_string(), + value: Translation::Singular("Bonjour".to_string()), + comment: None, + status: EntryStatus::Translated, + custom: HashMap::new(), + }], + }; + + let format = Format::from_resources(vec![source, target], Some("en"), Some("fr")).unwrap(); + let mut out = Vec::new(); + format.to_writer(&mut out).unwrap(); + let xml = String::from_utf8(out).unwrap(); + + assert!(xml.contains(r#""#)); + assert!( + xml.contains(r#""#) + ); + assert!(xml.contains("Shown on the home screen.")); + assert!(xml.contains("Keep it short.")); + assert!(xml.contains("Bonjour")); + } + + #[test] + fn infers_source_and_target_for_round_trip_output() { + let resources = vec![ + Resource { + metadata: Metadata { + language: "en".to_string(), + domain: "Localizable".to_string(), + custom: HashMap::new(), + }, + entries: vec![Entry { + id: "hello".to_string(), + value: Translation::Singular("Hello".to_string()), + comment: None, + status: EntryStatus::Translated, + custom: HashMap::new(), + }], + }, + Resource { + metadata: Metadata { + language: "fr".to_string(), + domain: "Localizable".to_string(), + custom: HashMap::from([("source_language".to_string(), "en".to_string())]), + }, + entries: vec![Entry { + id: "hello".to_string(), + value: Translation::Singular("Bonjour".to_string()), + comment: None, + status: EntryStatus::Translated, + custom: HashMap::new(), + }], + }, + ]; + + let format = Format::try_from(resources).unwrap(); + assert_eq!(format.files.len(), 1); + assert_eq!(format.files[0].source_language, "en"); + assert_eq!(format.files[0].target_language.as_deref(), Some("fr")); + } + + #[test] + fn allows_explicit_target_language_without_existing_target_resource() { + let resources = vec![Resource { + metadata: Metadata { + language: "en".to_string(), + domain: "Localizable".to_string(), + custom: HashMap::new(), + }, + entries: vec![Entry { + id: "hello".to_string(), + value: Translation::Singular("Hello".to_string()), + comment: None, + status: EntryStatus::Translated, + custom: HashMap::new(), + }], + }]; + + let format = Format::from_resources(resources, Some("en"), Some("fr")).unwrap(); + assert_eq!(format.files[0].target_language.as_deref(), Some("fr")); + assert_eq!(format.files[0].units[0].target, None); + } +} diff --git a/langcodec/src/lib.rs b/langcodec/src/lib.rs index fd37c32..34c3254 100644 --- a/langcodec/src/lib.rs +++ b/langcodec/src/lib.rs @@ -1,7 +1,8 @@ #![forbid(unsafe_code)] //! Universal localization file toolkit for Rust. //! -//! Supports parsing, writing, and converting between Apple `.strings`, `.xcstrings`, Android `strings.xml`, and CSV files. +//! Supports parsing, writing, and converting between Apple `.strings`, `.xcstrings`, `.xliff`, +//! Android `strings.xml`, CSV, and TSV files. //! All conversion happens through the unified `Resource` model. //! //! # Quick Start @@ -31,8 +32,10 @@ //! //! - **Apple `.strings`**: Traditional iOS/macOS localization files //! - **Apple `.xcstrings`**: Modern Xcode localization format with plural support +//! - **Apple `.xliff`**: Xcode localization exchange files (XLIFF 1.2) //! - **Android `strings.xml`**: Android resource files //! - **CSV**: Comma-separated values for simple key-value pairs +//! - **TSV**: Tab-separated values for simple key-value pairs //! //! # Features //!