From 725972fb471c3485bee0a9c9cd538167f1dbc734 Mon Sep 17 00:00:00 2001 From: immortal71 Date: Thu, 1 Jan 2026 20:34:16 -0800 Subject: [PATCH 1/8] feat: Added translation tag checker for issue #1102 --> Add check_translations.py script to detect missing, untranslated, and empty T0xxx tags --> Added comprehensive pytest tests for translation validation --> Updated run-tests-generate-output.yaml to run checker and include report in PR comments --> Updated pre-release.yml to include translation report in release body --> Resolved missing tag detection as requested in #1102 --- .github/workflows/pre-release.yml | 25 ++ .../workflows/run-tests-generate-output.yaml | 43 +++- scripts/check_translations.py | 230 ++++++++++++++++++ tests/scripts/test_translation_tags.py | 124 ++++++++++ 4 files changed, 416 insertions(+), 6 deletions(-) create mode 100644 scripts/check_translations.py create mode 100644 tests/scripts/test_translation_tags.py diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index ef483e020..598a0f672 100644 --- a/.github/workflows/pre-release.yml +++ b/.github/workflows/pre-release.yml @@ -46,6 +46,15 @@ with: token: ${{ secrets.QLTY_COVERAGE_TOKEN }} files: coverage.xml + # Check translation tags + - name: Check translation tags + id: translation_check + run: | + pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..."
+ # Read the report content and save it as an output + echo "TRANSLATION_REPORT<> $GITHUB_ENV + cat translation_check_report.md >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV - name: Generate new output files run: | # @@ -112,12 +121,28 @@ cp output/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml output/cornucopia_webapp/ zip -r output/owasp_cornucopia_webapp_3.0_en.zip output/cornucopia_webapp/Links/* output/cornucopia_webapp/Fonts/* output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml ./resources/templates/owasp_cornucopia_webapp_scoresheet.pdf + - name: Prepare release body with translation report + id: prepare_release + run: | + # Read the translation report + TRANSLATION_REPORT=$(cat translation_check_report.md) + # Create a combined release body + cat > release_body.md << 'EOF' + ## OWASP Cornucopia Pre-Release + + This is an automated pre-release build from the latest master branch. 
+ + --- + + EOF + cat translation_check_report.md >> release_body.md - uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2.5.0 name: "Create pre-release" with: tag_name: pre-release prerelease: true name: Latest pre-release + body_path: release_body.md files: | CHANGELOG.md LICENSE.md diff --git a/.github/workflows/run-tests-generate-output.yaml b/.github/workflows/run-tests-generate-output.yaml index 9d708ffee..bb3c96174 100644 --- a/.github/workflows/run-tests-generate-output.yaml +++ b/.github/workflows/run-tests-generate-output.yaml @@ -58,6 +58,16 @@ jobs: run: | pip install -r requirements.txt --require-hashes pipenv install -d + - name: Check translation tags + run: | + pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..." + - name: Upload translation check report + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + retention-days: 5 + name: translation-check-report.${{ github.sha }}.md + path: translation_check_report.md - name: Generate new output files run: | # @@ -146,23 +156,44 @@ jobs: contents: read needs: uploadoutputfiles steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + ref: ${{ github.event.pull_request.head.ref }} + - name: Download translation check report + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: translation-check-report.${{ github.sha }}.md + path: . 
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 env: PR_NUMBER: ${{ github.event.number }} - PR_NOTES: | - [badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge + ARTIFACT_URL: ${{needs.uploadoutputfiles.outputs.artifact-url}} + with: + script: | + const fs = require('fs'); + let translationReport = ''; + try { + translationReport = fs.readFileSync('translation_check_report.md', 'utf8'); + } catch (error) { + translationReport = 'Translation check report not found.'; + } + + const prNotes = `[badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge ## Build artifacts: | Name | Link | |------|------| - | Output files | [cornucopia-build-files.${{ github.sha }}.zip](${{needs.uploadoutputfiles.outputs.artifact-url}}) | + | Output files | [cornucopia-build-files.${{ github.sha }}.zip](${process.env.ARTIFACT_URL}) | + + --- + + ${translationReport}`; - with: - script: | github.rest.issues.createComment({ issue_number: process.env.PR_NUMBER, owner: context.repo.owner, repo: context.repo.repo, - body: process.env.PR_NOTES + body: prNotes }) diff --git a/scripts/check_translations.py b/scripts/check_translations.py new file mode 100644 index 000000000..0154992c1 --- /dev/null +++ b/scripts/check_translations.py @@ -0,0 +1,230 @@ +""" +Translation Tag Checker for OWASP Cornucopia + +This script checks that translation files have the same T0xxx tags as the English version. 
+It detects: +- Missing tags in translations +- Untranslated tags (text identical to English) +- Empty tag values +""" + +import os +import sys +import yaml +from pathlib import Path +from typing import Dict, List, Set, Tuple +from collections import defaultdict + + +class TranslationChecker: + """Check translations for missing, untranslated, or empty tags.""" + + def __init__(self, source_dir: Path): + self.source_dir = source_dir + self.results = defaultdict(lambda: defaultdict(dict)) + + def extract_tags(self, yaml_file: Path) -> Dict[str, str]: + """Extract T0xxx tags and their text from a YAML file.""" + tags = {} + try: + with open(yaml_file, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + # Check if data has common_ids section + if data and 'common_ids' in data: + for item in data['common_ids']: + tag_id = item.get('id', '') + if tag_id.startswith('T0'): + tags[tag_id] = item.get('text', '') + + except Exception as e: + print(f"Error reading {yaml_file}: {e}", file=sys.stderr) + + return tags + + def get_file_groups(self) -> Dict[str, List[Path]]: + """Group YAML files by their base name (e.g., webapp-cards-2.2).""" + file_groups = defaultdict(list) + + for yaml_file in self.source_dir.glob('*-*.yaml'): + # Skip archived files + if 'archive' in str(yaml_file): + continue + + # Extract base name and language + # Format: {edition}-{component}-{version}-{lang}.yaml + parts = yaml_file.stem.split('-') + if len(parts) >= 3: + # Find language code (usually last part or second to last) + lang = parts[-1] + base_name = '-'.join(parts[:-1]) + + # Only process card files with language codes + if 'cards' in base_name and len(lang) == 2: + file_groups[base_name].append(yaml_file) + + return file_groups + + def check_translations(self) -> Dict[str, Dict[str, Dict[str, List[str]]]]: + """ + Check all translation files against English versions. 
+ + Returns: + Dict with structure: + { + 'base_name': { + 'language': { + 'missing': ['T00145', ...], + 'untranslated': ['T00100', ...], + 'empty': ['T00200', ...] + } + } + } + """ + file_groups = self.get_file_groups() + + for base_name, files in file_groups.items(): + # Find English reference file + english_file = None + translation_files = [] + + for f in files: + lang = f.stem.split('-')[-1] + if lang == 'en': + english_file = f + else: + translation_files.append(f) + + if not english_file: + print(f"Warning: No English file found for {base_name}", file=sys.stderr) + continue + + # Extract English tags + english_tags = self.extract_tags(english_file) + + if not english_tags: + continue + + # Check each translation + for trans_file in translation_files: + lang = trans_file.stem.split('-')[-1] + trans_tags = self.extract_tags(trans_file) + + # Find missing tags + missing = [] + untranslated = [] + empty = [] + + for tag_id, eng_text in english_tags.items(): + if tag_id not in trans_tags: + missing.append(tag_id) + elif not trans_tags[tag_id]: + empty.append(tag_id) + elif trans_tags[tag_id] == eng_text: + untranslated.append(tag_id) + + # Store results + if missing or untranslated or empty: + self.results[base_name][lang] = { + 'missing': sorted(missing), + 'untranslated': sorted(untranslated), + 'empty': sorted(empty), + 'file': str(trans_file.name) + } + + return dict(self.results) + + def generate_markdown_report(self) -> str: + """Generate a Markdown report of translation issues.""" + report_lines = [] + + if not self.results: + report_lines.append("# Translation Check Report\n") + report_lines.append("✅ All translations have the same tags as the English version.\n") + return '\n'.join(report_lines) + + report_lines.append("# Translation Check Report\n") + report_lines.append("The following sentences/tags have issues in the translations:\n") + + # Language name mapping + lang_names = { + 'es': 'Spanish', + 'fr': 'French', + 'hu': 'Hungarian', + 'it': 
'Italian', + 'nl': 'Dutch', + 'no_nb': 'Norwegian', + 'pt_br': 'Portuguese (Brazil)', + 'pt_pt': 'Portuguese (Portugal)', + 'ru': 'Russian' + } + + for base_name in sorted(self.results.keys()): + languages = self.results[base_name] + + for lang in sorted(languages.keys()): + lang_name = lang_names.get(lang, lang.upper()) + issues = languages[lang] + filename = issues.get('file', '') + + report_lines.append(f"\n## {lang_name}\n") + report_lines.append(f"**File:** `{filename}`\n") + + if issues['missing']: + report_lines.append("### Missing Tags\n") + report_lines.append("The following tags are present in the English version but missing in this translation:\n") + tags_str = ', '.join(issues['missing']) + report_lines.append(f"{tags_str}\n") + + if issues['untranslated']: + report_lines.append("### Untranslated Tags\n") + report_lines.append("The following tags have identical text to English (not translated):\n") + tags_str = ', '.join(issues['untranslated']) + report_lines.append(f"{tags_str}\n") + + if issues['empty']: + report_lines.append("### Empty Tags\n") + report_lines.append("The following tags are empty:\n") + tags_str = ', '.join(issues['empty']) + report_lines.append(f"{tags_str}\n") + + return '\n'.join(report_lines) + + +def main(): + """Main entry point for the translation checker.""" + # Determine source directory + script_dir = Path(__file__).parent + base_dir = script_dir.parent + source_dir = base_dir / 'source' + + if not source_dir.exists(): + print(f"Error: Source directory not found: {source_dir}", file=sys.stderr) + sys.exit(1) + + # Run checker + checker = TranslationChecker(source_dir) + results = checker.check_translations() + + # Generate report + report = checker.generate_markdown_report() + + # Output report + print(report) + + # Write to file + output_file = base_dir / 'translation_check_report.md' + with open(output_file, 'w', encoding='utf-8') as f: + f.write(report) + + print(f"\n---\nReport written to: {output_file}", 
file=sys.stderr) + + # Exit with error code if issues found + if results: + sys.exit(1) + else: + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/tests/scripts/test_translation_tags.py b/tests/scripts/test_translation_tags.py new file mode 100644 index 000000000..e7447d1e5 --- /dev/null +++ b/tests/scripts/test_translation_tags.py @@ -0,0 +1,124 @@ +""" +Integration tests for translation tag checking. + +Tests that all translations have the same T0xxx tags as the English version. +""" + +import unittest +import os +from pathlib import Path +import sys + +# Add scripts directory to path +scripts_path = Path(__file__).parent.parent.parent / 'scripts' +sys.path.insert(0, str(scripts_path)) + +from check_translations import TranslationChecker + + +class TestTranslationTags(unittest.TestCase): + """Test that translations have the same tags as English versions.""" + + def setUp(self): + """Set up test fixtures.""" + # Navigate up from tests/scripts to cornucopia root + self.base_path = Path(__file__).parent.parent.parent + self.source_dir = self.base_path / 'source' + self.checker = TranslationChecker(self.source_dir) + + def test_source_directory_exists(self): + """Test that the source directory exists.""" + self.assertTrue( + self.source_dir.exists(), + f"Source directory not found: {self.source_dir}" + ) + + def test_english_files_exist(self): + """Test that English card files exist.""" + english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) + self.assertGreater( + len(english_files), 0, + "No English card files found in source directory" + ) + + def test_translations_have_all_tags(self): + """ + Test that all translations have the same T0xxx tags as English. 
+ + This test will fail if: + - Tags are missing in translations + - Tags are untranslated (identical to English) + - Tags are empty + """ + results = self.checker.check_translations() + + if results: + # Generate detailed report + report = self.checker.generate_markdown_report() + + # Count total issues + total_issues = 0 + for base_name, languages in results.items(): + for lang, issues in languages.items(): + total_issues += len(issues.get('missing', [])) + total_issues += len(issues.get('untranslated', [])) + total_issues += len(issues.get('empty', [])) + + self.fail( + f"\n\nTranslation issues found ({total_issues} total):\n\n{report}\n" + ) + + def test_no_duplicate_tags_in_english(self): + """Test that English files don't have duplicate T0xxx tags.""" + english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) + + for eng_file in english_files: + tags = self.checker.extract_tags(eng_file) + # Extract_tags returns a dict, so duplicates would be overwritten + # We need to check the raw file for duplicates + import yaml + with open(eng_file, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + if data and 'common_ids' in data: + seen_ids = set() + duplicates = [] + + for item in data['common_ids']: + tag_id = item.get('id', '') + if tag_id.startswith('T0'): + if tag_id in seen_ids: + duplicates.append(tag_id) + seen_ids.add(tag_id) + + self.assertEqual( + len(duplicates), 0, + f"Duplicate tags found in {eng_file.name}: {duplicates}" + ) + + def test_tag_format(self): + """Test that tags follow the T0xxxx format.""" + import re + tag_pattern = re.compile(r'^T0\d{4,5}$') + + english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) + + for eng_file in english_files: + tags = self.checker.extract_tags(eng_file) + + for tag_id in tags.keys(): + self.assertIsNotNone( + tag_pattern.match(tag_id), + f"Tag {tag_id} in {eng_file.name} doesn't match format T0xxxx" + ) + + def test_generate_markdown_report(self): + """Test that markdown report generation 
works.""" + report = self.checker.generate_markdown_report() + + self.assertIsInstance(report, str) + self.assertIn("Translation Check Report", report) + + +if __name__ == '__main__': + unittest.main() From d6b3263539683e422e853926610871f802d6d502 Mon Sep 17 00:00:00 2001 From: immortal71 Date: Thu, 1 Jan 2026 21:02:00 -0800 Subject: [PATCH 2/8] fixed: Address Copilot review feedback - fix language codes and remove unused imports --- scripts/check_translations.py | 9 ++++----- tests/scripts/test_translation_tags.py | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/check_translations.py b/scripts/check_translations.py index 0154992c1..e05ebb4c9 100644 --- a/scripts/check_translations.py +++ b/scripts/check_translations.py @@ -8,11 +8,10 @@ - Empty tag values """ -import os import sys import yaml from pathlib import Path -from typing import Dict, List, Set, Tuple +from typing import Dict, List from collections import defaultdict @@ -153,9 +152,9 @@ def generate_markdown_report(self) -> str: 'hu': 'Hungarian', 'it': 'Italian', 'nl': 'Dutch', - 'no_nb': 'Norwegian', - 'pt_br': 'Portuguese (Brazil)', - 'pt_pt': 'Portuguese (Portugal)', + 'no-nb': 'Norwegian', + 'pt-br': 'Portuguese (Brazil)', + 'pt-pt': 'Portuguese (Portugal)', 'ru': 'Russian' } diff --git a/tests/scripts/test_translation_tags.py b/tests/scripts/test_translation_tags.py index e7447d1e5..f2fab4b18 100644 --- a/tests/scripts/test_translation_tags.py +++ b/tests/scripts/test_translation_tags.py @@ -5,7 +5,6 @@ """ import unittest -import os from pathlib import Path import sys From 0040e14a3e5c5242f1a76d7d327813d827e202a3 Mon Sep 17 00:00:00 2001 From: Aashish kharel Date: Thu, 1 Jan 2026 21:20:52 -0800 Subject: [PATCH 3/8] Update scripts/check_translations.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/check_translations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/check_translations.py 
b/scripts/check_translations.py index e05ebb4c9..83a4a010a 100644 --- a/scripts/check_translations.py +++ b/scripts/check_translations.py @@ -152,9 +152,9 @@ def generate_markdown_report(self) -> str: 'hu': 'Hungarian', 'it': 'Italian', 'nl': 'Dutch', - 'no-nb': 'Norwegian', - 'pt-br': 'Portuguese (Brazil)', - 'pt-pt': 'Portuguese (Portugal)', + 'no_nb': 'Norwegian', + 'pt_br': 'Portuguese (Brazil)', + 'pt_pt': 'Portuguese (Portugal)', 'ru': 'Russian' } From abe658529a2a4e3b1b6b865721bf70313cfd052a Mon Sep 17 00:00:00 2001 From: Aashish kharel Date: Thu, 1 Jan 2026 21:21:59 -0800 Subject: [PATCH 4/8] Update .github/workflows/pre-release.yml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .github/workflows/pre-release.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index 598a0f672..a92993ba4 100644 --- a/.github/workflows/pre-release.yml +++ b/.github/workflows/pre-release.yml @@ -51,10 +51,6 @@ id: translation_check run: | pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..." 
- # Read the report content and save it as an output - echo "TRANSLATION_REPORT<> $GITHUB_ENV - cat translation_check_report.md >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - name: Generate new output files run: | # From fce2bf35d991ddce16636dc5e2db795599ed8e4f Mon Sep 17 00:00:00 2001 From: Aashish kharel Date: Thu, 1 Jan 2026 21:22:23 -0800 Subject: [PATCH 5/8] Update .github/workflows/pre-release.yml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .github/workflows/pre-release.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index a92993ba4..9a738c6e9 100644 --- a/.github/workflows/pre-release.yml +++ b/.github/workflows/pre-release.yml @@ -120,9 +120,7 @@ - name: Prepare release body with translation report id: prepare_release run: | - # Read the translation report - TRANSLATION_REPORT=$(cat translation_check_report.md) - # Create a combined release body + # Read the translation report and create a combined release body cat > release_body.md << 'EOF' ## OWASP Cornucopia Pre-Release From dc514337f65912449ce92c61544f6620ef6bb73f Mon Sep 17 00:00:00 2001 From: Aashish kharel Date: Thu, 1 Jan 2026 21:22:37 -0800 Subject: [PATCH 6/8] Update .github/workflows/run-tests-generate-output.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .github/workflows/run-tests-generate-output.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests-generate-output.yaml b/.github/workflows/run-tests-generate-output.yaml index bb3c96174..418ff0bd4 100644 --- a/.github/workflows/run-tests-generate-output.yaml +++ b/.github/workflows/run-tests-generate-output.yaml @@ -161,7 +161,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.ref }} - name: Download translation check report - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + uses: actions/download-artifact@v6 # 
v6 with: name: translation-check-report.${{ github.sha }}.md path: . From 0ba0fa40b33f432827d1ce1a2239d087eeec3917 Mon Sep 17 00:00:00 2001 From: Aashish kharel Date: Thu, 1 Jan 2026 21:23:20 -0800 Subject: [PATCH 7/8] Update scripts/check_translations.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/check_translations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_translations.py b/scripts/check_translations.py index 83a4a010a..abc82b014 100644 --- a/scripts/check_translations.py +++ b/scripts/check_translations.py @@ -59,7 +59,7 @@ def get_file_groups(self) -> Dict[str, List[Path]]: base_name = '-'.join(parts[:-1]) # Only process card files with language codes - if 'cards' in base_name and len(lang) == 2: + if 'cards' in base_name and (len(lang) == 2 or ('_' in lang and all(len(part) == 2 for part in lang.split('_')))): file_groups[base_name].append(yaml_file) return file_groups From 6cddfa87c9506d0c81fed16f4220f90998a7f027 Mon Sep 17 00:00:00 2001 From: Aashish kharel Date: Thu, 1 Jan 2026 21:23:57 -0800 Subject: [PATCH 8/8] Update tests/scripts/test_translation_tags.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/scripts/test_translation_tags.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/scripts/test_translation_tags.py b/tests/scripts/test_translation_tags.py index f2fab4b18..7a2b9aeed 100644 --- a/tests/scripts/test_translation_tags.py +++ b/tests/scripts/test_translation_tags.py @@ -72,7 +72,6 @@ def test_no_duplicate_tags_in_english(self): english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) for eng_file in english_files: - tags = self.checker.extract_tags(eng_file) # Extract_tags returns a dict, so duplicates would be overwritten # We need to check the raw file for duplicates import yaml