diff --git a/.github/workflows/consistency-checks.yml b/.github/workflows/consistency-checks.yml
index 0dbf299..b75dc28 100644
--- a/.github/workflows/consistency-checks.yml
+++ b/.github/workflows/consistency-checks.yml
@@ -9,6 +9,8 @@ on:
jobs:
build:
runs-on: ubuntu-latest
+ env:
+ NLTK_DATA: "/tmp"
strategy:
matrix:
python-version: ['3.13']
@@ -24,7 +26,7 @@ jobs:
python -m pip install pytest
python -m pip install Mathics3-Module-Base
# Can comment out when next Mathics3 core and Mathics-scanner are released
- # python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full]
+ python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full]
# Until the next mathics-core release
git clone https://github.com/Mathics3/mathics-core
(cd mathics-core && python -m pip install -e .[full])
@@ -33,7 +35,7 @@ jobs:
- name: Install Mathics3 Module nltk
run: |
python -m pip install --no-build-isolation setuptools Mathics3[full] nltk PatternLite enchant
- make develop
- name: Test Mathics Consistency and Style
run: |
+ make develop
make check-consistency-and-style
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 8fea1ac..ab67917 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -9,6 +9,8 @@ on:
jobs:
build:
runs-on: ubuntu-latest
+ env:
+ NLTK_DATA: "/tmp"
strategy:
matrix:
python-version: ['3.12', '3.13']
@@ -23,7 +25,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install pytest
# Go over and comment out stuff when next Mathics core and Mathics-scanner are released
- # python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full]
+ python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full]
# Until the next mathics-core release
git clone https://github.com/Mathics3/mathics-core
(cd mathics-core && python -m pip install -e .[full])
@@ -32,7 +34,7 @@ jobs:
- name: Install Mathics3 Module nltk
run: |
python -m pip install --no-build-isolation setuptools Mathics3[full] nltk PatternLite enchant
- make develop
- name: Test Mathics3 Module nltk
run: |
+ make develop
make -j3 check
diff --git a/.gitignore b/.gitignore
index 61b4358..0da6c50 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
*~
.python-version
+/.nltk_data
/.python-version
/ChangeLog
/ChangeLog-spell-corrected
diff --git a/Makefile b/Makefile
index 52cb4d6..e89d274 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,10 @@ SPACY_DOWNLOAD ?= $(lang)_core_web_$(WORDLIST_SIZE)
#: Default target - same as "develop"
all: develop
+#: Download bcp47, which is needed to support the Mathics3 builtin WordTranslation
+download-bcp47:
+ $(PYTHON) ./admin-tools/download-bcp47.py
+
#: Word-list data. Customize with lang and eventually WORDLIST_SIZE variables
wordlist:
$(PYTHON) -m nltk.downloader wordnet2022 omw-1.4
@@ -40,10 +44,7 @@ pypi-setup:
#: Set up to run from the source tree
develop: pypi-setup
$(MAKE) wordlist
-
-#: Install Mathics3 Module nltk
-install: pypi-setup
- $(PYTHON) setup.py install
+ $(MAKE) download-bcp47
#: Run tests
test check: pytest doctest
diff --git a/admin-tools/download-bcp47.py b/admin-tools/download-bcp47.py
new file mode 100755
index 0000000..df1d3a5
--- /dev/null
+++ b/admin-tools/download-bcp47.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+"""
+Download the NLTK bcp47 corpus, which is needed to support the Mathics3 builtin function WordTranslation.
+"""
+import os
+
+import nltk
+
+# Use $NLTK_DATA when set; otherwise fall back to ./.nltk_data under the current directory, so no system-wide write access is required
+data_dir = os.environ.get("NLTK_DATA", os.path.join(os.getcwd(), ".nltk_data"))
+os.makedirs(data_dir, exist_ok=True)
+
+# Add the directory to nltk.data.path so the lookup below also searches it
+if data_dir not in nltk.data.path:
+ nltk.data.path.append(data_dir)
+
+# Download into data_dir only when the "corpora/bcp47" lookup fails
+try:
+ nltk.data.find("corpora/bcp47")
+except LookupError:
+ nltk.download("bcp47", download_dir=data_dir, quiet=False)
diff --git a/pymathics/natlang/__init__.py b/pymathics/natlang/__init__.py
index 9e8cb39..548bdcc 100644
--- a/pymathics/natlang/__init__.py
+++ b/pymathics/natlang/__init__.py
@@ -48,6 +48,7 @@
WordDefinition,
WordList,
)
+
from pymathics.natlang.manipulate import Pluralize
from pymathics.natlang.normalization import (
DeleteStopwords,
@@ -65,8 +66,7 @@
WordSimilarity,
WordStem,
)
-
-from pymathics.natlang.linguistic_data.translation import LanguageIdentify
+from pymathics.natlang.translation import LanguageIdentify, WordTranslation
from pymathics.natlang.version import __version__
pymathics_version_data = {
@@ -99,6 +99,7 @@
"WordList",
"WordSimilarity",
"WordStem",
+ "WordTranslation",
"__version__",
"pymathics_version_data",
]
diff --git a/pymathics/natlang/linguistic_data/__init__.py b/pymathics/natlang/linguistic_data.py
similarity index 100%
rename from pymathics/natlang/linguistic_data/__init__.py
rename to pymathics/natlang/linguistic_data.py
diff --git a/pymathics/natlang/linguistic_data/lang_trans.py b/pymathics/natlang/linguistic_data/lang_trans.py
deleted file mode 100644
index e1d7f13..0000000
--- a/pymathics/natlang/linguistic_data/lang_trans.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""
-Languages & Translations
-"""
diff --git a/pymathics/natlang/linguistic_data/translation.py b/pymathics/natlang/linguistic_data/translation.py
deleted file mode 100644
index 94486c2..0000000
--- a/pymathics/natlang/linguistic_data/translation.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# -*- coding: utf-8 -*-
-
-
-"""
-Language Translation
-
-
-"""
-
-# This is under Text Normalization in WR. But also in Natural Language Processing,
-# and Linguistic Data. I put here because is the only module that uses langid and pycountry
-# modules.
-#
-# TODO: WordTranslation, TextTranslation
-
-from typing import Union
-
-import langid # see https://github.com/saffsd/langid.py
-import pycountry
-from mathics.core.atoms import String
-from mathics.core.builtin import Builtin
-from mathics.core.evaluation import Evaluation
-from mathics.core.symbols import Symbol
-from mathics.core.systemsymbols import SymbolFailed
-
-sort_order = "Language Translation"
-
-
-class LanguageIdentify(Builtin):
- """
-