6 changes: 4 additions & 2 deletions .github/workflows/consistency-checks.yml
@@ -9,6 +9,8 @@ on:
jobs:
build:
runs-on: ubuntu-latest
env:
NLTK_DATA: "/tmp"
strategy:
matrix:
python-version: ['3.13']
@@ -24,7 +26,7 @@ jobs:
python -m pip install pytest
python -m pip install Mathics3-Module-Base
# Can comment out when next Mathics3 core and Mathics-scanner are released
# python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full]
python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full]
# Until the next mathics-core release
git clone https://github.com/Mathics3/mathics-core
(cd mathics-core && python -m pip install -e .[full])
@@ -33,7 +35,7 @@ jobs:
- name: Install Mathics3 Module nltk
run: |
python -m pip install --no-build-isolation setuptools Mathics3[full] nltk PatternLite enchant
make develop
- name: Test Mathics Consistency and Style
run: |
make develop
make check-consistency-and-style
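Note on the new NLTK_DATA: "/tmp" env block above: nltk picks up the NLTK_DATA environment variable when it is first imported and adds those directories to its data search path, so corpora that admin-tools/download-bcp47.py drops into /tmp are visible to the later test steps. A minimal sketch of that behavior (the /tmp value simply mirrors the workflow; any writable directory works):

# Sketch: NLTK_DATA must be set before nltk is first imported for the
# value to be picked up into nltk.data.path.
import os

os.environ.setdefault("NLTK_DATA", "/tmp")  # mirrors the env: block in the workflow above

import nltk.data

print(nltk.data.path)  # "/tmp" should appear among the search directories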
6 changes: 4 additions & 2 deletions .github/workflows/ubuntu.yml
@@ -9,6 +9,8 @@ on:
jobs:
build:
runs-on: ubuntu-latest
env:
NLTK_DATA: "/tmp"
strategy:
matrix:
python-version: ['3.12', '3.13']
@@ -23,7 +25,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install pytest
# Go over and comment out stuff when next Mathics core and Mathics-scanner are released
# python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full]
python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full]
# Until the next mathics-core release
git clone https://github.com/Mathics3/mathics-core
(cd mathics-core && python -m pip install -e .[full])
@@ -32,7 +34,7 @@ jobs:
- name: Install Mathics3 Module nltk
run: |
python -m pip install --no-build-isolation setuptools Mathics3[full] nltk PatternLite enchant
make develop
- name: Test Mathics3 Module nltk
run: |
make develop
make -j3 check
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
*~
.python-version
/.nltk_data
/.python-version
/ChangeLog
/ChangeLog-spell-corrected
9 changes: 5 additions & 4 deletions Makefile
@@ -28,6 +28,10 @@ SPACY_DOWNLOAD ?= $(lang)_core_web_$(WORDLIST_SIZE)
#: Default target - same as "develop"
all: develop

#: Download bcp47, which is needed to support the Mathics3 builtin WordTranslation
download-bcp47:
$(PYTHON) ./admin-tools/download-bcp47.py

#: Word-list data. Customize with lang and eventually WORDLIST_SIZE variables
wordlist:
$(PYTHON) -m nltk.downloader wordnet2022 omw-1.4
@@ -40,10 +44,7 @@ pypi-setup:
#: Set up to run from the source tree
develop: pypi-setup
$(MAKE) wordlist

#: Install Mathics3 Module nltk
install: pypi-setup
$(PYTHON) setup.py install
$(MAKE) download-bcp47

#: Run tests
test check: pytest doctest
21 changes: 21 additions & 0 deletions admin-tools/download-bcp47.py
@@ -0,0 +1,21 @@
#!/usr/bin/env python3
"""
Download the bcp47 corpus, which is needed to support the Mathics3 builtin function WordTranslation.
"""
import os

import nltk

# choose a local data dir so we don't require system-wide write access
data_dir = os.environ.get("NLTK_DATA", os.path.join(os.getcwd(), ".nltk_data"))
os.makedirs(data_dir, exist_ok=True)

# ensure nltk knows about it
if data_dir not in nltk.data.path:
nltk.data.path.append(data_dir)

# only download if missing
try:
nltk.data.find("corpora/bcp47")
except LookupError:
nltk.download("bcp47", download_dir=data_dir, quiet=False)
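The bcp47 corpus fetched above is what NLTK's langnames helper (imported as lgn in pymathics/natlang/translation.py below) consults when it maps a human-readable language name to a language code. A hedged sketch of that lookup, assuming nltk.langnames exposes langcode() with the signature used in this PR:

#!/usr/bin/env python3
"""Sketch: resolve a language name to an ISO 639-3 code using the bcp47 data."""
import os

os.environ.setdefault("NLTK_DATA", "/tmp")  # or wherever download-bcp47.py placed the data

from nltk import langnames as lgn  # assumed module path; matches the lgn alias in translation.py

# typ=3 requests the 3-letter ISO 639-3 code, the form eval_WordTranslation passes to WordNet.
print(lgn.langcode("French", typ=3))  # expected: "fra"
print(lgn.langcode("German", typ=3))  # expected: "deu"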
5 changes: 3 additions & 2 deletions pymathics/natlang/__init__.py
@@ -48,6 +48,7 @@
WordDefinition,
WordList,
)

from pymathics.natlang.manipulate import Pluralize
from pymathics.natlang.normalization import (
DeleteStopwords,
@@ -65,8 +66,7 @@
WordSimilarity,
WordStem,
)

from pymathics.natlang.linguistic_data.translation import LanguageIdentify
from pymathics.natlang.translation import LanguageIdentify, WordTranslation
from pymathics.natlang.version import __version__

pymathics_version_data = {
@@ -99,6 +99,7 @@
"WordList",
"WordSimilarity",
"WordStem",
"WordTranslation",
"__version__",
"pymathics_version_data",
]
3 changes: 0 additions & 3 deletions pymathics/natlang/linguistic_data/lang_trans.py

This file was deleted.

54 changes: 0 additions & 54 deletions pymathics/natlang/linguistic_data/translation.py

This file was deleted.

110 changes: 110 additions & 0 deletions pymathics/natlang/translation.py
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
"""
Language Translation

"""

# In WR this falls under Text Normalization, but it also appears under Natural Language
# Processing and Linguistic Data. It is placed here because this is the only module that
# uses the langid and pycountry packages.
#
# TODO: WordTranslation, TextTranslation

from typing import Union

import langid # see https://github.com/saffsd/langid.py
import pycountry
from mathics.core.atoms import String
from mathics.core.builtin import Builtin
from mathics.core.evaluation import Evaluation
from mathics.core.list import ListExpression
from mathics.core.symbols import Symbol
from mathics.core.systemsymbols import SymbolFailed
from nltk import langnames as lgn  # provides langcode() used below; needs the bcp47 corpus
from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import WordNetError

sort_order = "Language Translation"


class LanguageIdentify(Builtin):
"""
<url>:WMA link:
https://reference.wolfram.com/language/ref/LanguageIdentify.html</url>

<dl>
<dt>'LanguageIdentify'[$text$]
<dd>returns the name of the language used in $text$.
</dl>

>> LanguageIdentify["eins zwei drei"]
= German
"""

summary_text = "determine the predominant human language in a string"

def eval(self, text: String, evaluation: Evaluation) -> Union[Symbol, String]:
"LanguageIdentify[text_String]"

# an alternative: https://github.com/Mimino666/langdetect

code, _ = langid.classify(text.value)
language = pycountry.languages.get(alpha_2=code)
if language is None:
return SymbolFailed
return String(language.name)


# FIXME generalize
class WordTranslation(Builtin):
"""
<url>:WMA link:
https://reference.wolfram.com/language/ref/WordTranslation.html</url>

<dl>
<dt>'WordTranslation'[$word$, $lang$]
<dd>returns a list of translations of $word$ into $lang$.
</dl>

>> WordTranslation["dog", "French"]
= ...
"""

requires = ("nltk",)

# Enable checking that exactly two arguments are supplied.
eval_error = Builtin.generic_argument_error
expected_args = 2

summary_text = "give word translations"

def eval(
self, word: String, lang: String, evaluation: Evaluation
) -> Union[Symbol, ListExpression]:
"WordTranslation[word_String, lang_String]"
return eval_WordTranslation(word.value, lang.value)


def eval_WordTranslation(word: str, language_name: str):
"""
Return a list of translations of the English word `word` into `language_name`.
"""

# Convert "language_name" using NLTK's langnames utility
# to its 3-letter ISO 639-3 code.
iso_code = lgn.langcode(language_name, typ=3)

if iso_code is None:
return SymbolFailed

synsets = wn.synsets(word)
translations = set()

for ss in synsets:
# Pass the converted code to WordNet
try:
for lemma in ss.lemmas(lang=iso_code):
translations.add(lemma.name())
except WordNetError:
return SymbolFailed

return ListExpression(*[String(t) for t in translations])
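To make the WordNet path in eval_WordTranslation above easier to follow, here is a hedged stand-alone sketch of the same lookup with the ISO 639-3 code hard-wired to "fra" (French), so it does not depend on the bcp47/langnames step. It assumes the WordNet and omw-1.4 corpora installed by the Makefile's wordlist target are available to nltk:

# Sketch of the translation core: gather the French lemma names attached to
# every WordNet synset of an English word. "fra" is hard-coded here; the real
# code derives the code from the language name via lgn.langcode(..., typ=3).
from nltk.corpus import wordnet as wn


def translate(word: str, iso_code: str = "fra") -> set:
    translations = set()
    for synset in wn.synsets(word):
        for lemma in synset.lemmas(lang=iso_code):
            translations.add(lemma.name())
    return translations


print(translate("dog"))  # e.g. a set containing 'chien', depending on the installed OMW data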
1 change: 0 additions & 1 deletion pyproject.toml
@@ -7,7 +7,6 @@ requires = [
"langid", # replace with a supported newer package, e.g. via spacy
"matplotlib",
"mpmath>=1.2.0",
"nltk>=3.8.0",
"numpy",
"pycountry>=3.2.0",
"pyenchant>=3.2.0",