From 7ed9e9f5b8934d51824d4f606b85e4e7b0e86e77 Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Sun, 29 Mar 2026 12:56:00 -0300 Subject: [PATCH 01/11] add UNICODE_CHARACTER_TO_ASCII and CHARACTER_TO_NAME tables --- mathics_scanner/characters.py | 29 +++++++++++++++++++++++ mathics_scanner/data/named-characters.yml | 3 ++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py index edabecc5..d8f14552 100644 --- a/mathics_scanner/characters.py +++ b/mathics_scanner/characters.py @@ -165,6 +165,35 @@ def replace_box_unicode_with_ascii(input_string): NAMED_CHARACTERS_COLLECTION.get("unicode-to-wl-re", "") ) +# Unicode to ASCII + +CHARACTER_TO_NAME = { + char: rf"\[{name}]" + for name, char in NAMED_CHARACTERS_COLLECTION["named-characters"].items() +} +# TODO: add WL characters to CHARACTER_TO_NAME. For example, "\uF74C" in WMA is named as +# \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character +# we can print with standard fonts. The problem with this approach is that the map +# would not be invertible anymore. + + +# This dictionary is used for the default encoding from Unicode/UTF-8 to ASCII + +UNICODE_CHARACTER_TO_ASCII = CHARACTER_TO_NAME.copy() +UNICODE_CHARACTER_TO_ASCII.update( + { + ch: NAMED_CHARACTERS_COLLECTION["operator-to-ascii"][name] + for name, ch in NAMED_CHARACTERS_COLLECTION["operator-to-unicode"].items() + if name in NAMED_CHARACTERS_COLLECTION["operator-to-ascii"] + } +) +# TODO: add WL characters to UNICODE_CHARACTER_TO_ASCII. For example, "\uF74C" in WMA is named as +# \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character +# we can print with standard fonts. For the effects of this table, "\uF74C" should be mapped to +# something that can be print as an ASCII string (probably, "d"). + + + # Deprecated def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str: diff --git a/mathics_scanner/data/named-characters.yml b/mathics_scanner/data/named-characters.yml index 9bdae280..d50c9ba1 100644 --- a/mathics_scanner/data/named-characters.yml +++ b/mathics_scanner/data/named-characters.yml @@ -813,7 +813,7 @@ CapitalDelta: CapitalDifferentialD: amslatex: '\mathbb{D}' - ascii: "d" + ascii: "D" esc-alias: DD has-unicode-inverse: true is-letter-like: true @@ -2131,6 +2131,7 @@ DifferenceDelta: # \u2146 DifferentialD: amslatex: '\,d' + ascii: "d" esc-alias: dd has-unicode-inverse: true # This can't be letter-like because it is used in derivatives as a function From 582a161b4870f108649b9d848bc57b9363c43fb4 Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Sun, 29 Mar 2026 13:18:39 -0300 Subject: [PATCH 02/11] using the variable --- mathics_scanner/characters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py index d8f14552..60a8af4f 100644 --- a/mathics_scanner/characters.py +++ b/mathics_scanner/characters.py @@ -169,7 +169,7 @@ def replace_box_unicode_with_ascii(input_string): CHARACTER_TO_NAME = { char: rf"\[{name}]" - for name, char in NAMED_CHARACTERS_COLLECTION["named-characters"].items() + for name, char in NAMED_CHARACTERS.items() } # TODO: add WL characters to CHARACTER_TO_NAME. For example, "\uF74C" in WMA is named as # \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character From 44ba4f353242d6864f6e4c770389a4c4f0bb0c2f Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 14:18:20 -0300 Subject: [PATCH 03/11] black --- mathics_scanner/characters.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py index 60a8af4f..07168eed 100644 --- a/mathics_scanner/characters.py +++ b/mathics_scanner/characters.py @@ -167,14 +167,11 @@ def replace_box_unicode_with_ascii(input_string): # Unicode to ASCII -CHARACTER_TO_NAME = { - char: rf"\[{name}]" - for name, char in NAMED_CHARACTERS.items() -} +CHARACTER_TO_NAME = {char: rf"\[{name}]" for name, char in NAMED_CHARACTERS.items()} # TODO: add WL characters to CHARACTER_TO_NAME. For example, "\uF74C" in WMA is named as # \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character # we can print with standard fonts. The problem with this approach is that the map -# would not be invertible anymore. +# would not be invertible anymore. # This dictionary is used for the default encoding from Unicode/UTF-8 to ASCII @@ -193,8 +190,6 @@ def replace_box_unicode_with_ascii(input_string): # something that can be print as an ASCII string (probably, "d"). - - # Deprecated def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str: """ From cd524c9dfd3761d6333c071a89f8b9de18ccfda0 Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 14:24:50 -0300 Subject: [PATCH 04/11] add operator-to-unicode and operator-to-ascii just if the tables are already loaded. --- mathics_scanner/characters.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py index 07168eed..441a9c87 100644 --- a/mathics_scanner/characters.py +++ b/mathics_scanner/characters.py @@ -177,17 +177,18 @@ def replace_box_unicode_with_ascii(input_string): # This dictionary is used for the default encoding from Unicode/UTF-8 to ASCII UNICODE_CHARACTER_TO_ASCII = CHARACTER_TO_NAME.copy() -UNICODE_CHARACTER_TO_ASCII.update( - { - ch: NAMED_CHARACTERS_COLLECTION["operator-to-ascii"][name] - for name, ch in NAMED_CHARACTERS_COLLECTION["operator-to-unicode"].items() - if name in NAMED_CHARACTERS_COLLECTION["operator-to-ascii"] - } -) -# TODO: add WL characters to UNICODE_CHARACTER_TO_ASCII. For example, "\uF74C" in WMA is named as -# \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character -# we can print with standard fonts. For the effects of this table, "\uF74C" should be mapped to -# something that can be print as an ASCII string (probably, "d"). +if "operator-to-ascii" in NAMED_CHARACTERS_COLLECTION: + UNICODE_CHARACTER_TO_ASCII.update( + { + ch: NAMED_CHARACTERS_COLLECTION["operator-to-ascii"][name] + for name, ch in NAMED_CHARACTERS_COLLECTION["operator-to-unicode"].items() + if name in NAMED_CHARACTERS_COLLECTION["operator-to-ascii"] + } + ) + # TODO: add WL characters to UNICODE_CHARACTER_TO_ASCII. For example, "\uF74C" in WMA is named as + # \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character + # we can print with standard fonts. For the effects of this table, "\uF74C" should be mapped to + # something that can be print as an ASCII string (probably, "d"). # Deprecated From 5bf07a4ad238df06f6c4a2f610b2ae3b87eab3e4 Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 14:34:17 -0300 Subject: [PATCH 05/11] install mathics3 in pyodide workflow --- .github/workflows/pyodide.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pyodide.yml b/.github/workflows/pyodide.yml index 5bd83df7..f9a64dc4 100644 --- a/.github/workflows/pyodide.yml +++ b/.github/workflows/pyodide.yml @@ -54,6 +54,10 @@ jobs: pip install "setuptools" PyYAML click packaging pytest # We should comment out after next mathics-core release + python -m pip install --no-build-isolation -e . + python -m mathics_scanner.generate.boxing_characters + python -m mathics_scanner.generate.named_characters + python -m mathics_scanner.generate.operators python -m pip install --no-build-isolation --no-deps -e git+https://github.com/Mathics3/mathics-core#egg=Mathics3 # python -m pip Mathics3 From aad225f5f0efbf39dd6f37fa4a3cbd6069a242a7 Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 15:14:27 -0300 Subject: [PATCH 06/11] no isolation in mathics3-doctest workflow --- .github/workflows/mathics3-doctest.yml | 2 +- .github/workflows/pyodide.yml | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/mathics3-doctest.yml b/.github/workflows/mathics3-doctest.yml index fd1bf687..674d46d3 100644 --- a/.github/workflows/mathics3-doctest.yml +++ b/.github/workflows/mathics3-doctest.yml @@ -38,7 +38,7 @@ jobs: # Until next Mathics3/mathics-core release is out... git clone --depth 1 https://github.com/Mathics3/mathics-core.git cd mathics-core/ - python -m pip install -e .[dev] + python -m pip install --no-build-isolation -e .[dev] cd .. - name: Run Mathics3 tests run: | diff --git a/.github/workflows/pyodide.yml b/.github/workflows/pyodide.yml index f9a64dc4..5bd83df7 100644 --- a/.github/workflows/pyodide.yml +++ b/.github/workflows/pyodide.yml @@ -54,10 +54,6 @@ jobs: pip install "setuptools" PyYAML click packaging pytest # We should comment out after next mathics-core release - python -m pip install --no-build-isolation -e . - python -m mathics_scanner.generate.boxing_characters - python -m mathics_scanner.generate.named_characters - python -m mathics_scanner.generate.operators python -m pip install --no-build-isolation --no-deps -e git+https://github.com/Mathics3/mathics-core#egg=Mathics3 # python -m pip Mathics3 From 95d18eec8f271327796276d11ac7735e4ea8faaa Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 15:20:25 -0300 Subject: [PATCH 07/11] no build isolation for mathics3-doctst --- .github/workflows/mathics3-doctest.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mathics3-doctest.yml b/.github/workflows/mathics3-doctest.yml index 674d46d3..f2a35cfe 100644 --- a/.github/workflows/mathics3-doctest.yml +++ b/.github/workflows/mathics3-doctest.yml @@ -24,7 +24,7 @@ jobs: - name: Install Mathics3 scanner without JSON run: | python -m pip install --upgrade pip - pip install -e . + pip install --no-build-isolation -e . - name: Install JSON files run: | python -m mathics_scanner.generate.boxing_characters -o mathics_scanner/data/boxing-characters.json diff --git a/pyproject.toml b/pyproject.toml index 1e78c6b3..bdb26f87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "setuptools", + "setuptools>=80.10.2", # needed for building tables for the sdist: "PyYAML", "click", From b01e7030545662e5b8f57b9b075e5b3af180cd97 Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 15:24:03 -0300 Subject: [PATCH 08/11] undo changes --- .github/workflows/mathics3-doctest.yml | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mathics3-doctest.yml b/.github/workflows/mathics3-doctest.yml index f2a35cfe..fd1bf687 100644 --- a/.github/workflows/mathics3-doctest.yml +++ b/.github/workflows/mathics3-doctest.yml @@ -24,7 +24,7 @@ jobs: - name: Install Mathics3 scanner without JSON run: | python -m pip install --upgrade pip - pip install --no-build-isolation -e . + pip install -e . - name: Install JSON files run: | python -m mathics_scanner.generate.boxing_characters -o mathics_scanner/data/boxing-characters.json @@ -38,7 +38,7 @@ jobs: # Until next Mathics3/mathics-core release is out... git clone --depth 1 https://github.com/Mathics3/mathics-core.git cd mathics-core/ - python -m pip install --no-build-isolation -e .[dev] + python -m pip install -e .[dev] cd .. - name: Run Mathics3 tests run: | diff --git a/pyproject.toml b/pyproject.toml index bdb26f87..1e78c6b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "setuptools>=80.10.2", + "setuptools", # needed for building tables for the sdist: "PyYAML", "click", From aed4e808392a1a99aac00970ca6aaddad432e16a Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 15:26:57 -0300 Subject: [PATCH 09/11] try what happens if we comment out the new code... --- mathics_scanner/characters.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py index 441a9c87..4dde94f6 100644 --- a/mathics_scanner/characters.py +++ b/mathics_scanner/characters.py @@ -167,7 +167,7 @@ def replace_box_unicode_with_ascii(input_string): # Unicode to ASCII -CHARACTER_TO_NAME = {char: rf"\[{name}]" for name, char in NAMED_CHARACTERS.items()} +# CHARACTER_TO_NAME = {char: rf"\[{name}]" for name, char in NAMED_CHARACTERS.items()} # TODO: add WL characters to CHARACTER_TO_NAME. For example, "\uF74C" in WMA is named as # \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character # we can print with standard fonts. The problem with this approach is that the map @@ -176,8 +176,8 @@ def replace_box_unicode_with_ascii(input_string): # This dictionary is used for the default encoding from Unicode/UTF-8 to ASCII -UNICODE_CHARACTER_TO_ASCII = CHARACTER_TO_NAME.copy() -if "operator-to-ascii" in NAMED_CHARACTERS_COLLECTION: +# UNICODE_CHARACTER_TO_ASCII = CHARACTER_TO_NAME.copy() +if False and "operator-to-ascii" in NAMED_CHARACTERS_COLLECTION: UNICODE_CHARACTER_TO_ASCII.update( { ch: NAMED_CHARACTERS_COLLECTION["operator-to-ascii"][name] From eaa7a03fd42c4815f37691a534c7f73edf9b51a2 Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 15:28:49 -0300 Subject: [PATCH 10/11] try what happens if we comment out the new code... --- mathics_scanner/characters.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py index 4dde94f6..fe0cc3db 100644 --- a/mathics_scanner/characters.py +++ b/mathics_scanner/characters.py @@ -166,18 +166,18 @@ def replace_box_unicode_with_ascii(input_string): ) # Unicode to ASCII - -# CHARACTER_TO_NAME = {char: rf"\[{name}]" for name, char in NAMED_CHARACTERS.items()} -# TODO: add WL characters to CHARACTER_TO_NAME. For example, "\uF74C" in WMA is named as -# \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character +""" +CHARACTER_TO_NAME = {char: rf"\[{name}]" for name, char in NAMED_CHARACTERS.items()} +# TODO: add WL characters to CHARACTER_TO_NAME. For example, "\uf74c" in WMA is named as +# \[DifferentialD]. Here we are using "\U0001d451" for that name, because is a character # we can print with standard fonts. The problem with this approach is that the map # would not be invertible anymore. # This dictionary is used for the default encoding from Unicode/UTF-8 to ASCII -# UNICODE_CHARACTER_TO_ASCII = CHARACTER_TO_NAME.copy() -if False and "operator-to-ascii" in NAMED_CHARACTERS_COLLECTION: +UNICODE_CHARACTER_TO_ASCII = CHARACTER_TO_NAME.copy() +if "operator-to-ascii" in NAMED_CHARACTERS_COLLECTION: UNICODE_CHARACTER_TO_ASCII.update( { ch: NAMED_CHARACTERS_COLLECTION["operator-to-ascii"][name] @@ -185,10 +185,11 @@ def replace_box_unicode_with_ascii(input_string): if name in NAMED_CHARACTERS_COLLECTION["operator-to-ascii"] } ) - # TODO: add WL characters to UNICODE_CHARACTER_TO_ASCII. For example, "\uF74C" in WMA is named as - # \[DifferentialD]. Here we are using "\U0001D451" for that name, because is a character - # we can print with standard fonts. For the effects of this table, "\uF74C" should be mapped to + # TODO: add WL characters to UNICODE_CHARACTER_TO_ASCII. For example, "\uf74c" in WMA is named as + # \[DifferentialD]. Here we are using "\U0001d451" for that name, because is a character + # we can print with standard fonts. For the effects of this table, "\uf74c" should be mapped to # something that can be print as an ASCII string (probably, "d"). +""" # Deprecated From 7024b8dcc58ad56cc1ce745f888b19a2692c4352 Mon Sep 17 00:00:00 2001 From: Juan Mauricio Matera Date: Thu, 2 Apr 2026 15:31:07 -0300 Subject: [PATCH 11/11] resinstate changes --- mathics_scanner/characters.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py index fe0cc3db..995484e6 100644 --- a/mathics_scanner/characters.py +++ b/mathics_scanner/characters.py @@ -166,7 +166,6 @@ def replace_box_unicode_with_ascii(input_string): ) # Unicode to ASCII -""" CHARACTER_TO_NAME = {char: rf"\[{name}]" for name, char in NAMED_CHARACTERS.items()} # TODO: add WL characters to CHARACTER_TO_NAME. For example, "\uf74c" in WMA is named as # \[DifferentialD]. Here we are using "\U0001d451" for that name, because is a character @@ -189,7 +188,6 @@ def replace_box_unicode_with_ascii(input_string): # \[DifferentialD]. Here we are using "\U0001d451" for that name, because is a character # we can print with standard fonts. For the effects of this table, "\uf74c" should be mapped to # something that can be print as an ASCII string (probably, "d"). -""" # Deprecated