From 3e0c9876570c1a3c43a30e20f48d482c92e628d4 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Mon, 14 Apr 2025 08:24:29 -0700 Subject: [PATCH 1/3] Clean up unnecessary pycountry based functionality. Remove ka_lang reference. Clean up broken BCP 47 parsing and validation. --- le_utils/constants/languages.py | 76 +++------------------------------ setup.py | 6 +-- 2 files changed, 6 insertions(+), 76 deletions(-) diff --git a/le_utils/constants/languages.py b/le_utils/constants/languages.py index 49962b1..b2d7e4f 100644 --- a/le_utils/constants/languages.py +++ b/le_utils/constants/languages.py @@ -1,14 +1,8 @@ import json import logging import pkgutil -import re -from collections import defaultdict from collections import namedtuple -try: - import pycountry -except ImportError: - pycountry = None logger = logging.getLogger("le_utils") logger.setLevel(logging.INFO) @@ -34,9 +28,7 @@ class Language( - namedtuple( - "Language", ["native_name", "primary_code", "subcode", "name", "ka_name"] - ) + namedtuple("Language", ["native_name", "primary_code", "subcode", "name"]) ): @property def code(self): @@ -59,26 +51,13 @@ def first_native_name(self): return self.native_name.split(",")[0] -def _parse_out_iso_639_code(code): - code_regex = r"(?P<primary_code>\w{2,3})(-(?P<subcode>\w{2,4}))?" 
- - match = re.match(code_regex, code) - if match: - return defaultdict(lambda: None, **match.groupdict()) - else: - return None - - def generate_list(constantlist): for code, lang in constantlist.items(): - values = _parse_out_iso_639_code(code) - values.update(lang) - - # add a default value to ka_name - if "ka_name" not in values: - values["ka_name"] = None + parts = code.split("-", maxsplit=1) + lang["primary_code"] = parts[0] + lang["subcode"] = None if len(parts) == 1 else parts[1] - yield Language(**values) + yield Language(**lang) def _initialize_language_list(): @@ -220,51 +199,6 @@ def getlang_by_native_name(native_name): return _LANGUAGE_NATIVE_NAME_LOOKUP.get(simple_native_name, None) -def getlang_by_alpha2(code): # noqa: C901 - """ - Lookup a Language object for language code `code` based on these strategies: - - Special case rules for Hebrew and Chinese Hans/Hant scripts - - Using `alpha_2` lookup in `pycountry.languages` followed by lookup for a - language with the same `name` in the internal representaion - Returns `None` if no matching language is found. - """ - if pycountry is None: - logger.warn("pycountry is not installed, cannot lookup language by alpha2") - return None - - # First try exact match to a language code in the internal representaion - exact_match = getlang(code) - if exact_match: - return exact_match - - # Handle special cases for language codes returned by YouTube API - if code == "iw": # handle old Hebrew code 'iw' and return modern code 'he' - return getlang("he") - elif "zh-Hans" in code: # use code `zh-CN` for all simplified Chinese - return getlang("zh-CN") - elif re.match("zh(.*)?-TW", code): # matches all Taiwan chinese codes - return getlang("zh-TW") - elif re.match("zh(.*)?-HK", code): # use code `zh-Hant` for Hong Kong - return getlang("zh-Hant") - - # extract prefix only if specified with subcode: e.g. 
zh-Hans --> zh - first_part = code.split("-")[0] - - # See if pycountry can find this language - try: - pyc_lang = pycountry.languages.get(alpha_2=first_part) - if pyc_lang: - if hasattr(pyc_lang, "inverted_name"): - lang_name = pyc_lang.inverted_name - else: - lang_name = pyc_lang.name - return getlang_by_name(lang_name) - else: - return None - except KeyError: - return None - - def getlang_direction(code): if code in RTL_LANG_CODES: return RTL_LANGUAGE diff --git a/setup.py b/setup.py index 1b800f9..f4c1975 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,6 @@ long_description = io.open("README.md", encoding="utf-8").read() -lang_utils_requirements = [ - "pycountry==17.5.14", -] - setup( name="le-utils", packages=find_packages(), @@ -17,7 +13,7 @@ long_description=long_description, long_description_content_type="text/markdown", install_requires=[], - extras_require={"lang_utils": lang_utils_requirements}, + extras_require={}, license="MIT", url="https://github.com/learningequality/le-utils", download_url="https://github.com/learningequality/le-utils/releases", From fe838c88a78a5a884b474b007e3dde8b7cb16a59 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Mon, 14 Apr 2025 08:26:52 -0700 Subject: [PATCH 2/3] Add developer script to add new languages using langcodes/langdata and pycountry. Clean up language lookup json. Add several new languages. 
--- Makefile | 3 + le_utils/resources/languagelookup.json | 1870 ++++++++++++------------ scripts/add_language.py | 139 ++ 3 files changed, 1081 insertions(+), 931 deletions(-) create mode 100644 scripts/add_language.py diff --git a/Makefile b/Makefile index 6ff5e27..15e3626 100644 --- a/Makefile +++ b/Makefile @@ -35,3 +35,6 @@ release: dist release-npm: clean build cd js && npm publish + +add-language: + python scripts/add_language.py diff --git a/le_utils/resources/languagelookup.json b/le_utils/resources/languagelookup.json index b2fc601..c906bc2 100644 --- a/le_utils/resources/languagelookup.json +++ b/le_utils/resources/languagelookup.json @@ -1,1083 +1,732 @@ { - "und":{ - "name":"Undetermined", - "native_name":"Undetermined" - }, - "mul":{ - "name":"Multiple languages", - "native_name":"Multiple languages" - }, - "ab":{ - "name":"Abkhaz", - "native_name":"аҧсуа" - }, - "aa":{ - "name":"Afar", - "native_name":"Afaraf" - }, - "af":{ - "name":"Afrikaans", - "native_name":"Afrikaans" - }, - "ak":{ - "name":"Akan", - "native_name":"Akan" - }, - "sq":{ - "name":"Albanian", - "native_name":"Shqip" - }, - "am":{ - "name":"Amharic", - "native_name":"አማርኛ" - }, - "ar":{ - "name":"Arabic", - "native_name":"العربية" - }, - "arq":{ - "name":"Algerian; Darja", - "native_name":"دزيرية" - }, - "an":{ - "name":"Aragonese", - "native_name":"Aragonés" - }, - "hy":{ - "name":"Armenian", - "native_name":"Հայերեն" - }, - "as":{ - "name":"Assamese", - "native_name":"অসমীয়া" - }, - "av":{ - "name":"Avaric", - "native_name":"авар мацӀ, магӀарул мацӀ" - }, - "ae":{ - "name":"Avestan", - "native_name":"avesta" - }, - "ay":{ - "name":"Aymara", - "native_name":"aymar aru" - }, - "az":{ - "name":"Azerbaijani", - "native_name":"azərbaycan dili" - }, - "bm":{ - "name":"Bambara", - "native_name":"bamanankan" - }, - "ba":{ - "name":"Bashkir", - "native_name":"башҡорт теле" - }, - "eu":{ - "name":"Basque", - "native_name":"euskara, euskera" - }, - "be":{ - "name":"Belarusian", - 
"native_name":"Беларуская" - }, - "be-tarask":{ - "name":"Belarusian, Classical Orthography", - "native_name":"Taraškievica, тарашкевіца, Беларуская клясычны правапіс" - }, - "bn":{ - "name":"Bengali", - "native_name":"বাংলা", - "ka_name":"bangla" + "aa": { + "name": "Afar", + "native_name": "Afaraf" }, - "bh":{ - "name":"Bihari", - "native_name":"भोजपुरी" + "ab": { + "name": "Abkhaz", + "native_name": "аҧсуа" }, - "bi":{ - "name":"Bislama", - "native_name":"Bislama" + "ach": { + "name": "Acholi; Acooli; Akoli", + "native_name": "Acholi" }, - "bs":{ - "name":"Bosnian", - "native_name":"bosanski jezik" + "ae": { + "name": "Avestan", + "native_name": "avesta" }, - "br":{ - "name":"Breton", - "native_name":"brezhoneg" - }, - "bg":{ - "name":"Bulgarian", - "native_name":"български език" - }, - "my":{ - "name":"Burmese", - "native_name":"ဗမာစာ" - }, - "ca":{ - "name":"Catalan; Valencian", - "native_name":"Català" - }, - "ch":{ - "name":"Chamorro", - "native_name":"Chamoru" - }, - "ce":{ - "name":"Chechen", - "native_name":"нохчийн мотт" - }, - "ny":{ - "name":"Chichewa; Chewa; Nyanja", - "native_name":"chiCheŵa, chinyanja" - }, - "cv":{ - "name":"Chuvash", - "native_name":"чӑваш чӗлхи" - }, - "kw":{ - "name":"Cornish", - "native_name":"Kernewek" - }, - "co":{ - "name":"Corsican", - "native_name":"corsu, lingua corsa" - }, - "cr":{ - "name":"Cree", - "native_name":"ᓀᐦᐃᔭᐍᐏᐣ" - }, - "lkt":{ - "name":"Lakhota; Lakotiyapi; Teton", - "native_name":"Lakhota" - }, - "hr":{ - "name":"Croatian", - "native_name":"hrvatski" - }, - "cs":{ - "name":"Czech", - "native_name":"česky, čeština" - }, - "csx":{ - "name":"Cambodian Sign Language", - "native_name":"Cambodian Sign Language" - }, - "da":{ - "name":"Danish", - "native_name":"Dansk" - }, - "dv":{ - "name":"Divehi; Dhivehi; Maldivian", - "native_name":"ދިވެހި" - }, - "nl":{ - "name":"Dutch", - "native_name":"Nederlands, Vlaams" - }, - "en":{ - "name":"English", - "native_name":"English" - }, - "eo":{ - "name":"Esperanto", - 
"native_name":"Esperanto" - }, - "et":{ - "name":"Estonian", - "native_name":"eesti, eesti keel" - }, - "ee":{ - "name":"Ewe", - "native_name":"Eʋegbe" + "af": { + "name": "Afrikaans", + "native_name": "Afrikaans" }, - "fo":{ - "name":"Faroese", - "native_name":"føroyskt" + "ak": { + "name": "Akan", + "native_name": "Akan" }, - "fj":{ - "name":"Fijian", - "native_name":"vosa Vakaviti" + "aka": { + "name": "Akan", + "native_name": "Akan" }, - "fi":{ - "name":"Finnish", - "native_name":"suomi, suomen kieli" + "am": { + "name": "Amharic", + "native_name": "አማርኛ" }, - "fr":{ - "name":"French", - "native_name":"Français, langue française", - "ka_name":"francais" - }, - "ff":{ - "name":"Fula; Fulah; Pulaar; Pular", - "native_name":"Fulfulde, Pulaar, Pular" - }, - "gl":{ - "name":"Galician", - "native_name":"Galego" - }, - "ka":{ - "name":"Georgian", - "native_name":"ქართული" - }, - "de":{ - "name":"German", - "native_name":"Deutsch" - }, - "el":{ - "name":"Greek, Modern", - "native_name":"Ελληνικά", - "ka_name":"greek" - }, - "gn":{ - "name":"Guaraní", - "native_name":"Avañeẽ" - }, - "gu":{ - "name":"Gujarati", - "native_name":"ગુજરાતી" - }, - "ht":{ - "name":"Haitian; Haitian Creole", - "native_name":"Kreyòl ayisyen" - }, - "ha":{ - "name":"Hausa", - "native_name":"Hausa, هَوُسَ" - }, - "he":{ - "name":"Hebrew (modern)", - "native_name":"עברית", - "ka_name":"Hebrew" + "an": { + "name": "Aragonese", + "native_name": "Aragonés" }, - "hz":{ - "name":"Herero", - "native_name":"Otjiherero" + "ar": { + "name": "Arabic", + "native_name": "العربية" }, - "hi":{ - "name":"Hindi", - "native_name":"हिन्दी, हिंदी" + "arq": { + "name": "Algerian; Darja", + "native_name": "دزيرية" }, - "ho":{ - "name":"Hiri Motu", - "native_name":"Hiri Motu" + "as": { + "name": "Assamese", + "native_name": "অসমীয়া" }, - "hu":{ - "name":"Hungarian", - "native_name":"Magyar" + "av": { + "name": "Avaric", + "native_name": "авар мацӀ, магӀарул мацӀ" }, - "hrx":{ - "name":"Hunsrik", - 
"native_name":"Hunsrückisch, Hunsrück, Hunsriker" + "awa": { + "name": "Awadhi", + "native_name": "अवधी" }, - "hdy":{ - "name":"Hadiyya; Hadiya; Adea; Adiya; Hadia", - "native_name":"Hadiyyisa" + "ay": { + "name": "Aymara", + "native_name": "aymar aru" }, - "ia":{ - "name":"Interlingua", - "native_name":"Interlingua" + "az": { + "name": "Azerbaijani", + "native_name": "azərbaycan dili" }, - "id":{ - "name":"Indonesian", - "native_name":"Bahasa Indonesia" + "ba": { + "name": "Bashkir", + "native_name": "башҡорт теле" }, - "ie":{ - "name":"Interlingue", - "native_name":"Originally called Occidental; then Interlingue after WWII" + "bcc": { + "name": "Southern Balochi", + "native_name": "Southern Balochi" }, - "ga":{ - "name":"Irish", - "native_name":"Gaeilge" + "be": { + "name": "Belarusian", + "native_name": "Беларуская" }, - "ig":{ - "name":"Igbo", - "native_name":"Asụsụ Igbo" + "be-tarask": { + "name": "Belarusian, Classical Orthography", + "native_name": "Taraškievica, тарашкевіца, Беларуская клясычны правапіс" }, - "ik":{ - "name":"Inupiaq", - "native_name":"Iñupiaq, Iñupiatun" + "bg": { + "name": "Bulgarian", + "native_name": "български език" }, - "io":{ - "name":"Ido", - "native_name":"Ido" + "bh": { + "name": "Bihari", + "native_name": "भोजपुरी" }, - "is":{ - "name":"Icelandic", - "native_name":"Íslenska" + "bho": { + "name": "Bhojpuri; Bajpuri; Bhojapuri", + "native_name": "भोजपुरी" }, - "it":{ - "name":"Italian", - "native_name":"Italiano" + "bi": { + "name": "Bislama", + "native_name": "Bislama" }, - "iu":{ - "name":"Inuktitut", - "native_name":"ᐃᓄᒃᑎᑐᑦ" + "bm": { + "name": "Bambara", + "native_name": "bamanankan" }, - "ja":{ - "name":"Japanese", - "native_name":"日本語 (にほんご/にっぽんご)" + "bma": { + "name": "Lame", + "native_name": "Lame" }, - "jv":{ - "name":"Javanese", - "native_name":"basa Jawa" + "bn": { + "name": "Bengali", + "native_name": "বাংলা" }, - "kl":{ - "name":"Kalaallisut, Greenlandic", - "native_name":"kalaallisut, kalaallit oqaasii" + "bo": { + 
"name": "Tibetan Standard; Tibetan, Central", + "native_name": "བོད་ཡིག" }, - "kn":{ - "name":"Kannada", - "native_name":"ಕನ್ನಡ" + "bog": { + "name": "Bamako Sign Language", + "native_name": "Bamako Sign Language" }, - "kr":{ - "name":"Kanuri", - "native_name":"Kanuri" + "br": { + "name": "Breton", + "native_name": "brezhoneg" }, - "ks":{ - "name":"Kashmiri", - "native_name":"कश्मीरी, كشميري‎" + "brh": { + "name": "Brahui", + "native_name": "Brahui" }, - "kk":{ - "name":"Kazakh", - "native_name":"Қазақ тілі" + "bs": { + "name": "Bosnian", + "native_name": "bosanski jezik" }, - "km":{ - "name":"Khmer", - "native_name":"ភាសាខ្មែរ" + "bug": { + "name": "Buginese", + "native_name": "Buginese" }, - "ki":{ - "name":"Kikuyu; Gikuyu", - "native_name":"Gĩkũyũ" + "bxk": { + "name": "Bukusu", + "native_name": "Bukusu" }, - "rw":{ - "name":"Kinyarwanda", - "native_name":"Ikinyarwanda" + "ca": { + "name": "Catalan; Valencian", + "native_name": "Català" }, - "rsn":{ - "name":"Rwandan Sign Language", - "native_name":"Amarenga y'Ikinyarwanda" + "ce": { + "name": "Chechen", + "native_name": "нохчийн мотт" }, - "ky":{ - "name":"Kirghiz; Kyrgyz", - "native_name":"кыргыз тили" + "ceb": { + "name": "Cebuano", + "native_name": "Cebuano" }, - "kv":{ - "name":"Komi", - "native_name":"коми кыв" + "ch": { + "name": "Chamorro", + "native_name": "Chamoru" }, - "kg":{ - "name":"Kongo", - "native_name":"KiKongo" + "cho": { + "name": "Choctaw", + "native_name": "Choctaw" }, - "ko":{ - "name":"Korean", - "native_name":"한국어 (韓國語), 조선말 (朝鮮語)" + "co": { + "name": "Corsican", + "native_name": "corsu, lingua corsa" }, - "ku":{ - "name":"Kurdish", - "native_name":"Kurdî, كوردی‎" + "cr": { + "name": "Cree", + "native_name": "ᓀᐦᐃᔭᐍᐏᐣ" }, - "kj":{ - "name":"Kwanyama; Kuanyama", - "native_name":"Kuanyama" + "cs": { + "name": "Czech", + "native_name": "česky, čeština" }, - "la":{ - "name":"Latin", - "native_name":"latine, lingua latina" + "csx": { + "name": "Cambodian Sign Language", + "native_name": 
"Cambodian Sign Language" }, - "lb":{ - "name":"Luxembourgish; Letzeburgesch", - "native_name":"Lëtzebuergesch" + "cu": { + "name": "Old Church Slavonic; Church Slavic; Church Slavonic; Old Bulgarian; Old Slavonic", + "native_name": "ѩзыкъ словѣньскъ" }, - "lg":{ - "name":"Luganda", - "native_name":"Luganda" + "cv": { + "name": "Chuvash", + "native_name": "чӑваш чӗлхи" }, - "li":{ - "name":"Limburgish; Limburgan; Limburger", - "native_name":"Limburgs" + "cy": { + "name": "Welsh", + "native_name": "Cymraeg" }, - "ln":{ - "name":"Lingala", - "native_name":"Lingála" + "da": { + "name": "Danish", + "native_name": "Dansk" }, - "lo":{ - "name":"Lao", - "native_name":"ພາສາລາວ" + "de": { + "name": "German", + "native_name": "Deutsch" }, - "lt":{ - "name":"Lithuanian", - "native_name":"lietuvių kalba" + "dty": { + "name": "Dotyali", + "native_name": "Dotyali" }, - "lu":{ - "name":"Luba-Katanga", - "native_name":"Luba-Katanga" + "dv": { + "name": "Divehi; Dhivehi; Maldivian", + "native_name": "ދިވެހި" }, - "lua":{ - "name":"Luba-Kasai", - "native_name":"Luba-Kasai, Tshiluba" + "ee": { + "name": "Ewe", + "native_name": "Eʋegbe" }, - "lv":{ - "name":"Latvian", - "native_name":"latviešu valoda" + "efi": { + "name": "Efik", + "native_name": "Efik" }, - "gv":{ - "name":"Manx", - "native_name":"Gaelg, Gailck" + "el": { + "name": "Greek, Modern", + "native_name": "Ελληνικά" }, - "mk":{ - "name":"Macedonian", - "native_name":"македонски јазик" + "en": { + "name": "English", + "native_name": "English" }, - "mg":{ - "name":"Malagasy", - "native_name":"Malagasy fiteny" + "en-GB": { + "name": "English, Britain", + "native_name": "British English" }, - "ms":{ - "name":"Malay", - "native_name":"bahasa Melayu, بهاس ملايو‎" + "en-PT": { + "name": "English, Pirate", + "native_name": "Argh! Pirates!" 
}, - "ml":{ - "name":"Malayalam", - "native_name":"മലയാളം" + "enq": { + "name": "Enga", + "native_name": "Enga" }, - "mt":{ - "name":"Maltese", - "native_name":"Malti" + "eo": { + "name": "Esperanto", + "native_name": "Esperanto" }, - "mi":{ - "name":"Māori", - "native_name":"te reo Māori" + "es": { + "name": "Spanish", + "native_name": "Español" }, - "mr":{ - "name":"Marathi (Marāṭhī)", - "native_name":"मराठी" + "es-AR": { + "name": "Spanish, Argentina", + "native_name": "Español (Argentina)" }, - "mh":{ - "name":"Marshallese", - "native_name":"Kajin M̧ajeļ" + "es-ES": { + "name": "Spanish, Spain; Castilian", + "native_name": "Español (España), Castellano" }, - "mwr":{ - "name":"Marwari; Marwadi", - "native_name":"मारवाड़ी, مارواڑی" + "es-MX": { + "name": "Spanish, Mexico", + "native_name": "Español (Mexico)" }, - "mn":{ - "name":"Mongolian", - "native_name":"монгол" + "es-NI": { + "name": "Spanish, Nicaragua", + "native_name": "Español (Nicaragua)" }, - "na":{ - "name":"Nauru", - "native_name":"Ekakairũ Naoero" + "et": { + "name": "Estonian", + "native_name": "eesti, eesti keel" }, - "nv":{ - "name":"Navajo; Navaho", - "native_name":"Diné bizaad, Dinékʼehǰí" + "eu": { + "name": "Basque", + "native_name": "euskara, euskera" }, - "nb":{ - "name":"Norwegian Bokmål", - "native_name":"Norsk bokmål" + "fa": { + "name": "Persian", + "native_name": "فارسی" }, - "nd":{ - "name":"North Ndebele", - "native_name":"isiNdebele" + "ff": { + "name": "Fula; Fulah; Pulaar; Pular", + "native_name": "Fulfulde, Pulaar, Pular" }, - "ne":{ - "name":"Nepali", - "native_name":"नेपाली" + "fi": { + "name": "Finnish", + "native_name": "suomi, suomen kieli" }, - "nsp":{ - "name":"Nepalese Sign Language", - "native_name":"नेपाली सांकेतिक भाषा" + "fil": { + "name": "Filipino", + "native_name": "Pilipino" }, - "ng":{ - "name":"Ndonga", - "native_name":"Owambo" + "fj": { + "name": "Fijian", + "native_name": "vosa Vakaviti" }, - "nn":{ - "name":"Norwegian Nynorsk", - "native_name":"Norsk nynorsk" 
+ "fo": { + "name": "Faroese", + "native_name": "føroyskt" }, - "no":{ - "name":"Norwegian", - "native_name":"Norsk" + "fr": { + "name": "French", + "native_name": "Français, langue française" }, - "ii":{ - "name":"Nuosu", - "native_name":"ꆈꌠ꒿ Nuosuhxop" + "fr-CA": { + "name": "French, Canada", + "native_name": "Français (Canada)" }, - "nr":{ - "name":"South Ndebele", - "native_name":"isiNdebele" + "ful": { + "name": "Fula; Fulah", + "native_name": "Fulfulde" }, - "oc":{ - "name":"Occitan", - "native_name":"Occitan" + "fuv": { + "name": "Fulfulde Mbororo; Fulfulde Mbororoore", + "native_name": "Fulfulde" }, - "oj":{ - "name":"Ojibwe; Ojibwa", - "native_name":"ᐊᓂᔑᓈᐯᒧᐎᓐ" + "fy": { + "name": "Western Frisian", + "native_name": "Frysk" }, - "cu":{ - "name":"Old Church Slavonic; Church Slavic; Church Slavonic; Old Bulgarian; Old Slavonic", - "native_name":"ѩзыкъ словѣньскъ" + "fy-NL": { + "name": "Frisian", + "native_name": "Frysk" }, - "om":{ - "name":"Oromo", - "native_name":"Afaan Oromoo" + "ga": { + "name": "Irish", + "native_name": "Gaeilge" }, - "or":{ - "name":"Oriya", - "native_name":"ଓଡ଼ିଆ" + "gd": { + "name": "Scottish Gaelic; Gaelic", + "native_name": "Gàidhlig" }, - "os":{ - "name":"Ossetian; Ossetic", - "native_name":"ирон æвзаг" + "gl": { + "name": "Galician", + "native_name": "Galego" }, - "pa":{ - "name":"Panjabi; Punjabi", - "native_name":"ਪੰਜਾਬੀ, پنجابی‎", - "ka_name":"punjabi" - }, - "pi":{ - "name":"Pāli", - "native_name":"पाऴि" - }, - "fa":{ - "name":"Persian", - "native_name":"فارسی", - "ka_name":"farsi" - }, - "pl":{ - "name":"Polish", - "native_name":"Polski" - }, - "ps":{ - "name":"Pashto; Pushto", - "native_name":"پښتو" - }, - "pt":{ - "name":"Portuguese", - "native_name":"Português", - "ka_name":"portugues" - }, - "pms":{ - "name":"Piedmontese", - "native_name":"Piemontèis" - }, - "qu":{ - "name":"Quechua", - "native_name":"Runa Simi, Kichwa" - }, - "rm":{ - "name":"Romansh", - "native_name":"rumantsch grischun" - }, - "rn":{ - 
"name":"Kirundi", - "native_name":"kiRundi" - }, - "ro":{ - "name":"Romanian; Moldavian; Moldovan", - "native_name":"română" - }, - "ru":{ - "name":"Russian", - "native_name":"русский язык" - }, - "sa":{ - "name":"Sanskrit (Saṁskṛta)", - "native_name":"संस्कृतम्" - }, - "sc":{ - "name":"Sardinian", - "native_name":"sardu" - }, - "sd":{ - "name":"Sindhi", - "native_name":"सिन्धी, سنڌي، سندھی‎" - }, - "se":{ - "name":"Northern Sami", - "native_name":"Davvisámegiella" - }, - "sm":{ - "name":"Samoan", - "native_name":"gagana faa Samoa" - }, - "sg":{ - "name":"Sango", - "native_name":"yângâ tî sängö" - }, - "sr":{ - "name":"Serbian", - "native_name":"српски језик" - }, - "sid":{ - "name":"Sidamo; Sidaamu afii; Sidaama; Sidama", - "native_name":"Sidaamu Afoo" - }, - "gd":{ - "name":"Scottish Gaelic; Gaelic", - "native_name":"Gàidhlig" - }, - "sco":{ - "name":"Scots", - "native_name":"Scots" - }, - "sn":{ - "name":"Shona", - "native_name":"chiShona" - }, - "si":{ - "name":"Sinhala; Sinhalese", - "native_name":"සිංහල", - "ka_name":"sinhala" - }, - "sk":{ - "name":"Slovak", - "native_name":"slovenčina" - }, - "sl":{ - "name":"Slovene", - "native_name":"slovenščina" - }, - "so":{ - "name":"Somali", - "native_name":"Soomaaliga, af Soomaali" - }, - "st":{ - "name":"Southern Sotho", - "native_name":"Sesotho" - }, - "es":{ - "name":"Spanish", - "native_name":"Español", - "ka_name":"espanol" - }, - "su":{ - "name":"Sundanese", - "native_name":"Basa Sunda" - }, - "sw":{ - "name":"Swahili", - "native_name":"Kiswahili" - }, - "ss":{ - "name":"Swati", - "native_name":"SiSwati" - }, - "sv":{ - "name":"Swedish", - "native_name":"svenska" - }, - "ta":{ - "name":"Tamil", - "native_name":"தமிழ்" - }, - "te":{ - "name":"Telugu", - "native_name":"తెలుగు" - }, - "tg":{ - "name":"Tajik", - "native_name":"тоҷикӣ, toğikī, تاجیکی‎" - }, - "th":{ - "name":"Thai", - "native_name":"ไทย" - }, - "ti":{ - "name":"Tigrinya", - "native_name":"ትግርኛ" - }, - "bo":{ - "name":"Tibetan Standard; Tibetan, 
Central", - "native_name":"བོད་ཡིག" - }, - "tk":{ - "name":"Turkmen", - "native_name":"Türkmen, Түркмен" - }, - "tl":{ - "name":"Tagalog", - "native_name":"Wikang Tagalog" - }, - "tn":{ - "name":"Tswana", - "native_name":"Setswana" - }, - "to":{ - "name":"Tonga (Tonga Islands)", - "native_name":"faka Tonga" - }, - "tr":{ - "name":"Turkish", - "native_name":"Türkçe" - }, - "ts":{ - "name":"Tsonga", - "native_name":"Xitsonga" - }, - "tt":{ - "name":"Tatar", - "native_name":"татарча, tatarça, تاتارچا‎" - }, - "tw":{ - "name":"Twi", - "native_name":"Twi" - }, - "ty":{ - "name":"Tahitian", - "native_name":"Reo Tahiti" - }, - "ug":{ - "name":"Uighur; Uyghur", - "native_name":"Uyƣurqə, ئۇيغۇرچە‎" - }, - "uk":{ - "name":"Ukrainian", - "native_name":"українська", - "ka_name":"ukranian" - }, - "rue":{ - "name":"Rusyn", - "native_name":"русиньскый язык" - }, - "ur":{ - "name":"Urdu", - "native_name":"اردو" - }, - "uz":{ - "name":"Uzbek", - "native_name":"zbek, Ўзбек, أۇزبېك‎" - }, - "ve":{ - "name":"Venda", - "native_name":"Tshivenḓa, Tshivenda" - }, - "vi":{ - "name":"Vietnamese", - "native_name":"Tiếng Việt" - }, - "vo":{ - "name":"Volapük", - "native_name":"Volapük" - }, - "wa":{ - "name":"Walloon", - "native_name":"Walon" - }, - "cy":{ - "name":"Welsh", - "native_name":"Cymraeg" - }, - "wo":{ - "name":"Wolof", - "native_name":"Wollof" - }, - "fy":{ - "name":"Western Frisian", - "native_name":"Frysk" - }, - "xh":{ - "name":"Xhosa", - "native_name":"isiXhosa" - }, - "yi":{ - "name":"Yiddish", - "native_name":"ייִדיש" - }, - "yo":{ - "name":"Yoruba", - "native_name":"Yorùbá" - }, - "za":{ - "name":"Zhuang; Chuang", - "native_name":"Saɯ cueŋƅ, Saw cuengh" - }, - "en-PT":{ - "name":"English, Pirate", - "native_name":"Argh! Pirates!" 
- }, - "es-ES":{ - "name":"Spanish, Spain; Castilian", - "native_name":"Español (España), Castellano" - }, - "fil":{ - "name":"Filipino", - "native_name":"Pilipino" - }, - "ne-NP":{ - "name":"Nepali", - "native_name":"नेपाली" - }, - "pt-BR":{ - "name":"Portuguese, Brazil", - "native_name":"Português (Brasil)" - }, - "pt-PT":{ - "name":"Portuguese, Portugal", - "native_name":"Português (Portugal)", - "ka_name":"portugal portugues" + "gn": { + "name": "Guaraní", + "native_name": "Avañeẽ" }, - "sr-CS":{ - "name":"Serbian, Cyrillic", - "native_name":"српски језик" + "gu": { + "name": "Gujarati", + "native_name": "ગુજરાતી" }, - "sv-SE":{ - "name":"Swedish", - "native_name":"Svenska" + "guz": { + "name": "Gusii", + "native_name": "Ekegusii" }, - "sv-FI":{ - "name":"Swedish", - "native_name":"Svenska" + "gv": { + "name": "Manx", + "native_name": "Gaelg, Gailck" }, - "ur-PK":{ - "name":"Urdu", - "native_name":"Urdu" + "ha": { + "name": "Hausa", + "native_name": "Hausa, هَوُسَ" }, - "zh":{ - "name":"Chinese", - "native_name":"中文, 汉语, 漢語" + "hau": { + "name": "Hausa", + "native_name": "Hausa, هَوُسَ" }, - "zh-CN":{ - "name":"Chinese, Simplified", - "native_name":"中国大陆" + "hdy": { + "name": "Hadiyya; Hadiya; Adea; Adiya; Hadia", + "native_name": "Hadiyyisa" }, - "zh-Hant":{ - "name":"Chinese, Traditional", - "native_name":"漢語 (繁體字)" + "he": { + "name": "Hebrew (modern)", + "native_name": "עברית" }, - "zh-TW":{ - "name":"Chinese, Taiwan", - "native_name":"漢語 (臺灣)" + "hi": { + "name": "Hindi", + "native_name": "हिन्दी, हिंदी" }, - "aka":{ - "name":"Akan", - "native_name":"Akan" + "ho": { + "name": "Hiri Motu", + "native_name": "Hiri Motu" }, - "bho":{ - "name":"Bhojpuri; Bajpuri; Bhojapuri", - "native_name":"भोजपुरी" + "hr": { + "name": "Croatian", + "native_name": "hrvatski" }, - "bug":{ - "name":"Buginese", - "native_name":"Buginese" + "hrx": { + "name": "Hunsrik", + "native_name": "Hunsrückisch, Hunsrück, Hunsriker" }, - "ceb":{ - "name":"Cebuano", - "native_name":"Cebuano" 
+ "ht": { + "name": "Haitian; Haitian Creole", + "native_name": "Kreyòl ayisyen" }, - "cho":{ - "name":"Choctaw", - "native_name":"Choctaw" + "hu": { + "name": "Hungarian", + "native_name": "Magyar" }, - "efi":{ - "name":"Efik", - "native_name":"Efik" + "hy": { + "name": "Armenian", + "native_name": "Հայերեն" }, - "en-GB":{ - "name":"English, Britain", - "native_name":"British English" + "hz": { + "name": "Herero", + "native_name": "Otjiherero" }, - "es-AR":{ - "name":"Spanish, Argentina", - "native_name":"Español (Argentina)" + "ia": { + "name": "Interlingua", + "native_name": "Interlingua" }, - "es-MX":{ - "name":"Spanish, Mexico", - "native_name":"Español (Mexico)" + "ibo": { + "name": "Igbo", + "native_name": "Asụsụ Igbo" }, - "es-NI":{ - "name":"Spanish, Nicaragua", - "native_name":"Español (Nicaragua)" + "id": { + "name": "Indonesian", + "native_name": "Bahasa Indonesia" }, - "fr-CA":{ - "name":"French, Canada", - "native_name":"Français (Canada)" + "ie": { + "name": "Interlingue", + "native_name": "Originally called Occidental; then Interlingue after WWII" }, - "ful":{ - "name":"Fula; Fulah", - "native_name":"Fulfulde" + "ig": { + "name": "Igbo", + "native_name": "Asụsụ Igbo" }, - "fy-NL":{ - "name":"Frisian", - "native_name":"Frysk" + "ii": { + "name": "Nuosu", + "native_name": "ꆈꌠ꒿ Nuosuhxop" }, - "hau":{ - "name":"Hausa", - "native_name":"Hausa, هَوُسَ" + "ik": { + "name": "Inupiaq", + "native_name": "Iñupiaq, Iñupiatun" }, - "ibo":{ - "name":"Igbo", - "native_name":"Asụsụ Igbo" + "io": { + "name": "Ido", + "native_name": "Ido" }, - "kik":{ - "name":"Kikuyu", - "native_name":"Gĩkũyũ" + "is": { + "name": "Icelandic", + "native_name": "Íslenska" }, - "kon":{ - "name":"Kongo", - "native_name":"KiKongo" + "it": { + "name": "Italian", + "native_name": "Italiano" }, - "lin":{ - "name":"Lingala", - "native_name":"Lingála" + "iu": { + "name": "Inuktitut", + "native_name": "ᐃᓄᒃᑎᑐᑦ" }, - "mlg":{ - "name":"Malagasy", - "native_name":"fiteny malagasy" + "ja": { + 
"name": "Japanese", + "native_name": "日本語 (にほんご/にっぽんご)" }, - "nya":{ - "name":"Chichewa", - "native_name":"chiCheŵa" + "jv": { + "name": "Javanese", + "native_name": "basa Jawa" }, - "oji":{ - "name":"Ojibwe", - "native_name":"ᐊᓂᔑᓈᐯᒧᐎᓐ" + "ka": { + "name": "Georgian", + "native_name": "ქართული" }, - "pnb":{ - "name":"Punjabi", - "native_name":"ਪੰਜਾਬੀ" + "kak": { + "name": "Kalanguya", + "native_name": "Kalanguya" }, - "que":{ - "name":"Quechua", - "native_name":"Runa Simi" + "kam": { + "name": "Kamba (Kenya)", + "native_name": "Kikamba" }, - "sna":{ - "name":"Shona", - "native_name":"chiShona" + "kg": { + "name": "Kongo", + "native_name": "KiKongo" }, - "som":{ - "name":"Somali", - "native_name":"Soomaaliga" + "ki": { + "name": "Kikuyu; Gikuyu", + "native_name": "Gĩkũyũ" }, - "nso":{ - "name":"Northern Sotho", - "native_name":"Sepedi, Sesotho sa Leboa, Pedi" + "kik": { + "name": "Kikuyu", + "native_name": "Gĩkũyũ" }, - "sot":{ - "name":"Southern Sotho", - "native_name":"Sesotho" + "kj": { + "name": "Kwanyama; Kuanyama", + "native_name": "Kuanyama" }, - "sr-LATN":{ - "name":"Serbian, Latin", - "native_name":"српски језик" + "kk": { + "name": "Kazakh", + "native_name": "Қазақ тілі" }, - "swa":{ - "name":"Swahili", - "native_name":"Kiswahili" + "kl": { + "name": "Kalaallisut, Greenlandic", + "native_name": "kalaallisut, kalaallit oqaasii" }, - "tir":{ - "name":"Tigrinya", - "native_name":"ትግርኛ" + "km": { + "name": "Khmer", + "native_name": "ភាសាខ្មែរ" }, - "tsn":{ - "name":"Tswana", - "native_name":"Setswana" + "kn": { + "name": "Kannada", + "native_name": "ಕನ್ನಡ" }, - "wol":{ - "name":"Wolof", - "native_name":"Wollof" + "ko": { + "name": "Korean", + "native_name": "한국어 (韓國語), 조선말 (朝鮮語)" }, - "wal":{ - "name":"Wolaytta; Borodda; Uba; Ometo", - "native_name":"Wolaytta" + "kon": { + "name": "Kongo", + "native_name": "KiKongo" }, - "xho":{ - "name":"Xhosa", - "native_name":"isiXhosa" + "kr": { + "name": "Kanuri", + "native_name": "Kanuri" }, - "xki":{ - "name":"Kenyan 
Sign Language", - "native_name":"Swahili Lugha ya ishara" + "ks": { + "name": "Kashmiri", + "native_name": "कश्मीरी, كشميري‎" }, - "yor":{ - "name":"Yoruba", - "native_name":"Yorùbá" + "ku": { + "name": "Kurdish", + "native_name": "Kurdî, كوردی‎" }, - "zul":{ - "name":"Zulu", - "native_name":"isiZulu" + "kv": { + "name": "Komi", + "native_name": "коми кыв" }, - "ach":{ - "name":"Acholi; Acooli; Akoli", - "native_name":"Acholi" + "kw": { + "name": "Cornish", + "native_name": "Kernewek" }, - "fuv":{ - "name":"Fulfulde Mbororo; Fulfulde Mbororoore", - "native_name":"Fulfulde" + "ky": { + "name": "Kirghiz; Kyrgyz", + "native_name": "кыргыз тили" }, - "xog":{ - "name":"Soga", - "native_name":"Lusoga, Olusoga" + "la": { + "name": "Latin", + "native_name": "latine, lingua latina" }, - "tuv":{ - "name":"Turkana; Turkwana; Buma; Bume", - "native_name":"Ng’aturkana, Ng'aturkana" + "lb": { + "name": "Luxembourgish; Letzeburgesch", + "native_name": "Lëtzebuergesch" }, - "lwg":{ - "name":"Oluwanga; Oluhanga; Hanga; Kawanga", - "native_name":"Oluwanga" + "lg": { + "name": "Luganda", + "native_name": "Luganda" }, - "nyn":{ - "name":"Nyankore; Nkole; Nyankole", - "native_name":"Runyankore" + "li": { + "name": "Limburgish; Limburgan; Limburger", + "native_name": "Limburgs" }, - "myx":{ - "name":"Masaaba; Bagisu", - "native_name":"Lumasaaba, Masaaba" + "lin": { + "name": "Lingala", + "native_name": "Lingála" }, - "awa": { - "name": "Awadhi", - "native_name": "अवधी" + "lkt": { + "name": "Lakhota; Lakotiyapi; Teton", + "native_name": "Lakhota" }, - "bcc": { - "name": "Southern Balochi", - "native_name": "Southern Balochi" + "ln": { + "name": "Lingala", + "native_name": "Lingála" }, - "bma": { - "name": "Lame", - "native_name": "Lame" + "lo": { + "name": "Lao", + "native_name": "ພາສາລາວ" }, - "brh": { - "name": "Brahui", - "native_name": "Brahui" + "lt": { + "name": "Lithuanian", + "native_name": "lietuvių kalba" }, - "bxk": { - "name": "Bukusu", - "native_name": "Bukusu" + "lu": { + 
"name": "Luba-Katanga", + "native_name": "Luba-Katanga" }, - "dty": { - "name": "Dotyali", - "native_name": "Dotyali" + "lua": { + "name": "Luba-Kasai", + "native_name": "Luba-Kasai, Tshiluba" }, - "enq": { - "name": "Enga", - "native_name": "Enga" - }, - "guz": { - "name": "Gusii", - "native_name": "Ekegusii" + "lv": { + "name": "Latvian", + "native_name": "latviešu valoda" }, - "kak": { - "name": "Kalanguya", - "native_name": "Kalanguya" + "lwg": { + "name": "Oluwanga; Oluhanga; Hanga; Kawanga", + "native_name": "Oluwanga" }, - "kam": { - "name": "Kamba (Kenya)", - "native_name": "Kikamba" + "lws": { + "name": "Malawian Sign Language", + "native_name": "Malawian Sign Language" }, "mai": { "name": "Maithili", "native_name": "थिली, মৈথিলী" }, + "mg": { + "name": "Malagasy", + "native_name": "Malagasy fiteny" + }, + "mh": { + "name": "Marshallese", + "native_name": "Kajin M̧ajeļ" + }, + "mi": { + "name": "Māori", + "native_name": "te reo Māori" + }, + "mk": { + "name": "Macedonian", + "native_name": "македонски јазик" + }, + "ml": { + "name": "Malayalam", + "native_name": "മലയാളം" + }, + "mlg": { + "name": "Malagasy", + "native_name": "fiteny malagasy" + }, + "mn": { + "name": "Mongolian", + "native_name": "монгол" + }, + "mr": { + "name": "Marathi (Marāṭhī)", + "native_name": "मराठी" + }, + "ms": { + "name": "Malay", + "native_name": "bahasa Melayu, بهاس ملايو‎" + }, + "mt": { + "name": "Maltese", + "native_name": "Malti" + }, + "mul": { + "name": "Multiple languages", + "native_name": "Multiple languages" + }, + "mwr": { + "name": "Marwari; Marwadi", + "native_name": "मारवाड़ी, مارواڑی" + }, + "my": { + "name": "Burmese", + "native_name": "ဗမာစာ" + }, + "myk": { + "name": "Mamara Senoufo", + "native_name": "Mamara Senoufo" + }, + "myx": { + "name": "Masaaba; Bagisu", + "native_name": "Lumasaaba, Masaaba" + }, + "na": { + "name": "Nauru", + "native_name": "Ekakairũ Naoero" + }, + "nb": { + "name": "Norwegian Bokmål", + "native_name": "Norsk bokmål" + }, + "nd": { + 
"name": "North Ndebele", + "native_name": "isiNdebele" + }, + "ne": { + "name": "Nepali", + "native_name": "नेपाली" + }, + "ne-NP": { + "name": "Nepali", + "native_name": "नेपाली" + }, "new": { "name": "Newari", "native_name": "नेपाल भाषा, नेवाः भाय्" }, + "ng": { + "name": "Ndonga", + "native_name": "Owambo" + }, + "nl": { + "name": "Dutch", + "native_name": "Nederlands, Vlaams" + }, + "nn": { + "name": "Norwegian Nynorsk", + "native_name": "Norsk nynorsk" + }, + "no": { + "name": "Norwegian", + "native_name": "Norsk" + }, + "nr": { + "name": "South Ndebele", + "native_name": "isiNdebele" + }, + "nso": { + "name": "Northern Sotho", + "native_name": "Sepedi, Sesotho sa Leboa, Pedi" + }, + "nsp": { + "name": "Nepalese Sign Language", + "native_name": "नेपाली सांकेतिक भाषा" + }, + "nv": { + "name": "Navajo; Navaho", + "native_name": "Diné bizaad, Dinékʼehǰí" + }, + "ny": { + "name": "Chichewa; Chewa; Nyanja", + "native_name": "chiCheŵa, chinyanja" + }, + "nya": { + "name": "Chichewa", + "native_name": "chiCheŵa" + }, + "nyn": { + "name": "Nyankore; Nkole; Nyankole", + "native_name": "Runyankore" + }, + "oc": { + "name": "Occitan", + "native_name": "Occitan" + }, + "oj": { + "name": "Ojibwe; Ojibwa", + "native_name": "ᐊᓂᔑᓈᐯᒧᐎᓐ" + }, + "oji": { + "name": "Ojibwe", + "native_name": "ᐊᓂᔑᓈᐯᒧᐎᓐ" + }, + "om": { + "name": "Oromo", + "native_name": "Afaan Oromoo" + }, + "or": { + "name": "Oriya", + "native_name": "ଓଡ଼ିଆ" + }, + "os": { + "name": "Ossetian; Ossetic", + "native_name": "ирон æвзаг" + }, + "pa": { + "name": "Panjabi; Punjabi", + "native_name": "ਪੰਜਾਬੀ, پنجابی‎" + }, "pbt": { "name": "Southern Pashto", "native_name": "Southern Pashto" @@ -1086,29 +735,388 @@ "name": "Northern Pashto", "native_name": "Northern Pashto" }, + "pgz": { + "name": "Papua New Guinean Sign Language", + "native_name": "Papua New Guinean Sign Language" + }, + "pi": { + "name": "Pāli", + "native_name": "पाऴि" + }, + "pl": { + "name": "Polish", + "native_name": "Polski" + }, + "pms": { + 
"name": "Piedmontese", + "native_name": "Piemontèis" + }, + "pnb": { + "name": "Punjabi", + "native_name": "ਪੰਜਾਬੀ" + }, + "ps": { + "name": "Pashto; Pushto", + "native_name": "پښتو" + }, + "pt": { + "name": "Portuguese", + "native_name": "Português" + }, + "pt-BR": { + "name": "Portuguese, Brazil", + "native_name": "Português (Brasil)" + }, + "pt-PT": { + "name": "Portuguese, Portugal", + "native_name": "Português (Portugal)" + }, + "qu": { + "name": "Quechua", + "native_name": "Runa Simi, Kichwa" + }, + "que": { + "name": "Quechua", + "native_name": "Runa Simi" + }, + "rm": { + "name": "Romansh", + "native_name": "rumantsch grischun" + }, + "rn": { + "name": "Kirundi", + "native_name": "kiRundi" + }, + "ro": { + "name": "Romanian; Moldavian; Moldovan", + "native_name": "română" + }, + "rsn": { + "name": "Rwandan Sign Language", + "native_name": "Amarenga y'Ikinyarwanda" + }, + "ru": { + "name": "Russian", + "native_name": "русский язык" + }, + "rue": { + "name": "Rusyn", + "native_name": "русиньскый язык" + }, + "rw": { + "name": "Kinyarwanda", + "native_name": "Ikinyarwanda" + }, + "sa": { + "name": "Sanskrit (Saṁskṛta)", + "native_name": "संस्कृतम्" + }, + "sc": { + "name": "Sardinian", + "native_name": "sardu" + }, + "sco": { + "name": "Scots", + "native_name": "Scots" + }, + "sd": { + "name": "Sindhi", + "native_name": "सिन्धी, سنڌي، سندھی‎" + }, + "se": { + "name": "Northern Sami", + "native_name": "Davvisámegiella" + }, + "sg": { + "name": "Sango", + "native_name": "yângâ tî sängö" + }, + "si": { + "name": "Sinhala; Sinhalese", + "native_name": "සිංහල" + }, + "sid": { + "name": "Sidamo; Sidaamu afii; Sidaama; Sidama", + "native_name": "Sidaamu Afoo" + }, + "sk": { + "name": "Slovak", + "native_name": "slovenčina" + }, + "sl": { + "name": "Slovene", + "native_name": "slovenščina" + }, + "sm": { + "name": "Samoan", + "native_name": "gagana faa Samoa" + }, + "sn": { + "name": "Shona", + "native_name": "chiShona" + }, + "sna": { + "name": "Shona", + 
"native_name": "chiShona" + }, + "snk": { + "name": "Soninke", + "native_name": "Soninke" + }, + "so": { + "name": "Somali", + "native_name": "Soomaaliga, af Soomaali" + }, + "som": { + "name": "Somali", + "native_name": "Soomaaliga" + }, + "sot": { + "name": "Southern Sotho", + "native_name": "Sesotho" + }, + "sq": { + "name": "Albanian", + "native_name": "Shqip" + }, + "sr": { + "name": "Serbian", + "native_name": "српски језик" + }, + "sr-CS": { + "name": "Serbian, Cyrillic", + "native_name": "српски језик" + }, + "sr-LATN": { + "name": "Serbian, Latin", + "native_name": "српски језик" + }, "srr": { "name": "Serer", "native_name": "Seereer" }, + "ss": { + "name": "Swati", + "native_name": "SiSwati" + }, + "st": { + "name": "Southern Sotho", + "native_name": "Sesotho" + }, + "su": { + "name": "Sundanese", + "native_name": "Basa Sunda" + }, + "sv": { + "name": "Swedish", + "native_name": "svenska" + }, + "sv-FI": { + "name": "Swedish", + "native_name": "Svenska" + }, + "sv-SE": { + "name": "Swedish", + "native_name": "Svenska" + }, + "sw": { + "name": "Swahili", + "native_name": "Kiswahili" + }, + "swa": { + "name": "Swahili", + "native_name": "Kiswahili" + }, + "ta": { + "name": "Tamil", + "native_name": "தமிழ்" + }, "taj": { "name": "Eastern Tamang", "native_name": "Eastern Tamang" }, + "te": { + "name": "Telugu", + "native_name": "తెలుగు" + }, + "tg": { + "name": "Tajik", + "native_name": "тоҷикӣ, toğikī, تاجیکی‎" + }, + "th": { + "name": "Thai", + "native_name": "ไทย" + }, "thl": { "name": "Dangaura Tharu", "native_name": "Dangaura Tharu" }, + "ti": { + "name": "Tigrinya", + "native_name": "ትግርኛ" + }, + "tir": { + "name": "Tigrinya", + "native_name": "ትግርኛ" + }, + "tk": { + "name": "Turkmen", + "native_name": "Türkmen, Түркмен" + }, + "tl": { + "name": "Tagalog", + "native_name": "Wikang Tagalog" + }, + "tn": { + "name": "Tswana", + "native_name": "Setswana" + }, + "to": { + "name": "Tonga (Tonga Islands)", + "native_name": "faka Tonga" + }, "tpi": { "name": 
"Tok Pisin", "native_name": "Tok Pisin" }, + "tr": { + "name": "Turkish", + "native_name": "Türkçe" + }, + "ts": { + "name": "Tsonga", + "native_name": "Xitsonga" + }, + "tsn": { + "name": "Tswana", + "native_name": "Setswana" + }, + "tt": { + "name": "Tatar", + "native_name": "татарча, tatarça, تاتارچا‎" + }, "tum": { "name": "Tumbuka", "native_name": "chiTumbuka" }, + "tuv": { + "name": "Turkana; Turkwana; Buma; Bume", + "native_name": "Ng’aturkana, Ng'aturkana" + }, + "tw": { + "name": "Twi", + "native_name": "Twi" + }, + "ty": { + "name": "Tahitian", + "native_name": "Reo Tahiti" + }, + "ug": { + "name": "Uighur; Uyghur", + "native_name": "Uyƣurqə, ئۇيغۇرچە‎" + }, + "uk": { + "name": "Ukrainian", + "native_name": "українська" + }, + "und": { + "name": "Undetermined", + "native_name": "Undetermined" + }, + "ur": { + "name": "Urdu", + "native_name": "اردو" + }, + "ur-PK": { + "name": "Urdu", + "native_name": "Urdu" + }, + "uz": { + "name": "Uzbek", + "native_name": "zbek, Ўзбек, أۇزبېك‎" + }, + "ve": { + "name": "Venda", + "native_name": "Tshivenḓa, Tshivenda" + }, + "vi": { + "name": "Vietnamese", + "native_name": "Tiếng Việt" + }, + "vo": { + "name": "Volapük", + "native_name": "Volapük" + }, + "wa": { + "name": "Walloon", + "native_name": "Walon" + }, + "wal": { + "name": "Wolaytta; Borodda; Uba; Ometo", + "native_name": "Wolaytta" + }, + "wo": { + "name": "Wolof", + "native_name": "Wollof" + }, + "wol": { + "name": "Wolof", + "native_name": "Wollof" + }, + "xh": { + "name": "Xhosa", + "native_name": "isiXhosa" + }, + "xho": { + "name": "Xhosa", + "native_name": "isiXhosa" + }, + "xki": { + "name": "Kenyan Sign Language", + "native_name": "Swahili Lugha ya ishara" + }, + "xog": { + "name": "Soga", + "native_name": "Lusoga, Olusoga" + }, "yao": { "name": "Yao", "native_name": "Yao" + }, + "yi": { + "name": "Yiddish", + "native_name": "ייִדיש" + }, + "yo": { + "name": "Yoruba", + "native_name": "Yorùbá" + }, + "yor": { + "name": "Yoruba", + "native_name": 
"Yorùbá" + }, + "za": { + "name": "Zhuang; Chuang", + "native_name": "Saɯ cueŋƅ, Saw cuengh" + }, + "zh": { + "name": "Chinese", + "native_name": "中文, 汉语, 漢語" + }, + "zh-CN": { + "name": "Chinese, Simplified", + "native_name": "中国大陆" + }, + "zh-Hant": { + "name": "Chinese, Traditional", + "native_name": "漢語 (繁體字)" + }, + "zh-TW": { + "name": "Chinese, Taiwan", + "native_name": "漢語 (臺灣)" + }, + "zul": { + "name": "Zulu", + "native_name": "isiZulu" } -} - +} \ No newline at end of file diff --git a/scripts/add_language.py b/scripts/add_language.py new file mode 100644 index 0000000..714a814 --- /dev/null +++ b/scripts/add_language.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +import json +import os +import sys +from typing import Any +from typing import Dict +from typing import Optional + +try: + import langcodes + import pycountry +except ImportError: + print("Required libraries not found. Installing them now...") + import subprocess + + subprocess.check_call( + [sys.executable, "-m", "pip", "install", "langcodes[data]", "pycountry"] + ) + import langcodes + import pycountry + + +def load_json_file(file_path: str) -> Dict[str, Any]: + """Load JSON data from a file.""" + try: + with open(file_path, "r", encoding="utf-8") as file: + return json.load(file) + except (FileNotFoundError, json.JSONDecodeError) as e: + print(f"Error loading file: {e}") + return {} + + +def save_json_file(data: Dict[str, Any], file_path: str) -> None: + """Save JSON data to a file with proper formatting.""" + with open(file_path, "w", encoding="utf-8") as file: + json.dump(data, file, ensure_ascii=False, indent=2, sort_keys=True) + print(f"JSON data saved to {file_path}") + + +def _get_pycountry_language(code): + language = pycountry.languages.get(alpha_2=code) or pycountry.languages.get( + alpha_3=code + ) + if not language: + return {} + data = { + "code": language.alpha_2 if hasattr(language, "alpha_2") else language.alpha_3, + "name": language.name, + } + if hasattr(language, 
"common_name"): + data["native_name"] = language.common_name + return data + + +def get_language_info(query: str) -> Optional[Dict[str, str]]: + """Get language information using langcodes and pycountry.""" + # Try to parse as a language code first + data = {} + try: + lang = langcodes.get(query) or langcodes.find(query) + except LookupError: + lang = None + if not lang: + return _get_pycountry_language(query) or None + + data["code"] = lang.to_tag() + + # Get the language name in English + data["name"] = lang.display_name() + + # Try to get the native name + data["native_name"] = lang.autonym() or data["name"] + + if data["native_name"] == data["name"]: + data.update(_get_pycountry_language(data["code"])) + + return data + + +def main(): + # Get file path + file_path = os.path.join( + os.path.dirname(__file__), "../le_utils/resources/languagelookup.json" + ) + + # Load existing data + data = load_json_file(file_path) + if not data: + print("Starting with an empty language dictionary.") + data = {} + else: + print(f"Loaded {len(data)} language entries.") + + while True: + query = input( + "\nEnter language name or code to add (or press Enter to finish): " + ) + if not query: + break + + language_info = get_language_info(query) + if language_info: + code = language_info["code"] + name = language_info["name"] + native_name = language_info["native_name"] + + print("\nFound language information:") + print(f"Code: {code}") + print(f"Name: {name}") + print(f"Native name: {native_name}") + + if code in data: + print( + f"Warning: Language code '{code}' already exists in the data with the following information:" + ) + print(f"Name: {data[code].get('name')}") + print(f"Native name: {data[code].get('native_name')}") + + confirm = input("Add this language? (Y/N): ") + if confirm.lower() == "y": + data[code] = {"name": name, "native_name": native_name} + print(f"Added language: {name}") + else: + print("Language not added.") + else: + print("Language not found. 
Please try a different query.") + + # Save the updated data + if input("\nSave changes to the JSON file? (Y/N): ").lower() == "y": + save_json_file(data, file_path) + else: + print("Changes not saved.") + + +if __name__ == "__main__": + print("Language Manager: Add new languages to your JSON file") + print("---------------------------------------------------") + main() + print("Program finished.") From 6a9784d5a2a42bcb55fabd583b10be4e11fdadc4 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Mon, 14 Apr 2025 15:20:34 -0700 Subject: [PATCH 3/3] Delete obsolete getlang tests. Clean up zh language codes. --- le_utils/resources/languagelookup.json | 30 ++++++++-- tests/test_getlangs.py | 79 -------------------------- tests/test_languages.py | 2 +- 3 files changed, 26 insertions(+), 85 deletions(-) diff --git a/le_utils/resources/languagelookup.json b/le_utils/resources/languagelookup.json index c906bc2..06d738f 100644 --- a/le_utils/resources/languagelookup.json +++ b/le_utils/resources/languagelookup.json @@ -1101,15 +1101,35 @@ }, "zh": { "name": "Chinese", - "native_name": "中文, 汉语, 漢語" + "native_name": "中文" }, "zh-CN": { - "name": "Chinese, Simplified", - "native_name": "中国大陆" + "name": "Chinese (China)", + "native_name": "中文(中国)" + }, + "zh-Hans": { + "name": "Chinese (Simplified)", + "native_name": "中文(简体)" + }, + "zh-Hans-CN": { + "name": "Chinese (Simplified, China)", + "native_name": "中文(简体,中国)" + }, + "zh-Hans-TW": { + "name": "Chinese (Simplified, Taiwan)", + "native_name": "中文(简体,台湾)" }, "zh-Hant": { - "name": "Chinese, Traditional", - "native_name": "漢語 (繁體字)" + "name": "Chinese (Traditional)", + "native_name": "中文(繁體)" + }, + "zh-Hant-CN": { + "name": "Chinese (Traditional, China)", + "native_name": "中文(繁體, 中國)" + }, + "zh-Hant-TW": { + "name": "Chinese (Traditional, Taiwan)", + "native_name": "中文(繁體, 台灣)" }, "zh-TW": { "name": "Chinese, Taiwan", diff --git a/tests/test_getlangs.py b/tests/test_getlangs.py index 81c48d7..6d8da7a 100644 --- 
a/tests/test_getlangs.py +++ b/tests/test_getlangs.py @@ -109,85 +109,6 @@ def test_list_like_language_names(): assert lang_obj.native_name == "Gàidhlig", "Wrong native_name" -# getlang_by_alpha2 ==> Lookup by two-letter Language code -################################################################################ - - -def test_known_alpha2_codes(): - lang_obj = languages.getlang_by_alpha2("en") - assert lang_obj is not None, "English not found" - assert lang_obj.code == "en", "Wrong code" - assert lang_obj.name == "English", "Wrong name" - assert lang_obj.native_name == "English", "Wrong native_name" - - lang_obj = languages.getlang_by_alpha2("zu") - assert lang_obj is not None, "Zulu not found" - assert lang_obj.code == "zul", "Wrong internal repr. code" - assert lang_obj.name == "Zulu", "Wrong name" - assert lang_obj.native_name == "isiZulu", "Wrong native_name" - - lang_obj = languages.getlang_by_alpha2("pt") - assert lang_obj is not None, "Portuguese not found" - assert lang_obj.code == "pt", "Wrong code" - assert lang_obj.name == "Portuguese", "Wrong name" - assert lang_obj.native_name == "Português", "Wrong native_name" - - -def test_unknown_alpha2_code(): - lang_obj = languages.getlang_by_alpha2("zz") - assert lang_obj is None, "Uknown code zz returned non-None" - - -@pytest.fixture -def simplified_chinese_codes(): - return ["zh-CN", "zh-Hans", "zh-Hans-CN"] - - -@pytest.fixture -def traditional_chinese_codes(): - return ["zh-Hant", "zh-Hant-HK", "zh-HK"] - - -@pytest.fixture -def taiwan_chinese_codes(): - return ["zh-TW", "zh-Hant-TW"] - - -def test_youtube_edgecases_alpha2_codes( - simplified_chinese_codes, traditional_chinese_codes, taiwan_chinese_codes -): - # check old language code for Hebrew works `iw` - lang_obj = languages.getlang_by_alpha2("iw") - assert lang_obj is not None, "Hebrew not found" - assert lang_obj.code == "he", "Wrong code" - assert lang_obj.name == "Hebrew (modern)", "Wrong name" - assert lang_obj.native_name == "עברית", "Wrong 
native_name" - - # Check all Simplified Chinese codes are resolved correctly to zh-CN - for lang_code in simplified_chinese_codes: - lang_obj = languages.getlang_by_alpha2(lang_code) - assert lang_obj is not None, "Simplified Chinese not found" - assert lang_obj.code == "zh-CN", "Wrong internal repr. code" - assert lang_obj.name == "Chinese, Simplified", "Wrong name" - assert lang_obj.native_name == "中国大陆", "Wrong native_name" - - # Check all Traditional Chinese codes are resolved correctly to zh-Hant - for lang_code in traditional_chinese_codes: - lang_obj = languages.getlang_by_alpha2(lang_code) - assert lang_obj is not None, "Traditional Chinese not found" - assert lang_obj.code == "zh-Hant", "Wrong internal repr. code" - assert lang_obj.name == "Chinese, Traditional", "Wrong name" - assert lang_obj.native_name == "漢語 (繁體字)", "Wrong native_name" - - # Check all Taiwanese langauge codes are resolved correctly to zh-TW - for lang_code in taiwan_chinese_codes: - lang_obj = languages.getlang_by_alpha2(lang_code) - assert lang_obj is not None, "Taiwan Chinese not found" - assert lang_obj.code == "zh-TW", "Wrong internal repr. code" - assert lang_obj.name == "Chinese, Taiwan", "Wrong name" - assert lang_obj.native_name == "漢語 (臺灣)", "Wrong native_name" - - # getlang_by_native_name ==> Lookup by Language object by native_name ################################################################################ diff --git a/tests/test_languages.py b/tests/test_languages.py index f53f44a..02501e6 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -35,5 +35,5 @@ def test_first_native_name(): lang_obj = languages.getlang("zh") assert lang_obj is not None, "Chinese not found" assert lang_obj.name == "Chinese", "Wrong name" - assert lang_obj.native_name == "中文, 汉语, 漢語", "Wrong native_name" + assert lang_obj.native_name == "中文", "Wrong native_name" assert lang_obj.first_native_name == "中文", "Wrong first_native_name"