Skip to content

Commit 7c7da05

Browse files
authored
Merge branch 'main' into gh-143768-venv-symlink
2 parents (6f5792e + 6544bf4), commit 7c7da05

12 files changed

Lines changed: 300 additions & 53 deletions

File tree

Doc/library/difflib.rst

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -724,7 +724,7 @@ Finally, we compare the two:
724724

725725
>>> result = list(d.compare(text1, text2))
726726

727-
``result`` is a list of strings, so let's pretty-print it:
727+
``result`` is a list of strings, so let's pretty-print it::
728728

729729
>>> from pprint import pprint
730730
>>> pprint(result)
@@ -739,7 +739,7 @@ Finally, we compare the two:
739739
'? ++++ ^ ^\n',
740740
'+ 5. Flat is better than nested.\n']
741741

742-
As a single multi-line string it looks like this:
742+
As a single multi-line string it looks like this::
743743

744744
>>> import sys
745745
>>> sys.stdout.writelines(result)

Lib/email/charset.py

Lines changed: 75 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
'add_codec',
1010
]
1111

12+
import codecs
1213
from functools import partial
1314

1415
import email.base64mime
@@ -58,37 +59,71 @@
5859
'shift_jis': (BASE64, None, 'iso-2022-jp'),
5960
'iso-2022-jp': (BASE64, None, None),
6061
'koi8-r': (BASE64, BASE64, None),
61-
'utf-8': (SHORTEST, BASE64, 'utf-8'),
6262
}
6363

64-
# Aliases for other commonly-used names for character sets. Map
65-
# them to the real ones used in email.
64+
# Map Python codec names to their corresponding MIME/IANA names.
6665
ALIASES = {
67-
'latin_1': 'iso-8859-1',
68-
'latin-1': 'iso-8859-1',
69-
'latin_2': 'iso-8859-2',
70-
'latin-2': 'iso-8859-2',
71-
'latin_3': 'iso-8859-3',
72-
'latin-3': 'iso-8859-3',
73-
'latin_4': 'iso-8859-4',
74-
'latin-4': 'iso-8859-4',
75-
'latin_5': 'iso-8859-9',
76-
'latin-5': 'iso-8859-9',
77-
'latin_6': 'iso-8859-10',
78-
'latin-6': 'iso-8859-10',
79-
'latin_7': 'iso-8859-13',
80-
'latin-7': 'iso-8859-13',
81-
'latin_8': 'iso-8859-14',
82-
'latin-8': 'iso-8859-14',
83-
'latin_9': 'iso-8859-15',
84-
'latin-9': 'iso-8859-15',
85-
'latin_10':'iso-8859-16',
86-
'latin-10':'iso-8859-16',
87-
'cp949': 'ks_c_5601-1987',
88-
'euc_jp': 'euc-jp',
89-
'euc_kr': 'euc-kr',
90-
'ascii': 'us-ascii',
91-
}
66+
'ascii': 'us-ascii',
67+
'big5hkscs': 'big5-hkscs',
68+
'cp037': 'ibm037',
69+
'cp1026': 'ibm1026',
70+
'cp1140': 'ibm01140',
71+
'cp1250': 'windows-1250',
72+
'cp1251': 'windows-1251',
73+
'cp1252': 'windows-1252',
74+
'cp1253': 'windows-1253',
75+
'cp1254': 'windows-1254',
76+
'cp1255': 'windows-1255',
77+
'cp1256': 'windows-1256',
78+
'cp1257': 'windows-1257',
79+
'cp1258': 'windows-1258',
80+
'cp273': 'ibm273',
81+
'cp424': 'ibm424',
82+
'cp437': 'ibm437',
83+
'cp500': 'ibm500',
84+
'cp775': 'ibm775',
85+
'cp850': 'ibm850',
86+
'cp852': 'ibm852',
87+
'cp855': 'ibm855',
88+
'cp857': 'ibm857',
89+
'cp858': 'ibm00858',
90+
'cp860': 'ibm860',
91+
'cp861': 'ibm861',
92+
'cp862': 'ibm862',
93+
'cp863': 'ibm863',
94+
'cp864': 'ibm864',
95+
'cp865': 'ibm865',
96+
'cp866': 'ibm866',
97+
'cp869': 'ibm869',
98+
'cp874': 'windows-874',
99+
'euc_jp': 'euc-jp',
100+
'euc_kr': 'euc-kr',
101+
'hz': 'hz-gb-2312',
102+
'iso2022_jp': 'iso-2022-jp',
103+
'iso2022_jp_2': 'iso-2022-jp-2',
104+
'iso2022_kr': 'iso-2022-kr',
105+
'iso8859-1': 'iso-8859-1',
106+
'iso8859-10': 'iso-8859-10',
107+
'iso8859-11': 'iso-8859-11',
108+
'iso8859-13': 'iso-8859-13',
109+
'iso8859-14': 'iso-8859-14',
110+
'iso8859-15': 'iso-8859-15',
111+
'iso8859-16': 'iso-8859-16',
112+
'iso8859-2': 'iso-8859-2',
113+
'iso8859-3': 'iso-8859-3',
114+
'iso8859-4': 'iso-8859-4',
115+
'iso8859-5': 'iso-8859-5',
116+
'iso8859-6': 'iso-8859-6',
117+
'iso8859-7': 'iso-8859-7',
118+
'iso8859-8': 'iso-8859-8-i',
119+
'iso8859-9': 'iso-8859-9',
120+
'kz1048': 'kz-1048',
121+
'mac-roman': 'macintosh',
122+
123+
# CP949 is not registered in IANA. KS_C_5601-1987 is not the same,
124+
# but the closest registered option.
125+
'cp949': 'ks_c_5601-1987',
126+
}
92127

93128

94129
# Map charsets to their Unicode codec strings.
@@ -215,7 +250,18 @@ def __init__(self, input_charset=DEFAULT_CHARSET):
215250
raise errors.CharsetError(input_charset)
216251
input_charset = input_charset.lower()
217252
# Set the input charset after filtering through the aliases
218-
self.input_charset = ALIASES.get(input_charset, input_charset)
253+
# For backward compatibility, try ALIASES first to let the user
254+
# override it.
255+
if input_charset in ALIASES:
256+
input_charset = ALIASES[input_charset]
257+
else:
258+
try:
259+
input_codec = codecs.lookup(input_charset).name
260+
except LookupError:
261+
pass
262+
else:
263+
input_charset = ALIASES.get(input_codec, input_codec)
264+
self.input_charset = input_charset
219265
# We can try to guess which encoding and conversion to use by the
220266
# charset_map dictionary. Try that first, but let the user override
221267
# it.

Lib/email/contentmanager.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -173,11 +173,11 @@ def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
173173
disposition=None, filename=None, cid=None,
174174
params=None, headers=None):
175175
_prepare_set(msg, 'text', subtype, headers)
176+
177+
charset = email.charset.Charset(charset).input_charset
176178
cte, payload = _encode_text(string, charset, cte, msg.policy)
177179
msg.set_payload(payload)
178-
msg.set_param('charset',
179-
email.charset.ALIASES.get(charset, charset),
180-
replace=True)
180+
msg.set_param('charset', charset, replace=True)
181181
msg['Content-Transfer-Encoding'] = cte
182182
_finalize_set(msg, disposition, filename, cid, params)
183183
raw_data_manager.add_set_handler(str, set_text_content)

Lib/logging/__init__.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1377,9 +1377,10 @@ def getLogger(self, name):
13771377
raise TypeError('A logger name must be a string')
13781378
# Fast path: an already-registered, non-placeholder logger can be
13791379
# returned without taking the lock. dict.get() is atomic under both
1380-
# the GIL and free threading, and a Logger is fully initialised before
1381-
# being inserted into loggerDict under the lock, so this never sees a
1382-
# partially-constructed object.
1380+
# the GIL and free threading. A Logger is inserted into loggerDict only
1381+
# after it is fully wired up (parent/child references fixed) under the
1382+
# lock, so the fast path never observes a logger whose parent is not yet
1383+
# set.
13831384
rv = self.loggerDict.get(name)
13841385
if rv is not None and not isinstance(rv, PlaceHolder):
13851386
return rv
@@ -1390,14 +1391,18 @@ def getLogger(self, name):
13901391
ph = rv
13911392
rv = (self.loggerClass or _loggerClass)(name)
13921393
rv.manager = self
1393-
self.loggerDict[name] = rv
13941394
self._fixupChildren(ph, rv)
13951395
self._fixupParents(rv)
1396+
# Publish only after rv is fully wired: the fast path reads
1397+
# loggerDict without the lock.
1398+
self.loggerDict[name] = rv
13961399
else:
13971400
rv = (self.loggerClass or _loggerClass)(name)
13981401
rv.manager = self
1399-
self.loggerDict[name] = rv
14001402
self._fixupParents(rv)
1403+
# Publish only after rv is fully wired: the fast path reads
1404+
# loggerDict without the lock.
1405+
self.loggerDict[name] = rv
14011406
return rv
14021407

14031408
def setLoggerClass(self, klass):

Lib/test/test_email/test_asian_codecs.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -83,15 +83,15 @@ def test_chinese_codecs(self):
8383
h.append(s, Charset('big5hkscs'))
8484
eq(h.encode(), """\
8585
Chinese =?gb2312?b?1tDOxA==?= =?gbk?b?1tDOxA==?= =?gb18030?b?1tDOxA==?=
86-
=?hz?b?fntWUE5Efn0=?= =?big5?b?pKSk5Q==?= =?big5hkscs?b?pKSk5Q==?=""")
86+
=?hz-gb-2312?b?fntWUE5Efn0=?= =?big5?b?pKSk5Q==?= =?big5-hkscs?b?pKSk5Q==?=""")
8787
eq(decode_header(h.encode()),
8888
[(b'Chinese ', None),
8989
(b'\xd6\xd0\xce\xc4', 'gb2312'),
9090
(b'\xd6\xd0\xce\xc4', 'gbk'),
9191
(b'\xd6\xd0\xce\xc4', 'gb18030'),
92-
(b'~{VPND~}', 'hz'),
92+
(b'~{VPND~}', 'hz-gb-2312'),
9393
(b'\xa4\xa4\xa4\xe5', 'big5'),
94-
(b'\xa4\xa4\xa4\xe5', 'big5hkscs'),
94+
(b'\xa4\xa4\xa4\xe5', 'big5-hkscs'),
9595
])
9696

9797
def test_korean_codecs(self):

Lib/test/test_email/test_contentmanager.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -342,6 +342,19 @@ def test_set_text_charset_latin_1(self):
342342
self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content)
343343
self.assertEqual(m.get_content(), content)
344344

345+
def test_set_text_charset_cp949(self):
346+
m = self._make_message()
347+
content = "\ud55c\uad6d\uc5b4\n\uac02\n"
348+
raw_data_manager.set_content(m, content, charset='cp949')
349+
self.assertEqual(str(m), textwrap.dedent("""\
350+
Content-Type: text/plain; charset="ks_c_5601-1987"
351+
Content-Transfer-Encoding: base64
352+
353+
x9Gxub7uCoFBCg==
354+
"""))
355+
self.assertEqual(m.get_payload(decode=True).decode('ks_c_5601-1987'), content)
356+
self.assertEqual(m.get_content(), content)
357+
345358
def test_set_text_plain_long_line_heuristics(self):
346359
m = self._make_message()
347360
content = ("Simple but long message that is over 78 characters"

Lib/test/test_email/test_email.py

Lines changed: 127 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4970,6 +4970,128 @@ def tearDown(self):
49704970
except KeyError:
49714971
pass
49724972

4973+
def test_attributes(self):
4974+
from email import charset
4975+
c = Charset()
4976+
self.assertEqual(c.input_charset, 'us-ascii')
4977+
self.assertEqual(c.header_encoding, None)
4978+
self.assertEqual(c.body_encoding, None)
4979+
self.assertEqual(c.output_charset, 'us-ascii')
4980+
self.assertEqual(c.input_codec, None)
4981+
self.assertEqual(c.output_codec, None)
4982+
4983+
c = Charset('us-ascii')
4984+
self.assertEqual(c.input_charset, 'us-ascii')
4985+
self.assertEqual(c.header_encoding, None)
4986+
self.assertEqual(c.body_encoding, None)
4987+
self.assertEqual(c.output_charset, 'us-ascii')
4988+
self.assertEqual(c.input_codec, None)
4989+
self.assertEqual(c.output_codec, None)
4990+
4991+
c = Charset('utf8')
4992+
self.assertEqual(c.input_charset, 'utf-8')
4993+
self.assertEqual(c.header_encoding, charset.SHORTEST)
4994+
self.assertEqual(c.body_encoding, charset.BASE64)
4995+
self.assertEqual(c.output_charset, 'utf-8')
4996+
self.assertEqual(c.input_codec, 'utf-8')
4997+
self.assertEqual(c.output_codec, 'utf-8')
4998+
4999+
c = Charset('latin1')
5000+
self.assertEqual(c.input_charset, 'iso-8859-1')
5001+
self.assertEqual(c.header_encoding, charset.QP)
5002+
self.assertEqual(c.body_encoding, charset.QP)
5003+
self.assertEqual(c.output_charset, 'iso-8859-1')
5004+
self.assertEqual(c.input_codec, 'iso-8859-1')
5005+
self.assertEqual(c.output_codec, 'iso-8859-1')
5006+
5007+
c = Charset('latin9')
5008+
self.assertEqual(c.input_charset, 'iso-8859-15')
5009+
self.assertEqual(c.header_encoding, charset.QP)
5010+
self.assertEqual(c.body_encoding, charset.QP)
5011+
self.assertEqual(c.output_charset, 'iso-8859-15')
5012+
self.assertEqual(c.input_codec, 'iso-8859-15')
5013+
self.assertEqual(c.output_codec, 'iso-8859-15')
5014+
5015+
c = Charset('cyrillic')
5016+
self.assertEqual(c.input_charset, 'iso-8859-5')
5017+
self.assertEqual(c.header_encoding, charset.SHORTEST)
5018+
self.assertEqual(c.body_encoding, charset.BASE64)
5019+
self.assertEqual(c.output_charset, 'iso-8859-5')
5020+
self.assertEqual(c.input_codec, 'iso-8859-5')
5021+
self.assertEqual(c.output_codec, 'iso-8859-5')
5022+
5023+
c = Charset('cp1251')
5024+
self.assertEqual(c.input_charset, 'windows-1251')
5025+
self.assertEqual(c.header_encoding, charset.SHORTEST)
5026+
self.assertEqual(c.body_encoding, charset.BASE64)
5027+
self.assertEqual(c.output_charset, 'windows-1251')
5028+
self.assertEqual(c.input_codec, 'windows-1251')
5029+
self.assertEqual(c.output_codec, 'windows-1251')
5030+
5031+
c = Charset('cp1252')
5032+
self.assertEqual(c.input_charset, 'windows-1252')
5033+
self.assertEqual(c.header_encoding, charset.QP)
5034+
self.assertEqual(c.body_encoding, charset.QP)
5035+
self.assertEqual(c.output_charset, 'windows-1252')
5036+
self.assertEqual(c.input_codec, 'windows-1252')
5037+
self.assertEqual(c.output_codec, 'windows-1252')
5038+
5039+
c = Charset('eucjp')
5040+
self.assertEqual(c.input_charset, 'euc-jp')
5041+
self.assertEqual(c.header_encoding, charset.BASE64)
5042+
self.assertEqual(c.body_encoding, None)
5043+
self.assertEqual(c.output_charset, 'iso-2022-jp')
5044+
self.assertEqual(c.input_codec, 'euc-jp')
5045+
self.assertEqual(c.output_codec, 'iso-2022-jp')
5046+
5047+
c = Charset('cp949')
5048+
self.assertEqual(c.input_charset, 'ks_c_5601-1987')
5049+
self.assertEqual(c.header_encoding, charset.SHORTEST)
5050+
self.assertEqual(c.body_encoding, charset.BASE64)
5051+
self.assertEqual(c.output_charset, 'ks_c_5601-1987')
5052+
self.assertEqual(c.input_codec, 'ks_c_5601-1987')
5053+
self.assertEqual(c.output_codec, 'ks_c_5601-1987')
5054+
5055+
c = Charset('gb2312')
5056+
self.assertEqual(c.input_charset, 'gb2312')
5057+
self.assertEqual(c.header_encoding, charset.BASE64)
5058+
self.assertEqual(c.body_encoding, charset.BASE64)
5059+
self.assertEqual(c.output_charset, 'gb2312')
5060+
self.assertEqual(c.input_codec, 'gb2312')
5061+
self.assertEqual(c.output_codec, 'gb2312')
5062+
5063+
c = Charset('big5')
5064+
self.assertEqual(c.input_charset, 'big5')
5065+
self.assertEqual(c.header_encoding, charset.BASE64)
5066+
self.assertEqual(c.body_encoding, charset.BASE64)
5067+
self.assertEqual(c.output_charset, 'big5')
5068+
self.assertEqual(c.input_codec, 'big5')
5069+
self.assertEqual(c.output_codec, 'big5')
5070+
5071+
def test_user_charsets(self):
5072+
from email import charset
5073+
c = Charset('fake0')
5074+
self.assertEqual(c.input_charset, 'fake0')
5075+
self.assertEqual(c.header_encoding, charset.SHORTEST)
5076+
self.assertEqual(c.body_encoding, charset.BASE64)
5077+
self.assertEqual(c.output_charset, 'fake0')
5078+
self.assertEqual(c.input_codec, 'fake0')
5079+
self.assertEqual(c.output_codec, 'fake0')
5080+
5081+
charset.add_alias('fake1', 'mime-fake')
5082+
charset.add_alias('output-mime-fake', 'output-mime-fake-alias')
5083+
charset.add_codec('mime-fake', 'fakecodec')
5084+
charset.add_codec('output-mime-fake-alias', 'outputfakecodec')
5085+
charset.add_charset('mime-fake', charset.QP, None, 'output-mime-fake')
5086+
5087+
c = Charset('fake1')
5088+
self.assertEqual(c.input_charset, 'mime-fake')
5089+
self.assertEqual(c.header_encoding, charset.QP)
5090+
self.assertEqual(c.body_encoding, None)
5091+
self.assertEqual(c.output_charset, 'output-mime-fake-alias')
5092+
self.assertEqual(c.input_codec, 'fakecodec')
5093+
self.assertEqual(c.output_codec, 'outputfakecodec')
5094+
49735095
def test_codec_encodeable(self):
49745096
eq = self.assertEqual
49755097
# Make sure us-ascii = no Unicode conversion
@@ -5010,6 +5132,11 @@ def test_unicode_charset_name(self):
50105132
self.assertEqual(str(charset), 'us-ascii')
50115133
self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
50125134

5135+
def test_bytes_charset_name(self):
5136+
charset = Charset(b'us-ascii')
5137+
self.assertEqual(str(charset), 'us-ascii')
5138+
self.assertRaises(errors.CharsetError, Charset, b'asc\xffii')
5139+
50135140

50145141

50155142
# Test multilingual MIME headers.

Lib/test/test_lazy_import/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -447,11 +447,15 @@ def test_lazy_import_pkg(self):
447447

448448
def test_lazy_submodule_stored_in_parent_dict(self):
449449
"""Accessing a lazy submodule should store it in the parent's __dict__."""
450-
import test.test_lazy_import.data.lazy_import_pkg
450+
out = io.StringIO()
451+
452+
with contextlib.redirect_stdout(out):
453+
import test.test_lazy_import.data.lazy_import_pkg
451454

452455
pkg = sys.modules["test.test_lazy_import.data.pkg"]
453456
self.assertIn("bar", pkg.__dict__)
454457
self.assertIs(pkg.__dict__["bar"], sys.modules["test.test_lazy_import.data.pkg.bar"])
458+
self.assertIn("BAR_MODULE_LOADED", out.getvalue())
455459

456460
def test_lazy_import_pkg_cross_import(self):
457461
"""Cross-imports within package should preserve lazy imports."""

0 commit comments

Comments
 (0)