Skip to content

Commit 1c00c00

Browse files
authored
Merge branch 'main' into gh-150942-csv-appendtakeref
2 parents 7945e61 + ea4c855 commit 1c00c00

18 files changed

Lines changed: 402 additions & 70 deletions

Doc/library/collections.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,7 +1233,7 @@ variants of :deco:`functools.lru_cache`:
12331233
.. testcode::
12341234

12351235
from collections import OrderedDict
1236-
from time import time
1236+
from time import monotonic
12371237

12381238
class TimeBoundedLRU:
12391239
"LRU Cache that invalidates and refreshes old entries."
@@ -1248,10 +1248,10 @@ variants of :deco:`functools.lru_cache`:
12481248
if args in self.cache:
12491249
self.cache.move_to_end(args)
12501250
timestamp, result = self.cache[args]
1251-
if time() - timestamp <= self.maxage:
1251+
if monotonic() - timestamp <= self.maxage:
12521252
return result
12531253
result = self.func(*args)
1254-
self.cache[args] = time(), result
1254+
self.cache[args] = monotonic(), result
12551255
if len(self.cache) > self.maxsize:
12561256
self.cache.popitem(last=False)
12571257
return result

Doc/library/difflib.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ Finally, we compare the two:
724724

725725
>>> result = list(d.compare(text1, text2))
726726

727-
``result`` is a list of strings, so let's pretty-print it:
727+
``result`` is a list of strings, so let's pretty-print it::
728728

729729
>>> from pprint import pprint
730730
>>> pprint(result)
@@ -739,7 +739,7 @@ Finally, we compare the two:
739739
'? ++++ ^ ^\n',
740740
'+ 5. Flat is better than nested.\n']
741741

742-
As a single multi-line string it looks like this:
742+
As a single multi-line string it looks like this::
743743

744744
>>> import sys
745745
>>> sys.stdout.writelines(result)

Doc/library/io.rst

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ will raise a :exc:`TypeError`. So will giving a :class:`bytes` object to the
3838
Operations that used to raise :exc:`IOError` now raise :exc:`OSError`, since
3939
:exc:`IOError` is now an alias of :exc:`OSError`.
4040

41+
.. _text-io:
4142

4243
Text I/O
4344
^^^^^^^^
@@ -65,6 +66,7 @@ In-memory text streams are also available as :class:`StringIO` objects::
6566
The text stream API is described in detail in the documentation of
6667
:class:`TextIOBase`.
6768

69+
.. _binary-io:
6870

6971
Binary I/O
7072
^^^^^^^^^^
@@ -103,6 +105,13 @@ stream by opening a file in binary mode with buffering disabled::
103105

104106
The raw stream API is described in detail in the docs of :class:`RawIOBase`.
105107

108+
.. warning::
109+
Raw I/O is a low-level interface and methods generally must have their return
110+
values checked and be explicitly retried to ensure an operation completes.
111+
For instance, :meth:`~RawIOBase.write` returns the number of bytes written
112+
which may be less than the number of bytes provided (a partial write).
113+
High-level I/O objects like :ref:`binary-io` and :ref:`text-io` implement
114+
retry behavior.
106115

107116
.. _io-text-encoding:
108117

@@ -478,8 +487,11 @@ I/O Base Classes
478487

479488
Read up to *size* bytes from the object and return them. As a convenience,
480489
if *size* is unspecified or -1, all bytes until EOF are returned.
481-
Otherwise, only one system call is ever made. Fewer than *size* bytes may
482-
be returned if the operating system call returns fewer than *size* bytes.
490+
491+
Attempts to make only one system call but will retry if interrupted and
492+
the signal handler does not raise an exception (see :pep:`475` for the
493+
rationale). This means fewer than *size* bytes may be returned if the
494+
operating system call returns fewer than *size* bytes.
483495

484496
If 0 bytes are returned, and *size* was not 0, this indicates end of file.
485497
If the object is in non-blocking mode and no bytes are available,
@@ -493,13 +505,19 @@ I/O Base Classes
493505
Read and return all the bytes from the stream until EOF, using multiple
494506
calls to the stream if necessary.
495507

508+
If ``0`` bytes are returned, this indicates end of file. If the object is in
509+
non-blocking mode and the underlying :meth:`read` returns ``None``
510+
indicating no bytes are available, ``None`` is returned.
511+
496512
.. method:: readinto(b, /)
497513

498514
Read bytes into a pre-allocated, writable
499515
:term:`bytes-like object` *b*, and return the
500516
number of bytes read. For example, *b* might be a :class:`bytearray`.
501-
If the object is in non-blocking mode and no bytes
502-
are available, ``None`` is returned.
517+
518+
If ``0`` is returned and ``len(b)`` is not ``0``, this indicates end of file. If
519+
the object is in non-blocking mode and no bytes are available, ``None`` is
520+
returned.
503521

504522
.. method:: write(b, /)
505523

@@ -513,6 +531,13 @@ I/O Base Classes
513531
this method returns, so the implementation should only access *b*
514532
during the method call.
515533

534+
.. warning::
535+
536+
This function does not ensure all bytes are written or an exception is
537+
raised. Callers may implement that behavior by checking the return
538+
value and, if it is less than the length of *b*, looping with additional
539+
write calls until all unwritten bytes are written. High-level I/O
540+
objects like :ref:`binary-io` and :ref:`text-io` implement retry behavior.
516541

517542
.. class:: BufferedIOBase
518543

@@ -641,7 +666,11 @@ Raw File I/O
641666
.. class:: FileIO(name, mode='r', closefd=True, opener=None)
642667

643668
A raw binary stream representing an OS-level file containing bytes data. It
644-
inherits from :class:`RawIOBase`.
669+
inherits from :class:`RawIOBase` and implements its low-level access design.
670+
This means :meth:`~RawIOBase.write` does not guarantee all bytes are written
671+
and :meth:`~RawIOBase.read` may read fewer bytes than requested even when more
672+
bytes may be present in the underlying file. To get "write all" and
673+
"read at least" behavior, use :ref:`binary-io`.
645674

646675
The *name* can be one of two things:
647676

@@ -661,10 +690,6 @@ Raw File I/O
661690
implies writing, so this mode behaves in a similar way to ``'w'``. Add a
662691
``'+'`` to the mode to allow simultaneous reading and writing.
663692

664-
The :meth:`~RawIOBase.read` (when called with a positive argument),
665-
:meth:`~RawIOBase.readinto` and :meth:`~RawIOBase.write` methods on this
666-
class will only make one system call.
667-
668693
A custom opener can be used by passing a callable as *opener*. The underlying
669694
file descriptor for the file object is then obtained by calling *opener* with
670695
(*name*, *flags*). *opener* must return an open file descriptor (passing
@@ -676,6 +701,13 @@ Raw File I/O
676701
See the :func:`open` built-in function for examples on using the *opener*
677702
parameter.
678703

704+
.. warning::
705+
:class:`FileIO` is a low-level I/O object and members, such as
706+
:meth:`~RawIOBase.read` and :meth:`~RawIOBase.write`, need to have their
707+
return values checked explicitly in a retry loop to implement "write all"
708+
and "read at least" behavior. High-level I/O objects :ref:`binary-io` and
709+
:ref:`text-io` implement retry behavior.
710+
679711
.. versionchanged:: 3.3
680712
The *opener* parameter was added.
681713
The ``'x'`` mode was added.

Include/dynamic_annotations.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ int RunningOnValgrind(void);
461461

462462
#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus)
463463

464+
extern "C++" {
464465
/* _Py_ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads.
465466
466467
Instead of doing
@@ -476,6 +477,8 @@ int RunningOnValgrind(void);
476477
_Py_ANNOTATE_IGNORE_READS_END();
477478
return res;
478479
}
480+
}
481+
479482
/* Apply _Py_ANNOTATE_BENIGN_RACE_SIZED to a static variable. */
480483
#define _Py_ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \
481484
namespace { \

Lib/email/charset.py

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
'add_codec',
1010
]
1111

12+
import codecs
1213
from functools import partial
1314

1415
import email.base64mime
@@ -58,37 +59,71 @@
5859
'shift_jis': (BASE64, None, 'iso-2022-jp'),
5960
'iso-2022-jp': (BASE64, None, None),
6061
'koi8-r': (BASE64, BASE64, None),
61-
'utf-8': (SHORTEST, BASE64, 'utf-8'),
6262
}
6363

64-
# Aliases for other commonly-used names for character sets. Map
65-
# them to the real ones used in email.
64+
# Map Python codec names to their corresponding MIME/IANA names.
6665
ALIASES = {
67-
'latin_1': 'iso-8859-1',
68-
'latin-1': 'iso-8859-1',
69-
'latin_2': 'iso-8859-2',
70-
'latin-2': 'iso-8859-2',
71-
'latin_3': 'iso-8859-3',
72-
'latin-3': 'iso-8859-3',
73-
'latin_4': 'iso-8859-4',
74-
'latin-4': 'iso-8859-4',
75-
'latin_5': 'iso-8859-9',
76-
'latin-5': 'iso-8859-9',
77-
'latin_6': 'iso-8859-10',
78-
'latin-6': 'iso-8859-10',
79-
'latin_7': 'iso-8859-13',
80-
'latin-7': 'iso-8859-13',
81-
'latin_8': 'iso-8859-14',
82-
'latin-8': 'iso-8859-14',
83-
'latin_9': 'iso-8859-15',
84-
'latin-9': 'iso-8859-15',
85-
'latin_10':'iso-8859-16',
86-
'latin-10':'iso-8859-16',
87-
'cp949': 'ks_c_5601-1987',
88-
'euc_jp': 'euc-jp',
89-
'euc_kr': 'euc-kr',
90-
'ascii': 'us-ascii',
91-
}
66+
'ascii': 'us-ascii',
67+
'big5hkscs': 'big5-hkscs',
68+
'cp037': 'ibm037',
69+
'cp1026': 'ibm1026',
70+
'cp1140': 'ibm01140',
71+
'cp1250': 'windows-1250',
72+
'cp1251': 'windows-1251',
73+
'cp1252': 'windows-1252',
74+
'cp1253': 'windows-1253',
75+
'cp1254': 'windows-1254',
76+
'cp1255': 'windows-1255',
77+
'cp1256': 'windows-1256',
78+
'cp1257': 'windows-1257',
79+
'cp1258': 'windows-1258',
80+
'cp273': 'ibm273',
81+
'cp424': 'ibm424',
82+
'cp437': 'ibm437',
83+
'cp500': 'ibm500',
84+
'cp775': 'ibm775',
85+
'cp850': 'ibm850',
86+
'cp852': 'ibm852',
87+
'cp855': 'ibm855',
88+
'cp857': 'ibm857',
89+
'cp858': 'ibm00858',
90+
'cp860': 'ibm860',
91+
'cp861': 'ibm861',
92+
'cp862': 'ibm862',
93+
'cp863': 'ibm863',
94+
'cp864': 'ibm864',
95+
'cp865': 'ibm865',
96+
'cp866': 'ibm866',
97+
'cp869': 'ibm869',
98+
'cp874': 'windows-874',
99+
'euc_jp': 'euc-jp',
100+
'euc_kr': 'euc-kr',
101+
'hz': 'hz-gb-2312',
102+
'iso2022_jp': 'iso-2022-jp',
103+
'iso2022_jp_2': 'iso-2022-jp-2',
104+
'iso2022_kr': 'iso-2022-kr',
105+
'iso8859-1': 'iso-8859-1',
106+
'iso8859-10': 'iso-8859-10',
107+
'iso8859-11': 'iso-8859-11',
108+
'iso8859-13': 'iso-8859-13',
109+
'iso8859-14': 'iso-8859-14',
110+
'iso8859-15': 'iso-8859-15',
111+
'iso8859-16': 'iso-8859-16',
112+
'iso8859-2': 'iso-8859-2',
113+
'iso8859-3': 'iso-8859-3',
114+
'iso8859-4': 'iso-8859-4',
115+
'iso8859-5': 'iso-8859-5',
116+
'iso8859-6': 'iso-8859-6',
117+
'iso8859-7': 'iso-8859-7',
118+
'iso8859-8': 'iso-8859-8-i',
119+
'iso8859-9': 'iso-8859-9',
120+
'kz1048': 'kz-1048',
121+
'mac-roman': 'macintosh',
122+
123+
# CP949 is not registered in IANA. KS_C_5601-1987 is not the same,
124+
# but the closest registered option.
125+
'cp949': 'ks_c_5601-1987',
126+
}
92127

93128

94129
# Map charsets to their Unicode codec strings.
@@ -215,7 +250,18 @@ def __init__(self, input_charset=DEFAULT_CHARSET):
215250
raise errors.CharsetError(input_charset)
216251
input_charset = input_charset.lower()
217252
# Set the input charset after filtering through the aliases
218-
self.input_charset = ALIASES.get(input_charset, input_charset)
253+
# For backward compatibility, try ALIASES first to let the user
254+
# override it.
255+
if input_charset in ALIASES:
256+
input_charset = ALIASES[input_charset]
257+
else:
258+
try:
259+
input_codec = codecs.lookup(input_charset).name
260+
except LookupError:
261+
pass
262+
else:
263+
input_charset = ALIASES.get(input_codec, input_codec)
264+
self.input_charset = input_charset
219265
# We can try to guess which encoding and conversion to use by the
220266
# charset_map dictionary. Try that first, but let the user override
221267
# it.

Lib/email/contentmanager.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,11 +173,11 @@ def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
173173
disposition=None, filename=None, cid=None,
174174
params=None, headers=None):
175175
_prepare_set(msg, 'text', subtype, headers)
176+
177+
charset = email.charset.Charset(charset).input_charset
176178
cte, payload = _encode_text(string, charset, cte, msg.policy)
177179
msg.set_payload(payload)
178-
msg.set_param('charset',
179-
email.charset.ALIASES.get(charset, charset),
180-
replace=True)
180+
msg.set_param('charset', charset, replace=True)
181181
msg['Content-Transfer-Encoding'] = cte
182182
_finalize_set(msg, disposition, filename, cid, params)
183183
raw_data_manager.add_set_handler(str, set_text_content)

0 commit comments

Comments
 (0)