diff --git a/Changelog b/Changelog index 88ba8c385..55bc6b0db 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,8 @@ + version 1.7.4.1 (tag v1.7.4.1rel) + ================================= + * Change default encoding for stringtochar/chartostring functions from 'utf-8' to 'utf-8'/'ascii' for dtype.kind='U'/'S' + (issue #1464). + version 1.7.4 (tag v1.7.4rel) ================================ * Make sure automatic conversion of character arrays <--> string arrays works for Unicode strings (issue #1440). diff --git a/docs/index.html b/docs/index.html index 8a379a4c1..a416acf07 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1226,7 +1226,7 @@
-def chartostring(b, encoding='utf-8')
+def chartostring(b, encoding=None)
chartostring(b,encoding='utf-8')
b.shape[-1] characters.
optional kwarg encoding can be used to specify character encoding (default
-utf-8). If encoding is 'none' or 'bytes', a numpy.string_ byte array is
-returned.
utf-8 for dtype='UN' or ascii for dtype='SN'). If encoding is 'none' or 'bytes',
+a numpy.string_ byte array is returned.
returns a numpy string array with datatype 'UN' (or 'SN') and shape
b.shape[:-1] where where N=b.shape[-1].
calendar: describes the calendar to be used in the time calculations.
All the values currently defined in the
CF metadata convention <http://cfconventions.org/cf-conventions/cf-conventions#calendar>__ are supported.
-Valid calendars 'standard', 'gregorian', 'proleptic_gregorian'
+Valid calendars 'standard', 'gregorian', 'proleptic_gregorian', 'tai',
'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'.
Default is None which means the calendar associated with the first
input datetime instance will be used.
calendar: describes the calendar to be used in the time calculations.
All the values currently defined in the
CF metadata convention <http://cfconventions.org/cf-conventions/cf-conventions#calendar>__ are supported.
-Valid calendars 'standard', 'gregorian', 'proleptic_gregorian'
+Valid calendars 'standard', 'gregorian', 'proleptic_gregorian', 'tai',
'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'.
Default is None which means the calendar associated with the first
input datetime instance will be used.
calendar: describes the calendar used in the time calculations.
All the values currently defined in the
CF metadata convention <http://cfconventions.org/cf-conventions/cf-conventions#calendar>__ are supported.
-Valid calendars 'standard', 'gregorian', 'proleptic_gregorian'
+Valid calendars 'standard', 'gregorian', 'proleptic_gregorian', 'tai',
'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'.
Default is 'standard', which is a mixed Julian/Gregorian calendar.
only_use_cftime_datetimes: if False, python datetime.datetime @@ -1476,7 +1476,7 @@
'U1' (if dtype='U')
-def stringtochar(a, encoding='utf-8', n_strlen=None)
+def stringtochar(a, encoding=None, n_strlen=None)
stringtochar(a,encoding='utf-8',n_strlen=None)
'S1' or 'U1') of shape a.shape + (N,).
optional kwarg encoding can be used to specify character encoding (default
-utf-8). If encoding is 'none' or 'bytes', a numpy.string_ the input array
-is treated a raw byte strings (numpy.string_).
utf-8 for dtype='UN' or ascii for dtype='SN'). If encoding is 'none' or 'bytes',
+the input array is treated as raw byte strings (numpy.string_).
optional kwarg n_strlen is the number of characters in each string.
Default
is None, which means n_strlen will be set to a.itemsize (the number of bytes
diff --git a/examples/tutorial.py b/examples/tutorial.py
index 67573d8ae..3134881ca 100644
--- a/examples/tutorial.py
+++ b/examples/tutorial.py
@@ -163,6 +163,7 @@ def walktree(top):
datac2.imag = datain['imag']
print(datac.dtype,datac)
print(datac2.dtype,datac2)
+nc.close()
# more complex compound type example.
nc = Dataset('compound_example.nc','w') # create a new dataset.
diff --git a/src/netCDF4/__init__.pyi b/src/netCDF4/__init__.pyi
index e27fb6370..4b11688b1 100644
--- a/src/netCDF4/__init__.pyi
+++ b/src/netCDF4/__init__.pyi
@@ -704,7 +704,8 @@ def stringtochar(
@overload
def stringtochar(
a: npt.NDArray[np.character],
- encoding: str = ...,
+ encoding: str | None = None,
+ n_strlen: int | None = None,
) -> npt.NDArray[np.str_] | npt.NDArray[np.bytes_]: ...
@overload
def chartostring(
@@ -714,7 +715,7 @@ def chartostring(
@overload
def chartostring(
b: npt.NDArray[np.character],
- encoding: str = ...,
+ encoding: str | None = None,
) -> npt.NDArray[np.str_] | npt.NDArray[np.bytes_]: ...
def getlibversion() -> str: ...
def rc_get(key: str) -> str | None: ...
diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index 0043d3b3e..2f5a133c9 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -1,4 +1,4 @@
-"""Version 1.7.4
+"""Version 1.7.4.1
-------------
# Introduction
@@ -1283,7 +1283,7 @@ import sys
import functools
from typing import Union
-__version__ = "1.7.4"
+__version__ = "1.7.4.1"
# Initialize numpy
import posixpath
@@ -6788,7 +6788,7 @@ returns a rank 1 numpy character array of length NUMCHARS with datatype `'S1'`
arr[0:len(string)] = tuple(string)
return arr
-def stringtochar(a,encoding='utf-8',n_strlen=None):
+def stringtochar(a,encoding=None,n_strlen=None):
"""
**`stringtochar(a,encoding='utf-8',n_strlen=None)`**
@@ -6799,8 +6799,8 @@ is the number of characters in each string. Will be converted to
an array of characters (datatype `'S1'` or `'U1'`) of shape `a.shape + (N,)`.
optional kwarg `encoding` can be used to specify character encoding (default
-`utf-8`). If `encoding` is 'none' or 'bytes', a `numpy.string_` the input array
-is treated a raw byte strings (`numpy.string_`).
+`utf-8` for dtype=`'UN'` or `ascii` for dtype=`'SN'`). If `encoding` is 'none' or 'bytes',
+the input array is treated as raw byte strings (`numpy.string_`).
optional kwarg `n_strlen` is the number of characters in each string. Default
is None, which means `n_strlen` will be set to a.itemsize (the number of bytes
@@ -6809,10 +6809,15 @@ used to represent each string in the input array).
returns a numpy character array with datatype `'S1'` or `'U1'`
and shape `a.shape + (N,)`, where N is the length of each string in a."""
dtype = a.dtype.kind
- if n_strlen is None:
- n_strlen = a.dtype.itemsize
if dtype not in ["S","U"]:
raise ValueError("type must string or unicode ('S' or 'U')")
+ if encoding is None:
+ if dtype == 'S':
+ encoding = 'ascii'
+ else:
+ encoding = 'utf-8'
+ if n_strlen is None:
+ n_strlen = a.dtype.itemsize
if encoding in ['none','None','bytes']:
b = numpy.array(tuple(a.tobytes()),'S1')
elif encoding == 'ascii':
@@ -6827,7 +6832,7 @@ and shape `a.shape + (N,)`, where N is the length of each string in a."""
b = numpy.array([[bb[i:i+1] for i in range(n_strlen)] for bb in bbytes])
return b
-def chartostring(b,encoding='utf-8'):
+def chartostring(b,encoding=None):
"""
**`chartostring(b,encoding='utf-8')`**
@@ -6838,14 +6843,19 @@ Will be converted to a array of strings, where each string has a fixed
length of `b.shape[-1]` characters.
optional kwarg `encoding` can be used to specify character encoding (default
-`utf-8`). If `encoding` is 'none' or 'bytes', a `numpy.string_` byte array is
-returned.
+`utf-8` for dtype=`'UN'` or `ascii` for dtype=`'SN'`). If `encoding` is 'none' or 'bytes',
+a `numpy.string_` byte array is returned.
returns a numpy string array with datatype `'UN'` (or `'SN'`) and shape
`b.shape[:-1]` where where `N=b.shape[-1]`."""
dtype = b.dtype.kind
if dtype not in ["S","U"]:
raise ValueError("type must be string or unicode ('S' or 'U')")
+ if encoding is None:
+ if dtype == 'S':
+ encoding = 'ascii'
+ else:
+ encoding = 'utf-8'
bs = b.tobytes()
slen = int(b.shape[-1])
if encoding in ['none','None','bytes']: