From 5ad260cf85bca5f3e6148d6c81d889ab107bba30 Mon Sep 17 00:00:00 2001 From: jswhit2 Date: Thu, 15 Jan 2026 22:10:00 -0700 Subject: [PATCH 1/7] change default encoding for stringtochar, chartostring to ascii for dtype='S' (issue #1464) --- examples/tutorial.py | 1 + src/netCDF4/_netCDF4.pyx | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/examples/tutorial.py b/examples/tutorial.py index 67573d8ae..3134881ca 100644 --- a/examples/tutorial.py +++ b/examples/tutorial.py @@ -163,6 +163,7 @@ def walktree(top): datac2.imag = datain['imag'] print(datac.dtype,datac) print(datac2.dtype,datac2) +nc.close() # more complex compound type example. nc = Dataset('compound_example.nc','w') # create a new dataset. diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 0043d3b3e..5ff2a4a02 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -6788,7 +6788,7 @@ returns a rank 1 numpy character array of length NUMCHARS with datatype `'S1'` arr[0:len(string)] = tuple(string) return arr -def stringtochar(a,encoding='utf-8',n_strlen=None): +def stringtochar(a,encoding=None,n_strlen=None): """ **`stringtochar(a,encoding='utf-8',n_strlen=None)`** @@ -6809,10 +6809,15 @@ used to represent each string in the input array). 
returns a numpy character array with datatype `'S1'` or `'U1'` and shape `a.shape + (N,)`, where N is the length of each string in a.""" dtype = a.dtype.kind - if n_strlen is None: - n_strlen = a.dtype.itemsize if dtype not in ["S","U"]: raise ValueError("type must string or unicode ('S' or 'U')") + if encoding is None: + if dtype == 'S': + encoding = 'ascii' + else: + encoding = 'utf-8' + if n_strlen is None: + n_strlen = a.dtype.itemsize if encoding in ['none','None','bytes']: b = numpy.array(tuple(a.tobytes()),'S1') elif encoding == 'ascii': @@ -6827,7 +6832,7 @@ and shape `a.shape + (N,)`, where N is the length of each string in a.""" b = numpy.array([[bb[i:i+1] for i in range(n_strlen)] for bb in bbytes]) return b -def chartostring(b,encoding='utf-8'): +def chartostring(b,encoding=None): """ **`chartostring(b,encoding='utf-8')`** @@ -6846,6 +6851,11 @@ returns a numpy string array with datatype `'UN'` (or `'SN'`) and shape dtype = b.dtype.kind if dtype not in ["S","U"]: raise ValueError("type must be string or unicode ('S' or 'U')") + if encoding is None: + if dtype == 'S': + encoding = 'ascii' + else: + encoding = 'utf-8' bs = b.tobytes() slen = int(b.shape[-1]) if encoding in ['none','None','bytes']: From 43d0ea8c73f77aaa6a259dc904125e6e55372abc Mon Sep 17 00:00:00 2001 From: jswhit2 Date: Thu, 15 Jan 2026 22:13:33 -0700 Subject: [PATCH 2/7] update docstring --- src/netCDF4/_netCDF4.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 5ff2a4a02..c5769ab90 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -6799,8 +6799,8 @@ is the number of characters in each string. Will be converted to an array of characters (datatype `'S1'` or `'U1'`) of shape `a.shape + (N,)`. optional kwarg `encoding` can be used to specify character encoding (default -`utf-8`). 
If `encoding` is 'none' or 'bytes', a `numpy.string_` the input array -is treated a raw byte strings (`numpy.string_`). +`utf-8` for dtype=`'UN'` or `ascii` for dtype=`'SN'`). If `encoding` is 'none' or 'bytes', +a `numpy.string_` the input array is treated a raw byte strings (`numpy.string_`). optional kwarg `n_strlen` is the number of characters in each string. Default is None, which means `n_strlen` will be set to a.itemsize (the number of bytes @@ -6846,6 +6846,10 @@ optional kwarg `encoding` can be used to specify character encoding (default `utf-8`). If `encoding` is 'none' or 'bytes', a `numpy.string_` byte array is returned. +optional kwarg `encoding` can be used to specify character encoding (default +`utf-8` for dtype=`'UN'` or `ascii` for dtype=`'SN'`). If `encoding` is 'none' or 'bytes', +a `numpy.string_` byte array is returned. + returns a numpy string array with datatype `'UN'` (or `'SN'`) and shape `b.shape[:-1]` where where `N=b.shape[-1]`.""" dtype = b.dtype.kind From f6696efcc2ebb2dd791c849c060eb83988761049 Mon Sep 17 00:00:00 2001 From: jswhit2 Date: Fri, 16 Jan 2026 07:30:20 -0700 Subject: [PATCH 3/7] update --- src/netCDF4/__init__.pyi | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/netCDF4/__init__.pyi b/src/netCDF4/__init__.pyi index e27fb6370..4c7838d4c 100644 --- a/src/netCDF4/__init__.pyi +++ b/src/netCDF4/__init__.pyi @@ -698,23 +698,13 @@ def stringtoarr( @overload def stringtochar( a: npt.NDArray[np.character], - encoding: Literal["none", "None", "bytes"], + encoding: str | None = None, n_strlen: int | None = None, -) -> npt.NDArray[np.bytes_]: ... -@overload -def stringtochar( - a: npt.NDArray[np.character], - encoding: str = ..., ) -> npt.NDArray[np.str_] | npt.NDArray[np.bytes_]: ... @overload def chartostring( b: npt.NDArray[np.character], - encoding: Literal["none", "None", "bytes"] = ..., -) -> npt.NDArray[np.bytes_]: ... 
-@overload -def chartostring( - b: npt.NDArray[np.character], - encoding: str = ..., + encoding: str | None = None, ) -> npt.NDArray[np.str_] | npt.NDArray[np.bytes_]: ... def getlibversion() -> str: ... def rc_get(key: str) -> str | None: ... From e928bc1d727de63db6a1d5b00a9dd571a25e082f Mon Sep 17 00:00:00 2001 From: jswhit2 Date: Fri, 16 Jan 2026 07:43:33 -0700 Subject: [PATCH 4/7] update --- src/netCDF4/__init__.pyi | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/netCDF4/__init__.pyi b/src/netCDF4/__init__.pyi index 4c7838d4c..4b11688b1 100644 --- a/src/netCDF4/__init__.pyi +++ b/src/netCDF4/__init__.pyi @@ -698,10 +698,21 @@ def stringtoarr( @overload def stringtochar( a: npt.NDArray[np.character], - encoding: str | None = None, + encoding: Literal["none", "None", "bytes"], + n_strlen: int | None = None, +) -> npt.NDArray[np.bytes_]: ... +@overload +def stringtochar( + a: npt.NDArray[np.character], + encoding: str | None = None, n_strlen: int | None = None, ) -> npt.NDArray[np.str_] | npt.NDArray[np.bytes_]: ... @overload +def chartostring( + b: npt.NDArray[np.character], + encoding: Literal["none", "None", "bytes"] = ..., +) -> npt.NDArray[np.bytes_]: ... +@overload def chartostring( b: npt.NDArray[np.character], encoding: str | None = None, From 701e11999bb33d0b5b185b7ccc096b710f185305 Mon Sep 17 00:00:00 2001 From: jswhit2 Date: Fri, 16 Jan 2026 08:39:17 -0700 Subject: [PATCH 5/7] update --- Changelog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Changelog b/Changelog index 88ba8c385..a401c2c7e 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,8 @@ + since version 1.7.4 release + =========================== + * Change default encoding for stringtochar/chartostring functions from 'utf-8' to 'utf-8'/'ascii' for dtype.kind='U'/'S' + (issue #1464). 
+ version 1.7.4 (tag v1.7.4rel) ================================ * Make sure automatic conversion of character arrays <--> string arrays works for Unicode strings (issue #1440). From 351e58e5212a4c6647777eaa162d1d8eb284f4f5 Mon Sep 17 00:00:00 2001 From: jswhit2 Date: Fri, 16 Jan 2026 08:41:29 -0700 Subject: [PATCH 6/7] update docstring --- docs/index.html | 18 +++++++++--------- src/netCDF4/_netCDF4.pyx | 4 ---- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/docs/index.html b/docs/index.html index 8a379a4c1..a416acf07 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1226,7 +1226,7 @@

Support for complex numbers

Functions

-def chartostring(b, encoding='utf-8') +def chartostring(b, encoding=None)

chartostring(b,encoding=None)

@@ -1236,8 +1236,8 @@

Functions

Will be converted to an array of strings, where each string has a fixed length of b.shape[-1] characters.

optional kwarg encoding can be used to specify character encoding (default -utf-8). If encoding is 'none' or 'bytes', a numpy.string_ byte array is -returned.

+utf-8 for dtype='UN' or ascii for dtype='SN'). If encoding is 'none' or 'bytes', +a numpy.string_ byte array is returned.

returns a numpy string array with datatype 'UN' (or 'SN') and shape b.shape[:-1] where N=b.shape[-1].

@@ -1254,7 +1254,7 @@

Functions

calendar: describes the calendar to be used in the time calculations. All the values currently defined in the CF metadata convention <http://cfconventions.org/cf-conventions/cf-conventions#calendar>__ are supported. -Valid calendars 'standard', 'gregorian', 'proleptic_gregorian' +Valid calendars 'standard', 'gregorian', 'proleptic_gregorian', 'tai', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is None which means the calendar associated with the first input datetime instance will be used.

@@ -1305,7 +1305,7 @@

Functions

calendar: describes the calendar to be used in the time calculations. All the values currently defined in the CF metadata convention <http://cfconventions.org/cf-conventions/cf-conventions#calendar>__ are supported. -Valid calendars 'standard', 'gregorian', 'proleptic_gregorian' +Valid calendars 'standard', 'gregorian', 'proleptic_gregorian', 'tai', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is None which means the calendar associated with the first input datetime instance will be used.

@@ -1381,7 +1381,7 @@

Functions

calendar: describes the calendar used in the time calculations. All the values currently defined in the CF metadata convention <http://cfconventions.org/cf-conventions/cf-conventions#calendar>__ are supported. -Valid calendars 'standard', 'gregorian', 'proleptic_gregorian' +Valid calendars 'standard', 'gregorian', 'proleptic_gregorian', 'tai', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is 'standard', which is a mixed Julian/Gregorian calendar.

only_use_cftime_datetimes: if False, python datetime.datetime @@ -1476,7 +1476,7 @@

Functions

(default) or 'U1' (if dtype='U')

-def stringtochar(a, encoding='utf-8', n_strlen=None) +def stringtochar(a, encoding=None, n_strlen=None)

stringtochar(a,encoding=None,n_strlen=None)

@@ -1487,8 +1487,8 @@

Functions

Will be converted to an array of characters (datatype 'S1' or 'U1') of shape a.shape + (N,).

optional kwarg encoding can be used to specify character encoding (default -utf-8). If encoding is 'none' or 'bytes', a numpy.string_ the input array -is treated a raw byte strings (numpy.string_).

+utf-8 for dtype='UN' or ascii for dtype='SN'). If encoding is 'none' or 'bytes', +the input array is treated as raw byte strings (numpy.string_).

optional kwarg n_strlen is the number of characters in each string. Default is None, which means n_strlen will be set to a.itemsize (the number of bytes diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index c5769ab90..49710a5b1 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -6842,10 +6842,6 @@ convert a character array to a string array with one less dimension. Will be converted to a array of strings, where each string has a fixed length of `b.shape[-1]` characters. -optional kwarg `encoding` can be used to specify character encoding (default -`utf-8`). If `encoding` is 'none' or 'bytes', a `numpy.string_` byte array is -returned. - optional kwarg `encoding` can be used to specify character encoding (default `utf-8` for dtype=`'UN'` or `ascii` for dtype=`'SN'`). If `encoding` is 'none' or 'bytes', a `numpy.string_` byte array is returned. From aab4fb311996fdaf6b6f576f440b7ddb442f037b Mon Sep 17 00:00:00 2001 From: jswhit2 Date: Fri, 16 Jan 2026 09:19:33 -0700 Subject: [PATCH 7/7] bump version number --- Changelog | 4 ++-- src/netCDF4/_netCDF4.pyx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Changelog b/Changelog index a401c2c7e..55bc6b0db 100644 --- a/Changelog +++ b/Changelog @@ -1,5 +1,5 @@ - since version 1.7.4 release - =========================== + version 1.7.4.1 (tag v1.7.4.1rel) + ================================= * Change default encoding for stringtochar/chartostring functions from 'utf-8' to 'utf-8'/'ascii' for dtype.kind='U'/'S' (issue #1464). diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 49710a5b1..2f5a133c9 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -1,4 +1,4 @@ -"""Version 1.7.4 +"""Version 1.7.4.1 ------------- # Introduction @@ -1283,7 +1283,7 @@ import sys import functools from typing import Union -__version__ = "1.7.4" +__version__ = "1.7.4.1" # Initialize numpy import posixpath