From 37fbe3f40b8c28d6a288040e9705675fc5ebd0a4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 31 Jan 2026 21:34:44 +0100 Subject: [PATCH] swidth: use cross platform implementation, fixes #7493 This implementation should be good enough for our use case (paths) and has no external dependencies. There is also a wcwidth library which might be even better, but would add another dependency. --- src/borg/platform/__init__.py | 8 +------- src/borg/platform/base.py | 16 +++++++++++++++- src/borg/platform/posix.pyx | 23 ----------------------- src/borg/testsuite/platform/all_test.py | 13 +++++++++++++ src/borg/testsuite/platform/posix_test.py | 18 ------------------ 5 files changed, 29 insertions(+), 49 deletions(-) create mode 100644 src/borg/testsuite/platform/all_test.py delete mode 100644 src/borg/testsuite/platform/posix_test.py diff --git a/src/borg/platform/__init__.py b/src/borg/platform/__init__.py index 6954178ffa..fed7e2fe90 100644 --- a/src/borg/platform/__init__.py +++ b/src/borg/platform/__init__.py @@ -10,7 +10,7 @@ from .base import ENOATTR, API_VERSION from .base import SaveFile, sync_dir, fdatasync, safe_fadvise -from .base import get_process_id, fqdn, hostname, hostid +from .base import get_process_id, fqdn, hostname, hostid, swidth # work around pyinstaller "forgetting" to include the xattr module from . import xattr # noqa: F401 @@ -24,7 +24,6 @@ from .linux import set_flags, get_flags from .linux import SyncFile from .posix import process_alive, local_pid_alive - from .posix import swidth from .posix import get_errno from .posix import getosusername from . import posix_ug as platform_ug @@ -36,7 +35,6 @@ from .base import get_flags from .base import SyncFile from .posix import process_alive, local_pid_alive - from .posix import swidth from .posix import get_errno from .posix import getosusername from . 
import posix_ug as platform_ug @@ -47,7 +45,6 @@ from .base import set_flags, get_flags from .base import SyncFile from .posix import process_alive, local_pid_alive - from .posix import swidth from .posix import get_errno from .posix import getosusername from . import posix_ug as platform_ug @@ -60,7 +57,6 @@ from .base import get_flags from .base import SyncFile from .posix import process_alive, local_pid_alive - from .posix import swidth from .posix import get_errno from .posix import getosusername from . import posix_ug as platform_ug @@ -72,7 +68,6 @@ from .base import set_flags, get_flags from .base import SyncFile from .posix import process_alive, local_pid_alive - from .posix import swidth from .posix import get_errno from .posix import getosusername from . import posix_ug as platform_ug @@ -84,7 +79,6 @@ from .base import set_flags, get_flags from .base import SyncFile from .windows import process_alive, local_pid_alive - from .base import swidth from .windows import getosusername from . import windows_ug as platform_ug diff --git a/src/borg/platform/base.py b/src/borg/platform/base.py index e044b0f955..f4c39561b2 100644 --- a/src/borg/platform/base.py +++ b/src/borg/platform/base.py @@ -1,6 +1,7 @@ import errno import os import socket +import unicodedata import uuid from pathlib import Path @@ -266,7 +267,20 @@ def swidth(s): For western scripts, this is just len(s), but for cjk glyphs, 2 cells are used. 
""" - return len(s) + width = 0 + for char in s: + # Get the East Asian Width property + ea_width = unicodedata.east_asian_width(char) + + # Wide (W) and Fullwidth (F) characters take 2 cells + if ea_width in ("W", "F"): + width += 2 + # Not a zero-width characters (combining marks, format characters) + elif unicodedata.category(char) not in ("Mn", "Me", "Cf"): + # Normal characters take 1 cell + width += 1 + + return width # patched socket.getfqdn() - see https://bugs.python.org/issue5004 diff --git a/src/borg/platform/posix.pyx b/src/borg/platform/posix.pyx index 835c15c1d2..52418f85eb 100644 --- a/src/borg/platform/posix.pyx +++ b/src/borg/platform/posix.pyx @@ -5,34 +5,11 @@ from . import posix_ug from libc.errno cimport errno as c_errno -from cpython.mem cimport PyMem_Free -from libc.stddef cimport wchar_t - -cdef extern from "wchar.h": - # https://www.man7.org/linux/man-pages/man3/wcswidth.3.html - cdef int wcswidth(const wchar_t *s, size_t n) - - -cdef extern from "Python.h": - # https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_AsWideCharString - wchar_t* PyUnicode_AsWideCharString(object, Py_ssize_t*) except NULL - def get_errno(): return c_errno -def swidth(s): - cdef Py_ssize_t size - cdef wchar_t *as_wchar = PyUnicode_AsWideCharString(s, &size) - terminal_width = wcswidth(as_wchar, size) - PyMem_Free(as_wchar) - if terminal_width >= 0: - return terminal_width - else: - return len(s) - - def process_alive(host, pid, thread): """ Check whether the (host, pid, thread_id) combination corresponds to a process potentially alive. 
diff --git a/src/borg/testsuite/platform/all_test.py b/src/borg/testsuite/platform/all_test.py new file mode 100644 index 0000000000..b97f62b647 --- /dev/null +++ b/src/borg/testsuite/platform/all_test.py @@ -0,0 +1,13 @@ +from ...platform import swidth + + +def test_swidth_ascii(): + assert swidth("borg") == 4 + + +def test_swidth_cjk(): + assert swidth("バックアップ") == 6 * 2 + + +def test_swidth_mixed(): + assert swidth("borgバックアップ") == 4 + 6 * 2 diff --git a/src/borg/testsuite/platform/posix_test.py b/src/borg/testsuite/platform/posix_test.py deleted file mode 100644 index 7e97d084f9..0000000000 --- a/src/borg/testsuite/platform/posix_test.py +++ /dev/null @@ -1,18 +0,0 @@ -from ...platform import swidth -from .platform_test import skipif_not_posix - - -# set module-level skips -pytestmark = skipif_not_posix - - -def test_posix_swidth_ascii(): - assert swidth("borg") == 4 - - -def test_posix_swidth_cjk(): - assert swidth("バックアップ") == 6 * 2 - - -def test_posix_swidth_mixed(): - assert swidth("borgバックアップ") == 4 + 6 * 2