From f78fa193b7cfe55613114e5da01abf6ed60b085f Mon Sep 17 00:00:00 2001 From: Kevin Van Brunt Date: Tue, 26 Aug 2025 16:41:30 -0400 Subject: [PATCH 1/6] Added regular expressions to detect ANSI escape and style sequences. --- cmd2/argparse_custom.py | 2 +- cmd2/rich_utils.py | 30 +++++++++++++++++------------- cmd2/string_utils.py | 17 ++++++++++++----- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/cmd2/argparse_custom.py b/cmd2/argparse_custom.py index c99ca82a2..2dce4463a 100644 --- a/cmd2/argparse_custom.py +++ b/cmd2/argparse_custom.py @@ -398,7 +398,7 @@ def __init__(self, value: object, descriptive_data: Sequence[Any], *args: Any) - # Make sure all objects are renderable by a Rich table. renderable_data = [obj if is_renderable(obj) else str(obj) for obj in descriptive_data] - # Convert objects with ANSI styles to Rich Text for correct display width. + # Convert objects with ANSI escape sequences to Rich Text for correct display width. self.descriptive_data = ru.prepare_objects_for_rendering(*renderable_data) # Save the original value to support CompletionItems as argparse choices. diff --git a/cmd2/rich_utils.py b/cmd2/rich_utils.py index 20001df13..2a4418457 100644 --- a/cmd2/rich_utils.py +++ b/cmd2/rich_utils.py @@ -1,5 +1,6 @@ """Provides common utilities to support Rich in cmd2-based applications.""" +import re from collections.abc import Mapping from enum import Enum from typing import ( @@ -28,13 +29,18 @@ from .styles import DEFAULT_CMD2_STYLES +# A compiled regular expression to detect ANSI escape sequences. +# The `[a-zA-Z]` at the end of the regex allows it to match all types of +# escape sequences, including those for styling, cursor movement, etc. +_ANSI_ESCAPE_SEQUENCE_RE = re.compile(r"\x1b\[[0-9;?]*[a-zA-Z]") + class AllowStyle(Enum): """Values for ``cmd2.rich_utils.ALLOW_STYLE``.""" - ALWAYS = 'Always' # Always output ANSI style sequences - NEVER = 'Never' # Remove ANSI style sequences from all output - TERMINAL = 'Terminal' # Remove ANSI style sequences if the output is not going to the terminal + ALWAYS = "Always" # Always output ANSI style sequences + NEVER = "Never" # Remove ANSI style sequences from all output + TERMINAL = "Terminal" # Remove ANSI style sequences if the output is not going to the terminal def __str__(self) -> str: """Return value instead of enum name for printing in cmd2's set command.""" @@ -234,7 +240,7 @@ def rich_text_to_string(text: Text) -> str: """Convert a Rich Text object to a string. This function's purpose is to render a Rich Text object, including any styles (e.g., color, bold), - to a plain Python string with ANSI escape codes. It differs from `text.plain`, which strips + to a plain Python string with ANSI style sequences. It differs from `text.plain`, which strips all formatting. :param text: the text object to convert @@ -259,7 +265,7 @@ def rich_text_to_string(text: Text) -> str: def string_to_rich_text(text: str) -> Text: - r"""Create a Text object from a string which can contain ANSI escape codes. + r"""Create a Text object from a string which can contain ANSI style sequences. This wraps rich.Text.from_ansi() to handle an issue where it removes the trailing line break from a string (e.g. "Hello\n" becomes "Hello"). @@ -323,9 +329,9 @@ def prepare_objects_for_rendering(*objects: Any) -> tuple[Any, ...]: """Prepare a tuple of objects for printing by Rich's Console.print(). This function converts any non-Rich object whose string representation contains - ANSI style codes into a rich.Text object. This ensures correct display width - calculation, as Rich can then properly parse and account for the non-printing - ANSI codes. All other objects are left untouched, allowing Rich's native + ANSI escape sequences into a rich.Text object. This ensures correct display width + calculation, as Rich can then properly parse and account for these non-printing + codes. All other objects are left untouched, allowing Rich's native renderers to handle them. :param objects: objects to prepare @@ -342,12 +348,10 @@ def prepare_objects_for_rendering(*objects: Any) -> tuple[Any, ...]: if isinstance(renderable, ConsoleRenderable): continue - # Check if the object's string representation contains ANSI styles, and if so, - # replace it with a Rich Text object for correct width calculation. renderable_as_str = str(renderable) - renderable_as_text = string_to_rich_text(renderable_as_str) - if renderable_as_text.plain != renderable_as_str: - object_list[i] = renderable_as_text + # Check for any ANSI escape sequences in the string. + if _ANSI_ESCAPE_SEQUENCE_RE.search(renderable_as_str): + object_list[i] = string_to_rich_text(renderable_as_str) return tuple(object_list) diff --git a/cmd2/string_utils.py b/cmd2/string_utils.py index a77eb5f6b..6aaadda96 100644 --- a/cmd2/string_utils.py +++ b/cmd2/string_utils.py @@ -1,15 +1,20 @@ """Provides string utility functions. This module offers a collection of string utility functions built on the Rich library. -These utilities are designed to correctly handle strings with ANSI escape codes and +These utilities are designed to correctly handle strings with ANSI style sequences and full-width characters (like those used in CJK languages). """ +import re + from rich.align import AlignMethod from rich.style import StyleType from . import rich_utils as ru +# A compiled regular expression to detect ANSI style sequences. +_ANSI_STYLE_SEQUENCE_RE = re.compile(r"\x1b\[[0-9;?]*m") + def align( val: str, @@ -94,13 +99,15 @@ def stylize(val: str, style: StyleType) -> str: def strip_style(val: str) -> str: - """Strip all ANSI styles from a string. + """Strip all ANSI style sequences from a string. + + This function uses a regular expression to efficiently remove ANSI style + sequences, which are a subset of ANSI escape sequences used for text formatting. :param val: string to be stripped :return: the stripped string """ - text = ru.string_to_rich_text(val) - return text.plain + return _ANSI_STYLE_SEQUENCE_RE.sub("", val) def str_width(val: str) -> int: @@ -163,4 +170,4 @@ def norm_fold(val: str) -> str: """ import unicodedata - return unicodedata.normalize('NFC', val).casefold() + return unicodedata.normalize("NFC", val).casefold() From dda999078e450afc399b93a230f65f46e8a8dc1a Mon Sep 17 00:00:00 2001 From: Kevin Van Brunt Date: Tue, 26 Aug 2025 17:38:22 -0400 Subject: [PATCH 2/6] Added unit tests. --- cmd2/rich_utils.py | 16 +++++++++++++--- tests/test_rich_utils.py | 20 ++++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/cmd2/rich_utils.py b/cmd2/rich_utils.py index 2a4418457..eea5236b0 100644 --- a/cmd2/rich_utils.py +++ b/cmd2/rich_utils.py @@ -30,9 +30,19 @@ from .styles import DEFAULT_CMD2_STYLES # A compiled regular expression to detect ANSI escape sequences. -# The `[a-zA-Z]` at the end of the regex allows it to match all types of -# escape sequences, including those for styling, cursor movement, etc. -_ANSI_ESCAPE_SEQUENCE_RE = re.compile(r"\x1b\[[0-9;?]*[a-zA-Z]") +_ANSI_ESCAPE_SEQUENCE_RE = re.compile( + r""" + \x1b # Match the Escape character (ESC) + (?: # Start of non-capturing group for the different sequence types + \[[0-9;?]*[a-zA-Z] # Match a CSI sequence (e.g., \x1b[31m) + | # OR + \].*?(?:\x07|\x1b\x5c) # Match an OSC sequence (e.g., \x1b]2;Hello\x07) + | # OR + \x37|\x38 # Match DEC cursor save/restore sequences + ) # End of non-capturing group +""", + re.VERBOSE, +) class AllowStyle(Enum): diff --git a/tests/test_rich_utils.py b/tests/test_rich_utils.py index f471d7d58..f8d955428 100644 --- a/tests/test_rich_utils.py +++ b/tests/test_rich_utils.py @@ -139,3 +139,23 @@ def test_set_theme() -> None: assert ru.APP_THEME.styles[rich_style_key] != orig_rich_style assert ru.APP_THEME.styles[rich_style_key] == theme[rich_style_key] + + +def test_ansi_escape_sequence_re() -> None: + import cmd2.terminal_utils as tu + + # Test a CSI sequence + cursor_contol = tu.Cursor.UP(1) + assert ru._ANSI_ESCAPE_SEQUENCE_RE.search(cursor_contol) + + # Test an OSC sequence + set_title = tu.set_title_str("Hello") + assert ru._ANSI_ESCAPE_SEQUENCE_RE.search(set_title) + + # Test DEC cursor save + cursor_save = "\x1b\x37" + assert ru._ANSI_ESCAPE_SEQUENCE_RE.search(cursor_save) + + # Test DEC cursor restore + cursor_restore = "\x1b\x38" + assert ru._ANSI_ESCAPE_SEQUENCE_RE.search(cursor_restore) From 61aa696b1ec73d1b93d69b19dc867569e2376ca3 Mon Sep 17 00:00:00 2001 From: Kevin Van Brunt Date: Tue, 26 Aug 2025 22:19:19 -0400 Subject: [PATCH 3/6] Switched to only searching for styles. --- cmd2/rich_utils.py | 22 +++++----------------- cmd2/string_utils.py | 7 +------ tests/test_rich_utils.py | 20 -------------------- 3 files changed, 6 insertions(+), 43 deletions(-) diff --git a/cmd2/rich_utils.py b/cmd2/rich_utils.py index eea5236b0..3c011eb52 100644 --- a/cmd2/rich_utils.py +++ b/cmd2/rich_utils.py @@ -29,20 +29,8 @@ from .styles import DEFAULT_CMD2_STYLES -# A compiled regular expression to detect ANSI escape sequences. -_ANSI_ESCAPE_SEQUENCE_RE = re.compile( - r""" - \x1b # Match the Escape character (ESC) - (?: # Start of non-capturing group for the different sequence types - \[[0-9;?]*[a-zA-Z] # Match a CSI sequence (e.g., \x1b[31m) - | # OR - \].*?(?:\x07|\x1b\x5c) # Match an OSC sequence (e.g., \x1b]2;Hello\x07) - | # OR - \x37|\x38 # Match DEC cursor save/restore sequences - ) # End of non-capturing group -""", - re.VERBOSE, -) +# A compiled regular expression to detect ANSI style sequences. +ANSI_STYLE_SEQUENCE_RE = re.compile(r"\x1b\[[0-9;?]*m") class AllowStyle(Enum): @@ -339,7 +327,7 @@ def prepare_objects_for_rendering(*objects: Any) -> tuple[Any, ...]: """Prepare a tuple of objects for printing by Rich's Console.print(). This function converts any non-Rich object whose string representation contains - ANSI escape sequences into a rich.Text object. This ensures correct display width + ANSI style sequences into a rich.Text object. This ensures correct display width calculation, as Rich can then properly parse and account for these non-printing codes. All other objects are left untouched, allowing Rich's native renderers to handle them. @@ -360,8 +348,8 @@ def prepare_objects_for_rendering(*objects: Any) -> tuple[Any, ...]: renderable_as_str = str(renderable) - # Check for any ANSI escape sequences in the string. - if _ANSI_ESCAPE_SEQUENCE_RE.search(renderable_as_str): + # Check for any ANSI style sequences in the string. + if ANSI_STYLE_SEQUENCE_RE.search(renderable_as_str): object_list[i] = string_to_rich_text(renderable_as_str) return tuple(object_list) diff --git a/cmd2/string_utils.py b/cmd2/string_utils.py index 6aaadda96..c7b41f4c5 100644 --- a/cmd2/string_utils.py +++ b/cmd2/string_utils.py @@ -5,16 +5,11 @@ full-width characters (like those used in CJK languages). """ -import re - from rich.align import AlignMethod from rich.style import StyleType from . import rich_utils as ru -# A compiled regular expression to detect ANSI style sequences. -_ANSI_STYLE_SEQUENCE_RE = re.compile(r"\x1b\[[0-9;?]*m") - def align( val: str, @@ -107,7 +102,7 @@ def strip_style(val: str) -> str: :param val: string to be stripped :return: the stripped string """ - return _ANSI_STYLE_SEQUENCE_RE.sub("", val) + return ru.ANSI_STYLE_SEQUENCE_RE.sub("", val) def str_width(val: str) -> int: diff --git a/tests/test_rich_utils.py b/tests/test_rich_utils.py index f8d955428..f471d7d58 100644 --- a/tests/test_rich_utils.py +++ b/tests/test_rich_utils.py @@ -139,23 +139,3 @@ def test_set_theme() -> None: assert ru.APP_THEME.styles[rich_style_key] != orig_rich_style assert ru.APP_THEME.styles[rich_style_key] == theme[rich_style_key] - - -def test_ansi_escape_sequence_re() -> None: - import cmd2.terminal_utils as tu - - # Test a CSI sequence - cursor_contol = tu.Cursor.UP(1) - assert ru._ANSI_ESCAPE_SEQUENCE_RE.search(cursor_contol) - - # Test an OSC sequence - set_title = tu.set_title_str("Hello") - assert ru._ANSI_ESCAPE_SEQUENCE_RE.search(set_title) - - # Test DEC cursor save - cursor_save = "\x1b\x37" - assert ru._ANSI_ESCAPE_SEQUENCE_RE.search(cursor_save) - - # Test DEC cursor restore - cursor_restore = "\x1b\x38" - assert ru._ANSI_ESCAPE_SEQUENCE_RE.search(cursor_restore) From 66b15aba363909dc35dea80bb30c0b7dcfc96e7e Mon Sep 17 00:00:00 2001 From: Kevin Van Brunt Date: Tue, 26 Aug 2025 22:33:35 -0400 Subject: [PATCH 4/6] Updated comments. --- cmd2/argparse_custom.py | 2 +- cmd2/cmd2.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd2/argparse_custom.py b/cmd2/argparse_custom.py index 2dce4463a..928c35eca 100644 --- a/cmd2/argparse_custom.py +++ b/cmd2/argparse_custom.py @@ -398,7 +398,7 @@ def __init__(self, value: object, descriptive_data: Sequence[Any], *args: Any) - # Make sure all objects are renderable by a Rich table. renderable_data = [obj if is_renderable(obj) else str(obj) for obj in descriptive_data] - # Convert objects with ANSI escape sequences to Rich Text for correct display width. + # Convert strings containing ANSI style sequences into Rich Text objects for correct display width. self.descriptive_data = ru.prepare_objects_for_rendering(*renderable_data) # Save the original value to support CompletionItems as argparse choices. diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py index d60b752bb..d60f1f419 100644 --- a/cmd2/cmd2.py +++ b/cmd2/cmd2.py @@ -1191,12 +1191,12 @@ def _completion_supported(self) -> bool: @property def visible_prompt(self) -> str: - """Read-only property to get the visible prompt with any ANSI style escape codes stripped. + """Read-only property to get the visible prompt with any ANSI style sequences stripped. - Used by transcript testing to make it easier and more reliable when users are doing things like coloring the - prompt using ANSI color codes. + Used by transcript testing to make it easier and more reliable when users are doing things like + coloring the prompt. - :return: prompt stripped of any ANSI escape codes + :return: the stripped prompt. """ return su.strip_style(self.prompt) @@ -4214,7 +4214,7 @@ def _print_documented_command_topics(self, header: str, cmds: list[str], verbose def render_columns(self, str_list: list[str] | None, display_width: int = 80) -> str: """Render a list of single-line strings as a compact set of columns. - This method correctly handles strings containing ANSI escape codes and + This method correctly handles strings containing ANSI style sequences and full-width characters (like those used in CJK languages). Each column is only as wide as necessary and columns are separated by two spaces. From 833341a471a74680b0ee203b83af454f58082f68 Mon Sep 17 00:00:00 2001 From: Kevin Van Brunt Date: Tue, 26 Aug 2025 22:39:24 -0400 Subject: [PATCH 5/6] Updated comments. --- cmd2/argparse_custom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd2/argparse_custom.py b/cmd2/argparse_custom.py index 928c35eca..bdf94da46 100644 --- a/cmd2/argparse_custom.py +++ b/cmd2/argparse_custom.py @@ -398,7 +398,7 @@ def __init__(self, value: object, descriptive_data: Sequence[Any], *args: Any) - # Make sure all objects are renderable by a Rich table. renderable_data = [obj if is_renderable(obj) else str(obj) for obj in descriptive_data] - # Convert strings containing ANSI style sequences into Rich Text objects for correct display width. + # Convert strings containing ANSI style sequences to Rich Text objects for correct display width. self.descriptive_data = ru.prepare_objects_for_rendering(*renderable_data) # Save the original value to support CompletionItems as argparse choices. From 80f6d460fa924c13a0aada43e0bee166abe870e4 Mon Sep 17 00:00:00 2001 From: Kevin Van Brunt Date: Tue, 26 Aug 2025 22:45:06 -0400 Subject: [PATCH 6/6] Updated comments. --- cmd2/cmd2.py | 2 +- cmd2/rich_utils.py | 4 ++-- cmd2/string_utils.py | 3 --- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py index d60f1f419..2eceae2f8 100644 --- a/cmd2/cmd2.py +++ b/cmd2/cmd2.py @@ -1196,7 +1196,7 @@ def visible_prompt(self) -> str: Used by transcript testing to make it easier and more reliable when users are doing things like coloring the prompt. - :return: the stripped prompt. + :return: the stripped prompt """ return su.strip_style(self.prompt) diff --git a/cmd2/rich_utils.py b/cmd2/rich_utils.py index 3c011eb52..dfef892f7 100644 --- a/cmd2/rich_utils.py +++ b/cmd2/rich_utils.py @@ -263,7 +263,7 @@ def rich_text_to_string(text: Text) -> str: def string_to_rich_text(text: str) -> Text: - r"""Create a Text object from a string which can contain ANSI style sequences. + r"""Create a Rich Text object from a string which can contain ANSI style sequences. This wraps rich.Text.from_ansi() to handle an issue where it removes the trailing line break from a string (e.g. "Hello\n" becomes "Hello"). @@ -327,7 +327,7 @@ def prepare_objects_for_rendering(*objects: Any) -> tuple[Any, ...]: """Prepare a tuple of objects for printing by Rich's Console.print(). This function converts any non-Rich object whose string representation contains - ANSI style sequences into a rich.Text object. This ensures correct display width + ANSI style sequences into a Rich Text object. This ensures correct display width calculation, as Rich can then properly parse and account for these non-printing codes. All other objects are left untouched, allowing Rich's native renderers to handle them. diff --git a/cmd2/string_utils.py b/cmd2/string_utils.py index c7b41f4c5..9b9d590c7 100644 --- a/cmd2/string_utils.py +++ b/cmd2/string_utils.py @@ -96,9 +96,6 @@ def stylize(val: str, style: StyleType) -> str: def strip_style(val: str) -> str: """Strip all ANSI style sequences from a string. - This function uses a regular expression to efficiently remove ANSI style - sequences, which are a subset of ANSI escape sequences used for text formatting. - :param val: string to be stripped :return: the stripped string """