From d2fb1a48915804b1369c0745eecb8ac99d873b08 Mon Sep 17 00:00:00 2001 From: Ofosu Osei Date: Tue, 11 Nov 2025 16:24:42 -0500 Subject: [PATCH] IO/Stata: clarify unsupported-version error message (text-only change) --- pandas/io/stata.py | 13 +++++++++---- pandas/tests/io/test_stata.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1f953650365ef..598a1f241fe53 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -89,11 +89,16 @@ WriteBuffer, ) +# Error shown when a version number was parsed but is not supported. +# Wording intentionally mentions "either not a valid Stata dataset or +# an unsupported version" to avoid confusing users when input is not a +# real .dta. _version_error = ( - "Version of given Stata file is {version}. pandas supports importing " - "versions 102, 103, 104, 105, 108, 110 (Stata 7), 111 (Stata 7SE), " - "113 (Stata 8/9), 114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), " - "118 (Stata 14/15/16), and 119 (Stata 15/16, over 32,767 variables)." + "This is either not a valid Stata dataset or a Stata dataset from a version " + "pandas does not support (detected: {version}). pandas supports importing " + "versions 102, 103, 104, 105, 108, 110 (Stata 7), 111 (Stata 7SE), " + "113 (Stata 8/9), 114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), " + "118 (Stata 14/15/16), and 119 (Stata 15/16, over 32,767 variables)."
) _statafile_processing_params1 = """\ diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b44f595e73670..b09101f00d1da 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -2620,3 +2620,13 @@ def test_ascii_error(temp_file, version): df.to_stata(temp_file, write_index=0, version=version) df_input = read_stata(temp_file) tm.assert_frame_equal(df, df_input) + + +def test_stata_v117_prefix_with_unsupported_version_raises_version_error(): + # _read_new_header reads 27 bytes, then the next 3 are the release digits + buf = io.BytesIO(b"<stata_dta><header><release>999" + b"\x00" * 64) + with pytest.raises( + ValueError, + match=(r"either not a valid Stata dataset|does not support.*999"), + ): + read_stata(buf)