Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions airbyte_cdk/sources/file_based/file_types/csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,12 @@ def read_data(
     headers, raw_headers = self._read_and_validate_headers(
         fp, config_format, dialect_name
     )
-except UnicodeError:
+except UnicodeError as e:
     raise AirbyteTracedException(
-        message=f"{FileBasedSourceError.ENCODING_ERROR.value} Expected encoding: {config_format.encoding}",
+        message=f"File contains bytes that cannot be decoded with the configured {config_format.encoding} encoding.",
+        internal_message=str(e),
+        failure_type=FailureType.config_error,
+        exception=e,
     )
 
 rows_to_skip = (
Expand Down
12 changes: 7 additions & 5 deletions unit_tests/sources/file_based/file_types/test_csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,15 +648,17 @@ def test_read_data_with_encoding_error(self) -> None:
     self._stream_reader.open_file.return_value = (
         CsvFileBuilder().with_data(["something"]).build()
     )
-    self._csv_reader._read_and_validate_headers = Mock(
-        side_effect=UnicodeDecodeError("encoding", b"", 0, 1, "reason")
-    )
+    unicode_error = UnicodeDecodeError("utf-8", b"\xff", 0, 1, "invalid start byte")
+    self._csv_reader._read_and_validate_headers = Mock(side_effect=unicode_error)
 
     with pytest.raises(AirbyteTracedException) as ate:
         data_generator = self._read_data()
-        assert len(list(data_generator)) == 0
+        list(data_generator)
 
-    assert "encoding" in ate.value.message
+    assert "utf8" in ate.value.message
+    assert ate.value.failure_type == FailureType.config_error
+    assert ate.value.internal_message == str(unicode_error)
+    assert ate.value._exception is unicode_error
     assert self._csv_reader._read_and_validate_headers.called
 
 def _read_data(self) -> Generator[Dict[str, str], None, None]:
Expand Down
Loading