diff --git a/CHANGELOG.md b/CHANGELOG.md index 26a6fcb..57ce568 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,15 +4,23 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/) + +## [Unreleased] 2025-02-11 + +### Added + +- Throw error if input file is empty. + + ## [1.0.1] 2025-02-11 ### Added -- handle cancelling of workflow +- Handle cancelling of workflow. ## [1.0.0] 2024-11-14 ### Added -- initial version +- Initial version. diff --git a/cmem_plugin_splitfile/plugin_splitfile.py b/cmem_plugin_splitfile/plugin_splitfile.py index ad6e177..93afeaf 100644 --- a/cmem_plugin_splitfile/plugin_splitfile.py +++ b/cmem_plugin_splitfile/plugin_splitfile.py @@ -198,6 +198,10 @@ def get_file(self, file_path: Path) -> None: } with requests.get(resource_url, headers=headers, stream=True) as r: # noqa: S113 r.raise_for_status() + if r.text == "": + setup_cmempy_user_access(self.context.user) + delete_resource(self.context.task.project_id(), self.input_filename) + raise OSError("Input file is empty.") with file_path.open("wb") as f: for chunk in r.iter_content(chunk_size=10485760): f.write(chunk) @@ -232,7 +236,12 @@ def execute_api(self) -> bool: def execute_filesystem(self) -> bool: """Execute plugin using file system""" resources_path = self.projects_path / self.context.task.project_id() / "resources" - self.split_file(resources_path / self.input_filename) + input_file_path = resources_path / self.input_filename + if input_file_path.stat().st_size == 0: + input_file_path.unlink() + raise OSError("Input file is empty.") + + self.split_file(input_file_path) input_file_parent = Path(self.input_filename).parent if str(input_file_parent) != ".": resources_path /= input_file_parent @@ -245,7 +254,7 @@ def execute_filesystem(self) -> bool: self.moved_files += 1 if self.delete_file: - (resources_path / 
self.input_filename).unlink() + input_file_path.unlink() return True def execute(self, inputs: Sequence[Entities], context: ExecutionContext) -> None: # noqa: ARG002 diff --git a/tests/test_splitfile.py b/tests/test_splitfile.py index 55e2283..e4dcb71 100644 --- a/tests/test_splitfile.py +++ b/tests/test_splitfile.py @@ -12,7 +12,7 @@ from requests import HTTPError from cmem_plugin_splitfile.plugin_splitfile import SplitFilePlugin -from tests.utils import TestExecutionContext, needs_cmem +from tests.utils import TestExecutionContext from . import __path__ @@ -33,21 +33,26 @@ def setup(request: pytest.FixtureRequest) -> None: Path(__path__[0]) / "test_files" / TEST_FILENAME, Path(__path__[0]) / PROJECT_ID / "resources" / TEST_FILENAME, ) + (Path(__path__[0]) / PROJECT_ID / "resources" / f"empty_{TEST_FILENAME}").open("w").close() with (Path(__path__[0]) / PROJECT_ID / "resources" / TEST_FILENAME).open("rb") as f: - buf = BytesIO(f.read()) create_resource( project_name=PROJECT_ID, resource_name=TEST_FILENAME, - file_resource=buf, + file_resource=BytesIO(f.read()), replace=True, ) + create_resource( + project_name=PROJECT_ID, + resource_name=f"empty_{TEST_FILENAME}", + file_resource=BytesIO(b""), + replace=True, + ) request.addfinalizer(lambda: rmtree(Path(__path__[0]) / PROJECT_ID)) request.addfinalizer(lambda: delete_project(PROJECT_ID)) # noqa: PT021 -@needs_cmem @pytest.mark.usefixtures("setup") def test_filesystem_size() -> None: """Test split by size using file system""" @@ -69,7 +74,6 @@ def test_filesystem_size() -> None: raise OSError("Input file deleted.") -@needs_cmem @pytest.mark.usefixtures("setup") def test_filesystem_size_header() -> None: """Test split by size with header using file system""" @@ -92,7 +96,6 @@ def test_filesystem_size_header() -> None: raise OSError("Input file deleted.") -@needs_cmem @pytest.mark.usefixtures("setup") def test_api_size() -> None: """Test split by size using API""" @@ -115,7 +118,6 @@ def test_api_size() -> None: 
get_resource(project_name=PROJECT_ID, resource_name=TEST_FILENAME) -@needs_cmem @pytest.mark.usefixtures("setup") def test_filesystem_size_delete() -> None: """Test split by size using file system and delete input file""" @@ -138,7 +140,6 @@ def test_filesystem_size_delete() -> None: raise OSError("Input file not deleted.") -@needs_cmem @pytest.mark.usefixtures("setup") def test_api_size_delete() -> None: """Test split by size using API and delete input file""" @@ -163,7 +164,6 @@ def test_api_size_delete() -> None: get_resource(project_name=PROJECT_ID, resource_name=TEST_FILENAME) -@needs_cmem @pytest.mark.usefixtures("setup") def test_filesystem_lines() -> None: """Test split by lines using file system""" @@ -182,7 +182,6 @@ def test_filesystem_lines() -> None: ) -@needs_cmem @pytest.mark.usefixtures("setup") def test_filesystem_lines_header() -> None: """Test split by lines with header using file system""" @@ -200,3 +199,34 @@ def test_filesystem_lines_header() -> None: Path(__path__[0]) / PROJECT_ID / "resources" / f"{UUID4}_00000000{n + 1}.nt", Path(__path__[0]) / "test_files" / f"{UUID4}_lines_header_00000000{n + 1}.nt", ) + + +@pytest.mark.usefixtures("setup") +def test_api_empty_file() -> None: + """Test empty input file using API""" + plugin = SplitFilePlugin( + input_filename=f"empty_{TEST_FILENAME}", + chunk_size=6, + size_unit="KB", + projects_path=__path__[0], + ) + with pytest.raises(OSError, match="Input file is empty."): + plugin.execute(inputs=[], context=TestExecutionContext(PROJECT_ID)) + with pytest.raises(HTTPError, match="404 Client Error: Not Found for url:"): + get_resource(project_name=PROJECT_ID, resource_name=f"empty_{TEST_FILENAME}") + + +@pytest.mark.usefixtures("setup") +def test_filesystem_empty_file() -> None: + """Test empty input file using file system""" + plugin = SplitFilePlugin( + input_filename=f"empty_{TEST_FILENAME}", + chunk_size=6, + size_unit="KB", + projects_path=__path__[0], + use_directory=True, + ) + with pytest.raises(OSError, 
match="Input file is empty."): + plugin.execute(inputs=[], context=TestExecutionContext(PROJECT_ID)) + if (Path(__path__[0]) / PROJECT_ID / "resources" / f"empty_{TEST_FILENAME}").is_file(): + raise OSError("Input file not deleted.")