Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,23 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/).


## [Unreleased] 2025-02-11

### Added

- Throw error if input file is empty.


## [1.0.1] 2025-02-11

### Added

- handle cancelling of workflow
- Handle cancelling of workflow.

## [1.0.0] 2024-11-14

### Added

- initial version
- Initial version.

13 changes: 11 additions & 2 deletions cmem_plugin_splitfile/plugin_splitfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ def get_file(self, file_path: Path) -> None:
}
with requests.get(resource_url, headers=headers, stream=True) as r: # noqa: S113
r.raise_for_status()
if r.text == "":
setup_cmempy_user_access(self.context.user)
delete_resource(self.context.task.project_id(), self.input_filename)
raise OSError("Input file is empty.")
with file_path.open("wb") as f:
for chunk in r.iter_content(chunk_size=10485760):
f.write(chunk)
Expand Down Expand Up @@ -232,7 +236,12 @@ def execute_api(self) -> bool:
def execute_filesystem(self) -> bool:
"""Execute plugin using file system"""
resources_path = self.projects_path / self.context.task.project_id() / "resources"
self.split_file(resources_path / self.input_filename)
input_file_path = resources_path / self.input_filename
if input_file_path.stat().st_size == 0:
input_file_path.unlink()
raise OSError("Input file is empty.")

self.split_file(input_file_path)
input_file_parent = Path(self.input_filename).parent
if str(input_file_parent) != ".":
resources_path /= input_file_parent
Expand All @@ -245,7 +254,7 @@ def execute_filesystem(self) -> bool:
self.moved_files += 1

if self.delete_file:
(resources_path / self.input_filename).unlink()
input_file_path.unlink()
return True

def execute(self, inputs: Sequence[Entities], context: ExecutionContext) -> None: # noqa: ARG002
Expand Down
50 changes: 40 additions & 10 deletions tests/test_splitfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from requests import HTTPError

from cmem_plugin_splitfile.plugin_splitfile import SplitFilePlugin
from tests.utils import TestExecutionContext, needs_cmem
from tests.utils import TestExecutionContext

from . import __path__

Expand All @@ -33,21 +33,26 @@ def setup(request: pytest.FixtureRequest) -> None:
Path(__path__[0]) / "test_files" / TEST_FILENAME,
Path(__path__[0]) / PROJECT_ID / "resources" / TEST_FILENAME,
)
(Path(__path__[0]) / PROJECT_ID / "resources" / f"empty_{TEST_FILENAME}").open("w").close()

with (Path(__path__[0]) / PROJECT_ID / "resources" / TEST_FILENAME).open("rb") as f:
buf = BytesIO(f.read())
create_resource(
project_name=PROJECT_ID,
resource_name=TEST_FILENAME,
file_resource=buf,
file_resource=BytesIO(f.read()),
replace=True,
)
create_resource(
project_name=PROJECT_ID,
resource_name=f"empty_{TEST_FILENAME}",
file_resource=BytesIO(b""),
replace=True,
)

request.addfinalizer(lambda: rmtree(Path(__path__[0]) / PROJECT_ID))
request.addfinalizer(lambda: delete_project(PROJECT_ID)) # noqa: PT021


@needs_cmem
@pytest.mark.usefixtures("setup")
def test_filesystem_size() -> None:
"""Test split by size using file system"""
Expand All @@ -69,7 +74,6 @@ def test_filesystem_size() -> None:
raise OSError("Input file deleted.")


@needs_cmem
@pytest.mark.usefixtures("setup")
def test_filesystem_size_header() -> None:
"""Test split by size with header using file system"""
Expand All @@ -92,7 +96,6 @@ def test_filesystem_size_header() -> None:
raise OSError("Input file deleted.")


@needs_cmem
@pytest.mark.usefixtures("setup")
def test_api_size() -> None:
"""Test split by size using API"""
Expand All @@ -115,7 +118,6 @@ def test_api_size() -> None:
get_resource(project_name=PROJECT_ID, resource_name=TEST_FILENAME)


@needs_cmem
@pytest.mark.usefixtures("setup")
def test_filesystem_size_delete() -> None:
"""Test split by size using file system and delete input file"""
Expand All @@ -138,7 +140,6 @@ def test_filesystem_size_delete() -> None:
raise OSError("Input file not deleted.")


@needs_cmem
@pytest.mark.usefixtures("setup")
def test_api_size_delete() -> None:
"""Test split by size using API and delete input file"""
Expand All @@ -163,7 +164,6 @@ def test_api_size_delete() -> None:
get_resource(project_name=PROJECT_ID, resource_name=TEST_FILENAME)


@needs_cmem
@pytest.mark.usefixtures("setup")
def test_filesystem_lines() -> None:
"""Test split by lines using file system"""
Expand All @@ -182,7 +182,6 @@ def test_filesystem_lines() -> None:
)


@needs_cmem
@pytest.mark.usefixtures("setup")
def test_filesystem_lines_header() -> None:
"""Test split by lines with header using file system"""
Expand All @@ -200,3 +199,34 @@ def test_filesystem_lines_header() -> None:
Path(__path__[0]) / PROJECT_ID / "resources" / f"{UUID4}_00000000{n + 1}.nt",
Path(__path__[0]) / "test_files" / f"{UUID4}_lines_header_00000000{n + 1}.nt",
)


@pytest.mark.usefixtures("setup")
def test_api_empty_file() -> None:
    """Test that an empty input file is rejected and deleted when using the API.

    Executing the plugin on the empty resource must raise
    ``OSError("Input file is empty.")``; afterwards the resource must be gone,
    so fetching it is expected to fail with an HTTP 404.
    """
    empty_filename = f"empty_{TEST_FILENAME}"
    plugin = SplitFilePlugin(
        input_filename=empty_filename,
        chunk_size=6,
        size_unit="KB",
        projects_path=__path__[0],
    )
    # ``match`` is applied as a regular expression, so the trailing dot is
    # escaped to match the literal message rather than "any character".
    with pytest.raises(OSError, match=r"Input file is empty\."):
        plugin.execute(inputs=[], context=TestExecutionContext(PROJECT_ID))
    # The empty resource must no longer be retrievable after the failed run.
    with pytest.raises(HTTPError, match="404 Client Error: Not Found for url:"):
        get_resource(project_name=PROJECT_ID, resource_name=empty_filename)


@pytest.mark.usefixtures("setup")
def test_filesystem_empty_file() -> None:
    """Test that an empty input file is rejected and deleted when using the file system.

    Executing the plugin with ``use_directory=True`` on the empty file must
    raise ``OSError("Input file is empty.")``; afterwards the file must no
    longer exist in the project's ``resources`` directory.
    """
    empty_filename = f"empty_{TEST_FILENAME}"
    plugin = SplitFilePlugin(
        input_filename=empty_filename,
        chunk_size=6,
        size_unit="KB",
        projects_path=__path__[0],
        use_directory=True,
    )
    # ``match`` is applied as a regular expression, so the trailing dot is
    # escaped to match the literal message rather than "any character".
    with pytest.raises(OSError, match=r"Input file is empty\."):
        plugin.execute(inputs=[], context=TestExecutionContext(PROJECT_ID))
    # The empty input file must have been removed from the resources directory.
    if (Path(__path__[0]) / PROJECT_ID / "resources" / empty_filename).is_file():
        raise OSError("Input file not deleted.")