Prevent ZIM finalization when content generation fails (#55).

generate_zim() now wraps the index fetch, the database fetch and
add_zim_contents() in a try/except inside the creator context. On any
Exception it sets started_creator.can_finish = False and re-raises.
A new test simulates a get_db() failure and asserts that no *.zim file
is left in the generator's output folder.

NOTE(review): line breaks, indentation and blank context lines were
reconstructed from a whitespace-collapsed copy of this patch. Whitespace
inside string literals is reproduced as seen and may have been wider in
the repository; verify against the tree before applying.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e4413e0..c5898bc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+
+- Stop scraper from finishing ZIM when bad exception arises. (#55)
+
 ## [0.2.0] - 2024-11-14
 
 ### Added
diff --git a/src/devdocs2zim/generator.py b/src/devdocs2zim/generator.py
index 2a4759d..35d905c 100644
--- a/src/devdocs2zim/generator.py
+++ b/src/devdocs2zim/generator.py
@@ -497,21 +497,27 @@ def generate_zim(
 
         # Start creator early to detect problems early.
         with creator as started_creator:
-            logger.info(" Fetching the index...")
-            index = self.devdocs_client.get_index(doc_metadata.slug)
-            logger.debug(f" The index has {len(index.entries)} entries.")
-
-            logger.info(" Fetching the document database...")
-            db = self.devdocs_client.get_db(doc_metadata.slug)
-            logger.debug(f" The database has {len(db)} entries.")
-
-            self.add_zim_contents(
-                creator=started_creator,
-                doc_metadata=doc_metadata,
-                index=index,
-                db=db,
-                common_resources=common_resources,
-            )
+            try:
+                logger.info(" Fetching the index...")
+                index = self.devdocs_client.get_index(doc_metadata.slug)
+                logger.debug(f" The index has {len(index.entries)} entries.")
+
+                logger.info(" Fetching the document database...")
+                db = self.devdocs_client.get_db(doc_metadata.slug)
+                logger.debug(f" The database has {len(db)} entries.")
+
+                self.add_zim_contents(
+                    creator=started_creator,
+                    doc_metadata=doc_metadata,
+                    index=index,
+                    db=db,
+                    common_resources=common_resources,
+                )
+            except Exception:
+                started_creator.can_finish = False
+
+                raise
+
         return zim_path
 
     @staticmethod
diff --git a/tests/test_generator.py b/tests/test_generator.py
index 454380e..aae98e7 100644
--- a/tests/test_generator.py
+++ b/tests/test_generator.py
@@ -471,3 +471,16 @@ def test_fetch_logo_bytes_does_not_exist_fails(self):
 
     def test_fetch_logo_bytes_returns_none_fails(self):
         self.assertRaises(Exception, Generator.fetch_logo_bytes, "")
+
+    def test_generate_zim_cleans_up_on_failure(self):
+        doc_metadata = DevdocsMetadata(name="MockDoc", slug="mockdoc")
+
+        self.mock_client.get_db.side_effect = RuntimeError("Simulated network timeout")
+
+        with self.assertRaises(RuntimeError):
+            self.generator.generate_zim(doc_metadata, [])
+
+        output_dir = Path(self.generator.output_folder)
+        zims = list(output_dir.glob("*.zim"))
+
+        self.assertEqual(0, len(zims))