Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased] (proposed 1.0.0)

### Added
- `output_dir` parameter to `write_search_results_to_file`, defaulting to `./tmp`
- `fetch_file` now defaults `output_dir` to `./tmp` when not supplied

### Removed
- **BREAKING**: `default_storage_dir` constructor parameter removed from `TINDClient`; pass `output_dir` directly to `fetch_file` and `write_search_results_to_file` instead

## [0.2.2]

### Changed
Expand Down
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ Create a `TINDClient` with optional configuration values:

- `api_key` (optional): Your TIND API token. Falls back to the `TIND_API_KEY` environment variable.
- `api_url` (optional): Base URL of the TIND instance (e.g. `https://tind.example.edu`). Falls back to the `TIND_API_URL` environment variable.
- `default_storage_dir` (optional): Default output directory for downloaded files. Defaults to `./tmp`.

## Usage

Expand All @@ -43,7 +42,6 @@ from tind_client import TINDClient
client = TINDClient(
api_key="your-token",
api_url="https://tind.example.edu",
default_storage_dir="/tmp",
)
```

Expand Down Expand Up @@ -79,8 +77,8 @@ records = client.fetch_search_metadata("collection:'Disabled Students Program Ph
xml_results = client.search("collection:'Disabled Students Program Photos'", result_format="xml")
pymarc_results = client.search("collection:'Disabled Students Program Photos'", result_format="pymarc")

# search Tind with a query and write results to an XML file in the default storage directory
records_written = client.write_search_results_to_file("Old Emperor Norton", "full_norton_results.xml")
# search TIND with a query and write the results to an XML file in the given output directory
records_written = client.write_search_results_to_file("Old Emperor Norton", "full_norton_results.xml", output_dir="/data")
```

## Running tests
Expand Down
2 changes: 0 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def tind_env(monkeypatch: pytest.MonkeyPatch) -> None:
"""Set required TIND environment variables for a test."""
monkeypatch.setenv("TIND_API_KEY", "test-api-key")
monkeypatch.setenv("TIND_API_URL", "https://tind.example.edu")
monkeypatch.setenv("DEFAULT_STORAGE_DIR", "/tmp")


@pytest.fixture
Expand All @@ -39,5 +38,4 @@ def client() -> TINDClient:
return TINDClient(
api_key="test-api-key",
api_url="https://tind.example.edu",
default_storage_dir="/tmp",
)
23 changes: 13 additions & 10 deletions tests/test_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,12 @@ def test_write_search_results_to_file_zero_hits(
tmp_path: Path,
) -> None:
"""write_search_results_to_file returns 0 immediately when the query has no hits."""
client.default_storage_dir = str(tmp_path)
requests_mock.get(
f"{BASE_URL}/search",
text=json.dumps({"hits": []}),
status_code=200,
)
assert client.write_search_results_to_file("collection:'empty'") == 0
assert client.write_search_results_to_file("collection:'empty'", output_dir=str(tmp_path)) == 0
assert not (tmp_path / "tind.xml").exists()


Expand All @@ -202,7 +201,6 @@ def test_write_search_results_to_file_success(
tmp_path: Path,
) -> None:
"""write_search_results_to_file writes 3 records and returns 3."""
client.default_storage_dir = str(tmp_path)
requests_mock.get(
f"{BASE_URL}/search",
response_list=[
Expand All @@ -214,7 +212,9 @@ def test_write_search_results_to_file_success(
{"text": (FIXTURES / "end-of-batch-tind-response.xml").read_text(), "status_code": 200},
],
)
count = client.write_search_results_to_file("collection:'test'", "out.xml")
count = client.write_search_results_to_file(
"collection:'test'", "out.xml", output_dir=str(tmp_path)
)
assert count == 3

marc21_ns = "http://www.loc.gov/MARC21/slim"
Expand All @@ -233,7 +233,6 @@ def test_write_search_results_to_file_matched_but_no_records_returned(
tmp_path: Path,
) -> None:
"""write_search_results_to_file raises TINDError when API returns no records for matched IDs"""
client.default_storage_dir = str(tmp_path)
requests_mock.get(
f"{BASE_URL}/search",
response_list=[
Expand All @@ -244,7 +243,9 @@ def test_write_search_results_to_file_matched_but_no_records_returned(
],
)
with pytest.raises(TINDError, match="API did not return any."):
client.write_search_results_to_file("collection:'test'", "mismatch.xml")
client.write_search_results_to_file(
"collection:'test'", "mismatch.xml", output_dir=str(tmp_path)
)


def test_write_search_results_to_file_matched_but_api_mismatch(
Expand All @@ -253,7 +254,6 @@ def test_write_search_results_to_file_matched_but_api_mismatch(
tmp_path: Path,
) -> None:
"""write_search_results_to_file raises TINDError when streamed record count != ID count."""
client.default_storage_dir = str(tmp_path)
requests_mock.get(
f"{BASE_URL}/search",
response_list=[
Expand All @@ -269,7 +269,9 @@ def test_write_search_results_to_file_matched_but_api_mismatch(
],
)
with pytest.raises(TINDError, match="Expected 4 records"):
client.write_search_results_to_file("collection:'test'", "mismatch.xml")
client.write_search_results_to_file(
"collection:'test'", "mismatch.xml", output_dir=str(tmp_path)
)


def test_write_search_results_to_file_malformed_xml_response(
Expand All @@ -278,7 +280,6 @@ def test_write_search_results_to_file_malformed_xml_response(
tmp_path: Path,
) -> None:
"""write_search_results_to_file raises TINDError when the API returns malformed XML."""
client.default_storage_dir = str(tmp_path)
requests_mock.get(
f"{BASE_URL}/search",
response_list=[
Expand All @@ -287,4 +288,6 @@ def test_write_search_results_to_file_malformed_xml_response(
],
)
with pytest.raises(TINDError, match="Failed to parse"):
client.write_search_results_to_file("collection:'test'", "malformed.xml")
client.write_search_results_to_file(
"collection:'test'", "malformed.xml", output_dir=str(tmp_path)
)
18 changes: 7 additions & 11 deletions tind_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,15 @@ class TINDClient:

:param str api_key: Your TIND API token.
:param str api_url: Base URL of the TIND instance, e.g. ``https://tind.example.edu``.
:param str default_storage_dir: Default directory used by :meth:`fetch_file`
when no ``output_dir`` is supplied.
"""

def __init__(
self,
api_key: str = "",
api_url: str = "",
default_storage_dir: str = "./tmp",
) -> None:
self.api_key = api_key or os.environ.get("TIND_API_KEY", "")
self.api_url = api_url or os.environ.get("TIND_API_URL", "")
self.default_storage_dir = default_storage_dir

def fetch_metadata(self, record: str) -> Record:
"""Fetch the MARC XML metadata for a given record.
Expand Down Expand Up @@ -69,12 +65,12 @@ def fetch_metadata(self, record: str) -> Record:

return records[0]

def fetch_file(self, file_url: str, output_dir: str = "") -> str:
def fetch_file(self, file_url: str, output_dir: str = "./tmp") -> str:
"""Download a file from TIND and save it locally.

:param str file_url: The TIND file download URL.
:param str output_dir: Directory in which to save the file.
Falls back to ``default_storage_dir`` when empty.
Defaults to ``./tmp``.
:raises AuthorizationError: When the TIND API key is invalid or the file is restricted.
:raises ValueError: When ``file_url`` is not a valid TIND file download URL.
:raises RecordNotFoundError: When the file is invalid or not found.
Expand All @@ -83,8 +79,7 @@ def fetch_file(self, file_url: str, output_dir: str = "") -> str:
if not re.match(r"^http.*/download(/)?(\?version=\d+)?$", file_url):
raise ValueError("URL is not a valid TIND file download URL.")

output_target = output_dir or self.default_storage_dir
(status, saved_to) = tind_download(file_url, output_dir=output_target, api_key=self.api_key)
(status, saved_to) = tind_download(file_url, output_dir=output_dir, api_key=self.api_key)

if status != 200:
raise RecordNotFoundError("Referenced file could not be downloaded.")
Expand Down Expand Up @@ -178,12 +173,13 @@ def search(self, query: str, result_format: str = "xml") -> list[Any]:
return recs

def write_search_results_to_file(
self, query: str = "", output_file_name: str = "tind.xml"
self, query: str = "", output_file_name: str = "tind.xml", output_dir: str = "./tmp"
) -> int:
"""Search TIND and stream results to an XML file.

:param str query: A TIND search query string.
:param str output_file_name: filename for the output XML file.
:param str output_dir: Directory in which to save the file. Defaults to ``./tmp``.
:returns int: The number of records written to the file.
"""

Expand All @@ -192,7 +188,7 @@ def write_search_results_to_file(
return 0

recs_written = 0
output_path = os.path.join(self.default_storage_dir, output_file_name)
output_path = Path(output_dir) / output_file_name
try:
with open(output_path, "w", encoding="utf-8") as f:
f.write(f'<?xml version="1.0" encoding="UTF-8"?>\n<collection xmlns="{NS}">\n')
Expand All @@ -206,7 +202,7 @@ def write_search_results_to_file(
raise TINDError(f"Matched {total_hits} tind ids, but API did not return any.")
f.write("</collection>\n")
except Exception:
Path(output_path).unlink(missing_ok=True)
output_path.unlink(missing_ok=True)
raise

if recs_written != total_hits:
Expand Down