Skip to content

Commit 4457ef7

Browse files
committed
refactor and docs
1 parent bc5b4b1 commit 4457ef7

File tree

3 files changed

+47
-23
lines changed

3 files changed

+47
-23
lines changed

dictdatabase/io_bytes.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,43 +4,61 @@
44

55

66

7-
def read(db_name: str, start: int = None, end: int = None) -> bytes:
7+
def read(db_name: str, *, start: int = None, end: int = None) -> bytes:
88
"""
99
Read the content of a file as bytes. Reading works even when the config
1010
changes, so a compressed ddb file can also be read if compression is
1111
disabled, and vice versa.
1212
13-
Note: Only specify either both start and end, or none of them.
13+
If no compression is used, efficient reading can be done by specifying a start
14+
and end byte index, such that only the bytes in that range are read from the
15+
file.
16+
17+
If compression is used, specifying a start and end byte index is still possible,
18+
but the entire file has to be read and decompressed first, and then the bytes
19+
in the range are returned. This is because the compressed file is not seekable.
1420
1521
Args:
1622
- `db_name`: The name of the database file to read from.
1723
- `start`: The start byte index to read from.
1824
- `end`: The end byte index to read up to (not included).
25+
26+
Raises:
27+
- `FileNotFoundError`: If the file exists neither as .json nor as .ddb.
28+
- `OSError`: If no compression is used and `start` is negative.
29+
- `FileExistsError`: If the file exists as both .json and .ddb.
1930
"""
2031

2132
json_path, json_exists, ddb_path, ddb_exists = utils.file_info(db_name)
2233

2334
if json_exists:
2435
if ddb_exists:
25-
raise FileExistsError(f"Inconsistent: \"{db_name}\" exists as .json and .ddb")
36+
raise FileExistsError(
37+
f"Inconsistent: \"{db_name}\" exists as .json and .ddb."
38+
"Please remove one of them."
39+
)
2640
with open(json_path, "rb") as f:
27-
if start is None:
41+
if start is None and end is None:
2842
return f.read()
43+
start = start or 0
2944
f.seek(start)
3045
if end is None:
3146
return f.read()
3247
return f.read(end - start)
3348
if not ddb_exists:
34-
raise FileNotFoundError(f"DB does not exist: \"{db_name}\"")
49+
raise FileNotFoundError(f"No database file exists for \"{db_name}\"")
3550
with open(ddb_path, "rb") as f:
3651
json_bytes = zlib.decompress(f.read())
37-
if start is None:
52+
if start is None and end is None:
3853
return json_bytes
54+
start = start or 0
55+
end = end or len(json_bytes)
3956
return json_bytes[start:end]
4057

4158

4259

43-
def write(db_name: str, dump: bytes, start: int = None):
60+
61+
def write(db_name: str, dump: bytes, *, start: int = None):
4462
"""
4563
Write the bytes to the file of the db_path. If the db was compressed but no
4664
compression is enabled, remove the compressed file, and vice versa.

dictdatabase/io_unsafe.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def try_read_bytes_using_indexer(indexer: indexing.Indexer, db_name: str, key: s
5656
if (index := indexer.get(key)) is None:
5757
return None
5858
start, end, _, _, value_hash = index
59-
partial_bytes = io_bytes.read(db_name, start, end)
59+
partial_bytes = io_bytes.read(db_name, start=start, end=end)
6060
if value_hash != hashlib.sha256(partial_bytes).hexdigest():
6161
return None
6262
return partial_bytes
@@ -155,12 +155,12 @@ def try_get_parial_file_handle_by_index(indexer: indexing.Indexer, db_name, key)
155155

156156
# If compression is disabled, only the value and suffix have to be read
157157
else:
158-
value_and_suffix_bytes = io_bytes.read(db_name, start)
158+
value_and_suffix_bytes = io_bytes.read(db_name, start=start)
159159
value_length = end - start
160160
value_bytes = value_and_suffix_bytes[:value_length]
161161
if value_hash != hashlib.sha256(value_bytes).hexdigest():
162162
# If the hashes don't match, read the prefix to concat the full file bytes
163-
prefix_bytes = io_bytes.read(db_name, 0, start)
163+
prefix_bytes = io_bytes.read(db_name, end=start)
164164
return None, prefix_bytes + value_and_suffix_bytes
165165
value_data = orjson.loads(value_bytes)
166166
partial_dict = PartialDict(None, key, value_data, start, end, value_and_suffix_bytes[value_length:])

tests/test_io_bytes.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,48 @@
1-
import pytest
21
from dictdatabase import io_bytes
2+
import pytest
33

44

55

6-
def test_write_bytes(use_test_dir, name_of_test):
6+
def test_write_bytes(use_test_dir, name_of_test, use_compression):
7+
# Partial writes to a compressed file are not allowed
8+
if use_compression:
9+
with pytest.raises(RuntimeError):
10+
io_bytes.write(name_of_test, b"test", start=5)
11+
return
712
# Write shorter content at index
813
io_bytes.write(name_of_test, b"0123456789")
9-
io_bytes.write(name_of_test, b"abc", 2)
14+
io_bytes.write(name_of_test, b"abc", start=2)
1015
assert io_bytes.read(name_of_test) == b"01abc"
1116
# Overwrite with shorter content
1217
io_bytes.write(name_of_test, b"xy")
1318
assert io_bytes.read(name_of_test) == b"xy"
1419
# Overwrite with longer content
1520
io_bytes.write(name_of_test, b"0123456789")
16-
io_bytes.write(name_of_test, b"abcdef", 8)
21+
io_bytes.write(name_of_test, b"abcdef", start=8)
1722
assert io_bytes.read(name_of_test) == b"01234567abcdef"
1823
# Write at index out of range
1924
io_bytes.write(name_of_test, b"01")
20-
io_bytes.write(name_of_test, b"ab", 4)
25+
io_bytes.write(name_of_test, b"ab", start=4)
2126
assert io_bytes.read(name_of_test) == b'01\x00\x00ab'
2227

2328

2429

2530
def test_read_bytes(use_test_dir, name_of_test, use_compression):
2631
io_bytes.write(name_of_test, b"0123456789")
2732
# In range
28-
assert io_bytes.read(name_of_test, 2, 5) == b"234"
29-
# Complete range
30-
assert io_bytes.read(name_of_test, 0, 10) == b"0123456789"
31-
assert io_bytes.read(name_of_test, 0, None) == b"0123456789"
33+
assert io_bytes.read(name_of_test, start=2, end=5) == b"234"
34+
# Normal ranges
35+
assert io_bytes.read(name_of_test, start=0, end=10) == b"0123456789"
36+
assert io_bytes.read(name_of_test, start=2) == b"23456789"
37+
assert io_bytes.read(name_of_test, end=2) == b"01"
3238
assert io_bytes.read(name_of_test) == b"0123456789"
3339
# End out of range
34-
assert io_bytes.read(name_of_test, 9, 20) == b"9"
40+
assert io_bytes.read(name_of_test, start=9, end=20) == b"9"
3541
# Completely out of range
36-
assert io_bytes.read(name_of_test, 25, 30) == b""
42+
assert io_bytes.read(name_of_test, start=25, end=30) == b""
3743
# Start negative
3844
if use_compression:
39-
assert io_bytes.read(name_of_test, -5, 3) == b""
45+
assert io_bytes.read(name_of_test, start=-5, end=3) == b""
4046
else:
4147
with pytest.raises(OSError):
42-
io_bytes.read(name_of_test, -5, 3)
48+
io_bytes.read(name_of_test, start=-5, end=3)

0 commit comments

Comments
 (0)