diff --git a/README.rst b/README.rst
index cfa3a1d..7131424 100644
--- a/README.rst
+++ b/README.rst
@@ -251,6 +251,8 @@ It can do some stuff server-side, which is usually not possible when using
 other cloud storage servers:
 
 - enforcing permissions
+- server rejects store operation if content hashsum does not match expected
+  hashsum (from HTTP header X-Content-hash-sha256)
 - server-side hash computation (e.g. sha256) for item content
 - server-side defragmentation helper (copies blocks to new items)
 
diff --git a/src/borgstore/backends/rest.py b/src/borgstore/backends/rest.py
index acac323..61726bb 100644
--- a/src/borgstore/backends/rest.py
+++ b/src/borgstore/backends/rest.py
@@ -5,6 +5,7 @@
 import os
 import re
 import json
+import hashlib
 from typing import Iterator, Dict, Optional
 from types import ModuleType
 from http import HTTPStatus as HTTP
@@ -202,7 +203,10 @@ def load(self, name: str, *, size=None, offset=0) -> bytes:
     def store(self, name: str, value: bytes) -> None:
         self._assert_open()
         validate_name(name)
-        response = self._request("post", self._url(name), data=value)
+        # send the content hash, so the server can verify what it received before storing it
+        algorithm = "sha256"
+        headers = {f"X-Content-hash-{algorithm}": hashlib.new(algorithm, value).hexdigest()}
+        response = self._request("post", self._url(name), data=value, headers=headers)
         self._handle_response(response, name)
 
     def delete(self, name: str) -> None:
diff --git a/src/borgstore/server/rest.py b/src/borgstore/server/rest.py
index c77ffb0..99b301f 100644
--- a/src/borgstore/server/rest.py
+++ b/src/borgstore/server/rest.py
@@ -1,3 +1,4 @@
+import hashlib
 import argparse
 import json
 import base64
@@ -201,7 +202,16 @@ def do_POST(self):
         if self.name:
             try:
                 content_length = int(self.headers.get("Content-Length", 0))
+                algorithm = "sha256"
+                expected_hash = self.headers.get(f"X-Content-hash-{algorithm}")
                 data = self.rfile.read(content_length)
+                if expected_hash:
+                    # hexdigest() is always lower-case: normalize the client-supplied value,
+                    # so a correct digest sent in upper-case or with padding is not rejected.
+                    got_hash = hashlib.new(algorithm, data).hexdigest()
+                    if got_hash != expected_hash.strip().lower():
+                        self.respond(HTTP.UNPROCESSABLE_ENTITY, b"Content hash verification failed, please retry")
+                        return
                 with self.server.backend:
                     self.server.backend.store(self.name, data)
                 self.respond(HTTP.OK)
diff --git a/tests/test_server_rest.py b/tests/test_server_rest.py
index 00b752a..90a942b 100644
--- a/tests/test_server_rest.py
+++ b/tests/test_server_rest.py
@@ -434,3 +434,65 @@ def test_rest_backend_defrag(rest_server_with_auth):
 
     finally:
         be.close()
+
+
+def test_rest_content_hash_verification(rest_server_with_auth):
+    """POST is accepted with a matching or absent X-Content-hash-sha256 header, rejected (422) on mismatch."""
+    be = rest_server_with_auth
+    base_url = be.base_url + "/"
+    auth = be.auth
+    headers = {"Accept": "application/vnd.x.borgstore.rest.v1"}
+
+    be.create()
+    be.open()
+    try:
+        # 1. Test store with correct hash
+        data1 = b"some data, correct hash"
+        correct_hash = hashlib.sha256(data1).hexdigest()
+        h = headers.copy()
+        h["X-Content-hash-sha256"] = correct_hash
+
+        resp = requests.post(base_url + "item1", data=data1, auth=auth, headers=h)
+        assert resp.status_code == 200
+
+        # Verify it was stored
+        resp = requests.get(base_url + "item1", auth=auth, headers=headers)
+        assert resp.status_code == 200
+        assert resp.content == data1
+
+        # 2. Test failed store with incorrect hash
+        data2 = b"some data, wrong hash"
+        wrong_hash = hashlib.sha256(b"something else").hexdigest()
+        h = headers.copy()
+        h["X-Content-hash-sha256"] = wrong_hash
+
+        resp = requests.post(base_url + "item2", data=data2, auth=auth, headers=h)
+        assert resp.status_code == 422
+        assert "Content hash verification failed" in resp.text
+
+        # Verify it was NOT stored
+        resp = requests.get(base_url + "item2", auth=auth, headers=headers)
+        assert resp.status_code == 404
+
+        # 3. Test store without hash header (should still work)
+        data3 = b"some data, no hash"
+        resp = requests.post(base_url + "item3", data=data3, auth=auth, headers=headers)
+        assert resp.status_code == 200
+
+        resp = requests.get(base_url + "item3", auth=auth, headers=headers)
+        assert resp.status_code == 200
+        assert resp.content == data3
+
+        # 4. Test store with upper-case hex digest (must be accepted, hex is case-insensitive)
+        data4 = b"some data, upper-case hash"
+        h = headers.copy()
+        h["X-Content-hash-sha256"] = hashlib.sha256(data4).hexdigest().upper()
+
+        resp = requests.post(base_url + "item4", data=data4, auth=auth, headers=h)
+        assert resp.status_code == 200
+
+        resp = requests.get(base_url + "item4", auth=auth, headers=headers)
+        assert resp.status_code == 200
+        assert resp.content == data4
+    finally:
+        be.close()