Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 118 additions & 9 deletions Storage/src/Connection/Rest.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
use Google\Cloud\Core\Upload\ResumableUploader;
use Google\Cloud\Core\Upload\StreamableUploader;
use Google\Cloud\Core\UriTrait;
use Google\Cloud\Storage\HashValidatingStream;
use Google\Cloud\Storage\StorageClient;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Psr7\MimeType;
Expand Down Expand Up @@ -331,6 +332,7 @@ public function downloadObject(array $args = [])
$requestedBytes = $this->getRequestedBytes($args);
$resultStream = Utils::streamFor(null);
$transcodedObj = false;
$hashHeader = null;

$args['retryStrategy'] ??= $this->retryStrategy;

Expand All @@ -339,12 +341,17 @@ public function downloadObject(array $args = [])
$invocationId = Uuid::uuid4()->toString();
$requestOptions['retryHeaders'] = self::getRetryHeaders($invocationId, 1);
$requestOptions['restRetryFunction'] = $this->getRestRetryFunction('objects', 'get', $args);
// We try to deduce if the object is a transcoded object when we receive the headers.
$requestOptions['restOptions']['on_headers'] = function ($response) use (&$transcodedObj) {
// We try to deduce if the object is a transcoded object
// and capture the X-Goog-Hash when we receive the headers.
$requestOptions['restOptions']['on_headers'] = function ($response) use (&$transcodedObj, &$hashHeader) {
$header = $response->getHeader(self::TRANSCODED_OBJ_HEADER_KEY);
if (is_array($header) && in_array(self::TRANSCODED_OBJ_HEADER_VAL, $header)) {
$transcodedObj = true;
}
$hash = $response->getHeaderLine('X-Goog-Hash');
if ($hash) {
$hashHeader = $hash;
}
};
$attempt = null;
$requestOptions['restRetryListener'] = function (
Expand Down Expand Up @@ -383,30 +390,111 @@ public function downloadObject(array $args = [])
}
};

$fetchedStream = $this->requestWrapper->send(
$response = $this->requestWrapper->send(
$request,
$requestOptions
)->getBody();
);
$fetchedStream = $response->getBody();

// If no retry attempt was made, then we can return the stream as is.
// This is important in the case where downloadObject is called to open
// the file but not to read from it yet.
if ($attempt === null) {
return $fetchedStream;
return $this->maybeWrapWithHashValidatingStream(
$fetchedStream,
$args,
$response,
$hashHeader,
$transcodedObj
);
}

// If our object is a transcoded object, then Range headers are not honoured.
// That means even if we had a partial download available, the final obj
// that was fetched will contain the complete object. So, we don't need to copy
// the partial stream, we can just return the stream we fetched.
if ($transcodedObj) {
return $fetchedStream;
return $this->maybeWrapWithHashValidatingStream(
$fetchedStream,
$args,
$response,
$hashHeader,
$transcodedObj
);
}

Utils::copyToStream($fetchedStream, $resultStream);

$resultStream->seek(0);
return $resultStream;
return $this->maybeWrapWithHashValidatingStream(
$resultStream,
$args,
$response,
$hashHeader,
$transcodedObj
);
}

/**
 * Decorate a download stream with checksum validation when it is applicable.
 *
 * The stream is returned unchanged when any of the following holds:
 *  - the `validate` argument is `false` or `'none'`;
 *  - the caller requested a byte range (start byte above zero or an
 *    explicit end byte);
 *  - the response is flagged as transcoded, either through `$transcodedObj`
 *    or by the presence of the transcoding header on `$response`;
 *  - no `X-Goog-Hash` value is available;
 *  - the header does not carry a digest usable for the requested mode.
 *
 * @param StreamInterface $stream The download stream to (possibly) wrap.
 * @param array $args The download arguments; `validate` defaults to `'crc32'`.
 * @param ResponseInterface $response The response the stream belongs to.
 * @param string|null $hashHeader X-Goog-Hash value captured earlier, if any.
 * @param bool $transcodedObj Whether the object was detected as transcoded.
 * @return StreamInterface The original stream or a HashValidatingStream around it.
 */
private function maybeWrapWithHashValidatingStream(
    StreamInterface $stream,
    array $args,
    ResponseInterface $response,
    $hashHeader = null,
    $transcodedObj = false
) {
    $mode = $args['validate'] ?? 'crc32';
    $validationDisabled = ($mode === false || $mode === 'none');
    if ($validationDisabled) {
        return $stream;
    }

    // A checksum of the whole object cannot be compared against a partial body.
    $range = $this->getRequestedBytes($args);
    $isPartialDownload = $range['startByte'] > 0 || $range['endByte'] !== '';
    if ($isPartialDownload) {
        return $stream;
    }

    // Transcoded objects are served decompressed but stored compressed,
    // so they are excluded from validation.
    $isTranscoded = $transcodedObj || $response->hasHeader(self::TRANSCODED_OBJ_HEADER_KEY);
    if ($isTranscoded) {
        return $stream;
    }

    $rawHashes = $hashHeader ?: $response->getHeaderLine('X-Goog-Hash');
    if (!$rawHashes) {
        return $stream;
    }

    // The header is a comma separated list of "<algorithm>=<base64 digest>".
    // Splitting on the first "=" only keeps base64 padding intact.
    $advertised = [];
    foreach (explode(',', $rawHashes) as $entry) {
        $pair = explode('=', trim($entry), 2);
        if (count($pair) !== 2) {
            continue;
        }
        $advertised[$pair[0]] = $pair[1];
    }

    $canHashCrc32c = in_array('crc32c', hash_algos(), true);
    $expected = [];

    if ($mode === 'md5') {
        if (isset($advertised['md5'])) {
            $expected['expectedMd5'] = $advertised['md5'];
        }
    } elseif ($mode === 'crc32' || $mode === 'crc32c' || $mode === true) {
        // Prefer CRC32C; fall back to MD5 when CRC32C cannot be computed
        // locally or was not advertised by the server.
        if ($canHashCrc32c && isset($advertised['crc32c'])) {
            $expected['expectedCrc32c'] = $advertised['crc32c'];
        } elseif (isset($advertised['md5'])) {
            $expected['expectedMd5'] = $advertised['md5'];
        }
    }

    return empty($expected)
        ? $stream
        : new HashValidatingStream($stream, $expected);
}

/**
Expand All @@ -418,13 +506,34 @@ public function downloadObject(array $args = [])
*/
public function downloadObjectAsync(array $args = [])
{
    // Both values are filled in by the on_headers callback below and read
    // later by the promise continuation, hence the by-reference captures.
    $transcodedObj = false;
    $hashHeader = null;
    list($request, $requestOptions) = $this->buildDownloadObjectParams($args);

    // We try to deduce if the object is a transcoded object
    // and capture the X-Goog-Hash when we receive the headers.
    $requestOptions['restOptions']['on_headers'] = function ($response) use (&$transcodedObj, &$hashHeader) {
        $header = $response->getHeader(self::TRANSCODED_OBJ_HEADER_KEY);
        if (is_array($header) && in_array(self::TRANSCODED_OBJ_HEADER_VAL, $header)) {
            $transcodedObj = true;
        }
        $hash = $response->getHeaderLine('X-Goog-Hash');
        if ($hash) {
            $hashHeader = $hash;
        }
    };

    // Resolve to the response body, wrapped for checksum validation when
    // maybeWrapWithHashValidatingStream() decides it applies.
    return $this->requestWrapper->sendAsync(
        $request,
        $requestOptions
    )->then(function (ResponseInterface $response) use ($args, &$hashHeader, &$transcodedObj) {
        return $this->maybeWrapWithHashValidatingStream(
            $response->getBody(),
            $args,
            $response,
            $hashHeader,
            $transcodedObj
        );
    });
}

Expand Down
176 changes: 176 additions & 0 deletions Storage/src/HashValidatingStream.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
<?php
/**
* Copyright 2026 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

namespace Google\Cloud\Storage;

use GuzzleHttp\Psr7\StreamDecoratorTrait;
use Psr\Http\Message\StreamInterface;
use RuntimeException;
use UnexpectedValueException;

/**
 * A Guzzle stream decorator that computes CRC32C and/or MD5 hashes on the fly
 * and validates them when the end of the stream is reached.
 *
 * Hashes are fed from the data returned by read() and getContents(), so the
 * stream is deliberately reported as non-seekable. Validation runs once:
 * either when read() observes EOF on the wrapped stream, or after
 * getContents() has drained it.
 */
class HashValidatingStream implements StreamInterface
{
    use StreamDecoratorTrait;

    /** @var StreamInterface The wrapped stream the data is read from. */
    private $stream;

    /** @var string|null Base64-encoded expected CRC32C, or null when not checked. */
    private $expectedCrc32c;

    /** @var string|null Base64-encoded expected MD5, or null when not checked. */
    private $expectedMd5;

    /** @var \HashContext|null Incremental CRC32C state while validation is pending. */
    private $crc32cContext;

    /** @var \HashContext|null Incremental MD5 state while validation is pending. */
    private $md5Context;

    /** @var bool True until the CRC32C digest has been finalized. */
    private $crc32cEnabled = false;

    /** @var bool True until the MD5 digest has been finalized. */
    private $md5Enabled = false;

    /**
     * @param StreamInterface $stream The underlying stream to wrap.
     * @param array $options {
     *     Configuration options.
     *
     *     @type string $expectedCrc32c Base64-encoded expected CRC32C checksum.
     *     @type string $expectedMd5 Base64-encoded expected MD5 checksum.
     * }
     * @throws RuntimeException If a requested hashing algorithm is not supported on the platform.
     */
    public function __construct(StreamInterface $stream, array $options = [])
    {
        $this->stream = $stream;
        $this->expectedCrc32c = $options['expectedCrc32c'] ?? null;
        $this->expectedMd5 = $options['expectedMd5'] ?? null;

        if ($this->expectedCrc32c !== null) {
            if (!in_array('crc32c', hash_algos(), true)) {
                throw new RuntimeException('CRC32C hashing algorithm is not supported on this platform.');
            }
            $this->crc32cContext = hash_init('crc32c');
            $this->crc32cEnabled = true;
        }

        if ($this->expectedMd5 !== null) {
            $this->md5Context = hash_init('md5');
            $this->md5Enabled = true;
        }
    }

    /**
     * Validating streams are not seekable since hash calculations are done on-the-fly.
     *
     * @return bool Always false.
     */
    public function isSeekable(): bool
    {
        return false;
    }

    /**
     * Seek operations are not supported on validating streams.
     *
     * @param int $offset
     * @param int $whence
     * @throws RuntimeException Always.
     */
    public function seek($offset, $whence = SEEK_SET): void
    {
        throw new RuntimeException('Seeking is not supported on a validating stream.');
    }

    /**
     * Read from the stream and update hash calculations.
     *
     * When the wrapped stream reports EOF after this read, the accumulated
     * checksums are validated before the data is returned.
     *
     * @param int $length
     * @return string
     * @throws UnexpectedValueException If checksum validation fails.
     */
    public function read($length): string
    {
        $data = $this->stream->read($length);
        $this->updateHashes($data);

        if ($this->stream->eof()) {
            $this->validate();
        }

        return $data;
    }

    /**
     * Get the entire remaining contents of the stream and validate.
     *
     * @return string
     * @throws UnexpectedValueException If checksum validation fails.
     */
    public function getContents(): string
    {
        $data = $this->stream->getContents();
        $this->updateHashes($data);
        $this->validate();

        return $data;
    }

    /**
     * Update hash contexts with the new chunk of data.
     *
     * @param string $data The bytes just read from the wrapped stream.
     */
    private function updateHashes(string $data): void
    {
        if ($data === '') {
            return;
        }

        if ($this->crc32cEnabled) {
            hash_update($this->crc32cContext, $data);
        }

        if ($this->md5Enabled) {
            hash_update($this->md5Context, $data);
        }
    }

    /**
     * Validate the accumulated checksums against expected values.
     *
     * Every pending digest is finalized and disarmed before any exception is
     * raised, so a failed validation never leaves the other checksum armed
     * for a later call. When several checksums mismatch, the first one
     * detected (CRC32C before MD5) is the one reported.
     *
     * @throws UnexpectedValueException If checksum validation fails.
     */
    private function validate(): void
    {
        $failure = null;

        if ($this->crc32cEnabled) {
            $this->crc32cEnabled = false; // Prevent double validation
            $calculatedCrc32c = base64_encode(hash_final($this->crc32cContext, true));
            if ($calculatedCrc32c !== $this->expectedCrc32c) {
                $failure = sprintf(
                    'CRC32C checksum mismatch. Expected: %s, Calculated: %s',
                    $this->expectedCrc32c,
                    $calculatedCrc32c
                );
            }
        }

        if ($this->md5Enabled) {
            $this->md5Enabled = false; // Prevent double validation
            $calculatedMd5 = base64_encode(hash_final($this->md5Context, true));
            if ($failure === null && $calculatedMd5 !== $this->expectedMd5) {
                $failure = sprintf(
                    'MD5 checksum mismatch. Expected: %s, Calculated: %s',
                    $this->expectedMd5,
                    $calculatedMd5
                );
            }
        }

        if ($failure !== null) {
            throw new UnexpectedValueException($failure);
        }
    }
}
Loading
Loading