diff --git a/Storage/src/Connection/Rest.php b/Storage/src/Connection/Rest.php index b240f580d941..d48ba6fc9d72 100644 --- a/Storage/src/Connection/Rest.php +++ b/Storage/src/Connection/Rest.php @@ -27,6 +27,7 @@ use Google\Cloud\Core\Upload\ResumableUploader; use Google\Cloud\Core\Upload\StreamableUploader; use Google\Cloud\Core\UriTrait; +use Google\Cloud\Storage\HashValidatingStream; use Google\Cloud\Storage\StorageClient; use GuzzleHttp\Exception\RequestException; use GuzzleHttp\Psr7\MimeType; @@ -331,6 +332,7 @@ public function downloadObject(array $args = []) $requestedBytes = $this->getRequestedBytes($args); $resultStream = Utils::streamFor(null); $transcodedObj = false; + $hashHeader = null; $args['retryStrategy'] ??= $this->retryStrategy; @@ -339,12 +341,17 @@ public function downloadObject(array $args = []) $invocationId = Uuid::uuid4()->toString(); $requestOptions['retryHeaders'] = self::getRetryHeaders($invocationId, 1); $requestOptions['restRetryFunction'] = $this->getRestRetryFunction('objects', 'get', $args); - // We try to deduce if the object is a transcoded object when we receive the headers. - $requestOptions['restOptions']['on_headers'] = function ($response) use (&$transcodedObj) { + // We try to deduce if the object is a transcoded object + // and capture the X-Goog-Hash when we receive the headers. 
+ $requestOptions['restOptions']['on_headers'] = function ($response) use (&$transcodedObj, &$hashHeader) { $header = $response->getHeader(self::TRANSCODED_OBJ_HEADER_KEY); if (is_array($header) && in_array(self::TRANSCODED_OBJ_HEADER_VAL, $header)) { $transcodedObj = true; } + $hash = $response->getHeaderLine('X-Goog-Hash'); + if ($hash) { + $hashHeader = $hash; + } }; $attempt = null; $requestOptions['restRetryListener'] = function ( @@ -383,16 +390,23 @@ public function downloadObject(array $args = []) } }; - $fetchedStream = $this->requestWrapper->send( + $response = $this->requestWrapper->send( $request, $requestOptions - )->getBody(); + ); + $fetchedStream = $response->getBody(); // If no retry attempt was made, then we can return the stream as is. // This is important in the case where downloadObject is called to open // the file but not to read from it yet. if ($attempt === null) { - return $fetchedStream; + return $this->maybeWrapWithHashValidatingStream( + $fetchedStream, + $args, + $response, + $hashHeader, + $transcodedObj + ); } // If our object is a transcoded object, then Range headers are not honoured. @@ -400,13 +414,87 @@ public function downloadObject(array $args = []) // that was fetched will contain the complete object. So, we don't need to copy // the partial stream, we can just return the stream we fetched. if ($transcodedObj) { - return $fetchedStream; + return $this->maybeWrapWithHashValidatingStream( + $fetchedStream, + $args, + $response, + $hashHeader, + $transcodedObj + ); } Utils::copyToStream($fetchedStream, $resultStream); $resultStream->seek(0); - return $resultStream; + return $this->maybeWrapWithHashValidatingStream( + $resultStream, + $args, + $response, + $hashHeader, + $transcodedObj + ); + } + + /** + * Wrap the download stream in a HashValidatingStream if validation is enabled. 
+     *
+     * The stream is returned unwrapped when validation is turned off, when a byte
+     * range was requested, when the object is flagged as transcoded, or when the
+     * `X-Goog-Hash` header offers no hash usable for the requested mode.
+     *
+     * @param StreamInterface $stream The download body to (possibly) wrap.
+     * @param array $args Download arguments. `validate` may be `false` or 'none'
+     *        (no validation), 'md5', or 'crc32' / 'crc32c' / `true` (CRC32C with an
+     *        MD5 fallback). Defaults to 'crc32'; any other value results in no
+     *        validation.
+     * @param ResponseInterface $response The response the body belongs to.
+     * @param string|null $hashHeader `X-Goog-Hash` value captured by the
+     *        `on_headers` callback; preferred over the header on `$response`.
+     * @param bool $transcodedObj Whether the `on_headers` callback flagged the
+     *        object as transcoded.
+     * @return StreamInterface Either `$stream` itself or a HashValidatingStream
+     *         wrapped around it.
+     */
+    private function maybeWrapWithHashValidatingStream(
+        StreamInterface $stream,
+        array $args,
+        ResponseInterface $response,
+        $hashHeader = null,
+        $transcodedObj = false
+    ) {
+        $validate = $args['validate'] ?? 'crc32';
+        if ($validate === false || $validate === 'none') {
+            return $stream;
+        }
+
+        // Skip validation if the user requested a subrange of the object
+        $requestedBytes = $this->getRequestedBytes($args);
+        if ($requestedBytes['startByte'] > 0 || $requestedBytes['endByte'] !== '') {
+            return $stream;
+        }
+
+        // Skip validation if the object is a transcoded object (served decompressed, stored compressed).
+        // The stored-encoding header is matched by VALUE, the same way the on_headers callbacks do it:
+        // the header being present does not by itself mean the object was transcoded, and treating
+        // presence as "transcoded" would silently switch validation off for such responses.
+        // NOTE(review): GCS is understood to send this header with `identity` for uncompressed
+        // objects - confirm against a live response.
+        $storedEncodings = $response->getHeader(self::TRANSCODED_OBJ_HEADER_KEY);
+        if ($transcodedObj || in_array(self::TRANSCODED_OBJ_HEADER_VAL, $storedEncodings, true)) {
+            return $stream;
+        }
+
+        // Prefer the value captured in on_headers; fall back to the final response's header.
+        $hashHeader = $hashHeader ?: $response->getHeaderLine('X-Goog-Hash');
+        if (!$hashHeader) {
+            return $stream;
+        }
+
+        // Parse "crc32c=<base64>,md5=<base64>". The split limit of 2 keeps the base64
+        // padding ('=') inside the value instead of cutting the value short.
+        $hashes = [];
+        $parts = explode(',', $hashHeader);
+        foreach ($parts as $part) {
+            $kv = explode('=', trim($part), 2);
+            if (count($kv) === 2) {
+                $hashes[$kv[0]] = $kv[1];
+            }
+        }
+
+        $options = [];
+        $crc32cSupported = in_array('crc32c', hash_algos(), true);
+
+        if ($validate === 'md5') {
+            if (isset($hashes['md5'])) {
+                $options['expectedMd5'] = $hashes['md5'];
+            }
+        } elseif ($validate === 'crc32' || $validate === 'crc32c' || $validate === true) {
+            // CRC32C is preferred; MD5 is used when CRC32C is unavailable on this
+            // platform or missing from the header.
+            if ($crc32cSupported && isset($hashes['crc32c'])) {
+                $options['expectedCrc32c'] = $hashes['crc32c'];
+            } elseif (isset($hashes['md5'])) {
+                $options['expectedMd5'] = $hashes['md5'];
+            }
+        }
+
+        // Nothing to compare against: hand the stream back untouched.
+        if (empty($options)) {
+            return $stream;
+        }
+
+        return new HashValidatingStream($stream, $options);
     }

     /**
@@ -418,13 +506,34 @@ public function downloadObject(array $args = [])
      */
     public function downloadObjectAsync(array $args = [])
     {
+        $transcodedObj = false;
+        $hashHeader = null;
         list($request, $requestOptions) = $this->buildDownloadObjectParams($args);
+        // We try to deduce if the object is a transcoded object
+        // and capture the X-Goog-Hash when we receive the headers.
+        $requestOptions['restOptions']['on_headers'] = function ($response) use (&$transcodedObj, &$hashHeader) {
+            $header = $response->getHeader(self::TRANSCODED_OBJ_HEADER_KEY);
+            if (is_array($header) && in_array(self::TRANSCODED_OBJ_HEADER_VAL, $header)) {
+                $transcodedObj = true;
+            }
+            // Remember the hash only when the header actually carries a value.
+            $hash = $response->getHeaderLine('X-Goog-Hash');
+            if ($hash) {
+                $hashHeader = $hash;
+            }
+        };
+
         return $this->requestWrapper->sendAsync(
             $request,
             $requestOptions
-        )->then(function (ResponseInterface $response) {
-            return $response->getBody();
+        )->then(function (ResponseInterface $response) use ($args, &$hashHeader, &$transcodedObj) {
+            // Both flags are captured by reference because the on_headers callback
+            // above assigns them only after this closure has been created.
+            return $this->maybeWrapWithHashValidatingStream(
+                $response->getBody(),
+                $args,
+                $response,
+                $hashHeader,
+                $transcodedObj
+            );
         });
     }

diff --git a/Storage/src/HashValidatingStream.php b/Storage/src/HashValidatingStream.php
new file mode 100644
index 000000000000..3fce2ce8b432
--- /dev/null
+++ b/Storage/src/HashValidatingStream.php
@@ -0,0 +1,176 @@
+stream = $stream;
+        $this->expectedCrc32c = $options['expectedCrc32c'] ?? null;
+        $this->expectedMd5 = $options['expectedMd5'] ?? null;
+
+        // A hash context is opened only for the checksums the caller asked for.
+        if ($this->expectedCrc32c !== null) {
+            // Strict comparison: hash_algos() returns the algorithm names as strings.
+            if (!in_array('crc32c', hash_algos(), true)) {
+                throw new RuntimeException('CRC32C hashing algorithm is not supported on this platform.');
+            }
+            $this->crc32cContext = hash_init('crc32c');
+            $this->crc32cEnabled = true;
+        }
+
+        if ($this->expectedMd5 !== null) {
+            $this->md5Context = hash_init('md5');
+            $this->md5Enabled = true;
+        }
+    }
+
+    /**
+     * Validating streams are not seekable since hash calculations are done on-the-fly.
+     *
+     * Always reports false, regardless of the wrapped stream; seek() throws accordingly.
+     *
+     * @return bool
+     */
+    public function isSeekable(): bool
+    {
+        return false;
+    }
+
+    /**
+     * Seek operations are not supported on validating streams.
+     *
+     * @param int $offset Unused.
+     * @param int $whence Unused.
+     * @throws RuntimeException Always.
+     */
+    public function seek($offset, $whence = SEEK_SET): void
+    {
+        throw new RuntimeException('Seeking is not supported on a validating stream.');
+    }
+
+    /**
+     * Read a chunk from the wrapped stream, feeding it into the running hashes.
+     *
+     * The checksums are validated as soon as the wrapped stream reports EOF.
+     *
+     * @param int $length
+     * @return string
+     * @throws UnexpectedValueException If the wrapped stream is exhausted and a checksum does not match.
+     */
+    public function read($length): string
+    {
+        $chunk = $this->stream->read($length);
+        $this->updateHashes($chunk);
+
+        if (!$this->stream->eof()) {
+            return $chunk;
+        }
+
+        $this->validate();
+
+        return $chunk;
+    }
+
+    /**
+     * Drain the rest of the wrapped stream, hash it, and validate the checksums.
+     *
+     * @return string
+     * @throws UnexpectedValueException If a checksum does not match.
+     */
+    public function getContents(): string
+    {
+        $remainder = $this->stream->getContents();
+        $this->updateHashes($remainder);
+        $this->validate();
+
+        return $remainder;
+    }
+
+    /**
+     * Feed a chunk of data into every hash context that is still enabled.
+     */
+    private function updateHashes(string $data)
+    {
+        // Empty reads carry nothing to hash.
+        if ($data !== '') {
+            if ($this->crc32cEnabled) {
+                hash_update($this->crc32cContext, $data);
+            }
+
+            if ($this->md5Enabled) {
+                hash_update($this->md5Context, $data);
+            }
+        }
+    }
+
+    /**
+     * Compare each enabled checksum with its expected value.
+     *
+     * A check is switched off once its digest has been computed, so later calls
+     * do not validate it again.
+     *
+     * @throws UnexpectedValueException If checksum validation fails.
+     */
+    private function validate()
+    {
+        if ($this->crc32cEnabled) {
+            $actualCrc32c = base64_encode(hash_final($this->crc32cContext, true));
+            $this->crc32cEnabled = false;
+
+            if ($actualCrc32c !== $this->expectedCrc32c) {
+                $message = sprintf(
+                    'CRC32C checksum mismatch. Expected: %s, Calculated: %s',
+                    $this->expectedCrc32c,
+                    $actualCrc32c
+                );
+                throw new UnexpectedValueException($message);
+            }
+        }
+
+        if ($this->md5Enabled) {
+            $actualMd5 = base64_encode(hash_final($this->md5Context, true));
+            $this->md5Enabled = false;
+
+            if ($actualMd5 !== $this->expectedMd5) {
+                $message = sprintf(
+                    'MD5 checksum mismatch. Expected: %s, Calculated: %s',
+                    $this->expectedMd5,
+                    $actualMd5
+                );
+                throw new UnexpectedValueException($message);
+            }
+        }
+    }
+}
diff --git a/Storage/tests/System/ManageObjectsTest.php b/Storage/tests/System/ManageObjectsTest.php
index 847ad64cc0be..cd76c43dd97b 100644
--- a/Storage/tests/System/ManageObjectsTest.php
+++ b/Storage/tests/System/ManageObjectsTest.php
@@ -832,6 +832,72 @@ public function testStringNormalization()
         }
     }

+    public function testDownloadsWithDefaultCrc32cValidationSuccess()
+    {
+        $object = self::$bucket->upload('system-test-data', ['name' => uniqid(self::TESTING_PREFIX)]);
+
+        // Automatic CRC32C validation runs under the hood
+        $content = $object->downloadAsString();
+        $this->assertEquals('system-test-data', $content);
+
+        $object->delete();
+    }
+
+    public function testDownloadsWithExplicitMd5ValidationSuccess()
+    {
+        $object = self::$bucket->upload('system-test-data', ['name' => uniqid(self::TESTING_PREFIX)]);
+
+        // Explicitly opt-in to MD5 validation
+        $content = $object->downloadAsString(['validate' => 'md5']);
+        $this->assertEquals('system-test-data', $content);
+
+        $object->delete();
+    }
+
+    public function testDownloadsWithValidationDisabledSuccess()
+    {
+        $object = self::$bucket->upload('system-test-data', ['name' => uniqid(self::TESTING_PREFIX)]);
+
+        // Explicitly disable validation
+        $content = $object->downloadAsString(['validate' => false]);
+        $this->assertEquals('system-test-data', $content);
+
+        $object->delete();
+    }
+
+    public function testDownloadsWithRangeBypassesValidation()
+    {
+        $data = 'system-test-range-data';
+        $object = self::$bucket->upload($data, ['name' =>
uniqid(self::TESTING_PREFIX)]); + + // Default validate is 'crc32', but we pass a Range header inside restOptions. + // This should successfully download the slice 'system' without throwing a mismatch exception. + $content = $object->downloadAsString([ + 'restOptions' => [ + 'headers' => [ + 'Range' => 'bytes=0-5' + ] + ] + ]); + $this->assertEquals('system', $content); + + $object->delete(); + } + + public function testDownloadToFileWithDefaultValidationSuccess() + { + $data = 'system-test-to-file-data'; + $object = self::$bucket->upload($data, ['name' => uniqid(self::TESTING_PREFIX)]); + + $tempFile = tempnam(sys_get_temp_dir(), 'gcs-test'); + $object->downloadToFile($tempFile); + + $this->assertEquals($data, file_get_contents($tempFile)); + + unlink($tempFile); + $object->delete(); + } + /** * Asserts that a provided StorageObject exists. * diff --git a/Storage/tests/Unit/Connection/RestTest.php b/Storage/tests/Unit/Connection/RestTest.php index 7fc4198ffe5c..ab946d5edec8 100644 --- a/Storage/tests/Unit/Connection/RestTest.php +++ b/Storage/tests/Unit/Connection/RestTest.php @@ -239,6 +239,190 @@ function ($args) use (&$actualRequest, $response) { ); } + public function testDownloadObjectWithTranscodedObjectValidationBypassed() + { + $body = 'test data'; + $response = new Response(200, [ + 'X-Goog-Stored-Content-Encoding' => 'gzip', + 'X-Goog-Hash' => 'crc32c=invalidcrc32c=' + ], $body); + + $this->requestWrapper->send(Argument::any(), Argument::any())->willReturn($response); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + // Should bypass validation entirely and succeed despite invalid hash + $stream = $rest->downloadObject(self::$downloadOptions); + $this->assertEquals($body, (string) $stream); + } + + public function testDownloadObjectWithCapturedHashFailsOnMismatchEvenIfHeaderStripped() + { + $this->expectException(UnexpectedValueException::class); + $this->expectExceptionMessage('CRC32C checksum mismatch'); + + $body 
= 'test data'; + // Final response missing the X-Goog-Hash header (mocking CDN/proxy stripping) + $response = new Response(200, [], $body); + + // Set up custom callback to trigger on_headers with X-Goog-Hash + $this->requestWrapper->send( + Argument::any(), + Argument::type('array') + )->will(function ($args) use ($response) { + $requestOptions = $args[1]; + if (isset($requestOptions['restOptions']['on_headers'])) { + $initialResponse = new Response(200, ['X-Goog-Hash' => 'crc32c=invalidcrc32c='], ''); + $requestOptions['restOptions']['on_headers']($initialResponse); + } + return $response; + }); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + $stream = $rest->downloadObject(self::$downloadOptions); + (string) $stream; // triggers validation + } + + public function testDownloadObjectWithCrc32cValidationSuccess() + { + $body = 'test data'; + $crc32c = base64_encode(hash('crc32c', $body, true)); + $response = new Response(200, ['X-Goog-Hash' => 'crc32c=' . 
$crc32c], $body); + + $this->requestWrapper->send(Argument::any(), Argument::any())->willReturn($response); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + $stream = $rest->downloadObject(self::$downloadOptions); + $this->assertEquals($body, (string) $stream); + } + + public function testDownloadObjectWithCrc32cValidationFailure() + { + $this->expectException(UnexpectedValueException::class); + $this->expectExceptionMessage('CRC32C checksum mismatch'); + + $body = 'test data'; + $response = new Response(200, ['X-Goog-Hash' => 'crc32c=invalidhash='], $body); + + $this->requestWrapper->send(Argument::any(), Argument::any())->willReturn($response); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + $stream = $rest->downloadObject(self::$downloadOptions); + (string) $stream; // Consume the stream to trigger validation + } + + public function testDownloadObjectWithMd5ValidationSuccess() + { + $body = 'test data'; + $md5 = base64_encode(hash('md5', $body, true)); + $response = new Response(200, ['X-Goog-Hash' => 'md5=' . 
$md5], $body); + + $this->requestWrapper->send(Argument::any(), Argument::any())->willReturn($response); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + // Explicitly opt-in to md5 validation + $options = self::$downloadOptions + ['validate' => 'md5']; + $stream = $rest->downloadObject($options); + $this->assertEquals($body, (string) $stream); + } + + public function testDownloadObjectValidationDisabled() + { + $body = 'test data'; + $response = new Response(200, ['X-Goog-Hash' => 'crc32c=invalidhash='], $body); + + $this->requestWrapper->send(Argument::any(), Argument::any())->willReturn($response); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + // Explicitly opt-out of validation + $options = self::$downloadOptions + ['validate' => false]; + $stream = $rest->downloadObject($options); + $this->assertEquals($body, (string) $stream); // Should succeed despite invalid hash header + } + + public function testDownloadObjectWithAutomaticCrc32cToMd5FallbackSuccess() + { + $body = 'test data'; + $md5 = base64_encode(hash('md5', $body, true)); + // GCS response has only md5 hash, no crc32c + $response = new Response(200, ['X-Goog-Hash' => 'md5=' . 
$md5], $body); + + $this->requestWrapper->send(Argument::any(), Argument::any())->willReturn($response); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + // No validate option specified - defaults to 'crc32' (which should fallback to 'md5') + $stream = $rest->downloadObject(self::$downloadOptions); + $this->assertEquals($body, (string) $stream); + } + + public function testDownloadObjectWithAutomaticCrc32cToMd5FallbackFailure() + { + $this->expectException(UnexpectedValueException::class); + $this->expectExceptionMessage('MD5 checksum mismatch'); + + $body = 'test data'; + // GCS response has only md5 hash, no crc32c, and it is invalid + $response = new Response(200, ['X-Goog-Hash' => 'md5=invalidmd5hash=='], $body); + + $this->requestWrapper->send(Argument::any(), Argument::any())->willReturn($response); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + // No validate option specified - defaults to 'crc32' (which should fallback to 'md5') + $stream = $rest->downloadObject(self::$downloadOptions); + (string) $stream; + } + + public function testDownloadObjectAsyncWithCrc32cValidationSuccess() + { + $body = 'test data'; + $crc32c = base64_encode(hash('crc32c', $body, true)); + $response = new Response(200, ['X-Goog-Hash' => 'crc32c=' . 
$crc32c], $body); + + $this->requestWrapper->sendAsync(Argument::any(), Argument::any())->willReturn(Create::promiseFor($response)); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + $promise = $rest->downloadObjectAsync(self::$downloadOptions); + $this->assertInstanceOf(PromiseInterface::class, $promise); + $stream = $promise->wait(); + $this->assertEquals($body, (string) $stream); + } + + public function testDownloadObjectAsyncWithCrc32cValidationFailure() + { + $this->expectException(UnexpectedValueException::class); + $this->expectExceptionMessage('CRC32C checksum mismatch'); + + $body = 'test data'; + $response = new Response(200, ['X-Goog-Hash' => 'crc32c=invalidhash='], $body); + + $this->requestWrapper->sendAsync(Argument::any(), Argument::any())->willReturn(Create::promiseFor($response)); + + $rest = new Rest(); + $rest->setRequestWrapper($this->requestWrapper->reveal()); + + $promise = $rest->downloadObjectAsync(self::$downloadOptions); + $this->assertInstanceOf(PromiseInterface::class, $promise); + $stream = $promise->wait(); + (string) $stream; // Consume the stream to trigger validation + } + /** * @dataProvider apiEndpointProvider */ diff --git a/Storage/tests/Unit/HashValidatingStreamTest.php b/Storage/tests/Unit/HashValidatingStreamTest.php new file mode 100644 index 000000000000..b94bae84f7d2 --- /dev/null +++ b/Storage/tests/Unit/HashValidatingStreamTest.php @@ -0,0 +1,142 @@ +correctCrc32c = base64_encode(hash('crc32c', $this->testData, true)); + $this->correctMd5 = base64_encode(hash('md5', $this->testData, true)); + } + + public function testValidCrc32cReadSequentiallySucceeds() + { + $rawStream = Utils::streamFor($this->testData); + $stream = new HashValidatingStream($rawStream, [ + 'expectedCrc32c' => $this->correctCrc32c + ]); + + $content = ''; + while (!$stream->eof()) { + $content .= $stream->read(4); + } + + $this->assertEquals($this->testData, $content); + } + + public function 
testValidCrc32cGetContentsSucceeds() + { + $rawStream = Utils::streamFor($this->testData); + $stream = new HashValidatingStream($rawStream, [ + 'expectedCrc32c' => $this->correctCrc32c + ]); + + $content = $stream->getContents(); + $this->assertEquals($this->testData, $content); + } + + public function testValidMd5GetContentsSucceeds() + { + $rawStream = Utils::streamFor($this->testData); + $stream = new HashValidatingStream($rawStream, [ + 'expectedMd5' => $this->correctMd5 + ]); + + $content = $stream->getContents(); + $this->assertEquals($this->testData, $content); + } + + public function testValidBothCrc32cAndMd5Succeeds() + { + $rawStream = Utils::streamFor($this->testData); + $stream = new HashValidatingStream($rawStream, [ + 'expectedCrc32c' => $this->correctCrc32c, + 'expectedMd5' => $this->correctMd5 + ]); + + $content = $stream->getContents(); + $this->assertEquals($this->testData, $content); + } + + public function testInvalidCrc32cThrowsException() + { + $this->expectException(UnexpectedValueException::class); + $this->expectExceptionMessage('CRC32C checksum mismatch'); + + $rawStream = Utils::streamFor($this->testData); + $stream = new HashValidatingStream($rawStream, [ + 'expectedCrc32c' => 'invalidcrc32c=' + ]); + + $stream->getContents(); + } + + public function testInvalidMd5ThrowsException() + { + $this->expectException(UnexpectedValueException::class); + $this->expectExceptionMessage('MD5 checksum mismatch'); + + $rawStream = Utils::streamFor($this->testData); + $stream = new HashValidatingStream($rawStream, [ + 'expectedMd5' => 'invalidmd5hash==' + ]); + + $stream->getContents(); + } + + public function testNoHashesRequestedNoValidation() + { + $rawStream = Utils::streamFor($this->testData); + $stream = new HashValidatingStream($rawStream, []); + + $content = $stream->getContents(); + $this->assertEquals($this->testData, $content); + } + + public function testStreamIsNonSeekable() + { + $rawStream = Utils::streamFor($this->testData); + $stream = 
new HashValidatingStream($rawStream, []); + + $this->assertFalse($stream->isSeekable()); + } + + public function testSeekThrowsException() + { + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Seeking is not supported on a validating stream.'); + + $rawStream = Utils::streamFor($this->testData); + $stream = new HashValidatingStream($rawStream, []); + + $stream->seek(0); + } +}