diff --git a/BUILD.bazel b/BUILD.bazel index e3deb917..b51d0235 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -15,9 +15,6 @@ load("@rules_cc//cc:cc_library.bzl", "cc_library") # surfaces (`SSL_CONF_*` command API and `SSL_read_ex`) that # BoringSSL doesn't provide; see clickhouse/base/sslsocket.cpp. -# * lz4 / zstd come from BCR modules instead of contrib/. cityhash is -# the exception: it is built from contrib/ because the wire protocol -# needs ClickHouse's frozen CityHash variant, which differs from the -# stock BCR module (see the :cityhash target). +# * lz4 / zstd come from BCR modules instead of contrib/; cityhash is vendored. # Selects the TLS implementation: `--@clickhouse-cpp//:tls=boringssl` # (default), `--@clickhouse-cpp//:tls=openssl`, or @@ -67,34 +64,6 @@ selects.config_setting_group( ], ) -# Vendored CityHash from contrib/, NOT the BCR `@cityhash` module. -# ClickHouse's wire protocol checksums compressed blocks with a specific -# frozen CityHash128 variant (see clickhouse/base/compressed.cpp); the -# stock Google CityHash 1.0.2 that BCR packages produces *different* -# hashes, so a server rejects compressed blocks with "Checksum doesn't -# match: corrupted data". This is the exact copy the CMake build uses, -# guaranteeing byte-identical hashing and wire compatibility. -cc_library( - name = "cityhash", - srcs = [ - "contrib/cityhash/cityhash/city.cc", - "contrib/cityhash/cityhash/city_config.h", - ], - hdrs = [ - "contrib/cityhash/cityhash/city.h", - "contrib/cityhash/cityhash/citycrc.h", - ], - # city_config.h gates __builtin_expect off with `#if WIN32 || WIN64`, but - # MSVC only predefines `_WIN32`/`_WIN64` — under CMake it's CMake that - # defines WIN32, which Bazel's MSVC toolchain doesn't. Define it here - # so the GCC/Clang-only builtin isn't used on MSVC. - copts = select({ - "@platforms//os:windows": ["/DWIN32"], - "//conditions:default": [], - }), - strip_include_prefix = "contrib/cityhash/cityhash", -) - # The main clickhouse-cpp library. Downstream callers use # `#include ` and link `@clickhouse-cpp//:clickhouse`. 
cc_library( @@ -109,7 +78,14 @@ cc_library( # Only compiled when TLS is enabled, mirroring CMake's # `IF (WITH_OPENSSL)` source-list append. exclude = ["clickhouse/base/sslsocket.cpp"], - ) + select({ + ) + [ + # Vendored CityHash, NOT the BCR `@cityhash` module. ClickHouse's + # wire protocol checksums compressed blocks with a specific frozen + # CityHash128 variant (see clickhouse/base/compressed.cpp). + # See clickhouse/cityhash/city.h for details. + "clickhouse/cityhash/city.cc", + "clickhouse/cityhash/city_config.h", + ] + select({ ":tls_no": [], "//conditions:default": ["clickhouse/base/sslsocket.cpp"], }), @@ -118,7 +94,18 @@ cc_library( "clickhouse/base/*.h", "clickhouse/columns/*.h", "clickhouse/types/*.h", - ]), + ]) + [ + "clickhouse/cityhash/city.h", + "clickhouse/cityhash/citycrc.h", + ], + # city_config.h gates __builtin_expect off with `#if WIN32 || WIN64`, but + # MSVC only predefines `_WIN32`/`_WIN64` — under CMake it's CMake that + # defines WIN32, which Bazel's MSVC toolchain doesn't. Define it here + # so the GCC/Clang-only builtin isn't used on MSVC. + copts = select({ + "@platforms//os:windows": ["/DWIN32"], + "//conditions:default": [], + }), defines = [ # Abseil fallback is not enabled in the Bazel build. This fallback will be removed in the # near future. The Bazel build always uses the self-contained fallback for wide (128-bit) @@ -158,6 +145,11 @@ cc_library( ], "//conditions:default": [], }), + # Expose the vendored CityHash headers on the include path so the + # `#include <city.h>` form used by compressed.cpp / lowcardinality.cpp / + # types.cpp resolves (mirrors the old :cityhash target's + # `strip_include_prefix = "contrib/cityhash/cityhash"`). 
+ includes = ["clickhouse/cityhash"], # Expose headers at their workspace path so both internal # (`#include "client.h"` from clickhouse/*.cpp via quote-form # source-dir search) and external (`#include `) @@ -165,7 +157,6 @@ cc_library( strip_include_prefix = "/", visibility = ["//visibility:public"], deps = [ - ":cityhash", "@lz4//:lz4", "@zstd//:zstd", ] + select({ diff --git a/CMakeLists.txt b/CMakeLists.txt index 0091a2e9..fd88ff3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,6 @@ OPTION (CH_USE_ABSEIL_FOR_BIGNUM "Use Google Abseil for wide (128-bit) integers" OPTION (WITH_SYSTEM_ABSEIL "Use system Google Abseil, otherwise vendored part part of Google Abseil will be used" OFF) OPTION (WITH_SYSTEM_LZ4 "Use system LZ4" OFF) -OPTION (WITH_SYSTEM_CITYHASH "Use system cityhash" OFF) OPTION (WITH_SYSTEM_ZSTD "Use system ZSTD" OFF) OPTION (DEBUG_DEPENDENCIES "Print debug info about dependencies duting build" ON) OPTION (CHECK_VERSION "Check that version number corresponds to git tag, usefull in CI/CD to validate that new version published on GitHub has same version in sources" OFF) @@ -97,12 +96,8 @@ ELSE () SUBDIRS (contrib/lz4/lz4) ENDIF () -IF (WITH_SYSTEM_CITYHASH) - FIND_PACKAGE(cityhash REQUIRED) -ELSE () - INCLUDE_DIRECTORIES (contrib/cityhash/cityhash) - SUBDIRS (contrib/cityhash/cityhash) -ENDIF () +INCLUDE_DIRECTORIES (clickhouse/cityhash) +SUBDIRS (clickhouse/cityhash) IF (WITH_SYSTEM_ZSTD) FIND_PACKAGE(zstd REQUIRED) diff --git a/clickhouse/base/compressed.cpp b/clickhouse/base/compressed.cpp index f30b5f88..06d72376 100644 --- a/clickhouse/base/compressed.cpp +++ b/clickhouse/base/compressed.cpp @@ -55,7 +55,7 @@ size_t CompressedInput::DoNext(const void** ptr, size_t len) { } bool CompressedInput::Decompress() { - uint128 hash; + cityhash::uint128 hash; uint32_t compressed = 0; uint32_t original = 0; uint8_t method = 0; @@ -96,7 +96,7 @@ bool CompressedInput::Decompress() { if (!WireFormat::ReadBytes(*input_, tmp.data() + HEADER_SIZE, 
compressed - HEADER_SIZE)) { return false; } else { - if (hash != CityHash128((const char*)tmp.data(), compressed)) { + if (hash != cityhash::CityHash128((const char*)tmp.data(), compressed)) { throw CompressionError("data was corrupted"); } } @@ -190,7 +190,7 @@ void CompressedOutput::Compress(const void * data, size_t len) { WriteUnaligned(header + 5, static_cast(len)); } - WireFormat::WriteFixed(*destination_, CityHash128((const char*)compressed_buffer_.data(), compressed_size + HEADER_SIZE)); + WireFormat::WriteFixed(*destination_, cityhash::CityHash128((const char*)compressed_buffer_.data(), compressed_size + HEADER_SIZE)); WireFormat::WriteBytes(*destination_, compressed_buffer_.data(), compressed_size + HEADER_SIZE); break; } @@ -215,7 +215,7 @@ void CompressedOutput::Compress(const void * data, size_t len) { WriteUnaligned(header + 5, static_cast(len)); } - WireFormat::WriteFixed(*destination_, CityHash128((const char*)compressed_buffer_.data(), compressed_size + HEADER_SIZE)); + WireFormat::WriteFixed(*destination_, cityhash::CityHash128((const char*)compressed_buffer_.data(), compressed_size + HEADER_SIZE)); WireFormat::WriteBytes(*destination_, compressed_buffer_.data(), compressed_size + HEADER_SIZE); break; } diff --git a/contrib/cityhash/cityhash/BUCK b/clickhouse/cityhash/BUCK similarity index 100% rename from contrib/cityhash/cityhash/BUCK rename to clickhouse/cityhash/BUCK diff --git a/contrib/cityhash/cityhash/CMakeLists.txt b/clickhouse/cityhash/CMakeLists.txt similarity index 100% rename from contrib/cityhash/cityhash/CMakeLists.txt rename to clickhouse/cityhash/CMakeLists.txt diff --git a/contrib/cityhash/cityhash/COPYING b/clickhouse/cityhash/COPYING similarity index 100% rename from contrib/cityhash/cityhash/COPYING rename to clickhouse/cityhash/COPYING diff --git a/contrib/cityhash/cityhash/city.cc b/clickhouse/cityhash/city.cc similarity index 99% rename from contrib/cityhash/cityhash/city.cc rename to clickhouse/cityhash/city.cc index 
e6f7f184..b2d3e901 100644 --- a/contrib/cityhash/cityhash/city.cc +++ b/clickhouse/cityhash/city.cc @@ -33,6 +33,8 @@ #include #include // for memcpy and memset +namespace clickhouse::cityhash { + using namespace std; static uint64 UNALIGNED_LOAD64(const char *p) { @@ -467,3 +469,5 @@ uint128 CityHashCrc128(const char *s, size_t len) { } #endif + +} diff --git a/contrib/cityhash/cityhash/city.h b/clickhouse/cityhash/city.h similarity index 88% rename from contrib/cityhash/cityhash/city.h rename to clickhouse/cityhash/city.h index c2ab352c..e9f60060 100644 --- a/contrib/cityhash/cityhash/city.h +++ b/clickhouse/cityhash/city.h @@ -40,17 +40,23 @@ // of a+b is easily derived from the hashes of a and b. This property // doesn't hold for any hash functions in this file. -#ifndef CITY_HASH_H_ -#define CITY_HASH_H_ +// This is a slightly modified version of Google CityHash 1.0.1. +// https://github.com/google/cityhash/commit/8eded14d8e7cabfcdb10d4be35d521683edc0407 +// The API is hidden behind a separate namespace to avoid naming conflicts with other +// projects that use CityHash and also import clickhouse-cpp. + +#pragma once #include // for size_t. 
#include #include -typedef uint8_t uint8; -typedef uint32_t uint32; -typedef uint64_t uint64; -typedef std::pair uint128; +namespace clickhouse::cityhash { + +using uint8 = uint8_t; +using uint32 = uint32_t; +using uint64 = uint64_t; +using uint128 = std::pair; inline uint64 Uint128Low64(const uint128& x) { return x.first; } inline uint64 Uint128High64(const uint128& x) { return x.second; } @@ -87,4 +93,4 @@ inline uint64 Hash128to64(const uint128& x) { return b; } -#endif // CITY_HASH_H_ +} diff --git a/contrib/cityhash/cityhash/city_config.h b/clickhouse/cityhash/city_config.h similarity index 100% rename from contrib/cityhash/cityhash/city_config.h rename to clickhouse/cityhash/city_config.h diff --git a/contrib/cityhash/cityhash/citycrc.h b/clickhouse/cityhash/citycrc.h similarity index 100% rename from contrib/cityhash/cityhash/citycrc.h rename to clickhouse/cityhash/citycrc.h diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 0722ea8f..e286c863 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -271,7 +271,7 @@ details::LowCardinalityHashKey ColumnLowCardinality::computeHashKey(const ItemVi } const auto hash1 = hasher(item.data); - const auto hash2 = CityHash64(item.data.data(), item.data.size()); + const auto hash2 = cityhash::CityHash64(item.data.data(), item.data.size()); return details::LowCardinalityHashKey{hash1, hash2}; } diff --git a/clickhouse/types/types.cpp b/clickhouse/types/types.cpp index d25798db..4a1276b1 100644 --- a/clickhouse/types/types.cpp +++ b/clickhouse/types/types.cpp @@ -179,7 +179,7 @@ uint64_t Type::GetTypeUniqueId() const { if (type_unique_id_.load(std::memory_order_relaxed) == 0) { const auto name = GetName(); - type_unique_id_.store(CityHash64WithSeed(name.c_str(), name.size(), code_), std::memory_order_relaxed); + type_unique_id_.store(cityhash::CityHash64WithSeed(name.c_str(), name.size(), code_), std::memory_order_relaxed); } return 
type_unique_id_;