Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 25 additions & 34 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ load("@rules_cc//cc:cc_library.bzl", "cc_library")
# surfaces (`SSL_CONF_*` command API and `SSL_read_ex`) that
# BoringSSL doesn't provide; see clickhouse/base/sslsocket.cpp.
# * lz4 / zstd come from BCR modules instead of contrib/. cityhash is
# the exception: it is built from contrib/ because the wire protocol
# needs ClickHouse's frozen CityHash variant, which differs from the
# stock BCR module (see the :cityhash target).

# Selects the TLS implementation: `--@clickhouse-cpp//:tls=boringssl`
# (default), `--@clickhouse-cpp//:tls=openssl`, or
Expand Down Expand Up @@ -67,34 +64,6 @@ selects.config_setting_group(
],
)

# Vendored CityHash from contrib/, NOT the BCR `@cityhash` module.
# ClickHouse's wire protocol checksums compressed blocks with a specific
# frozen CityHash128 variant (see clickhouse/base/compressed.cpp); the
# stock Google CityHash 1.0.2 that BCR packages produces *different*
# hashes, so a server rejects compressed blocks with "Checksum doesn't
# match: corrupted data". This is the exact copy the CMake build uses,
# guaranteeing byte-identical hashing and wire compatibility.
cc_library(
name = "cityhash",
srcs = [
"contrib/cityhash/cityhash/city.cc",
"contrib/cityhash/cityhash/city_config.h",
],
hdrs = [
"contrib/cityhash/cityhash/city.h",
"contrib/cityhash/cityhash/citycrc.h",
],
# city_config.h gates __builtin_expect off with `#if WIN32 || WIN64`, but
# MSVC only predefines `_WIN32`/`_WIN64` — under CMake it's CMake that
# defines WIN32, which Bazel's MSVC toolchain doesn't. Define it here
# so the GCC/Clang-only builtin isn't used on MSVC.
copts = select({
"@platforms//os:windows": ["/DWIN32"],
"//conditions:default": [],
}),
strip_include_prefix = "contrib/cityhash/cityhash",
)

# The main clickhouse-cpp library. Downstream callers use
# `#include <clickhouse/client.h>` and link `@clickhouse-cpp//:clickhouse`.
cc_library(
Expand All @@ -109,7 +78,14 @@ cc_library(
# Only compiled when TLS is enabled, mirroring CMake's
# `IF (WITH_OPENSSL)` source-list append.
exclude = ["clickhouse/base/sslsocket.cpp"],
) + select({
) + [
# Vendored CityHash, NOT the BCR `@cityhash` module. ClickHouse's
# wire protocol checksums compressed blocks with a specific frozen
# CityHash128 variant (see clickhouse/base/compressed.cpp).
# See clickhouse/cityhash/city.h for details.
"clickhouse/cityhash/city.cc",
"clickhouse/cityhash/city_config.h",
] + select({
":tls_no": [],
"//conditions:default": ["clickhouse/base/sslsocket.cpp"],
}),
Expand All @@ -118,7 +94,18 @@ cc_library(
"clickhouse/base/*.h",
"clickhouse/columns/*.h",
"clickhouse/types/*.h",
]),
]) + [
"clickhouse/cityhash/city.h",
"clickhouse/cityhash/citycrc.h",
],
# city_config.h gates __builtin_expect off with `#if WIN32 || WIN64`, but
# MSVC only predefines `_WIN32`/`_WIN64` — under CMake it's CMake that
# defines WIN32, which Bazel's MSVC toolchain doesn't. Define it here
# so the GCC/Clang-only builtin isn't used on MSVC.
copts = select({
"@platforms//os:windows": ["/DWIN32"],
"//conditions:default": [],
}),
defines = [
# Abseil fallback is not enabled in the Bazel build. This fallback will be removed in the
# near future. The Bazel build always uses the self-contained fallback for wide (128-bit)
Expand Down Expand Up @@ -158,14 +145,18 @@ cc_library(
],
"//conditions:default": [],
}),
# Expose the vendored CityHash headers on the include path so the
# `#include <city.h>` form used by compressed.cpp / lowcardinality.cpp /
# types.cpp resolves (this replaces the old :cityhash target's
# `strip_include_prefix = "contrib/cityhash/cityhash"`).
includes = ["clickhouse/cityhash"],
# Expose headers at their workspace path so both internal
# (`#include "client.h"` from clickhouse/*.cpp via quote-form
# source-dir search) and external (`#include <clickhouse/client.h>`)
# include forms resolve.
strip_include_prefix = "/",
visibility = ["//visibility:public"],
deps = [
":cityhash",
"@lz4//:lz4",
"@zstd//:zstd",
] + select({
Expand Down
9 changes: 2 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ OPTION (CH_USE_ABSEIL_FOR_BIGNUM "Use Google Abseil for wide (128-bit) integers"
OPTION (WITH_SYSTEM_ABSEIL "Use system Google Abseil, otherwise vendored part part of Google Abseil will be used" OFF)

OPTION (WITH_SYSTEM_LZ4 "Use system LZ4" OFF)
OPTION (WITH_SYSTEM_CITYHASH "Use system cityhash" OFF)
OPTION (WITH_SYSTEM_ZSTD "Use system ZSTD" OFF)
OPTION (DEBUG_DEPENDENCIES "Print debug info about dependencies duting build" ON)
OPTION (CHECK_VERSION "Check that version number corresponds to git tag, usefull in CI/CD to validate that new version published on GitHub has same version in sources" OFF)
Expand Down Expand Up @@ -97,12 +96,8 @@ ELSE ()
SUBDIRS (contrib/lz4/lz4)
ENDIF ()

IF (WITH_SYSTEM_CITYHASH)
FIND_PACKAGE(cityhash REQUIRED)
ELSE ()
INCLUDE_DIRECTORIES (contrib/cityhash/cityhash)
SUBDIRS (contrib/cityhash/cityhash)
ENDIF ()
INCLUDE_DIRECTORIES (clickhouse/cityhash)
SUBDIRS (clickhouse/cityhash)

IF (WITH_SYSTEM_ZSTD)
FIND_PACKAGE(zstd REQUIRED)
Expand Down
8 changes: 4 additions & 4 deletions clickhouse/base/compressed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ size_t CompressedInput::DoNext(const void** ptr, size_t len) {
}

bool CompressedInput::Decompress() {
uint128 hash;
cityhash::uint128 hash;
uint32_t compressed = 0;
uint32_t original = 0;
uint8_t method = 0;
Expand Down Expand Up @@ -96,7 +96,7 @@ bool CompressedInput::Decompress() {
if (!WireFormat::ReadBytes(*input_, tmp.data() + HEADER_SIZE, compressed - HEADER_SIZE)) {
return false;
} else {
if (hash != CityHash128((const char*)tmp.data(), compressed)) {
if (hash != cityhash::CityHash128((const char*)tmp.data(), compressed)) {
throw CompressionError("data was corrupted");
}
}
Expand Down Expand Up @@ -190,7 +190,7 @@ void CompressedOutput::Compress(const void * data, size_t len) {
WriteUnaligned(header + 5, static_cast<uint32_t>(len));
}

WireFormat::WriteFixed(*destination_, CityHash128((const char*)compressed_buffer_.data(), compressed_size + HEADER_SIZE));
WireFormat::WriteFixed(*destination_, cityhash::CityHash128((const char*)compressed_buffer_.data(), compressed_size + HEADER_SIZE));
WireFormat::WriteBytes(*destination_, compressed_buffer_.data(), compressed_size + HEADER_SIZE);
break;
}
Expand All @@ -215,7 +215,7 @@ void CompressedOutput::Compress(const void * data, size_t len) {
WriteUnaligned(header + 5, static_cast<uint32_t>(len));
}

WireFormat::WriteFixed(*destination_, CityHash128((const char*)compressed_buffer_.data(), compressed_size + HEADER_SIZE));
WireFormat::WriteFixed(*destination_, cityhash::CityHash128((const char*)compressed_buffer_.data(), compressed_size + HEADER_SIZE));
WireFormat::WriteBytes(*destination_, compressed_buffer_.data(), compressed_size + HEADER_SIZE);
break;
}
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <algorithm>
#include <string.h> // for memcpy and memset

namespace clickhouse::cityhash {

using namespace std;

static uint64 UNALIGNED_LOAD64(const char *p) {
Expand Down Expand Up @@ -467,3 +469,5 @@ uint128 CityHashCrc128(const char *s, size_t len) {
}

#endif

}
20 changes: 13 additions & 7 deletions contrib/cityhash/cityhash/city.h → clickhouse/cityhash/city.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,23 @@
// of a+b is easily derived from the hashes of a and b. This property
// doesn't hold for any hash functions in this file.

#ifndef CITY_HASH_H_
#define CITY_HASH_H_
// This is a slightly modified version of Google CityHash 1.0.1.
// https://github.com/google/cityhash/commit/8eded14d8e7cabfcdb10d4be35d521683edc0407
// The API is hidden behind a separate namespace to avoid naming conflicts with other
// projects that use CityHash and also import clickhouse-cpp.

#pragma once

#include <stdlib.h> // for size_t.
#include <stdint.h>
#include <utility>

typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;
typedef std::pair<uint64, uint64> uint128;
namespace clickhouse::cityhash {

using uint8 = uint8_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
using uint128 = std::pair<uint64, uint64>;

inline uint64 Uint128Low64(const uint128& x) { return x.first; }
inline uint64 Uint128High64(const uint128& x) { return x.second; }
Expand Down Expand Up @@ -87,4 +93,4 @@ inline uint64 Hash128to64(const uint128& x) {
return b;
}

#endif // CITY_HASH_H_
}
File renamed without changes.
2 changes: 1 addition & 1 deletion clickhouse/columns/lowcardinality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ details::LowCardinalityHashKey ColumnLowCardinality::computeHashKey(const ItemVi
}

const auto hash1 = hasher(item.data);
const auto hash2 = CityHash64(item.data.data(), item.data.size());
const auto hash2 = cityhash::CityHash64(item.data.data(), item.data.size());

return details::LowCardinalityHashKey{hash1, hash2};
}
Expand Down
2 changes: 1 addition & 1 deletion clickhouse/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ uint64_t Type::GetTypeUniqueId() const {

if (type_unique_id_.load(std::memory_order_relaxed) == 0) {
const auto name = GetName();
type_unique_id_.store(CityHash64WithSeed(name.c_str(), name.size(), code_), std::memory_order_relaxed);
type_unique_id_.store(cityhash::CityHash64WithSeed(name.c_str(), name.size(), code_), std::memory_order_relaxed);
}

return type_unique_id_;
Expand Down
Loading