Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ if(PAIMON_BUILD_TESTS)
endif()
# Adding unit tests part of the "paimon" portion of the test suite
add_custom_target(paimon-tests)
build_gtest()
resolve_dependency(GTest)

add_custom_target(unittest
ctest
Expand Down Expand Up @@ -422,6 +422,8 @@ if(PAIMON_BUILD_TESTS)
endif()
endif()

paimon_print_dependency_resolution_summary()

include(CMakePackageConfigHelpers)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/PaimonConfigVersion.cmake"
Expand Down
59 changes: 59 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,65 @@ $ cd build
$ cmake ..
$ make
```

### Third-party dependencies

Paimon C++ can either build selected third-party dependencies from bundled
sources or use libraries that are already installed on the system. The default
mode is `AUTO`, which tries system packages first and falls back to bundled
sources when they are not found.

```
$ cmake -B build -DPAIMON_DEPENDENCY_SOURCE=AUTO
```

The supported dependency source values are:

* `AUTO`: use a system package when available, otherwise build bundled sources.
* `BUNDLED`: always build bundled sources.
* `SYSTEM`: require system packages and fail if they are not found.

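You can also override individual dependencies by setting `<Package>_SOURCE`
(and, optionally, `<Package>_ROOT` to point at a specific installation). The
supported dependency set includes Arrow/Parquet, ORC, Protobuf, Avro, RE2, fmt,
RapidJSON, TBB, glog, GoogleTest, and the compression libraries (zstd, Snappy,
LZ4, and ZLIB).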

```
$ cmake -B build \
-DPAIMON_DEPENDENCY_SOURCE=AUTO \
-DArrow_SOURCE=SYSTEM \
-DArrow_ROOT=/opt/arrow \
-Dzstd_SOURCE=BUNDLED
```

Use `PAIMON_PACKAGE_PREFIX` to provide one common prefix for dependencies whose
own `<Package>_ROOT` variable is not set.

```
$ cmake -B build \
-DPAIMON_DEPENDENCY_SOURCE=SYSTEM \
-DPAIMON_PACKAGE_PREFIX=/opt/paimon-deps
```

Package-manager-specific modes are intentionally out of scope for this first
dependency source interface. They can still be used through standard CMake
mechanisms such as `CMAKE_PREFIX_PATH` or `CMAKE_TOOLCHAIN_FILE`, while Paimon
keeps the dependency source values limited to `AUTO`, `BUNDLED`, and `SYSTEM`.

When `Arrow_SOURCE` is explicitly set to `SYSTEM` or `BUNDLED`, the compression
dependencies default to the same source unless individually overridden. Mixing
system and bundled copies of transitive dependencies can cause ABI conflicts,
so prefer keeping Arrow and its compression dependencies from the same source
unless you have a specific reason to override them.

When `ORC_SOURCE` is explicitly set, `Protobuf_SOURCE` defaults to the same
source unless individually overridden. In `AUTO` mode, Paimon prechecks for a
system ORC installation and defaults Protobuf to `SYSTEM` only when system ORC
is found; otherwise Protobuf stays bundled with bundled ORC.

CMake prints a dependency resolution summary during configuration showing the
requested source, actual source, compatibility target, and search root for each
resolved dependency.

## Contributing

Paimon-cpp is an active open-source project and we welcome people who want to contribute or share good ideas!
Expand Down
16 changes: 16 additions & 0 deletions cmake_modules/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,15 @@ function(add_paimon_lib LIB_NAME)
# Generate a single "objlib" from all C++ modules and link
# that "objlib" into each library kind, to avoid compiling twice
add_library(${LIB_NAME}_objlib OBJECT ${ARG_SOURCES})
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(${LIB_NAME}_objlib PRIVATE -Wno-global-constructors)
endif()
# Necessary to make static linking into other shared libraries work properly
set_property(TARGET ${LIB_NAME}_objlib PROPERTY POSITION_INDEPENDENT_CODE 1)
if(ARG_DEPENDENCIES)
# In static-only builds, some dependency names are still declared as
# *_shared. Map them to *_static when the shared target is unavailable.
set(_paimon_objlib_link_deps)
set(_paimon_objlib_deps)
foreach(_paimon_dep IN LISTS ARG_DEPENDENCIES)
set(_paimon_mapped_dep "${_paimon_dep}")
Expand All @@ -65,14 +69,24 @@ function(add_paimon_lib LIB_NAME)
"${_paimon_dep}")
endif()
if(TARGET ${_paimon_mapped_dep})
get_target_property(_paimon_is_internal_lib ${_paimon_mapped_dep}
PAIMON_INTERNAL_LIBRARY)
list(APPEND _paimon_objlib_deps ${_paimon_mapped_dep})
if(NOT _paimon_is_internal_lib)
list(APPEND _paimon_objlib_link_deps ${_paimon_mapped_dep})
endif()
unset(_paimon_is_internal_lib)
endif()
unset(_paimon_mapped_dep)
endforeach()
if(_paimon_objlib_deps)
add_dependencies(${LIB_NAME}_objlib ${_paimon_objlib_deps})
endif()
if(_paimon_objlib_link_deps)
target_link_libraries(${LIB_NAME}_objlib PRIVATE ${_paimon_objlib_link_deps})
endif()
unset(_paimon_objlib_deps)
unset(_paimon_objlib_link_deps)
unset(_paimon_dep)
endif()
set(LIB_DEPS $<TARGET_OBJECTS:${LIB_NAME}_objlib>)
Expand Down Expand Up @@ -103,6 +117,7 @@ function(add_paimon_lib LIB_NAME)
target_include_directories(${LIB_NAME}_shared PRIVATE ${ARG_PRIVATE_INCLUDES})
endif()

set_property(TARGET ${LIB_NAME}_shared PROPERTY PAIMON_INTERNAL_LIBRARY TRUE)
set_target_properties(${LIB_NAME}_shared
PROPERTIES LIBRARY_OUTPUT_DIRECTORY
"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}"
Expand Down Expand Up @@ -157,6 +172,7 @@ function(add_paimon_lib LIB_NAME)

set(LIB_NAME_STATIC ${LIB_NAME})

set_property(TARGET ${LIB_NAME}_static PROPERTY PAIMON_INTERNAL_LIBRARY TRUE)
set_target_properties(${LIB_NAME}_static
PROPERTIES ARCHIVE_OUTPUT_DIRECTORY
"${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}"
Expand Down
101 changes: 101 additions & 0 deletions cmake_modules/DefineOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,107 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")

option(PAIMON_BUILD_CONFIG_SUMMARY_JSON
"Summarize build configuration in a JSON file" ON)

#----------------------------------------------------------------------
# Options controlling where third-party dependencies come from.
# Every option below is registered under the "Dependencies" category.
set_option_category("Dependencies")

# Project-wide default source. Defaults to AUTO; the trailing arguments
# (AUTO, BUNDLED, SYSTEM) appear to be the accepted values -- confirm against
# the define_option_string macro, which is defined outside this section.
define_option_string(PAIMON_DEPENDENCY_SOURCE
"Default third-party dependency source"
"AUTO"
AUTO
BUNDLED
SYSTEM)

# Common search prefix for third-party packages. Empty by default.
define_option_string(PAIMON_PACKAGE_PREFIX
"Default prefix used to find third-party packages" "")

# Linkage preference for system packages. OFF by default.
define_option(PAIMON_DEPENDENCY_USE_SHARED
"Prefer shared libraries for system third-party packages" OFF)

# Per-dependency source overrides, one <Package>_SOURCE option per package.
# Each defaults to the empty string and lists AUTO, BUNDLED and SYSTEM.
# NOTE(review): an empty value presumably means "fall back to
# PAIMON_DEPENDENCY_SOURCE"; the resolution logic is not visible here -- confirm.
define_option_string(Arrow_SOURCE
"Dependency source for Apache Arrow"
""
AUTO
BUNDLED
SYSTEM)
# Compression libraries: zstd, Snappy, LZ4, ZLIB.
define_option_string(zstd_SOURCE
"Dependency source for zstd"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(Snappy_SOURCE
"Dependency source for Snappy"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(LZ4_SOURCE
"Dependency source for LZ4"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(ZLIB_SOURCE
"Dependency source for ZLIB"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(RE2_SOURCE
"Dependency source for RE2"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(Protobuf_SOURCE
"Dependency source for Protobuf"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(ORC_SOURCE
"Dependency source for Apache ORC"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(fmt_SOURCE
"Dependency source for fmt"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(RapidJSON_SOURCE
"Dependency source for RapidJSON"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(TBB_SOURCE
"Dependency source for TBB"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(glog_SOURCE
"Dependency source for glog"
""
AUTO
BUNDLED
SYSTEM)
define_option_string(Avro_SOURCE
"Dependency source for Avro C++"
""
AUTO
BUNDLED
SYSTEM)
# Test-only dependency.
define_option_string(GTest_SOURCE
"Dependency source for GoogleTest"
""
AUTO
BUNDLED
SYSTEM)
endif()

macro(validate_config)
Expand Down
87 changes: 87 additions & 0 deletions cmake_modules/FindArrowAlt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright 2024-present Alibaba Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");

# Gather the user-supplied install prefixes for Arrow. Unquoted expansion drops
# variables that are unset or empty, so only real entries reach the list.
set(_PAIMON_ARROW_ROOTS)
list(APPEND _PAIMON_ARROW_ROOTS ${Arrow_ROOT} ${ARROW_ROOT} ${PAIMON_PACKAGE_PREFIX})
list(REMOVE_ITEM _PAIMON_ARROW_ROOTS "")

# When at least one prefix was given, search only there and skip the default
# system locations.
if(_PAIMON_ARROW_ROOTS)
  set(_PAIMON_ARROW_FIND_ARGS HINTS ${_PAIMON_ARROW_ROOTS} NO_DEFAULT_PATH)
endif()

# Probe the CMake package configs of Arrow and its companion libraries. The
# probes are QUIET; missing packages are reported later through the
# required-target check.
foreach(_paimon_arrow_package IN ITEMS Arrow Parquet ArrowDataset ArrowAcero)
  find_package(${_paimon_arrow_package} CONFIG QUIET ${_PAIMON_ARROW_FIND_ARGS})
endforeach()
unset(_paimon_arrow_package)

# Pick the first existing CMake target out of a list of candidates.
#
# Arguments:
#   OUT_VAR - name of the variable to set in the calling scope.
#   ARGN    - candidate target names, in order of preference.
#
# Result:
#   OUT_VAR holds the first candidate that is a defined target. When none of
#   the candidates exists (or no candidates were passed), OUT_VAR is unset in
#   the calling scope, so a value left over from earlier code can never be
#   mistaken for a successful match.
function(_paimon_select_first_target OUT_VAR)
  foreach(_candidate IN LISTS ARGN)
    if(TARGET ${_candidate})
      set(${OUT_VAR}
          ${_candidate}
          PARENT_SCOPE)
      return()
    endif()
  endforeach()
  # No candidate matched: clear any stale value in the caller's scope.
  unset(${OUT_VAR} PARENT_SCOPE)
endfunction()

# Choose the imported targets to link against. The linkage suffix follows
# PAIMON_DEPENDENCY_USE_SHARED; each candidate list ends with the unsuffixed
# target name as a fallback.
if(PAIMON_DEPENDENCY_USE_SHARED)
  set(_paimon_arrow_linkage shared)
else()
  set(_paimon_arrow_linkage static)
endif()

_paimon_select_first_target(_PAIMON_ARROW_TARGET
                            Arrow::arrow_${_paimon_arrow_linkage}
                            Arrow::arrow)
_paimon_select_first_target(_PAIMON_PARQUET_TARGET
                            Parquet::parquet_${_paimon_arrow_linkage}
                            Parquet::parquet)
_paimon_select_first_target(_PAIMON_ARROW_DATASET_TARGET
                            ArrowDataset::arrow_dataset_${_paimon_arrow_linkage}
                            Arrow::arrow_dataset_${_paimon_arrow_linkage}
                            ArrowDataset::arrow_dataset)
_paimon_select_first_target(_PAIMON_ARROW_ACERO_TARGET
                            ArrowAcero::arrow_acero_${_paimon_arrow_linkage}
                            Arrow::arrow_acero_${_paimon_arrow_linkage}
                            ArrowAcero::arrow_acero)

unset(_paimon_arrow_linkage)

# ArrowAlt counts as found only when all four imported targets were resolved.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  ArrowAlt
  REQUIRED_VARS
  _PAIMON_ARROW_TARGET
  _PAIMON_PARQUET_TARGET
  _PAIMON_ARROW_DATASET_TARGET
  _PAIMON_ARROW_ACERO_TARGET)

# Create plain-named compatibility targets (arrow, arrow_acero, arrow_dataset,
# parquet) that forward to the imported targets selected above. A target that
# already exists is left untouched.
if(ArrowAlt_FOUND)
  get_target_property(ARROW_INCLUDE_DIR ${_PAIMON_ARROW_TARGET}
                      INTERFACE_INCLUDE_DIRECTORIES)

  if(NOT TARGET arrow)
    add_library(arrow INTERFACE IMPORTED)
    # get_target_property yields a false "-NOTFOUND" value when the imported
    # target publishes no include directories; skip the property in that case.
    if(ARROW_INCLUDE_DIR)
      set_property(TARGET arrow PROPERTY INTERFACE_INCLUDE_DIRECTORIES
                                         "${ARROW_INCLUDE_DIR}")
    endif()
    target_link_libraries(arrow INTERFACE ${_PAIMON_ARROW_TARGET})
  endif()

  # Each wrapper below also links the previous wrapper, forming the chain
  # parquet -> arrow_dataset -> arrow_acero -> arrow.
  if(NOT TARGET arrow_acero)
    add_library(arrow_acero INTERFACE IMPORTED)
    target_link_libraries(arrow_acero INTERFACE ${_PAIMON_ARROW_ACERO_TARGET} arrow)
  endif()

  if(NOT TARGET arrow_dataset)
    add_library(arrow_dataset INTERFACE IMPORTED)
    target_link_libraries(arrow_dataset INTERFACE ${_PAIMON_ARROW_DATASET_TARGET}
                                                  arrow_acero)
  endif()

  if(NOT TARGET parquet)
    add_library(parquet INTERFACE IMPORTED)
    target_link_libraries(parquet INTERFACE ${_PAIMON_PARQUET_TARGET} arrow_dataset)
  endif()
endif()

# Drop the module's temporaries so they do not linger in the including scope.
unset(_PAIMON_ARROW_ROOTS)
unset(_PAIMON_ARROW_FIND_ARGS)
unset(_PAIMON_ARROW_TARGET)
unset(_PAIMON_PARQUET_TARGET)
unset(_PAIMON_ARROW_DATASET_TARGET)
unset(_PAIMON_ARROW_ACERO_TARGET)
42 changes: 42 additions & 0 deletions cmake_modules/FindAvroAlt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2024-present Alibaba Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");

# Locate a system installation of Avro C++ and expose it as the `avro` target.
#
# Inputs:
#   Avro_ROOT, AVRO_ROOT, PAIMON_PACKAGE_PREFIX
#       Optional search prefixes. When any of them is set, CMake's default
#       search locations are skipped (pkg-config hint directories are still
#       consulted after the prefixes).
#   PAIMON_DEPENDENCY_USE_SHARED
#       When ON, the shared library name is tried before the static one.
#
# Outputs:
#   AvroAlt_FOUND, AVRO_INCLUDE_DIR, AVRO_LIBRARY, AVRO_LIBRARIES and the
#   imported target `avro` (created only if it does not already exist).

set(_PAIMON_AVRO_ROOTS ${Avro_ROOT} ${AVRO_ROOT} ${PAIMON_PACKAGE_PREFIX})
list(REMOVE_ITEM _PAIMON_AVRO_ROOTS "")
if(_PAIMON_AVRO_ROOTS)
  set(_PAIMON_AVRO_FIND_ARGS HINTS ${_PAIMON_AVRO_ROOTS} NO_DEFAULT_PATH)
endif()

# pkg-config is optional; its directories are used only as extra hints.
find_package(PkgConfig QUIET)
if(PkgConfig_FOUND)
  pkg_check_modules(PC_Avro QUIET avro-cpp)
endif()

# Order the candidate names by the requested linkage preference. Upstream
# Avro C++ names its static archive `avrocpp_s` and its shared library
# `avrocpp`; the other name is kept as a fallback in both cases.
if(PAIMON_DEPENDENCY_USE_SHARED)
  set(_PAIMON_AVRO_LIBRARY_NAMES avrocpp avrocpp_s)
else()
  set(_PAIMON_AVRO_LIBRARY_NAMES avrocpp_s avrocpp)
endif()

find_path(AVRO_INCLUDE_DIR
          NAMES avro/Decoder.hh ${_PAIMON_AVRO_FIND_ARGS}
          HINTS ${PC_Avro_INCLUDE_DIRS}
          PATH_SUFFIXES include)
find_library(AVRO_LIBRARY
             NAMES ${_PAIMON_AVRO_LIBRARY_NAMES} ${_PAIMON_AVRO_FIND_ARGS}
             HINTS ${PC_Avro_LIBRARY_DIRS}
             PATH_SUFFIXES lib lib64)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(AvroAlt REQUIRED_VARS AVRO_LIBRARY AVRO_INCLUDE_DIR)

if(AvroAlt_FOUND AND NOT TARGET avro)
  add_library(avro UNKNOWN IMPORTED)
  set_target_properties(avro
                        PROPERTIES IMPORTED_LOCATION "${AVRO_LIBRARY}"
                                   INTERFACE_INCLUDE_DIRECTORIES "${AVRO_INCLUDE_DIR}")
  # Forward the compression libraries to consumers of `avro`, but only those
  # that already exist as targets when this module runs.
  foreach(_paimon_avro_dep IN ITEMS zlib zstd snappy)
    if(TARGET ${_paimon_avro_dep})
      target_link_libraries(avro INTERFACE ${_paimon_avro_dep})
    endif()
  endforeach()
  unset(_paimon_avro_dep)
  set(AVRO_LIBRARIES "${AVRO_LIBRARY}")
endif()

unset(_PAIMON_AVRO_LIBRARY_NAMES)
unset(_PAIMON_AVRO_FIND_ARGS)
unset(_PAIMON_AVRO_ROOTS)
Loading
Loading