diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c0884f51e4..d5e7f7685b 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -155,7 +155,6 @@ jobs: cd build cmake .. -G "Visual Studio 17 2022" \ -DCMAKE_BUILD_TYPE=RELEASE \ - -DBUILD_LIBHDFSPP=OFF \ -DBUILD_TOOLS=OFF \ -DBUILD_JAVA=OFF \ -DANALYZE_JAVA=OFF \ diff --git a/CMakeLists.txt b/CMakeLists.txt index ae0f9b44f3..3a23e1258b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,9 +41,6 @@ option (ANALYZE_JAVA "Run static analysis of the Java" OFF) -option (BUILD_LIBHDFSPP - "Include LIBHDFSPP library in the build process" - OFF) option (BUILD_SPARSEHASH "Include sparsehash library in the build process" diff --git a/c++/include/orc/OrcFile.hh b/c++/include/orc/OrcFile.hh index ea71567c5f..1b5a61139b 100644 --- a/c++/include/orc/OrcFile.hh +++ b/c++/include/orc/OrcFile.hh @@ -135,14 +135,6 @@ namespace orc { std::unique_ptr readLocalFile(const std::string& path, ReaderMetrics* metrics = nullptr); - /** - * Create a stream to an HDFS file. - * @param path the uri of the file in HDFS - * @param metrics the metrics of the reader - */ - [[deprecated("readHdfsFile is deprecated in 2.0.1")]] std::unique_ptr readHdfsFile( - const std::string& path, ReaderMetrics* metrics = nullptr); - /** * Create a reader to read the ORC file. * @param stream the stream to read diff --git a/c++/libs/libhdfspp/imported_timestamp b/c++/libs/libhdfspp/imported_timestamp deleted file mode 100644 index 84965ce4a5..0000000000 --- a/c++/libs/libhdfspp/imported_timestamp +++ /dev/null @@ -1,10 +0,0 @@ -Wed Aug 30 10:56:51 EDT 2017 -HDFS-10787 -commit 9587bb04a818a2661e264f619b09c15ce10ff38e -Author: Anatoli Shein -Date: Wed Aug 30 10:49:42 2017 -0400 - - fixed warnings3 -diffs: -------------- - -------------- -Wed Aug 30 10:56:51 EDT 2017 diff --git a/c++/libs/libhdfspp/libhdfspp.tar.gz b/c++/libs/libhdfspp/libhdfspp.tar.gz deleted file mode 100644 index 35c4d6127a..0000000000 Binary files a/c++/libs/libhdfspp/libhdfspp.tar.gz and /dev/null differ diff --git a/c++/libs/libhdfspp/pull_hdfs.sh b/c++/libs/libhdfspp/pull_hdfs.sh deleted file mode 100755 index a207a93f88..0000000000 --- a/c++/libs/libhdfspp/pull_hdfs.sh +++ /dev/null @@ -1,32 +0,0 @@ -if [ -z "$1" ]; then - echo "Usage: pull_hdfs [path_to_hdfs_git_root]" - exit 1; -fi -if [ ! -d "$1" ]; then - echo "$1 is not a directory" -fi -if [ ! -d "$1/hadoop-hdfs-project" ]; then - echo "$1 is not the root of a hadoop git checkout" -fi - -HADOOP_ROOT=$1 -echo HADOOP_ROOT=$HADOOP_ROOT -OUT=$(readlink -m `dirname $0`) -echo OUT=$OUT -TS=$OUT/imported_timestamp - - cd $HADOOP_ROOT && - mvn -pl :hadoop-hdfs-native-client -Pnative compile -Dnative_make_args="copy_hadoop_files" - (date > $TS; git rev-parse --abbrev-ref HEAD >> $TS; git log -n 1 >> $TS; \ - echo "diffs: --------------" >> $TS; git diff HEAD >> $TS; \ - echo " --------------" >> $TS) - cd $OUT && - #Delete everything except for pull_hdfs.sh and imported_timestamp - find . ! -name 'pull_hdfs.sh' ! -name 'imported_timestamp' ! -name '.' ! -name '..' -exec rm -rf {} + && - cp -R $HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp . && - cp -R $HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/libhdfspp/extern libhdfspp/ && - cd libhdfspp && - tar -czf ../libhdfspp.tar.gz * && - cd .. && - rm -rf libhdfspp && - date >> $TS \ No newline at end of file diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt index a1fd549ceb..efddae23ea 100644 --- a/c++/src/CMakeLists.txt +++ b/c++/src/CMakeLists.txt @@ -191,9 +191,6 @@ set(SOURCE_FILES Vector.cc Writer.cc) -if(BUILD_LIBHDFSPP) - set(SOURCE_FILES ${SOURCE_FILES} OrcHdfsFile.cc) -endif(BUILD_LIBHDFSPP) if(BUILD_ENABLE_AVX512) set(SOURCE_FILES @@ -212,7 +209,6 @@ target_link_libraries (orc $ $ $ - $ $> ) @@ -227,16 +223,12 @@ target_include_directories (orc PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} - ${LIBHDFSPP_INCLUDE_DIR} ) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") target_compile_options(orc PRIVATE -Wall -Wextra $<$:-Werror>) endif () -if (BUILD_LIBHDFSPP) - target_compile_definitions(orc PUBLIC -DBUILD_LIBHDFSPP) -endif (BUILD_LIBHDFSPP) if (BUILD_SPARSEHASH) target_compile_definitions(orc PUBLIC -DBUILD_SPARSEHASH) diff --git a/c++/src/OrcFile.cc b/c++/src/OrcFile.cc index be86724329..470adfc1f3 100644 --- a/c++/src/OrcFile.cc +++ b/c++/src/OrcFile.cc @@ -103,15 +103,7 @@ namespace orc { } std::unique_ptr readFile(const std::string& path, ReaderMetrics* metrics) { -#ifdef BUILD_LIBHDFSPP - if (strncmp(path.c_str(), "hdfs://", 7) == 0) { - return orc::readHdfsFile(std::string(path), metrics); - } else { -#endif - return orc::readLocalFile(std::string(path), metrics); -#ifdef BUILD_LIBHDFSPP - } -#endif + return orc::readLocalFile(std::string(path), metrics); } DIAGNOSTIC_POP diff --git a/c++/src/OrcHdfsFile.cc b/c++/src/OrcHdfsFile.cc deleted file mode 100644 index d878e276cb..0000000000 --- a/c++/src/OrcHdfsFile.cc +++ /dev/null @@ -1,178 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "orc/OrcFile.hh" - -#include "Adaptor.hh" -#include "Utils.hh" -#include "orc/Exceptions.hh" - -#include -#include -#include -#include -#include -#include -#include - -#include "hdfspp/hdfspp.h" - -namespace orc { - - DIAGNOSTIC_PUSH - -#ifdef __clang__ - DIAGNOSTIC_IGNORE("-Wunused-private-field") -#endif - - class HdfsFileInputStream : public InputStream { - private: - std::string filename_; - std::unique_ptr file_; - std::unique_ptr fileSystem_; - uint64_t totalLength_; - const uint64_t readSize_ = 1024 * 1024; // 1 MB - ReaderMetrics* metrics_; - - public: - HdfsFileInputStream(std::string filename, ReaderMetrics* metrics) : metrics_(metrics) { - filename_ = filename; - - // Building a URI object from the given uri_path - hdfs::URI uri; - try { - uri = hdfs::URI::parse_from_string(filename_); - } catch (const hdfs::uri_parse_error&) { - throw ParseError("Malformed URI: " + filename_); - } - - // This sets conf path to default "$HADOOP_CONF_DIR" or "/etc/hadoop/conf" - // and loads configs core-site.xml and hdfs-site.xml from the conf path - hdfs::ConfigParser parser; - if (!parser.LoadDefaultResources()) { - throw ParseError("Could not load default resources. "); - } - auto stats = parser.ValidateResources(); - // validating core-site.xml - if (!stats[0].second.ok()) { - throw ParseError(stats[0].first + " is invalid: " + stats[0].second.ToString()); - } - // validating hdfs-site.xml - if (!stats[1].second.ok()) { - throw ParseError(stats[1].first + " is invalid: " + stats[1].second.ToString()); - } - hdfs::Options options; - if (!parser.get_options(options)) { - throw ParseError("Could not load Options object. "); - } - hdfs::IoService* io_service = hdfs::IoService::New(); - // Wrapping file_system into a unique pointer to guarantee deletion - fileSystem_ = - std::unique_ptr(hdfs::FileSystem::New(io_service, "", options)); - if (fileSystem_.get() == nullptr) { - throw ParseError("Can't create FileSystem object. "); - } - hdfs::Status status; - // Checking if the user supplied the host - if (!uri.get_host().empty()) { - // Using port if supplied, otherwise using "" to look up port in configs - std::string port = uri.has_port() ? std::to_string(uri.get_port()) : ""; - status = fileSystem_->Connect(uri.get_host(), port); - if (!status.ok()) { - throw ParseError("Can't connect to " + uri.get_host() + ":" + port + ". " + - status.ToString()); - } - } else { - status = fileSystem_->ConnectToDefaultFs(); - if (!status.ok()) { - if (!options.defaultFS.get_host().empty()) { - throw ParseError("Error connecting to " + options.defaultFS.str() + ". " + - status.ToString()); - } else { - throw ParseError("Error connecting to the cluster: defaultFS is empty. " + - status.ToString()); - } - } - } - - if (fileSystem_.get() == nullptr) { - throw ParseError("Can't connect the file system. "); - } - - hdfs::FileHandle* file_raw = nullptr; - status = fileSystem_->Open(uri.get_path(true), &file_raw); - if (!status.ok()) { - throw ParseError("Can't open " + uri.get_path(true) + ". " + status.ToString()); - } - // Wrapping file_raw into a unique pointer to guarantee deletion - file_.reset(file_raw); - - hdfs::StatInfo stat_info; - status = fileSystem_->GetFileInfo(uri.get_path(true), stat_info); - if (!status.ok()) { - throw ParseError("Can't stat " + uri.get_path(true) + ". " + status.ToString()); - } - totalLength_ = stat_info.length; - } - - uint64_t getLength() const override { - return totalLength_; - } - - uint64_t getNaturalReadSize() const override { - return readSize_; - } - - void read(void* buf, uint64_t length, uint64_t offset) override { - SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount); - if (!buf) { - throw ParseError("Buffer is null"); - } - - char* buf_ptr = reinterpret_cast(buf); - hdfs::Status status; - size_t total_bytes_read = 0; - size_t last_bytes_read = 0; - - do { - status = - file_->PositionRead(buf_ptr, static_cast(length) - total_bytes_read, - static_cast(offset + total_bytes_read), &last_bytes_read); - if (!status.ok()) { - throw ParseError("Error reading the file: " + status.ToString()); - } - total_bytes_read += last_bytes_read; - buf_ptr += last_bytes_read; - } while (total_bytes_read < length); - } - - const std::string& getName() const override { - return filename_; - } - - ~HdfsFileInputStream() override; - }; - - DIAGNOSTIC_POP - - HdfsFileInputStream::~HdfsFileInputStream() {} - - std::unique_ptr readHdfsFile(const std::string& path, ReaderMetrics* metrics) { - return std::make_unique(path, metrics); - } -} // namespace orc diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake index c03afa8f29..a130b7dfee 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -702,70 +702,4 @@ if(BUILD_SPARSEHASH) endblock() endif() -# ---------------------------------------------------------------------- -# LIBHDFSPP -if(BUILD_LIBHDFSPP) - set (BUILD_LIBHDFSPP FALSE) - if(ORC_CXX_HAS_THREAD_LOCAL) - find_package(CyrusSASL) - find_package(OpenSSL) - find_package(Threads) - if (CYRUS_SASL_SHARED_LIB AND OPENSSL_LIBRARIES) - set (BUILD_LIBHDFSPP TRUE) - set (LIBHDFSPP_PREFIX "${THIRDPARTY_DIR}/libhdfspp_ep-install") - set (LIBHDFSPP_INCLUDE_DIR "${LIBHDFSPP_PREFIX}/include") - set (LIBHDFSPP_STATIC_LIB_NAME hdfspp_static) - set (LIBHDFSPP_STATIC_LIB "${LIBHDFSPP_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${LIBHDFSPP_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") - set (LIBHDFSPP_SRC_URL "${PROJECT_SOURCE_DIR}/c++/libs/libhdfspp/libhdfspp.tar.gz") - set (LIBHDFSPP_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${LIBHDFSPP_PREFIX} - -DPROTOBUF_INCLUDE_DIR=${PROTOBUF_INCLUDE_DIR} - -DPROTOBUF_LIBRARY=${PROTOBUF_STATIC_LIB} - -DPROTOBUF_PROTOC_LIBRARY=${PROTOC_STATIC_LIB} - -DPROTOBUF_PROTOC_EXECUTABLE=${PROTOBUF_EXECUTABLE} - -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} - -DCMAKE_C_FLAGS=${EP_C_FLAGS} - -DBUILD_SHARED_LIBS=OFF - -DHDFSPP_LIBRARY_ONLY=TRUE - -DBUILD_SHARED_HDFSPP=FALSE) - - if (BUILD_POSITION_INDEPENDENT_LIB) - set(LIBHDFSPP_CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS} -DCMAKE_POSITION_INDEPENDENT_CODE=ON) - endif () - - ExternalProject_Add (libhdfspp_ep - DEPENDS orc::protobuf - URL ${LIBHDFSPP_SRC_URL} - LOG_DOWNLOAD 0 - LOG_CONFIGURE 0 - LOG_BUILD 0 - LOG_INSTALL 0 - BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}" - CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS}) - - orc_add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} ${LIBHDFSPP_INCLUDE_DIR}) - - set (LIBHDFSPP_LIBRARIES - libhdfspp - ${CYRUS_SASL_SHARED_LIB} - ${OPENSSL_LIBRARIES} - ${CMAKE_THREAD_LIBS_INIT}) - - elseif(CYRUS_SASL_SHARED_LIB) - message(STATUS - "WARNING: Libhdfs++ library was not built because the required OpenSSL library was not found") - elseif(OPENSSL_LIBRARIES) - message(STATUS - "WARNING: Libhdfs++ library was not built because the required CyrusSASL library was not found") - else () - message(STATUS - "WARNING: Libhdfs++ library was not built because the required CyrusSASL and OpenSSL libraries were not found") - endif(CYRUS_SASL_SHARED_LIB AND OPENSSL_LIBRARIES) - else(ORC_CXX_HAS_THREAD_LOCAL) - message(STATUS - "WARNING: Libhdfs++ library was not built because the required feature - thread_local storage is not supported by your compiler. Known compilers that - support this feature: GCC, Visual Studio, Clang (community version), - Clang (version for iOS 9 and later), Clang (version for Xcode 8 and later)") - endif(ORC_CXX_HAS_THREAD_LOCAL) -endif(BUILD_LIBHDFSPP) + diff --git a/conan/all/conanfile.py b/conan/all/conanfile.py index cc79f5b0dd..56c7b34079 100644 --- a/conan/all/conanfile.py +++ b/conan/all/conanfile.py @@ -119,7 +119,6 @@ def generate(self): tc.variables["BUILD_JAVA"] = False tc.variables["BUILD_CPP_TESTS"] = False tc.variables["BUILD_TOOLS"] = self.options.build_tools - tc.variables["BUILD_LIBHDFSPP"] = False tc.variables["BUILD_POSITION_INDEPENDENT_LIB"] = bool(self.options.get_safe("fPIC", True)) tc.variables["INSTALL_VENDORED_LIBS"] = False # AVX512 support is determined by ORC_USER_SIMD_LEVEL env var at runtime, defaults to off