Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ if(SOURCEMETA_CORE_JSONSCHEMA)
add_subdirectory(src/core/jsonschema)
endif()

# The gzip module requires two compression backends (LibDeflate and ZLIB),
# so both packages are located before descending into its sources
if(SOURCEMETA_CORE_GZIP)
find_package(LibDeflate REQUIRED)
find_package(ZLIB REQUIRED)
add_subdirectory(src/core/gzip)
endif()

if(SOURCEMETA_CORE_JSONL)
add_subdirectory(src/core/jsonl)
endif()
Expand All @@ -157,11 +163,6 @@ if(SOURCEMETA_CORE_SEMVER)
add_subdirectory(src/core/semver)
endif()

if(SOURCEMETA_CORE_GZIP)
find_package(LibDeflate REQUIRED)
add_subdirectory(src/core/gzip)
endif()

if(SOURCEMETA_CORE_HTML)
add_subdirectory(src/core/html)
endif()
Expand Down Expand Up @@ -278,6 +279,10 @@ if(SOURCEMETA_CORE_TESTS)
add_subdirectory(test/jsonschema)
endif()

if(SOURCEMETA_CORE_GZIP)
add_subdirectory(test/gzip)
endif()

if(SOURCEMETA_CORE_JSONL)
add_subdirectory(test/jsonl)
endif()
Expand All @@ -290,10 +295,6 @@ if(SOURCEMETA_CORE_TESTS)
add_subdirectory(test/semver)
endif()

if(SOURCEMETA_CORE_GZIP)
add_subdirectory(test/gzip)
endif()

if(SOURCEMETA_CORE_HTML)
add_subdirectory(test/html)
endif()
Expand Down
1 change: 1 addition & 0 deletions DEPENDENCIES
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b86
jsontestsuite https://github.com/nst/JSONTestSuite d64aefb55228d9584d3e5b2433f720ea8fd00c82
yaml-test-suite https://github.com/yaml/yaml-test-suite data-2022-01-17
cmark-gfm https://github.com/github/cmark-gfm 587a12bb54d95ac37241377e6ddc93ea0e45439b
zlib https://github.com/madler/zlib v1.3.2
jsonschema-2020-12 https://github.com/json-schema-org/json-schema-spec 769daad75a9553562333a8937a187741cb708c72
jsonschema-2019-09 https://github.com/json-schema-org/json-schema-spec 41014ea723120ce70b314d72f863c6929d9f3cfd
jsonschema-draft7 https://github.com/json-schema-org/json-schema-spec 567f768506aaa33a38e552c85bf0586029ef1b32
Expand Down
Binary file added benchmark/files/large.jsonl.gz
Binary file not shown.
21 changes: 20 additions & 1 deletion benchmark/jsonl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <cstddef> // std::size_t
#include <filesystem> // std::filesystem
#include <fstream> // std::ifstream
#include <string> // std::string
#include <ios> // std::ios

static void JSONL_Parse_Large(benchmark::State &state) {
const std::filesystem::path filepath{std::string{CURRENT_DIRECTORY} +
Expand All @@ -25,4 +25,23 @@ static void JSONL_Parse_Large(benchmark::State &state) {
}
}

// Benchmark: open the GZIP-compressed large JSONL fixture in binary mode and
// walk every entry it yields, counting them along the way
static void JSONL_Parse_Large_GZIP(benchmark::State &state) {
  const std::filesystem::path fixture{std::string{CURRENT_DIRECTORY} +
                                      "/files/large.jsonl.gz"};

  for (auto _ : state) {
    // The file is reopened on every iteration so each run starts from the
    // first compressed byte
    std::ifstream input{fixture, std::ios::binary};
    assert(input.is_open());

    std::size_t entries{0};
    for ([[maybe_unused]] const auto &document : sourcemeta::core::JSONL{
             input, sourcemeta::core::JSONL::Mode::GZIP}) {
      ++entries;
    }

    assert(entries == 10000);
    // Keep the counter observable so the loop cannot be discarded
    benchmark::DoNotOptimize(entries);
  }
}

BENCHMARK(JSONL_Parse_Large);
BENCHMARK(JSONL_Parse_Large_GZIP);
126 changes: 126 additions & 0 deletions cmake/FindZLIB.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
if(NOT ZLIB_FOUND)
# Location of the vendored zlib checkout, plus the headers that are attached
# to the target as its PUBLIC_HEADER / PRIVATE_HEADER properties further down
set(ZLIB_DIR "${PROJECT_SOURCE_DIR}/vendor/zlib")
set(ZLIB_PUBLIC_HEADER "${ZLIB_DIR}/zlib.h")
set(ZLIB_PRIVATE_HEADERS "${ZLIB_DIR}/zconf.h")

# Build zlib directly from the vendored sources. No STATIC/SHARED keyword is
# given, so the library type follows BUILD_SHARED_LIBS. Headers are listed
# next to the sources they belong to.
add_library(zlib
"${ZLIB_PUBLIC_HEADER}" ${ZLIB_PRIVATE_HEADERS}
"${ZLIB_DIR}/adler32.c"
"${ZLIB_DIR}/compress.c"
"${ZLIB_DIR}/crc32.c"
"${ZLIB_DIR}/crc32.h"
"${ZLIB_DIR}/deflate.c"
"${ZLIB_DIR}/deflate.h"
"${ZLIB_DIR}/gzclose.c"
"${ZLIB_DIR}/gzguts.h"
"${ZLIB_DIR}/gzlib.c"
"${ZLIB_DIR}/gzread.c"
"${ZLIB_DIR}/gzwrite.c"
"${ZLIB_DIR}/infback.c"
"${ZLIB_DIR}/inffast.c"
"${ZLIB_DIR}/inffast.h"
"${ZLIB_DIR}/inffixed.h"
"${ZLIB_DIR}/inflate.c"
"${ZLIB_DIR}/inflate.h"
"${ZLIB_DIR}/inftrees.c"
"${ZLIB_DIR}/inftrees.h"
"${ZLIB_DIR}/trees.c"
"${ZLIB_DIR}/trees.h"
"${ZLIB_DIR}/uncompr.c"
"${ZLIB_DIR}/zutil.c"
"${ZLIB_DIR}/zutil.h")

# PUBLIC: the macros apply both when compiling zlib itself and when compiling
# anything that links against it, so both sides see the same configuration.
# NOTE(review): NO_FSEEKO and _LARGEFILE64_SOURCE presumably steer zlib's
# file-offset handling -- confirm against zlib's own build files
target_compile_definitions(zlib PUBLIC NO_FSEEKO)
target_compile_definitions(zlib PUBLIC _LARGEFILE64_SOURCE=1)

# Compiler flags for the vendored zlib sources. Everything is PRIVATE so
# that none of it leaks into targets that link against zlib.
if(SOURCEMETA_COMPILER_MSVC)
  target_compile_options(zlib PRIVATE /W3 /MP /wd4996)
  target_compile_definitions(zlib PRIVATE _CRT_SECURE_NO_WARNINGS)
else()
  # Strict warnings, minus the categories listed as -Wno-* below
  target_compile_options(zlib PRIVATE
    -Wall
    -Wextra
    -Wpedantic
    -Werror
    -Wdouble-promotion
    -Wfloat-equal
    -Wmissing-declarations
    -Wshadow
    -Wwrite-strings
    -Wno-cast-align
    -Wno-cast-qual
    -Wno-format-nonliteral
    -Wno-sign-conversion
    -Wno-shorten-64-to-32
    -Wno-implicit-int-conversion
    -Wno-comma
    -Wno-implicit-fallthrough)

  # Optimization flags for every configuration except Debug. This is a
  # generator expression rather than a configure-time check of
  # CMAKE_BUILD_TYPE because that variable is empty under multi-config
  # generators (Xcode, Ninja Multi-Config), which would otherwise apply
  # these flags to Debug builds as well.
  target_compile_options(zlib PRIVATE
    "$<$<NOT:$<CONFIG:Debug>>:-funroll-loops;-fstrict-aliasing;-ftree-vectorize;-fno-math-errno;-fwrapv>")

  # Disable LTO for zlib to work around GCC LTO linker plugin not
  # properly rescanning this archive for transitive dependencies
  if(SOURCEMETA_COMPILER_GCC)
    target_compile_options(zlib PRIVATE -fno-lto)
  endif()
endif()

# Consumers in the build tree include straight from the vendored directory;
# consumers of the installed package use the install include directory
target_include_directories(zlib PUBLIC
"$<BUILD_INTERFACE:${ZLIB_DIR}>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>")

# Namespaced alias, matching the imported target name that CMake's stock
# FindZLIB module provides
add_library(ZLIB::ZLIB ALIAS zlib)

# Build settings for the vendored library: plain C11 without compiler
# extensions, position-independent code, and default (non-hidden) symbol
# visibility. EXPORT_NAME is the name the target gets in an installed export.
set_target_properties(zlib
PROPERTIES
OUTPUT_NAME zlib
PUBLIC_HEADER "${ZLIB_PUBLIC_HEADER}"
PRIVATE_HEADER "${ZLIB_PRIVATE_HEADERS}"
C_STANDARD 11
C_STANDARD_REQUIRED ON
C_EXTENSIONS OFF
POSITION_INDEPENDENT_CODE ON
C_VISIBILITY_PRESET "default"
# NOTE(review): CMake documents VISIBILITY_INLINES_HIDDEN without a language
# prefix, so the next line looks like an inert custom property -- confirm
C_VISIBILITY_INLINES_HIDDEN FALSE
VISIBILITY_INLINES_HIDDEN OFF
WINDOWS_EXPORT_ALL_SYMBOLS TRUE
EXPORT_NAME ZLIB)

# Install the library, its headers, and a minimal package configuration so
# that an installed tree can be consumed through find_package(... CONFIG)
if(SOURCEMETA_CORE_INSTALL)
  include(GNUInstallDirs)
  install(TARGETS zlib
    EXPORT zlib
    PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
      COMPONENT sourcemeta_core_dev
    PRIVATE_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
      COMPONENT sourcemeta_core_dev
    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
      COMPONENT sourcemeta_core
    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      COMPONENT sourcemeta_core
      NAMELINK_COMPONENT sourcemeta_core_dev
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      COMPONENT sourcemeta_core_dev)
  install(EXPORT zlib
    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/zlib"
    NAMESPACE ZLIB::
    COMPONENT sourcemeta_core_dev)

  # The generated config file includes the exported targets file. The
  # check_required_components() macro only exists if some package
  # configuration expanded @PACKAGE_INIT@ earlier in the same CMake run, so
  # the call is guarded: loading this package on its own must not abort with
  # an unknown command error.
  file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/zlib-config.cmake"
    "include(\"\${CMAKE_CURRENT_LIST_DIR}/zlib.cmake\")\n"
    "if(COMMAND check_required_components)\n"
    "  check_required_components(\"zlib\")\n"
    "endif()\n")
  install(FILES
    "${CMAKE_CURRENT_BINARY_DIR}/zlib-config.cmake"
    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/zlib"
    COMPONENT sourcemeta_core_dev)
endif()

# Record success; the NOT ZLIB_FOUND guard at the top of this module checks
# this variable before declaring the target
set(ZLIB_FOUND ON)
endif()
4 changes: 4 additions & 0 deletions config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,13 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS})
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake")
elseif(component STREQUAL "jsonl")
find_dependency(LibDeflate CONFIG)
find_dependency(ZLIB CONFIG)
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_gzip.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonl.cmake")
elseif(component STREQUAL "jsonpointer")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake")
Expand Down Expand Up @@ -118,6 +121,7 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS})
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_semver.cmake")
elseif(component STREQUAL "gzip")
find_dependency(LibDeflate CONFIG)
find_dependency(ZLIB CONFIG)
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_gzip.cmake")
elseif(component STREQUAL "html")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake")
Expand Down
7 changes: 5 additions & 2 deletions src/core/gzip/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME gzip
PRIVATE_HEADERS error.h
SOURCES gzip.cc)
PRIVATE_HEADERS error.h streambuf.h
SOURCES gzip.cc streambuf.cc)

# LibDeflate: way faster for full buffer decompression
target_link_libraries(sourcemeta_core_gzip PRIVATE LibDeflate::LibDeflate)
# ZLIB: supports streaming
target_link_libraries(sourcemeta_core_gzip PRIVATE ZLIB::ZLIB)

if(SOURCEMETA_CORE_INSTALL)
sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME gzip)
Expand Down
3 changes: 3 additions & 0 deletions src/core/gzip/include/sourcemeta/core/gzip.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
#include <sourcemeta/core/gzip_export.h>
#endif

// NOLINTBEGIN(misc-include-cleaner)
#include <sourcemeta/core/gzip_error.h>
#include <sourcemeta/core/gzip_streambuf.h>
// NOLINTEND(misc-include-cleaner)

#include <cstddef> // std::size_t
#include <cstdint> // std::uint8_t
Expand Down
46 changes: 46 additions & 0 deletions src/core/gzip/include/sourcemeta/core/gzip_streambuf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#ifndef SOURCEMETA_CORE_GZIP_STREAMBUF_H_
#define SOURCEMETA_CORE_GZIP_STREAMBUF_H_

#ifndef SOURCEMETA_CORE_GZIP_EXPORT
#include <sourcemeta/core/gzip_export.h>
#endif

#include <istream> // std::istream
#include <memory> // std::unique_ptr
#include <streambuf> // std::streambuf

namespace sourcemeta::core {

/// @ingroup gzip
/// A stream buffer that performs streaming GZIP decompression (RFC 1952) over
/// an input stream. Instances can be neither copied nor moved.
class SOURCEMETA_CORE_GZIP_EXPORT GZIPStreamBuffer : public std::streambuf {
public:
  /// Wrap a stream of GZIP-compressed bytes. The constructor is `explicit` so
  /// that a `std::istream` is never silently converted into a decompressing
  /// buffer.
  // NOTE(review): the stream is taken by reference, so it presumably has to
  // outlive this buffer -- confirm against the implementation
  explicit GZIPStreamBuffer(std::istream &compressed_stream);
  ~GZIPStreamBuffer() override;

  GZIPStreamBuffer(const GZIPStreamBuffer &) = delete;
  auto operator=(const GZIPStreamBuffer &) -> GZIPStreamBuffer & = delete;
  GZIPStreamBuffer(GZIPStreamBuffer &&) = delete;
  auto operator=(GZIPStreamBuffer &&) -> GZIPStreamBuffer & = delete;

protected:
  /// Hook that `std::streambuf` invokes when its get area has been exhausted
  auto underflow() -> int_type override;

private:
  // Exporting symbols that depends on the standard C++ library is considered
  // safe.
  // https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN
  //
  // The warning state is saved and restored with push/pop so that including
  // this header does not reset C4251 to the compiler default for the rest of
  // the translation unit.
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4251)
#endif
  // Implementation state is kept behind an opaque pointer; `Internal` is only
  // declared here and defined outside this header
  struct Internal;
  std::unique_ptr<Internal> internal;
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
};

} // namespace sourcemeta::core

#endif
Loading
Loading