diff --git a/.gitmodules b/.gitmodules index 05927d49d1..b3b0cf963c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,9 @@ [submodule "others/mbedtls"] path = others/mbedtls url = https://github.com/Mbed-TLS/mbedtls.git +[submodule "others/simdjson"] + path = others/simdjson + url = https://github.com/simdjson/simdjson.git +[submodule "others/jsoncons"] + path = others/jsoncons + url = https://github.com/danielaparker/jsoncons.git diff --git a/README.md b/README.md index 50b8833319..9d3c7ab8d0 100644 --- a/README.md +++ b/README.md @@ -253,6 +253,12 @@ It is recommended to use assertions where applicable, and to enable them with The source tree includes a Benchmark tool that can help measure library performance. The tool is located in the `test/benchmark/` directory. The build process also creates the binary here, so you will have the tool after the compilation is finished. +To compare the JSON regression suite across both supported JSON backends with separate build directories, run: + +```shell +$ ./test/run-json-backend-matrix.sh --jobs 4 +``` + To run, just type: ```shell @@ -323,6 +329,8 @@ Note that the tool does not call the last phase (logging). Please remember to reset `basic_rules.conf` if you want to try with a different ruleset. +The benchmark directory also includes `json_benchmark`, which targets JSON request-body processing with fixed scenario classes such as large objects, deep nesting, numeric payloads and UTF-8-heavy strings. 
+ ## Reporting Issues If you are facing a configuration issue or something is not working as you diff --git a/build/msc_find_lib.m4 b/build/msc_find_lib.m4 index 77fdb38d38..dc800a9839 100644 --- a/build/msc_find_lib.m4 +++ b/build/msc_find_lib.m4 @@ -10,11 +10,11 @@ dnl Sets and AC_SUBSTs: dnl ${NAME}_CFLAGS, ${NAME}_LDADD, ${NAME}_LDFLAGS, dnl ${NAME}_VERSION, ${NAME}_DISPLAY, ${NAME}_FOUND (0/1/2) dnl -dnl NAME - Variable prefix (e.g., YAJL, CURL, LIBXML2) +dnl NAME - Variable prefix (e.g., CURL, LIBXML2, LMDB) dnl PKG_NAMES - Space-separated pkg-config names to try -dnl HEADER - Header file to look for (e.g., yajl/yajl_parse.h) +dnl HEADER - Header file to look for (e.g., libxml/parser.h) dnl LIB_NAMES - Space-separated library names for -l flags -dnl EXTRA_CFLAGS - Additional CFLAGS when found (e.g., -DWITH_YAJL) +dnl EXTRA_CFLAGS - Additional CFLAGS when found (e.g., -DWITH_LIBXML2) dnl MIN_VERSION - Optional minimum version for pkg-config check dnl WITH_NAME - Optional --with-X name if different from lowercased NAME @@ -208,7 +208,7 @@ if test "${_msc_header_dir}" = "."; then _msc_check_inc_path="$4" fi else - # Header with subdirectory (e.g., "yajl/yajl_parse.h") + # Header with subdirectory (e.g., "libxml/parser.h") if test -e "$4/include/$2"; then _msc_check_inc_path="$4/include" elif test -e "$4/$2"; then diff --git a/build/release.sh b/build/release.sh old mode 100755 new mode 100644 diff --git a/build/win32/CMakeLists.txt b/build/win32/CMakeLists.txt index fbf39f08d9..1fbe029c91 100644 --- a/build/win32/CMakeLists.txt +++ b/build/win32/CMakeLists.txt @@ -7,6 +7,8 @@ option(WITH_LUA "Include LUA support" ON) option(WITH_LIBXML2 "Include LibXML2 support" ON) option(WITH_MAXMIND "Include MaxMind support" ON) option(WITH_CURL "Include CURL support" ON) +set(JSON_BACKEND "simdjson" CACHE STRING "Select internal JSON backend (simdjson or jsoncons)") +set_property(CACHE JSON_BACKEND PROPERTY STRINGS simdjson jsoncons) option(USE_ASAN "Build with Address 
Sanitizer" OFF) @@ -51,6 +53,8 @@ target_compile_definitions(libinjection PRIVATE LIBINJECTION_VERSION="${LIBINJEC project(mbedcrypto C) set(MBEDTLS_DIR ${BASE_DIR}/others/mbedtls) +set(SIMDJSON_DIR ${BASE_DIR}/others/simdjson/singleheader) +set(JSONCONS_DIR ${BASE_DIR}/others/jsoncons/include) add_library(mbedcrypto STATIC ${MBEDTLS_DIR}/library/base64.c ${MBEDTLS_DIR}/library/sha1.c ${MBEDTLS_DIR}/library/md5.c ${MBEDTLS_DIR}/library/platform_util.c ${MBEDTLS_DIR}/library/constant_time.c) @@ -87,7 +91,25 @@ set(PACKAGE_VERSION "${PROJECT_VERSION}") set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_TARNAME "${PACKAGE_NAME}") -set(HAVE_YAJL 1) # should always be one, mandatory dependency +if(NOT JSON_BACKEND STREQUAL "simdjson" AND NOT JSON_BACKEND STREQUAL "jsoncons") + message(FATAL_ERROR "Unsupported JSON_BACKEND '${JSON_BACKEND}'. Use simdjson or jsoncons.") +endif() + +unset(MSC_JSON_BACKEND_SIMDJSON) +unset(MSC_JSON_BACKEND_JSONCONS) +set(JSON_BACKEND_SIMDJSON_SOURCE ${BASE_DIR}/src/request_body_processor/json_backend_simdjson.cc) +set(JSON_BACKEND_JSONCONS_SOURCE ${BASE_DIR}/src/request_body_processor/json_backend_jsoncons.cc) + +if(JSON_BACKEND STREQUAL "simdjson") + set(MSC_JSON_BACKEND_SIMDJSON 1) + set(JSON_BACKEND_SOURCES ${JSON_BACKEND_SIMDJSON_SOURCE} ${SIMDJSON_DIR}/simdjson.cpp) + set(JSON_BACKEND_INCLUDE_DIR ${SIMDJSON_DIR}) +else() + set(MSC_JSON_BACKEND_JSONCONS 1) + set(JSON_BACKEND_SOURCES ${JSON_BACKEND_JSONCONS_SOURCE}) + set(JSON_BACKEND_INCLUDE_DIR ${JSONCONS_DIR}) +endif() + set(HAVE_GEOIP 0) # should always be zero, no conan package available set(HAVE_SSDEEP 0) # should always be zero, no conan package available @@ -119,7 +141,6 @@ macro(include_package package flag) endif() endmacro() -include_package(yajl HAVE_YAJL) include_package(libxml2 HAVE_LIBXML2) include_package(lua HAVE_LUA) include_package(CURL HAVE_CURL) @@ -133,11 +154,13 @@ include_package(maxminddb HAVE_MAXMIND) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) 
file(GLOB_RECURSE libModSecuritySources ${BASE_DIR}/src/*.cc) +list(REMOVE_ITEM libModSecuritySources ${JSON_BACKEND_SIMDJSON_SOURCE} ${JSON_BACKEND_JSONCONS_SOURCE}) +list(APPEND libModSecuritySources ${JSON_BACKEND_SOURCES}) add_library(libModSecurity SHARED ${libModSecuritySources}) target_compile_definitions(libModSecurity PRIVATE WITH_PCRE2) -target_include_directories(libModSecurity PRIVATE ${BASE_DIR} ${BASE_DIR}/headers ${BASE_DIR}/others ${MBEDTLS_DIR}/include) +target_include_directories(libModSecurity PRIVATE ${BASE_DIR} ${BASE_DIR}/headers ${BASE_DIR}/others ${MBEDTLS_DIR}/include ${JSON_BACKEND_INCLUDE_DIR}) target_link_libraries(libModSecurity PRIVATE pcre2::pcre2 libinjection mbedcrypto Poco::Poco Iphlpapi.lib) macro(add_package_dependency project compile_definition link_library flag) @@ -147,7 +170,6 @@ macro(add_package_dependency project compile_definition link_library flag) endif() endmacro() -add_package_dependency(libModSecurity WITH_YAJL yajl::yajl HAVE_YAJL) add_package_dependency(libModSecurity WITH_LIBXML2 LibXml2::LibXml2 HAVE_LIBXML2) add_package_dependency(libModSecurity WITH_LUA lua::lua HAVE_LUA) if(HAVE_LUA) @@ -164,9 +186,8 @@ project(libModSecurityTests) function(setTestTargetProperties executable) target_compile_definitions(${executable} PRIVATE WITH_PCRE2) - target_include_directories(${executable} PRIVATE ${BASE_DIR} ${BASE_DIR}/headers) + target_include_directories(${executable} PRIVATE ${BASE_DIR} ${BASE_DIR}/headers ${JSONCONS_DIR}) target_link_libraries(${executable} PRIVATE libModSecurity pcre2::pcre2 dirent::dirent) - add_package_dependency(${executable} WITH_YAJL yajl::yajl HAVE_YAJL) endfunction() # unit tests diff --git a/build/win32/conanfile.txt b/build/win32/conanfile.txt index b8f9721d0a..0eddc175e4 100644 --- a/build/win32/conanfile.txt +++ b/build/win32/conanfile.txt @@ -1,5 +1,4 @@ [requires] -yajl/2.1.0 pcre2/10.42 libxml2/2.12.6 lua/5.4.6 diff --git a/build/win32/config.h.cmake b/build/win32/config.h.cmake index 
2f6a73085e..d56ce7c56f 100644 --- a/build/win32/config.h.cmake +++ b/build/win32/config.h.cmake @@ -57,12 +57,15 @@ /* Define if SSDEEP is available */ #cmakedefine HAVE_SSDEEP -/* Define if YAJL is available */ -#cmakedefine HAVE_YAJL - /* Define if libcurl is available */ #cmakedefine HAVE_CURL +/* Define if jsoncons is the selected internal JSON backend */ +#cmakedefine MSC_JSON_BACKEND_JSONCONS + +/* Define if simdjson is the selected internal JSON backend */ +#cmakedefine MSC_JSON_BACKEND_SIMDJSON + /* Name of package */ #define PACKAGE "@PACKAGE_NAME@" @@ -89,4 +92,4 @@ #cmakedefine STDC_HEADERS #endif -#endif // ndef MODSECURITY_CONFIG_H \ No newline at end of file +#endif // ndef MODSECURITY_CONFIG_H diff --git a/build/yajl.m4 b/build/yajl.m4 deleted file mode 100644 index 06271e1fea..0000000000 --- a/build/yajl.m4 +++ /dev/null @@ -1,33 +0,0 @@ -dnl Check for YAJL Libraries -dnl Sets: -dnl YAJL_CFLAGS -dnl YAJL_LDADD -dnl YAJL_LDFLAGS -dnl YAJL_VERSION -dnl YAJL_DISPLAY -dnl YAJL_FOUND - -AC_DEFUN([PROG_YAJL], [ -MSC_CHECK_LIB([YAJL], [yajl2 yajl], [yajl/yajl_parse.h], [yajl], [-DWITH_YAJL]) - -# FIX: if the include directory in CFLAGS ends with "include/yajl", -# remove the suffix "/yajl". The library header files are included -# using the prefix (e.g., #include ), and -# this is even the case for the library itself (e.g., -# yajl_tree.h includes yajl/yajl_common.h). 
-_msc_yajl_new_cflags="" -for _msc_yajl_flag in $YAJL_CFLAGS; do - case "$_msc_yajl_flag" in - -I*/include/yajl) - _msc_yajl_new_flag="${_msc_yajl_flag%/yajl}" - _msc_yajl_new_cflags="$_msc_yajl_new_cflags $_msc_yajl_new_flag" - ;; - *) - _msc_yajl_new_cflags="$_msc_yajl_new_cflags $_msc_yajl_flag" - ;; - esac -done -YAJL_CFLAGS="$_msc_yajl_new_cflags" -YAJL_DISPLAY="${YAJL_LDADD}, ${YAJL_CFLAGS}" - -]) # AC_DEFUN [PROG_YAJL] diff --git a/configure.ac b/configure.ac index 03295be063..679d7284e6 100644 --- a/configure.ac +++ b/configure.ac @@ -27,7 +27,7 @@ m4_define([msc_version_with_patchlevel], [msc_version_major.msc_version_minor.msc_version_patchlevel]) m4_define([msc_version_git], - [m4_esyscmd_s(git describe)]) + [m4_esyscmd_s(git describe --tags --always 2>/dev/null || echo unknown)]) m4_define([msc_version_info], [msc_version_c_plus_a:msc_version_patchlevel:msc_version_minor]) @@ -62,6 +62,23 @@ PKG_PROG_PKG_CONFIG # Set C++ standard version and check if compiler supports it. AX_CXX_COMPILE_STDCXX(17, noext, mandatory) +AC_ARG_WITH([json-backend], + [AS_HELP_STRING([--with-json-backend=BACKEND], + [Select internal JSON backend: simdjson or jsoncons [default=simdjson]])], + [json_backend="$withval"], + [json_backend="simdjson"]) + +case "$json_backend" in + simdjson|jsoncons) + ;; + *) + AC_MSG_ERROR([Unsupported JSON backend '$json_backend'. Use simdjson or jsoncons.]) + ;; +esac + +JSON_BACKEND="$json_backend" +AC_SUBST([JSON_BACKEND]) + # Check for libinjection if ! 
test -f "${srcdir}/others/libinjection/src/libinjection_html5.c"; then AC_MSG_ERROR([\ @@ -80,7 +97,7 @@ AC_MSG_ERROR([\ ]) fi # Libinjection version -AC_DEFUN([LIBINJECTION_VERSION], m4_esyscmd_s(cd "others/libinjection" && git describe && cd ../..)) +AC_DEFUN([LIBINJECTION_VERSION], m4_esyscmd_s(cd "others/libinjection" && (git describe --tags --always 2>/dev/null || echo unknown) && cd ../..)) AC_SUBST([LIBINJECTION_VERSION]) # Check for Mbed TLS @@ -101,16 +118,68 @@ AC_MSG_ERROR([\ ]) fi # Mbed TLS version -AC_DEFUN([MBEDTLS_VERSION], m4_esyscmd_s(cd "others/mbedtls" && git describe && cd ../..)) +AC_DEFUN([MBEDTLS_VERSION], m4_esyscmd_s(cd "others/mbedtls" && (git describe --tags --always 2>/dev/null || echo unknown) && cd ../..)) -# SecLang test version -AC_DEFUN([SECLANG_TEST_VERSION], m4_esyscmd_s(cd "test/test-cases/secrules-language-tests" && git log -1 --format="%h" --abbrev-commit && cd ../../..)) +# Check for selected JSON backend +if test "x$json_backend" = "xsimdjson"; then +if ! test -f "${srcdir}/others/simdjson/singleheader/simdjson.h" || \ + ! test -f "${srcdir}/others/simdjson/singleheader/simdjson.cpp"; then +AC_MSG_ERROR([\ + + + simdjson was not found within ModSecurity source directory. + + simdjson code is available as part of ModSecurity source code in a format + of a git-submodule. git-submodule allow us to specify the correct version of + simdjson and still uses the simdjson repository to download it. + + You can download simdjson using git: + + $ git submodule update --init --recursive + + ]) +fi +JSON_BACKEND_VERSION=`cd "${srcdir}/others/simdjson" && git describe --tags --always 2>/dev/null || echo unknown` +AC_DEFINE([MSC_JSON_BACKEND_SIMDJSON], [1], + [Define if simdjson is the selected internal JSON backend]) +elif test "x$json_backend" = "xjsoncons"; then +if ! test -d "${srcdir}/others/jsoncons/include" || \ + ! 
test -f "${srcdir}/others/jsoncons/include/jsoncons/json.hpp"; then +AC_MSG_ERROR([\ + + + jsoncons was not found within ModSecurity source directory. + + jsoncons code is available as part of ModSecurity source code in a format + of a git-submodule. git-submodule allow us to specify the correct version of + jsoncons and still uses the jsoncons repository to download it. + + You can download jsoncons using git: + + $ git submodule update --init --recursive + ]) +fi +JSON_BACKEND_VERSION=`cd "${srcdir}/others/jsoncons" && git describe --tags --always 2>/dev/null || echo unknown` +AC_DEFINE([MSC_JSON_BACKEND_JSONCONS], [1], + [Define if jsoncons is the selected internal JSON backend]) +fi +AC_SUBST([JSON_BACKEND_VERSION]) + +AC_ARG_ENABLE([json-audit-instrumentation], + [AS_HELP_STRING([--enable-json-audit-instrumentation], + [Enable optional JSON audit instrumentation for benchmark builds [default=no]])], + [enable_json_audit_instrumentation="$enableval"], + [enable_json_audit_instrumentation="no"]) -# Check for yajl -PROG_YAJL +AS_CASE([$enable_json_audit_instrumentation], + [yes], [AC_DEFINE([MSC_JSON_AUDIT_INSTRUMENTATION], [1], + [Define if optional JSON audit instrumentation is enabled])], + [no], [], + [AC_MSG_ERROR([Unsupported value '$enable_json_audit_instrumentation' for --enable-json-audit-instrumentation. Use yes or no.])]) -AM_CONDITIONAL([YAJL_VERSION], [test "$YAJL_VERSION" != ""]) +# SecLang test version +AC_DEFUN([SECLANG_TEST_VERSION], m4_esyscmd_s(cd "test/test-cases/secrules-language-tests" && git log -1 --format="%h" --abbrev-commit && cd ../../..)) # Check for LibGeoIP PROG_GEOIP @@ -306,14 +375,7 @@ fi # Decide if we want to build the tests or not. -buildTestUtilities=false -if test "x$YAJL_FOUND" = "x1"; then - # Regression tests will not be able to run without the logging support. - # But we still have the unit tests. 
- # if test "$debugLogs" = "true"; then - buildTestUtilities=true - # fi -fi +buildTestUtilities=true AM_CONDITIONAL([TEST_UTILITIES], [test $buildTestUtilities = true]) @@ -328,6 +390,8 @@ fi AM_CONDITIONAL([EXAMPLES], [test $buildExamples = true]) AM_CONDITIONAL([BUILD_PARSER], [test $buildParser = true]) AM_CONDITIONAL([USE_MUTEX_ON_PM], [test $mutexPm = true]) +AM_CONDITIONAL([JSON_BACKEND_SIMDJSON], [test "x$json_backend" = "xsimdjson"]) +AM_CONDITIONAL([JSON_BACKEND_JSONCONS], [test "x$json_backend" = "xjsoncons"]) # General link options @@ -422,6 +486,8 @@ AS_ECHO_N(" + libInjection ....") echo LIBINJECTION_VERSION AS_ECHO_N(" + Mbed TLS ....") echo MBEDTLS_VERSION +AS_ECHO_N(" + JSON backend ....") +echo "$JSON_BACKEND ($JSON_BACKEND_VERSION)" AS_ECHO_N(" + SecLang tests ....") echo SECLANG_TEST_VERSION @@ -451,7 +517,6 @@ if test "x$GEOIP_FOUND" = "x2" && test "x$MAXMIND_FOUND" = "x2"; then fi MSC_STATUS_LIB([LibCURL ], [CURL]) -MSC_STATUS_LIB([YAJL ], [YAJL]) MSC_STATUS_LIB([LMDB ], [LMDB]) MSC_STATUS_LIB([LibXML2 ], [LIBXML2]) MSC_STATUS_LIB([SSDEEP ], [SSDEEP]) @@ -532,4 +597,3 @@ if test "$aflFuzzer" = "true"; then echo " $ export CC=afl-clang-fast " echo " " fi - diff --git a/doc/Makefile.am b/doc/Makefile.am index dfada090ff..f6aebff44d 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -6,7 +6,8 @@ ACLOCAL_AMFLAGS = -I build # distribution of the Doxygen configuration file EXTRA_DIST = \ - doxygen.cfg + doxygen.cfg \ + jsoncons_number_scan_assessment.md MAINTAINERCLEANFILES = \ @@ -14,4 +15,3 @@ MAINTAINERCLEANFILES = \ doxygen_sqlite3.db \ html \ latex - diff --git a/doc/jsoncons_number_scan_assessment.md b/doc/jsoncons_number_scan_assessment.md new file mode 100644 index 0000000000..95d40ac3e9 --- /dev/null +++ b/doc/jsoncons_number_scan_assessment.md @@ -0,0 +1,159 @@ +# Assessment of the jsoncons Number Path + +# 1. 
Deutsch + +## 1.1 Zweck + +Dieses Dokument beschreibt den aktuellen Zahlpfad des `jsoncons`-Backends in dieser Repository-Version und die dazu ergaenzten Regression-Tests. Es dokumentiert den sichtbaren Implementierungsstand, belegt am Code und an der vendorten `jsoncons`-Dokumentation, ohne Aenderungen an Third-Party-Code vorauszusetzen. + +## 1.2 Zusammenfassung + +Die aktuelle Implementierung ist im gegebenen Scope funktional korrekt und abgeschlossen, auch wenn der zusaetzliche Zahlenscan eine bekannte Performance-Kostenstelle bleibt. + +Der aktuelle Code verarbeitet Zahlenevents im `jsoncons`-Backend ueber `emitEvent(...)` und verwendet dafuer zusaetzlich einen `RawJsonTokenCursor`, der Rohzahlentoken direkt aus dem Original-Input rekonstruiert (`src/request_body_processor/json_backend_jsoncons.cc:242-252`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`, `src/request_body_processor/json_backend_jsoncons.cc:577-705`). + +Eine backend-only Entfernung dieses Scans ist im sichtbaren Scope nicht belastbar belegt, weil der oeffentliche Cursor-Kontext der vendorten `jsoncons`-Version hier keine nutzbaren `begin_position()`- und `end_position()`-Werte bereitstellt (`others/jsoncons/include/jsoncons/json_cursor.hpp:405-457`, `others/jsoncons/include/jsoncons/ser_utils.hpp:18-46`). + +Als Regression-Schutz wurden exakte Lexemtests am `JsonEventSink::on_number(std::string_view)`-Rand und zusaetzliche End-to-End-Regressionsfaelle fuer bislang fehlende Root-Scalar-Zahlen ergaenzt (`test/unit/json_backend_depth_tests.cc:149-316`, `test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`). + +## 1.3 Technische Umsetzung im aktuellen Code + +`JSONAdapter::parse(...)` ist der gemeinsame Eintrittspunkt und ruft bei einem Build mit `MSC_JSON_BACKEND_JSONCONS` den Pfad `parseDocumentWithJsoncons(...)` auf (`src/request_body_processor/json_adapter.cc:59-78`). 
+ +`parseDocumentWithJsoncons(...)` konfiguriert `jsoncons::json_options` mit `max_nesting_depth`, `lossless_number(true)` und `lossless_bignum(true)`, baut einen `jsoncons::json_string_cursor` ueber dem Input auf und initialisiert parallel dazu einen `RawJsonTokenCursor` ueber demselben Eingabestring (`src/request_body_processor/json_backend_jsoncons.cc:716-760`). + +Der zusaetzliche Rohscan ist im lokalen Hilfstyp `RawJsonTokenCursor` implementiert. `consumeNextNumberToken(...)` sucht ab der aktuellen Cursor-Position zuerst mit `skipToNextNumberToken(...)` nach dem naechsten Zahlentoken und liest das Token dann mit `consumeNumberAt(...)` ein (`src/request_body_processor/json_backend_jsoncons.cc:242-252`, `src/request_body_processor/json_backend_jsoncons.cc:425-486`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`). + +`consumeNumberAt(...)` behandelt dabei sichtbar Vorzeichen, Ganzzahlteil, Nachkommateil und Exponent einschliesslich Exponent-Vorzeichen (`src/request_body_processor/json_backend_jsoncons.cc:429-486`). `skipInsignificantAt(...)` ueberspringt Leerraum, Komma und Doppelpunkt, waehrend `isNumberBoundary(...)` Leerraum sowie `,`, `]` und `}` als Zahlgrenzen erkennt (`src/request_body_processor/json_backend_jsoncons.cc:298-315`). + +Im Eventpfad gibt es zwei relevante Zahlzweige. + +- Fuer `string_value`-Events, die intern als numerische Stringevents erkannt werden, versucht der Code zuerst `advanceExactNumber(decoded_number, ...)`. Wenn das gelingt, wird der dekodierte String direkt an `on_number(...)` weitergegeben. Wenn das nicht gelingt, faellt der Code auf `consumeNextNumberToken(...)` und danach auf `rawNumberFromContext(...)` zurueck (`src/request_body_processor/json_backend_jsoncons.cc:622-658`). +- Fuer `int64_value`, `uint64_value`, `double_value` und `half_value` wird immer zuerst `consumeNextNumberToken(...)` aufgerufen. 
Das Ergebnis wird dann ueber `rawNumberFromContext(...)` materialisiert und an `on_number(...)` weitergereicht (`src/request_body_processor/json_backend_jsoncons.cc:684-705`). + +`rawNumberFromContext(...)` arbeitet in klarer Reihenfolge. Zuerst versucht die Funktion, einen Kandidaten ueber `context.begin_position()` und `context.end_position()` aus dem Original-Input auszuschneiden. Falls dieser Kandidat nicht passt, verwendet sie den bereits gescannten Tokenstring. Fuer numerische Stringevents kann sie zuletzt noch den dekodierten Event-String verwenden, sofern dieser selbst ein gueltiges JSON-Zahllexem ist (`src/request_body_processor/json_backend_jsoncons.cc:545-575`). + +Die aktivierten Optionen beeinflussen die von `jsoncons` gelieferten Eventformen. Laut vendorter Dokumentation liest `lossless_number(true)` Zahlen mit Nachkommateil oder Exponent als String mit `semantic_tag::bigdec`, und `lossless_bignum(true)` behaelt uebergrosse Ganzzahlen als String mit `semantic_tag::bigint` und uebergrosse Gleitkommawerte als `bigdec` (`others/jsoncons/doc/ref/corelib/basic_json_options.md:25-29`). + +Der Parsercode belegt diese Eventformen direkt. Bei Integer-Ueberlauf emittiert der Parser `string_value(..., semantic_tag::bigint, ...)`, und bei aktivem `lossless_number_` emittiert er fuer Zahlen mit Nachkommateil oder Exponent `string_value(..., semantic_tag::bigdec, ...)` (`others/jsoncons/include/jsoncons/json_parser.hpp:2458-2554`). + +Die internen Parserpositionsdaten sind im Parser selbst vorhanden. Beim ersten `-`, bei `0` und bei `1` bis `9` setzt der Parser `begin_position_`, `parse_number(...)` verarbeitet Integer-, Fraction- und Exponent-Anteile, und die Parserklasse ueberschreibt `begin_position()` und `end_position()` (`others/jsoncons/include/jsoncons/json_parser.hpp:1096-1126`, `others/jsoncons/include/jsoncons/json_parser.hpp:1720-1940`, `others/jsoncons/include/jsoncons/json_parser.hpp:2380-2392`). 
+ +## 1.4 Warum die aktuelle Implementierung korrekt ist + +Im sichtbaren Scope ist die beobachtbare Vertragsgrenze das an `JsonEventSink::on_number(std::string_view)` weitergereichte Rohzahllexem. Genau an dieser Grenze arbeitet die aktuelle Implementierung: primitive Zahlenevents werden ueber den Rohscan materialisiert, numerische Stringevents werden bevorzugt ueber `advanceExactNumber(...)` synchronisiert und sonst ebenfalls ueber den Rohscan abgesichert (`src/request_body_processor/json_backend_jsoncons.cc:622-705`). + +Der lokale Rohscan bildet die JSON-Zahlsyntax im sichtbaren Code konkret ab. Der Scanner behandelt negatives Vorzeichen, Nachkommateil, Exponent, Exponent-Vorzeichen, Leerraum sowie die in Arrays und Objekten sichtbaren Trenner (`src/request_body_processor/json_backend_jsoncons.cc:298-315`, `src/request_body_processor/json_backend_jsoncons.cc:425-486`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`). + +Die neuen Unit-Tests pruefen genau diese beobachtbare Eigenschaft ohne Normalisierung: `collectNumberLexemes(...)` parst ueber `JSONAdapter::parse(...)`, sammelt jede `on_number(...)`-Nutzlast als String und vergleicht Root-Scalar-Faelle sowie einen Objekt/Array-Fall mit Whitespace und Trennern exakt gegen die erwarteten Lexeme (`test/unit/json_backend_depth_tests.cc:149-316`). + +Die zusaetzlichen Regressionsfaelle im JSON-Testfile sichern denselben Vertrag noch einmal End-to-End ueber `ARGS:json` und den Debug-Log ab, jeweils mit exakter Stringerwartung fuer das urspruengliche Zahllexem (`test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`). + +Diese Bewertung behauptet nicht, dass der Pfad performance-optimal ist. Sie beschreibt, dass die aktuelle Implementierung im gegebenen Scope das rohe Zahllexem funktional korrekt weiterreicht und deshalb als abgeschlossen bewertet werden kann. 
+ +## 1.5 Warum keine backend-only Entfernung des Scans moeglich ist + +Die sichtbare `jsoncons`-Dokumentation beschreibt `basic_json_cursor::context()` als Rueckgabe des aktuellen `ser_context` (`others/jsoncons/doc/ref/corelib/basic_json_cursor.md:117-119`). In der vendorten Implementierung gibt `basic_json_cursor::context()` tatsaechlich `*this` zurueck (`others/jsoncons/include/jsoncons/json_cursor.hpp:405-407`). + +Im selben Cursorcode sind nur `line()` und `column()` ueberschrieben (`others/jsoncons/include/jsoncons/json_cursor.hpp:450-457`). Die Basisklasse `ser_context` liefert fuer `begin_position()` und `end_position()` dagegen standardmaessig `0` (`others/jsoncons/include/jsoncons/ser_utils.hpp:18-46`). + +Die vendorte Dokumentation sagt zu `ser_context`, dass `begin_position()` und `end_position()` derzeit nur fuer den JSON-Parser unterstuetzt werden (`others/jsoncons/doc/ref/corelib/ser_context.md:20-33`). Der Parser selbst besitzt diese Positionsdaten auch intern (`others/jsoncons/include/jsoncons/json_parser.hpp:1096-1126`, `others/jsoncons/include/jsoncons/json_parser.hpp:2380-2392`), aber im sichtbaren Backend-Code wird mit `cursor.context()` gearbeitet, nicht mit dem Parserobjekt selbst (`src/request_body_processor/json_backend_jsoncons.cc:758-760`). + +Damit ist ein backend-only Fast Path ueber den oeffentlichen Cursor-Kontext in dieser Repository-Version nicht belastbar belegt. Genau deshalb bleibt der bestehende Zahlenscan im aktuellen Scope funktional notwendig. + +## 1.6 Testabdeckung + +Die ergaenzten Tests in `test/unit/json_backend_depth_tests.cc` fuegen einen kleinen `NumberCollectingSink` hinzu, der nur `on_number(std::string_view)` sammelt, und einen Helper, der `JSONAdapter::parse(...)` ueber beliebigen Input ausfuehrt (`test/unit/json_backend_depth_tests.cc:70-186`). 
+ +Die Root-Scalar-Regressionsfaelle pruefen unveraenderte Rohlexeme fuer `0`, `-0`, `1.0`, `1e3`, `-1.25e-4`, `18446744073709551615`, `18446744073709551616` und `123456789012345678901234567890` (`test/unit/json_backend_depth_tests.cc:229-253`). + +Ein weiterer Test prueft dieselben Lexemtypen in einem gemischten Objekt/Array mit Leerraum und Trennern und vergleicht die gesamte `on_number(...)`-Sequenz exakt (`test/unit/json_backend_depth_tests.cc:256-275`). + +Das bestehende Regressionsfile wurde nur um die bislang fehlenden Root-Scalar-Faelle `0`, `-1.25e-4`, `18446744073709551615` und `18446744073709551616` ergaenzt. Bereits vorhandene Faelle wie `1.0`, `1e3`, `-0` und `123456789012345678901234567890` blieben unveraendert (`test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`, `test/test-cases/regression/request-body-parser-json-backend-edgecases.json:362-405`). + +Die Tests behaupten bewusst nicht, dass interne `jsoncons`-Positionsdaten korrekt nach aussen propagiert werden. Sie sichern das beobachtbare Backend-Verhalten am `on_number(...)`-Rand und ueber den bestehenden ModSecurity-Regressionspfad ab. + +## 1.7 Fazit + +Die aktuelle `jsoncons`-Implementierung ist in diesem Repository-Stand im gegebenen Scope funktional korrekt und abgeschlossen. Der zusaetzliche Zahlenscan ist sichtbar vorhanden und bleibt eine bekannte Performance-Kostenstelle, ist im aktuell erlaubten backend-only Rahmen aber weiterhin die massgebliche und notwendige Quelle fuer das Rohzahllexem. + +Die neuen Tests liefern dafuer gezielten Regression-Schutz, ohne Third-Party-Code zu aendern oder unbelegte Aussagen ueber nicht sichtbare Schnittstellen zu treffen. + +# 2. English + +## 2.1 Purpose + +This document records the current number-token path of the `jsoncons` backend in this repository revision and the regression tests that were added around it. 
It is intentionally limited to what is directly supported by the repository code and the vendored `jsoncons` documentation. + +## 2.2 Summary + +The current implementation is functionally correct and complete within the given scope, even though the additional numeric scan remains a known performance cost. + +In the current code, numeric events are handled by `emitEvent(...)` together with an additional `RawJsonTokenCursor` that reconstructs raw numeric tokens from the original input (`src/request_body_processor/json_backend_jsoncons.cc:242-252`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`, `src/request_body_processor/json_backend_jsoncons.cc:577-705`). + +A backend-only removal of that scan is not currently substantiated by the visible code, because the public cursor context exposed by the vendored `jsoncons` version does not provide usable `begin_position()` and `end_position()` values here (`others/jsoncons/include/jsoncons/json_cursor.hpp:405-457`, `others/jsoncons/include/jsoncons/ser_utils.hpp:18-46`). + +As regression protection, exact lexeme comparisons were added at the `JsonEventSink::on_number(std::string_view)` boundary together with additional end-to-end regression cases for the previously missing root-scalar numeric inputs (`test/unit/json_backend_depth_tests.cc:149-316`, `test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`). + +## 2.3 Current implementation + +`JSONAdapter::parse(...)` is the shared entry point and dispatches to `parseDocumentWithJsoncons(...)` when ModSecurity is built with `MSC_JSON_BACKEND_JSONCONS` (`src/request_body_processor/json_adapter.cc:59-78`). + +`parseDocumentWithJsoncons(...)` configures `jsoncons::json_options` with `max_nesting_depth`, `lossless_number(true)`, and `lossless_bignum(true)`, creates a `jsoncons::json_string_cursor` on the input, and also creates a `RawJsonTokenCursor` over the same input text (`src/request_body_processor/json_backend_jsoncons.cc:716-760`). 
+ +The additional raw scan is implemented in the local `RawJsonTokenCursor`. `consumeNextNumberToken(...)` first locates the next numeric token with `skipToNextNumberToken(...)` and then reads the token with `consumeNumberAt(...)` (`src/request_body_processor/json_backend_jsoncons.cc:242-252`, `src/request_body_processor/json_backend_jsoncons.cc:425-486`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`). + +`consumeNumberAt(...)` explicitly handles sign, integer part, fraction, exponent, and exponent sign (`src/request_body_processor/json_backend_jsoncons.cc:429-486`). `skipInsignificantAt(...)` skips whitespace, commas, and colons, while `isNumberBoundary(...)` treats whitespace together with `,`, `]`, and `}` as numeric boundaries (`src/request_body_processor/json_backend_jsoncons.cc:298-315`). + +There are two relevant numeric event branches. + +- For `string_value` events recognized as numeric string events, the code first attempts `advanceExactNumber(decoded_number, ...)`. If that succeeds, the decoded string is sent directly to `on_number(...)`. If it does not, the code falls back to `consumeNextNumberToken(...)` and `rawNumberFromContext(...)` (`src/request_body_processor/json_backend_jsoncons.cc:622-658`). +- For `int64_value`, `uint64_value`, `double_value`, and `half_value`, the code always calls `consumeNextNumberToken(...)` first, then materializes the token through `rawNumberFromContext(...)`, and finally passes it to `on_number(...)` (`src/request_body_processor/json_backend_jsoncons.cc:684-705`). + +`rawNumberFromContext(...)` follows a fixed order. It first attempts to slice a candidate directly from the original input using `context.begin_position()` and `context.end_position()`. If that candidate is not usable, it falls back to the already scanned token. 
For numeric string events it can finally use the decoded event string, provided that string is itself a valid JSON number lexeme (`src/request_body_processor/json_backend_jsoncons.cc:545-575`). + +The enabled `jsoncons` options change the event shapes delivered by the parser. The vendored documentation states that `lossless_number(true)` reads numbers with fractional parts or exponents as strings tagged `semantic_tag::bigdec`, and that `lossless_bignum(true)` preserves out-of-range integers as strings tagged `semantic_tag::bigint` and out-of-range floating-point values as `bigdec` (`others/jsoncons/doc/ref/corelib/basic_json_options.md:25-29`). + +The parser implementation matches that documentation. On integer overflow it emits `string_value(..., semantic_tag::bigint, ...)`, and when `lossless_number_` is enabled it emits `string_value(..., semantic_tag::bigdec, ...)` for numbers with fractions or exponents (`others/jsoncons/include/jsoncons/json_parser.hpp:2458-2554`). + +The parser also maintains internal position data. It sets `begin_position_` when it first sees `-`, `0`, or `1` through `9`, `parse_number(...)` handles integer, fraction, and exponent states, and the parser class overrides `begin_position()` and `end_position()` (`others/jsoncons/include/jsoncons/json_parser.hpp:1096-1126`, `others/jsoncons/include/jsoncons/json_parser.hpp:1720-1940`, `others/jsoncons/include/jsoncons/json_parser.hpp:2380-2392`). + +## 2.4 Why the current implementation is correct + +Within the visible scope, the relevant contract boundary is the raw numeric lexeme passed into `JsonEventSink::on_number(std::string_view)`. That is exactly what the current implementation preserves: primitive numeric events are materialized through the raw scan, and numeric string events are first synchronized through `advanceExactNumber(...)` when possible and otherwise protected by the same raw scan path (`src/request_body_processor/json_backend_jsoncons.cc:622-705`). 
+ +The local raw scanner explicitly models the JSON number syntax visible in this repository. It covers negative signs, fractions, exponents, exponent signs, whitespace, and the delimiters used inside arrays and objects (`src/request_body_processor/json_backend_jsoncons.cc:298-315`, `src/request_body_processor/json_backend_jsoncons.cc:425-486`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`). + +The added unit tests validate that exact observable behavior without normalization. `collectNumberLexemes(...)` parses through `JSONAdapter::parse(...)`, captures every `on_number(...)` payload as a string, and compares both root-scalar cases and a mixed object/array input with whitespace and delimiters against the exact expected lexemes (`test/unit/json_backend_depth_tests.cc:149-316`). + +The added regression cases in the JSON test file check the same contract end-to-end through `ARGS:json` and the debug log, again with exact string expectations for the original numeric lexemes (`test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`). + +This assessment does not claim that the path is performance-optimal. It documents that, within the current scope, the implementation functionally preserves the raw numeric lexeme and can therefore be treated as complete for this repository state. + +## 2.5 Why backend-only removal is not currently supported + +The visible `jsoncons` documentation describes `basic_json_cursor::context()` as returning the current `ser_context` (`others/jsoncons/doc/ref/corelib/basic_json_cursor.md:117-119`). In the vendored implementation, `basic_json_cursor::context()` does in fact return `*this` (`others/jsoncons/include/jsoncons/json_cursor.hpp:405-407`). + +In that same cursor implementation, only `line()` and `column()` are overridden (`others/jsoncons/include/jsoncons/json_cursor.hpp:450-457`). 
The base `ser_context` implementation returns `0` for `begin_position()` and `end_position()` (`others/jsoncons/include/jsoncons/ser_utils.hpp:18-46`). + +The vendored `ser_context` documentation says that `begin_position()` and `end_position()` are currently only supported for the JSON parser (`others/jsoncons/doc/ref/corelib/ser_context.md:20-33`). The parser itself does have those positions internally (`others/jsoncons/include/jsoncons/json_parser.hpp:1096-1126`, `others/jsoncons/include/jsoncons/json_parser.hpp:2380-2392`), but the visible backend code operates through `cursor.context()`, not through the parser object directly (`src/request_body_processor/json_backend_jsoncons.cc:758-760`). + +For that reason, a backend-only fast path based on the public cursor context is not supported by the currently visible code. The existing numeric scan therefore remains the authoritative and necessary source of the raw numeric lexeme in the present scope. + +## 2.6 Test coverage + +The additions in `test/unit/json_backend_depth_tests.cc` introduce a small `NumberCollectingSink` that only records `on_number(std::string_view)` and a helper that executes `JSONAdapter::parse(...)` on arbitrary input (`test/unit/json_backend_depth_tests.cc:70-186`). + +The root-scalar regression cases verify unchanged raw lexemes for `0`, `-0`, `1.0`, `1e3`, `-1.25e-4`, `18446744073709551615`, `18446744073709551616`, and `123456789012345678901234567890` (`test/unit/json_backend_depth_tests.cc:229-253`). + +An additional test validates the same lexeme classes inside a mixed object/array input with whitespace and delimiters and compares the full `on_number(...)` sequence exactly (`test/unit/json_backend_depth_tests.cc:256-275`). + +The existing regression file was extended only with the previously missing root-scalar cases `0`, `-1.25e-4`, `18446744073709551615`, and `18446744073709551616`. 
Existing cases such as `1.0`, `1e3`, `-0`, and `123456789012345678901234567890` were left in place (`test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`, `test/test-cases/regression/request-body-parser-json-backend-edgecases.json:362-405`). + +The tests intentionally do not claim that internal `jsoncons` position data is propagated outward correctly. They protect the observable backend behavior at the `on_number(...)` boundary and through the established ModSecurity regression path. + +## 2.7 Conclusion + +In this repository state, the current `jsoncons` implementation is functionally correct and complete within the given scope. The additional numeric scan is visibly present and remains a known performance cost, but within the currently allowed backend-only scope it is still the decisive and necessary source for the raw numeric lexeme. + +The added tests provide focused regression protection for that behavior without modifying third-party code or asserting capabilities that are not directly supported by the visible interfaces. 
diff --git a/examples/multiprocess_c/Makefile.am b/examples/multiprocess_c/Makefile.am index 726d1d9057..a0011d8c73 100644 --- a/examples/multiprocess_c/Makefile.am +++ b/examples/multiprocess_c/Makefile.am @@ -19,15 +19,12 @@ multi_LDFLAGS = \ -lstdc++ \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(MAXMIND_LDFLAGS) multi_CFLAGS = \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ $(GLOBAL_CFLAGS) MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/examples/multithread/Makefile.am b/examples/multithread/Makefile.am index 0871efa1e1..c47c13b51f 100644 --- a/examples/multithread/Makefile.am +++ b/examples/multithread/Makefile.am @@ -15,8 +15,7 @@ multithread_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) multithread_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -28,12 +27,11 @@ multithread_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) multithread_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ -g \ -I../others \ @@ -43,7 +41,6 @@ multithread_CPPFLAGS = \ $(GEOIP_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ @@ -53,5 +50,3 @@ multithread_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/examples/reading_logs_via_rule_message/Makefile.am b/examples/reading_logs_via_rule_message/Makefile.am index 5a6ba74b2a..384a84e73b 100644 --- a/examples/reading_logs_via_rule_message/Makefile.am +++ b/examples/reading_logs_via_rule_message/Makefile.am @@ -15,8 +15,7 @@ simple_request_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) simple_request_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -28,12 +27,11 @@ simple_request_LDFLAGS = \ $(LMDB_LDFLAGS) \ 
$(LUA_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) simple_request_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ -g \ -I../others \ @@ -43,7 +41,6 @@ simple_request_CPPFLAGS = \ $(GEOIP_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ @@ -53,5 +50,3 @@ simple_request_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/examples/reading_logs_with_offset/Makefile.am b/examples/reading_logs_with_offset/Makefile.am index a98ed48d0e..95373a4c47 100644 --- a/examples/reading_logs_with_offset/Makefile.am +++ b/examples/reading_logs_with_offset/Makefile.am @@ -15,8 +15,7 @@ read_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) read_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -27,12 +26,11 @@ read_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(MAXMIND_LDFLAGS) read_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ -g \ -I../others \ @@ -43,7 +41,6 @@ read_CPPFLAGS = \ $(MAXMIND_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ @@ -53,5 +50,3 @@ read_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/examples/simple_example_using_c/Makefile.am b/examples/simple_example_using_c/Makefile.am index b03ab96d48..9bf657ba27 100644 --- a/examples/simple_example_using_c/Makefile.am +++ b/examples/simple_example_using_c/Makefile.am @@ -17,15 +17,12 @@ test_LDFLAGS = \ -lm \ -lstdc++ \ $(LUA_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) test_CFLAGS = \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ $(GLOBAL_CFLAGS) MAINTAINERCLEANFILES = \ Makefile.in - - diff --git 
a/examples/using_bodies_in_chunks/Makefile.am b/examples/using_bodies_in_chunks/Makefile.am index 9eb438f368..68c9b34dfa 100644 --- a/examples/using_bodies_in_chunks/Makefile.am +++ b/examples/using_bodies_in_chunks/Makefile.am @@ -15,8 +15,7 @@ simple_request_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) simple_request_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -27,12 +26,11 @@ simple_request_LDFLAGS = \ $(MAXMIND_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) simple_request_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ -g \ -I../others \ @@ -43,7 +41,6 @@ simple_request_CPPFLAGS = \ $(MAXMIND_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ @@ -52,5 +49,3 @@ simple_request_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/headers/modsecurity/transaction.h b/headers/modsecurity/transaction.h index 3e70caa38e..5cfaff0f13 100644 --- a/headers/modsecurity/transaction.h +++ b/headers/modsecurity/transaction.h @@ -77,25 +77,6 @@ typedef struct Rules_t RulesSet; do { } while (0); #endif - -#define LOGFY_ADD(a, b) \ - yajl_gen_string(g, reinterpret_cast(a), strlen(a)); \ - if (b.data() == NULL) { \ - yajl_gen_string(g, reinterpret_cast(""), \ - strlen("")); \ - } else { \ - yajl_gen_string(g, reinterpret_cast(b.data()), \ - b.length()); \ - } - -#define LOGFY_ADD_INT(a, b) \ - yajl_gen_string(g, reinterpret_cast(a), strlen(a)); \ - yajl_gen_number(g, reinterpret_cast(b), strlen(b)); - -#define LOGFY_ADD_NUM(a, b) \ - yajl_gen_string(g, reinterpret_cast(a), strlen(a)); \ - yajl_gen_integer(g, b); - #ifdef __cplusplus namespace modsecurity { diff --git a/modsecurity.pc.in b/modsecurity.pc.in index d00ad644fa..19d64b70e9 100644 --- a/modsecurity.pc.in +++ b/modsecurity.pc.in @@ -8,4 +8,4 @@ 
Description: ModSecurity API Version: @MSC_VERSION_WITH_PATCHLEVEL@ Cflags: -I@includedir@ Libs: -L@libdir@ -lmodsecurity -Libs.private: @CURL_LDADD@ @GEOIP_LDADD@ @MAXMIND_LDADD@ @GLOBAL_LDADD@ @LIBXML2_LDADD@ @LMDB_LDADD@ @LUA_LDADD@ @PCRE_LDADD@ @PCRE2_LDADD@ @SSDEEP_LDADD@ @YAJL_LDADD@ +Libs.private: @CURL_LDADD@ @GEOIP_LDADD@ @MAXMIND_LDADD@ @GLOBAL_LDADD@ @LIBXML2_LDADD@ @LMDB_LDADD@ @LUA_LDADD@ @PCRE_LDADD@ @PCRE2_LDADD@ @SSDEEP_LDADD@ diff --git a/others/jsoncons b/others/jsoncons new file mode 160000 index 0000000000..128553c8d1 --- /dev/null +++ b/others/jsoncons @@ -0,0 +1 @@ +Subproject commit 128553c8d1b222c30819656d123590accb60689d diff --git a/others/simdjson b/others/simdjson new file mode 160000 index 0000000000..fb83b114ef --- /dev/null +++ b/others/simdjson @@ -0,0 +1 @@ +Subproject commit fb83b114efcec4544eba8d45e3c7969ca756c086 diff --git a/src/Makefile.am b/src/Makefile.am index 14c26697b5..52f24de428 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -231,6 +231,7 @@ UTILS = \ utils/geo_lookup.cc \ utils/https_client.cc \ utils/ip_tree.cc \ + utils/json_writer.cc \ utils/msc_tree.cc \ utils/random.cc \ utils/regex.cc \ @@ -248,10 +249,29 @@ COLLECTION = \ BODY_PROCESSORS = \ request_body_processor/multipart.cc \ request_body_processor/xml.cc \ - request_body_processor/json.cc + request_body_processor/json.cc \ + request_body_processor/json_adapter.cc \ + request_body_processor/json_instrumentation.cc + +if JSON_BACKEND_SIMDJSON +BODY_PROCESSORS += \ + request_body_processor/json_backend_simdjson.cc +JSON_BACKEND_SOURCES = \ + ../others/simdjson/singleheader/simdjson.cpp +JSON_BACKEND_CPPFLAGS = \ + -I$(top_srcdir)/others/simdjson/singleheader +endif + +if JSON_BACKEND_JSONCONS +BODY_PROCESSORS += \ + request_body_processor/json_backend_jsoncons.cc +JSON_BACKEND_CPPFLAGS = \ + -I$(top_srcdir)/others/jsoncons/include +endif libmodsecurity_la_SOURCES = \ + $(JSON_BACKEND_SOURCES) \ parser/seclang-parser.cc \ parser/seclang-scanner.cc \ 
parser/driver.cc \ @@ -295,6 +315,7 @@ libmodsecurity_la_CPPFLAGS = \ -I$(top_builddir) \ -g \ -I$(top_srcdir)/others \ + $(JSON_BACKEND_CPPFLAGS) \ -I$(top_srcdir)/others/mbedtls/include \ -fPIC \ -O3 \ @@ -303,7 +324,6 @@ libmodsecurity_la_CPPFLAGS = \ $(GEOIP_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ @@ -325,7 +345,6 @@ libmodsecurity_la_LDFLAGS = \ $(PCRE2_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) \ -version-info @MSC_VERSION_INFO@ @@ -341,6 +360,4 @@ libmodsecurity_la_LIBADD = \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ $(MAXMIND_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) - + $(SSDEEP_LDADD) diff --git a/src/actions/transformations/css_decode.cc b/src/actions/transformations/css_decode.cc index 41da9390ea..d999813d87 100644 --- a/src/actions/transformations/css_decode.cc +++ b/src/actions/transformations/css_decode.cc @@ -15,6 +15,8 @@ #include "css_decode.h" +#include + #include "src/utils/string.h" using namespace modsecurity::utils::string; @@ -138,7 +140,17 @@ static inline bool css_decode_inplace(std::string &val) { /* The character after backslash is not a hexadecimal digit, * nor a newline. */ /* Use one character after backslash as is. */ - *d++ = input[i++]; + const auto escaped = input[i++]; + *d++ = escaped; + + /* + * Preserve legacy behaviour for escaped NUL by consuming + * one trailing whitespace character. + */ + if ((escaped == '\0') && (i < input_len) + && std::isspace(input[i])) { + i++; + } } } else { /* No characters after backslash. 
*/ diff --git a/src/actions/transformations/utf8_to_unicode.cc b/src/actions/transformations/utf8_to_unicode.cc index 263c782bf6..8a2cb31e5b 100644 --- a/src/actions/transformations/utf8_to_unicode.cc +++ b/src/actions/transformations/utf8_to_unicode.cc @@ -27,6 +27,42 @@ constexpr int UNICODE_ERROR_INVALID_ENCODING = -2; namespace modsecurity::actions::transformations { +static inline char *appendUnicodeEscape(char *data, + unsigned char (&unicode)[8], unsigned int d) { + int length = 0; + + *data++ = '%'; + *data++ = 'u'; + snprintf(reinterpret_cast(unicode), sizeof(unicode), "%x", d); + length = strlen(reinterpret_cast(unicode)); + + switch (length) { + case 1: + *data++ = '0'; + *data++ = '0'; + *data++ = '0'; + break; + case 2: + *data++ = '0'; + *data++ = '0'; + break; + case 3: + *data++ = '0'; + break; + case 4: + case 5: + break; + } + + for (std::string::size_type j = 0; + j < static_cast(length); j++) { + *data++ = unicode[j]; + } + + return data; +} + + static inline bool encode(std::string &value) { auto input = reinterpret_cast(value.data()); const auto input_len = value.length(); @@ -76,38 +112,9 @@ static inline bool encode(std::string &value) { unicode_len = 2; count += 6; if (count <= len) { - int length = 0; /* compute character number */ d = ((c & 0x1F) << 6) | (*(utf + 1) & 0x3F); - *data++ = '%'; - *data++ = 'u'; - snprintf(reinterpret_cast(unicode), - sizeof(reinterpret_cast(unicode)), - "%x", d); - length = strlen(reinterpret_cast(unicode)); - - switch (length) { - case 1: - *data++ = '0'; - *data++ = '0'; - *data++ = '0'; - break; - case 2: - *data++ = '0'; - *data++ = '0'; - break; - case 3: - *data++ = '0'; - break; - case 4: - case 5: - break; - } - - for (std::string::size_type j = 0; j < length; j++) { - *data++ = unicode[j]; - } - + data = appendUnicodeEscape(data, unicode, d); changed = true; } } @@ -126,40 +133,11 @@ static inline bool encode(std::string &value) { unicode_len = 3; count+=6; if (count <= len) { - int length = 0; /* 
compute character number */ d = ((c & 0x0F) << 12) | ((*(utf + 1) & 0x3F) << 6) | (*(utf + 2) & 0x3F); - *data++ = '%'; - *data++ = 'u'; - snprintf(reinterpret_cast(unicode), - sizeof(reinterpret_cast(unicode)), - "%x", d); - length = strlen(reinterpret_cast(unicode)); - - switch (length) { - case 1: - *data++ = '0'; - *data++ = '0'; - *data++ = '0'; - break; - case 2: - *data++ = '0'; - *data++ = '0'; - break; - case 3: - *data++ = '0'; - break; - case 4: - case 5: - break; - } - - for (std::string::size_type j = 0; j < length; j++) { - *data++ = unicode[j]; - } - + data = appendUnicodeEscape(data, unicode, d); changed = true; } } @@ -187,41 +165,12 @@ static inline bool encode(std::string &value) { unicode_len = 4; count+=7; if (count <= len) { - int length = 0; /* compute character number */ d = ((c & 0x07) << 18) | ((*(utf + 1) & 0x3F) << 12) | ((*(utf + 2) & 0x3F) << 6) | (*(utf + 3) & 0x3F); - *data++ = '%'; - *data++ = 'u'; - snprintf(reinterpret_cast(unicode), - sizeof(reinterpret_cast(unicode)), - "%x", d); - length = strlen(reinterpret_cast(unicode)); - - switch (length) { - case 1: - *data++ = '0'; - *data++ = '0'; - *data++ = '0'; - break; - case 2: - *data++ = '0'; - *data++ = '0'; - break; - case 3: - *data++ = '0'; - break; - case 4: - case 5: - break; - } - - for (std::string::size_type j = 0; j < length; j++) { - *data++ = unicode[j]; - } - + data = appendUnicodeEscape(data, unicode, d); changed = true; } } diff --git a/src/modsecurity.cc b/src/modsecurity.cc index 8f943b7f76..f3632f9cf0 100644 --- a/src/modsecurity.cc +++ b/src/modsecurity.cc @@ -17,10 +17,6 @@ #include "modsecurity/modsecurity.h" #include "src/config.h" -#ifdef WITH_YAJL -#include -#include -#endif #ifdef WITH_LIBXML2 #include #include @@ -38,6 +34,7 @@ #include "src/collection/backend/in_memory-per_process.h" #include "src/collection/backend/lmdb.h" #include "src/unique_id.h" +#include "src/utils/json_writer.h" #include "src/utils/regex.h" #include "src/utils/geo_lookup.h" 
#include "src/actions/transformations/transformation.h" @@ -214,59 +211,34 @@ void ModSecurity::serverLog(void *data, const RuleMessage &rm) { int ModSecurity::processContentOffset(const char *content, size_t len, const char *matchString, std::string *json, const char **err) { -#ifdef WITH_YAJL Utils::Regex variables("v([0-9]+),([0-9]+)"); Utils::Regex operators("o([0-9]+),([0-9]+)"); Utils::Regex transformations("t:(?:(?!t:).)+"); - yajl_gen g; std::string varValue; - const unsigned char *buf; - size_t jsonSize; + utils::JsonWriter writer(false); std::list vars = variables.searchAll(matchString); std::list ops = operators.searchAll(matchString); std::list trans = transformations.searchAll(matchString); - g = yajl_gen_alloc(NULL); - if (g == NULL) { - *err = "Failed to allocate memory for the JSON creation."; - return -1; - } - - yajl_gen_config(g, yajl_gen_beautify, 0); - - yajl_gen_map_open(g); - yajl_gen_string(g, reinterpret_cast("match"), - strlen("match")); - - yajl_gen_array_open(g); - yajl_gen_map_open(g); - - yajl_gen_string(g, reinterpret_cast("variable"), - strlen("variable")); - - yajl_gen_map_open(g); - yajl_gen_string(g, reinterpret_cast("highlight"), - strlen("highlight")); - - yajl_gen_array_open(g); + writer.start_object(); + writer.key("match"); + writer.start_array(); + writer.start_object(); + writer.key("variable"); + writer.start_object(); + writer.key("highlight"); + writer.start_array(); for(auto [it, pending] = std::tuple{vars.rbegin(), vars.size()}; pending > 3; pending -= 3) { - yajl_gen_map_open(g); + writer.start_object(); it++; const std::string &startingAt = it->str(); it++; const std::string &size = it->str(); it++; - yajl_gen_string(g, - reinterpret_cast("startingAt"), - strlen("startingAt")); - yajl_gen_string(g, - reinterpret_cast(startingAt.c_str()), - startingAt.size()); - yajl_gen_string(g, reinterpret_cast("size"), - strlen("size")); - yajl_gen_string(g, - reinterpret_cast(size.c_str()), - size.size()); - 
yajl_gen_map_close(g); + writer.key("startingAt"); + writer.string(startingAt); + writer.key("size"); + writer.string(size); + writer.end_object(); if (stoi(startingAt) >= len) { *err = "Offset is out of the content limits."; @@ -280,109 +252,70 @@ int ModSecurity::processContentOffset(const char *content, size_t len, varValue.append(value); } } - yajl_gen_array_close(g); - - yajl_gen_string(g, reinterpret_cast("value"), - strlen("value")); + writer.end_array(); - yajl_gen_array_open(g); + writer.key("value"); + writer.start_array(); - yajl_gen_map_open(g); - yajl_gen_string(g, reinterpret_cast("value"), - strlen("value")); - yajl_gen_string(g, reinterpret_cast(varValue.c_str()), - varValue.size()); - yajl_gen_map_close(g); + writer.start_object(); + writer.key("value"); + writer.string(varValue); + writer.end_object(); while (!trans.empty()) { modsecurity::actions::transformations::Transformation *t; - yajl_gen_map_open(g); - yajl_gen_string(g, - reinterpret_cast("transformation"), - strlen("transformation")); - - yajl_gen_string(g, - reinterpret_cast(trans.back().str().c_str()), - trans.back().str().size()); + writer.start_object(); + writer.key("transformation"); + writer.string(trans.back().str()); t = modsecurity::actions::transformations::Transformation::instantiate( trans.back().str().c_str()); t->transform(varValue, nullptr); trans.pop_back(); - yajl_gen_string(g, reinterpret_cast("value"), - strlen("value")); - yajl_gen_string(g, reinterpret_cast( - varValue.c_str()), - varValue.size()); - yajl_gen_map_close(g); + writer.key("value"); + writer.string(varValue); + writer.end_object(); delete t; } - yajl_gen_array_close(g); - - yajl_gen_string(g, reinterpret_cast("operator"), - strlen("operator")); + writer.end_array(); - yajl_gen_map_open(g); + writer.key("operator"); + writer.start_object(); for(auto [it, pending] = std::tuple{ops.rbegin(), ops.size()}; pending > 3; pending -= 3) { - yajl_gen_string(g, reinterpret_cast("highlight"), - strlen("highlight")); 
- yajl_gen_map_open(g); + writer.key("highlight"); + writer.start_object(); it++; const std::string &startingAt = it->str(); it++; const std::string &size = ops.back().str(); it++; - yajl_gen_string(g, - reinterpret_cast("startingAt"), - strlen("startingAt")); - yajl_gen_string(g, - reinterpret_cast(startingAt.c_str()), - startingAt.size()); - yajl_gen_string(g, reinterpret_cast("size"), - strlen("size")); - yajl_gen_string(g, - reinterpret_cast(size.c_str()), - size.size()); - yajl_gen_map_close(g); + writer.key("startingAt"); + writer.string(startingAt); + writer.key("size"); + writer.string(size); + writer.end_object(); if (stoi(startingAt) >= varValue.size()) { *err = "Offset is out of the variable limits."; return -1; } - yajl_gen_string(g, - reinterpret_cast("value"), - strlen("value")); const auto value = std::string(varValue, stoi(startingAt), stoi(size)); - yajl_gen_string(g, - reinterpret_cast(value.c_str()), - value.size()); + writer.key("value"); + writer.string(value); } - yajl_gen_map_close(g); - - - yajl_gen_map_close(g); - yajl_gen_array_close(g); - - yajl_gen_map_close(g); - yajl_gen_array_close(g); - yajl_gen_map_close(g); + writer.end_object(); + writer.end_object(); + writer.end_array(); + writer.end_object(); - yajl_gen_get_buf(g, &buf, &jsonSize); - - json->assign(reinterpret_cast(buf), jsonSize); + json->assign(writer.to_string()); json->append("\n"); - - yajl_gen_free(g); return 0; -#else - *err = "Without YAJL support, we cannot generate JSON."; - return -1; -#endif } diff --git a/src/operators/validate_byte_range.cc b/src/operators/validate_byte_range.cc index 05d06c7880..aea8832c56 100644 --- a/src/operators/validate_byte_range.cc +++ b/src/operators/validate_byte_range.cc @@ -15,6 +15,8 @@ #include "src/operators/validate_byte_range.h" +#include +#include #include #include @@ -23,18 +25,73 @@ namespace modsecurity { namespace operators { +namespace { + +std::string trimCopy(const std::string &value) { + std::string::size_type start = 0; 
+ std::string::size_type end = value.size(); + + while (start < end + && std::isspace(static_cast(value[start]))) { + start++; + } + while (end > start + && std::isspace(static_cast(value[end - 1]))) { + end--; + } + + return value.substr(start, end - start); +} + +bool parseStrictInt(const std::string &value, int *result, std::string *error) { + const std::string trimmed = trimCopy(value); + + if (trimmed.empty()) { + error->assign("Not able to convert '" + value + "' into a number"); + return false; + } + + size_t pos = 0; + + try { + *result = std::stoi(trimmed, &pos); + } catch (...) { + error->assign("Not able to convert '" + trimmed + "' into a number"); + return false; + } + + if (pos != trimmed.size()) { + error->assign("Not able to convert '" + trimmed + "' into a number"); + return false; + } + + return true; +} + +inline void allowByte(std::array *table, int value) { + (*table)[value >> 3] = ((*table)[value >> 3] + | (1U << static_cast(value & 0x7))); +} + +} // namespace + + bool ValidateByteRange::getRange(const std::string &rangeRepresentation, std::string *error) { - size_t pos = rangeRepresentation.find_first_of("-"); - int start; - int end; + return getRange(rangeRepresentation, &table, error); +} + + +bool ValidateByteRange::getRange(const std::string &rangeRepresentation, + std::array *targetTable, + std::string *error) const { + const std::string range = trimCopy(rangeRepresentation); + const size_t pos = range.find_first_of("-"); + int start = 0; + int end = 0; if (pos == std::string::npos) { - try { - start = std::stoi(rangeRepresentation); - } catch(...) 
{ - error->assign("Not able to convert '" + rangeRepresentation + - "' into a number"); + if (parseStrictInt(range, &start, error) == false) { return false; } if ((start < 0) || (start > 255)) { @@ -42,26 +99,16 @@ bool ValidateByteRange::getRange(const std::string &rangeRepresentation, std::to_string(start)); return false; } - table[start >> 3] = (table[start >> 3] | (1 << (start & 0x7))); + allowByte(targetTable, start); return true; } - try { - start = std::stoi(std::string(rangeRepresentation, 0, pos)); - } catch (...) { - error->assign("Not able to convert '" + - std::string(rangeRepresentation, 0, pos) + - "' into a number"); + if (parseStrictInt(std::string(range, 0, pos), &start, error) == false) { return false; } - try { - end = std::stoi(std::string(rangeRepresentation, pos + 1, - rangeRepresentation.length() - (pos + 1))); - } catch (...) { - error->assign("Not able to convert '" + std::string(rangeRepresentation, - pos + 1, rangeRepresentation.length() - (pos + 1)) + - "' into a number"); + if (parseStrictInt(std::string(range, pos + 1, + range.length() - (pos + 1)), &end, error) == false) { return false; } @@ -81,7 +128,7 @@ bool ValidateByteRange::getRange(const std::string &rangeRepresentation, } while (start <= end) { - table[start >> 3] = (table[start >> 3] | (1 << (start & 0x7))); + allowByte(targetTable, start); start++; } @@ -91,34 +138,34 @@ bool ValidateByteRange::getRange(const std::string &rangeRepresentation, bool ValidateByteRange::init(const std::string &file, std::string *error) { - size_t pos = m_param.find_first_of(","); - bool rc; - - if (pos == std::string::npos) { - rc = getRange(m_param, error); - } else { - rc = getRange(std::string(m_param, 0, pos), error); - } - - if (rc == false) { - return false; - } - - while (pos != std::string::npos) { - size_t next_pos = m_param.find_first_of(",", pos + 1); - - if (next_pos == std::string::npos) { - rc = getRange(std::string(m_param, pos + 1, m_param.length() - - (pos + 1)), error); - } 
else { - rc = getRange(std::string(m_param, pos + 1, next_pos - (pos + 1)), error); - } - if (rc == false) { + std::array parsedTable{}; + std::string::size_type pos = 0; + + table.fill('\0'); + + while (true) { + const std::string::size_type nextPos = m_param.find(',', pos); + const std::string token = nextPos == std::string::npos + ? m_param.substr(pos) + : m_param.substr(pos, nextPos - pos); + + if (getRange(token, &parsedTable, error) == false) { + /* + * Keep byte 0 allowed on invalid parameters so callers that + * continue after init() failure keep legacy behaviour. + */ + table[0] = table[0] | 1U; return false; } - pos = next_pos; + + if (nextPos == std::string::npos) { + break; + } + + pos = nextPos + 1; } + table = parsedTable; return true; } diff --git a/src/operators/validate_byte_range.h b/src/operators/validate_byte_range.h index 7551171b01..f9d137a18b 100644 --- a/src/operators/validate_byte_range.h +++ b/src/operators/validate_byte_range.h @@ -16,11 +16,9 @@ #ifndef SRC_OPERATORS_VALIDATE_BYTE_RANGE_H_ #define SRC_OPERATORS_VALIDATE_BYTE_RANGE_H_ +#include #include -#include -#include #include -#include #include "src/operators/operator.h" @@ -32,9 +30,7 @@ class ValidateByteRange : public Operator { public: /** @ingroup ModSecurity_Operator */ explicit ValidateByteRange(std::unique_ptr param) - : Operator("ValidateByteRange", std::move(param)) { - std::memset(table, '\0', sizeof(char) * 32); - } + : Operator("ValidateByteRange", std::move(param)) { } ~ValidateByteRange() override { } bool evaluate(Transaction *transaction, RuleWithActions *rule, @@ -42,9 +38,15 @@ class ValidateByteRange : public Operator { RuleMessage &ruleMessage) override; bool getRange(const std::string &rangeRepresentation, std::string *error); bool init(const std::string& file, std::string *error) override; + private: - std::vector ranges; - char table[32]; + static constexpr size_t kTableSize = 32; + + bool getRange(const std::string &rangeRepresentation, + std::array 
*targetTable, + std::string *error) const; + + std::array table{}; }; } // namespace operators diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index 685675819f..205e8ba4d2 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -22,7 +22,6 @@ libmodsec_parser_la_CPPFLAGS = \ $(GEOIP_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ diff --git a/src/request_body_processor/json.cc b/src/request_body_processor/json.cc index f56704effa..798eb23935 100644 --- a/src/request_body_processor/json.cc +++ b/src/request_body_processor/json.cc @@ -13,15 +13,19 @@ * */ - -#ifdef WITH_YAJL +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include "src/request_body_processor/json.h" -#include -#include +#include +#include #include +#include "src/request_body_processor/json_adapter.h" +#include "src/request_body_processor/json_instrumentation.h" + namespace modsecurity { namespace RequestBodyProcessor { @@ -29,79 +33,87 @@ namespace RequestBodyProcessor { static const double json_depth_limit_default = 10000.0; static const char* json_depth_limit_exceeded_msg = ". 
Parsing depth limit exceeded"; +namespace { + +JsonSinkStatus startContainer(std::deque *containers, + JSONContainer *container, int64_t *current_depth, double max_depth, + bool *depth_limit_exceeded) { + containers->push_back(container); + (*current_depth)++; + if (*current_depth > max_depth) { + *depth_limit_exceeded = true; + return JsonSinkStatus::DepthLimitExceeded; + } + return JsonSinkStatus::Continue; +} + +JsonSinkStatus endContainer(std::deque *containers, + int64_t *current_depth) { + if (containers->empty()) { + return JsonSinkStatus::InternalError; + } + + JSONContainer *container = containers->back(); + containers->pop_back(); + delete container; + + if (containers->empty() == false) { + JSONContainerArray *array = dynamic_cast( + containers->back()); + if (array != nullptr) { + array->m_elementCounter++; + } + } + + (*current_depth)--; + if (*current_depth < 0) { + *current_depth = 0; + return JsonSinkStatus::InternalError; + } + + return JsonSinkStatus::Continue; +} + +} // namespace + JSON::JSON(Transaction *transaction) : m_transaction(transaction), - m_handle(NULL), m_current_key(""), + m_data(""), m_max_depth(json_depth_limit_default), m_current_depth(0), m_depth_limit_exceeded(false) { - /** - * yajl callback functions - * For more information on the function signatures and order, check - * http://lloyd.github.com/yajl/yajl-1.0.12/structyajl__callbacks.html - */ - - /** - * yajl configuration and callbacks - */ - static yajl_callbacks callbacks = { - yajl_null, - yajl_boolean, - NULL /* yajl_integer */, - NULL /* yajl_double */, - yajl_number, - yajl_string, - yajl_start_map, - yajl_map_key, - yajl_end_map, - yajl_start_array, - yajl_end_array - }; - - - /** - * yajl initialization - * - * yajl_parser_config definition: - * http://lloyd.github.io/yajl/yajl-2.0.1/yajl__parse_8h.html#aec816c5518264d2ac41c05469a0f986c - * - * TODO: make UTF8 validation optional, as it depends on Content-Encoding - */ - m_handle = yajl_alloc(&callbacks, NULL, 
this); - - yajl_config(m_handle, yajl_allow_partial_values, 0); } JSON::~JSON() { - while (m_containers.size() > 0) { - JSONContainer *a = m_containers.back(); - m_containers.pop_back(); - delete a; - } - yajl_free(m_handle); + clearContainers(); } bool JSON::init() { + clearContainers(); + m_current_key.clear(); + m_data.clear(); + m_current_depth = 0; + m_depth_limit_exceeded = false; + return true; } -bool JSON::processChunk(const char *buf, unsigned int size, std::string *err) { - /* Feed our parser and catch any errors */ - m_status = yajl_parse(m_handle, - (const unsigned char *)buf, size); - if (m_status != yajl_status_ok) { - unsigned char *e = yajl_get_error(m_handle, 0, - (const unsigned char *)buf, size); - /* We need to free the yajl error message later, how to do this? */ - err->assign((const char *)e); - if (m_depth_limit_exceeded) { - err->append(json_depth_limit_exceeded_msg); - } - yajl_free_error(m_handle, e); - return false; +bool JSON::processChunk(const char *buf, unsigned int size, + const std::string *err) { + (void) err; + if (buf != nullptr && size > 0) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto start_time = std::chrono::steady_clock::now(); + m_data.append(buf, size); + recordJsonProcessChunkAppend(size, static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time).count())); +#else + m_data.append(buf, size); +#endif } return true; @@ -109,16 +121,63 @@ bool JSON::processChunk(const char *buf, unsigned int size, std::string *err) { bool JSON::complete(std::string *err) { - /* Wrap up the parsing process */ - m_status = yajl_complete_parse(m_handle); - if (m_status != yajl_status_ok) { - unsigned char *e = yajl_get_error(m_handle, 0, NULL, 0); - /* We need to free the yajl error message later, how to do this? 
*/ - err->assign((const char *)e); - if (m_depth_limit_exceeded) { + if (m_data.empty()) { + return true; + } + + JSONAdapter adapter; + JsonParseResult result = adapter.parse(m_data, + static_cast(this)); + + if (!result.ok()) { + if (result.sink_status == JsonSinkStatus::DepthLimitExceeded) { + m_depth_limit_exceeded = true; + } + if (err != nullptr) { + switch (result.parse_status) { + case JsonParseStatus::ParseError: + if (result.detail.empty()) { + err->assign("Invalid JSON body."); + } else { + err->assign(result.detail); + } + break; + case JsonParseStatus::TruncatedInput: + if (result.detail.empty()) { + err->assign("Incomplete JSON body."); + } else { + err->assign(result.detail); + } + break; + case JsonParseStatus::Utf8Error: + if (result.detail.empty()) { + err->assign("Invalid UTF-8 in JSON body."); + } else { + err->assign(result.detail); + } + break; + case JsonParseStatus::EngineAbort: + if (result.detail.empty()) { + err->assign("JSON traversal aborted by ModSecurity."); + } else { + err->assign(result.detail); + } + break; + case JsonParseStatus::InternalError: + if (result.detail.empty()) { + err->assign("Internal JSON backend failure."); + } else { + err->assign(result.detail); + } + break; + case JsonParseStatus::Ok: + err->clear(); + break; + } + } + if (m_depth_limit_exceeded && err != nullptr) { err->append(json_depth_limit_exceeded_msg); - } - yajl_free_error(m_handle, e); + } return false; } @@ -163,158 +222,68 @@ int JSON::addArgument(const std::string& value) { } -/** - * Callback for hash key values; we use those to define the variable names - * under ARGS. Whenever we reach a new key, we update the current key value. 
- */ -int JSON::yajl_map_key(void *ctx, const unsigned char *key, size_t length) { - JSON *tthis = reinterpret_cast(ctx); - std::string safe_key; - - /** - * yajl does not provide us with null-terminated strings, but - * rather expects us to copy the data from the key up to the - * length informed; we create a standalone null-termined copy - * in safe_key - */ - safe_key.assign((const char *)key, length); - - tthis->m_current_key = safe_key; - - return 1; +JsonSinkStatus JSON::on_key(std::string_view value) { + m_current_key.assign(value.data(), value.size()); + return JsonSinkStatus::Continue; } -/** - * Callback for null values - * - */ -int JSON::yajl_null(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - return tthis->addArgument(""); +JsonSinkStatus JSON::on_null() { + return addArgument("") != 0 ? JsonSinkStatus::Continue + : JsonSinkStatus::EngineAbort; } -/** - * Callback for boolean values - */ -int JSON::yajl_boolean(void *ctx, int value) { - JSON *tthis = reinterpret_cast(ctx); +JsonSinkStatus JSON::on_boolean(bool value) { if (value) { - return tthis->addArgument("true"); + return addArgument("true") != 0 ? JsonSinkStatus::Continue + : JsonSinkStatus::EngineAbort; } - return tthis->addArgument("false"); + return addArgument("false") != 0 ? JsonSinkStatus::Continue + : JsonSinkStatus::EngineAbort; } -/** - * Callback for string values - */ -int JSON::yajl_string(void *ctx, const unsigned char *value, size_t length) { - JSON *tthis = reinterpret_cast(ctx); - std::string v = std::string((const char*)value, length); - return tthis->addArgument(v); +JsonSinkStatus JSON::on_string(std::string_view value) { + return addArgument(std::string(value.data(), value.size())) != 0 + ? 
JsonSinkStatus::Continue : JsonSinkStatus::EngineAbort; } -/** - * Callback for numbers; YAJL can use separate callbacks for integers/longs and - * float/double values, but since we are not interested in using the numeric - * values here, we use a generic handler which uses numeric strings - */ -int JSON::yajl_number(void *ctx, const char *value, size_t length) { - JSON *tthis = reinterpret_cast(ctx); - std::string v = std::string((const char*)value, length); - return tthis->addArgument(v); +JsonSinkStatus JSON::on_number(std::string_view value) { + return addArgument(std::string(value.data(), value.size())) != 0 + ? JsonSinkStatus::Continue : JsonSinkStatus::EngineAbort; } -/** - * Callback for a new hash, which indicates a new subtree, labeled as the - * current argument name, is being created - */ -int JSON::yajl_start_array(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - std::string name = tthis->getCurrentKey(); - tthis->m_containers.push_back( - reinterpret_cast(new JSONContainerArray(name))); - tthis->m_current_depth++; - if (tthis->m_current_depth > tthis->m_max_depth) { - tthis->m_depth_limit_exceeded = true; - return 0; - } - return 1; +JsonSinkStatus JSON::on_start_array() { + return startContainer(&m_containers, new JSONContainerArray(getCurrentKey()), + &m_current_depth, m_max_depth, &m_depth_limit_exceeded); } -int JSON::yajl_end_array(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - if (tthis->m_containers.empty()) { - tthis->m_current_depth--; - return 1; - } - - JSONContainer *a = tthis->m_containers.back(); - tthis->m_containers.pop_back(); - delete a; - if (tthis->m_containers.size() > 0) { - JSONContainerArray *ja = dynamic_cast( - tthis->m_containers.back()); - if (ja) { - ja->m_elementCounter++; - } - } - tthis->m_current_depth--; - - return 1; +JsonSinkStatus JSON::on_end_array() { + return endContainer(&m_containers, &m_current_depth); } -int JSON::yajl_start_map(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - std::string 
name(tthis->getCurrentKey()); - tthis->m_containers.push_back( - reinterpret_cast(new JSONContainerMap(name))); - tthis->m_current_depth++; - if (tthis->m_current_depth > tthis->m_max_depth) { - tthis->m_depth_limit_exceeded = true; - return 0; - } - return 1; +JsonSinkStatus JSON::on_start_object() { + return startContainer(&m_containers, new JSONContainerMap(getCurrentKey()), + &m_current_depth, m_max_depth, &m_depth_limit_exceeded); } -/** - * Callback for end hash, meaning the current subtree is being closed, and that - * we should go back to the parent variable label - */ -int JSON::yajl_end_map(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - if (tthis->m_containers.empty()) { - tthis->m_current_depth--; - return 1; - } - - JSONContainer *a = tthis->m_containers.back(); - tthis->m_containers.pop_back(); - delete a; +JsonSinkStatus JSON::on_end_object() { + return endContainer(&m_containers, &m_current_depth); +} - if (tthis->m_containers.size() > 0) { - JSONContainerArray *ja = dynamic_cast( - tthis->m_containers.back()); - if (ja) { - ja->m_elementCounter++; - } +void JSON::clearContainers() { + while (m_containers.size() > 0) { + JSONContainer *a = m_containers.back(); + m_containers.pop_back(); + delete a; } - - tthis->m_current_depth--; - return 1; } - } // namespace RequestBodyProcessor } // namespace modsecurity - - -#endif // WITH_YAJL - diff --git a/src/request_body_processor/json.h b/src/request_body_processor/json.h index 961ea94ea8..53af13c4a8 100644 --- a/src/request_body_processor/json.h +++ b/src/request_body_processor/json.h @@ -16,17 +16,12 @@ #ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_H_ #define SRC_REQUEST_BODY_PROCESSOR_JSON_H_ - -#ifdef WITH_YAJL - -#include - -#include -#include #include +#include #include "modsecurity/transaction.h" #include "modsecurity/rules_set.h" +#include "src/request_body_processor/json_backend.h" namespace modsecurity { @@ -55,28 +50,27 @@ class JSONContainerMap : public JSONContainer { }; -class JSON { +class 
JSON : public JsonEventSink { public: explicit JSON(Transaction *transaction); - ~JSON(); + ~JSON() override; - static bool init(); - bool processChunk(const char *buf, unsigned int size, std::string *err); + bool init(); + bool processChunk(const char *buf, unsigned int size, + const std::string *err); bool complete(std::string *err); int addArgument(const std::string& value); - static int yajl_number(void *ctx, const char *value, size_t length); - static int yajl_string(void *ctx, const unsigned char *value, - size_t length); - static int yajl_boolean(void *ctx, int value); - static int yajl_null(void *ctx); - static int yajl_map_key(void *ctx, const unsigned char *key, - size_t length); - static int yajl_end_map(void *ctx); - static int yajl_start_map(void *ctx); - static int yajl_start_array(void *ctx); - static int yajl_end_array(void *ctx); + JsonSinkStatus on_start_object() override; + JsonSinkStatus on_end_object() override; + JsonSinkStatus on_start_array() override; + JsonSinkStatus on_end_array() override; + JsonSinkStatus on_key(std::string_view value) override; + JsonSinkStatus on_string(std::string_view value) override; + JsonSinkStatus on_number(std::string_view value) override; + JsonSinkStatus on_boolean(bool value) override; + JsonSinkStatus on_null() override; bool isPreviousArray() const { const JSONContainerArray *prev = NULL; @@ -108,11 +102,12 @@ class JSON { } private: + void clearContainers(); + std::deque m_containers; Transaction *m_transaction; - yajl_handle m_handle; - yajl_status m_status; std::string m_current_key; + std::string m_data; double m_max_depth; int64_t m_current_depth; bool m_depth_limit_exceeded; @@ -122,7 +117,4 @@ class JSON { } // namespace RequestBodyProcessor } // namespace modsecurity -#endif // WITH_YAJL - #endif // SRC_REQUEST_BODY_PROCESSOR_JSON_H_ - diff --git a/src/request_body_processor/json_adapter.cc b/src/request_body_processor/json_adapter.cc new file mode 100644 index 0000000000..e22dad653b --- /dev/null 
+++ b/src/request_body_processor/json_adapter.cc @@ -0,0 +1,104 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include "src/request_body_processor/json_adapter.h" + +#include + +#include "src/config.h" + +namespace modsecurity { +namespace RequestBodyProcessor { +namespace { + +JsonParseResult makeResult(JsonParseStatus parse_status, + JsonSinkStatus sink_status = JsonSinkStatus::Continue, + std::string detail = "") { + return JsonParseResult{parse_status, sink_status, std::move(detail)}; +} + +JsonParseResult normalizeResult(JsonParseResult result) { + if (result.parse_status != JsonParseStatus::Ok) { + return result; + } + + switch (result.sink_status) { + case JsonSinkStatus::Continue: + return result; + case JsonSinkStatus::EngineAbort: + result.parse_status = JsonParseStatus::EngineAbort; + return result; + case JsonSinkStatus::DepthLimitExceeded: + result.parse_status = JsonParseStatus::ParseError; + return result; + case JsonSinkStatus::InternalError: + result.parse_status = JsonParseStatus::InternalError; + return result; + } + + result.parse_status = JsonParseStatus::InternalError; + result.sink_status = JsonSinkStatus::InternalError; + result.detail.assign("Unknown JSON sink status."); + return result; +} + +} // namespace + +JsonParseResult JSONAdapter::parse(std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options) const { + if (sink == nullptr) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, "JSON event 
sink is null."); + } + + if (input.empty()) { + return makeResult(JsonParseStatus::Ok); + } + +#if defined(MSC_JSON_BACKEND_SIMDJSON) + return normalizeResult(parseDocumentWithSimdjson(input, sink, options)); +#elif defined(MSC_JSON_BACKEND_JSONCONS) + return normalizeResult(parseDocumentWithJsoncons(input, sink, options)); +#else + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, + "ModSecurity was built without a selected JSON backend."); +#endif +} + +JsonParseResult JSONAdapter::parse(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options) const { + if (sink == nullptr) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, "JSON event sink is null."); + } + + if (input.empty()) { + return makeResult(JsonParseStatus::Ok); + } + +#if defined(MSC_JSON_BACKEND_SIMDJSON) + return normalizeResult(parseDocumentWithSimdjson(input, sink, options)); +#elif defined(MSC_JSON_BACKEND_JSONCONS) + return normalizeResult(parseDocumentWithJsoncons(input, sink, options)); +#else + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, + "ModSecurity was built without a selected JSON backend."); +#endif +} + +} // namespace RequestBodyProcessor +} // namespace modsecurity diff --git a/src/request_body_processor/json_adapter.h b/src/request_body_processor/json_adapter.h new file mode 100644 index 0000000000..11c1cfe217 --- /dev/null +++ b/src/request_body_processor/json_adapter.h @@ -0,0 +1,38 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. 
+ * directly using the email address security@modsecurity.org. + * + */ + +#ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_ADAPTER_H_ +#define SRC_REQUEST_BODY_PROCESSOR_JSON_ADAPTER_H_ + +#include + +#include "src/request_body_processor/json_backend.h" + +namespace modsecurity { +namespace RequestBodyProcessor { + +class JSONAdapter { + public: + JsonParseResult parse(std::string &input, JsonEventSink *sink, + const JsonBackendParseOptions &options = JsonBackendParseOptions()) const; + + JsonParseResult parse(const std::string &input, JsonEventSink *sink, + const JsonBackendParseOptions &options = JsonBackendParseOptions()) const; +}; + +} // namespace RequestBodyProcessor +} // namespace modsecurity + +#endif // SRC_REQUEST_BODY_PROCESSOR_JSON_ADAPTER_H_ diff --git a/src/request_body_processor/json_backend.h b/src/request_body_processor/json_backend.h new file mode 100644 index 0000000000..749d3401e2 --- /dev/null +++ b/src/request_body_processor/json_backend.h @@ -0,0 +1,83 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ + +#ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_H_ +#define SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_H_ + +#include +#include + +namespace modsecurity { +namespace RequestBodyProcessor { + +enum class JsonParseStatus { + Ok, + ParseError, + TruncatedInput, + Utf8Error, + EngineAbort, + InternalError +}; + +enum class JsonSinkStatus { + Continue, + EngineAbort, + DepthLimitExceeded, + InternalError +}; + +struct JsonParseResult { + JsonParseStatus parse_status{JsonParseStatus::Ok}; + JsonSinkStatus sink_status{JsonSinkStatus::Continue}; + std::string detail; + + bool ok() const { + return parse_status == JsonParseStatus::Ok + && sink_status == JsonSinkStatus::Continue; + } +}; + +struct JsonBackendParseOptions { + int technical_max_depth{1048576}; +}; + +class JsonEventSink { + public: + virtual ~JsonEventSink() = default; + + virtual JsonSinkStatus on_start_object() = 0; + virtual JsonSinkStatus on_end_object() = 0; + virtual JsonSinkStatus on_start_array() = 0; + virtual JsonSinkStatus on_end_array() = 0; + virtual JsonSinkStatus on_key(std::string_view value) = 0; + virtual JsonSinkStatus on_string(std::string_view value) = 0; + virtual JsonSinkStatus on_number(std::string_view raw_number) = 0; + virtual JsonSinkStatus on_boolean(bool value) = 0; + virtual JsonSinkStatus on_null() = 0; +}; + +JsonParseResult parseDocumentWithSimdjson(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options); + +JsonParseResult parseDocumentWithSimdjson(std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options); + +JsonParseResult parseDocumentWithJsoncons(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options); + +} // namespace RequestBodyProcessor +} // namespace modsecurity + +#endif // SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_H_ diff --git a/src/request_body_processor/json_backend_jsoncons.cc b/src/request_body_processor/json_backend_jsoncons.cc new file mode 100644 index 
0000000000..fab4554acb --- /dev/null +++ b/src/request_body_processor/json_backend_jsoncons.cc @@ -0,0 +1,792 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "src/request_body_processor/json_backend.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "src/request_body_processor/json_instrumentation.h" +#include +#include +#include +#include + +namespace modsecurity { +namespace RequestBodyProcessor { +namespace { + +JsonParseResult makeResult(JsonParseStatus parse_status, + JsonSinkStatus sink_status = JsonSinkStatus::Continue, + std::string detail = "") { + return JsonParseResult{parse_status, sink_status, std::move(detail)}; +} + +JsonParseResult makeResult(JsonParseStatus parse_status, std::string detail) { + return makeResult(parse_status, JsonSinkStatus::Continue, std::move(detail)); +} + +JsonParseResult stopTraversal(JsonSinkStatus sink_status, + std::string_view location) { + return makeResult(JsonParseStatus::Ok, sink_status, + std::string("JSON traversal stopped while ") + std::string(location) + + "."); +} + +bool isUtf8RelatedError(const std::error_code &error) { + switch (static_cast(error.value())) { + case jsoncons::json_errc::illegal_character_in_string: + case jsoncons::json_errc::illegal_control_character: + case jsoncons::json_errc::illegal_escaped_character: + case jsoncons::json_errc::expected_codepoint_surrogate_pair: + case 
jsoncons::json_errc::invalid_hex_escape_sequence: + case jsoncons::json_errc::invalid_unicode_escape_sequence: + case jsoncons::json_errc::expected_continuation_byte: + case jsoncons::json_errc::over_long_utf8_sequence: + case jsoncons::json_errc::illegal_codepoint: + case jsoncons::json_errc::illegal_surrogate_value: + case jsoncons::json_errc::unpaired_high_surrogate: + case jsoncons::json_errc::illegal_unicode_character: + return true; + default: + return false; + } +} + +JsonParseResult fromJsonconsError(const std::error_code &error, + const jsoncons::ser_context &context) { + std::string detail = error.message() + " at line " + + std::to_string(context.line()) + ", column " + + std::to_string(context.column()) + "."; + + switch (static_cast(error.value())) { + case jsoncons::json_errc::unexpected_eof: + return makeResult(JsonParseStatus::TruncatedInput, + JsonSinkStatus::Continue, detail); + case jsoncons::json_errc::max_nesting_depth_exceeded: + return makeResult(JsonParseStatus::ParseError, + JsonSinkStatus::Continue, detail); + case jsoncons::json_errc::source_error: + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, detail); + default: + if (isUtf8RelatedError(error)) { + return makeResult(JsonParseStatus::Utf8Error, + JsonSinkStatus::Continue, detail); + } + return makeResult(JsonParseStatus::ParseError, + JsonSinkStatus::Continue, detail); + } +} + +bool isDigit(char value) { + return std::isdigit(static_cast(value)) != 0; +} + +bool isValidJsonNumber(std::string_view token) { + std::size_t index = 0; + + if (token.empty()) { + return false; + } + + if (token[index] == '-') { + index++; + if (index == token.size()) { + return false; + } + } + + if (token[index] == '0') { + index++; + } else { + if (!isDigit(token[index]) || token[index] == '0') { + return false; + } + while (index < token.size() && isDigit(token[index])) { + index++; + } + } + + if (index < token.size() && token[index] == '.') { + index++; + if (index == 
token.size() || !isDigit(token[index])) { + return false; + } + while (index < token.size() && isDigit(token[index])) { + index++; + } + } + + if (index < token.size() && (token[index] == 'e' || token[index] == 'E')) { + index++; + if (index < token.size() && (token[index] == '+' || token[index] == '-')) { + index++; + } + if (index == token.size() || !isDigit(token[index])) { + return false; + } + while (index < token.size() && isDigit(token[index])) { + index++; + } + } + + return index == token.size(); +} + +bool tokenMatchesNumericEvent(jsoncons::staj_event_type event_type, + std::string_view token) { + if (!isValidJsonNumber(token)) { + return false; + } + + if (event_type == jsoncons::staj_event_type::int64_value + || event_type == jsoncons::staj_event_type::uint64_value) { + return token.find_first_of(".eE") == std::string_view::npos; + } + + return true; +} + +bool isNumericEventType(jsoncons::staj_event_type event_type) { + switch (event_type) { + case jsoncons::staj_event_type::int64_value: + case jsoncons::staj_event_type::uint64_value: + case jsoncons::staj_event_type::double_value: + case jsoncons::staj_event_type::half_value: + return true; + default: + return false; + } +} + +bool isNumericStringEvent(const jsoncons::staj_event &event) { + return event.event_type() == jsoncons::staj_event_type::string_value + && (event.tag() == jsoncons::semantic_tag::bigint + || event.tag() == jsoncons::semantic_tag::bigdec); +} + +class RawJsonTokenCursor { + public: + explicit RawJsonTokenCursor(const std::string &input) + : m_input(input) { } + + bool consume(const jsoncons::staj_event &event, std::string_view *raw_token, + std::string *detail) { + skipInsignificant(); + + if (isNumericEventType(event.event_type()) || isNumericStringEvent(event)) { + return consumeNumber(raw_token, detail); + } + + switch (event.event_type()) { + case jsoncons::staj_event_type::begin_object: + return consumeChar('{', raw_token, detail); + case 
jsoncons::staj_event_type::end_object: + return consumeChar('}', raw_token, detail); + case jsoncons::staj_event_type::begin_array: + return consumeChar('[', raw_token, detail); + case jsoncons::staj_event_type::end_array: + return consumeChar(']', raw_token, detail); + case jsoncons::staj_event_type::key: + case jsoncons::staj_event_type::string_value: + return consumeString(raw_token, detail); + case jsoncons::staj_event_type::null_value: + return consumeLiteral("null", raw_token, detail); + case jsoncons::staj_event_type::bool_value: { + std::error_code error; + const bool value = event.get(error); + if (error) { + if (detail != nullptr) { + *detail = std::string("Unable to decode boolean event while synchronizing raw token cursor: ") + + error.message(); + } + return false; + } + return consumeLiteral(value ? "true" : "false", raw_token, detail); + } + case jsoncons::staj_event_type::byte_string_value: + if (detail != nullptr) { + *detail = "Unsupported byte-string event encountered in jsoncons backend."; + } + return false; + } + + if (detail != nullptr) { + *detail = "Unsupported STAJ event encountered while synchronizing raw JSON tokens."; + } + return false; + } + + bool consumeNextNumberToken(std::string_view *raw_token, + std::string *detail) { + std::size_t probe_offset = m_offset; + if (!skipToNextNumberToken(&probe_offset, detail)) { + return false; + } + if (!consumeNumberAt(&probe_offset, raw_token, detail)) { + return false; + } + m_offset = probe_offset; + return true; + } + + bool advanceExactNumber(std::string_view exact_number, std::string *detail) { + if (!isValidJsonNumber(exact_number)) { + if (detail != nullptr) { + *detail = "Unable to advance raw JSON number cursor using a non-numeric token."; + } + return false; + } + + std::size_t probe_offset = m_offset; + if (!skipToNextNumberToken(&probe_offset, detail)) { + return false; + } + if (probe_offset + exact_number.size() > m_input.size() + || m_input.compare(probe_offset, 
exact_number.size(), exact_number) + != 0) { + if (detail != nullptr) { + *detail = "Exact raw JSON number token did not match jsoncons numeric lexeme."; + } + return false; + } + + const std::size_t next_offset = probe_offset + exact_number.size(); + if (next_offset < m_input.size() + && !isNumberBoundary(m_input[next_offset])) { + if (detail != nullptr) { + *detail = "Exact raw JSON number token was followed by additional numeric characters."; + } + return false; + } + + m_offset = next_offset; + return true; + } + + private: + static bool isWhitespace(char value) { + return std::isspace(static_cast(value)) != 0; + } + + static bool isHexDigit(char value) { + return std::isxdigit(static_cast(value)) != 0; + } + + static bool isNumberBoundary(char value) { + return isWhitespace(value) || value == ',' || value == ']' || value == '}'; + } + + void skipInsignificant() { + skipInsignificantAt(&m_offset); + } + + void skipInsignificantAt(std::size_t *offset) const { + while (*offset < m_input.size()) { + char current = m_input[*offset]; + if (isWhitespace(current) || current == ',' || current == ':') { + (*offset)++; + continue; + } + break; + } + } + + bool consumeChar(char expected, std::string_view *raw_token, + std::string *detail) { + return consumeCharAt(&m_offset, expected, raw_token, detail); + } + + bool consumeCharAt(std::size_t *offset, char expected, + std::string_view *raw_token, std::string *detail) const { + if (*offset >= m_input.size() || m_input[*offset] != expected) { + if (detail != nullptr) { + *detail = std::string("Expected raw JSON token '") + expected + + "' while synchronizing jsoncons events."; + } + return false; + } + + *raw_token = std::string_view(m_input.data() + *offset, 1); + (*offset)++; + return true; + } + + bool consumeLiteral(const char *literal, std::string_view *raw_token, + std::string *detail) { + return consumeLiteralAt(&m_offset, literal, raw_token, detail); + } + + bool consumeLiteralAt(std::size_t *offset, const char 
*literal, + std::string_view *raw_token, std::string *detail) const { + const std::size_t length = std::char_traits::length(literal); + if (*offset + length > m_input.size() + || m_input.compare(*offset, length, literal) != 0) { + if (detail != nullptr) { + *detail = std::string("Expected raw JSON literal '") + literal + + "' while synchronizing jsoncons events."; + } + return false; + } + + *raw_token = std::string_view(m_input.data() + *offset, length); + *offset += length; + return true; + } + + bool consumeString(std::string_view *raw_token, std::string *detail) { + return consumeStringAt(&m_offset, raw_token, detail); + } + + bool consumeStringAt(std::size_t *offset, std::string_view *raw_token, + std::string *detail) const { + const std::size_t start = *offset; + + if (*offset >= m_input.size() || m_input[*offset] != '"') { + if (detail != nullptr) { + *detail = "Expected raw JSON string token while synchronizing jsoncons events."; + } + return false; + } + + (*offset)++; + while (*offset < m_input.size()) { + char current = m_input[(*offset)++]; + if (current == '\\') { + if (*offset >= m_input.size()) { + if (detail != nullptr) { + *detail = "Truncated escape sequence while synchronizing raw JSON string token."; + } + return false; + } + + char escaped = m_input[(*offset)++]; + if (escaped == 'u') { + for (int i = 0; i < 4; i++) { + if (*offset >= m_input.size() + || !isHexDigit(m_input[*offset])) { + if (detail != nullptr) { + *detail = "Invalid Unicode escape while synchronizing raw JSON string token."; + } + return false; + } + (*offset)++; + } + } + continue; + } + + if (current == '"') { + *raw_token = std::string_view(m_input.data() + start, + *offset - start); + return true; + } + + if (static_cast(current) < 0x20) { + if (detail != nullptr) { + *detail = "Unexpected control character while synchronizing raw JSON string token."; + } + return false; + } + } + + if (detail != nullptr) { + *detail = "Unterminated string token while synchronizing 
jsoncons events."; + } + return false; + } + + bool consumeNumber(std::string_view *raw_token, std::string *detail) { + return consumeNumberAt(&m_offset, raw_token, detail); + } + + bool consumeNumberAt(std::size_t *offset, std::string_view *raw_token, + std::string *detail) const { + const std::size_t start = *offset; + + if (*offset < m_input.size() && m_input[*offset] == '-') { + (*offset)++; + } + + if (*offset >= m_input.size()) { + if (detail != nullptr) { + *detail = "Unexpected end of input while synchronizing raw JSON number token."; + } + return false; + } + + if (m_input[*offset] == '0') { + (*offset)++; + } else { + if (!isDigit(m_input[*offset]) || m_input[*offset] == '0') { + if (detail != nullptr) { + *detail = "Invalid integer component while synchronizing raw JSON number token."; + } + return false; + } + while (*offset < m_input.size() && isDigit(m_input[*offset])) { + (*offset)++; + } + } + + if (*offset < m_input.size() && m_input[*offset] == '.') { + (*offset)++; + if (*offset >= m_input.size() || !isDigit(m_input[*offset])) { + if (detail != nullptr) { + *detail = "Invalid fraction component while synchronizing raw JSON number token."; + } + return false; + } + while (*offset < m_input.size() && isDigit(m_input[*offset])) { + (*offset)++; + } + } + + if (*offset < m_input.size() + && (m_input[*offset] == 'e' || m_input[*offset] == 'E')) { + (*offset)++; + if (*offset < m_input.size() + && (m_input[*offset] == '+' || m_input[*offset] == '-')) { + (*offset)++; + } + if (*offset >= m_input.size() || !isDigit(m_input[*offset])) { + if (detail != nullptr) { + *detail = "Invalid exponent component while synchronizing raw JSON number token."; + } + return false; + } + while (*offset < m_input.size() && isDigit(m_input[*offset])) { + (*offset)++; + } + } + + *raw_token = std::string_view(m_input.data() + start, *offset - start); + return true; + } + + bool skipTokenAt(std::size_t *offset, std::string *detail) const { + std::string_view ignored; + if 
(*offset >= m_input.size()) { + if (detail != nullptr) { + *detail = "Unexpected end of input while searching for a raw JSON number token."; + } + return false; + } + + switch (m_input[*offset]) { + case '{': + return consumeCharAt(offset, '{', &ignored, detail); + case '}': + return consumeCharAt(offset, '}', &ignored, detail); + case '[': + return consumeCharAt(offset, '[', &ignored, detail); + case ']': + return consumeCharAt(offset, ']', &ignored, detail); + case '"': + return consumeStringAt(offset, &ignored, detail); + case 't': + return consumeLiteralAt(offset, "true", &ignored, detail); + case 'f': + return consumeLiteralAt(offset, "false", &ignored, detail); + case 'n': + return consumeLiteralAt(offset, "null", &ignored, detail); + default: + if (detail != nullptr) { + *detail = "Unable to locate the next raw JSON number token while synchronizing jsoncons events."; + } + return false; + } + } + + bool skipToNextNumberToken(std::size_t *offset, std::string *detail) const { + while (true) { + skipInsignificantAt(offset); + if (*offset >= m_input.size()) { + if (detail != nullptr) { + *detail = "Unexpected end of input while searching for a raw JSON number token."; + } + return false; + } + if (m_input[*offset] == '-' || isDigit(m_input[*offset])) { + return true; + } + if (!skipTokenAt(offset, detail)) { + return false; + } + } + } + + const std::string &m_input; + std::size_t m_offset{0}; +}; + +std::string_view rawNumberFromContext(const std::string &input, + jsoncons::staj_event_type event_type, const jsoncons::ser_context &context, + const jsoncons::staj_event &event, std::string_view scanned_token) { + const std::size_t begin = context.begin_position(); + const std::size_t end = context.end_position(); + + if (begin < end && end <= input.size()) { + std::string_view candidate(input.data() + begin, end - begin); + if (tokenMatchesNumericEvent(event_type, candidate)) { + return candidate; + } + } + + if (tokenMatchesNumericEvent(event_type, 
scanned_token)) { + return scanned_token; + } + + if (isNumericStringEvent(event)) { + std::error_code error; + jsoncons::string_view decoded = event.get(error); + if (error) { + return std::string_view(); + } + if (isValidJsonNumber(std::string_view(decoded.data(), decoded.size()))) { + return std::string_view(decoded.data(), decoded.size()); + } + return std::string_view(); + } + + return std::string_view(); +} + +JsonParseResult emitEvent(const std::string &input, JsonEventSink *sink, + RawJsonTokenCursor *token_cursor, const jsoncons::staj_event &event, + const jsoncons::ser_context &context) { + JsonSinkStatus sink_status = JsonSinkStatus::Continue; + std::error_code error; + std::string_view raw_token; + std::string sync_detail; + + switch (event.event_type()) { + case jsoncons::staj_event_type::begin_object: + sink_status = sink->on_start_object(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "starting an object"); + } + return makeResult(JsonParseStatus::Ok); + case jsoncons::staj_event_type::end_object: + sink_status = sink->on_end_object(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "ending an object"); + } + return makeResult(JsonParseStatus::Ok); + case jsoncons::staj_event_type::begin_array: + sink_status = sink->on_start_array(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "starting an array"); + } + return makeResult(JsonParseStatus::Ok); + case jsoncons::staj_event_type::end_array: + sink_status = sink->on_end_array(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "ending an array"); + } + return makeResult(JsonParseStatus::Ok); + case jsoncons::staj_event_type::key: { + jsoncons::string_view decoded = event.get(error); + if (error) { + return fromJsonconsError(error, context); + } + sink_status = sink->on_key(std::string_view(decoded.data(), + decoded.size())); + if (sink_status != 
JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "processing an object key"); + } + return makeResult(JsonParseStatus::Ok); + } + case jsoncons::staj_event_type::string_value: { + jsoncons::string_view decoded = event.get(error); + if (error) { + return fromJsonconsError(error, context); + } + if (isNumericStringEvent(event)) { + const std::string_view decoded_number(decoded.data(), decoded.size()); + if (isValidJsonNumber(decoded_number) + && token_cursor->advanceExactNumber(decoded_number, + &sync_detail)) { + recordJsonconsTokenExactAdvanceStep(); + sink_status = sink->on_number(decoded_number); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a number"); + } + return makeResult(JsonParseStatus::Ok); + } + if (!token_cursor->consumeNextNumberToken(&raw_token, + &sync_detail)) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, sync_detail); + } + recordJsonconsTokenSyncStep(); + std::string_view raw_number = rawNumberFromContext(input, + jsoncons::staj_event_type::double_value, context, event, + raw_token); + if (raw_number.empty()) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, + "Unable to materialize numeric JSON token from jsoncons backend."); + } + sink_status = sink->on_number(raw_number); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a number"); + } + return makeResult(JsonParseStatus::Ok); + } + sink_status = sink->on_string(std::string_view(decoded.data(), + decoded.size())); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a string"); + } + return makeResult(JsonParseStatus::Ok); + } + case jsoncons::staj_event_type::null_value: + sink_status = sink->on_null(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a null value"); + } + return makeResult(JsonParseStatus::Ok); + case 
jsoncons::staj_event_type::bool_value: + { + bool boolean_value = event.get(error); + if (error) { + return fromJsonconsError(error, context); + } + sink_status = sink->on_boolean(boolean_value); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a boolean"); + } + return makeResult(JsonParseStatus::Ok); + } + case jsoncons::staj_event_type::int64_value: + case jsoncons::staj_event_type::uint64_value: + case jsoncons::staj_event_type::double_value: + case jsoncons::staj_event_type::half_value: { + if (!token_cursor->consumeNextNumberToken(&raw_token, + &sync_detail)) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, sync_detail); + } + recordJsonconsTokenSyncStep(); + std::string_view raw_number = rawNumberFromContext(input, + event.event_type(), context, event, raw_token); + if (raw_number.empty()) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, + "Unable to materialize numeric JSON token from jsoncons backend."); + } + sink_status = sink->on_number(raw_number); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a number"); + } + return makeResult(JsonParseStatus::Ok); + } + default: + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, + "Unsupported JSON token type encountered in jsoncons backend."); + } +} + +} // namespace + +JsonParseResult parseDocumentWithJsoncons(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options) { + if (sink == nullptr) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, "JSON event sink is null."); + } + + jsoncons::json_options cursor_options; + cursor_options.max_nesting_depth(options.technical_max_depth); + cursor_options.lossless_number(true); + cursor_options.lossless_bignum(true); + + std::error_code error; +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto cursor_start = 
std::chrono::steady_clock::now(); + jsoncons::json_string_cursor cursor(input, cursor_options, error); + recordJsonconsCursorInit(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - cursor_start).count())); +#else + jsoncons::json_string_cursor cursor(input, cursor_options, error); +#endif + if (error) { + return fromJsonconsError(error, cursor.context()); + } + +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto token_cursor_start = std::chrono::steady_clock::now(); + RawJsonTokenCursor token_cursor(input); + recordJsonconsTokenCursorInit(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - token_cursor_start).count())); + const auto event_loop_start = std::chrono::steady_clock::now(); + const auto record_event_loop = [&event_loop_start]() { + recordJsonconsEventLoop(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - event_loop_start).count())); + }; +#else + RawJsonTokenCursor token_cursor(input); +#endif + + while (!cursor.done()) { + JsonParseResult result = emitEvent(input, sink, &token_cursor, + cursor.current(), cursor.context()); + if (!result.ok()) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + record_event_loop(); +#endif + return result; + } + + cursor.next(error); + if (error) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + record_event_loop(); +#endif + return fromJsonconsError(error, cursor.context()); + } + } + + cursor.check_done(error); + if (error) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + record_event_loop(); +#endif + return fromJsonconsError(error, cursor.context()); + } + +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + record_event_loop(); +#endif + return makeResult(JsonParseStatus::Ok); +} + +} // namespace RequestBodyProcessor +} // namespace modsecurity diff --git a/src/request_body_processor/json_backend_simdjson.cc b/src/request_body_processor/json_backend_simdjson.cc new file mode 100644 index 0000000000..722aaff271 --- /dev/null +++ 
b/src/request_body_processor/json_backend_simdjson.cc @@ -0,0 +1,567 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "src/request_body_processor/json_backend.h" + +#include +#include +#include +#include +#include +#include + +#include "src/request_body_processor/json_instrumentation.h" +#include "simdjson.h" + +namespace modsecurity { +namespace RequestBodyProcessor { +namespace { + +JsonParseResult makeResult(JsonParseStatus parse_status, + JsonSinkStatus sink_status = JsonSinkStatus::Continue, + std::string detail = "") { + return JsonParseResult{parse_status, sink_status, std::move(detail)}; +} + +JsonParseResult makeResult(JsonParseStatus parse_status, std::string detail) { + return makeResult(parse_status, JsonSinkStatus::Continue, std::move(detail)); +} + +JsonParseResult stopTraversal(JsonSinkStatus sink_status, + std::string_view location) { + return makeResult(JsonParseStatus::Ok, sink_status, + std::string("JSON traversal stopped while ") + std::string(location) + + "."); +} + +JsonParseResult fromSimdjsonError(simdjson::error_code error) { + switch (error) { + case simdjson::UTF8_ERROR: + return makeResult(JsonParseStatus::Utf8Error, + std::string("Invalid UTF-8 in JSON body: ") + + simdjson::error_message(error)); + case simdjson::EMPTY: + case simdjson::UNCLOSED_STRING: + case simdjson::INCOMPLETE_ARRAY_OR_OBJECT: + case simdjson::INSUFFICIENT_PADDING: + return makeResult(JsonParseStatus::TruncatedInput, + 
std::string("Incomplete JSON body: ") + + simdjson::error_message(error)); + case simdjson::DEPTH_ERROR: + case simdjson::TAPE_ERROR: + case simdjson::STRING_ERROR: + case simdjson::T_ATOM_ERROR: + case simdjson::F_ATOM_ERROR: + case simdjson::N_ATOM_ERROR: + case simdjson::NUMBER_ERROR: + case simdjson::BIGINT_ERROR: + case simdjson::UNESCAPED_CHARS: + case simdjson::TRAILING_CONTENT: + return makeResult(JsonParseStatus::ParseError, + std::string("Invalid JSON body: ") + + simdjson::error_message(error)); + case simdjson::CAPACITY: + case simdjson::OUT_OF_CAPACITY: + case simdjson::MEMALLOC: + return makeResult(JsonParseStatus::InternalError, + std::string("JSON parser backend failure: ") + + simdjson::error_message(error)); + default: + return makeResult(JsonParseStatus::InternalError, + std::string("JSON backend failed: ") + + simdjson::error_message(error)); + } +} + +std::size_t effectiveTechnicalMaxDepth( + const JsonBackendParseOptions &options) { + return options.technical_max_depth > 0 + ? static_cast(options.technical_max_depth) : 1; +} + +std::string_view trimTrailingJsonWhitespace(std::string_view token) { + while (!token.empty()) { + const char tail = token.back(); + if (tail != ' ' && tail != '\t' && tail != '\n' && tail != '\r') { + break; + } + token.remove_suffix(1); + } + return token; +} + +/* + * The ondemand parser is reused per thread because simdjson benefits from + * keeping its internal buffers warm across parses. thread_local storage keeps + * the parser isolated to the calling thread, so no parser state is shared + * across transactions running on different threads. The parse and full + * document traversal both complete inside parseDocumentWithSimdjson(), so no + * parser-backed state escapes this function. 
We intentionally do not add an + * automatic release/recreate heuristic here: the vendored simdjson API + * explicitly supports parser reuse, and retained capacity after unusually + * large inputs remains a conscious tradeoff rather than an accidental leak. + */ +simdjson::ondemand::parser &getReusableSimdjsonParser() { + thread_local std::unique_ptr parser; + if (parser == nullptr) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto parser_start = std::chrono::steady_clock::now(); + parser.reset(new simdjson::ondemand::parser()); + recordSimdjsonParserConstruction(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - parser_start).count())); +#else + parser.reset(new simdjson::ondemand::parser()); +#endif + } + return *parser; +} + +std::size_t clampRequestedMaxDepth(std::size_t input_size, + const JsonBackendParseOptions &options) { + const std::size_t requested_depth = effectiveTechnicalMaxDepth(options); + const std::size_t max_possible_depth = (input_size / 2) + 1; + return std::min(requested_depth, std::max(1, + max_possible_depth)); +} + +simdjson::error_code prepareParser(simdjson::ondemand::parser *parser, + std::size_t input_size, const JsonBackendParseOptions &options) { + if (parser == nullptr) { + return simdjson::MEMALLOC; + } + + const JsonBackendParseOptions default_options; + std::size_t required_max_depth = parser->max_depth(); + if (options.technical_max_depth != default_options.technical_max_depth) { + required_max_depth = clampRequestedMaxDepth(input_size, options); + } + + if (parser->capacity() >= input_size + && parser->max_depth() == required_max_depth) { + return simdjson::SUCCESS; + } + + // simdjson reuses parser buffers across parses. allocate() can grow the + // per-thread parser to satisfy a larger document or different max-depth, + // but it does not proactively shrink retained capacity for later, smaller + // inputs. 
In simdjson 4.6.1 the max-depth parameter is only enforced by + // simdjson's development checks, so we keep passing it here for that + // internal guardrail while our own walker enforces technical_max_depth at + // runtime using current_depth(). + return parser->allocate(input_size, required_max_depth); +} + +template +JsonParseResult getResult(ResultType &&result, TargetType *target) { + if (auto error = std::forward(result).get(*target); error) { + return fromSimdjsonError(error); + } + + return makeResult(JsonParseStatus::Ok); +} + +class JsonBackendWalker { + public: + JsonBackendWalker(JsonEventSink *sink, + const JsonBackendParseOptions &options) + : m_sink(sink), + m_technical_max_depth(effectiveTechnicalMaxDepth(options)) { } + + JsonParseResult walk(simdjson::ondemand::document *document) { + bool is_scalar = false; + JsonParseResult result = getResult(document->is_scalar(), &is_scalar); + if (!result.ok()) { + return result; + } + + if (is_scalar) { + return walkDocumentScalar(document); + } + + simdjson::ondemand::value root_value; + result = getResult(document->get_value(), &root_value); + if (!result.ok()) { + return result; + } + + return walkValue(root_value); + } + + private: + JsonParseResult walkDocumentScalar(simdjson::ondemand::document *document) { + simdjson::ondemand::json_type type; + JsonParseResult result = getResult(document->type(), &type); + if (!result.ok()) { + return result; + } + + switch (type) { + case simdjson::ondemand::json_type::string: { + std::string_view decoded; + result = getResult(document->get_string(), &decoded); + if (!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_string(decoded); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a root string"); + } + return makeResult(JsonParseStatus::Ok); + } + case simdjson::ondemand::json_type::number: { + std::string_view raw_number; + result = getResult(document->raw_json_token(), &raw_number); + if 
(!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_number( + trimTrailingJsonWhitespace(raw_number)); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a root number"); + } + return makeResult(JsonParseStatus::Ok); + } + case simdjson::ondemand::json_type::boolean: { + bool boolean_value = false; + result = getResult(document->get_bool(), &boolean_value); + if (!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_boolean(boolean_value); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a root boolean"); + } + return makeResult(JsonParseStatus::Ok); + } + case simdjson::ondemand::json_type::null: { + bool is_null = false; + result = getResult(document->is_null(), &is_null); + if (!result.ok()) { + return result; + } + if (!is_null) { + return makeResult(JsonParseStatus::InternalError, + "Root scalar classified as null but failed validation."); + } + + JsonSinkStatus sink_status = m_sink->on_null(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a root null"); + } + return makeResult(JsonParseStatus::Ok); + } + case simdjson::ondemand::json_type::unknown: + return makeResult(JsonParseStatus::ParseError, + "Invalid JSON token encountered in simdjson backend."); + case simdjson::ondemand::json_type::object: + case simdjson::ondemand::json_type::array: + return makeResult(JsonParseStatus::InternalError, + "Unexpected root scalar container encountered in simdjson backend."); + } + + return makeResult(JsonParseStatus::InternalError, + "Unsupported root scalar type encountered in simdjson backend."); + } + + JsonParseResult walkValue(simdjson::ondemand::value value) { + simdjson::ondemand::json_type type; + + JsonParseResult result = getResult(value.type(), &type); + if (!result.ok()) { + return result; + } + + switch (type) { + case simdjson::ondemand::json_type::object: + if 
(auto result = enforceTechnicalDepth(value); !result.ok()) { + return result; + } + return walkObject(value); + case simdjson::ondemand::json_type::array: + if (auto result = enforceTechnicalDepth(value); !result.ok()) { + return result; + } + return walkArray(value); + case simdjson::ondemand::json_type::string: + return walkString(value); + case simdjson::ondemand::json_type::number: + return walkNumber(value); + case simdjson::ondemand::json_type::boolean: + return walkBoolean(value); + case simdjson::ondemand::json_type::null: { + JsonSinkStatus sink_status = m_sink->on_null(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a null value"); + } + return makeResult(JsonParseStatus::Ok); + } + case simdjson::ondemand::json_type::unknown: + return makeResult(JsonParseStatus::ParseError, + "Invalid JSON token encountered in simdjson backend."); + } + + return makeResult(JsonParseStatus::InternalError, + "Unsupported JSON token type encountered."); + } + + JsonParseResult walkObject(simdjson::ondemand::value value) { + simdjson::ondemand::object object; + JsonParseResult result = getResult(value.get_object(), &object); + if (!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_start_object(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "starting an object"); + } + + for (auto field_result : object) { + simdjson::ondemand::field field; + std::string_view key; + simdjson::ondemand::value child; + + result = getResult(std::move(field_result), &field); + if (!result.ok()) { + return result; + } + + result = getResult(field.unescaped_key(), &key); + if (!result.ok()) { + return result; + } + + sink_status = m_sink->on_key(key); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "processing an object key"); + } + + child = field.value(); + + result = walkValue(child); + if (!result.ok()) { + return result; + } + } + + 
sink_status = m_sink->on_end_object(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "ending an object"); + } + + return makeResult(JsonParseStatus::Ok); + } + + JsonParseResult walkArray(simdjson::ondemand::value value) { + simdjson::ondemand::array array; + JsonParseResult result = getResult(value.get_array(), &array); + if (!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_start_array(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "starting an array"); + } + + for (auto element_result : array) { + simdjson::ondemand::value element; + + result = getResult(std::move(element_result), &element); + if (!result.ok()) { + return result; + } + + result = walkValue(element); + if (!result.ok()) { + return result; + } + } + + sink_status = m_sink->on_end_array(); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "ending an array"); + } + + return makeResult(JsonParseStatus::Ok); + } + + JsonParseResult walkString(simdjson::ondemand::value value) { + std::string_view decoded; + JsonParseResult result = getResult(value.get_string(), &decoded); + if (!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_string(decoded); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a string"); + } + + return makeResult(JsonParseStatus::Ok); + } + + JsonParseResult walkNumber(simdjson::ondemand::value value) { + std::string_view raw_number = trimTrailingJsonWhitespace( + value.raw_json_token()); + JsonSinkStatus sink_status = m_sink->on_number(raw_number); + + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a number"); + } + + return makeResult(JsonParseStatus::Ok); + } + + JsonParseResult walkBoolean(simdjson::ondemand::value value) { + bool boolean_value = false; + JsonParseResult result = getResult(value.get_bool(), 
&boolean_value); + if (!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_boolean(boolean_value); + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, "handling a boolean"); + } + + return makeResult(JsonParseStatus::Ok); + } + + JsonParseResult enforceTechnicalDepth(simdjson::ondemand::value value) { + const int32_t current_depth = value.current_depth(); + if (current_depth <= 0) { + return makeResult(JsonParseStatus::InternalError, + "Invalid current depth reported by simdjson backend."); + } + + if (static_cast(current_depth) > m_technical_max_depth) { + return makeResult(JsonParseStatus::ParseError, + "JSON nesting depth exceeds backend technical max depth."); + } + + return makeResult(JsonParseStatus::Ok); + } + + JsonEventSink *m_sink; + std::size_t m_technical_max_depth; +}; + +struct PreparedSimdjsonInput { + simdjson::padded_string_view view{}; + simdjson::padded_string owned_copy{}; +}; + +PreparedSimdjsonInput prepareMutableSimdjsonInput(std::string *input) { + PreparedSimdjsonInput prepared; + + // The production request-body path owns a mutable std::string, so we can + // pad that buffer in place and keep the logical JSON length in the + // returned padded_string_view. This removes the extra padded_string copy + // while still satisfying simdjson's padding requirement explicitly. + prepared.view = simdjson::pad(*input); + return prepared; +} + +PreparedSimdjsonInput prepareConstSimdjsonInput(const std::string &input) { + PreparedSimdjsonInput prepared; + prepared.view = simdjson::padded_string_view(input); + + // The const path must not guess about std::string capacity. We only parse + // directly when simdjson itself confirms that the existing allocation + // and/or trailing whitespace provide sufficient padding. 
+ if (prepared.view.has_sufficient_padding()) { + return prepared; + } + +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto padded_start = std::chrono::steady_clock::now(); + prepared.owned_copy = simdjson::padded_string(input); + recordSimdjsonPaddedCopy(input.size(), static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - padded_start).count())); +#else + prepared.owned_copy = simdjson::padded_string(input); +#endif + prepared.view = prepared.owned_copy; + return prepared; +} + +JsonParseResult parsePreparedDocumentWithSimdjson( + simdjson::padded_string_view input, JsonEventSink *sink, + const JsonBackendParseOptions &options) { + simdjson::ondemand::parser &parser = getReusableSimdjsonParser(); + // This only prepares parser capacity and max-depth bookkeeping. Buffer + // lifetime and padding must already have been handled by the caller. + if (auto error = prepareParser(&parser, input.length(), options); error) { + return fromSimdjsonError(error); + } + + simdjson::ondemand::document document; +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto iterate_start = std::chrono::steady_clock::now(); + if (auto error = parser.iterate(input).get(document); error) { + recordSimdjsonIterate(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - iterate_start).count())); + return fromSimdjsonError(error); + } + recordSimdjsonIterate(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - iterate_start).count())); +#else + if (auto error = parser.iterate(input).get(document); error) { + return fromSimdjsonError(error); + } +#endif + + JsonBackendWalker walker(sink, options); + return walker.walk(&document); +} + +} // namespace + +JsonParseResult parseDocumentWithSimdjson(std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options) { + if (sink == nullptr) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, "JSON event sink is null."); + } + + 
PreparedSimdjsonInput prepared = prepareMutableSimdjsonInput(&input); + return parsePreparedDocumentWithSimdjson(prepared.view, sink, options); +} + +JsonParseResult parseDocumentWithSimdjson(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options) { + if (sink == nullptr) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, "JSON event sink is null."); + } + + PreparedSimdjsonInput prepared = prepareConstSimdjsonInput(input); + return parsePreparedDocumentWithSimdjson(prepared.view, sink, options); +} + +} // namespace RequestBodyProcessor +} // namespace modsecurity diff --git a/src/request_body_processor/json_instrumentation.cc b/src/request_body_processor/json_instrumentation.cc new file mode 100644 index 0000000000..0b4502e6aa --- /dev/null +++ b/src/request_body_processor/json_instrumentation.cc @@ -0,0 +1,124 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "src/request_body_processor/json_instrumentation.h" + +#include + +namespace modsecurity { +namespace RequestBodyProcessor { +namespace { + +thread_local JsonInstrumentationMetrics g_metrics; + +std::uint64_t elapsedNanos( + std::chrono::steady_clock::time_point start_time) noexcept { + return static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time).count()); +} + +} // namespace + +void jsonInstrumentationReset() noexcept { + g_metrics = JsonInstrumentationMetrics{}; +} + +JsonInstrumentationMetrics jsonInstrumentationSnapshot() noexcept { + return g_metrics; +} + +std::string captureRequestBodySnapshot(const std::ostringstream &request_body) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto start_time = std::chrono::steady_clock::now(); + std::string snapshot = request_body.str(); + g_metrics.request_body_snapshot_count++; + g_metrics.request_body_snapshot_bytes += snapshot.size(); + g_metrics.request_body_snapshot_ns += elapsedNanos(start_time); + return snapshot; +#else + return 
request_body.str(); +#endif +} + +void recordJsonProcessChunkAppend(std::size_t bytes, + std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + g_metrics.json_process_chunk_calls++; + g_metrics.json_process_chunk_appended_bytes += bytes; + g_metrics.json_process_chunk_ns += elapsed_ns; +#else + (void) bytes; + (void) elapsed_ns; +#endif +} + +void recordSimdjsonParserConstruction(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + g_metrics.simdjson_parser_constructions++; + g_metrics.simdjson_parser_construction_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordSimdjsonPaddedCopy(std::size_t bytes, + std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + g_metrics.simdjson_padded_copy_bytes += bytes; + g_metrics.simdjson_padded_copy_ns += elapsed_ns; +#else + (void) bytes; + (void) elapsed_ns; +#endif +} + +void recordSimdjsonIterate(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + g_metrics.simdjson_iterate_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordJsonconsCursorInit(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + g_metrics.jsoncons_cursor_constructions++; + g_metrics.jsoncons_cursor_init_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordJsonconsTokenCursorInit(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + g_metrics.jsoncons_token_cursor_constructions++; + g_metrics.jsoncons_token_cursor_init_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordJsonconsEventLoop(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + g_metrics.jsoncons_event_loop_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordJsonconsTokenSyncStep() noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + g_metrics.jsoncons_token_sync_steps++; +#endif +} + +void recordJsonconsTokenExactAdvanceStep() noexcept 
{
+#ifdef MSC_JSON_AUDIT_INSTRUMENTATION
+    g_metrics.jsoncons_token_exact_advance_steps++;
+#endif
+}
+
+}  // namespace RequestBodyProcessor
+}  // namespace modsecurity
diff --git a/src/request_body_processor/json_instrumentation.h b/src/request_body_processor/json_instrumentation.h
new file mode 100644
index 0000000000..db24dc7b99
--- /dev/null
+++ b/src/request_body_processor/json_instrumentation.h
@@ -0,0 +1,61 @@
+#ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_INSTRUMENTATION_H_
+#define SRC_REQUEST_BODY_PROCESSOR_JSON_INSTRUMENTATION_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <sstream>
+#include <string>
+
+namespace modsecurity {
+namespace RequestBodyProcessor {
+
+// Counters filled in by the instrumentation helpers declared below.  Fields
+// ending in _ns are running sums of the elapsed_ns values handed to those
+// helpers; the remaining fields are event counts or byte totals.
+struct JsonInstrumentationMetrics {
+    std::uint64_t request_body_snapshot_count{0};
+    std::uint64_t request_body_snapshot_bytes{0};
+    std::uint64_t request_body_snapshot_ns{0};
+
+    std::uint64_t json_process_chunk_calls{0};
+    std::uint64_t json_process_chunk_appended_bytes{0};
+    std::uint64_t json_process_chunk_ns{0};
+
+    std::uint64_t simdjson_parser_constructions{0};
+    std::uint64_t simdjson_parser_construction_ns{0};
+    std::uint64_t simdjson_padded_copy_bytes{0};
+    std::uint64_t simdjson_padded_copy_ns{0};
+    std::uint64_t simdjson_iterate_ns{0};
+
+    std::uint64_t jsoncons_cursor_constructions{0};
+    std::uint64_t jsoncons_cursor_init_ns{0};
+    std::uint64_t jsoncons_token_cursor_constructions{0};
+    std::uint64_t jsoncons_token_cursor_init_ns{0};
+    std::uint64_t jsoncons_event_loop_ns{0};
+    std::uint64_t jsoncons_token_sync_steps{0};
+    std::uint64_t jsoncons_token_exact_advance_steps{0};
+};
+
+void jsonInstrumentationReset() noexcept;
+JsonInstrumentationMetrics jsonInstrumentationSnapshot() noexcept;
+
+std::string captureRequestBodySnapshot(const std::ostringstream &request_body);
+
+// Each helper below updates its counters only when the library is built with
+// MSC_JSON_AUDIT_INSTRUMENTATION defined; otherwise the call does nothing.
+void recordJsonProcessChunkAppend(std::size_t bytes, std::uint64_t elapsed_ns)
+    noexcept;
+void recordSimdjsonParserConstruction(std::uint64_t elapsed_ns) noexcept;
+void recordSimdjsonPaddedCopy(std::size_t bytes, std::uint64_t elapsed_ns)
+    noexcept;
+void recordSimdjsonIterate(std::uint64_t elapsed_ns) noexcept;
+void recordJsonconsCursorInit(std::uint64_t elapsed_ns) noexcept;
+void recordJsonconsTokenCursorInit(std::uint64_t elapsed_ns) noexcept;
+void recordJsonconsEventLoop(std::uint64_t elapsed_ns) noexcept;
+void recordJsonconsTokenSyncStep() noexcept;
+void recordJsonconsTokenExactAdvanceStep() noexcept;
+
+}  // namespace RequestBodyProcessor
+}  // namespace modsecurity
+
+#endif  // SRC_REQUEST_BODY_PROCESSOR_JSON_INSTRUMENTATION_H_
diff --git a/src/transaction.cc b/src/transaction.cc index 408f9b3d40..953115db88 100644 --- a/src/transaction.cc +++ b/src/transaction.cc @@ -13,13 +13,12 @@ * */ -#include "modsecurity/transaction.h" - -#ifdef WITH_YAJL -#include -#include +#ifdef HAVE_CONFIG_H +#include "config.h" #endif +#include "modsecurity/transaction.h" + #include #include @@ -36,16 +35,16 @@ #include "src/actions/disruptive/deny.h" #include "modsecurity/intervention.h" #include "modsecurity/modsecurity.h" +#include "src/request_body_processor/json.h" +#include "src/request_body_processor/json_instrumentation.h" #include "src/request_body_processor/multipart.h" #include "src/request_body_processor/xml.h" -#ifdef WITH_YAJL -#include "src/request_body_processor/json.h" -#endif #include "modsecurity/audit_log.h" #include "src/unique_id.h" #include "src/utils/string.h" #include "src/utils/system.h" #include "src/utils/decode.h" +#include "src/utils/json_writer.h" #include "src/utils/random.h" #include "modsecurity/rule.h" #include "modsecurity/rule_message.h" @@ -59,6 +58,7 @@ using modsecurity::actions::Action; +using modsecurity::RequestBodyProcessor::captureRequestBodySnapshot; using modsecurity::RequestBodyProcessor::Multipart; using modsecurity::RequestBodyProcessor::XML; @@ -142,11 +142,7 @@ Transaction::Transaction(ModSecurity *ms, RulesSet *rules, const char *id, #else m_xml(nullptr), #endif -#ifdef WITH_YAJL m_json(new RequestBodyProcessor::JSON(this)), -#else - m_json(nullptr), 
-#endif m_secRuleEngine(RulesSetProperties::PropertyNotSetRuleEngine), m_secXMLParseXmlIntoArgs(rules->m_secXMLParseXmlIntoArgs), m_logCbData(logCbData), @@ -173,9 +169,7 @@ Transaction::~Transaction() { intervention::free(&m_it); intervention::clean(&m_it); -#ifdef WITH_YAJL delete m_json; -#endif #ifdef WITH_LIBXML2 delete m_xml; #endif @@ -693,13 +687,17 @@ int Transaction::processRequestBody() { */ std::unique_ptr a = m_variableRequestHeaders.resolveFirst( "Content-Type"); + const std::string requestBodySnapshot = captureRequestBodySnapshot( + m_requestBody); + const std::size_t requestBodySnapshotSize = requestBodySnapshot.size(); bool requestBodyNoFilesLimitExceeded = false; if ((m_requestBodyType == WWWFormUrlEncoded) || (m_requestBodyProcessor == JSONRequestBody) || (m_requestBodyProcessor == XMLRequestBody)) { if ((m_rules->m_requestBodyNoFilesLimit.m_set) - && (m_requestBody.str().size() > m_rules->m_requestBodyNoFilesLimit.m_value)) { + && (requestBodySnapshotSize + > m_rules->m_requestBodyNoFilesLimit.m_value)) { m_variableReqbodyError.set("1", 0); m_variableReqbodyErrorMsg.set("Request body excluding files is bigger than the maximum expected.", 0); m_variableInboundDataError.set("1", m_variableOffset); @@ -709,72 +707,64 @@ int Transaction::processRequestBody() { } } -#ifdef WITH_LIBXML2 - if (m_requestBodyProcessor == XMLRequestBody) { + if (m_requestBodyProcessor == JSONRequestBody) { // large size might cause issues in the parsing itself; omit if exceeded if (!requestBodyNoFilesLimitExceeded) { std::string error; - if (m_xml->init() == true) { - m_xml->processChunk(m_requestBody.str().c_str(), - m_requestBody.str().size(), + if (m_rules->m_requestBodyJsonDepthLimit.m_set) { + m_json->setMaxDepth(m_rules->m_requestBodyJsonDepthLimit.m_value); + } + if (m_json->init() == true) { + m_json->processChunk(requestBodySnapshot.c_str(), + requestBodySnapshotSize, &error); - m_xml->complete(&error); + m_json->complete(&error); } - if (error.empty() == false) 
{ + if (error.empty() == false && requestBodySnapshotSize > 0) { m_variableReqbodyError.set("1", m_variableOffset); - m_variableReqbodyErrorMsg.set("XML parsing error: " + error, + m_variableReqbodyProcessorError.set("1", m_variableOffset); + m_variableReqbodyErrorMsg.set("JSON parsing error: " + error, m_variableOffset); - m_variableReqbodyProcessorErrorMsg.set("XML parsing error: " \ + m_variableReqbodyProcessorErrorMsg.set("JSON parsing error: " \ + error, m_variableOffset); - m_variableReqbodyProcessorError.set("1", m_variableOffset); } else { m_variableReqbodyError.set("0", m_variableOffset); m_variableReqbodyProcessorError.set("0", m_variableOffset); } } -#endif -#if WITH_YAJL + } #ifdef WITH_LIBXML2 - } else if (m_requestBodyProcessor == JSONRequestBody) { -#else - if (m_requestBodyProcessor == JSONRequestBody) { -#endif + else if (m_requestBodyProcessor == XMLRequestBody) { // large size might cause issues in the parsing itself; omit if exceeded if (!requestBodyNoFilesLimitExceeded) { std::string error; - if (m_rules->m_requestBodyJsonDepthLimit.m_set) { - m_json->setMaxDepth(m_rules->m_requestBodyJsonDepthLimit.m_value); - } - if (m_json->init() == true) { - m_json->processChunk(m_requestBody.str().c_str(), - m_requestBody.str().size(), + if (m_xml->init() == true) { + m_xml->processChunk(requestBodySnapshot.c_str(), + requestBodySnapshotSize, &error); - m_json->complete(&error); + m_xml->complete(&error); } - if (error.empty() == false && m_requestBody.str().size() > 0) { + if (error.empty() == false) { m_variableReqbodyError.set("1", m_variableOffset); - m_variableReqbodyProcessorError.set("1", m_variableOffset); - m_variableReqbodyErrorMsg.set("JSON parsing error: " + error, + m_variableReqbodyErrorMsg.set("XML parsing error: " + error, m_variableOffset); - m_variableReqbodyProcessorErrorMsg.set("JSON parsing error: " \ + m_variableReqbodyProcessorErrorMsg.set("XML parsing error: " \ + error, m_variableOffset); + m_variableReqbodyProcessorError.set("1", 
m_variableOffset); } else { m_variableReqbodyError.set("0", m_variableOffset); m_variableReqbodyProcessorError.set("0", m_variableOffset); } } + } #endif -#if defined(WITH_LIBXML2) or defined(WITH_YAJL) - } else if (m_requestBodyType == MultiPartRequestBody) { -#else - if (m_requestBodyType == MultiPartRequestBody) { -#endif + else if (m_requestBodyType == MultiPartRequestBody) { std::string error; int reqbodyNoFilesLength = 0; if (a != NULL) { Multipart m(*a, this); if (m.init(&error) == true) { - m.process(m_requestBody.str(), &error, m_variableOffset); + m.process(requestBodySnapshot, &error, m_variableOffset); } reqbodyNoFilesLength = m.m_reqbody_no_files_length; m.multipart_complete(&error); @@ -801,7 +791,7 @@ int Transaction::processRequestBody() { m_variableOffset++; // large size might cause issues in the parsing itself; omit if exceeded if (!requestBodyNoFilesLimitExceeded) { - extractArguments("POST", m_requestBody.str(), m_variableOffset); + extractArguments("POST", requestBodySnapshot, m_variableOffset); } } else if (m_requestBodyType != UnknownFormat) { /** @@ -855,16 +845,16 @@ int Transaction::processRequestBody() { } fullRequest = fullRequest + "\n\n"; - fullRequest = fullRequest + m_requestBody.str(); + fullRequest = fullRequest + requestBodySnapshot; m_variableFullRequest.set(fullRequest, m_variableOffset); m_variableFullRequestLength.set(std::to_string(fullRequest.size()), m_variableOffset); if (m_requestBody.tellp() > 0) { - m_variableRequestBody.set(m_requestBody.str(), m_variableOffset); + m_variableRequestBody.set(requestBodySnapshot, m_variableOffset); m_variableRequestBodyLength.set(std::to_string( - m_requestBody.str().size()), - m_variableOffset, m_requestBody.str().size()); + requestBodySnapshotSize), + m_variableOffset, requestBodySnapshotSize); } this->m_rules->evaluate(modsecurity::RequestBodyPhase, this); @@ -1564,197 +1554,139 @@ std::string Transaction::toOldAuditLogFormat(int parts, std::string Transaction::toJSON(int parts) { 
-#ifdef WITH_YAJL - const unsigned char *buf; - size_t len; - yajl_gen g; std::string log; std::string ts = utils::string::ascTime(&m_timeStamp); std::string uniqueId = UniqueId::uniqueId(); - - g = yajl_gen_alloc(NULL); - if (g == NULL) { - return ""; - } - yajl_gen_config(g, yajl_gen_beautify, 0); - - /* main */ - yajl_gen_map_open(g); - - /* trasaction */ - yajl_gen_string(g, reinterpret_cast("transaction"), - strlen("transaction")); - - yajl_gen_map_open(g); - /* Part: A (header mandatory) */ - LOGFY_ADD("client_ip", m_clientIpAddress); - LOGFY_ADD("time_stamp", ts); - LOGFY_ADD("server_id", uniqueId); - LOGFY_ADD_NUM("client_port", m_clientPort); - LOGFY_ADD("host_ip", m_serverIpAddress); - LOGFY_ADD_NUM("host_port", m_serverPort); - LOGFY_ADD("unique_id", m_id); - - /* request */ - yajl_gen_string(g, reinterpret_cast("request"), - strlen("request")); - yajl_gen_map_open(g); - - LOGFY_ADD("method", - utils::string::dash_if_empty( - m_variableRequestMethod.evaluate())); - - LOGFY_ADD("http_version", m_httpVersion); - LOGFY_ADD("hostname", m_requestHostName); - LOGFY_ADD("uri", this->m_uri); + utils::JsonWriter writer(false); + + const auto addString = [&writer](std::string_view key, + const std::string &value) { + writer.key(key); + writer.string(value); + }; + const auto addInteger = [&writer](std::string_view key, int64_t value) { + writer.key(key); + writer.integer(value); + }; + + writer.start_object(); + writer.key("transaction"); + writer.start_object(); + + addString("client_ip", m_clientIpAddress); + addString("time_stamp", ts); + addString("server_id", uniqueId); + addInteger("client_port", m_clientPort); + addString("host_ip", m_serverIpAddress); + addInteger("host_port", m_serverPort); + addString("unique_id", m_id); + + writer.key("request"); + writer.start_object(); + addString("method", + utils::string::dash_if_empty(m_variableRequestMethod.evaluate())); + addString("http_version", m_httpVersion); + addString("hostname", m_requestHostName); + 
addString("uri", this->m_uri); if (parts & audit_log::AuditLog::CAuditLogPart) { - // FIXME: check for the binary content size. - LOGFY_ADD("body", utils::string::toHexIfNeeded(this->m_requestBody.str())); + addString("body", utils::string::toHexIfNeeded(this->m_requestBody.str())); } - /* request headers */ if (parts & audit_log::AuditLog::BAuditLogPart) { std::vector l; - yajl_gen_string(g, reinterpret_cast("headers"), - strlen("headers")); - yajl_gen_map_open(g); + writer.key("headers"); + writer.start_object(); m_variableRequestHeaders.resolve(&l); for (auto &h : l) { - LOGFY_ADD(utils::string::toHexIfNeeded(h->getKey().c_str()).c_str(), utils::string::toHexIfNeeded(h->getValue())); + std::string header_name = + utils::string::toHexIfNeeded(h->getKey().c_str()); + std::string header_value = + utils::string::toHexIfNeeded(h->getValue()); + addString(header_name, header_value); delete h; } - - /* end: request headers */ - yajl_gen_map_close(g); + writer.end_object(); } + writer.end_object(); - /* end: request */ - yajl_gen_map_close(g); - - /* response */ - yajl_gen_string(g, reinterpret_cast("response"), - strlen("response")); - yajl_gen_map_open(g); - + writer.key("response"); + writer.start_object(); if (parts & audit_log::AuditLog::EAuditLogPart) { - LOGFY_ADD("body", this->m_responseBody.str()); + addString("body", this->m_responseBody.str()); } - LOGFY_ADD_NUM("http_code", m_httpCodeReturned); + addInteger("http_code", m_httpCodeReturned); - /* response headers */ if (parts & audit_log::AuditLog::FAuditLogPart) { std::vector l; - yajl_gen_string(g, reinterpret_cast("headers"), - strlen("headers")); - yajl_gen_map_open(g); + writer.key("headers"); + writer.start_object(); m_variableResponseHeaders.resolve(&l); for (auto &h : l) { - LOGFY_ADD(h->getKey().c_str(), h->getValue()); + addString(h->getKey(), h->getValue()); delete h; } - - /* end: response headers */ - yajl_gen_map_close(g); + writer.end_object(); } - /* end: response */ - yajl_gen_map_close(g); + 
writer.end_object(); - /* producer */ if (parts & audit_log::AuditLog::HAuditLogPart) { - yajl_gen_string(g, reinterpret_cast("producer"), - strlen("producer")); - yajl_gen_map_open(g); - - /* producer > libmodsecurity */ - LOGFY_ADD("modsecurity", m_ms->whoAmI()); - - /* producer > connector */ - LOGFY_ADD("connector", m_ms->getConnectorInformation()); - - /* producer > engine state */ - LOGFY_ADD("secrules_engine", + writer.key("producer"); + writer.start_object(); + addString("modsecurity", m_ms->whoAmI()); + addString("connector", m_ms->getConnectorInformation()); + addString("secrules_engine", RulesSet::ruleEngineStateString( - (RulesSetProperties::RuleEngine) getRuleEngineState())); + (RulesSetProperties::RuleEngine) getRuleEngineState())); - /* producer > components */ - yajl_gen_string(g, - reinterpret_cast("components"), - strlen("components")); - - yajl_gen_array_open(g); + writer.key("components"); + writer.start_array(); for (const auto &a : m_rules->m_components) { - yajl_gen_string(g, - reinterpret_cast - (a.data()), a.length()); + writer.string(a); } - yajl_gen_array_close(g); - - /* end: producer */ - yajl_gen_map_close(g); - - /* messages */ - yajl_gen_string(g, - reinterpret_cast("messages"), - strlen("messages")); - yajl_gen_array_open(g); - for (auto a : m_rulesMessages) { - yajl_gen_map_open(g); - LOGFY_ADD("message", a.m_message); - yajl_gen_string(g, - reinterpret_cast("details"), - strlen("details")); - yajl_gen_map_open(g); - LOGFY_ADD("match", a.m_match); - LOGFY_ADD("reference", a.m_reference); - LOGFY_ADD("ruleId", std::to_string(a.m_rule.m_ruleId)); - LOGFY_ADD("file", a.m_rule.getFileName()); - LOGFY_ADD("lineNumber", std::to_string(a.m_rule.getLineNumber())); - LOGFY_ADD("data", utils::string::toHexIfNeeded(a.m_data)); - LOGFY_ADD("severity", std::to_string(a.m_severity)); - LOGFY_ADD("ver", a.m_rule.m_ver); - LOGFY_ADD("rev", a.m_rule.m_rev); - - yajl_gen_string(g, - reinterpret_cast("tags"), - strlen("tags")); - 
yajl_gen_array_open(g); - for (auto b : a.m_tags) { - yajl_gen_string(g, - reinterpret_cast(b.data()), - b.length()); + writer.end_array(); + writer.end_object(); + + writer.key("messages"); + writer.start_array(); + for (const auto &a : m_rulesMessages) { + writer.start_object(); + addString("message", a.m_message); + writer.key("details"); + writer.start_object(); + addString("match", a.m_match); + addString("reference", a.m_reference); + addString("ruleId", std::to_string(a.m_rule.m_ruleId)); + addString("file", a.m_rule.getFileName()); + addString("lineNumber", std::to_string(a.m_rule.getLineNumber())); + addString("data", utils::string::toHexIfNeeded(a.m_data)); + addString("severity", std::to_string(a.m_severity)); + addString("ver", a.m_rule.m_ver); + addString("rev", a.m_rule.m_rev); + + writer.key("tags"); + writer.start_array(); + for (const auto &b : a.m_tags) { + writer.string(b); } - yajl_gen_array_close(g); + writer.end_array(); - LOGFY_ADD("maturity", std::to_string(a.m_rule.m_maturity)); - LOGFY_ADD("accuracy", std::to_string(a.m_rule.m_accuracy)); - yajl_gen_map_close(g); - yajl_gen_map_close(g); + addString("maturity", std::to_string(a.m_rule.m_maturity)); + addString("accuracy", std::to_string(a.m_rule.m_accuracy)); + writer.end_object(); + writer.end_object(); } - yajl_gen_array_close(g); - /* end: messages */ + writer.end_array(); } - /* end: transaction */ - yajl_gen_map_close(g); - - /* end: main */ - yajl_gen_map_close(g); + writer.end_object(); + writer.end_object(); - yajl_gen_get_buf(g, &buf, &len); - - log.assign(reinterpret_cast(buf), len); + log = writer.to_string(); log.append("\n"); - - yajl_gen_free(g); - return log; -#else - return std::string("{\"error\":\"ModSecurity was " \ - "not compiled with JSON support.\"}"); -#endif } @@ -2326,4 +2258,3 @@ extern "C" int msc_set_request_hostname(Transaction *transaction, } // namespace modsecurity - diff --git a/src/utils/json_writer.cc b/src/utils/json_writer.cc new file mode 100644 index 
0000000000..9d2ddc1032
--- /dev/null
+++ b/src/utils/json_writer.cc
@@ -0,0 +1,222 @@
+/*
+ * ModSecurity, http://www.modsecurity.org/
+ * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/)
+ *
+ * You may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * If any of the files related to licensing are missing or if you have any
+ * other questions related to licensing please contact Trustwave Holdings, Inc.
+ * directly using the email address security@modsecurity.org.
+ *
+ */
+
+#include "src/utils/json_writer.h"
+
+#include <string>
+#include <utility>
+
+namespace modsecurity {
+namespace utils {
+
+// Builds an empty writer; `indent` is the unit repeated per nesting level
+// when `pretty` output is requested.
+JsonWriter::JsonWriter(bool pretty, std::string indent)
+    : m_output(),
+      m_stack(),
+      m_pretty(pretty),
+      m_indent(std::move(indent)) { }
+
+void JsonWriter::start_object() {
+    begin_container(ContainerType::Object, '{');
+}
+
+void JsonWriter::end_object() {
+    end_container(ContainerType::Object, '}');
+}
+
+void JsonWriter::start_array() {
+    begin_container(ContainerType::Array, '[');
+}
+
+void JsonWriter::end_array() {
+    end_container(ContainerType::Array, ']');
+}
+
+// Emits an object member name followed by ':'; the matching value must be
+// written by the next call.
+void JsonWriter::key(std::string_view value) {
+    if (m_stack.empty()) {
+        // A key outside of any container is a caller error.  Ignore it
+        // instead of calling back() on an empty vector, which is undefined.
+        return;
+    }
+
+    Frame &frame = m_stack.back();
+
+    if (!frame.first) {
+        m_output.push_back(',');
+    }
+    if (m_pretty) {
+        newline_and_indent(m_stack.size());
+    }
+
+    write_escaped_string(value);
+    m_output.append(m_pretty ? ": " : ":");
+
+    frame.first = false;
+    frame.expecting_key = false;
+}
+
+void JsonWriter::string(std::string_view value) {
+    before_value();
+    write_escaped_string(value);
+}
+
+// Appends `raw` verbatim; the caller is responsible for passing a valid
+// JSON number literal.
+void JsonWriter::number(std::string_view raw) {
+    before_value();
+    m_output.append(raw.data(), raw.size());
+}
+
+void JsonWriter::integer(int64_t value) {
+    before_value();
+    m_output.append(std::to_string(value));
+}
+
+void JsonWriter::boolean(bool value) {
+    before_value();
+    m_output.append(value ? "true" : "false");
+}
+
+void JsonWriter::null() {
+    before_value();
+    m_output.append("null");
+}
+
+const std::string& JsonWriter::str() const {
+    return m_output;
+}
+
+const std::string& JsonWriter::to_string() const {
+    return m_output;
+}
+
+// Separator bookkeeping shared by every value writer.  Array elements get
+// their ',' (and pretty-print newline) here; object members already got
+// theirs in key().
+void JsonWriter::before_value() {
+    if (m_stack.empty()) {
+        return;
+    }
+
+    Frame &frame = m_stack.back();
+    if (frame.type == ContainerType::Array) {
+        if (!frame.first) {
+            m_output.push_back(',');
+        }
+        if (m_pretty) {
+            newline_and_indent(m_stack.size());
+        }
+        frame.first = false;
+        return;
+    }
+
+    frame.expecting_key = true;
+}
+
+void JsonWriter::after_container_end() {
+    if (m_stack.empty()) {
+        return;
+    }
+
+    Frame &frame = m_stack.back();
+    if (frame.type == ContainerType::Object) {
+        frame.expecting_key = true;
+    }
+}
+
+void JsonWriter::begin_container(ContainerType type, char token) {
+    before_value();
+    m_output.push_back(token);
+    m_stack.push_back(Frame{
+        type,
+        true,
+        type == ContainerType::Object
+    });
+}
+
+void JsonWriter::end_container(ContainerType expected, char token) {
+    if (m_stack.empty()) {
+        // Unbalanced end_object()/end_array(): there is nothing to close,
+        // and back()/pop_back() on an empty vector are undefined.
+        return;
+    }
+
+    Frame frame = m_stack.back();
+    m_stack.pop_back();
+
+    if (frame.type == expected && !frame.first && m_pretty) {
+        newline_and_indent(m_stack.size());
+    }
+
+    m_output.push_back(token);
+    after_container_end();
+}
+
+void JsonWriter::newline_and_indent(std::size_t depth) {
+    m_output.push_back('\n');
+    for (std::size_t i = 0; i < depth; ++i) {
+        m_output.append(m_indent);
+    }
+}
+
+// Writes `value` as a quoted JSON string.  '"', '\\' and control characters
+// below 0x20 are escaped; every other byte is copied through unchanged.
+void JsonWriter::write_escaped_string(std::string_view value) {
+    static const char *kHex = "0123456789abcdef";
+    char unicode_escape[] = {'\\', 'u', '0', '0', '0', '0', '\0'};
+
+    m_output.push_back('"');
+    for (const unsigned char c : value) {
+        switch (c) {
+            case '"':
+                m_output.append("\\\"");
+                break;
+            case '\\':
+                m_output.append("\\\\");
+                break;
+            case '\b':
+                m_output.append("\\b");
+                break;
+            case '\f':
+                m_output.append("\\f");
+                break;
+            case '\n':
+                m_output.append("\\n");
+                break;
+            case '\r':
+                m_output.append("\\r");
+                break;
+            case '\t':
+                m_output.append("\\t");
+                break;
+            default:
+                if (c < 0x20) {
+                    unicode_escape[4] = kHex[(c >> 4) & 0x0f];
+                    unicode_escape[5] = kHex[c & 0x0f];
+                    m_output.append(unicode_escape, 6);
+                } else {
+                    m_output.push_back(static_cast<char>(c));
+                }
+                break;
+        }
+    }
+    m_output.push_back('"');
+}
+
+}  // namespace utils
+}  // namespace modsecurity
diff --git a/src/utils/json_writer.h b/src/utils/json_writer.h
new file mode 100644
index 0000000000..04ce1df7df
--- /dev/null
+++ b/src/utils/json_writer.h
@@ -0,0 +1,82 @@
+/*
+ * ModSecurity, http://www.modsecurity.org/
+ * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/)
+ *
+ * You may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * If any of the files related to licensing are missing or if you have any
+ * other questions related to licensing please contact Trustwave Holdings, Inc.
+ * directly using the email address security@modsecurity.org.
+ *
+ */
+
+#ifndef SRC_UTILS_JSON_WRITER_H_
+#define SRC_UTILS_JSON_WRITER_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace modsecurity {
+namespace utils {
+
+// Minimal streaming JSON serializer that appends to an internal string.
+// Callers drive it with the start/end container calls, key() and the value
+// writers.  Call order is not checked for well-formedness; that remains the
+// caller's responsibility.
+class JsonWriter {
+ public:
+    explicit JsonWriter(bool pretty = false, std::string indent = " ");
+
+    void start_object();
+    void end_object();
+    void start_array();
+    void end_array();
+
+    // Writes an object member name; the next call must write its value.
+    void key(std::string_view value);
+    void string(std::string_view value);
+    // Appends `raw` unmodified (no quoting, no validation).
+    void number(std::string_view raw);
+    void integer(int64_t value);
+    void boolean(bool value);
+    void null();
+
+    // Both accessors return the text produced so far.
+    const std::string& str() const;
+    const std::string& to_string() const;
+
+ private:
+    enum class ContainerType {
+        Object,
+        Array
+    };
+
+    // Per-container state kept while the container is open.
+    struct Frame {
+        ContainerType type;
+        bool first;          // true until the first member/element is written
+        bool expecting_key;
+    };
+
+    void before_value();
+    void after_container_end();
+    void begin_container(ContainerType type, char token);
+    void end_container(ContainerType expected, char token);
+    void newline_and_indent(std::size_t depth);
+    void write_escaped_string(std::string_view value);
+
+    std::string m_output;
+    std::vector<Frame> m_stack;
+    bool m_pretty;
+    std::string m_indent;
+};
+
+}  // namespace utils
+}  // namespace modsecurity
+
+#endif  // SRC_UTILS_JSON_WRITER_H_
diff --git a/src/utils/msc_tree.cc b/src/utils/msc_tree.cc index ea6c1a4167..3b622d5c93 100644 --- a/src/utils/msc_tree.cc +++ b/src/utils/msc_tree.cc @@ -980,12 +980,11 @@ int tree_contains_ip(TreeRoot *rtree, -int add_ip_from_param( - const char *param, TreeRoot **rtree, char **error_msg) +static int add_ip_entries_from_param(const char *param, TreeRoot *rtree) { char *param_copy = strdup(param); char *saved = NULL; - char *str; + char *str = NULL; TreeNode *tnode = NULL; str = strtok_r(param_copy, ",", &saved); @@ -993,17 +992,15 @@ int add_ip_from_param( { if (strchr(str, ':') == NULL) { - tnode = TreeAddIP(str, (*rtree)->ipv4_tree, IPV4_TREE); + tnode = TreeAddIP(str, rtree->ipv4_tree, IPV4_TREE); } else { - tnode = TreeAddIP(str, (*rtree)->ipv6_tree, IPV6_TREE); 
+ tnode = TreeAddIP(str, rtree->ipv6_tree, IPV6_TREE); } if (tnode == NULL) { - //*error_msg = apr_psprintf("Could not add entry " \ - // "\"%s\" from: %s.", str, param); free(param_copy); return -1; } @@ -1016,45 +1013,22 @@ int add_ip_from_param( } -int ip_tree_from_param( +int add_ip_from_param( const char *param, TreeRoot **rtree, char **error_msg) { - char *param_copy = strdup(param); - char *saved = NULL; - char *str = NULL; - TreeNode *tnode = NULL; + return add_ip_entries_from_param(param, *rtree); +} + +int ip_tree_from_param( + const char *param, TreeRoot **rtree, char **error_msg) +{ if (create_radix_tree(rtree, error_msg)) { - free(param_copy); return -1; } - str = strtok_r(param_copy, ",", &saved); - while (str != NULL) - { - if (strchr(str, ':') == NULL) - { - tnode = TreeAddIP(str, (*rtree)->ipv4_tree, IPV4_TREE); - } - else - { - tnode = TreeAddIP(str, (*rtree)->ipv6_tree, IPV6_TREE); - } - - if (tnode == NULL) - { - //*error_msg = apr_psprintf("Could not add entry " \ - // "\"%s\" from: %s.", str, param); - free(param_copy); - return -1; - } - - str = strtok_r(NULL, ",", &saved); - } - free(param_copy); - - return 0; + return add_ip_entries_from_param(param, *rtree); } diff --git a/src/variables/variable.h b/src/variables/variable.h index 06f407f2c3..0d6f3adb6a 100644 --- a/src/variables/variable.h +++ b/src/variables/variable.h @@ -98,6 +98,59 @@ class n : public Variable { \ } \ }; +#define VARIABLE_MONKEY_SIMPLE_VARIABLES(X) \ + X("RESPONSE_CONTENT_TYPE", m_variableResponseContentType) \ + X("ARGS_COMBINED_SIZE", m_variableARGScombinedSize) \ + X("AUTH_TYPE", m_variableAuthType) \ + X("FILES_COMBINED_SIZE", m_variableFilesCombinedSize) \ + X("FULL_REQUEST", m_variableFullRequest) \ + X("FULL_REQUEST_LENGTH", m_variableFullRequestLength) \ + X("INBOUND_DATA_ERROR", m_variableInboundDataError) \ + X("MATCHED_VAR", m_variableMatchedVar) \ + X("MATCHED_VAR_NAME", m_variableMatchedVarName) \ + X("MSC_PCRE_ERROR", m_variableMscPcreError) \ + 
X("MSC_PCRE_LIMITS_EXCEEDED", m_variableMscPcreLimitsExceeded) \ + X("MULTIPART_CRLF_LF_LINES", m_variableMultipartCrlfLFLines) \ + X("MULTIPART_DATA_AFTER", m_variableMultipartDataAfter) \ + X("MULTIPART_FILE_LIMIT_EXCEEDED", m_variableMultipartFileLimitExceeded) \ + X("MULTIPART_STRICT_ERROR", m_variableMultipartStrictError) \ + X("MULTIPART_HEADER_FOLDING", m_variableMultipartHeaderFolding) \ + X("MULTIPART_INVALID_QUOTING", m_variableMultipartInvalidQuoting) \ + X("MULTIPART_INVALID_HEADER_FOLDING", m_variableMultipartInvalidHeaderFolding) \ + X("MULTIPART_UNMATCHED_BOUNDARY", m_variableMultipartUnmatchedBoundary) \ + X("OUTBOUND_DATA_ERROR", m_variableOutboundDataError) \ + X("PATH_INFO", m_variablePathInfo) \ + X("QUERY_STRING", m_variableQueryString) \ + X("REMOTE_ADDR", m_variableRemoteAddr) \ + X("REMOTE_HOST", m_variableRemoteHost) \ + X("REMOTE_PORT", m_variableRemotePort) \ + X("REQBODY_ERROR", m_variableReqbodyError) \ + X("REQBODY_ERROR_MSG", m_variableReqbodyErrorMsg) \ + X("REQBODY_PROCESSOR_ERROR_MSG", m_variableReqbodyProcessorErrorMsg) \ + X("REQBODY_PROCESSOR_ERROR", m_variableReqbodyProcessorError) \ + X("REQBODY_PROCESSOR", m_variableReqbodyProcessor) \ + X("REQUEST_BASENAME", m_variableRequestBasename) \ + X("REQUEST_BODY", m_variableRequestBody) \ + X("REQUEST_BODY_LENGTH", m_variableRequestBodyLength) \ + X("REQUEST_FILENAME", m_variableRequestFilename) \ + X("REQUEST_LINE", m_variableRequestLine) \ + X("REQUEST_METHOD", m_variableRequestMethod) \ + X("REQUEST_PROTOCOL", m_variableRequestProtocol) \ + X("REQUEST_URI", m_variableRequestURI) \ + X("REQUEST_URI_RAW", m_variableRequestURIRaw) \ + X("RESOURCE", m_variableResource) \ + X("RESPONSE_BODY", m_variableResponseBody) \ + X("RESPONSE_CONTENT_LENGTH", m_variableResponseContentLength) \ + X("RESPONSE_PROTOCOL", m_variableResponseProtocol) \ + X("RESPONSE_STATUS", m_variableResponseStatus) \ + X("SERVER_ADDR", m_variableServerAddr) \ + X("SERVER_NAME", m_variableServerName) \ + 
X("SERVER_PORT", m_variableServerPort) \ + X("SESSIONID", m_variableSessionID) \ + X("UNIQUE_ID", m_variableUniqueID) \ + X("URLENCODED_ERROR", m_variableUrlEncodedError) \ + X("USERID", m_variableUserID) + namespace modsecurity { @@ -274,109 +327,16 @@ class VariableMonkeyResolution { throw std::invalid_argument("Variable not found."); } - if (comp(variable, "RESPONSE_CONTENT_TYPE")) { - t->m_variableResponseContentType.evaluate(l); - } else if (comp(variable, "ARGS_COMBINED_SIZE")) { - t->m_variableARGScombinedSize.evaluate(l); - } else if (comp(variable, "AUTH_TYPE")) { - t->m_variableAuthType.evaluate(l); - } else if (comp(variable, "FILES_COMBINED_SIZE")) { - t->m_variableFilesCombinedSize.evaluate(l); - } else if (comp(variable, "FULL_REQUEST")) { - t->m_variableFullRequest.evaluate(l); - } else if (comp(variable, "FULL_REQUEST_LENGTH")) { - t->m_variableFullRequestLength.evaluate(l); - } else if (comp(variable, "INBOUND_DATA_ERROR")) { - t->m_variableInboundDataError.evaluate(l); - } else if (comp(variable, "MATCHED_VAR")) { - t->m_variableMatchedVar.evaluate(l); - } else if (comp(variable, "MATCHED_VAR_NAME")) { - t->m_variableMatchedVarName.evaluate(l); - } else if (comp(variable, "MSC_PCRE_ERROR")) { - t->m_variableMscPcreError.evaluate(l); - } else if (comp(variable, "MSC_PCRE_LIMITS_EXCEEDED")) { - t->m_variableMscPcreLimitsExceeded.evaluate(l); - } else if (comp(variable, "MULTIPART_CRLF_LF_LINES")) { - t->m_variableMultipartCrlfLFLines.evaluate(l); - } else if (comp(variable, "MULTIPART_DATA_AFTER")) { - t->m_variableMultipartDataAfter.evaluate(l); - } else if (comp(variable, "MULTIPART_FILE_LIMIT_EXCEEDED")) { - t->m_variableMultipartFileLimitExceeded.evaluate(l); - } else if (comp(variable, "MULTIPART_STRICT_ERROR")) { - t->m_variableMultipartStrictError.evaluate(l); - } else if (comp(variable, "MULTIPART_HEADER_FOLDING")) { - t->m_variableMultipartHeaderFolding.evaluate(l); - } else if (comp(variable, "MULTIPART_INVALID_QUOTING")) { - 
t->m_variableMultipartInvalidQuoting.evaluate(l); - } else if (comp(variable, "MULTIPART_INVALID_HEADER_FOLDING")) { - t->m_variableMultipartInvalidHeaderFolding.evaluate(l); - } else if (comp(variable, "MULTIPART_UNMATCHED_BOUNDARY")) { - t->m_variableMultipartUnmatchedBoundary.evaluate(l); - } else if (comp(variable, "OUTBOUND_DATA_ERROR")) { - t->m_variableOutboundDataError.evaluate(l); - } else if (comp(variable, "PATH_INFO")) { - t->m_variablePathInfo.evaluate(l); - } else if (comp(variable, "QUERY_STRING")) { - t->m_variableQueryString.evaluate(l); - } else if (comp(variable, "REMOTE_ADDR")) { - t->m_variableRemoteAddr.evaluate(l); - } else if (comp(variable, "REMOTE_HOST")) { - t->m_variableRemoteHost.evaluate(l); - } else if (comp(variable, "REMOTE_PORT")) { - t->m_variableRemotePort.evaluate(l); - } else if (comp(variable, "REQBODY_ERROR")) { - t->m_variableReqbodyError.evaluate(l); - } else if (comp(variable, "REQBODY_ERROR_MSG")) { - t->m_variableReqbodyErrorMsg.evaluate(l); - } else if (comp(variable, "REQBODY_PROCESSOR_ERROR_MSG")) { - t->m_variableReqbodyProcessorErrorMsg.evaluate(l); - } else if (comp(variable, "REQBODY_PROCESSOR_ERROR")) { - t->m_variableReqbodyProcessorError.evaluate(l); - } else if (comp(variable, "REQBODY_PROCESSOR")) { - t->m_variableReqbodyProcessor.evaluate(l); - } else if (comp(variable, "REQUEST_BASENAME")) { - t->m_variableRequestBasename.evaluate(l); - } else if (comp(variable, "REQUEST_BODY")) { - t->m_variableRequestBody.evaluate(l); - } else if (comp(variable, "REQUEST_BODY_LENGTH")) { - t->m_variableRequestBodyLength.evaluate(l); - } else if (comp(variable, "REQUEST_FILENAME")) { - t->m_variableRequestFilename.evaluate(l); - } else if (comp(variable, "REQUEST_LINE")) { - t->m_variableRequestLine.evaluate(l); - } else if (comp(variable, "REQUEST_METHOD")) { - t->m_variableRequestMethod.evaluate(l); - } else if (comp(variable, "REQUEST_PROTOCOL")) { - t->m_variableRequestProtocol.evaluate(l); - } else if (comp(variable, 
"REQUEST_URI")) { - t->m_variableRequestURI.evaluate(l); - } else if (comp(variable, "REQUEST_URI_RAW")) { - t->m_variableRequestURIRaw.evaluate(l); - } else if (comp(variable, "RESOURCE")) { - t->m_variableResource.evaluate(l); - } else if (comp(variable, "RESPONSE_BODY")) { - t->m_variableResponseBody.evaluate(l); - } else if (comp(variable, "RESPONSE_CONTENT_LENGTH")) { - t->m_variableResponseContentLength.evaluate(l); - } else if (comp(variable, "RESPONSE_PROTOCOL")) { - t->m_variableResponseProtocol.evaluate(l); - } else if (comp(variable, "RESPONSE_STATUS")) { - t->m_variableResponseStatus.evaluate(l); - } else if (comp(variable, "SERVER_ADDR")) { - t->m_variableServerAddr.evaluate(l); - } else if (comp(variable, "SERVER_NAME")) { - t->m_variableServerName.evaluate(l); - } else if (comp(variable, "SERVER_PORT")) { - t->m_variableServerPort.evaluate(l); - } else if (comp(variable, "SESSIONID")) { - t->m_variableSessionID.evaluate(l); - } else if (comp(variable, "UNIQUE_ID")) { - t->m_variableUniqueID.evaluate(l); - } else if (comp(variable, "URLENCODED_ERROR")) { - t->m_variableUrlEncodedError.evaluate(l); - } else if (comp(variable, "USERID")) { - t->m_variableUserID.evaluate(l); - } else { + bool matched = false; +#define VARIABLE_MONKEY_EVALUATE_DISPATCH(name, member) \ + if (matched == false && comp(variable, name)) { \ + t->member.evaluate(l); \ + matched = true; \ + } + VARIABLE_MONKEY_SIMPLE_VARIABLES(VARIABLE_MONKEY_EVALUATE_DISPATCH); +#undef VARIABLE_MONKEY_EVALUATE_DISPATCH + + if (matched == false) { throw std::invalid_argument("Variable not found."); } } @@ -389,126 +349,41 @@ class VariableMonkeyResolution { collection = variable.find(":"); } if (collection == std::string::npos) { - if (comp(variable, "RESPONSE_CONTENT_TYPE")) { - vv = t->m_variableResponseContentType.resolveFirst(); - } else if (comp(variable, "ARGS_COMBINED_SIZE")) { - vv = t->m_variableARGScombinedSize.resolveFirst(); - } else if (comp(variable, "AUTH_TYPE")) { - vv = 
t->m_variableAuthType.resolveFirst(); - } else if (comp(variable, "FILES_COMBINED_SIZE")) { - vv = t->m_variableFilesCombinedSize.resolveFirst(); - } else if (comp(variable, "FULL_REQUEST")) { - vv = t->m_variableFullRequest.resolveFirst(); - } else if (comp(variable, "FULL_REQUEST_LENGTH")) { - vv = t->m_variableFullRequestLength.resolveFirst(); - } else if (comp(variable, "INBOUND_DATA_ERROR")) { - vv = t->m_variableInboundDataError.resolveFirst(); - } else if (comp(variable, "MATCHED_VAR")) { - vv = t->m_variableMatchedVar.resolveFirst(); - } else if (comp(variable, "MATCHED_VAR_NAME")) { - vv = t->m_variableMatchedVarName.resolveFirst(); - } else if (comp(variable, "MSC_PCRE_ERROR")) { - vv = t->m_variableMscPcreError.resolveFirst(); - } else if (comp(variable, "MSC_PCRE_LIMITS_EXCEEDED")) { - vv = t->m_variableMscPcreLimitsExceeded.resolveFirst(); - } else if (comp(variable, "MULTIPART_CRLF_LF_LINES")) { - vv = t->m_variableMultipartCrlfLFLines.resolveFirst(); - } else if (comp(variable, "MULTIPART_DATA_AFTER")) { - vv = t->m_variableMultipartDataAfter.resolveFirst(); - } else if (comp(variable, "MULTIPART_FILE_LIMIT_EXCEEDED")) { - vv = t->m_variableMultipartFileLimitExceeded.resolveFirst(); - } else if (comp(variable, "MULTIPART_STRICT_ERROR")) { - vv = t->m_variableMultipartStrictError.resolveFirst(); - } else if (comp(variable, "MULTIPART_HEADER_FOLDING")) { - vv = t->m_variableMultipartHeaderFolding.resolveFirst(); - } else if (comp(variable, "MULTIPART_INVALID_QUOTING")) { - vv = t->m_variableMultipartInvalidQuoting.resolveFirst(); - } else if (comp(variable, "MULTIPART_INVALID_HEADER_FOLDING")) { - vv = t->m_variableMultipartInvalidHeaderFolding.resolveFirst(); - } else if (comp(variable, "MULTIPART_UNMATCHED_BOUNDARY")) { - vv = t->m_variableMultipartUnmatchedBoundary.resolveFirst(); - } else if (comp(variable, "OUTBOUND_DATA_ERROR")) { - vv = t->m_variableOutboundDataError.resolveFirst(); - } else if (comp(variable, "PATH_INFO")) { - vv = 
t->m_variablePathInfo.resolveFirst(); - } else if (comp(variable, "QUERY_STRING")) { - vv = t->m_variableQueryString.resolveFirst(); - } else if (comp(variable, "REMOTE_ADDR")) { - vv = t->m_variableRemoteAddr.resolveFirst(); - } else if (comp(variable, "REMOTE_HOST")) { - vv = t->m_variableRemoteHost.resolveFirst(); - } else if (comp(variable, "REMOTE_PORT")) { - vv = t->m_variableRemotePort.resolveFirst(); - } else if (comp(variable, "REQBODY_ERROR")) { - vv = t->m_variableReqbodyError.resolveFirst(); - } else if (comp(variable, "REQBODY_ERROR_MSG")) { - vv = t->m_variableReqbodyErrorMsg.resolveFirst(); - } else if (comp(variable, "REQBODY_PROCESSOR_ERROR_MSG")) { - vv = t->m_variableReqbodyProcessorErrorMsg.resolveFirst(); - } else if (comp(variable, "REQBODY_PROCESSOR_ERROR")) { - vv = t->m_variableReqbodyProcessorError.resolveFirst(); - } else if (comp(variable, "REQBODY_PROCESSOR")) { - vv = t->m_variableReqbodyProcessor.resolveFirst(); - } else if (comp(variable, "REQUEST_BASENAME")) { - vv = t->m_variableRequestBasename.resolveFirst(); - } else if (comp(variable, "REQUEST_BODY")) { - vv = t->m_variableRequestBody.resolveFirst(); - } else if (comp(variable, "REQUEST_BODY_LENGTH")) { - vv = t->m_variableRequestBodyLength.resolveFirst(); - } else if (comp(variable, "REQUEST_FILENAME")) { - vv = t->m_variableRequestFilename.resolveFirst(); - } else if (comp(variable, "REQUEST_LINE")) { - vv = t->m_variableRequestLine.resolveFirst(); - } else if (comp(variable, "REQUEST_METHOD")) { - vv = t->m_variableRequestMethod.resolveFirst(); - } else if (comp(variable, "REQUEST_PROTOCOL")) { - vv = t->m_variableRequestProtocol.resolveFirst(); - } else if (comp(variable, "REQUEST_URI")) { - vv = t->m_variableRequestURI.resolveFirst(); - } else if (comp(variable, "REQUEST_URI_RAW")) { - vv = t->m_variableRequestURIRaw.resolveFirst(); - } else if (comp(variable, "RESOURCE")) { - vv = t->m_variableResource.resolveFirst(); - } else if (comp(variable, "RESPONSE_BODY")) { - vv = 
t->m_variableResponseBody.resolveFirst(); - } else if (comp(variable, "RESPONSE_CONTENT_LENGTH")) { - vv = t->m_variableResponseContentLength.resolveFirst(); - } else if (comp(variable, "RESPONSE_PROTOCOL")) { - vv = t->m_variableResponseProtocol.resolveFirst(); - } else if (comp(variable, "RESPONSE_STATUS")) { - vv = t->m_variableResponseStatus.resolveFirst(); - } else if (comp(variable, "SERVER_ADDR")) { - vv = t->m_variableServerAddr.resolveFirst(); - } else if (comp(variable, "SERVER_NAME")) { - vv = t->m_variableServerName.resolveFirst(); - } else if (comp(variable, "SERVER_PORT")) { - vv = t->m_variableServerPort.resolveFirst(); - } else if (comp(variable, "SESSIONID")) { - vv = t->m_variableSessionID.resolveFirst(); - } else if (comp(variable, "UNIQUE_ID")) { - vv = t->m_variableUniqueID.resolveFirst(); - } else if (comp(variable, "URLENCODED_ERROR")) { - vv = t->m_variableUrlEncodedError.resolveFirst(); - } else if (comp(variable, "USERID")) { - vv = t->m_variableUserID.resolveFirst(); - } else if (comp(variable, "TX")) { + bool matched = false; +#define VARIABLE_MONKEY_RESOLVE_DISPATCH(name, member) \ + if (matched == false && comp(variable, name)) { \ + vv = t->member.resolveFirst(); \ + matched = true; \ + } + VARIABLE_MONKEY_SIMPLE_VARIABLES(VARIABLE_MONKEY_RESOLVE_DISPATCH); +#undef VARIABLE_MONKEY_RESOLVE_DISPATCH + + if (matched == false && comp(variable, "TX")) { vv = t->m_collections.m_tx_collection->resolveFirst(""); - } else if (comp(variable, "RESOURCE")) { + matched = true; + } else if (matched == false && comp(variable, "RESOURCE")) { vv = t->m_collections.m_resource_collection->resolveFirst("", t->m_collections.m_resource_collection_key, t->m_rules->m_secWebAppId.m_value); - } else if (comp(variable, "USER")) { + matched = true; + } else if (matched == false && comp(variable, "USER")) { vv = t->m_collections.m_user_collection->resolveFirst("", t->m_collections.m_user_collection_key, t->m_rules->m_secWebAppId.m_value); - } else if 
(comp(variable, "SESSION")) { + matched = true; + } else if (matched == false && comp(variable, "SESSION")) { vv = t->m_collections.m_session_collection->resolveFirst("", t->m_collections.m_session_collection_key, t->m_rules->m_secWebAppId.m_value); - } else if (comp(variable, "IP")) { + matched = true; + } else if (matched == false && comp(variable, "IP")) { vv = t->m_collections.m_ip_collection->resolveFirst("", t->m_collections.m_ip_collection_key, t->m_rules->m_secWebAppId.m_value); - } else if (comp(variable, "GLOBAL")) { + matched = true; + } else if (matched == false && comp(variable, "GLOBAL")) { vv = t->m_collections.m_global_collection->resolveFirst("", t->m_collections.m_global_collection_key, t->m_rules->m_secWebAppId.m_value); - } else { + matched = true; + } + + if (matched == false) { throw std::invalid_argument("Variable not found."); } } else { @@ -729,4 +604,6 @@ std::string operator+(const std::string &a, const modsecurity::variables::Variab } // namespace variables } // namespace modsecurity +#undef VARIABLE_MONKEY_SIMPLE_VARIABLES + #endif // SRC_VARIABLES_VARIABLE_H_ diff --git a/test/Makefile.am b/test/Makefile.am index 2e7e05d614..4d3a469620 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -23,7 +23,8 @@ noinst_PROGRAMS = EXTRA_DIST = \ test-cases/* \ custom-test-driver \ - test-suite.sh + test-suite.sh \ + run-json-backend-matrix.sh # unit_tests @@ -34,12 +35,20 @@ unit_tests_SOURCES = \ unit/unit_test.cc \ common/custom_debug_log.cc +noinst_PROGRAMS += json_backend_depth_tests +json_backend_depth_tests_SOURCES = \ + unit/json_backend_depth_tests.cc + noinst_HEADERS = \ - common/modsecurity_test.cc \ - common/*.h \ - unit/*.h \ - regression/*.h + $(srcdir)/common/colors.h \ + $(srcdir)/common/custom_debug_log.h \ + $(srcdir)/common/json.h \ + $(srcdir)/common/modsecurity_test.h \ + $(srcdir)/common/modsecurity_test_context.h \ + $(srcdir)/common/modsecurity_test_results.h \ + $(srcdir)/unit/unit_test.h \ + 
$(srcdir)/regression/regression_test.h unit_tests_LDADD = \ @@ -52,8 +61,7 @@ unit_tests_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) unit_tests_LDFLAGS = \ @@ -66,15 +74,15 @@ unit_tests_LDFLAGS = \ $(MAXMIND_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) unit_tests_CPPFLAGS = \ -Icommon \ -I$(top_srcdir)/ \ + -I$(top_srcdir)/others/jsoncons/include \ -g \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(CURL_CFLAGS) \ $(MODSEC_NO_LOGS) \ $(GEOIP_CFLAGS) \ @@ -83,11 +91,19 @@ unit_tests_CPPFLAGS = \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ - $(YAJL_CFLAGS) \ $(LUA_CFLAGS) \ $(SSDEEP_CFLAGS) \ $(LIBXML2_CFLAGS) +json_backend_depth_tests_LDADD = \ + $(unit_tests_LDADD) + +json_backend_depth_tests_LDFLAGS = \ + $(unit_tests_LDFLAGS) + +json_backend_depth_tests_CPPFLAGS = \ + $(unit_tests_CPPFLAGS) + # regression @@ -107,8 +123,7 @@ regression_tests_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) regression_tests_LDFLAGS = \ @@ -119,7 +134,6 @@ regression_tests_LDFLAGS = \ -lm \ -lstdc++ \ $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ $(LUA_LDFLAGS) @@ -128,8 +142,9 @@ regression_tests_LDFLAGS = \ regression_tests_CPPFLAGS = \ -Icommon \ -I$(top_srcdir) \ + -I$(top_srcdir)/others/jsoncons/include \ -g \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(CURL_CFLAGS) \ $(MODSEC_NO_LOGS) \ $(GEOIP_CFLAGS) \ @@ -140,7 +155,6 @@ regression_tests_CPPFLAGS = \ $(SSDEEP_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ - $(YAJL_CFLAGS) \ $(LIBXML2_CFLAGS) @@ -161,8 +175,7 @@ rules_optimization_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) rules_optimization_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -174,14 +187,13 @@ rules_optimization_LDFLAGS = \ 
$(MAXMIND_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) rules_optimization_CPPFLAGS = \ -Icommon \ -I$(top_srcdir)/ \ -g \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(CURL_CFLAGS) \ $(MODSEC_NO_LOGS) \ $(GEOIP_CFLAGS) \ @@ -192,6 +204,4 @@ rules_optimization_CPPFLAGS = \ $(SSDEEP_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ - $(YAJL_CFLAGS) \ $(LIBXML2_CFLAGS) - diff --git a/test/benchmark/Makefile.am b/test/benchmark/Makefile.am index 2ac9d92111..a7f681484b 100644 --- a/test/benchmark/Makefile.am +++ b/test/benchmark/Makefile.am @@ -1,17 +1,28 @@ -noinst_PROGRAMS = benchmark +noinst_PROGRAMS = \ + benchmark \ + json_benchmark + +EXTRA_DIST = \ + basic_rules.conf \ + json_benchmark_rules.conf \ + run-json-benchmarks.sh \ + download-owasp-v3-rules.sh \ + download-owasp-v4-rules.sh benchmark_SOURCES = \ benchmark.cc +json_benchmark_SOURCES = \ + json_benchmark.cc + benchmark_LDADD = \ $(CURL_LDADD) \ $(GEOIP_LDADD) \ $(MAXMIND_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(YAJL_LDADD) \ $(LMDB_LDADD) \ $(SSDEEP_LDADD) \ $(LUA_LDADD) \ @@ -27,19 +38,31 @@ benchmark_LDFLAGS = \ -lstdc++ \ $(GEOIP_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ $(LUA_LDFLAGS) benchmark_CPPFLAGS = \ - -I$(top_builddir)/headers \ + -I$(top_builddir) \ + -I$(top_srcdir) \ + -I$(top_srcdir)/headers \ $(GLOBAL_CPPFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ $(LMDB_CFLAGS) \ $(LIBXML2_CFLAGS) +json_benchmark_LDADD = \ + $(benchmark_LDADD) + +json_benchmark_LDFLAGS = \ + $(benchmark_LDFLAGS) + +# Use abs_srcdir: this path is compiled into the binary, which may be started +# from any working directory (e.g. by run-json-benchmarks.sh). +json_benchmark_CPPFLAGS = \ + $(benchmark_CPPFLAGS) \ + -DMSC_JSON_BENCHMARK_RULES_DIR=\"$(abs_srcdir)\" + MAINTAINERCLEANFILES = \ Makefile.in - diff --git a/test/benchmark/json_benchmark.cc b/test/benchmark/json_benchmark.cc new file mode 100644 index 0000000000..f23e0df8b2 --- /dev/null +++ b/test/benchmark/json_benchmark.cc @@ -0,0 +1,497 @@ +/* + * ModSecurity, http://www.modsecurity.org/
+ * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include "config.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "modsecurity/modsecurity.h" +#include "modsecurity/rules_set.h" +#include "modsecurity/transaction.h" +#include "src/request_body_processor/json_instrumentation.h" + +#ifndef MSC_JSON_BENCHMARK_RULES_DIR +#error "MSC_JSON_BENCHMARK_RULES_DIR must be defined by the build system." +#endif + +namespace { + +using Clock = std::chrono::steady_clock; + +constexpr std::size_t kDefaultTargetBytes = 1048576; +constexpr std::size_t kDefaultDepth = 512; +constexpr unsigned long long kDefaultIterations = 100; +constexpr const char *kRulesFileName = "json_benchmark_rules.conf"; + +struct Options { + std::string scenario; + unsigned long long iterations{kDefaultIterations}; + std::size_t target_bytes{kDefaultTargetBytes}; + std::size_t depth{kDefaultDepth}; + bool include_invalid{false}; + bool output_json{false}; +}; + +struct Metrics { + std::uint64_t append_request_body_ns{0}; + std::uint64_t process_request_body_ns{0}; + std::uint64_t total_transaction_ns{0}; + unsigned long long parse_success_count{0}; + unsigned long long parse_error_count{0}; +}; + +const char *usage_message = + "Usage: json_benchmark --scenario NAME [--iterations N] " + "[--target-bytes N] [--depth N] [--include-invalid] [--output json]"; + +std::string benchmarkBackend() { +#if defined(MSC_JSON_BACKEND_SIMDJSON) + return "simdjson"; +#elif 
defined(MSC_JSON_BACKEND_JSONCONS) + return "jsoncons"; +#else +#error "A JSON backend must be selected at build time." +#endif +} + +std::string rulesFilePath() { + return std::string(MSC_JSON_BENCHMARK_RULES_DIR) + "/" + kRulesFileName; +} + +std::uint64_t elapsedNanos(Clock::time_point start_time) { + return static_cast<std::uint64_t>( + std::chrono::duration_cast<std::chrono::nanoseconds>( + Clock::now() - start_time).count()); +} +// Parses a decimal size; rejects signs/whitespace that strtoull would accept. +std::size_t parseSize(const char *value, const char *flag_name) { + errno = 0; + char *end = nullptr; + const unsigned long long parsed = std::strtoull(value, &end, 10); + if (errno != 0 || *value < '0' || *value > '9' || *end != '\0') { + throw std::runtime_error(std::string("invalid numeric value for ") + + flag_name + ": " + value); + } + if (parsed > std::numeric_limits<std::size_t>::max()) { + throw std::runtime_error(std::string("value too large for ") + + flag_name + ": " + value); + } + return static_cast<std::size_t>(parsed); +} +// Parses --iterations as a positive decimal count (no sign, no whitespace). +unsigned long long parseIterations(const char *value) { + errno = 0; + char *end = nullptr; + const unsigned long long parsed = std::strtoull(value, &end, 10); + if (errno != 0 || *value < '0' || *value > '9' || *end != '\0' || parsed == 0) { + throw std::runtime_error(std::string("invalid numeric value for --iterations: ") + + value); + } + return parsed; +} + +Options parseOptions(int argc, const char *argv[]) { + Options options; + + for (int i = 1; i < argc; i++) { + const std::string current(argv[i]); + if (current == "-h" || current == "-?"
|| current == "--help") { + std::cout << usage_message << std::endl; + std::exit(0); + } else if (current == "--scenario") { + if (i + 1 >= argc) { + throw std::runtime_error("missing value for --scenario"); + } + options.scenario.assign(argv[++i]); + } else if (current == "--iterations") { + if (i + 1 >= argc) { + throw std::runtime_error("missing value for --iterations"); + } + options.iterations = parseIterations(argv[++i]); + } else if (current == "--target-bytes") { + if (i + 1 >= argc) { + throw std::runtime_error("missing value for --target-bytes"); + } + options.target_bytes = parseSize(argv[++i], "--target-bytes"); + } else if (current == "--depth") { + if (i + 1 >= argc) { + throw std::runtime_error("missing value for --depth"); + } + options.depth = parseSize(argv[++i], "--depth"); + } else if (current == "--include-invalid") { + options.include_invalid = true; + } else if (current == "--output") { + if (i + 1 >= argc) { + throw std::runtime_error("missing value for --output"); + } + const std::string output_format(argv[++i]); + if (output_format != "json") { + throw std::runtime_error("unsupported output format: " + + output_format); + } + options.output_json = true; + } else { + throw std::runtime_error("unknown option: " + current); + } + } + + if (options.scenario.empty()) { + throw std::runtime_error("missing required --scenario"); + } + + const bool is_invalid_scenario = options.scenario == "truncated" + || options.scenario == "malformed"; + if (is_invalid_scenario && !options.include_invalid) { + throw std::runtime_error( + "invalid JSON scenarios require --include-invalid"); + } + + return options; +} + +std::string makeLargeObject(std::size_t target_bytes) { + std::string body("{"); + std::size_t index = 0; + + while (body.size() + 32 < target_bytes || index == 0) { + if (index > 0) { + body.push_back(','); + } + body += "\"key"; + body += std::to_string(index); + body += "\":\"value"; + body += std::to_string(index); + body += "\""; + index++; 
+ } + + body.push_back('}'); + return body; +} + +std::string makeUtf8Object(std::size_t target_bytes) { + static const char *const utf8_values[] = { + u8"Gr\u00fc\u00dfe", + u8"\u3053\u3093\u306b\u3061\u306f", + u8"\u043f\u0440\u0438\u0432\u0435\u0442", + u8"\u0645\u0631\u062d\u0628\u0627", + u8"\U0001F30D" + }; + + std::string body("{"); + std::size_t index = 0; + + while (body.size() + 48 < target_bytes || index == 0) { + if (index > 0) { + body.push_back(','); + } + body += "\"utf8_"; + body += std::to_string(index); + body += "\":\""; + body += utf8_values[index % (sizeof(utf8_values) / sizeof(utf8_values[0]))]; + body += "\""; + index++; + } + + body.push_back('}'); + return body; +} + +std::string makeNumbersArray(std::size_t target_bytes) { + static const char *const numeric_tokens[] = { + "0", + "-0", + "1.0", + "1e3", + "-1.25e-4", + "123456789012345678901234567890", + "6.02214076e23", + "3.141592653589793238462643383279" + }; + + std::string body("["); + std::size_t index = 0; + + while (body.size() + 40 < target_bytes || index == 0) { + if (index > 0) { + body.push_back(','); + } + body += numeric_tokens[index + % (sizeof(numeric_tokens) / sizeof(numeric_tokens[0]))]; + index++; + } + + body.push_back(']'); + return body; +} + +std::string makeDeepNesting(std::size_t depth) { + std::string body; + + for (std::size_t i = 0; i < depth; i++) { + if ((i % 2) == 0) { + body += "{\"k\":"; + } else { + body.push_back('['); + } + } + + body += "\"leaf\""; + + for (std::size_t i = depth; i > 0; i--) { + if (((i - 1) % 2) == 0) { + body.push_back('}'); + } else { + body.push_back(']'); + } + } + + return body; +} + +std::string buildScenarioBody(const Options &options) { + if (options.scenario == "large-object") { + return makeLargeObject(options.target_bytes); + } + if (options.scenario == "deep-nesting") { + return makeDeepNesting(options.depth); + } + if (options.scenario == "numbers") { + return makeNumbersArray(options.target_bytes); + } + if 
(options.scenario == "utf8") { + return makeUtf8Object(options.target_bytes); + } + if (options.scenario == "truncated") { + std::string body = makeLargeObject(options.target_bytes); + body.pop_back(); + return body; + } + if (options.scenario == "malformed") { + std::string body = makeLargeObject(options.target_bytes); + body.insert(body.size() - 1, ",]"); + return body; + } + + throw std::runtime_error("unsupported scenario: " + options.scenario); +} + +bool isResolvedZero(const std::unique_ptr &value) { + return value && *value == "0"; +} + +Metrics runBenchmark(modsecurity::ModSecurity *modsec, + modsecurity::RulesSet *rules, const std::string &body, + const Options &options) { + Metrics metrics; + + for (unsigned long long iteration = 0; iteration < options.iterations; + iteration++) { + const auto total_start = Clock::now(); + modsecurity::Transaction transaction(modsec, rules, nullptr); + transaction.processConnection("200.249.12.31", 12345, + "127.0.0.1", 80); + transaction.processURI("/json-benchmark", "POST", "1.1"); + transaction.addRequestHeader("Host", "localhost"); + transaction.addRequestHeader("User-Agent", + "ModSecurity-json-benchmark/1.0"); + transaction.addRequestHeader("Content-Type", "application/json"); + const std::string content_length = std::to_string(body.size()); + transaction.addRequestHeader("Content-Length", content_length.c_str()); + transaction.processRequestHeaders(); + + const auto append_start = Clock::now(); + const int append_ok = transaction.appendRequestBody( + reinterpret_cast(body.data()), body.size()); + metrics.append_request_body_ns += elapsedNanos(append_start); + if (append_ok == 0) { + throw std::runtime_error( + "appendRequestBody reported partial body processing"); + } + + const auto process_start = Clock::now(); + if (!transaction.processRequestBody()) { + throw std::runtime_error("processRequestBody returned false"); + } + metrics.process_request_body_ns += elapsedNanos(process_start); + 
metrics.total_transaction_ns += elapsedNanos(total_start); + + const std::unique_ptr reqbody_error = + transaction.m_variableReqbodyError.resolveFirst(); + const std::unique_ptr processor_error = + transaction.m_variableReqbodyProcessorError.resolveFirst(); + + if (!reqbody_error || !processor_error) { + throw std::runtime_error( + "unable to resolve JSON parse outcome variables"); + } + + const bool parse_success = isResolvedZero(reqbody_error) + && isResolvedZero(processor_error); + const bool parse_error = !isResolvedZero(reqbody_error) + || !isResolvedZero(processor_error); + + if (parse_success == parse_error) { + throw std::runtime_error( + "ambiguous JSON parse outcome observed in benchmark"); + } + + if (parse_success) { + metrics.parse_success_count++; + } else { + metrics.parse_error_count++; + } + } + + return metrics; +} + +long currentMaxRssKb() { + struct rusage usage; + if (getrusage(RUSAGE_SELF, &usage) != 0) { + return -1; + } + return usage.ru_maxrss; +} + +void printJson(const Options &options, const std::string &body, + const Metrics &metrics) { + std::cout << "{"; + std::cout << "\"backend\":\"" << benchmarkBackend() << "\","; + std::cout << "\"scenario\":\"" << options.scenario << "\","; + std::cout << "\"iterations\":" << options.iterations << ","; + std::cout << "\"body_bytes\":" << body.size() << ","; + std::cout << "\"append_request_body_ns\":" + << metrics.append_request_body_ns << ","; + std::cout << "\"process_request_body_ns\":" + << metrics.process_request_body_ns << ","; + std::cout << "\"total_transaction_ns\":" + << metrics.total_transaction_ns << ","; + std::cout << "\"parse_success_count\":" + << metrics.parse_success_count << ","; + std::cout << "\"parse_error_count\":" + << metrics.parse_error_count << ","; + std::cout << "\"ru_maxrss_kb\":" << currentMaxRssKb(); +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const modsecurity::RequestBodyProcessor::JsonInstrumentationMetrics + instrumentation = + 
modsecurity::RequestBodyProcessor::jsonInstrumentationSnapshot(); + std::cout << ",\"request_body_snapshot_count\":" + << instrumentation.request_body_snapshot_count; + std::cout << ",\"request_body_snapshot_bytes\":" + << instrumentation.request_body_snapshot_bytes; + std::cout << ",\"request_body_snapshot_ns\":" + << instrumentation.request_body_snapshot_ns; + std::cout << ",\"json_process_chunk_calls\":" + << instrumentation.json_process_chunk_calls; + std::cout << ",\"json_process_chunk_appended_bytes\":" + << instrumentation.json_process_chunk_appended_bytes; + std::cout << ",\"json_process_chunk_ns\":" + << instrumentation.json_process_chunk_ns; + std::cout << ",\"simdjson_parser_constructions\":" + << instrumentation.simdjson_parser_constructions; + std::cout << ",\"simdjson_parser_construction_ns\":" + << instrumentation.simdjson_parser_construction_ns; + std::cout << ",\"simdjson_padded_copy_bytes\":" + << instrumentation.simdjson_padded_copy_bytes; + std::cout << ",\"simdjson_padded_copy_ns\":" + << instrumentation.simdjson_padded_copy_ns; + std::cout << ",\"simdjson_iterate_ns\":" + << instrumentation.simdjson_iterate_ns; + std::cout << ",\"jsoncons_cursor_constructions\":" + << instrumentation.jsoncons_cursor_constructions; + std::cout << ",\"jsoncons_cursor_init_ns\":" + << instrumentation.jsoncons_cursor_init_ns; + std::cout << ",\"jsoncons_token_cursor_constructions\":" + << instrumentation.jsoncons_token_cursor_constructions; + std::cout << ",\"jsoncons_token_cursor_init_ns\":" + << instrumentation.jsoncons_token_cursor_init_ns; + std::cout << ",\"jsoncons_event_loop_ns\":" + << instrumentation.jsoncons_event_loop_ns; + std::cout << ",\"jsoncons_token_sync_steps\":" + << instrumentation.jsoncons_token_sync_steps; + std::cout << ",\"jsoncons_token_exact_advance_steps\":" + << instrumentation.jsoncons_token_exact_advance_steps; +#endif + std::cout << "}" << std::endl; +} + +void printHumanReadable(const Options &options, const std::string &body, + 
const Metrics &metrics) { + std::cout << "backend: " << benchmarkBackend() << "\n"; + std::cout << "scenario: " << options.scenario << "\n"; + std::cout << "iterations: " << options.iterations << "\n"; + std::cout << "body_bytes: " << body.size() << "\n"; + std::cout << "append_request_body_ns: " + << metrics.append_request_body_ns << "\n"; + std::cout << "process_request_body_ns: " + << metrics.process_request_body_ns << "\n"; + std::cout << "total_transaction_ns: " + << metrics.total_transaction_ns << "\n"; + std::cout << "parse_success_count: " + << metrics.parse_success_count << "\n"; + std::cout << "parse_error_count: " + << metrics.parse_error_count << "\n"; + std::cout << "ru_maxrss_kb: " << currentMaxRssKb() << "\n"; +} + +} // namespace + +int main(int argc, const char *argv[]) { + try { + const Options options = parseOptions(argc, argv); + const std::string body = buildScenarioBody(options); + + modsecurity::ModSecurity modsec; + modsec.setConnectorInformation( + "ModSecurity-json-benchmark v0.0.1-alpha"); + + modsecurity::RulesSet rules; + const std::string rules_path = rulesFilePath(); + if (rules.loadFromUri(rules_path.c_str()) < 0) { + std::cerr << "failed to load benchmark rules from " + << rules_path << std::endl; + std::cerr << rules.m_parserError.str() << std::endl; + return 1; + } + + modsecurity::RequestBodyProcessor::jsonInstrumentationReset(); + const Metrics metrics = runBenchmark(&modsec, &rules, body, options); + + if (options.output_json) { + printJson(options, body, metrics); + } else { + printHumanReadable(options, body, metrics); + } + return 0; + } catch (const std::exception &error) { + std::cerr << error.what() << std::endl; + std::cerr << usage_message << std::endl; + return 64; + } +} diff --git a/test/benchmark/json_benchmark_rules.conf b/test/benchmark/json_benchmark_rules.conf new file mode 100644 index 0000000000..5e13a15705 --- /dev/null +++ b/test/benchmark/json_benchmark_rules.conf @@ -0,0 +1,9 @@ +SecRuleEngine On 
+SecRequestBodyAccess On +SecAuditEngine Off +SecDebugLog /dev/null +SecDebugLogLevel 0 +SecRequestBodyLimit 8388608 +SecRequestBodyNoFilesLimit 8388608 +SecRequestBodyJsonDepthLimit 4096 +SecRule REQUEST_HEADERS:Content-Type "^application/json$" "id:901001,phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON" diff --git a/test/benchmark/run-json-benchmarks.sh b/test/benchmark/run-json-benchmarks.sh new file mode 100755 index 0000000000..779b7c3c5b --- /dev/null +++ b/test/benchmark/run-json-benchmarks.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +set -u + +usage() { + cat <<'EOF' +Usage: test/benchmark/run-json-benchmarks.sh --simdjson-build DIR --jsoncons-build DIR [--include-invalid] +EOF +} + +simdjson_build="" +jsoncons_build="" +include_invalid=0 +output_file="${PWD}/json-benchmark-results.jsonl" + +while [ "$#" -gt 0 ]; do + case "$1" in + --simdjson-build) + shift + [ "$#" -gt 0 ] || { usage; exit 64; } + simdjson_build="$1" + ;; + --jsoncons-build) + shift + [ "$#" -gt 0 ] || { usage; exit 64; } + jsoncons_build="$1" + ;; + --include-invalid) + include_invalid=1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + usage + exit 64 + ;; + esac + shift +done + +[ -n "${simdjson_build}" ] || { usage; exit 64; } +[ -n "${jsoncons_build}" ] || { usage; exit 64; } + +readonly base_scenarios=( + "large-object" + "deep-nesting" + "numbers" + "utf8" +) + +readonly invalid_scenarios=( + "truncated" + "malformed" +) + +# Runs every selected scenario for one build dir, appending JSON lines to +# output_file; returns 1 on a missing binary or the first failing scenario. +run_scenarios() { + local build_dir="$1" + local binary="${build_dir}/test/benchmark/json_benchmark" + local scenario + + if [ ! 
-x "${binary}" ]; then + echo "missing benchmark binary: ${binary}" >&2 + return 1 + fi + + for scenario in "${base_scenarios[@]}"; do + "${binary}" --scenario "${scenario}" --output json >> "${output_file}" || return 1 + done + + if [ "${include_invalid}" -ne 0 ]; then + for scenario in "${invalid_scenarios[@]}"; do + "${binary}" --scenario "${scenario}" --include-invalid --output json >> "${output_file}" || return 1 + done + fi +} + +: > "${output_file}" +run_scenarios "${simdjson_build}" || exit 1 +run_scenarios "${jsoncons_build}" || exit 1 + +printf 'Wrote %s\n' "${output_file}" diff --git a/test/common/json.h b/test/common/json.h new file mode 100644 index 0000000000..f98d8b07c5 --- /dev/null +++ b/test/common/json.h @@ -0,0 +1,342 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org.
+ * + */ + +#ifndef TEST_COMMON_JSON_H_ +#define TEST_COMMON_JSON_H_ + +#include + +#include +#include +#include +#include + +namespace modsecurity_test { +namespace json { + +enum class JsonType { + Object, + Array, + String, + Number, + Boolean, + Null, + Unknown +}; + +using JsonNode = jsoncons::ojson; + +class JsonValue; + +class JsonField { + public: + JsonField() = default; + JsonField(std::string_view key, const JsonNode *value) + : m_key(key), + m_value(value) { } + + bool valid() const { + return m_value != nullptr; + } + + std::string_view unescaped_key() const { + return m_key; + } + + JsonValue value() const; + + private: + std::string_view m_key; + const JsonNode *m_value{nullptr}; +}; + +class JsonArray { + public: + class iterator { + public: + explicit iterator(JsonNode::const_array_iterator iterator) + : m_iterator(iterator) { } + + JsonValue operator*() const; + + iterator &operator++() { + ++m_iterator; + return *this; + } + + bool operator!=(const iterator &other) const { + return m_iterator != other.m_iterator; + } + + private: + JsonNode::const_array_iterator m_iterator; + }; + + JsonArray() = default; + explicit JsonArray(const JsonNode *value) : m_value(value) { } + + bool valid() const { + return m_value != nullptr && m_value->is_array(); + } + + iterator begin() const { + return iterator(m_value->array_range().begin()); + } + + iterator end() const { + return iterator(m_value->array_range().end()); + } + + private: + const JsonNode *m_value{nullptr}; +}; + +class JsonObject { + public: + class iterator { + public: + explicit iterator(JsonNode::const_object_iterator iterator) + : m_iterator(iterator) { } + + JsonField operator*() const { + const auto &member = *m_iterator; + return JsonField(std::string_view(member.key().data(), + member.key().size()), &member.value()); + } + + iterator &operator++() { + ++m_iterator; + return *this; + } + + bool operator!=(const iterator &other) const { + return m_iterator != other.m_iterator; + } + + 
private: + JsonNode::const_object_iterator m_iterator; + }; + + JsonObject() = default; + explicit JsonObject(const JsonNode *value) : m_value(value) { } + + /* True only when a node is set and that node is an object. */ bool valid() const { + return m_value != nullptr && m_value->is_object(); + } + + /* begin() and end() dereference m_value without a null check; call them only when valid() is true. */ iterator begin() const { + return iterator(m_value->object_range().begin()); + } + + iterator end() const { + return iterator(m_value->object_range().end()); + } + + private: + const JsonNode *m_value{nullptr}; +}; + +/* Non-owning handle to any JSON node. A default-constructed handle holds no node and reports valid() == false. */ class JsonValue { + public: + JsonValue() = default; + explicit JsonValue(const JsonNode *value) : m_value(value) { } + + bool valid() const { + return m_value != nullptr; + } + + /* Object view of this node, or an invalid view when the node is unset or not an object. */ JsonObject get_object() const { + return JsonObject(valid() && m_value->is_object() ? m_value : nullptr); + } + + /* Array view of this node, or an invalid view when the node is unset or not an array. */ JsonArray get_array() const { + return JsonArray(valid() && m_value->is_array() ? m_value : nullptr); + } + + /* Classifies the node. Unset handles map to Unknown; any node that is not an object, array, string, bool or null falls through to Number. */ JsonType type() const { + if (!valid()) { + return JsonType::Unknown; + } + if (m_value->is_object()) { + return JsonType::Object; + } + if (m_value->is_array()) { + return JsonType::Array; + } + if (m_value->is_string()) { + return JsonType::String; + } + if (m_value->is_bool()) { + return JsonType::Boolean; + } + if (m_value->is_null()) { + return JsonType::Null; + } + return JsonType::Number; + } + + /* Underlying node pointer; null when the handle is unset. */ const JsonNode *raw() const { + return m_value; + } + + private: + const JsonNode *m_value{nullptr}; +}; + +inline JsonValue JsonField::value() const { + return JsonValue(m_value); +} + +inline JsonValue JsonArray::iterator::operator*() const { + return JsonValue(&(*m_iterator)); +} + +/* Owns the parsed root node. get_value() and get_array() return views that point at m_root, so they must not be used after the document is destroyed. */ class JsonDocument { + public: + JsonValue get_value() const { + return JsonValue(&m_root); + } + + JsonArray get_array() const { + return JsonArray(&m_root); + } + + /* Parses input into the root node. Returns false when parsing throws std::exception and, if error is non-null, stores exception.what() in it. */ bool parse(const std::string &input, std::string *error = nullptr) { + try { + m_root = JsonNode::parse(input); + return true; + } catch (const std::exception &exception) { + if (error != nullptr) { + error->assign(exception.what()); + } + return false; + } + } + + private: +
JsonNode m_root; +}; + +/* get() overloads copy value into *target and return true. The JsonArray, JsonField, JsonObject and JsonValue overloads return false and leave *target untouched when value.valid() is false. The error argument is accepted but never written. */ inline bool get(JsonArray value, JsonArray *target, + std::string *error = nullptr) { + (void) error; + if (!value.valid()) { + return false; + } + *target = value; + return true; +} + +inline bool get(const JsonField &value, JsonField *target, + std::string *error = nullptr) { + (void) error; + if (!value.valid()) { + return false; + } + *target = value; + return true; +} + +inline bool get(JsonObject value, JsonObject *target, + std::string *error = nullptr) { + (void) error; + if (!value.valid()) { + return false; + } + *target = value; + return true; +} + +/* No validity check here: this overload always succeeds. */ inline bool get(JsonType value, JsonType *target, + std::string *error = nullptr) { + (void) error; + *target = value; + return true; +} + +inline bool get(JsonValue value, JsonValue *target, + std::string *error = nullptr) { + (void) error; + if (!value.valid()) { + return false; + } + *target = value; + return true; +} + +/* No validity check here: this overload always succeeds. */ inline bool get(std::string_view value, std::string_view *target, + std::string *error = nullptr) { + (void) error; + *target = value; + return true; +} + +/* Reads the whole file into a string and parses it into *document. When the file cannot be opened, returns false and, if error is non-null, stores a fixed message in it; otherwise returns the result of document->parse(buffer, error). */ inline bool load_document(const std::string &file, JsonDocument *document, + std::string *error) { + std::ifstream input(file.c_str()); + std::string buffer; + + if (input.is_open() == false) { + if (error != nullptr) { + error->assign("Unable to open JSON file."); + } + return false; + } + + buffer.assign((std::istreambuf_iterator(input)), + std::istreambuf_iterator()); + return document->parse(buffer, error); +} + +/* Returns the as() conversion of the node, or an empty string when the value is unset or the conversion throws std::exception. */ inline std::string get_string(JsonValue value) { + if (!value.valid()) { + return ""; + } + + try { + return value.raw()->as(); + } catch (const std::exception &) { + return ""; + } +} + +/* Returns an empty string unless the value is set and is_number(); a throwing conversion also yields an empty string. */ inline std::string get_raw_number(JsonValue value) { + if (!value.valid() || value.raw()->is_number() == false) { + return ""; + } + + try { + return value.raw()->as(); + } catch (const std::exception &) { + return ""; + } +} + +/* An unset value yields 0. */ inline int64_t get_integer(JsonValue value) { + if (!value.valid()) { + return 0; + } +
+ /* A conversion that throws std::exception also yields 0. */ try { + return value.raw()->as(); + } catch (const std::exception &) { + return 0; + } +} + +} // namespace json +} // namespace modsecurity_test + +#endif // TEST_COMMON_JSON_H_ diff --git a/test/common/modsecurity_test.cc b/test/common/modsecurity_test.cc index 23eed49e58..bec67285c4 100644 --- a/test/common/modsecurity_test.cc +++ b/test/common/modsecurity_test.cc @@ -15,9 +15,6 @@ #include "test/common/modsecurity_test.h" -#ifdef WITH_YAJL -#include -#endif #include #include #include @@ -29,6 +26,7 @@ #include #include "modsecurity/modsecurity.h" +#include "test/common/json.h" namespace modsecurity_test { @@ -47,8 +45,8 @@ std::string ModSecurityTest::header() { template bool ModSecurityTest::load_test_json(const std::string &file) { - char errbuf[1024]; - yajl_val node; + std::string error; + modsecurity_test::json::JsonDocument document; std::ifstream myfile; myfile.open(file.c_str()); @@ -56,29 +54,45 @@ bool ModSecurityTest::load_test_json(const std::string &file) { std::cout << "Problems opening file: " << file << std::endl; return false; } + /* myfile only probes that the path can be opened; load_document() below opens the file again on its own stream. */ myfile.close(); - std::string str((std::istreambuf_iterator(myfile)), - std::istreambuf_iterator()); - node = yajl_tree_parse((const char *) str.c_str(), errbuf, sizeof(errbuf)); - if (node == NULL) { + if (modsecurity_test::json::load_document(file, &document, &error) + == false) { std::cout << "Problems parsing file: " << file << std::endl; - if (strlen(errbuf) > 0) { - std::cout << errbuf << std::endl; + if (error.empty() == false) { + std::cout << error << std::endl; } return false; } if (m_format) { - auto u = T::from_yajl_node(node); + /* Format mode: the whole document becomes one entry stored under the file name. */ auto u = T::from_json_document(&document); u->filename = file; (*this)[file].push_back(std::move(u)); } else { - size_t num_tests = node->u.array.len; - for ( int i = 0; i < num_tests; i++ ) { - yajl_val obj = node->u.array.values[i]; + /* Otherwise the root must be an array: get() fails for a JsonArray view that is not valid. */ modsecurity_test::json::JsonArray tests; + if (modsecurity_test::json::get(document.get_array(), &tests, + &error) == false) { + std::cout <<
"Problems parsing file: " << file << std::endl; + if (error.empty() == false) { + std::cout << error << std::endl; + } + return false; + } - auto u = T::from_yajl_node(obj); + for (auto test_result : tests) { + modsecurity_test::json::JsonValue value; + if (modsecurity_test::json::get(std::move(test_result), &value, + &error) == false) { + std::cout << "Problems parsing file: " << file << std::endl; + if (error.empty() == false) { + std::cout << error << std::endl; + } + return false; + } + + auto u = T::from_json_value(value); u->filename = file; const auto key = u->filename + ":" + u->name; @@ -86,8 +100,6 @@ bool ModSecurityTest::load_test_json(const std::string &file) { } } - yajl_tree_free(node); - return true; } diff --git a/test/common/modsecurity_test.h b/test/common/modsecurity_test.h index 6e8a3bbc8f..80ebaefa09 100644 --- a/test/common/modsecurity_test.h +++ b/test/common/modsecurity_test.h @@ -18,6 +18,7 @@ #include #include #include +#include #ifndef TEST_COMMON_MODSECURITY_TEST_H_ #define TEST_COMMON_MODSECURITY_TEST_H_ diff --git a/test/fuzzer/Makefile.am b/test/fuzzer/Makefile.am index eee3a94e23..b1022fdab9 100644 --- a/test/fuzzer/Makefile.am +++ b/test/fuzzer/Makefile.am @@ -19,7 +19,6 @@ afl_fuzzer_LDADD = \ $(GEOIP_LDFLAGS) $(GEOIP_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(YAJL_LDFLAGS) $(YAJL_LDADD) \ $(LMDB_LDFLAGS) $(LMDB_LDADD) \ $(MAXMIND_LDFLAGS) $(MAXMIND_LDADD) \ $(SSDEEP_LDFLAGS) $(SSDEEP_LDADD) \ @@ -36,13 +35,12 @@ afl_fuzzer_CPPFLAGS = \ -I../../ \ -O0 \ -g \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(CURL_CFLAGS) \ $(GEOIP_CFLAGS) \ $(MAXMIND_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ diff --git a/test/regression/regression.cc b/test/regression/regression.cc index 5b1ca514e8..f1d8eab8d7 100644 --- a/test/regression/regression.cc +++ b/test/regression/regression.cc @@ -439,7 +439,6 @@ int main(int argc, char **argv) 
test.cmd_options(argc, argv); if (test.m_format) { -#ifdef WITH_YAJL std::cout << "start formatting test case JSON files" << std::endl; ModSecurityTest test2; test2.cmd_options(argc, argv); @@ -459,11 +458,6 @@ int main(int argc, char **argv) } std::cout << "finished formatting files." << std::endl; return 0; -#else - std::cout << "Test utility cannot format test case JSON files without being built with YAJL." \ - << std::endl; - return 1; -#endif } if (!test.m_automake_output && !test.m_count_all) { diff --git a/test/regression/regression_test.cc b/test/regression/regression_test.cc index 18f61b64dc..d0661d7ff0 100644 --- a/test/regression/regression_test.cc +++ b/test/regression/regression_test.cc @@ -22,12 +22,131 @@ #include #include #include +#include -#ifdef WITH_YAJL -#include -#endif +#include "src/utils/json_writer.h" namespace modsecurity_test { +namespace { + +/* Concatenates the entries in order, with no separator. */ std::string join_strings(const std::vector &values) { + std::stringstream stream; + + for (const auto &entry : values) { + stream << entry; + } + + return stream.str(); +} + +/* Collects get_string() of every element of a JSON array. Returns an empty vector when value is not an array; elements that fail get() are skipped. */ std::vector json_array_to_vec_string( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonArray array; + std::vector values; + + if (modsecurity_test::json::get(value.get_array(), &array) == false) { + return values; + } + + for (auto entry_result : array) { + modsecurity_test::json::JsonValue entry; + + if (modsecurity_test::json::get(std::move(entry_result), &entry) + == false) { + continue; + } + + values.push_back(modsecurity_test::json::get_string(entry)); + } + + return values; +} + +/* Collects one (key, get_string(value)) entry per member of a JSON object, in iteration order. Returns an empty vector when value is not an object. */ std::vector> json_object_to_map( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; + std::vector> values; + + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return values; + } + + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; + + if
(modsecurity_test::json::get(std::move(field_result), &field) + == false) { + continue; + } + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; + } + child = field.value(); + + values.emplace_back(std::string(key), + modsecurity_test::json::get_string(child)); + } + + return values; +} + +/* set_*_from_json helpers: assign dest from value only when key equals want_key; otherwise dest is left unchanged. The integer variants cast the result of get_integer(). */ void set_int_from_json(int &dest, std::string_view want_key, + std::string_view key, modsecurity_test::json::JsonValue value) { + if (key == want_key) { + dest = static_cast(modsecurity_test::json::get_integer(value)); + } +} + +void set_opt_int_from_json(std::optional &dest, std::string_view want_key, + std::string_view key, modsecurity_test::json::JsonValue value) { + if (key == want_key) { + dest = static_cast(modsecurity_test::json::get_integer(value)); + } +} + +void set_string_from_json(std::string &dest, std::string_view want_key, + std::string_view key, modsecurity_test::json::JsonValue value) { + if (key == want_key) { + dest = modsecurity_test::json::get_string(value); + } +} + +/* Creates a test object with enabled, version_min, clientPort and serverPort set to 0 and http_code set to 200. */ std::unique_ptr make_empty_regression_test() { + auto test = std::make_unique(); + test->enabled = 0; + test->version_min = 0; + test->clientPort = 0; + test->serverPort = 0; + test->http_code = 200; + return test; +} + +/* Writes headers as one JSON object with one member per (name, value) pair, in vector order. */ void append_headers(modsecurity::utils::JsonWriter *writer, + const std::vector> &headers) { + writer->start_object(); + for (const auto &[name, value] : headers) { + writer->key(name); + writer->string(value); + } + writer->end_object(); +} + +/* Writes values as a JSON array of strings. An empty input is written as an array holding one empty string; values is taken by value, so the placeholder is added to a local copy only. */ void append_string_array(modsecurity::utils::JsonWriter *writer, + std::vector values) { + if (values.empty()) { + values.emplace_back(""); + } + + writer->start_array(); + for (const auto &value : values) { + writer->string(value); + } + writer->end_array(); +} + +} // namespace std::string RegressionTest::print() { std::stringstream i; @@ -48,182 +167,254 @@ std::string RegressionTest::print() { return i.str(); } +/* Builds a single test from a parsed document: from the root itself, or from the first element that passes get() when the root is an array. Falls back to make_empty_regression_test() when nothing usable is found. */ std::unique_ptr RegressionTest::from_json_document( + const
modsecurity_test::json::JsonDocument *document) { + modsecurity_test::json::JsonValue root; -inline std::string RegressionTest::yajl_array_to_str(const yajl_val &node) { - std::stringstream i; - for (int z = 0; z < node->u.array.len; z++) { - yajl_val val3 = node->u.array.values[z]; - const char *key = YAJL_GET_STRING(val3); - i << key; + if (modsecurity_test::json::get(document->get_value(), &root) == false) { + return make_empty_regression_test(); } - return i.str(); -} - -inline std::vector RegressionTest::yajl_array_to_vec_str( - const yajl_val &node) { - std::vector vec; - for (int z = 0; z < node->u.array.len; z++) { - yajl_val val3 = node->u.array.values[z]; - const char *key = YAJL_GET_STRING(val3); - vec.push_back(key); + modsecurity_test::json::JsonType type; + if (modsecurity_test::json::get(root.type(), &type) == false) { + return make_empty_regression_test(); } - return vec; -} + if (type == modsecurity_test::json::JsonType::Array) { + modsecurity_test::json::JsonArray tests; + if (modsecurity_test::json::get(root.get_array(), &tests) == false) { + return make_empty_regression_test(); + } -inline std::vector> - RegressionTest::yajl_array_to_map(const yajl_val &node) { - std::vector> vec; - for (int z = 0; z < node->u.object.len; z++) { - const char *key = node->u.object.keys[z]; - yajl_val val3 = node->u.object.values[z]; - const char *value = YAJL_GET_STRING(val3); - std::pair a(key, value); - vec.push_back(a); - } - return vec; -} + for (auto test_result : tests) { + modsecurity_test::json::JsonValue test; + if (modsecurity_test::json::get(std::move(test_result), &test) + == false) { + continue; + } -static inline void set_int_from_yajl(int &dest, std::string_view want_key, std::string_view key, const yajl_val &val) { - if (key == want_key) { - dest = YAJL_GET_INTEGER(val); - } -} + return from_json_value(test); + } -static inline void set_opt_int_from_yajl(std::optional &dest, std::string_view want_key, std::string_view key, const yajl_val &val) { - 
if (key == want_key) { - dest = YAJL_GET_INTEGER(val); + return make_empty_regression_test(); } + + return from_json_value(root); } -static inline void set_string_from_yajl(std::string &dest, std::string_view want_key, std::string_view key, const yajl_val &val) { - if (key == want_key) { - dest = YAJL_GET_STRING(val); +/* Builds one test from a JSON object. Scalar members are matched by key; client, server, request, response, expected and rules are handed to the update_* members. A value that is not an object yields the defaults from make_empty_regression_test(). */ std::unique_ptr RegressionTest::from_json_value( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; + auto test = make_empty_regression_test(); + + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return test; } -} -std::unique_ptr RegressionTest::from_yajl_node(const yajl_val &node) { - size_t nelem = node->u.object.len; - auto u = std::make_unique(); - u->http_code = 200; - - for (int i = 0; i < nelem; i++) { - const char *key = node->u.object.keys[ i ]; - yajl_val val = node->u.object.values[ i ]; - - set_int_from_yajl(u->enabled, "enabled", key, val); - set_int_from_yajl(u->version_min, "version_min", key, val); - set_opt_int_from_yajl(u->version_max, "version_max", key, val); - set_string_from_yajl(u->title, "title", key, val); - set_string_from_yajl(u->url, "url", key, val); - set_string_from_yajl(u->resource, "resource", key, val); - set_opt_int_from_yajl(u->github_issue, "github_issue", key, val); - if (strcmp(key, "client") == 0) { - u->update_client_from_yajl_node(val); - } - if (strcmp(key, "server") == 0) { - u->update_server_from_yajl_node(val); + /* Members whose field or key cannot be obtained through get() are skipped. */ for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; + + if (modsecurity_test::json::get(std::move(field_result), &field) + == false) { + continue; } - if (strcmp(key, "request") == 0) { - u->update_request_from_yajl_node(val); + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; } - if (strcmp(key, "response") == 0) { - u->update_response_from_yajl_node(val); + child = field.value(); + +
set_int_from_json(test->enabled, "enabled", key, child); + set_int_from_json(test->version_min, "version_min", key, child); + set_opt_int_from_json(test->version_max, "version_max", key, child); + set_string_from_json(test->title, "title", key, child); + set_string_from_json(test->url, "url", key, child); + set_string_from_json(test->resource, "resource", key, child); + set_opt_int_from_json(test->github_issue, "github_issue", key, child); + + /* Structured sections are handed to the matching update_* member. */ if (key == "client") { + test->update_client_from_json_value(child); + } else if (key == "server") { + test->update_server_from_json_value(child); + } else if (key == "request") { + test->update_request_from_json_value(child); + } else if (key == "response") { + test->update_response_from_json_value(child); + } else if (key == "expected") { + test->update_expected_from_json_value(child); + } else if (key == "rules") { + test->update_rules_from_json_value(child); } - if (strcmp(key, "expected") == 0) { - u->update_expected_from_yajl_node(val); + } + + /* name mirrors title. */ test->name = test->title; + return test; +} + +/* Reads ip and port from the client object into clientIp and clientPort; a value that is not an object is ignored. */ void RegressionTest::update_client_from_json_value( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; + + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return; + } + + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; + + if (modsecurity_test::json::get(std::move(field_result), &field) + == false) { + continue; } - if (strcmp(key, "rules") == 0) { - u->update_rules_from_yajl_node(val); + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; } + child = field.value(); + + set_string_from_json(clientIp, "ip", key, child); + set_int_from_json(clientPort, "port", key, child); } +} - u->name = u->title; +/* Reads ip, port and hostname from the server object; a value that is not an object is ignored. */ void RegressionTest::update_server_from_json_value( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; -
return u; -} + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return; + } -void RegressionTest::update_client_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; - set_string_from_yajl(clientIp, "ip", key2, val2); - set_int_from_yajl(clientPort, "port", key2, val2); + if (modsecurity_test::json::get(std::move(field_result), &field) + == false) { + continue; + } + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; + } + child = field.value(); + + set_string_from_json(serverIp, "ip", key, child); + set_int_from_json(serverPort, "port", key, child); + set_string_from_json(hostname, "hostname", key, child); } } -void RegressionTest::update_server_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; +/* Reads uri, method, http_version, headers and body from the request object; a value that is not an object is ignored. */ void RegressionTest::update_request_from_json_value( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; - set_string_from_yajl(serverIp, "ip", key2, val2); - set_int_from_yajl(serverPort, "port", key2, val2); - set_string_from_yajl(hostname, "hostname", key2, val2); + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return; } -} -void RegressionTest::update_request_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; - set_string_from_yajl(uri, "uri", key2, val2); - set_string_from_yajl(method, "method",
key2, val2); - if (strcmp(key2, "http_version") == 0) { - httpVersion = YAJL_GET_NUMBER(val2); + if (modsecurity_test::json::get(std::move(field_result), &field) + == false) { + continue; } - if (strcmp(key2, "headers") == 0) { - request_headers = yajl_array_to_map(val2); + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; } - if (strcmp(key2, "body") == 0) { - request_body = yajl_array_to_str(val2); - request_body_lines = yajl_array_to_vec_str(val2); + child = field.value(); + + set_string_from_json(uri, "uri", key, child); + set_string_from_json(method, "method", key, child); + /* http_version is stored as the text returned by get_raw_number(); body is kept both as separate lines and as their concatenation. */ if (key == "http_version") { + httpVersion = modsecurity_test::json::get_raw_number(child); + } else if (key == "headers") { + request_headers = json_object_to_map(child); + } else if (key == "body") { + request_body_lines = json_array_to_vec_string(child); + request_body = join_strings(request_body_lines); } } } -void RegressionTest::update_response_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; +/* Reads headers, body and protocol from the response object; a value that is not an object is ignored. */ void RegressionTest::update_response_from_json_value( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; - if (strcmp(key2, "headers") == 0) { - response_headers = yajl_array_to_map(val2); + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return; + } + + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; + + if (modsecurity_test::json::get(std::move(field_result), &field) + == false) { + continue; } - if (strcmp(key2, "body") == 0) { - response_body = yajl_array_to_str(val2); - response_body_lines = yajl_array_to_vec_str(val2); + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; } - set_string_from_yajl(response_protocol, "protocol", key2, val2);
+ child = field.value(); + + /* body is kept both as separate lines and as their concatenation. */ if (key == "headers") { + response_headers = json_object_to_map(child); + } else if (key == "body") { + response_body_lines = json_array_to_vec_string(child); + response_body = join_strings(response_body_lines); + } + set_string_from_json(response_protocol, "protocol", key, child); } } -void RegressionTest::update_expected_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; +/* Reads audit_log, debug_log, error_log, http_code, redirect_url and parser_error from the expected object; a value that is not an object is ignored. */ void RegressionTest::update_expected_from_json_value( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; - set_string_from_yajl(audit_log, "audit_log", key2, val2); - set_string_from_yajl(debug_log, "debug_log", key2, val2); - set_string_from_yajl(error_log, "error_log", key2, val2); - set_int_from_yajl(http_code, "http_code", key2, val2); - set_string_from_yajl(redirect_url, "redirect_url", key2, val2); - set_string_from_yajl(parser_error, "parser_error", key2, val2); + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return; + } + + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; + + if (modsecurity_test::json::get(std::move(field_result), &field) + == false) { + continue; + } + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; + } + child = field.value(); + + set_string_from_json(audit_log, "audit_log", key, child); + set_string_from_json(debug_log, "debug_log", key, child); + set_string_from_json(error_log, "error_log", key, child); + set_int_from_json(http_code, "http_code", key, child); + set_string_from_json(redirect_url, "redirect_url", key, child); + set_string_from_json(parser_error, "parser_error", key, child); } } -void RegressionTest::update_rules_from_yajl_node(const yajl_val &val) { - std::stringstream si; - for (int j = 0; j <
val->u.array.len; j++) { - yajl_val val2 = val->u.array.values[ j ]; - const char *keyj = YAJL_GET_STRING(val2); - si << keyj << "\n"; +/* Stores the rule strings as rules_lines and also as rules, where every line is followed by a newline. */ void RegressionTest::update_rules_from_json_value( + modsecurity_test::json::JsonValue value) { + std::stringstream stream; + + rules_lines = json_array_to_vec_string(value); + for (const auto &line : rules_lines) { + stream << line << "\n"; } - rules = si.str(); - rules_lines = yajl_array_to_vec_str(val); + + rules = stream.str(); } @@ -269,187 +460,149 @@ void RegressionTest::update_content_lengths() { update_content_length(response_headers, response_body.size()); } -std::unique_ptr RegressionTests::from_yajl_node(const yajl_val &node) { - auto u = std::make_unique(); - size_t num_tests = node->u.array.len; - for (int i = 0; i < num_tests; i++) { - yajl_val obj = node->u.array.values[i]; - u->tests.emplace_back(std::move(RegressionTest::from_yajl_node(obj))); - } - return u; -} +/* Builds the test collection from the document root via from_json_value(); returns an empty collection when the root value cannot be obtained. */ std::unique_ptr RegressionTests::from_json_document( + const modsecurity_test::json::JsonDocument *document) { + modsecurity_test::json::JsonValue root; -void RegressionTests::update_content_lengths() { - for (auto & test : tests) { - test->update_content_lengths(); + if (modsecurity_test::json::get(document->get_value(), &root) == false) { + return std::make_unique(); } -} -#ifdef WITH_YAJL - -static yajl_gen_status gen_string_view(yajl_gen g, std::string_view s) { - return yajl_gen_string(g, reinterpret_cast(s.data()), s.length()); + return from_json_value(root); } -static yajl_gen_status gen_key_str(yajl_gen g, std::string_view key, std::string_view val) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; - } - return gen_string_view(g, val); -} +/* Builds the test collection: one entry per array element that passes get() when value is an array, a single entry when it is an object, and an empty collection otherwise. */ std::unique_ptr RegressionTests::from_json_value( + modsecurity_test::json::JsonValue value) { + auto tests = std::make_unique(); + modsecurity_test::json::JsonType type; -static yajl_gen_status gen_key_str_if_non_empty(yajl_gen g, std::string_view key, std::string_view val) { - if
(val.empty()) { - return yajl_gen_status_ok; + if (modsecurity_test::json::get(value.type(), &type) == false) { + return tests; } - return gen_key_str(g, key, val); -} -static yajl_gen_status gen_key_int(yajl_gen g, std::string_view key, int val) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; - } - return yajl_gen_integer(g, val); -} + /* Array root: every element that passes get() becomes one test; the others are skipped. */ if (type == modsecurity_test::json::JsonType::Array) { + modsecurity_test::json::JsonArray array; -static yajl_gen_status gen_key_opt_int(yajl_gen g, std::string_view key, std::optional val) { - if (!val.has_value()) { - return yajl_gen_status_ok; - } - return gen_key_int(g, key, val.value()); -} + if (modsecurity_test::json::get(value.get_array(), &array) == false) { + return tests; + } -static yajl_gen_status gen_key_int_if_non_zero(yajl_gen g, std::string_view key, int val) { - if (val == 0) { - return yajl_gen_status_ok; + for (auto test_result : array) { + modsecurity_test::json::JsonValue test_value; + if (modsecurity_test::json::get(std::move(test_result), &test_value) + == false) { + continue; + } + /* The factory result is already a temporary, so it is passed without std::move. */ tests->tests.emplace_back( + RegressionTest::from_json_value(test_value)); + } + return tests; } - return gen_key_int(g, key, val); -} -static yajl_gen_status gen_key_number(yajl_gen g, std::string_view key, std::string_view raw_val) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; + /* Object root: the value itself is the only test. Any other kind leaves the collection empty. */ if (type == modsecurity_test::json::JsonType::Object) { + tests->tests.emplace_back(RegressionTest::from_json_value(value)); } - return yajl_gen_number(g, reinterpret_cast(raw_val.data()), raw_val.length()); -} -static yajl_gen_status gen_key_str_array(yajl_gen g, std::string_view key, const std::vector &lines) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; - } - if (auto s{yajl_gen_array_open(g)}; s != yajl_gen_status_ok) { - return s; - } - for (const auto &line : lines) { - if (auto s{gen_string_view(g, line)}; s !=
yajl_gen_status_ok) { - return s; - } - } - return yajl_gen_array_close(g); + return tests; } -static yajl_gen_status gen_key_headers(yajl_gen g, std::string_view key, const std::vector> &headers) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; - } - if (auto s{yajl_gen_map_open(g)}; s != yajl_gen_status_ok) { - return s; - } - for (const auto &[name, value] : headers) { - if (auto s{gen_key_str(g, name, value)}; s != yajl_gen_status_ok) { - return s; - } +/* Forwards to update_content_lengths() on every contained test. */ void RegressionTests::update_content_lengths() { + for (auto & test : tests) { + test->update_content_lengths(); } - return yajl_gen_map_close(g); } std::string RegressionTests::toJSON() const { - const unsigned char *buf; - size_t len; - yajl_gen g; - - g = yajl_gen_alloc(NULL); - if (g == NULL) { - return ""; - } - yajl_gen_config(g, yajl_gen_beautify, 1); - yajl_gen_config(g, yajl_gen_indent_string, " "); - - yajl_gen_array_open(g); - for (const auto &t : tests) { - yajl_gen_map_open(g); - gen_key_int(g, "enabled", t->enabled); - gen_key_int(g, "version_min", t->version_min); - gen_key_opt_int(g, "version_max", t->version_max); - gen_key_str(g, "title", t->title); - gen_key_str_if_non_empty(g, "url", t->url); - gen_key_str_if_non_empty(g, "resource", t->resource); - gen_key_opt_int(g, "github_issue", t->github_issue); - - gen_string_view(g, "client"); - yajl_gen_map_open(g); - gen_key_str(g, "ip", t->clientIp); - gen_key_int(g, "port", t->clientPort); - yajl_gen_map_close(g); - - gen_string_view(g, "server"); - yajl_gen_map_open(g); - gen_key_str(g, "ip", t->serverIp); - gen_key_int(g, "port", t->serverPort); - yajl_gen_map_close(g); - - gen_string_view(g, "request"); - yajl_gen_map_open(g); - gen_key_headers(g, "headers", t->request_headers); - gen_key_str(g, "uri", t->uri); - gen_key_str(g, "method", t->method); - if (!t->httpVersion.empty()) { - gen_key_number(g, "http_version", t->httpVersion); + /* NOTE(review): the two constructor arguments presumably select pretty printing and the indent string, mirroring the removed yajl beautify settings - confirm against src/utils/json_writer.h. */ modsecurity::utils::JsonWriter writer(true, " "); + + const auto addString =
[&writer](std::string_view key, + const std::string &value) { + writer.key(key); + writer.string(value); + }; + const auto addStringIfNonEmpty = [&writer, &addString]( + std::string_view key, const std::string &value) { + if (value.empty() == false) { + addString(key, value); } - - auto request_body_lines{t->request_body_lines}; - if (request_body_lines.empty()) { - request_body_lines.emplace_back(""); + }; + const auto addInteger = [&writer](std::string_view key, int value) { + writer.key(key); + writer.integer(value); + }; + const auto addOptionalInteger = [&writer](std::string_view key, + const std::optional &value) { + if (value.has_value()) { + writer.key(key); + writer.integer(value.value()); } - gen_key_str_array(g, "body", request_body_lines); - - yajl_gen_map_close(g); + }; - gen_string_view(g, "response"); - yajl_gen_map_open(g); - gen_key_headers(g, "headers", t->response_headers); - - auto response_body_lines{t->response_body_lines}; - if (response_body_lines.empty()) { - response_body_lines.emplace_back(""); + writer.start_array(); + for (const auto &t : tests) { + writer.start_object(); + addInteger("enabled", t->enabled); + addInteger("version_min", t->version_min); + addOptionalInteger("version_max", t->version_max); + addString("title", t->title); + addStringIfNonEmpty("url", t->url); + addStringIfNonEmpty("resource", t->resource); + addOptionalInteger("github_issue", t->github_issue); + + writer.key("client"); + writer.start_object(); + addString("ip", t->clientIp); + addInteger("port", t->clientPort); + writer.end_object(); + + writer.key("server"); + writer.start_object(); + addString("ip", t->serverIp); + addInteger("port", t->serverPort); + writer.end_object(); + + writer.key("request"); + writer.start_object(); + writer.key("headers"); + append_headers(&writer, t->request_headers); + addString("uri", t->uri); + addString("method", t->method); + if (!t->httpVersion.empty()) { + writer.key("http_version"); + writer.number(t->httpVersion); } - 
gen_key_str_array(g, "body", response_body_lines); - - gen_key_str_if_non_empty(g, "protocol", t->response_protocol); - yajl_gen_map_close(g); - - gen_string_view(g, "expected"); - yajl_gen_map_open(g); - gen_key_str_if_non_empty(g, "audit_log", t->audit_log); - gen_key_str_if_non_empty(g, "debug_log", t->debug_log); - gen_key_str_if_non_empty(g, "error_log", t->error_log); - gen_key_int(g, "http_code", t->http_code); - gen_key_str_if_non_empty(g, "redirect_url", t->redirect_url); - gen_key_str_if_non_empty(g, "parser_error", t->parser_error); - yajl_gen_map_close(g); - gen_key_str_array(g, "rules", t->rules_lines); - - yajl_gen_map_close(g); - } - yajl_gen_array_close(g); - - yajl_gen_get_buf(g, &buf, &len); - std::string s{reinterpret_cast(buf), len}; - yajl_gen_free(g); - return s; + writer.key("body"); + append_string_array(&writer, t->request_body_lines); + writer.end_object(); + + writer.key("response"); + writer.start_object(); + writer.key("headers"); + append_headers(&writer, t->response_headers); + writer.key("body"); + append_string_array(&writer, t->response_body_lines); + addStringIfNonEmpty("protocol", t->response_protocol); + writer.end_object(); + + /* Empty strings are omitted from the expected object; http_code is always written. */ writer.key("expected"); + writer.start_object(); + addStringIfNonEmpty("audit_log", t->audit_log); + addStringIfNonEmpty("debug_log", t->debug_log); + addStringIfNonEmpty("error_log", t->error_log); + addInteger("http_code", t->http_code); + addStringIfNonEmpty("redirect_url", t->redirect_url); + addStringIfNonEmpty("parser_error", t->parser_error); + writer.end_object(); + + /* NOTE(review): append_string_array() writes a single empty string for an empty vector, whereas the removed gen_key_str_array call wrote rules_lines as is; an empty rules list is therefore serialized differently now - confirm this is intended. */ writer.key("rules"); + append_string_array(&writer, t->rules_lines); + + writer.end_object(); + } + writer.end_array(); + + return writer.to_string(); } -#endif // WITH_YAJL - } // namespace modsecurity_test diff --git a/test/regression/regression_test.h b/test/regression/regression_test.h index 0271482f96..1ff13ac10d 100644 --- a/test/regression/regression_test.h +++ b/test/regression/regression_test.h @@ -13,8 +13,6 @@ * */
-#include - #include #include #include @@ -24,6 +22,8 @@ #include #include +#include "test/common/json.h" + #ifndef TEST_REGRESSION_REGRESSION_TEST_H_ #define TEST_REGRESSION_REGRESSION_TEST_H_ @@ -32,7 +32,10 @@ namespace modsecurity_test { class RegressionTest { public: - static std::unique_ptr from_yajl_node(const yajl_val &); + static std::unique_ptr from_json_document( + const modsecurity_test::json::JsonDocument *document); + static std::unique_ptr from_json_value( + modsecurity_test::json::JsonValue value); static std::string print(); std::string filename; @@ -69,12 +72,6 @@ class RegressionTest { std::string uri; std::string resource; - static inline std::string yajl_array_to_str(const yajl_val &node); - static inline std::vector yajl_array_to_vec_str( - const yajl_val &node); - static inline std::vector> - yajl_array_to_map(const yajl_val &node); - int http_code; std::string redirect_url; @@ -86,17 +83,20 @@ class RegressionTest { void update_content_lengths(); private: - void update_client_from_yajl_node(const yajl_val &val); - void update_server_from_yajl_node(const yajl_val &val); - void update_request_from_yajl_node(const yajl_val &val); - void update_response_from_yajl_node(const yajl_val &val); - void update_expected_from_yajl_node(const yajl_val &val); - void update_rules_from_yajl_node(const yajl_val &val); + void update_client_from_json_value(modsecurity_test::json::JsonValue value); + void update_server_from_json_value(modsecurity_test::json::JsonValue value); + void update_request_from_json_value(modsecurity_test::json::JsonValue value); + void update_response_from_json_value(modsecurity_test::json::JsonValue value); + void update_expected_from_json_value(modsecurity_test::json::JsonValue value); + void update_rules_from_json_value(modsecurity_test::json::JsonValue value); }; class RegressionTests { public: - static std::unique_ptr from_yajl_node(const yajl_val &); + static std::unique_ptr from_json_document( + const 
modsecurity_test::json::JsonDocument *document); + static std::unique_ptr from_json_value( + modsecurity_test::json::JsonValue value); void update_content_lengths(); std::string toJSON() const; diff --git a/test/run-json-backend-matrix.sh b/test/run-json-backend-matrix.sh new file mode 100755 index 0000000000..3af7b7d632 --- /dev/null +++ b/test/run-json-backend-matrix.sh @@ -0,0 +1,204 @@ +#!/usr/bin/env bash + +set -u + +usage() { + cat <<'EOF' +Usage: test/run-json-backend-matrix.sh [--build-root DIR] [--jobs N] [--configure-extra "ARGS"] [--keep-build-dirs] + +Exit codes: + 0 success + 10 configure failure + 11 build failure + 12 test failure + 13 backend result difference + 64 invalid usage +EOF +} + +repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +build_root="${repo_root}/build-json-backend-matrix" +jobs="$(getconf _NPROCESSORS_ONLN 2>/dev/null || printf '1')" +configure_extra="" +keep_build_dirs=0 + +while [ "$#" -gt 0 ]; do + case "$1" in + --build-root) + shift + [ "$#" -gt 0 ] || { usage; exit 64; } + build_root="$1" + ;; + --jobs) + shift + [ "$#" -gt 0 ] || { usage; exit 64; } + jobs="$1" + ;; + --configure-extra) + shift + [ "$#" -gt 0 ] || { usage; exit 64; } + configure_extra="$1" + ;; + --keep-build-dirs) + keep_build_dirs=1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + usage + exit 64 + ;; + esac + shift +done + +declare -a configure_extra_args=() +if [ -n "${configure_extra}" ]; then + read -r -a configure_extra_args <<< "${configure_extra}" +fi + +readonly test_files=( + "test/test-cases/regression/request-body-parser-json.json" + "test/test-cases/regression/request-body-parser-json-backend-edgecases.json" +) + +mkdir -p "${build_root}/logs" + +extract_summary() { + local backend="$1" + local input_log="$2" + local output_tsv="$3" + + awk -v backend="${backend}" ' + match($0, /^:test-result:[[:space:]]+([^[:space:]]+)[[:space:]]+([^:]+):(.*)$/, m) { + print backend "\t" m[2] "\t" m[3] "\t" m[1]; + } + ' "${input_log}" > 
"${output_tsv}" +} + +run_backend() { + local backend="$1" + local build_dir="${build_root}/${backend}" + local raw_log="${build_root}/logs/${backend}.log" + local summary_file="${build_root}/logs/${backend}.summary.tsv" + local configure_status=0 + local build_status=0 + local test_status=0 + + if [ "${keep_build_dirs}" -eq 0 ]; then + rm -rf "${build_dir}" + fi + mkdir -p "${build_dir}" + : > "${raw_log}" + + ( + cd "${build_dir}" && \ + "${repo_root}/configure" \ + --with-json-backend="${backend}" \ + "${configure_extra_args[@]}" + ) >> "${raw_log}" 2>&1 + configure_status=$? + if [ "${configure_status}" -ne 0 ]; then + return 10 + fi + + ( + cd "${build_dir}" && \ + make -j "${jobs}" -C others + ) >> "${raw_log}" 2>&1 + build_status=$? + if [ "${build_status}" -ne 0 ]; then + return 11 + fi + + ( + cd "${build_dir}" && \ + make -j "${jobs}" -C src libmodsecurity.la + ) >> "${raw_log}" 2>&1 + build_status=$? + if [ "${build_status}" -ne 0 ]; then + return 11 + fi + + ( + cd "${build_dir}" && \ + make -j "${jobs}" -C test regression_tests json_backend_depth_tests + ) >> "${raw_log}" 2>&1 + build_status=$? + if [ "${build_status}" -ne 0 ]; then + return 11 + fi + + for test_file in "${test_files[@]}"; do + ( + cd "${build_dir}/test" && \ + ./regression_tests automake "${repo_root}/${test_file}" + ) >> "${raw_log}" 2>&1 + if [ "$?" -ne 0 ]; then + test_status=1 + fi + done + + ( + cd "${build_dir}/test" && \ + ./json_backend_depth_tests + ) >> "${raw_log}" 2>&1 + if [ "$?" -ne 0 ]; then + test_status=1 + fi + + extract_summary "${backend}" "${raw_log}" "${summary_file}" + if [ ! -s "${summary_file}" ]; then + test_status=1 + fi + if awk -F '\t' '$4 != "PASS" {exit 1}' "${summary_file}"; then + : + else + test_status=1 + fi + + if [ "${test_status}" -ne 0 ]; then + return 12 + fi + return 0 +} + +backend_test_failure=0 +for backend in simdjson jsoncons; do + run_backend "${backend}" + status=$? 
+ case "${status}" in + 0) + ;; + 10|11) + exit "${status}" + ;; + 12) + backend_test_failure=1 + ;; + *) + exit "${status}" + ;; + esac +done + +cat "${build_root}/logs/simdjson.summary.tsv" \ + "${build_root}/logs/jsoncons.summary.tsv" \ + > "${build_root}/summary.tsv" + +if diff -u \ + <(cut -f 2- "${build_root}/logs/simdjson.summary.tsv" | sort) \ + <(cut -f 2- "${build_root}/logs/jsoncons.summary.tsv" | sort) \ + > "${build_root}/logs/backend-diff.log"; then + : +else + exit 13 +fi + +if [ "${backend_test_failure}" -ne 0 ]; then + exit 12 +fi + +exit 0 diff --git a/test/test-cases/regression/request-body-parser-json-backend-edgecases.json b/test/test-cases/regression/request-body-parser-json-backend-edgecases.json new file mode 100644 index 0000000000..03c87a0348 --- /dev/null +++ b/test/test-cases/regression/request-body-parser-json-backend-edgecases.json @@ -0,0 +1,857 @@ +[ + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar string", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "7" + }, + "uri": "/", + "method": "POST", + "body": [ + "\"hello\"" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"hello\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210101',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^hello$\" \"id:'210102',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar zero", + "client": { + "ip": "200.249.12.31", + "port": 
123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "1" + }, + "uri": "/", + "method": "POST", + "body": [ + "0" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"0\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210131',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^0$\" \"id:'210132',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar decimal number", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "3" + }, + "uri": "/", + "method": "POST", + "body": [ + "1.0" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"1.0\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210103',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^1.0$\" \"id:'210104',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar scientific number", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + 
"User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "3" + }, + "uri": "/", + "method": "POST", + "body": [ + "1e3" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"1e3\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210105',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^1e3$\" \"id:'210106',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar negative fraction with exponent", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "8" + }, + "uri": "/", + "method": "POST", + "body": [ + "-1.25e-4" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"-1.25e-4\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210133',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^-1\\.25e-4$\" \"id:'210134',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar negative zero", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": 
"application/json", + "Content-Length": "2" + }, + "uri": "/", + "method": "POST", + "body": [ + "-0" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"-0\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210107',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^-0$\" \"id:'210108',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar uint64 max", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "20" + }, + "uri": "/", + "method": "POST", + "body": [ + "18446744073709551615" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"18446744073709551615\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210135',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^18446744073709551615$\" \"id:'210136',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar uint64 overflow", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "20" + }, + "uri": "/", + 
"method": "POST", + "body": [ + "18446744073709551616" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"18446744073709551616\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210137',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^18446744073709551616$\" \"id:'210138',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar big integer", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "30" + }, + "uri": "/", + "method": "POST", + "body": [ + "123456789012345678901234567890" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"123456789012345678901234567890\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210109',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^123456789012345678901234567890$\" \"id:'210110',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar boolean", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "5" + }, + "uri": 
"/", + "method": "POST", + "body": [ + "false" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"false\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210121',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^false$\" \"id:'210122',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar null", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "4" + }, + "uri": "/", + "method": "POST", + "body": [ + "null" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210123',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^$\" \"id:'210124',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - nested root array path", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "20" + }, + "uri": "/", + "method": "POST", + "body": [ + "[{\"deep\":[\"x\",\"y\"]}]" + ] + }, + "response": { + "headers": { + "Content-Length": 
"0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"y\" \\(Variable: ARGS:json.array_0.deep.array_1\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210111',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json.array_0.deep.array_1 \"^y$\" \"id:'210112',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - empty root array", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "2" + }, + "uri": "/", + "method": "POST", + "body": [ + "[]" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"0\" .Variable: REQBODY_ERROR.", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210125',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule REQBODY_ERROR \"0\" \"id:'210126',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - empty root object", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "2" + }, + "uri": "/", + "method": "POST", + "body": [ + "{}" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"0\" .Variable: 
REQBODY_ERROR.", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210113',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule REQBODY_ERROR \"0\" \"id:'210114',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root array high index", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "21" + }, + "uri": "/", + "method": "POST", + "body": [ + "[0,1,2,3,4,5,6,7,8,9]" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"9\" \\(Variable: ARGS:json.array_9\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210127',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json.array_9 \"^9$\" \"id:'210128',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar big exponent", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "6" + }, + "uri": "/", + "method": "POST", + "body": [ + "1e1000" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"1e1000\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + 
"SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210129',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^1e1000$\" \"id:'210130',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - duplicate keys remain addressable", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "30" + }, + "uri": "/", + "method": "POST", + "body": [ + "{\"dup\":\"first\",\"dup\":\"second\"}" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"second\" \\(Variable: ARGS:json.dup\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210115',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json.dup \"^second$\" \"id:'210116',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - truncated body", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "7" + }, + "uri": "/", + "method": "POST", + "body": [ + "{\"foo\":" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Failed to parse request body", + "http_code": 403 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type 
\"application/json\" \"id:'210117',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule REQBODY_ERROR \"!@eq 0\" \"id:'210118',phase:2,t:none,log,deny,status:403,msg:'Failed to parse request body.',logdata:'%{reqbody_error_msg}'\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - invalid unicode escape", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "16" + }, + "uri": "/", + "method": "POST", + "body": [ + "{\"bad\":\"\\uD800\"}" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Failed to parse request body", + "http_code": 403 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210119',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule REQBODY_ERROR \"!@eq 0\" \"id:'210120',phase:2,t:none,log,deny,status:403,msg:'Failed to parse request body.',logdata:'%{reqbody_error_msg}'\"" + ] + } +] diff --git a/test/test-suite.in b/test/test-suite.in index 6e8754254b..f16500eb65 100644 --- a/test/test-suite.in +++ b/test/test-suite.in @@ -96,6 +96,7 @@ TESTS+=test/test-cases/regression/operator-verifycpf.json TESTS+=test/test-cases/regression/operator-verifyssn.json TESTS+=test/test-cases/regression/operator-verifysvnr.json TESTS+=test/test-cases/regression/request-body-parser-json.json +TESTS+=test/test-cases/regression/request-body-parser-json-backend-edgecases.json TESTS+=test/test-cases/regression/request-body-parser-multipart-crlf.json TESTS+=test/test-cases/regression/request-body-parser-multipart.json TESTS+=test/test-cases/regression/request-body-parser-xml.json diff 
--git a/test/unit/json_backend_depth_tests.cc b/test/unit/json_backend_depth_tests.cc new file mode 100644 index 0000000000..f2c2648457 --- /dev/null +++ b/test/unit/json_backend_depth_tests.cc @@ -0,0 +1,495 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include +#include +#include +#include + +#include "src/request_body_processor/json_adapter.h" + +namespace modsecurity { +namespace RequestBodyProcessor { +namespace { + +class AcceptAllSink : public JsonEventSink { + public: + JsonSinkStatus on_start_object() override { + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_end_object() override { + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_start_array() override { + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_end_array() override { + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_key(std::string_view value) override { + (void) value; + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_string(std::string_view value) override { + (void) value; + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_number(std::string_view raw_number) override { + (void) raw_number; + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_boolean(bool value) override { + (void) value; + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_null() override { + return JsonSinkStatus::Continue; + } +}; + +class NumberCollectingSink : public AcceptAllSink { + public: + JsonSinkStatus on_number(std::string_view raw_number) override 
{ + numbers.emplace_back(raw_number.data(), raw_number.size()); + return JsonSinkStatus::Continue; + } + + std::vector numbers; +}; + +const char *parseStatusName(JsonParseStatus status) { + switch (status) { + case JsonParseStatus::Ok: + return "Ok"; + case JsonParseStatus::ParseError: + return "ParseError"; + case JsonParseStatus::TruncatedInput: + return "TruncatedInput"; + case JsonParseStatus::Utf8Error: + return "Utf8Error"; + case JsonParseStatus::EngineAbort: + return "EngineAbort"; + case JsonParseStatus::InternalError: + return "InternalError"; + } + + return "UnknownParseStatus"; +} + +const char *sinkStatusName(JsonSinkStatus status) { + switch (status) { + case JsonSinkStatus::Continue: + return "Continue"; + case JsonSinkStatus::EngineAbort: + return "EngineAbort"; + case JsonSinkStatus::DepthLimitExceeded: + return "DepthLimitExceeded"; + case JsonSinkStatus::InternalError: + return "InternalError"; + } + + return "UnknownSinkStatus"; +} + +std::string makeNestedArrayJson(std::size_t depth) { + std::string input(depth, '['); + input.push_back('0'); + input.append(depth, ']'); + return input; +} + +std::string describeUnexpectedResult(const JsonParseResult &result, + const char *expectation) { + std::string detail = std::string("Expected ") + expectation + ", got " + + parseStatusName(result.parse_status) + "/" + + sinkStatusName(result.sink_status) + "."; + if (!result.detail.empty()) { + detail.append(" "); + detail.append(result.detail); + } + return detail; +} + +std::string describeStringList(const std::vector &values) { + std::string description = "["; + + for (std::size_t i = 0; i < values.size(); i++) { + if (i != 0) { + description.append(", "); + } + description.push_back('"'); + description.append(values[i]); + description.push_back('"'); + } + + description.push_back(']'); + return description; +} + +bool expectParseResult(const std::string &input, JsonParseStatus parse_status, + JsonSinkStatus sink_status, const char *expectation, + 
std::string *failure_detail) { + AcceptAllSink sink; + JSONAdapter adapter; + JsonParseResult result = adapter.parse(input, &sink, JsonBackendParseOptions()); + + if (result.parse_status != parse_status + || result.sink_status != sink_status) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, expectation); + failure_detail->append(" Input: "); + failure_detail->append(input); + } + return false; + } + + return true; +} + +bool collectNumberLexemes(const std::string &input, + std::vector *numbers, std::string *failure_detail) { + NumberCollectingSink sink; + JSONAdapter adapter; + JsonParseResult result = adapter.parse(input, &sink, JsonBackendParseOptions()); + + if (!result.ok()) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, "Ok/Continue"); + failure_detail->append(" Input: "); + failure_detail->append(input); + } + return false; + } + + if (numbers != nullptr) { + *numbers = sink.numbers; + } + return true; +} + +bool collectNumberLexemes(std::string *input, std::vector *numbers, + std::string *failure_detail) { + NumberCollectingSink sink; + JSONAdapter adapter; + JsonParseResult result = adapter.parse(*input, &sink, JsonBackendParseOptions()); + + if (!result.ok()) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, "Ok/Continue"); + failure_detail->append(" Input: "); + failure_detail->append(*input); + } + return false; + } + + if (numbers != nullptr) { + *numbers = sink.numbers; + } + return true; +} + +bool expectNumberLexemes(const char *case_name, const std::string &input, + const std::vector &expected, std::string *failure_detail) { + std::vector actual; + if (!collectNumberLexemes(input, &actual, failure_detail)) { + return false; + } + + if (actual != expected) { + if (failure_detail != nullptr) { + *failure_detail = std::string("Case '") + case_name + + "' expected " + describeStringList(expected) + + ", got " + 
describeStringList(actual) + "."; + } + return false; + } + + return true; +} + +bool expectBackendDepthLimitParseError(std::string *failure_detail) { + AcceptAllSink sink; + JSONAdapter adapter; + JsonBackendParseOptions options; + + options.technical_max_depth = 2; + JsonParseResult result = adapter.parse(makeNestedArrayJson(8), &sink, + options); + + if (result.parse_status != JsonParseStatus::ParseError + || result.sink_status != JsonSinkStatus::Continue) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, + "ParseError/Continue"); + } + return false; + } + + return true; +} + +bool expectBackendDepthHeadroomSuccess(std::string *failure_detail) { + AcceptAllSink sink; + JSONAdapter adapter; + JsonBackendParseOptions options; + + options.technical_max_depth = 32; + JsonParseResult result = adapter.parse(makeNestedArrayJson(8), &sink, + options); + + if (!result.ok()) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, "Ok/Continue"); + } + return false; + } + + return true; +} + +bool expectEmptyInputSuccess(std::string *failure_detail) { + return expectParseResult("", JsonParseStatus::Ok, + JsonSinkStatus::Continue, "Ok/Continue", failure_detail); +} + +bool expectMalformedInputParseError(std::string *failure_detail) { + return expectParseResult("a", JsonParseStatus::ParseError, + JsonSinkStatus::Continue, "ParseError/Continue", failure_detail); +} + +bool expectTruncatedInputMapsToTruncatedInput(std::string *failure_detail) { + return expectParseResult("{\"key\":", JsonParseStatus::TruncatedInput, + JsonSinkStatus::Continue, "TruncatedInput/Continue", failure_detail); +} + +bool expectExactRootScalarNumberLexemes(std::string *failure_detail) { + struct NumberLexemeCase { + const char *name; + const char *input; + }; + + const NumberLexemeCase cases[] = { + {"zero", "0"}, + {"negative_zero", "-0"}, + {"decimal", "1.0"}, + {"scientific", "1e3"}, + {"negative_fraction_with_exponent", 
"-1.25e-4"}, + {"uint64_max", "18446744073709551615"}, + {"uint64_overflow", "18446744073709551616"}, + {"large_integer", "123456789012345678901234567890"} + }; + + for (const auto &test_case : cases) { + if (!expectNumberLexemes(test_case.name, test_case.input, + std::vector{test_case.input}, failure_detail)) { + return false; + } + } + + return true; +} + +bool expectExactContainerNumberLexemes(std::string *failure_detail) { + const std::string input = + "{ \"arr\" : [ 0 , -0 , 1.0 , 1e3 ], " + "\"obj\" : { \"frac\" : -1.25e-4 , " + "\"max\" : 18446744073709551615 , " + "\"over\" : 18446744073709551616 , " + "\"big\" : 123456789012345678901234567890 } }"; + + return expectNumberLexemes("container_numbers_with_whitespace_and_boundaries", + input, std::vector{ + "0", + "-0", + "1.0", + "1e3", + "-1.25e-4", + "18446744073709551615", + "18446744073709551616", + "123456789012345678901234567890" + }, failure_detail); +} + +#if defined(MSC_JSON_BACKEND_SIMDJSON) +bool isJsonWhitespace(char value) { + return value == ' ' || value == '\t' || value == '\n' || value == '\r'; +} + +bool expectMutableSimdjsonPathPreservesLogicalInput( + std::string *failure_detail) { + std::string input("{\"n\":1}"); + const std::string original = input; + std::vector numbers; + + input.shrink_to_fit(); + if (!collectNumberLexemes(&input, &numbers, failure_detail)) { + return false; + } + + if (numbers != std::vector{"1"}) { + if (failure_detail != nullptr) { + *failure_detail = std::string("Expected [\"1\"], got ") + + describeStringList(numbers) + "."; + } + return false; + } + + if (input.size() <= original.size()) { + if (failure_detail != nullptr) { + *failure_detail = "Expected mutable simdjson input to grow after " + "in-place padding."; + } + return false; + } + + if (input.compare(0, original.size(), original) != 0) { + if (failure_detail != nullptr) { + *failure_detail = "Mutable simdjson input changed its logical JSON " + "prefix."; + } + return false; + } + + for (std::size_t i = 
original.size(); i < input.size(); i++) { + if (!isJsonWhitespace(input[i])) { + if (failure_detail != nullptr) { + *failure_detail = "Mutable simdjson input appended a non-" + "whitespace padding byte."; + } + return false; + } + } + + return true; +} + +bool expectConstSimdjsonPathLeavesInputUntouched(std::string *failure_detail) { + const std::string original("{\"n\":1}"); + std::string input = original; + std::vector numbers; + + input.shrink_to_fit(); + if (!collectNumberLexemes(static_cast(input), &numbers, + failure_detail)) { + return false; + } + + if (numbers != std::vector{"1"}) { + if (failure_detail != nullptr) { + *failure_detail = std::string("Expected [\"1\"], got ") + + describeStringList(numbers) + "."; + } + return false; + } + + if (input != original) { + if (failure_detail != nullptr) { + *failure_detail = "Const simdjson input was mutated."; + } + return false; + } + + return true; +} +#endif + +bool reportTestResult(const char *name, bool passed, + const std::string &detail) { + std::cout << ":test-result: " << (passed ? 
"PASS " : "FAIL ") + << "json_backend_depth_tests:" << name << std::endl; + if (!passed && !detail.empty()) { + std::cerr << name << ": " << detail << std::endl; + } + return passed; +} + +} // namespace + +int runJsonBackendDepthTests() { + int failures = 0; + std::string detail; + + if (!reportTestResult("technical_depth_limit_returns_parse_error", + expectBackendDepthLimitParseError(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("technical_depth_with_headroom_succeeds", + expectBackendDepthHeadroomSuccess(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("empty_input_returns_ok", + expectEmptyInputSuccess(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("malformed_input_maps_to_parse_error", + expectMalformedInputParseError(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("truncated_input_maps_to_truncated_input", + expectTruncatedInputMapsToTruncatedInput(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("number_lexemes_for_root_scalars_remain_exact", + expectExactRootScalarNumberLexemes(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("number_lexemes_in_containers_remain_exact", + expectExactContainerNumberLexemes(&detail), detail)) { + failures++; + } + +#if defined(MSC_JSON_BACKEND_SIMDJSON) + detail.clear(); + if (!reportTestResult("mutable_simdjson_input_keeps_logical_json_prefix", + expectMutableSimdjsonPathPreservesLogicalInput(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("const_simdjson_input_is_not_mutated", + expectConstSimdjsonPathLeavesInputUntouched(&detail), detail)) { + failures++; + } +#endif + + return failures == 0 ? 
0 : 1; +} + +} // namespace RequestBodyProcessor +} // namespace modsecurity + +int main() { + return modsecurity::RequestBodyProcessor::runJsonBackendDepthTests(); +} diff --git a/test/unit/unit_test.cc b/test/unit/unit_test.cc index e67c100523..8cfbb18738 100644 --- a/test/unit/unit_test.cc +++ b/test/unit/unit_test.cc @@ -29,6 +29,16 @@ namespace modsecurity_test { +namespace { + +std::unique_ptr make_empty_unit_test() { + auto test = std::make_unique(); + test->ret = 0; + test->skipped = false; + return test; +} + +} // namespace void replaceAll(std::string *s, const std::string &search, @@ -106,36 +116,85 @@ std::string UnitTest::print() const { } -std::unique_ptr UnitTest::from_yajl_node(const yajl_val &node) { - size_t num_tests = node->u.object.len; - auto u = std::make_unique(); - - for (int i = 0; i < num_tests; i++) { - const char *key = node->u.object.keys[ i ]; - yajl_val val = node->u.object.values[ i ]; - - u->skipped = false; - if (strcmp(key, "param") == 0) { - u->param = YAJL_GET_STRING(val); - } else if (strcmp(key, "input") == 0) { - u->input = YAJL_GET_STRING(val); - json2bin(&u->input); - } else if (strcmp(key, "resource") == 0) { - u->resource = YAJL_GET_STRING(val); - } else if (strcmp(key, "name") == 0) { - u->name = YAJL_GET_STRING(val); - } else if (strcmp(key, "type") == 0) { - u->type = YAJL_GET_STRING(val); - } else if (strcmp(key, "ret") == 0) { - u->ret = YAJL_GET_INTEGER(val); - } else if (strcmp(key, "output") == 0) { - u->output = std::string(YAJL_GET_STRING(val)); - json2bin(&u->output); - /* - * Converting \\u0000 to \0 due to the following gcc bug: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53690 - * - */ +std::unique_ptr UnitTest::from_json_document( + const modsecurity_test::json::JsonDocument *document) { + modsecurity_test::json::JsonValue root; + + if (modsecurity_test::json::get(document->get_value(), &root) == false) { + return make_empty_unit_test(); + } + + modsecurity_test::json::JsonType type; + if 
(modsecurity_test::json::get(root.type(), &type) == false) { + return make_empty_unit_test(); + } + + if (type == modsecurity_test::json::JsonType::Array) { + modsecurity_test::json::JsonArray tests; + if (modsecurity_test::json::get(root.get_array(), &tests) == false) { + return make_empty_unit_test(); + } + + for (auto test_result : tests) { + modsecurity_test::json::JsonValue test_value; + if (modsecurity_test::json::get(std::move(test_result), + &test_value) == false) { + continue; + } + + return from_json_value(test_value); + } + + return make_empty_unit_test(); + } + + return from_json_value(root); +} + +std::unique_ptr UnitTest::from_json_value( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; + auto u = make_empty_unit_test(); + + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return u; + } + + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; + + if (modsecurity_test::json::get(std::move(field_result), &field) + == false) { + continue; + } + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; + } + child = field.value(); + + if (key == "param") { + u->param = modsecurity_test::json::get_string(child); + } else if (key == "input") { + u->input = modsecurity_test::json::get_string(child); + json2bin(&u->input); + } else if (key == "resource") { + u->resource = modsecurity_test::json::get_string(child); + } else if (key == "name") { + u->name = modsecurity_test::json::get_string(child); + } else if (key == "type") { + u->type = modsecurity_test::json::get_string(child); + } else if (key == "ret") { + u->ret = static_cast(modsecurity_test::json::get_integer(child)); + } else if (key == "output") { + u->output = modsecurity_test::json::get_string(child); + json2bin(&u->output); + /* + * Converting \\u0000 to \0 due to the following gcc bug: + * 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53690 + * + */ } } diff --git a/test/unit/unit_test.h b/test/unit/unit_test.h index ffd776442b..a92565fc6c 100644 --- a/test/unit/unit_test.h +++ b/test/unit/unit_test.h @@ -13,13 +13,13 @@ * */ -#include - #include #include #include #include +#include "test/common/json.h" + #ifndef TEST_UNIT_UNIT_TEST_H_ #define TEST_UNIT_UNIT_TEST_H_ @@ -27,13 +27,16 @@ namespace modsecurity_test { class UnitTestResult { public: - int ret; + int ret = 0; std::string output; }; class UnitTest { public: - static std::unique_ptr from_yajl_node(const yajl_val &); + static std::unique_ptr from_json_document( + const modsecurity_test::json::JsonDocument *document); + static std::unique_ptr from_json_value( + modsecurity_test::json::JsonValue value); std::string print() const; diff --git a/tools/rules-check/Makefile.am b/tools/rules-check/Makefile.am index 8080411716..fc14a6f986 100644 --- a/tools/rules-check/Makefile.am +++ b/tools/rules-check/Makefile.am @@ -16,8 +16,7 @@ modsec_rules_check_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) modsec_rules_check_LDFLAGS = \ $(GEOIP_LDFLAGS) \ @@ -26,11 +25,10 @@ modsec_rules_check_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) \ $(LIBXML2_LDFLAGS) modsec_rules_check_CPPFLAGS = \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(GLOBAL_CPPFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ @@ -40,4 +38,3 @@ modsec_rules_check_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in -