From 28171c767c907f9d9bba3a3c4f4f00c50e53df71 Mon Sep 17 00:00:00 2001 From: Charles Strahan Date: Wed, 10 Jun 2026 18:23:14 -0500 Subject: [PATCH] Bump vendored LIEF 0.13.0 -> 0.17.6 and migrate to its API postject's 3-year-old LIEF (0.13.0) corrupts the dynamic symbol table when it rebuilds a binary with a large .gnu.hash: only some symbol-table `st_name` offsets are updated, the rest keep stale offsets, so exported symbols resolve to truncated/garbage names. For a Node.js SEA this breaks native (raw-V8) addons at runtime with "undefined symbol: _ZN2v8...", because the dlopen'd .node resolves V8 symbols from the host executable by name. This is upstream LIEF bug lief-project/LIEF#1241 ("Corrupted symbol table due to DT_GNU_HASH parsing"): a hardcoded NB_MAX_MASKWORD = 512 cap on the GNU-hash bloom filter. When the filter exceeds 512 maskwords (Node's binary has ~40k dynamic symbols, far past that), the symbol-table rebuild is left incomplete. Fixed by LIEF commit 2d1bbae (in the 0.16.6 -> 0.16.7 range), which removes the cap and reconstructs GnuHash properly; verified here by a version sweep (0.16.6 corrupts, 0.16.7 is clean). Two conditions must coincide to hit it, which is why it looked PIE-specific: injecting a note into a PIE (ET_DYN) binary forces LIEF to relocate .dynsym/.dynstr (the trigger for the symbol-table rebuild), and a >512-maskword .gnu.hash is the defect (#1241). Official non-PIE/ET_EXEC Node builds never relocate .dynstr, so they dodged the bug even on old LIEF; builds that link the node binary as PIE hit it directly. This bumps the dependency to 0.17.6 (latest) via CMake FetchContent and migrates src/postject.cpp to the LIEF 0.16+ API. The injection logic is lifted from Node.js core's own LIEF-based injector, src/node_sea_bin.cc, added in nodejs/node#61167 (Joyee Cheung's work to move SEA blob injection into Node and off the stale postject). 
Also force-include <cstdlib> globally: LIEF's bundled fmt calls global free() from a template whose definition point doesn't see <cstdlib> under Emscripten, which otherwise fails the wasm build. Co-Authored-By: Claude Opus 4.8 (1M context) --- CMakeLists.txt | 42 ++++++++---- src/postject.cpp | 165 ++++++++++++++++++++++------------------------- 2 files changed, 104 insertions(+), 103 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f5a47ca..3b474f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,25 +1,39 @@ -cmake_minimum_required(VERSION 3.9) -set(CMAKE_CXX_STANDARD 11) +cmake_minimum_required(VERSION 3.24) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) project(postject) -# Disable unnecessary LIEF options -option(LIEF_DOC "Build LIEF docs" OFF) -option(LIEF_C_API "C API" OFF) -option(LIEF_PYTHON_API "Build LIEF Python API" OFF) -option(LIEF_ENABLE_JSON "Enable JSON-related APIs" OFF) -option(LIEF_EXAMPLES "Build LIEF examples" OFF) -option(LIEF_TESTS "Build LIEF tests" OFF) -option(LIEF_ART "Build LIEF with ART module" OFF) -option(LIEF_DEX "Build LIEF with DEX module" OFF) -option(LIEF_VDEX "Build LIEF with VDEX module" OFF) -option(LIEF_OAT "Build LIEF with OAT module" OFF) +# LIEF bundles fmt/spdlog, whose memory_buffer calls global `free` from a template +# whose definition point (under Emscripten's libc++ headers) doesn't yet have +# <cstdlib> visible -- ADL can't find it, so the build errors. Force-include <cstdlib> +# into every TU so `free`/`malloc` are always declared. +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include cstdlib") + +include(FetchContent) + +# Disable unnecessary LIEF options (set before LIEF is configured). 
+set(LIEF_DOC OFF CACHE BOOL "" FORCE) +set(LIEF_C_API OFF CACHE BOOL "" FORCE) +set(LIEF_PYTHON_API OFF CACHE BOOL "" FORCE) +set(LIEF_ENABLE_JSON OFF CACHE BOOL "" FORCE) +set(LIEF_EXAMPLES OFF CACHE BOOL "" FORCE) +set(LIEF_TESTS OFF CACHE BOOL "" FORCE) +set(LIEF_INSTALL OFF CACHE BOOL "" FORCE) +set(LIEF_LOGGING OFF CACHE BOOL "" FORCE) +set(LIEF_LOGGING_DEBUG OFF CACHE BOOL "" FORCE) if(MSVC) set(LIEF_USE_CRT_RELEASE "MT" CACHE STRING "LIEF CRT option") endif() -add_subdirectory(vendor/lief) +FetchContent_Declare( + LIEF + GIT_REPOSITORY https://github.com/lief-project/LIEF.git + GIT_TAG 0.17.6 + GIT_SHALLOW TRUE +) +FetchContent_MakeAvailable(LIEF) add_executable(postject src/postject.cpp) set_target_properties(postject PROPERTIES LINK_FLAGS "-sSINGLE_FILE -sMODULARIZE=1 -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=268435456 -sMAXIMUM_MEMORY=4294967296 --bind") diff --git a/src/postject.cpp b/src/postject.cpp index 730058c..997f51f 100644 --- a/src/postject.cpp +++ b/src/postject.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,14 @@ std::vector vec_from_val(const emscripten::val& value) { return emscripten::convertJSArrayToNumberVector(value); } +static emscripten::val to_uint8array(const std::vector& output) { + emscripten::val view{ + emscripten::typed_memory_view(output.size(), output.data())}; + auto output_data = emscripten::val::global("Uint8Array").new_(output.size()); + output_data.call("set", view); + return output_data; +} + ExecutableFormat get_executable_format(const emscripten::val& executable) { std::vector buffer = vec_from_val(executable); @@ -51,10 +60,11 @@ emscripten::val inject_into_elf(const emscripten::val& executable, return object; } + // LIEF can return a length longer than it actually is, so use compare here. 
LIEF::ELF::Note* existing_note = nullptr; for (LIEF::ELF::Note& note : binary->notes()) { - if (note.name() == note_name) { + if (note.name().compare(0, note_name.size(), note_name) == 0) { existing_note = &note; } } @@ -68,21 +78,31 @@ emscripten::val inject_into_elf(const emscripten::val& executable, } } - LIEF::ELF::Note note; - note.name(note_name); - note.description(vec_from_val(data)); - binary->add(note); + constexpr uint32_t kNoteType = 0; + std::unique_ptr note = LIEF::ELF::Note::create( + note_name, kNoteType, vec_from_val(data), note_name); - // Construct a new Uint8Array in JS - std::vector output = binary->raw(); - emscripten::val view{ - emscripten::typed_memory_view(output.size(), output.data())}; - auto output_data = emscripten::val::global("Uint8Array").new_(output.size()); - output_data.call("set", view); + if (!note) { + object.set("result", emscripten::val(InjectResult::kError)); + return object; + } - object.set("data", output_data); - object.set("result", emscripten::val(InjectResult::kSuccess)); + binary->add(*note); + + LIEF::ELF::Builder::config_t cfg; + cfg.notes = true; + cfg.dynamic_section = true; + LIEF::ELF::Builder builder(*binary, cfg); + builder.build(); + + const std::vector& output = builder.get_build(); + if (output.empty()) { + object.set("result", emscripten::val(InjectResult::kError)); + return object; + } + object.set("data", to_uint8array(output)); + object.set("result", emscripten::val(InjectResult::kSuccess)); return object; } @@ -102,33 +122,34 @@ emscripten::val inject_into_macho(const emscripten::val& executable, return object; } + std::vector payload = vec_from_val(data); + // Inject into all Mach-O binaries if there's more than one in a fat binary for (LIEF::MachO::Binary& binary : *fat_binary) { - LIEF::MachO::Section* existing_section = - binary.get_section(segment_name, section_name); - - if (existing_section) { - if (!overwrite) { - object.set("result", emscripten::val(InjectResult::kAlreadyExists)); - return 
object; - } - - binary.remove_section(segment_name, section_name, true); - } - LIEF::MachO::SegmentCommand* segment = binary.get_segment(segment_name); - LIEF::MachO::Section section(section_name, vec_from_val(data)); if (!segment) { // Create the segment and mark it read-only LIEF::MachO::SegmentCommand new_segment(segment_name); - new_segment.max_protection( - static_cast(LIEF::MachO::VM_PROTECTIONS::VM_PROT_READ)); - new_segment.init_protection( - static_cast(LIEF::MachO::VM_PROTECTIONS::VM_PROT_READ)); + new_segment.max_protection(static_cast( + LIEF::MachO::SegmentCommand::VM_PROTECTIONS::READ)); + new_segment.init_protection(static_cast( + LIEF::MachO::SegmentCommand::VM_PROTECTIONS::READ)); + LIEF::MachO::Section section(section_name, payload); new_segment.add_section(section); binary.add(new_segment); } else { + LIEF::MachO::Section* existing_section = + segment->get_section(section_name); + if (existing_section) { + if (!overwrite) { + object.set("result", emscripten::val(InjectResult::kAlreadyExists)); + return object; + } + binary.remove_section(segment_name, section_name, true); + segment = binary.get_segment(segment_name); + } + LIEF::MachO::Section section(section_name, payload); binary.add_section(*segment, section); } @@ -140,12 +161,7 @@ emscripten::val inject_into_macho(const emscripten::val& executable, // Construct a new Uint8Array in JS std::vector output = fat_binary->raw(); - emscripten::val view{ - emscripten::typed_memory_view(output.size(), output.data())}; - auto output_data = emscripten::val::global("Uint8Array").new_(output.size()); - output_data.call("set", view); - - object.set("data", output_data); + object.set("data", to_uint8array(output)); object.set("result", emscripten::val(InjectResult::kSuccess)); return object; @@ -181,29 +197,28 @@ emscripten::val inject_into_pe(const emscripten::val& executable, LIEF::PE::ResourceNode* id_node = nullptr; // First level => Type (ResourceDirectory node) - auto rcdata_node_iter = std::find_if( 
- std::begin(resources->childs()), std::end(resources->childs()), - [](const LIEF::PE::ResourceNode& node) { - return node.id() == - static_cast(LIEF::PE::RESOURCE_TYPES::RCDATA); - }); + constexpr uint32_t kRcdataId = + static_cast(LIEF::PE::ResourcesManager::TYPE::RCDATA); + auto rcdata_node_iter = std::find_if(std::begin(resources->childs()), + std::end(resources->childs()), + [](const LIEF::PE::ResourceNode& node) { + return node.id() == kRcdataId; + }); if (rcdata_node_iter != std::end(resources->childs())) { rcdata_node = &*rcdata_node_iter; } else { LIEF::PE::ResourceDirectory new_rcdata_node; - new_rcdata_node.id(static_cast(LIEF::PE::RESOURCE_TYPES::RCDATA)); + new_rcdata_node.id(kRcdataId); rcdata_node = &resources->add_child(new_rcdata_node); } - // Second level => ID (ResourceDirectory node) + // Second level => ID (ResourceDirectory node). + std::u16string resource_name_u16(resource_name.begin(), resource_name.end()); auto id_node_iter = std::find_if( std::begin(rcdata_node->childs()), std::end(rcdata_node->childs()), - [resource_name](const LIEF::PE::ResourceNode& node) { - return node.name() == - std::wstring_convert, - char16_t>{} - .from_bytes(resource_name); + [&resource_name_u16](const LIEF::PE::ResourceNode& node) { + return node.name() == resource_name_u16; }); if (id_node_iter != std::end(rcdata_node->childs())) { @@ -228,49 +243,21 @@ emscripten::val inject_into_pe(const emscripten::val& executable, id_node->delete_child(*id_node->childs()); } - LIEF::PE::ResourceData lang_node; - lang_node.content(vec_from_val(data)); + LIEF::PE::ResourceData lang_node(vec_from_val(data)); id_node->add_child(lang_node); - binary->remove_section(".rsrc", true); - - // Write out the binary, only modifying the resources - LIEF::PE::Builder builder(*binary); - builder.build_dos_stub(true); - builder.build_imports(false); - builder.build_overlay(false); - builder.build_relocations(false); - builder.build_resources(true); - builder.build_tls(false); - 
builder.build(); - - // TODO - Why doesn't LIEF just replace the .rsrc section? - // Can we at least change build_resources to take a section name? - - // Re-parse the output so the .l2 section is available - binary = LIEF::PE::Parser::parse(builder.get_build()); - - // Rename the rebuilt resource section - LIEF::PE::Section* section = binary->get_section(".l2"); - section->name(".rsrc"); - - LIEF::PE::Builder builder2(*binary); - builder2.build_dos_stub(true); - builder2.build_imports(false); - builder2.build_overlay(false); - builder2.build_relocations(false); - builder2.build_resources(false); - builder2.build_tls(false); - builder2.build(); - - // Construct a new Uint8Array in JS - const std::vector& output = builder2.get_build(); - emscripten::val view{ - emscripten::typed_memory_view(output.size(), output.data())}; - auto output_data = emscripten::val::global("Uint8Array").new_(output.size()); - output_data.call("set", view); + // Rebuild only the resources, keeping the section named .rsrc. + LIEF::PE::Builder::config_t cfg; + cfg.resources = true; + cfg.rsrc_section = ".rsrc"; + LIEF::PE::Builder builder(*binary, cfg); + if (!builder.build()) { + object.set("result", emscripten::val(InjectResult::kError)); + return object; + } - object.set("data", output_data); + const std::vector& output = builder.get_build(); + object.set("data", to_uint8array(output)); object.set("result", emscripten::val(InjectResult::kSuccess)); return object;