From 3346dbb25e8e612974c197bf3cd2125834c19ae1 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Sat, 27 Jun 2026 12:09:11 -0400 Subject: [PATCH] fix: avoid leaks in the per-ImageInput/Output error message (The same problem and set of fixes were applied to both ImageInput and ImageOutput. For brevity, we will only discuss ImageInput below.) ImageInput error messages are stored in a thread_local map keyed by a unique per-instance id (m_id). The destructor's erase of that entry was commented out (over caution about the static destruction order fiasco), so every ImageInput that accumulated an error and was destroyed without its error being cleared via geterror() leaked one map entry. The id is monotonically increasing, so entries never get reused and the map grows without bound. (How did we find this? A fuzzer hit this hard: opening hundreds of thousands of corrupt files -- each of which errors, and each ImageInput::create() attempt tries several readers -- grew the map to tens of MB and eventually OOMed. The leak was traced to this issue.) So we re-enable the erase, guarded against the destruction order fiasco: the thread_local map is wrapped in a struct whose destructor clears an 'alive' flag, and the ImageInput destructor only touches the map while it is alive. Each ImageInput removes only its own entry (keyed by its unique id); other inputs' errors are untouched. Cross-thread cleanup is still a known limitation -- errors accumulated in a different thread than the one that descroys the ImageInput are still leaked, noted in a TODO. However, this should be exceptionally rare, since not only do you need the two-thread situation, but the one that made the call that produced an error needs to not check for errors and return/clear the message. So only badly behaving user code that fails to check and retrieve errors should have this minor leak. And we are in the process of adding compile-time warnings for not capturing the error status values from our APIs. Assisted-by: Claude Code / Claude Opus 4.8 Signed-off-by: Larry Gritz --- src/libOpenImageIO/imageinput.cpp | 41 +++++++++++++++++++++--------- src/libOpenImageIO/imageoutput.cpp | 40 ++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/src/libOpenImageIO/imageinput.cpp b/src/libOpenImageIO/imageinput.cpp index e9b9b78a24..88a7906a1d 100644 --- a/src/libOpenImageIO/imageinput.cpp +++ b/src/libOpenImageIO/imageinput.cpp @@ -27,8 +27,20 @@ using namespace pvt; using namespace OIIO::pvt; -// store an error message per thread, for a specific ImageInput -static thread_local tsl::robin_map input_error_messages; +// Store an error message per thread, for a specific ImageInput (keyed by +// the ImageInput's unique id). Wrapped in a small struct carrying an "alive" +// flag set false by its own destructor, so that an ImageInput destroyed +// during thread/program teardown -- after this thread_local has already been +// destroyed (the static destruction order fiasco) -- can detect that and skip +// touching the dead map. +namespace { +struct InputErrorMessages { + tsl::robin_map map; + bool alive = true; + ~InputErrorMessages() { alive = false; } +}; +} // namespace +static thread_local InputErrorMessages input_error_messages; static std::atomic_int64_t input_next_id(0); @@ -92,10 +104,15 @@ ImageInput::ImageInput() ImageInput::~ImageInput() { - // Erase any leftover errors from this thread - // TODO: can we clear other threads' errors? - // TODO: potentially unsafe due to the static destruction order fiasco - // input_error_messages.erase(m_impl->m_id); + // Erase any leftover error for this ImageInput so the per-thread map does + // not grow without bound when many inputs are opened (e.g. a process that + // opens lots of files without checking errors, or a fuzzer). Guard against + // the static destruction order fiasco: if the thread_local map has already + // been destroyed, its 'alive' flag is false and we must not touch it. + // TODO: this only clears the entry if we are destroyed on the same thread + // that accumulated the error; cross-thread errors still leak. + if (input_error_messages.alive) + input_error_messages.map.erase(m_impl->m_id); } @@ -1340,7 +1357,7 @@ ImageInput::append_error(string_view message) const { if (message.size() && message.back() == '\n') message.remove_suffix(1); - std::string& err_str = input_error_messages[m_impl->m_id]; + std::string& err_str = input_error_messages.map[m_impl->m_id]; OIIO_DASSERT( err_str.size() < 1024 * 1024 * 16 && "Accumulated error messages > 16MB. Try checking return codes!"); @@ -1356,8 +1373,8 @@ ImageInput::append_error(string_view message) const bool ImageInput::has_error() const { - auto iter = input_error_messages.find(m_impl->m_id); - if (iter == input_error_messages.end()) + auto iter = input_error_messages.map.find(m_impl->m_id); + if (iter == input_error_messages.map.end()) return false; return iter.value().size() > 0; } @@ -1368,11 +1385,11 @@ std::string ImageInput::geterror(bool clear) const { std::string e; - auto iter = input_error_messages.find(m_impl->m_id); - if (iter != input_error_messages.end()) { + auto iter = input_error_messages.map.find(m_impl->m_id); + if (iter != input_error_messages.map.end()) { e = iter.value(); if (clear) - input_error_messages.erase(iter); + input_error_messages.map.erase(iter); } return e; } diff --git a/src/libOpenImageIO/imageoutput.cpp b/src/libOpenImageIO/imageoutput.cpp index 216c2a18f9..9ca17c0600 100644 --- a/src/libOpenImageIO/imageoutput.cpp +++ b/src/libOpenImageIO/imageoutput.cpp @@ -30,8 +30,20 @@ using namespace pvt; using namespace OIIO::pvt; -// store an error message per thread, for a specific ImageInput -static thread_local tsl::robin_map output_error_messages; +// Store an error message per thread, for a specific ImageOutput (keyed by +// the ImageOutput's unique id). Wrapped in a small struct carrying an "alive" +// flag set false by its own destructor, so that an ImageOutput destroyed +// during thread/program teardown -- after this thread_local has already been +// destroyed (the static destruction order fiasco) -- can detect that and skip +// touching the dead map. +namespace { +struct OutputErrorMessages { + tsl::robin_map map; + bool alive = true; + ~OutputErrorMessages() { alive = false; } +}; +} // namespace +static thread_local OutputErrorMessages output_error_messages; static std::atomic_int64_t output_next_id(0); @@ -94,10 +106,14 @@ ImageOutput::ImageOutput() ImageOutput::~ImageOutput() { - // Erase any leftover errors from this thread - // TODO: can we clear other threads' errors? - // TODO: potentially unsafe due to the static destruction order fiasco - // output_error_messages.erase(this); + // Erase any leftover error for this ImageOutput so the per-thread map does + // not grow without bound when many outputs are created. Guard against the + // static destruction order fiasco: if the thread_local map has already + // been destroyed, its 'alive' flag is false and we must not touch it. + // TODO: this only clears the entry if we are destroyed on the same thread + // that accumulated the error; cross-thread errors still leak. + if (output_error_messages.alive) + output_error_messages.map.erase(m_impl->m_id); } @@ -392,7 +408,7 @@ ImageOutput::append_error(string_view message) const { if (message.size() && message.back() == '\n') message.remove_suffix(1); - std::string& err_str = output_error_messages[m_impl->m_id]; + std::string& err_str = output_error_messages.map[m_impl->m_id]; OIIO_DASSERT( err_str.size() < 1024 * 1024 * 16 && "Accumulated error messages > 16MB. Try checking return codes!"); @@ -859,8 +875,8 @@ ImageOutput::copy_tile_to_image_buffer(int x, int y, int z, TypeDesc format, bool ImageOutput::has_error() const { - auto iter = output_error_messages.find(m_impl->m_id); - if (iter == output_error_messages.end()) + auto iter = output_error_messages.map.find(m_impl->m_id); + if (iter == output_error_messages.map.end()) return false; return iter.value().size() > 0; } @@ -871,11 +887,11 @@ std::string ImageOutput::geterror(bool clear) const { std::string e; - auto iter = output_error_messages.find(m_impl->m_id); - if (iter != output_error_messages.end()) { + auto iter = output_error_messages.map.find(m_impl->m_id); + if (iter != output_error_messages.map.end()) { e = iter.value(); if (clear) - output_error_messages.erase(iter); + output_error_messages.map.erase(iter); } return e; }