From f27cf23e5674959206d87863e192248761bf8ebf Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 28 Apr 2026 19:00:41 +0200 Subject: [PATCH 1/3] Add dumpResults() to IdBasedSolverResults + auto-conversion from owning->non-owning --- .../IfdsIde/Solver/IdBasedSolverResults.h | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h b/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h index c8e746079e..b15eaa691e 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h @@ -3,8 +3,10 @@ #include "phasar/DataFlow/IfdsIde/Solver/IterativeIDESolverResults.h" #include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Utilities.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include #include @@ -12,6 +14,11 @@ #include #include +namespace llvm { +class Instruction; +class Value; +} // namespace llvm + namespace psr { namespace detail { @@ -174,6 +181,10 @@ class IdBasedSolverResultsBase { } } + template + void dumpResults(const ICFGTy &ICF, + llvm::raw_ostream &OS = llvm::outs()) const; + private: [[nodiscard]] constexpr const auto &results() const noexcept { return static_cast(this)->resultsImpl(); @@ -219,6 +230,17 @@ class OwningIdBasedSolverResults assert(this->Results != nullptr); } + [[nodiscard]] IdBasedSolverResults get() const & noexcept { + return {Results.get()}; + } + IdBasedSolverResults get() && = delete; + + [[nodiscard]] operator IdBasedSolverResults() const & noexcept { + return get(); + } + + operator IdBasedSolverResults() && = delete; + private: [[nodiscard]] constexpr const auto &resultsImpl() const noexcept { assert(Results != nullptr); @@ -227,6 +249,50 @@ class OwningIdBasedSolverResults std::unique_ptr> Results{}; }; + +// For sorting the results in dumpResults() +std::string getMetaDataID(const llvm::Value *V); + +template +template +void detail::IdBasedSolverResultsBase::dumpResults( + const ICFGTy &ICF, llvm::raw_ostream &OS) const { + using f_t = typename ICFGTy::f_t; + + auto ResultEntries = llvm::to_vector(getAllResultEntries()); + + std::ranges::sort(ResultEntries, [](const auto &Lhs, const auto &Rhs) { + const auto &LRow = std::get<0>(Lhs); + const auto &RRow = std::get<0>(Rhs); + if constexpr (std::is_same_v) { + return StringIDLess{}(getMetaDataID(LRow), getMetaDataID(RRow)); + } else { + // If non-LLVM IR is used + return LRow < RRow; + } + }); + + f_t PrevFn = f_t{}; + f_t CurrFn = f_t{}; + + for (const auto &[Row, Column] : ResultEntries) { + CurrFn = ICF.getFunctionOf(Row); + if (PrevFn != CurrFn) { + PrevFn = CurrFn; + OS << "\n\n============ Results for function '" + + ICF.getFunctionName(CurrFn) + "' ============\n"; + } + + std::string NString = NToString(Row); + std::string Line(NString.size(), '-'); + OS << "\n\nN: " << NString << "\n---" << Line << '\n'; + + for (const auto &[Col, Val] : Column) { + OS << "\tD: " << DToString(Col) << " | V: " << LToString(Val) << '\n'; + } + } + OS << '\n'; +} } // namespace psr #endif // PHASAR_PHASARLLVM_DATAFLOWSOLVER_IFDSIDE_SOLVER_IDBASEDSOLVERRESULTS_H From 175067ebedf977e55feb92372714b0bba7a4ddfd Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 28 Apr 2026 19:28:50 +0200 Subject: [PATCH 2/3] Integrate result[s]AtInLLVMSSA() into IdBasedSolverResults + fix dumpResults() --- .../IfdsIde/Solver/IdBasedSolverResults.h | 50 +++- .../IfdsIde/Solver/IterativeIDESolver.h | 49 +--- .../DataFlow/IfdsIde/LLVMSolverResults.h | 232 ++++++++++-------- 3 files changed, 183 insertions(+), 148 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h b/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h index b15eaa691e..134ae626bc 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h @@ -3,6 +3,7 @@ #include "phasar/DataFlow/IfdsIde/Solver/IterativeIDESolverResults.h" #include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Printer.h" #include "phasar/Utils/Utilities.h" #include "llvm/ADT/STLExtras.h" @@ -38,7 +39,7 @@ class IdBasedSolverResultsBase { // --v Needed for llvm::mapped_iterator // NOLINTNEXTLINE(readability-const-return-type) - const std::pair + const std::pair, ByConstRef> operator()(ByConstRef Entry) const { return {Results->FactCompressor[Entry.first], Results->ValCompressor[Entry.second]}; @@ -88,6 +89,7 @@ class IdBasedSolverResultsBase { } [[nodiscard]] size_t size() const noexcept { return Row->size(); } + [[nodiscard]] bool empty() const noexcept { return Row->empty(); } private: const detail::IterativeIDESolverResults *Results{}; @@ -181,6 +183,45 @@ class IdBasedSolverResultsBase { } } + /// Returns the data-flow results at the given statement while respecting + /// LLVM's SSA semantics. + /// + /// An example: when a value is loaded and the location loaded from, here + /// variable 'i', is a data-flow fact that holds, then the loaded value '%0' + /// will usually be generated and also holds. However, due to the underlying + /// theory (and respective implementation) this load instruction causes the + /// loaded value to be generated and thus, it will be valid only AFTER the + /// load instruction, i.e., at the successor instruction. + /// + /// %0 = load i32, i32* %i, align 4 + /// + /// This result accessor function returns the results at the successor + /// instruction(s) reflecting that the expression on the left-hand side holds + /// if the expression on the right-hand side holds. + [[nodiscard]] std::unordered_map + resultsAtInLLVMSSA(ByConstRef Stmt, + bool AllowOverapproximation = false) const + requires same_as_decay, llvm::Instruction>; + + /// Returns the L-type result at the given statement for the given data-flow + /// fact while respecting LLVM's SSA semantics. + /// + /// An example: when a value is loaded and the location loaded from, here + /// variable 'i', is a data-flow fact that holds, then the loaded value '%0' + /// will usually be generated and also holds. However, due to the underlying + /// theory (and respective implementation) this load instruction causes the + /// loaded value to be generated and thus, it will be valid only AFTER the + /// load instruction, i.e., at the successor instruction. + /// + /// %0 = load i32, i32* %i, align 4 + /// + /// This result accessor function returns the results at the successor + /// instruction(s) reflecting that the expression on the left-hand side holds + /// if the expression on the right-hand side holds. + [[nodiscard]] l_t resultAtInLLVMSSA(ByConstRef Stmt, d_t Value, + bool AllowOverapproximation = false) const + requires same_as_decay, llvm::Instruction>; + template void dumpResults(const ICFGTy &ICF, llvm::raw_ostream &OS = llvm::outs()) const; @@ -272,10 +313,17 @@ void detail::IdBasedSolverResultsBase::dumpResults( } }); + OS << "\n***************************************************************\n" + << "* Raw IterativeIDESolver results *\n" + << "***************************************************************\n"; + f_t PrevFn = f_t{}; f_t CurrFn = f_t{}; for (const auto &[Row, Column] : ResultEntries) { + if (Column.empty()) { + continue; + } CurrFn = ICF.getFunctionOf(Row); if (PrevFn != CurrFn) { PrevFn = CurrFn; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h index fc79c6e2b0..c4daaa727d 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h @@ -154,54 +154,7 @@ class IterativeIDESolver } void dumpResults(llvm::raw_ostream &OS = llvm::outs()) const { - OS << "\n***************************************************************\n" - << "* Raw IDESolver results *\n" - << "***************************************************************\n"; - auto Cells = this->base_results_t::ValTab_cellVec(); - if (Cells.empty()) { - OS << "No results computed!\n"; - return; - } - - std::sort(Cells.begin(), Cells.end(), [](const auto &Lhs, const auto &Rhs) { - if constexpr (std::is_same_v) { - return StringIDLess{}(getMetaDataID(Lhs.getRowKey()), - getMetaDataID(Rhs.getRowKey())); - } else { - // If non-LLVM IR is used - return Lhs.getRowKey() < Rhs.getRowKey(); - } - }); - - n_t Prev{}; - n_t Curr{}; - f_t PrevFn{}; - f_t CurrFn{}; - - for (const auto &Cell : Cells) { - Curr = Cell.getRowKey(); - CurrFn = ICFG.getFunctionOf(Curr); - if (PrevFn != CurrFn) { - PrevFn = CurrFn; - OS << "\n\n============ Results for function '" + - ICFG.getFunctionName(CurrFn) + "' ============\n"; - } - if (Prev != Curr) { - Prev = Curr; - std::string NString = NToString(Curr); - std::string Line(NString.size(), '-'); - - OS << "\n\nN: " << NString << "\n---" << Line << '\n'; - } - OS << "\tD: " << DToString(Cell.getColumnKey()); - if constexpr (ComputeValues) { - OS << " | V: " << LToString(Cell.getValue()); - } - - OS << '\n'; - } - - OS << '\n'; + getSolverResults().dumpResults(ICFG, OS); } [[nodiscard]] IterativeIDESolverStats getStats() const noexcept diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h index 6a9c60e45d..30c74931e0 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h @@ -10,6 +10,7 @@ #ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_LLVMSOLVERRESULTS_H #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_LLVMSOLVERRESULTS_H +#include "phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h" #include "phasar/DataFlow/IfdsIde/SolverResults.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/JoinLattice.h" @@ -28,93 +29,18 @@ namespace psr::detail { /// does not work for backwards analyses. /// The right way would be to ask the ICFG, but we don't have a reference to it /// here yet (TODO!) -template -auto SolverResultsBase::resultsAtInLLVMSSA( - ByConstRef Stmt, bool AllowOverapproximation, bool StripZero) const - -> std::unordered_map - requires same_as_decay, llvm::Instruction> -{ - std::unordered_map Result = [this, Stmt, AllowOverapproximation]() { - if (Stmt->getType()->isVoidTy()) { - return self().Results.row(Stmt); - } - if (!Stmt->getNextNode()) { - auto GetStartRow = [this](const llvm::BasicBlock *BB) -> decltype(auto) { - const auto *First = &BB->front(); - if (llvm::isa(First)) { - First = First->getNextNonDebugInstruction(); - } - return self().Results.row(First); - }; - - // We have reached the end of a BasicBlock. If there is a successor BB - // that only has one predecessor, we are lucky and can just take results - // from there - for (const llvm::BasicBlock *Succ : llvm::successors(Stmt)) { - if (Succ->hasNPredecessors(1)) { - return GetStartRow(Succ); - } - } - - if (!AllowOverapproximation) { - llvm::report_fatal_error("[resultsAtInLLVMSSA]: Cannot precisely " - "collect the results at instruction " + - llvm::Twine(llvmIRToString(Stmt))); - } - - // There is no successor with only one predecessor. - // All we can do is merge the results from all successors to get a sound - // overapproximation. This is not optimal and may be replaced in the - // future. - PHASAR_LOG_LEVEL(WARNING, "[resultsAtInLLVMSSA]: Cannot precisely " - "collect the results at instruction " - << llvmIRToString(Stmt) - << ". Use a sound, but potentially " - "imprecise overapproximation"); - std::unordered_map Ret; - for (const llvm::BasicBlock *Succ : llvm::successors(Stmt)) { - const auto &Row = GetStartRow(Succ); - for (const auto &[Fact, Value] : Row) { - auto [It, Inserted] = Ret.try_emplace(Fact, Value); - if (!Inserted && Value != It->second) { - if constexpr (HasJoinLatticeTraits) { - It->second = JoinLatticeTraits::join(It->second, Value); - } else { - // We have no way of correctly merging, so set the value to the - // default constructed l_t hoping it marks BOTTOM. - It->second = l_t(); - } - } - } - } - return Ret; - } - assert(Stmt->getNextNode() && "Expected to find a valid successor node!"); - return self().Results.row(Stmt->getNextNode()); - }(); - if (StripZero) { - Result.erase(self().ZV); - } - return Result; -} - -template - -auto SolverResultsBase::resultAtInLLVMSSA( - ByConstRef Stmt, d_t Value, bool AllowOverapproximation) const -> l_t - requires same_as_decay, llvm::Instruction> -{ +[[nodiscard]] auto resultsAtInLLVMSSAImpl(const auto &Self, const auto &Stmt, + bool AllowOverapproximation) { if (Stmt->getType()->isVoidTy()) { - return self().Results.get(Stmt, Value); + return Self.row(Stmt); } if (!Stmt->getNextNode()) { - auto GetStartVal = [this, - &Value](const llvm::BasicBlock *BB) -> decltype(auto) { + auto GetStartRow = [&Self](const llvm::BasicBlock *BB) -> decltype(auto) { const auto *First = &BB->front(); if (llvm::isa(First)) { First = First->getNextNonDebugInstruction(); } - return self().Results.get(First, Value); + return Self.row(First); }; // We have reached the end of a BasicBlock. If there is a successor BB @@ -122,7 +48,7 @@ auto SolverResultsBase::resultAtInLLVMSSA( // from there for (const llvm::BasicBlock *Succ : llvm::successors(Stmt)) { if (Succ->hasNPredecessors(1)) { - return GetStartVal(Succ); + return GetStartRow(Succ); } } @@ -136,28 +62,24 @@ auto SolverResultsBase::resultAtInLLVMSSA( // All we can do is merge the results from all successors to get a sound // overapproximation. This is not optimal and may be replaced in the // future. - PHASAR_LOG_LEVEL(WARNING, "[resultAtInLLVMSSA]: Cannot precisely " + PHASAR_LOG_LEVEL(WARNING, "[resultsAtInLLVMSSA]: Cannot precisely " "collect the results at instruction " - << *Stmt + << llvmIRToString(Stmt) << ". Use a sound, but potentially " "imprecise overapproximation"); - auto It = llvm::succ_begin(Stmt); - auto End = llvm::succ_end(Stmt); - l_t Ret{}; - if (It != End) { - Ret = GetStartVal(*It); - for (++It; It != End; ++It) { - const auto &Val = GetStartVal(*It); - if constexpr (HasJoinLatticeTraits) { - Ret = JoinLatticeTraits::join(Ret, Val); - if (Ret == JoinLatticeTraits::bottom()) { - break; - } - } else { - if (Ret != Val) { + std::remove_cvref_t Ret; + using l_t = typename decltype(Ret)::mapped_type; + for (const llvm::BasicBlock *Succ : llvm::successors(Stmt)) { + const auto &Row = GetStartRow(Succ); + for (const auto &[Fact, Value] : Row) { + auto [It, Inserted] = Ret.try_emplace(Fact, Value); + if (!Inserted && Value != It->second) { + if constexpr (HasJoinLatticeTraits) { + It->second = JoinLatticeTraits::join(It->second, Value); + } else { // We have no way of correctly merging, so set the value to the // default constructed l_t hoping it marks BOTTOM. - return {}; + It->second = l_t(); } } } @@ -165,7 +87,119 @@ auto SolverResultsBase::resultAtInLLVMSSA( return Ret; } assert(Stmt->getNextNode() && "Expected to find a valid successor node!"); - return self().Results.get(Stmt->getNextNode(), Value); + return Self.row(Stmt->getNextNode()); +} + +[[nodiscard]] auto resultAtInLLVMSSAImpl(const auto &Self, const auto &Stmt, + const auto &Fact, + bool AllowOverapproximation) { + if (Stmt->getType()->isVoidTy()) { + return Self.resultAt(Stmt, Fact); + } + + if (auto Next = Stmt->getNextNode()) { + return Self.resultAt(Next, Fact); + } + + auto GetStartVal = [&Self, + &Fact](const llvm::BasicBlock *BB) -> decltype(auto) { + const auto *First = &BB->front(); + if (llvm::isa(First)) { + First = First->getNextNonDebugInstruction(); + } + return Self.resultAt(First, Fact); + }; + + // We have reached the end of a BasicBlock. If there is a successor BB + // that only has one predecessor, we are lucky and can just take results + // from there + for (const llvm::BasicBlock *Succ : llvm::successors(Stmt)) { + if (Succ->hasNPredecessors(1)) { + return GetStartVal(Succ); + } + } + + if (!AllowOverapproximation) { + llvm::report_fatal_error("[resultsAtInLLVMSSA]: Cannot precisely " + "collect the results at instruction " + + llvm::Twine(llvmIRToString(Stmt))); + } + + // There is no successor with only one predecessor. + // All we can do is merge the results from all successors to get a sound + // overapproximation. This is not optimal and may be replaced in the + // future. + PHASAR_LOG_LEVEL(WARNING, "[resultAtInLLVMSSA]: Cannot precisely " + "collect the results at instruction " + << *Stmt + << ". Use a sound, but potentially " + "imprecise overapproximation"); + auto It = llvm::succ_begin(Stmt); + auto End = llvm::succ_end(Stmt); + + using l_t = std::remove_cvref_t; + + l_t Ret{}; + if (It != End) { + Ret = GetStartVal(*It); + for (++It; It != End; ++It) { + const auto &Val = GetStartVal(*It); + if constexpr (HasJoinLatticeTraits) { + Ret = JoinLatticeTraits::join(Ret, Val); + if (Ret == JoinLatticeTraits::bottom()) { + break; + } + } else { + if (Ret != Val) { + // We have no way of correctly merging, so set the value to the + // default constructed l_t hoping it marks BOTTOM. + return l_t{}; + } + } + } + } + return Ret; +} + +template +auto SolverResultsBase::resultsAtInLLVMSSA( + ByConstRef Stmt, bool AllowOverapproximation, bool StripZero) const + -> std::unordered_map + requires same_as_decay, llvm::Instruction> +{ + std::unordered_map Result = + resultsAtInLLVMSSAImpl(self(), Stmt, AllowOverapproximation); + if (StripZero) { + Result.erase(self().ZV); + } + return Result; +} + +template +auto SolverResultsBase::resultAtInLLVMSSA( + ByConstRef Stmt, d_t Value, bool AllowOverapproximation) const -> l_t + requires same_as_decay, llvm::Instruction> +{ + return resultAtInLLVMSSAImpl(self(), Stmt, Value, AllowOverapproximation); +} + +template +auto IdBasedSolverResultsBase::resultsAtInLLVMSSA( + ByConstRef Stmt, bool AllowOverapproximation) const + -> std::unordered_map + requires same_as_decay, llvm::Instruction> +{ + return resultsAtInLLVMSSAImpl(static_cast(*this), Stmt, + AllowOverapproximation); +} + +template +auto IdBasedSolverResultsBase::resultAtInLLVMSSA( + ByConstRef Stmt, d_t Value, bool AllowOverapproximation) const -> l_t + requires same_as_decay, llvm::Instruction> +{ + return resultAtInLLVMSSAImpl(static_cast(*this), Stmt, Value, + AllowOverapproximation); } } // namespace psr::detail From e385374cdd86a4ec43d81156a499eb999670fc8c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 29 Apr 2026 12:31:46 +0200 Subject: [PATCH 3/3] Fix missing explicit ctor invocation --- include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h b/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h index 134ae626bc..3fe24970e8 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h @@ -272,7 +272,7 @@ class OwningIdBasedSolverResults } [[nodiscard]] IdBasedSolverResults get() const & noexcept { - return {Results.get()}; + return IdBasedSolverResults{Results.get()}; } IdBasedSolverResults get() && = delete;