Implement DynamicPPL.rand_with_logpdf

penelopeysm · penelopeysm · commit a35d0e22bdc9 · 2025-12-14T17:34:58.000Z
diff --git a/HISTORY.md b/HISTORY.md
@@ -1,5 +1,9 @@
 # DynamicPPL Changelog
 
+## 0.39.5
+
+Introduces a new function, `DynamicPPL.rand_with_logpdf([rng,] ldf[, strategy])`, which allows generating new trial parameter values from a `LogDensityFunction` (previously this would have been accomplished using the `ldf.varinfo` object, but since v0.39 there is no longer a `VarInfo` inside a `LogDensityFunction`, so this function is a direct replacement).
+
 ## 0.39.4
 
 Removed the internal functions `DynamicPPL.getranges`, `DynamicPPL.vector_getrange`, and `DynamicPPL.vector_getranges` (the new LogDensityFunction construction does exactly the same thing, so this specialised function was not needed).
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "DynamicPPL"
 uuid = "366bfd00-2699-11ea-058f-f148b4cae6d8"
-version = "0.39.4"
+version = "0.39.5"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -80,6 +80,13 @@ Internally, this is accomplished using [`init!!`](@ref) on:
 OnlyAccsVarInfo
 ```
 
+When given a `LogDensityFunction` (and only a `LogDensityFunction`!) it is often useful to be able to sample new parameters from the prior of the model, for example, when searching for initial points for MCMC sampling.
+This can be done with:
+
+```@docs
+rand_with_logpdf
+```
+
 ## Condition and decondition
 
 A [`Model`](@ref) can be conditioned on a set of observations with [`AbstractPPL.condition`](@ref) or its alias [`|`](@ref).
diff --git a/src/DynamicPPL.jl b/src/DynamicPPL.jl
@@ -100,6 +100,7 @@ export AbstractVarInfo,
     # LogDensityFunction
     LogDensityFunction,
     OnlyAccsVarInfo,
+    rand_with_logpdf,
     # Leaf contexts
     AbstractContext,
     contextualize,
diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
@@ -388,3 +388,166 @@ function get_ranges_and_linked_metadata(vnv::VarNamedVector, start_offset::Int)
     end
     return all_iden_ranges, all_ranges, offset
 end
+
+####################################################
+# Generate new parameters for a LogDensityFunction #
+####################################################
+# Previously, when LogDensityFunction contained a full VarInfo, it was easy to generate
+# new 'trial' parameters for a LogDensityFunction by doing
+#
+#     new_vi = last(DynamicPPL.init!!(rng, ldf.model, ldf.varinfo, strategy))
+#
+# This is useful e.g. when initialising MCMC sampling.
+#
+# However, now that LogDensityFunction only contains ranges and link status, we need to
+# provide some functionality to generate new parameter vectors (and also return their
+# logp).
+
+struct LDFValuesAccumulator{T<:Real,N<:NamedTuple} <: AbstractAccumulator
+    # These are copied over from the LogDensityFunction
+    iden_varname_ranges::N
+    varname_ranges::Dict{VarName,RangeAndLinked}
+    # These are the actual outputs
+    values::Dict{UnitRange{Int},Vector{T}}
+    # This is the forward log-Jacobian term
+    fwd_logjac::T
+end
+function LDFValuesAccumulator(ldf::LogDensityFunction)
+    nt = ldf._iden_varname_ranges
+    T = eltype(_get_input_vector_type(ldf))
+    return LDFValuesAccumulator{T,typeof(nt)}(
+        nt, ldf._varname_ranges, Dict{UnitRange{Int},Vector{T}}(), zero(T)
+    )
+end
+
+const _LDFValuesAccName = :LDFValues
+accumulator_name(::Type{<:LDFValuesAccumulator}) = _LDFValuesAccName
+accumulate_observe!!(acc::LDFValuesAccumulator, dist, val, vn) = acc
+function accumulate_assume!!(acc::LDFValuesAccumulator, val, logjac, vn::VarName, dist)
+    ral = if DynamicPPL.getoptic(vn) === identity
+        acc.iden_varname_ranges[DynamicPPL.getsym(vn)]
+    else
+        acc.varname_ranges[vn]
+    end
+    range = ral.range
+    # Since `val` is always unlinked, we need to regenerate the vectorised value. This is a
+    # bit wasteful if `tilde_assume!!` also did the invlinking, but in general, this is not
+    # guaranteed: indeed, `tilde_assume!!` may never have seen a linked vector at all (e.g.
+    # if it was called with `InitContext{rng,<:Union{InitFromPrior,InitFromUniform}}`; which
+    # is the most likely situation where this accumulator will be used).
+    y, fwd_logjac = if ral.is_linked
+        with_logabsdet_jacobian(DynamicPPL.to_linked_vec_transform(dist), val)
+    else
+        with_logabsdet_jacobian(DynamicPPL.to_vec_transform(dist), val)
+    end
+    acc.values[range] = y
+    return LDFValuesAccumulator(
+        acc.iden_varname_ranges, acc.varname_ranges, acc.values, acc.fwd_logjac + fwd_logjac
+    )
+end
+function reset(acc::LDFValuesAccumulator{T}) where {T}
+    return LDFValuesAccumulator(
+        acc.iden_varname_ranges,
+        acc.varname_ranges,
+        Dict{UnitRange{Int},Vector{T}}(),
+        zero(T),
+    )
+end
+function Base.copy(acc::LDFValuesAccumulator)
+    return LDFValuesAccumulator(
+        acc.iden_varname_ranges, copy(acc.varname_ranges), copy(acc.values), acc.fwd_logjac
+    )
+end
+function split(acc::LDFValuesAccumulator{T}) where {T}
+    return LDFValuesAccumulator(
+        acc.iden_varname_ranges,
+        acc.varname_ranges,
+        Dict{UnitRange{Int},Vector{T}}(),
+        zero(T),
+    )
+end
+function combine(acc::LDFValuesAccumulator{T}, acc2::LDFValuesAccumulator{T}) where {T}
+    if acc.iden_varname_ranges != acc2.iden_varname_ranges ||
+        acc.varname_ranges != acc2.varname_ranges
+        throw(
+            ArgumentError(
+                "cannot combine LDFValuesAccumulators with different varname ranges"
+            ),
+        )
+    end
+    combined_values = merge(acc.values, acc2.values)
+    combined_logjac = acc.fwd_logjac + acc2.fwd_logjac
+    return LDFValuesAccumulator(
+        acc.iden_varname_ranges, acc.varname_ranges, combined_values, combined_logjac
+    )
+end
+
+"""
+    DynamicPPL.rand_with_logpdf(
+        [rng::Random.AbstractRNG,]
+        ldf::LogDensityFunction,
+        strategy::AbstractInitStrategy=InitFromPrior(),
+    )
+
+Given a LogDensityFunction, generate a new parameter vector by sampling from the model using
+the given strategy. Returns a tuple of (new parameters, logpdf).
+
+If `ldf` was generated using the call `LogDensityFunction(model, getlogdensity, vi)`, then
+the outputs of
+
+```julia
+new_params, new_logp = rand_with_logpdf(rng, ldf, strategy)
+```
+
+and
+
+```julia
+_, new_vi = DynamicPPL.init!!(rng, model, vi, strategy)
+```
+
+are guaranteed to be related in that
+
+```julia
+new_params ≈ new_vi[:]
+new_logp = getlogdensity(new_vi)
+```
+
+Furthermore, it is guaranteed that
+
+```julia
+LogDensityProblems.logdensity(ldf, new_params) ≈ new_logp
+```
+
+(but this function is more efficient, as it only performs one model evaluation to generate
+both parameters and log-density).
+
+This function therefore provides an interface to sample from the model, even though the
+LogDensityFunction no longer carries a full VarInfo with it which would ordinarily be
+required for such sampling.
+"""
+function rand_with_logpdf(
+    rng::Random.AbstractRNG,
+    ldf::LogDensityFunction,
+    strategy::AbstractInitStrategy=InitFromPrior(),
+)
+    # Create a VarInfo with the necessary accumulators to generate both parameters and logp
+    accs = (ldf_accs(ldf._getlogdensity)..., LDFValuesAccumulator(ldf))
+    vi = OnlyAccsVarInfo(accs)
+    # Initialise the model with the given strategy
+    _, new_vi = DynamicPPL.init!!(rng, ldf.model, vi, strategy)
+    # Extract the new parameters into a vector
+    x = Vector{eltype(_get_input_vector_type(ldf))}(
+        undef, LogDensityProblems.dimension(ldf)
+    )
+    values_acc = DynamicPPL.getacc(new_vi, Val(_LDFValuesAccName))
+    for (range, val) in values_acc.values
+        x[range] = val
+    end
+    lp = ldf._getlogdensity(new_vi) - values_acc.fwd_logjac
+    return x, lp
+end
+function rand_with_logpdf(
+    ldf::LogDensityFunction, strategy::AbstractInitStrategy=InitFromPrior()
+)
+    return rand_with_logpdf(Random.default_rng(), ldf, strategy)
+end
diff --git a/test/logdensityfunction.jl b/test/logdensityfunction.jl
@@ -8,6 +8,7 @@ using DistributionsAD: filldist
 using ADTypes
 using DynamicPPL.TestUtils.AD: run_ad, WithExpectedResult, NoTest
 using LinearAlgebra: I
+using Random: Xoshiro
 using Test
 using LogDensityProblems: LogDensityProblems
 
@@ -205,6 +206,28 @@ end
         end
     end
 
+    @testset "rand_with_logpdf" begin
+        @testset "$(m.f)" for m in DynamicPPL.TestUtils.DEMO_MODELS
+            @testset for linked in (false, true)
+                vi = if linked
+                    DynamicPPL.link!!(VarInfo(m), m)
+                else
+                    VarInfo(m)
+                end
+                ldf = LogDensityFunction(m, getlogjoint_internal, vi)
+                @testset for strategy in (InitFromPrior(), InitFromUniform())
+                    new_params, new_logp = DynamicPPL.rand_with_logpdf(
+                        Xoshiro(468), ldf, strategy
+                    )
+                    _, new_vi = DynamicPPL.init!!(Xoshiro(468), m, vi, strategy)
+                    @test new_params ≈ new_vi[:]
+                    @test new_logp ≈ getlogjoint_internal(new_vi)
+                    @test LogDensityProblems.logdensity(ldf, new_params) ≈ new_logp
+                end
+            end
+        end
+    end
+
     # Test that various different ways of specifying array types as arguments work with all
     # ADTypes.
     @testset "Array argument types" begin