
Commit 8de4742

Implement LogDensityFunctionWithGrad
1 parent: 8e22c05

3 files changed: +53 -27 lines

HISTORY.md (+9)

```diff
@@ -49,6 +49,15 @@ This release removes the feature of `VarInfo` where it kept track of which varia
 
 This change also affects sampling in Turing.jl.
 
+**Other changes**
+
+LogDensityProblemsAD is now removed as a dependency.
+Instead of constructing a `LogDensityProblemsAD.ADgradient` object, we now directly use `DifferentiationInterface` to calculate the gradient of the log density with respect to model parameters.
+
+In practice, this means that if you want to calculate the gradient for a model, you can do:
+
+TODO(penelopeysm): Finish this
+
 ## 0.34.2
 
 - Fixed bugs in ValuesAsInModelContext as well as DebugContext where underlying PrefixContexts were not being applied.
```
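The changelog entry above ends in a TODO; judging from the interface added in src/logdensityfunction.jl below, the intended workflow is roughly the following sketch (the model `demo` and its contents are illustrative, not part of this commit):

```julia
using DynamicPPL, Distributions
using ADTypes: AutoForwardDiff
import ForwardDiff
using LogDensityProblems

# Hypothetical model, for illustration only.
@model function demo()
    x ~ Normal()
end

model = demo()
ldf = DynamicPPL.LogDensityFunction(model, VarInfo(model))

# Wrap the log density together with an AD backend; the gradient
# "prep" is computed once here, at construction time.
ldf_grad = DynamicPPL.LogDensityFunctionWithGrad(ldf, AutoForwardDiff())

# Evaluate logp and its gradient at the current parameter values.
params = DynamicPPL.getparams(ldf)
logp, grad = LogDensityProblems.logdensity_and_gradient(ldf_grad, params)
```

Because the `DI.GradientPrep` is built in the constructor, the wrapper is meant to be created once per model and AD backend, then reused across gradient evaluations.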

src/logdensityfunction.jl (+32 -19)
```diff
@@ -106,36 +106,49 @@ Return the parameters of the wrapped varinfo as a vector.
 """
 getparams(f::LogDensityFunction) = f.varinfo[:]
 
-# LogDensityProblems interface
-function LogDensityProblems.logdensity(f::LogDensityFunction, θ::AbstractVector)
+# LogDensityProblems interface: logp (0th order)
+function LogDensityProblems.logdensity(f::LogDensityFunction, x::AbstractVector)
     context = getcontext(f)
-    vi_new = unflatten(f.varinfo, θ)
+    vi_new = unflatten(f.varinfo, x)
     return getlogp(last(evaluate!!(f.model, vi_new, context)))
 end
+function _flipped_logdensity(x::AbstractVector, f::LogDensityFunction)
+    return LogDensityProblems.logdensity(f, x)
+end
 function LogDensityProblems.capabilities(::Type{<:LogDensityFunction})
     return LogDensityProblems.LogDensityOrder{0}()
 end
 # TODO: should we instead implement and call on `length(f.varinfo)` (at least in the cases where no sampler is involved)?
 LogDensityProblems.dimension(f::LogDensityFunction) = length(getparams(f))
 
-_flipped_logdensity(θ, f) = LogDensityProblems.logdensity(f, θ)
-
+# LogDensityProblems interface: gradient (1st order)
+struct LogDensityFunctionWithGrad{V,M,C,TAD}
+    ldf::LogDensityFunction{V,M,C}
+    adtype::TAD
+    prep::DI.GradientPrep
+
+    function LogDensityFunctionWithGrad(
+        ldf::LogDensityFunction{V,M,C}, adtype::TAD
+    ) where {V,M,C,TAD}
+        # Get a set of dummy params to use for prep
+        x = ldf.varinfo[:]
+        prep = DI.prepare_gradient(_flipped_logdensity, adtype, x, DI.Constant(ldf))
+        # Store the prep with the struct
+        return new{V,M,C,TAD}(ldf, adtype, prep)
+    end
+end
+function LogDensityProblems.logdensity(f::LogDensityFunctionWithGrad, x::AbstractVector)
+    return LogDensityProblems.logdensity(f.ldf, x)
+end
+function LogDensityProblems.capabilities(::Type{<:LogDensityFunctionWithGrad})
+    return LogDensityProblems.LogDensityOrder{1}()
+end
 # By default, the AD backend to use is inferred from the context, which would
 # typically be a SamplingContext which contains a sampler.
 function LogDensityProblems.logdensity_and_gradient(
-    f::LogDensityFunction, θ::AbstractVector
-)
-    adtype = getadtype(getsampler(getcontext(f)))
-    return LogDensityProblems.logdensity_and_gradient(f, θ, adtype)
-end
-
-# Extra method allowing one to manually specify the AD backend to use, thus
-# overriding the default AD backend inferred from the sampler.
-function LogDensityProblems.logdensity_and_gradient(
-    f::LogDensityFunction, θ::AbstractVector, adtype::ADTypes.AbstractADType
+    f::LogDensityFunctionWithGrad, x::AbstractVector
 )
-    # Ensure we concretise the elements of the params.
-    θ = map(identity, θ) # TODO: Is this needed?
-    prep = DI.prepare_gradient(_flipped_logdensity, adtype, θ, DI.Constant(f))
-    return DI.value_and_gradient(_flipped_logdensity, prep, adtype, θ, DI.Constant(f))
+    return DI.value_and_gradient(
+        _flipped_logdensity, f.prep, f.adtype, x, DI.Constant(f.ldf)
+    )
 end
```
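Two things stand out in the new interface above. First, `_flipped_logdensity` exists because DifferentiationInterface differentiates with respect to the first argument only, so the parameter vector must come first and the `LogDensityFunction` is passed along as a non-differentiated `DI.Constant` context. Second, `DI.prepare_gradient` now runs once, in the inner constructor, instead of on every `logdensity_and_gradient` call as in the removed method. A standalone sketch of that DifferentiationInterface pattern (the function `g` and the constant `2.0` are illustrative, not DynamicPPL code):

```julia
using DifferentiationInterface
using ADTypes: AutoForwardDiff
import ForwardDiff
const DI = DifferentiationInterface

# DI differentiates w.r.t. the first argument; the second is wrapped in
# DI.Constant so it is treated as inactive, like the LogDensityFunction above.
g(x, c) = c * sum(abs2, x)

backend = AutoForwardDiff()
x0 = randn(3)
prep = DI.prepare_gradient(g, backend, x0, DI.Constant(2.0))  # one-time setup
val, grad = DI.value_and_gradient(g, prep, backend, x0, DI.Constant(2.0))
```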

test/ad.jl (+12 -8)
```diff
@@ -8,11 +8,12 @@
     f = DynamicPPL.LogDensityFunction(m, varinfo)
     # convert to `Vector{Float64}` to avoid `ReverseDiff` initializing the gradients to Integer 0
     # reference: https://github.com/TuringLang/DynamicPPL.jl/pull/571#issuecomment-1924304489
-    θ = convert(Vector{Float64}, varinfo[:])
+    x = convert(Vector{Float64}, varinfo[:])
     # Calculate reference logp + gradient of logp using ForwardDiff
     default_adtype = ADTypes.AutoForwardDiff()
+    ldf_with_grad = DynamicPPL.LogDensityFunctionWithGrad(f, default_adtype)
     ref_logp, ref_grad = LogDensityProblems.logdensity_and_gradient(
-        f, θ, default_adtype
+        ldf_with_grad, x
     )
 
     @testset "$adtype" for adtype in [
@@ -25,7 +26,8 @@
         if adtype isa ADTypes.AutoMooncake && varinfo isa DynamicPPL.SimpleVarInfo
             @test_broken 1 == 0
         else
-            logp, grad = LogDensityProblems.logdensity_and_gradient(f, θ, adtype)
+            ldf_with_grad = DynamicPPL.LogDensityFunctionWithGrad(f, adtype)
+            logp, grad = LogDensityProblems.logdensity_and_gradient(ldf_with_grad, x)
             @test grad ≈ ref_grad
             @test logp ≈ ref_logp
         end
@@ -62,15 +64,17 @@
     # of implementation
     struct MyEmptyAlg end
     DynamicPPL.getspace(::DynamicPPL.Sampler{MyEmptyAlg}) = ()
-    DynamicPPL.assume(rng, ::DynamicPPL.Sampler{MyEmptyAlg}, dist, vn, vi) =
-        DynamicPPL.assume(dist, vn, vi)
+    DynamicPPL.assume(
+        ::Random.AbstractRNG, ::DynamicPPL.Sampler{MyEmptyAlg}, dist, vn, vi
+    ) = DynamicPPL.assume(dist, vn, vi)
 
     # Compiling the ReverseDiff tape used to fail here
     spl = Sampler(MyEmptyAlg())
     vi = VarInfo(model)
     ldf = DynamicPPL.LogDensityFunction(vi, model, SamplingContext(spl))
-    @test LogDensityProblems.logdensity_and_gradient(
-        ldf, vi[:], AutoReverseDiff(; compile=true)
-    ) isa Any
+    ldf_grad = DynamicPPL.LogDensityFunctionWithGrad(
+        ldf, AutoReverseDiff(; compile=true)
+    )
+    @test LogDensityProblems.logdensity_and_gradient(ldf_grad, vi[:]) isa Any
 end
 end
```
