diff --git a/docs/src/implementer_guide.md b/docs/src/implementer_guide.md index 8dfafa3..4a14e43 100644 --- a/docs/src/implementer_guide.md +++ b/docs/src/implementer_guide.md @@ -29,11 +29,14 @@ They are just listed here to help readers figure out the code structure: - `derivative` calls `jacobian` - `gradient` calls `jacobian` - `hessian` calls `jacobian` and `gradient` + - `second_derivative` calls `derivative` - `value_and_jacobian` calls `jacobian` - `value_and_derivative` calls `value_and_jacobian` - `value_and_gradient` calls `value_and_jacobian` - `value_and_hessian` calls `jacobian` and `gradient` + - `value_and_second_derivative` calls `second_derivative` - `value_gradient_and_hessian` calls `value_and_jacobian` and `gradient` + - `value_derivative_and_second_derivative` calls `value_and_derivative` and `derivative` - `pushforward_function` calls `jacobian` - `value_and_pushforward_function` calls `pushforward_function` - `pullback_function` calls `value_and_pullback_function` diff --git a/docs/src/user_guide.md b/docs/src/user_guide.md index 77ba2c2..e09768c 100644 --- a/docs/src/user_guide.md +++ b/docs/src/user_guide.md @@ -53,24 +53,27 @@ AbstractDifferentiation.HigherOrderBackend ## Derivatives -The following list of functions can be used to request the derivative, gradient, Jacobian or Hessian without the function value. +The following list of functions can be used to request the derivative, gradient, Jacobian, second derivative, or Hessian without the function value. ```@docs AbstractDifferentiation.derivative AbstractDifferentiation.gradient AbstractDifferentiation.jacobian +AbstractDifferentiation.second_derivative AbstractDifferentiation.hessian ``` ## Value and derivatives -The following list of functions can be used to request the function value along with its derivative, gradient, Jacobian or Hessian. You can also request the function value, its gradient and Hessian for single-input functions. 
+The following list of functions can be used to request the function value along with its derivative, gradient, Jacobian, second derivative, or Hessian. You can also request the function value, its derivative (or its gradient) and its second derivative (or Hessian) for single-input functions. ```@docs AbstractDifferentiation.value_and_derivative AbstractDifferentiation.value_and_gradient AbstractDifferentiation.value_and_jacobian +AbstractDifferentiation.value_and_second_derivative AbstractDifferentiation.value_and_hessian +AbstractDifferentiation.value_derivative_and_second_derivative AbstractDifferentiation.value_gradient_and_hessian ``` diff --git a/ext/AbstractDifferentiationForwardDiffExt.jl b/ext/AbstractDifferentiationForwardDiffExt.jl index ff0c52c..b8e82cf 100644 --- a/ext/AbstractDifferentiationForwardDiffExt.jl +++ b/ext/AbstractDifferentiationForwardDiffExt.jl @@ -61,6 +61,12 @@ function AD.hessian(ba::AD.ForwardDiffBackend, f, x::AbstractArray) return (ForwardDiff.hessian(f, x, cfg),) end +function AD.value_and_derivative(::AD.ForwardDiffBackend, f, x::Real) + T = typeof(ForwardDiff.Tag(f, typeof(x))) + ydual = f(ForwardDiff.Dual{T}(x, one(x))) + return ForwardDiff.value(T, ydual), (ForwardDiff.partials(T, ydual, 1),) +end + function AD.value_and_gradient(ba::AD.ForwardDiffBackend, f, x::AbstractArray) result = DiffResults.GradientResult(x) cfg = ForwardDiff.GradientConfig(f, x, chunk(ba, x)) @@ -68,6 +74,16 @@ function AD.value_and_gradient(ba::AD.ForwardDiffBackend, f, x::AbstractArray) return DiffResults.value(result), (DiffResults.derivative(result),) end +function AD.value_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real) + T = typeof(ForwardDiff.Tag(f, typeof(x))) + xdual = ForwardDiff.Dual{T}(x, one(x)) + T2 = typeof(ForwardDiff.Tag(f, typeof(xdual))) + ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual))) + v = ForwardDiff.value(T, ForwardDiff.value(T2, ydual)) + d2 = ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1) + 
return v, (d2,) +end + function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x) result = DiffResults.HessianResult(x) cfg = ForwardDiff.HessianConfig(f, result, x, chunk(ba, x)) @@ -75,6 +91,17 @@ function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x) return DiffResults.value(result), (DiffResults.hessian(result),) end +function AD.value_derivative_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real) + T = typeof(ForwardDiff.Tag(f, typeof(x))) + xdual = ForwardDiff.Dual{T}(x, one(x)) + T2 = typeof(ForwardDiff.Tag(f, typeof(xdual))) + ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual))) + v = ForwardDiff.value(T, ForwardDiff.value(T2, ydual)) + d = ForwardDiff.partials(T, ForwardDiff.value(T2, ydual), 1) + d2 = ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1) + return v, (d,), (d2,) +end + @inline step_toward(x::Number, v::Number, h) = x + h * v # support arrays and tuples @noinline step_toward(x, v, h) = x .+ h .* v diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index a868b37..e7a6fd5 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -85,6 +85,24 @@ function jacobian(ab::HigherOrderBackend, f, xs...) return jacobian(lowest(ab), f, xs...) end +""" + AD.second_derivative(ab::AD.AbstractBackend, f, x) + +Compute the second derivative of `f` with respect to the input `x` using the backend `ab`. + +The function returns a tuple containing a single value because `second_derivative` currently only supports a single input. +""" +function second_derivative(ab::AbstractBackend, f, x) + if x isa Tuple + # only support computation of second derivative for functions with single input argument + x = only(x) + end + return derivative(second_lowest(ab), x -> begin + d = derivative(lowest(ab), f, x) + return d[1] # derivative returns a tuple + end, x) +end + """ AD.hessian(ab::AD.AbstractBackend, f, x) @@ -139,12 +157,23 @@ function value_and_jacobian(ab::AbstractBackend, f, xs...) 
return value, jacs end +""" + AD.value_and_second_derivative(ab::AD.AbstractBackend, f, x) + +Return the tuple `(v, d2)` of the function value `v = f(x)` and the second derivative `d2 = AD.second_derivative(ab, f, x)`. + +See also [`AbstractDifferentiation.second_derivative`](@ref). +""" +function value_and_second_derivative(ab::AbstractBackend, f, x) + return f(x), second_derivative(ab, f, x) +end + """ AD.value_and_hessian(ab::AD.AbstractBackend, f, x) Return the tuple `(v, H)` of the function value `v = f(x)` and the Hessian `H = AD.hessian(ab, f, x)`. -See also [`AbstractDifferentiation.hessian`](@ref). +See also [`AbstractDifferentiation.hessian`](@ref). """ function value_and_hessian(ab::AbstractBackend, f, x) if x isa Tuple @@ -176,6 +205,28 @@ function value_and_hessian(ab::HigherOrderBackend, f, x) return value, hess end +""" + AD.value_derivative_and_second_derivative(ab::AD.AbstractBackend, f, x) + +Return the tuple `(v, d, d2)` of the function value `v = f(x)`, the first derivative `d = AD.derivative(ab, f, x)`, and the second derivative `d2 = AD.second_derivative(ab, f, x)`. 
+""" +function value_derivative_and_second_derivative(ab::AbstractBackend, f, x) + if x isa Tuple + # only support computation of second derivative for functions with single input argument + x = only(x) + end + + value = f(x) + deriv, secondderiv = value_and_derivative( + second_lowest(ab), _x -> begin + d = derivative(lowest(ab), f, _x) + return d[1] # derivative returns a tuple + end, x + ) + + return value, (deriv,), secondderiv +end + """ AD.value_gradient_and_hessian(ab::AD.AbstractBackend, f, x) diff --git a/test/finitedifferences.jl b/test/finitedifferences.jl index 568f0e9..df97b5e 100644 --- a/test/finitedifferences.jl +++ b/test/finitedifferences.jl @@ -21,6 +21,9 @@ using FiniteDifferences @testset "Jacobian" begin test_jacobians(backend) end + @testset "Second derivative" begin + test_second_derivatives(backend) + end @testset "Hessian" begin test_hessians(backend) end diff --git a/test/forwarddiff.jl b/test/forwarddiff.jl index 0b6bf26..47a95c9 100644 --- a/test/forwarddiff.jl +++ b/test/forwarddiff.jl @@ -19,6 +19,9 @@ using ForwardDiff @testset "Jacobian" begin test_jacobians(backend) end + @testset "Second derivative" begin + test_second_derivatives(backend) + end @testset "Hessian" begin test_hessians(backend) end diff --git a/test/reversediff.jl b/test/reversediff.jl index 06da46f..ed6ad21 100644 --- a/test/reversediff.jl +++ b/test/reversediff.jl @@ -14,6 +14,9 @@ using ReverseDiff @testset "Jacobian" begin test_jacobians(backend) end + @testset "Second derivative" begin + test_second_derivatives(backend) + end @testset "Hessian" begin test_hessians(backend) end diff --git a/test/ruleconfig.jl b/test/ruleconfig.jl index 0a97b66..a4b2dd9 100644 --- a/test/ruleconfig.jl +++ b/test/ruleconfig.jl @@ -21,6 +21,9 @@ using Zygote @testset "j′vp" begin test_j′vp(backend) end + @testset "Second derivative" begin + test_second_derivatives(backend) + end @testset "Lazy Derivative" begin test_lazy_derivatives(backend) end diff --git a/test/test_utils.jl 
b/test/test_utils.jl index 6eb4677..22e00c3 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -6,6 +6,7 @@ Random.seed!(1234) fder(x, y) = exp(y) * x + y * log(x) dfderdx(x, y) = exp(y) + y * 1 / x dfderdy(x, y) = exp(y) * x + log(x) +dfderdxdx(x, y) = -y / x^2 fgrad(x, y) = prod(x) + sum(y ./ (1:length(y))) dfgraddx(x, y) = prod(x) ./ x @@ -143,6 +144,44 @@ function test_jacobians(backend; multiple_inputs=true, test_types=true) @test yvec == yvec2 end +function test_second_derivatives(backend; test_types=true) + # explicit test that AbstractDifferentiation throws an error: + # second derivatives do not support a tuple of inputs + @test_throws ArgumentError AD.second_derivative( + backend, x -> fder(x, yscalar), (xscalar, yscalar) + ) + @test_throws MethodError AD.second_derivative( + backend, x -> fder(x, yscalar), xscalar, yscalar + ) + + # test if single input (no tuple) works + dder1 = AD.second_derivative(backend, x -> fder(x, yscalar), xscalar) + if test_types + @test only(dder1) isa Float64 + end + @test dfderdxdx(xscalar, yscalar) ≈ only(dder1) atol = 1e-8 + valscalar, dder2 = AD.value_and_second_derivative( + backend, x -> fder(x, yscalar), xscalar + ) + if test_types + @test valscalar isa Float64 + @test only(dder2) isa Float64 + end + @test valscalar == fder(xscalar, yscalar) + @test dder2 == dder1 + valscalar, der, dder3 = AD.value_derivative_and_second_derivative( + backend, x -> fder(x, yscalar), xscalar + ) + if test_types + @test valscalar isa Float64 + @test only(der) isa Float64 + @test only(dder3) isa Float64 + end + @test valscalar == fder(xscalar, yscalar) + @test der == AD.derivative(backend, x -> fder(x, yscalar), xscalar) + @test dder3 == dder1 +end + function test_hessians(backend; multiple_inputs=false, test_types=true) if multiple_inputs # ... but