
Commit be61da6

Merge branch 'main' of https://github.com/dotnet/TorchSharp
2 parents: 42c63a6 + cd11cfe

3 files changed: +188 -0

RELEASENOTES.md (+6 lines)

```diff
@@ -2,6 +2,12 @@
 
 Releases, starting with 9/2/2021, are listed with the most recent release at the top.
 
+# NuGet Version 0.102.3
+
+__API Changes__:
+
+#1243 `fuse_conv_bn_weights` and `fuse_linear_bn_weights` are added.<br/>
+
 # NuGet Version 0.102.2
 
 __Bug Fixes__:
```
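Both new helpers fold an eval-mode BatchNorm into the preceding layer. In eval mode, BatchNorm applies a fixed affine transform built from its running mean μ (`bn_rm`), running variance σ² (`bn_rv`), affine weight γ (`bn_w`), bias β (`bn_b`), and epsilon ε (`bn_eps`); applied after a layer with weight W and bias b, the pair collapses into a single weight/bias pair:

```math
W' = W \cdot \frac{\gamma}{\sqrt{\sigma^{2} + \epsilon}}, \qquad
b' = (b - \mu) \cdot \frac{\gamma}{\sqrt{\sigma^{2} + \epsilon}} + \beta
```

In the convolutional case the scale factor is broadcast along the output-channel dimension of `W` (the input-channel dimension for transposed convolutions), which is what the `reshape(shape)` in the implementation below does.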

src/TorchSharp/Torch.cs (+75 lines)

```diff
@@ -9,6 +9,7 @@
 using System.Runtime.InteropServices;
 using System.Text;
 using System.Text.RegularExpressions;
+using TorchSharp.Modules;
 using TorchSharp.PInvoke;
 using static TorchSharp.PInvoke.NativeMethods;
 
@@ -415,6 +416,80 @@ public static void vector_to_parameters(Tensor vec, IEnumerable<Modules.Parameter>
                 CheckForErrors();
             }
         }
+
+        /// <summary>
+        /// Fuse convolutional module parameters and BatchNorm module parameters into new convolutional module parameters.
+        /// </summary>
+        /// <param name="conv_w">Convolutional weight.</param>
+        /// <param name="conv_b">Convolutional bias.</param>
+        /// <param name="bn_rm">BatchNorm running mean.</param>
+        /// <param name="bn_rv">BatchNorm running variance.</param>
+        /// <param name="bn_eps">BatchNorm epsilon.</param>
+        /// <param name="bn_w">BatchNorm weight.</param>
+        /// <param name="bn_b">BatchNorm bias.</param>
+        /// <param name="transpose">If <c>true</c>, transpose the conv weight. Defaults to <c>false</c>.</param>
+        /// <returns>Fused convolutional weight and bias.</returns>
+        public static (Parameter weight, Parameter bias) fuse_conv_bn_weights(
+            Tensor conv_w, Tensor? conv_b,
+            Tensor bn_rm, Tensor bn_rv, double bn_eps,
+            Tensor? bn_w, Tensor? bn_b,
+            bool transpose = false)
+        {
+            using var scope = NewDisposeScope();
+
+            var conv_weight_dtype = conv_w.dtype;
+            var conv_bias_dtype = conv_b?.dtype ?? conv_weight_dtype;
+            conv_b ??= zeros_like(bn_rm);
+            bn_w ??= ones_like(bn_rm);
+            bn_b ??= zeros_like(bn_rm);
+            var shape = conv_w.shape.Select(_ => 1L).ToArray();
+            if (transpose)
+                shape[1] = -1;
+            else
+                shape[0] = -1;
+
+            var bn_var_rsqrt = rsqrt(bn_rv + bn_eps);
+            var fused_conv_w = (conv_w * (bn_w * bn_var_rsqrt).reshape(shape))
+                .to(conv_weight_dtype);
+            var fused_conv_b = ((conv_b - bn_rm) * bn_var_rsqrt * bn_w + bn_b)
+                .to(conv_bias_dtype);
+
+            var weight = new Parameter(fused_conv_w, conv_w.requires_grad);
+            var bias = new Parameter(fused_conv_b, conv_b.requires_grad);
+
+            return scope.MoveToOuter(weight, bias);
+        }
+
+        /// <summary>
+        /// Fuse linear module parameters and BatchNorm module parameters into new linear module parameters.
+        /// </summary>
+        /// <param name="linear_w">Linear weight.</param>
+        /// <param name="linear_b">Linear bias.</param>
+        /// <param name="bn_rm">BatchNorm running mean.</param>
+        /// <param name="bn_rv">BatchNorm running variance.</param>
+        /// <param name="bn_eps">BatchNorm epsilon.</param>
+        /// <param name="bn_w">BatchNorm weight.</param>
+        /// <param name="bn_b">BatchNorm bias.</param>
+        /// <returns>Fused linear weight and bias.</returns>
+        public static (Parameter weight, Parameter bias) fuse_linear_bn_weights(
+            Tensor linear_w, Tensor? linear_b,
+            Tensor bn_rm, Tensor bn_rv, double bn_eps,
+            Tensor bn_w, Tensor bn_b)
+        {
+            using var scope = NewDisposeScope();
+
+            linear_b ??= zeros_like(bn_rm);
+
+            var bn_scale = bn_w * rsqrt(bn_rv + bn_eps);
+
+            var fused_w = linear_w * bn_scale.unsqueeze(-1);
+            var fused_b = (linear_b - bn_rm) * bn_scale + bn_b;
+
+            var weight = new Parameter(fused_w, linear_w.requires_grad);
+            var bias = new Parameter(fused_b, linear_b.requires_grad);
+
+            return scope.MoveToOuter(weight, bias);
+        }
     }
 }
```
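As a quick orientation, here is a minimal usage sketch (not part of the commit; the channel counts, kernel size, and epsilon are illustrative) that folds an eval-mode `BatchNorm2d` into the preceding `Conv2d`, reaching the helper as `nn.utils.fuse_conv_bn_weights` just as the test below does:

```csharp
using TorchSharp;
using static TorchSharp.torch;

// Build a conv + BN pair and put both in eval mode so the BN
// running statistics (rather than batch statistics) are used.
var conv = nn.Conv2d(3, 16, 3);
var bn = nn.BatchNorm2d(16);
conv.eval();
bn.eval();

// Fold the BatchNorm into the convolution's weight and bias.
var (weight, bias) = nn.utils.fuse_conv_bn_weights(
    conv.weight!, conv.bias,
    bn.running_mean!, bn.running_var!, bn_eps: 1e-5,
    bn.weight, bn.bias);

// A single Conv2d carrying the fused parameters replaces the pair.
var fused = nn.Conv2d(3, 16, 3);
fused.weight = weight;
fused.bias = bias;

// For any eval-mode input x: fused.call(x) ≈ bn.call(conv.call(x)).
```

The same pattern applies to `fuse_linear_bn_weights` for a `Linear` + `BatchNorm1d` pair.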

test/TorchSharpTest/TestTorchSharp.cs (+107 lines)

```diff
@@ -1,6 +1,9 @@
 // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
 
+using System;
 using System.Collections.Generic;
+using System.Linq.Expressions;
+using System.Reflection;
 using Xunit;
 
 using static TorchSharp.torch;
@@ -233,6 +236,110 @@ public void UtilsVtoP()
             Assert.Equal(data, data1);
         }
 
+        [Fact]
+        public void UtilsFusion()
+        {
+            static void SetRandomParameter<T>(
+                T module,
+                Expression<Func<T, Modules.Parameter>> parameterProperty)
+            {
+                var propertyExpression = (MemberExpression)parameterProperty.Body;
+                var property = (PropertyInfo)propertyExpression.Member;
+                var parameter = (Modules.Parameter)property.GetValue(module)!;
+                var randomTensor = rand_like(
+                    parameter,
+                    parameter.dtype,
+                    parameter.device) * 100;
+                var newParameter = new Modules.Parameter(randomTensor, parameter.requires_grad);
+                property.SetValue(module, newParameter);
+            }
+
+            static void SetRandomTensor<T>(
+                T module,
+                Expression<Func<T, Tensor>> tensorProperty)
+            {
+                var propertyExpression = (MemberExpression)tensorProperty.Body;
+                var property = (PropertyInfo)propertyExpression.Member;
+                var tensor = (Tensor)property.GetValue(module)!;
+                var newTensor = rand_like(
+                    tensor,
+                    tensor.dtype,
+                    tensor.device,
+                    tensor.requires_grad) * 100;
+                property.SetValue(module, newTensor);
+            }
+
+            static void AssertRelativelyEqual(
+                Tensor expected, Tensor actual, double tolerance = 1e-5)
+            {
+                Assert.Equal(expected.size(), actual.size());
+                var difference = (expected - actual) / expected;
+                var maxDifference = (double)difference.abs().max();
+                Assert.InRange(maxDifference, -tolerance, tolerance);
+            }
+
+            {
+                // linear
+                var x = rand(new long[] { 20, 20 }) * 100;
+
+                var linear = nn.Linear(20, 5);
+                linear.eval();
+                SetRandomParameter(linear, x => x.weight!);
+                SetRandomParameter(linear, x => x.bias!);
+
+                var batchNorm1d = nn.BatchNorm1d(5, eps: 1);
+                batchNorm1d.eval();
+                SetRandomParameter(batchNorm1d, x => x.weight!);
+                SetRandomParameter(batchNorm1d, x => x.bias!);
+                SetRandomTensor(batchNorm1d, x => x.running_mean!);
+                SetRandomTensor(batchNorm1d, x => x.running_var!);
+
+                (var weight, var bias) = nn.utils.fuse_linear_bn_weights(
+                    linear.weight!, linear.bias,
+                    batchNorm1d.running_mean!, batchNorm1d.running_var!,
+                    bn_eps: 1, batchNorm1d.weight!, batchNorm1d.bias!);
+
+                var newLinear = nn.Linear(20, 5);
+                newLinear.eval();
+                newLinear.weight = weight;
+                newLinear.bias = bias;
+
+                AssertRelativelyEqual(
+                    batchNorm1d.call(linear.call(x)),
+                    newLinear.call(x));
+            }
+
+            {
+                // conv
+                var x = rand(new long[] { 20, 20, 20, 20 }) * 100;
+                var conv = nn.Conv2d(20, 5, 3);
+                conv.eval();
+                SetRandomParameter(conv, x => x.weight!);
+                SetRandomParameter(conv, x => x.bias!);
+
+                var batchNorm2d = nn.BatchNorm2d(5, eps: 13);
+                batchNorm2d.eval();
+                SetRandomParameter(batchNorm2d, x => x.weight!);
+                SetRandomParameter(batchNorm2d, x => x.bias!);
+                SetRandomTensor(batchNorm2d, x => x.running_mean!);
+                SetRandomTensor(batchNorm2d, x => x.running_var!);
+
+                (var weight, var bias) = nn.utils.fuse_conv_bn_weights(
+                    conv.weight!, conv.bias,
+                    batchNorm2d.running_mean!, batchNorm2d.running_var!,
+                    bn_eps: 13, batchNorm2d.weight!, batchNorm2d.bias!);
+
+                var newConv = nn.Conv2d(20, 5, 3);
+                newConv.eval();
+                newConv.weight = weight;
+                newConv.bias = bias;
+
+                AssertRelativelyEqual(
+                    batchNorm2d.call(conv.call(x)),
+                    newConv.call(x));
+            }
+        }
+
         [Fact(Skip = "Intermittently fails")]
         public void AllowTF32()
         {
```
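Both test blocks run the modules in eval mode, randomize every parameter and running statistic with values scaled by 100, and pick deliberately non-default epsilons (1 for the linear case, 13 for the conv case), so a fusion that dropped or misplaced the epsilon or variance term would miss the 1e-5 relative tolerance by a wide margin.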
