diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Fpm/FPGrowthModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Fpm/FPGrowthModelTests.cs
new file mode 100644
index 000000000..5ef3c8022
--- /dev/null
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Fpm/FPGrowthModelTests.cs
@@ -0,0 +1,101 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Microsoft.Spark.E2ETest.IpcTests.ML.Feature;
+using Microsoft.Spark.ML.Feature.Param;
+using Microsoft.Spark.ML.Fpm;
+using Microsoft.Spark.Sql;
+using Microsoft.Spark.Sql.Types;
+using Microsoft.Spark.UnitTest.TestUtils;
+using Microsoft.Spark.Utils;
+using Xunit;
+
+namespace Microsoft.Spark.E2ETest.IpcTests.ML.Fpm
+{
+    [Collection("Spark E2E Tests")]
+    public class FPGrowthModelTests : FeatureBaseTests<FPGrowthModel>
+    {
+        private readonly SparkSession _spark;
+
+        public FPGrowthModelTests(SparkFixture fixture) : base(fixture)
+        {
+            _spark = fixture.Spark;
+        }
+
+        /// <summary>
+        /// Create a <see cref="FPGrowthModel"/> and test the
+        /// available methods. Test the FeatureBase methods using <see cref="TestFeatureBase"/>.
+        /// </summary>
+        [Fact]
+        public void TestFPGrowthModel()
+        {
+            // The setters act on the underlying JVM estimator, so the wrappers they return
+            // are not kept.
+            var fpGrowth = new FPGrowth();
+            fpGrowth.SetMinSupport(0.2)
+                .SetMinConfidence(0.7);
+
+            DataFrame dataFrame = _spark.CreateDataFrame(
+                new List<GenericRow>
+                {
+                    new GenericRow(new object[] { new string[] { "r", "z", "h", "k", "p" }}),
+                    new GenericRow(new object[] { new string[] { "z", "y", "x", "w", "v", "u", "t", "s" }}),
+                    new GenericRow(new object[] { new string[] { "s", "x", "o", "n", "r" }}),
+                    new GenericRow(new object[] { new string[] { "x", "z", "y", "m", "t", "s", "q", "e" }}),
+                    new GenericRow(new object[] { new string[] { "z" }}),
+                    new GenericRow(new object[] { new string[] { "x", "z", "y", "r", "q", "t", "p" }}),
+                },
+                new StructType(new List<StructField>
+                {
+                    new StructField("items", new ArrayType(new StringType())),
+                }));
+
+            FPGrowthModel fpm = fpGrowth.Fit(dataFrame);
+            fpm.SetPredictionCol("newPrediction");
+            Assert.Equal(0.2, fpm.GetMinSupport());
+            Assert.Equal(0.7, fpm.GetMinConfidence());
+
+            DataFrame newData = _spark.CreateDataFrame(
+                new List<GenericRow>
+                {
+                    new GenericRow(new object[] { new string[] { "t", "s" }}),
+                },
+                new StructType(new List<StructField>
+                {
+                    new StructField("items", new ArrayType(new StringType())),
+                }));
+
+            // Sort before comparing so the assertion does not depend on item order.
+            string[] expected = { "x", "y", "z" };
+            string[] prediction = TypeConverter.ConvertTo<string[]>(
+                fpm.Transform(newData).Select("newPrediction").First().Values[0]);
+            Array.Sort(prediction);
+            Assert.Equal(expected, prediction);
+
+            using (var tempDirectory = new TemporaryDirectory())
+            {
+                string savePath = Path.Join(tempDirectory.Path, "fpm");
+                fpm.Save(savePath);
+
+                FPGrowthModel loadedFPGrowthModel = FPGrowthModel.Load(savePath);
+                Assert.Equal(fpm.Uid(), loadedFPGrowthModel.Uid());
+
+                // The reloaded model must give the same prediction as the in-memory one.
+                string[] newPrediction = TypeConverter.ConvertTo<string[]>(loadedFPGrowthModel
+                    .Transform(newData).Select("newPrediction").First().Values[0]);
+                Array.Sort(newPrediction);
+                Assert.Equal(expected, newPrediction);
+            }
+
+            TestFeatureBase(fpm, "itemsCol", "items");
+            TestFeatureBase(fpm, "minConfidence", 0.7);
+            TestFeatureBase(fpm, "minSupport", 0.2);
+            TestFeatureBase(fpm, "numPartitions", 2);
+            TestFeatureBase(fpm, "predictionCol", "prediction");
+        }
+    }
+}
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Fpm/FPGrowthTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Fpm/FPGrowthTests.cs
new file mode 100644
index 000000000..853208fe8
--- /dev/null
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Fpm/FPGrowthTests.cs
@@ -0,0 +1,75 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Microsoft.Spark.E2ETest.IpcTests.ML.Feature;
+using Microsoft.Spark.ML.Feature.Param;
+using Microsoft.Spark.ML.Fpm;
+using Microsoft.Spark.Sql;
+using Microsoft.Spark.Sql.Types;
+using Microsoft.Spark.UnitTest.TestUtils;
+using Microsoft.Spark.Utils;
+using Xunit;
+
+namespace Microsoft.Spark.E2ETest.IpcTests.ML.Fpm
+{
+    [Collection("Spark E2E Tests")]
+    public class FPGrowthTests : FeatureBaseTests<FPGrowth>
+    {
+        private readonly SparkSession _spark;
+
+        public FPGrowthTests(SparkFixture fixture) : base(fixture)
+        {
+            _spark = fixture.Spark;
+        }
+
+        /// <summary>
+        /// Create a <see cref="FPGrowth"/> and test the
+        /// available methods. Test the FeatureBase methods using <see cref="TestFeatureBase"/>.
+        /// </summary>
+        [Fact]
+        public void TestFPGrowth()
+        {
+            double minSupport = 0.2;
+            double minConfidence = 0.7;
+
+            var fpGrowth = new FPGrowth();
+            fpGrowth.SetMinSupport(minSupport)
+                .SetMinConfidence(minConfidence);
+
+            // The getters are read from the original wrapper: the values set through the
+            // chained calls above must be visible on it.
+            Assert.Equal(minSupport, fpGrowth.GetMinSupport());
+            Assert.Equal(minConfidence, fpGrowth.GetMinConfidence());
+
+            DataFrame dataFrame = _spark.CreateDataFrame(
+                new List<GenericRow>
+                {
+                    new GenericRow(new object[] { new string[] { "r", "z", "h", "k", "p" }}),
+                },
+                new StructType(new List<StructField>
+                {
+                    new StructField("items", new ArrayType(new StringType())),
+                }));
+
+            // Fitting must succeed and hand back a model wrapper.
+            FPGrowthModel fpm = fpGrowth.Fit(dataFrame);
+            Assert.NotNull(fpm);
+
+            using (var tempDirectory = new TemporaryDirectory())
+            {
+                string savePath = Path.Join(tempDirectory.Path, "fpgrowth");
+                fpGrowth.Save(savePath);
+
+                FPGrowth loadedFPGrowth = FPGrowth.Load(savePath);
+                Assert.Equal(fpGrowth.Uid(), loadedFPGrowth.Uid());
+            }
+
+            TestFeatureBase(fpGrowth, "itemsCol", "items");
+            TestFeatureBase(fpGrowth, "numPartitions", 2);
+        }
+    }
+}
diff --git a/src/csharp/Microsoft.Spark/ML/Fpm/FPGrowth.cs b/src/csharp/Microsoft.Spark/ML/Fpm/FPGrowth.cs
new file mode 100644
index 000000000..1dfc29b43
--- /dev/null
+++ b/src/csharp/Microsoft.Spark/ML/Fpm/FPGrowth.cs
@@ -0,0 +1,164 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.Spark.Interop;
+using Microsoft.Spark.Interop.Ipc;
+using Microsoft.Spark.ML.Feature;
+using Microsoft.Spark.Sql;
+
+namespace Microsoft.Spark.ML.Fpm
+{
+    /// <summary>
+    /// <see cref="FPGrowth"/> wraps the JVM estimator <c>org.apache.spark.ml.fpm.FPGrowth</c>.
+    /// </summary>
+    /// <remarks>
+    /// Every setter forwards to the corresponding JVM setter and wraps the reference it
+    /// returns in a new <see cref="FPGrowth"/>.
+    /// </remarks>
+    public class FPGrowth :
+        JavaEstimator<FPGrowthModel>,
+        IJavaMLWritable,
+        IJavaMLReadable<FPGrowth>
+    {
+        private static readonly string s_className = "org.apache.spark.ml.fpm.FPGrowth";
+
+        /// <summary>
+        /// Creates a <see cref="FPGrowth"/> without any parameters.
+        /// </summary>
+        public FPGrowth() : base(s_className)
+        {
+        }
+
+        /// <summary>
+        /// Creates a <see cref="FPGrowth"/> with a UID that is used to give the
+        /// <see cref="FPGrowth"/> a unique ID.
+        /// </summary>
+        /// <param name="uid">An immutable unique ID for the object and its derivatives.</param>
+        public FPGrowth(string uid) : base(s_className, uid)
+        {
+        }
+
+        internal FPGrowth(JvmObjectReference jvmObject) : base(jvmObject)
+        {
+        }
+
+        /// <summary>
+        /// Sets value for itemsCol
+        /// </summary>
+        /// <param name="value">items column name</param>
+        /// <returns>New <see cref="FPGrowth"/> object</returns>
+        public FPGrowth SetItemsCol(string value) =>
+            WrapAsFPGrowth(Reference.Invoke("setItemsCol", (object)value));
+
+        /// <summary>
+        /// Sets value for minConfidence
+        /// </summary>
+        /// <param name="value">minimal confidence for generating Association Rule</param>
+        /// <returns>New <see cref="FPGrowth"/> object</returns>
+        public FPGrowth SetMinConfidence(double value) =>
+            WrapAsFPGrowth(Reference.Invoke("setMinConfidence", (object)value));
+
+        /// <summary>
+        /// Sets value for minSupport
+        /// </summary>
+        /// <param name="value">the minimal support level of a frequent pattern</param>
+        /// <returns>New <see cref="FPGrowth"/> object</returns>
+        public FPGrowth SetMinSupport(double value) =>
+            WrapAsFPGrowth(Reference.Invoke("setMinSupport", (object)value));
+
+        /// <summary>
+        /// Sets value for numPartitions
+        /// </summary>
+        /// <param name="value">Number of partitions used by parallel FP-growth</param>
+        /// <returns>New <see cref="FPGrowth"/> object</returns>
+        public FPGrowth SetNumPartitions(int value) =>
+            WrapAsFPGrowth(Reference.Invoke("setNumPartitions", (object)value));
+
+        /// <summary>
+        /// Sets value for predictionCol
+        /// </summary>
+        /// <param name="value">prediction column name</param>
+        /// <returns>New <see cref="FPGrowth"/> object</returns>
+        public FPGrowth SetPredictionCol(string value) =>
+            WrapAsFPGrowth(Reference.Invoke("setPredictionCol", (object)value));
+
+        /// <summary>
+        /// Gets itemsCol value
+        /// </summary>
+        /// <returns>itemsCol: items column name</returns>
+        public string GetItemsCol() =>
+            (string)Reference.Invoke("getItemsCol");
+
+        /// <summary>
+        /// Gets minConfidence value
+        /// </summary>
+        /// <returns>minConfidence: minimal confidence for generating Association Rule</returns>
+        public double GetMinConfidence() =>
+            (double)Reference.Invoke("getMinConfidence");
+
+        /// <summary>
+        /// Gets minSupport value
+        /// </summary>
+        /// <returns>minSupport: the minimal support level of a frequent pattern</returns>
+        public double GetMinSupport() =>
+            (double)Reference.Invoke("getMinSupport");
+
+        /// <summary>
+        /// Gets numPartitions value
+        /// </summary>
+        /// <returns>numPartitions: Number of partitions used by parallel FP-growth</returns>
+        public int GetNumPartitions() =>
+            (int)Reference.Invoke("getNumPartitions");
+
+        /// <summary>
+        /// Gets predictionCol value
+        /// </summary>
+        /// <returns>predictionCol: prediction column name</returns>
+        public string GetPredictionCol() =>
+            (string)Reference.Invoke("getPredictionCol");
+
+        /// <summary>Fits a model to the input data.</summary>
+        /// <param name="dataset">The <see cref="DataFrame"/> to fit the model to.</param>
+        /// <returns>The fitted <see cref="FPGrowthModel"/>.</returns>
+        public override FPGrowthModel Fit(DataFrame dataset) =>
+            new FPGrowthModel(
+                (JvmObjectReference)Reference.Invoke("fit", dataset));
+
+        /// <summary>
+        /// Loads the <see cref="FPGrowth"/> that was previously saved using Save(string).
+        /// </summary>
+        /// <param name="path">The path the previous <see cref="FPGrowth"/> was saved to</param>
+        /// <returns>New <see cref="FPGrowth"/> object, loaded from path.</returns>
+        public static FPGrowth Load(string path) => WrapAsFPGrowth(
+            SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path));
+
+        /// <summary>
+        /// Saves the object so that it can be loaded later using Load. Note that these objects
+        /// can be shared with Scala by Loading or Saving in Scala.
+        /// </summary>
+        /// <param name="path">The path to save the object to</param>
+        public void Save(string path) => Reference.Invoke("save", path);
+
+        /// <summary>
+        /// Get the corresponding JavaMLWriter instance.
+        /// </summary>
+        /// <returns>a <see cref="JavaMLWriter"/> instance for this ML instance.</returns>
+        public JavaMLWriter Write() =>
+            new JavaMLWriter((JvmObjectReference)Reference.Invoke("write"));
+
+        /// <summary>
+        /// Get the corresponding JavaMLReader instance.
+        /// </summary>
+        /// <returns>an <see cref="JavaMLReader{T}"/> instance for this ML instance.</returns>
+        public JavaMLReader<FPGrowth> Read() =>
+            new JavaMLReader<FPGrowth>((JvmObjectReference)Reference.Invoke("read"));
+
+        /// <summary>
+        /// Wraps a JVM reference returned by a setter or by <c>load</c> in a new
+        /// <see cref="FPGrowth"/>.
+        /// </summary>
+        private static FPGrowth WrapAsFPGrowth(object obj) =>
+            new FPGrowth((JvmObjectReference)obj);
+    }
+}
diff --git a/src/csharp/Microsoft.Spark/ML/Fpm/FPGrowthModel.cs b/src/csharp/Microsoft.Spark/ML/Fpm/FPGrowthModel.cs
new file mode 100644
index 000000000..351ec8778
--- /dev/null
+++ b/src/csharp/Microsoft.Spark/ML/Fpm/FPGrowthModel.cs
@@ -0,0 +1,152 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections.Generic;
+using Microsoft.Spark.Interop;
+using Microsoft.Spark.Interop.Ipc;
+using Microsoft.Spark.ML.Feature;
+using Microsoft.Spark.Sql;
+
+namespace Microsoft.Spark.ML.Fpm
+{
+    /// <summary>
+    /// <see cref="FPGrowthModel"/> wraps the JVM model
+    /// <c>org.apache.spark.ml.fpm.FPGrowthModel</c>.
+    /// </summary>
+    /// <remarks>
+    /// Instances are created by fitting an <see cref="FPGrowth"/> or by <see cref="Load"/>.
+    /// </remarks>
+    public class FPGrowthModel :
+        JavaModel<FPGrowthModel>,
+        IJavaMLWritable,
+        IJavaMLReadable<FPGrowthModel>
+    {
+        private static readonly string s_className = "org.apache.spark.ml.fpm.FPGrowthModel";
+
+        internal FPGrowthModel(JvmObjectReference jvmObject) : base(jvmObject)
+        {
+        }
+
+        /// <summary>
+        /// Sets value for itemsCol
+        /// </summary>
+        /// <param name="value">items column name</param>
+        /// <returns>New <see cref="FPGrowthModel"/> object</returns>
+        public FPGrowthModel SetItemsCol(string value) =>
+            WrapAsFPGrowthModel(Reference.Invoke("setItemsCol", (object)value));
+
+        /// <summary>
+        /// Sets value for minConfidence
+        /// </summary>
+        /// <param name="value">minimal confidence for generating Association Rule</param>
+        /// <returns>New <see cref="FPGrowthModel"/> object</returns>
+        public FPGrowthModel SetMinConfidence(double value) =>
+            WrapAsFPGrowthModel(Reference.Invoke("setMinConfidence", (object)value));
+
+        /// <summary>
+        /// Sets value for minSupport
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): the Spark Scala API reference lists setters on FPGrowthModel only for
+        /// itemsCol, minConfidence and predictionCol - confirm that <c>setMinSupport</c>
+        /// resolves on the JVM model before relying on this method.
+        /// </remarks>
+        /// <param name="value">the minimal support level of a frequent pattern</param>
+        /// <returns>New <see cref="FPGrowthModel"/> object</returns>
+        public FPGrowthModel SetMinSupport(double value) =>
+            WrapAsFPGrowthModel(Reference.Invoke("setMinSupport", (object)value));
+
+        /// <summary>
+        /// Sets value for numPartitions
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): the Spark Scala API reference lists setters on FPGrowthModel only for
+        /// itemsCol, minConfidence and predictionCol - confirm that <c>setNumPartitions</c>
+        /// resolves on the JVM model before relying on this method.
+        /// </remarks>
+        /// <param name="value">Number of partitions used by parallel FP-growth</param>
+        /// <returns>New <see cref="FPGrowthModel"/> object</returns>
+        public FPGrowthModel SetNumPartitions(int value) =>
+            WrapAsFPGrowthModel(Reference.Invoke("setNumPartitions", (object)value));
+
+        /// <summary>
+        /// Sets value for predictionCol
+        /// </summary>
+        /// <param name="value">prediction column name</param>
+        /// <returns>New <see cref="FPGrowthModel"/> object</returns>
+        public FPGrowthModel SetPredictionCol(string value) =>
+            WrapAsFPGrowthModel(Reference.Invoke("setPredictionCol", (object)value));
+
+        /// <summary>
+        /// Gets itemsCol value
+        /// </summary>
+        /// <returns>itemsCol: items column name</returns>
+        public string GetItemsCol() =>
+            (string)Reference.Invoke("getItemsCol");
+
+        /// <summary>
+        /// Gets minConfidence value
+        /// </summary>
+        /// <returns>minConfidence: minimal confidence for generating Association Rule</returns>
+        public double GetMinConfidence() =>
+            (double)Reference.Invoke("getMinConfidence");
+
+        /// <summary>
+        /// Gets minSupport value
+        /// </summary>
+        /// <returns>minSupport: the minimal support level of a frequent pattern</returns>
+        public double GetMinSupport() =>
+            (double)Reference.Invoke("getMinSupport");
+
+        /// <summary>
+        /// Gets numPartitions value
+        /// </summary>
+        /// <returns>numPartitions: Number of partitions used by parallel FP-growth</returns>
+        public int GetNumPartitions() =>
+            (int)Reference.Invoke("getNumPartitions");
+
+        /// <summary>
+        /// Gets predictionCol value
+        /// </summary>
+        /// <returns>predictionCol: prediction column name</returns>
+        public string GetPredictionCol() =>
+            (string)Reference.Invoke("getPredictionCol");
+
+        /// <summary>
+        /// Loads the <see cref="FPGrowthModel"/> that was previously saved using Save(string).
+        /// </summary>
+        /// <param name="path">The path the previous <see cref="FPGrowthModel"/> was saved to</param>
+        /// <returns>New <see cref="FPGrowthModel"/> object, loaded from path.</returns>
+        public static FPGrowthModel Load(string path) => WrapAsFPGrowthModel(
+            SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path));
+
+        /// <summary>
+        /// Saves the object so that it can be loaded later using Load. Note that these objects
+        /// can be shared with Scala by Loading or Saving in Scala.
+        /// </summary>
+        /// <param name="path">The path to save the object to</param>
+        public void Save(string path) => Reference.Invoke("save", path);
+
+        /// <summary>
+        /// Get the corresponding JavaMLWriter instance.
+        /// </summary>
+        /// <returns>a <see cref="JavaMLWriter"/> instance for this ML instance.</returns>
+        public JavaMLWriter Write() =>
+            new JavaMLWriter((JvmObjectReference)Reference.Invoke("write"));
+
+        /// <summary>
+        /// Get the corresponding JavaMLReader instance.
+        /// </summary>
+        /// <returns>an <see cref="JavaMLReader{T}"/> instance for this ML instance.</returns>
+        public JavaMLReader<FPGrowthModel> Read() =>
+            new JavaMLReader<FPGrowthModel>((JvmObjectReference)Reference.Invoke("read"));
+
+        /// <summary>
+        /// Wraps a JVM reference returned by a setter or by <c>load</c> in a new
+        /// <see cref="FPGrowthModel"/>.
+        /// </summary>
+        private static FPGrowthModel WrapAsFPGrowthModel(object obj) =>
+            new FPGrowthModel((JvmObjectReference)obj);
+    }
+}