diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/FunctionsTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/FunctionsTests.cs
index 098d58945..e9bb8d7ea 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/FunctionsTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/FunctionsTests.cs
@@ -796,5 +796,55 @@ public void TestSignaturesV2_4_X()
             col = MapConcat(col);
             col = MapConcat(col, col);
         }
+
+        /// <summary>
+        /// Test signatures for APIs introduced in Spark 3.0.*.
+        /// </summary>
+        [SkipIfSparkVersionIsLessThan(Versions.V3_0_0)]
+        public void TestSignaturesV3_0_X()
+        {
+            Column col = Column("col");
+
+            Assert.IsType<Column>(XXHash64());
+            Assert.IsType<Column>(XXHash64(col));
+            Assert.IsType<Column>(XXHash64(col, col));
+
+            Assert.IsType<Column>(Split(col, "\t", 1));
+            Assert.IsType<Column>(Split(col, "\t", -1));
+
+            Assert.IsType<Column>(Overlay(col, col, col));
+            Assert.IsType<Column>(Overlay(col, col, col, col));
+
+            Assert.IsType<Column>(AddMonths(col, col));
+
+            Assert.IsType<Column>(DateAdd(col, col));
+
+            Assert.IsType<Column>(DateSub(col, col));
+
+            var options = new Dictionary<string, string>() { { "hello", "world" } };
+            Assert.IsType<Column>(SchemaOfJson(col, options));
+
+            Assert.IsType<Column>(MapEntries(col));
+
+            Column schemaCol = SchemaOfCsv("[{\"col\":0}]");
+            Assert.IsType<Column>(FromCsv(col, schemaCol, options));
+
+            Assert.IsType<Column>(SchemaOfCsv(col));
+            Assert.IsType<Column>(SchemaOfCsv(col, options));
+
+            Assert.IsType<Column>(ToCsv(col));
+            Assert.IsType<Column>(ToCsv(col, options));
+
+            Assert.IsType<Column>(Years(col));
+
+            Assert.IsType<Column>(Months(col));
+
+            Assert.IsType<Column>(Days(col));
+
+            Assert.IsType<Column>(Hours(col));
+
+            Assert.IsType<Column>(Bucket(Lit(1), col));
+            Assert.IsType<Column>(Bucket(1, col));
+        }
     }
 }
diff --git a/src/csharp/Microsoft.Spark/Sql/Functions.cs b/src/csharp/Microsoft.Spark/Sql/Functions.cs
index 9f0231fb7..4d9a4c6ae 100644
--- a/src/csharp/Microsoft.Spark/Sql/Functions.cs
+++ b/src/csharp/Microsoft.Spark/Sql/Functions.cs
@@ -860,7 +860,7 @@ public static Column Lead(string columnName, int offset, object defaultValue = null)
         /// </summary>
         /// This is equivalent to the NTILE function in SQL.
         /// <param name="n">Number of buckets</param>
-        /// <returns></returns>
+        /// <returns>Column object</returns>
         public static Column Ntile(int n)
         {
             return ApplyFunction("ntile", n);
@@ -2185,6 +2185,18 @@ public static Column Hash(params Column[] columns)
             return ApplyFunction("hash", (object)columns);
         }
 
+        /// <summary>
+        /// Calculates the hash code of given columns using the 64-bit variant of the xxHash
+        /// algorithm, and returns the result as a long column.
+        /// </summary>
+        /// <param name="columns">Columns to apply</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column XXHash64(params Column[] columns)
+        {
+            return ApplyFunction("xxhash64", (object)columns);
+        }
+
         /////////////////////////////////////////////////////////////////////////////////
         // String functions
         /////////////////////////////////////////////////////////////////////////////////
@@ -2522,6 +2534,25 @@ public static Column Split(Column column, string pattern)
             return ApplyFunction("split", column, pattern);
         }
 
+        /// <summary>
+        /// Splits str around matches of the given pattern.
+        /// </summary>
+        /// <param name="column">Column to apply</param>
+        /// <param name="pattern">Regular expression pattern</param>
+        /// <param name="limit">An integer expression which controls the number of times the regex
+        /// is applied.
+        /// 1. limit greater than 0: The resulting array's length will not be more than limit, and
+        /// the resulting array's last entry will contain all input beyond the last matched regex.
+        /// 2. limit less than or equal to 0: `regex` will be applied as many times as possible,
+        /// and the resulting array can be of any size.
+        /// </param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Split(Column column, string pattern, int limit)
+        {
+            return ApplyFunction("split", column, pattern, limit);
+        }
+
         /// <summary>
         /// Returns the substring (or slice of byte array) starting from the given
         /// position for the given length.
@@ -2551,6 +2582,35 @@ public static Column SubstringIndex(Column column, string delimiter, int count)
             return ApplyFunction("substring_index", column, delimiter, count);
         }
 
+        /// <summary>
+        /// Overlay the specified portion of `src` with `replace`, starting from byte position
+        /// `pos` of `src` and proceeding for `len` bytes.
+        /// </summary>
+        /// <param name="src">Source column to replace</param>
+        /// <param name="replace">Replacing column</param>
+        /// <param name="pos">Byte position to start overlaying from</param>
+        /// <param name="len">Number of bytes to overlay</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Overlay(Column src, Column replace, Column pos, Column len)
+        {
+            return ApplyFunction("overlay", src, replace, pos, len);
+        }
+
+        /// <summary>
+        /// Overlay the specified portion of `src` with `replace`, starting from byte position
+        /// `pos` of `src`.
+        /// </summary>
+        /// <param name="src">Source column to replace</param>
+        /// <param name="replace">Replacing column</param>
+        /// <param name="pos">Byte position to start overlaying from</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Overlay(Column src, Column replace, Column pos)
+        {
+            return ApplyFunction("overlay", src, replace, pos);
+        }
+
         /// <summary>
         /// Translate any characters that match with the given `matchingString` in the column
         /// by the given `replaceString`.
@@ -2610,6 +2670,18 @@ public static Column AddMonths(Column startDate, int numMonths)
             return ApplyFunction("add_months", startDate, numMonths);
         }
 
+        /// <summary>
+        /// Returns the date that is `numMonths` after `startDate`.
+        /// </summary>
+        /// <param name="startDate">Start date</param>
+        /// <param name="numMonths">A column of the number of months to add to start date</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column AddMonths(Column startDate, Column numMonths)
+        {
+            return ApplyFunction("add_months", startDate, numMonths);
+        }
+
         /// <summary>
         /// Returns the current date as a date column.
         /// </summary>
@@ -2651,6 +2723,18 @@ public static Column DateAdd(Column start, int days)
             return ApplyFunction("date_add", start, days);
         }
 
+        /// <summary>
+        /// Returns the date that is `days` days after `start`.
+        /// </summary>
+        /// <param name="start">Start date</param>
+        /// <param name="days">A column of the number of days to add to start date</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column DateAdd(Column start, Column days)
+        {
+            return ApplyFunction("date_add", start, days);
+        }
+
         /// <summary>
         /// Returns the date that is `days` days before `start`.
         /// </summary>
@@ -2662,6 +2746,18 @@ public static Column DateSub(Column start, int days)
             return ApplyFunction("date_sub", start, days);
        }
 
+        /// <summary>
+        /// Returns the date that is `days` days before `start`.
+        /// </summary>
+        /// <param name="start">Start date</param>
+        /// <param name="days">A column of the number of days to subtract from start date</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column DateSub(Column start, Column days)
+        {
+            return ApplyFunction("date_sub", start, days);
+        }
+
         /// <summary>
         /// Returns the number of days from `start` to `end`.
         /// </summary>
@@ -3227,7 +3323,7 @@ public static Column ArraySort(Column column)
         /// </summary>
         /// <param name="column">Column to apply</param>
         /// <param name="element">Element to remove</param>
-        /// <returns></returns>
+        /// <returns>Column object</returns>
         [Since(Versions.V2_4_0)]
         public static Column ArrayRemove(Column column, object element)
         {
@@ -3413,6 +3509,18 @@ public static Column SchemaOfJson(Column json)
             return ApplyFunction("schema_of_json", json);
         }
 
+        /// <summary>
+        /// Parses a JSON string and infers its schema in DDL format.
+        /// </summary>
+        /// <param name="json">String literal containing a JSON string.</param>
+        /// <param name="options">Options to control how the JSON is parsed.</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column SchemaOfJson(Column json, Dictionary<string, string> options)
+        {
+            return ApplyFunction("schema_of_json", json, options);
+        }
+
         /// <summary>
         /// Converts a column containing a `StructType`, `ArrayType` of `StructType`s,
         /// a `MapType` or `ArrayType` of `MapType`s into a JSON string.
@@ -3578,6 +3686,17 @@ public static Column MapValues(Column column)
             return ApplyFunction("map_values", column);
         }
 
+        /// <summary>
+        /// Returns an unordered array of all entries in the given map.
+        /// </summary>
+        /// <param name="column">Column to apply</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column MapEntries(Column column)
+        {
+            return ApplyFunction("map_entries", column);
+        }
+
         /// <summary>
         /// Returns a map created from the given array of entries.
         /// </summary>
@@ -3612,6 +3731,170 @@ public static Column MapConcat(params Column[] columns)
             return ApplyFunction("map_concat", (object)columns);
         }
 
+        /// <summary>
+        /// Parses a column containing a CSV string into a `StructType` with the specified schema.
+        /// </summary>
+        /// <param name="column">Column to apply</param>
+        /// <param name="schema">The schema to use when parsing the CSV string</param>
+        /// <param name="options">Options to control how the CSV is parsed.</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column FromCsv(
+            Column column,
+            StructType schema,
+            Dictionary<string, string> options)
+        {
+            return ApplyFunction(
+                "from_csv",
+                column,
+                DataType.FromJson(Jvm, schema.Json),
+                options);
+        }
+
+        /// <summary>
+        /// Parses a column containing a CSV string into a `StructType` with the specified schema.
+        /// </summary>
+        /// <param name="column">Column to apply</param>
+        /// <param name="schema">The schema to use when parsing the CSV string</param>
+        /// <param name="options">Options to control how the CSV is parsed.</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column FromCsv(
+            Column column,
+            Column schema,
+            Dictionary<string, string> options)
+        {
+            return ApplyFunction("from_csv", column, schema, options);
+        }
+
+        /// <summary>
+        /// Parses a CSV string and infers its schema in DDL format.
+        /// </summary>
+        /// <param name="csv">CSV string to parse</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column SchemaOfCsv(string csv)
+        {
+            return ApplyFunction("schema_of_csv", csv);
+        }
+
+        /// <summary>
+        /// Parses a CSV string and infers its schema in DDL format.
+        /// </summary>
+        /// <param name="csv">CSV string to parse</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column SchemaOfCsv(Column csv)
+        {
+            return ApplyFunction("schema_of_csv", csv);
+        }
+
+        /// <summary>
+        /// Parses a CSV string and infers its schema in DDL format.
+        /// </summary>
+        /// <param name="csv">CSV string to parse</param>
+        /// <param name="options">Options to control how the CSV is parsed.</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column SchemaOfCsv(Column csv, Dictionary<string, string> options)
+        {
+            return ApplyFunction("schema_of_csv", csv, options);
+        }
+
+        /// <summary>
+        /// Converts a column containing a `StructType` into a CSV string with the specified
+        /// schema.
+        /// </summary>
+        /// <param name="column">A column containing a struct.</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column ToCsv(Column column)
+        {
+            return ApplyFunction("to_csv", column);
+        }
+
+        /// <summary>
+        /// Converts a column containing a `StructType` into a CSV string with the specified
+        /// schema.
+        /// </summary>
+        /// <param name="column">A column containing a struct.</param>
+        /// <param name="options">Options to control how the struct column is converted into a CSV
+        /// string</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column ToCsv(Column column, Dictionary<string, string> options)
+        {
+            return ApplyFunction("to_csv", column, options);
+        }
+
+        /// <summary>
+        /// A transform for timestamps and dates to partition data into years.
+        /// </summary>
+        /// <param name="column">Column to apply</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Years(Column column)
+        {
+            return ApplyFunction("years", column);
+        }
+
+        /// <summary>
+        /// A transform for timestamps and dates to partition data into months.
+        /// </summary>
+        /// <param name="column">Column to apply</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Months(Column column)
+        {
+            return ApplyFunction("months", column);
+        }
+
+        /// <summary>
+        /// A transform for timestamps and dates to partition data into days.
+        /// </summary>
+        /// <param name="column">Column to apply</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Days(Column column)
+        {
+            return ApplyFunction("days", column);
+        }
+
+        /// <summary>
+        /// A transform for timestamps to partition data into hours.
+        /// </summary>
+        /// <param name="column">Column to apply</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Hours(Column column)
+        {
+            return ApplyFunction("hours", column);
+        }
+
+        /// <summary>
+        /// A transform for any type that partitions by a hash of the input column.
+        /// </summary>
+        /// <param name="numBuckets">A column containing the number of buckets</param>
+        /// <param name="column">Column to apply</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Bucket(Column numBuckets, Column column)
+        {
+            return ApplyFunction("bucket", numBuckets, column);
+        }
+
+        /// <summary>
+        /// A transform for any type that partitions by a hash of the input column.
+        /// </summary>
+        /// <param name="numBuckets">Number of buckets</param>
+        /// <param name="column">Column to apply</param>
+        /// <returns>Column object</returns>
+        [Since(Versions.V3_0_0)]
+        public static Column Bucket(int numBuckets, Column column)
+        {
+            return ApplyFunction("bucket", numBuckets, column);
+        }
+
         /////////////////////////////////////////////////////////////////////////////////
         // UDF helper functions
         /////////////////////////////////////////////////////////////////////////////////
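
A minimal usage sketch of the new bindings follows, for reviewers. It assumes a running Spark 3.0+ session obtained via SparkSession.Builder().GetOrCreate(); the DataFrame, its column names, and the SQL literal are illustrative only and are not part of this change.

// Usage sketch (not part of the patch). Assumes Spark 3.0+ and the usual
// `using static Microsoft.Spark.Sql.Functions;` import; the DataFrame and
// column names below are made up for illustration.
using Microsoft.Spark.Sql;
using static Microsoft.Spark.Sql.Functions;

class Example
{
    static void Main()
    {
        SparkSession spark = SparkSession.Builder().GetOrCreate();

        // Illustrative input: one row with a delimited string and a date.
        DataFrame df = spark.Sql(
            "SELECT 'a,b,c' AS csv, 'x y z' AS words, current_date() AS d");

        df.Select(
                XXHash64(Col("csv")),                  // 64-bit xxHash of a column
                Split(Col("words"), " ", 2),           // new Split overload with a limit
                Overlay(Col("csv"), Lit("Z"), Lit(1)), // overlay starting at byte position 1
                DateAdd(Col("d"), Lit(7)),             // date arithmetic with a Column argument
                SchemaOfCsv(Lit("1,abc")))             // infer a CSV schema as a DDL string
            .Show();

        // Years/Months/Days/Hours/Bucket are partition transforms; they are intended for
        // table partitioning (e.g. with DataFrameWriterV2) rather than a plain Select.
    }
}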