diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/FunctionsTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/FunctionsTests.cs
index 098d58945..e9bb8d7ea 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/FunctionsTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/FunctionsTests.cs
@@ -796,5 +796,55 @@ public void TestSignaturesV2_4_X()
col = MapConcat(col);
col = MapConcat(col, col);
}
+
+ /// <summary>
+ /// Test signatures for APIs introduced in Spark 3.0.*.
+ /// </summary>
+ [SkipIfSparkVersionIsLessThan(Versions.V3_0_0)]
+ public void TestSignaturesV3_0_X()
+ {
+ Column col = Column("col");
+
+ Assert.IsType<Column>(XXHash64());
+ Assert.IsType<Column>(XXHash64(col));
+ Assert.IsType<Column>(XXHash64(col, col));
+
+ Assert.IsType<Column>(Split(col, "\t", 1));
+ Assert.IsType<Column>(Split(col, "\t", -1));
+
+ Assert.IsType<Column>(Overlay(col, col, col));
+ Assert.IsType<Column>(Overlay(col, col, col, col));
+
+ Assert.IsType<Column>(AddMonths(col, col));
+
+ Assert.IsType<Column>(DateAdd(col, col));
+
+ Assert.IsType<Column>(DateSub(col, col));
+
+ var options = new Dictionary<string, string>() { { "hello", "world" } };
+ Assert.IsType<Column>(SchemaOfJson(col, options));
+
+ Assert.IsType<Column>(MapEntries(col));
+
+ Column schemaCol = SchemaOfCsv("[{\"col\":0}]");
+ Assert.IsType<Column>(FromCsv(col, schemaCol, options));
+
+ Assert.IsType<Column>(SchemaOfCsv(col));
+ Assert.IsType<Column>(SchemaOfCsv(col, options));
+
+ Assert.IsType<Column>(ToCsv(col));
+ Assert.IsType<Column>(ToCsv(col, options));
+
+ Assert.IsType<Column>(Years(col));
+
+ Assert.IsType<Column>(Months(col));
+
+ Assert.IsType<Column>(Days(col));
+
+ Assert.IsType<Column>(Hours(col));
+
+ Assert.IsType<Column>(Bucket(Lit(1), col));
+ Assert.IsType<Column>(Bucket(1, col));
+ }
}
}
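For illustration only (not part of this patch): these E2E tests check signatures and return types, not values. A value-level check for one of the new overloads could look roughly like the sketch below, assuming the fixture's `_spark` session and `using System.Linq;` as elsewhere in the test project.

// Hypothetical value-level check: Split with a positive limit keeps everything
// after the last match in the final array entry.
DataFrame df = _spark.Sql("SELECT 'a,b,c' AS s");
Row row = df.Select(Split(Col("s"), ",", 2)).Collect().First();
// Expected single-column value: ["a", "b,c"]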
diff --git a/src/csharp/Microsoft.Spark/Sql/Functions.cs b/src/csharp/Microsoft.Spark/Sql/Functions.cs
index 9f0231fb7..4d9a4c6ae 100644
--- a/src/csharp/Microsoft.Spark/Sql/Functions.cs
+++ b/src/csharp/Microsoft.Spark/Sql/Functions.cs
@@ -860,7 +860,7 @@ public static Column Lead(string columnName, int offset, object defaultValue = n
/// <remarks>
/// This is equivalent to the NTILE function in SQL.
/// </remarks>
/// <param name="n">Number of buckets</param>
- /// <returns></returns>
+ /// <returns>Column object</returns>
public static Column Ntile(int n)
{
return ApplyFunction("ntile", n);
@@ -2185,6 +2185,18 @@ public static Column Hash(params Column[] columns)
return ApplyFunction("hash", (object)columns);
}
+ /// <summary>
+ /// Calculates the hash code of given columns using the 64-bit variant of the xxHash
+ /// algorithm, and returns the result as a long column.
+ /// </summary>
+ /// <param name="columns">Columns to apply</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column XXHash64(params Column[] columns)
+ {
+ return ApplyFunction("xxhash64", (object)columns);
+ }
+
/////////////////////////////////////////////////////////////////////////////////
// String functions
/////////////////////////////////////////////////////////////////////////////////
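For illustration (not part of this patch), a minimal usage sketch for the new binding, assuming a DataFrame `df` with columns "a" and "b" and `using static Microsoft.Spark.Sql.Functions;`:

// Combine two columns into one 64-bit hash per row; the result is a LongType column.
df.Select(XXHash64(Col("a"), Col("b")).Alias("row_hash")).Show();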
@@ -2522,6 +2534,25 @@ public static Column Split(Column column, string pattern)
return ApplyFunction("split", column, pattern);
}
+ /// <summary>
+ /// Splits str around matches of the given pattern.
+ /// </summary>
+ /// <param name="column">Column to apply</param>
+ /// <param name="pattern">Regular expression pattern</param>
+ /// <param name="limit">An integer expression which controls the number of times the regex
+ /// is applied.
+ /// 1. limit greater than 0: The resulting array's length will not be more than limit, and
+ /// the resulting array's last entry will contain all input beyond the last matched regex.
+ /// 2. limit less than or equal to 0: `regex` will be applied as many times as possible,
+ /// and the resulting array can be of any size.
+ /// </param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Split(Column column, string pattern, int limit)
+ {
+ return ApplyFunction("split", column, pattern, limit);
+ }
+
/// <summary>
/// Returns the substring (or slice of byte array) starting from the given
/// position for the given length.
@@ -2551,6 +2582,35 @@ public static Column SubstringIndex(Column column, string delimiter, int count)
return ApplyFunction("substring_index", column, delimiter, count);
}
+ /// <summary>
+ /// Overlay the specified portion of `src` with `replace`, starting from byte position
+ /// `pos` of `src` and proceeding for `len` bytes.
+ /// </summary>
+ /// <param name="src">Source column to replace</param>
+ /// <param name="replace">Replacing column</param>
+ /// <param name="pos">Byte position to start overlaying from</param>
+ /// <param name="len">Number of bytes to overlay</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Overlay(Column src, Column replace, Column pos, Column len)
+ {
+ return ApplyFunction("overlay", src, replace, pos, len);
+ }
+
+ /// <summary>
+ /// Overlay the specified portion of `src` with `replace`, starting from byte position
+ /// `pos` of `src`.
+ /// </summary>
+ /// <param name="src">Source column to replace</param>
+ /// <param name="replace">Replacing column</param>
+ /// <param name="pos">Byte position to start overlaying from</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Overlay(Column src, Column replace, Column pos)
+ {
+ return ApplyFunction("overlay", src, replace, pos);
+ }
+
/// <summary>
/// Translate any characters that match with the given `matchingString` in the column
/// by the given `replaceString`.
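For illustration (not part of this patch), the standard Spark example for overlay, sketched with the new overload and assuming a string column "s" holding "SPARK_SQL":

// overlay("SPARK_SQL", "CORE", 7) replaces bytes starting at position 7 -> "SPARK_CORE".
df.Select(Overlay(Col("s"), Lit("CORE"), Lit(7)));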
@@ -2610,6 +2670,18 @@ public static Column AddMonths(Column startDate, int numMonths)
return ApplyFunction("add_months", startDate, numMonths);
}
+ /// <summary>
+ /// Returns the date that is `numMonths` after `startDate`.
+ /// </summary>
+ /// <param name="startDate">Start date</param>
+ /// <param name="numMonths">A column of the number of months to add to the start date</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column AddMonths(Column startDate, Column numMonths)
+ {
+ return ApplyFunction("add_months", startDate, numMonths);
+ }
+
/// <summary>
/// Returns the current date as a date column.
/// </summary>
@@ -2651,6 +2723,18 @@ public static Column DateAdd(Column start, int days)
return ApplyFunction("date_add", start, days);
}
+ /// <summary>
+ /// Returns the date that is `days` days after `start`.
+ /// </summary>
+ /// <param name="start">Start date</param>
+ /// <param name="days">A column of the number of days to add to the start date</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column DateAdd(Column start, Column days)
+ {
+ return ApplyFunction("date_add", start, days);
+ }
+
/// <summary>
/// Returns the date that is `days` days before `start`.
/// </summary>
@@ -2662,6 +2746,18 @@ public static Column DateSub(Column start, int days)
return ApplyFunction("date_sub", start, days);
}
+ /// <summary>
+ /// Returns the date that is `days` days before `start`.
+ /// </summary>
+ /// <param name="start">Start date</param>
+ /// <param name="days">A column of the number of days to subtract from the start date</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column DateSub(Column start, Column days)
+ {
+ return ApplyFunction("date_sub", start, days);
+ }
+
/// <summary>
/// Returns the number of days from `start` to `end`.
/// </summary>
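For illustration (not part of this patch): unlike the existing int overloads, these new overloads accept a per-row offset column. A sketch assuming columns "start" (date) and "n" (int):

// Add and subtract a column of day counts rather than a constant.
df.Select(DateAdd(Col("start"), Col("n")), DateSub(Col("start"), Col("n")));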
@@ -3227,7 +3323,7 @@ public static Column ArraySort(Column column)
///
/// Column to apply
/// Element to remove
- ///
+ /// Column object
[Since(Versions.V2_4_0)]
public static Column ArrayRemove(Column column, object element)
{
@@ -3413,6 +3509,18 @@ public static Column SchemaOfJson(Column json)
return ApplyFunction("schema_of_json", json);
}
+ /// <summary>
+ /// Parses a JSON string and infers its schema in DDL format.
+ /// </summary>
+ /// <param name="json">String literal containing a JSON string</param>
+ /// <param name="options">Options to control how the JSON is parsed</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column SchemaOfJson(Column json, Dictionary<string, string> options)
+ {
+ return ApplyFunction("schema_of_json", json, options);
+ }
+
/// <summary>
/// Converts a column containing a `StructType`, `ArrayType` of `StructType`s,
/// a `MapType` or `ArrayType` of `MapType`s into a JSON string.
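For illustration (not part of this patch), a sketch of inferring a DDL schema from a literal JSON sample; the option shown is a standard Spark JSON reader option, and the snippet assumes `using System.Collections.Generic;`:

// schema_of_json needs a foldable (literal) argument; options tune JSON parsing.
df.Select(SchemaOfJson(
    Lit("{\"a\": 1, \"b\": [1, 2]}"),
    new Dictionary<string, string> { { "allowUnquotedFieldNames", "true" } }));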
@@ -3578,6 +3686,17 @@ public static Column MapValues(Column column)
return ApplyFunction("map_values", column);
}
+ /// <summary>
+ /// Returns an unordered array of all entries in the given map.
+ /// </summary>
+ /// <param name="column">Column to apply</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column MapEntries(Column column)
+ {
+ return ApplyFunction("map_entries", column);
+ }
+
/// <summary>
/// Returns a map created from the given array of entries.
/// </summary>
@@ -3612,6 +3731,170 @@ public static Column MapConcat(params Column[] columns)
return ApplyFunction("map_concat", (object)columns);
}
+ /// <summary>
+ /// Parses a column containing a CSV string into a `StructType` with the specified schema.
+ /// </summary>
+ /// <param name="column">Column to apply</param>
+ /// <param name="schema">The schema to use when parsing the CSV string</param>
+ /// <param name="options">Options to control how the CSV is parsed</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column FromCsv(
+ Column column,
+ StructType schema,
+ Dictionary<string, string> options)
+ {
+ return ApplyFunction(
+ "from_csv",
+ column,
+ DataType.FromJson(Jvm, schema.Json),
+ options);
+ }
+
+ /// <summary>
+ /// Parses a column containing a CSV string into a `StructType` with the specified schema.
+ /// </summary>
+ /// <param name="column">Column to apply</param>
+ /// <param name="schema">The schema to use when parsing the CSV string</param>
+ /// <param name="options">Options to control how the CSV is parsed</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column FromCsv(
+ Column column,
+ Column schema,
+ Dictionary<string, string> options)
+ {
+ return ApplyFunction("from_csv", column, schema, options);
+ }
+
+ /// <summary>
+ /// Parses a CSV string and infers its schema in DDL format.
+ /// </summary>
+ /// <param name="csv">CSV string to parse</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column SchemaOfCsv(string csv)
+ {
+ return ApplyFunction("schema_of_csv", csv);
+ }
+
+ /// <summary>
+ /// Parses a CSV string and infers its schema in DDL format.
+ /// </summary>
+ /// <param name="csv">CSV string to parse</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column SchemaOfCsv(Column csv)
+ {
+ return ApplyFunction("schema_of_csv", csv);
+ }
+
+ /// <summary>
+ /// Parses a CSV string and infers its schema in DDL format.
+ /// </summary>
+ /// <param name="csv">CSV string to parse</param>
+ /// <param name="options">Options to control how the CSV is parsed</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column SchemaOfCsv(Column csv, Dictionary<string, string> options)
+ {
+ return ApplyFunction("schema_of_csv", csv, options);
+ }
+
+ /// <summary>
+ /// Converts a column containing a `StructType` into a CSV string with the specified
+ /// schema.
+ /// </summary>
+ /// <param name="column">A column containing a struct.</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column ToCsv(Column column)
+ {
+ return ApplyFunction("to_csv", column);
+ }
+
+ /// <summary>
+ /// Converts a column containing a `StructType` into a CSV string with the specified
+ /// schema.
+ /// </summary>
+ /// <param name="column">A column containing a struct.</param>
+ /// <param name="options">Options to control how the struct column is converted into a CSV
+ /// string</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column ToCsv(Column column, Dictionary<string, string> options)
+ {
+ return ApplyFunction("to_csv", column, options);
+ }
+
+ /// <summary>
+ /// A transform for timestamps and dates to partition data into years.
+ /// </summary>
+ /// <param name="column">Column to apply</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Years(Column column)
+ {
+ return ApplyFunction("years", column);
+ }
+
+ /// <summary>
+ /// A transform for timestamps and dates to partition data into months.
+ /// </summary>
+ /// <param name="column">Column to apply</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Months(Column column)
+ {
+ return ApplyFunction("months", column);
+ }
+
+ /// <summary>
+ /// A transform for timestamps and dates to partition data into days.
+ /// </summary>
+ /// <param name="column">Column to apply</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Days(Column column)
+ {
+ return ApplyFunction("days", column);
+ }
+
+ /// <summary>
+ /// A transform for timestamps to partition data into hours.
+ /// </summary>
+ /// <param name="column">Column to apply</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Hours(Column column)
+ {
+ return ApplyFunction("hours", column);
+ }
+
+ /// <summary>
+ /// A transform for any type that partitions by a hash of the input column.
+ /// </summary>
+ /// <param name="numBuckets">A column containing the number of buckets</param>
+ /// <param name="column">Column to apply</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Bucket(Column numBuckets, Column column)
+ {
+ return ApplyFunction("bucket", numBuckets, column);
+ }
+
+ /// <summary>
+ /// A transform for any type that partitions by a hash of the input column.
+ /// </summary>
+ /// <param name="numBuckets">Number of buckets</param>
+ /// <param name="column">Column to apply</param>
+ /// <returns>Column object</returns>
+ [Since(Versions.V3_0_0)]
+ public static Column Bucket(int numBuckets, Column column)
+ {
+ return ApplyFunction("bucket", numBuckets, column);
+ }
+
/////////////////////////////////////////////////////////////////////////////////
// UDF helper functions
/////////////////////////////////////////////////////////////////////////////////
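For illustration (not part of this patch), a round-trip sketch for the new CSV helpers, assuming a DataFrame `df` with a string column "csv" such as "1,abc". Note that Years/Months/Days/Hours/Bucket are Spark 3.0 partition transforms intended for DataFrameWriterV2-style table partitioning rather than ordinary projections.

// Infer a schema from a literal sample, parse the CSV column into a struct, then serialize it back.
var options = new Dictionary<string, string>();   // default CSV parsing options
Column schema = SchemaOfCsv("1,abc");              // DDL schema inferred from a sample row
df.Select(ToCsv(FromCsv(Col("csv"), schema, options))).Show();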