@@ -70,6 +70,16 @@ public void PrintSchema() =>
70
70
Console . WriteLine (
71
71
( string ) ( ( JvmObjectReference ) _jvmObject . Invoke ( "schema" ) ) . Invoke ( "treeString" ) ) ;
72
72
73
/// <summary>
/// Writes the schema of this `DataFrame` to the console as a tree, going no deeper than
/// the given level.
/// </summary>
/// <param name="level">Level forwarded to the JVM-side `treeString` call.</param>
[Since(Versions.V3_0_0)]
public void PrintSchema(int level) =>
    Console.WriteLine(
        (string)((JvmObjectReference)_jvmObject.Invoke("schema")).Invoke("treeString", level));
82
+
73
83
/// <summary>
74
84
/// Prints the plans (logical and physical) to the console for debugging purposes.
75
85
/// </summary>
@@ -80,6 +90,30 @@ public void Explain(bool extended = false)
80
90
Console . WriteLine ( ( string ) execution . Invoke ( extended ? "toString" : "simpleString" ) ) ;
81
91
}
82
92
93
/// <summary>
/// Prints the plans (logical and physical) to the console, using the output format
/// selected by the given explain mode.
/// </summary>
/// <param name="mode">Specifies the expected output format of plans.
/// 1. `simple`: Print only a physical plan.
/// 2. `extended`: Print both logical and physical plans.
/// 3. `codegen`: Print a physical plan and generated code if it is available.
/// 4. `cost`: Print a logical plan and statistics if they are available.
/// 5. `formatted`: Split explain output into two sections: a physical plan outline and
/// node details.
/// </param>
[Since(Versions.V3_0_0)]
public void Explain(string mode)
{
    var queryExecution = (JvmObjectReference)_jvmObject.Invoke("queryExecution");

    // Turn the mode name into the JVM-side ExplainMode object that explainString expects.
    var parsedMode = (JvmObjectReference)_jvmObject.Jvm.CallStaticJavaMethod(
        "org.apache.spark.sql.execution.ExplainMode",
        "fromString",
        mode);

    string planText = (string)queryExecution.Invoke("explainString", parsedMode);
    Console.WriteLine(planText);
}
116
+
83
117
/// <summary>
84
118
/// Returns all column names and their data types as an IEnumerable of Tuples.
85
119
/// </summary>
@@ -480,6 +514,27 @@ public RelationalGroupedDataset Cube(string column, params string[] columns) =>
480
514
public DataFrame Agg ( Column expr , params Column [ ] exprs ) =>
481
515
WrapAsDataFrame ( _jvmObject . Invoke ( "agg" , expr , exprs ) ) ;
482
516
517
/// <summary>
/// Define (named) metrics to observe on the Dataset. This method returns an 'observed'
/// DataFrame that returns the same result as the input, with the following guarantees:
///
/// 1. It will compute the defined aggregates (metrics) on all the data that is flowing
/// through the Dataset at that point.
/// 2. It will report the value of the defined aggregate columns as soon as we reach a
/// completion point. A completion point is either the end of a query (batch mode) or the
/// end of a streaming epoch. The value of the aggregates only reflects the data processed
/// since the previous completion point.
///
/// Please note that continuous execution is currently not supported.
/// </summary>
/// <param name="name">Name under which the metrics are observed</param>
/// <param name="expr">First aggregate to observe</param>
/// <param name="exprs">Additional aggregates to observe</param>
/// <returns>The observed DataFrame object</returns>
[Since(Versions.V3_0_0)]
public DataFrame Observe(string name, Column expr, params Column[] exprs)
{
    // Forward all arguments unchanged to the JVM-side "observe" method.
    object observed = _jvmObject.Invoke("observe", name, expr, exprs);
    return WrapAsDataFrame(observed);
}
537
+
483
538
/// <summary>
484
539
/// Returns a new `DataFrame` by taking the first `number` rows.
485
540
/// </summary>
@@ -702,6 +757,17 @@ public DataFrame Summary(params string[] statistics) =>
702
757
/// <returns>First `n` rows</returns>
703
758
public IEnumerable < Row > Take ( int n ) => Head ( n ) ;
704
759
760
/// <summary>
/// Returns the final `n` rows of this `DataFrame`.
/// </summary>
/// <param name="n">How many rows to return, counted from the end</param>
/// <returns>The last `n` rows</returns>
[Since(Versions.V3_0_0)]
public IEnumerable<Row> Tail(int n) => GetRows("tailToPython", n);
770
+
705
771
/// <summary>
706
772
/// Returns an array that contains all rows in this `DataFrame`.
707
773
/// </summary>
@@ -929,16 +995,15 @@ public DataStreamWriter WriteStream() =>
929
995
new DataStreamWriter ( ( JvmObjectReference ) _jvmObject . Invoke ( "writeStream" ) , this ) ;
930
996
931
997
/// <summary>
932
- /// Returns row objects based on the function (either "toPythonIterator" or
933
- /// "collectToPython").
998
+ /// Returns row objects based on the function (either "toPythonIterator",
999
+ /// "collectToPython", or "tailToPython").
934
1000
/// </summary>
935
- /// <param name="funcName">
936
- /// The name of the function to call, either "toPythonIterator" or "collectToPython".
937
- /// </param>
938
- /// <returns><see cref="Row"/> objects</returns>
939
- private IEnumerable < Row > GetRows ( string funcName )
1001
+ /// <param name="funcName">String name of function to call</param>
1002
+ /// <param name="args">Arguments to the function</param>
1003
+ /// <returns>IEnumerable of Rows from Spark</returns>
1004
+ private IEnumerable < Row > GetRows ( string funcName , params object [ ] args )
940
1005
{
941
- ( int port , string secret , _ ) = GetConnectionInfo ( funcName ) ;
1006
+ ( int port , string secret , _ ) = GetConnectionInfo ( funcName , args ) ;
942
1007
using ISocketWrapper socket = SocketFactory . CreateSocket ( ) ;
943
1008
socket . Connect ( IPAddress . Loopback , port , secret ) ;
944
1009
foreach ( Row row in new RowCollector ( ) . Collect ( socket ) )
@@ -952,9 +1017,11 @@ private IEnumerable<Row> GetRows(string funcName)
952
1017
/// used for connecting with Spark to receive rows for this `DataFrame`.
953
1018
/// </summary>
954
1019
/// <returns>A tuple of port number, secret string, and JVM socket auth server.</returns>
955
- private ( int , string , JvmObjectReference ) GetConnectionInfo ( string funcName )
1020
+ private ( int , string , JvmObjectReference ) GetConnectionInfo (
1021
+ string funcName ,
1022
+ params object [ ] args )
956
1023
{
957
- object result = _jvmObject . Invoke ( funcName ) ;
1024
+ object result = _jvmObject . Invoke ( funcName , args ) ;
958
1025
Version version = SparkEnvironment . SparkVersion ;
959
1026
return ( version . Major , version . Minor , version . Build ) switch
960
1027
{
0 commit comments