Skip to content

Commit 4fe7405

Browse files
authored
PicklingUdfWrapper cache type information (#689)
1 parent 1d862a1 commit 4fe7405

File tree

3 files changed

+150
-62
lines changed

3 files changed

+150
-62
lines changed

src/csharp/Microsoft.Spark/Sql/PicklingUdfWrapper.cs

+141-56
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6-
using Microsoft.Spark.Utils;
6+
using static Microsoft.Spark.Utils.TypeConverter;
77

88
namespace Microsoft.Spark.Sql
99
{
@@ -35,6 +35,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
3535
[UdfWrapper]
3636
internal class PicklingUdfWrapper<T, TResult>
3737
{
38+
[NonSerialized]
39+
private bool? _sameT = null;
40+
3841
private readonly Func<T, TResult> _func;
3942

4043
internal PicklingUdfWrapper(Func<T, TResult> func)
@@ -44,7 +47,8 @@ internal PicklingUdfWrapper(Func<T, TResult> func)
4447

4548
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
4649
{
47-
return _func(TypeConverter.ConvertTo<T>(input[argOffsets[0]]));
50+
object param = input[argOffsets[0]];
51+
return _func((_sameT ??= param is T) ? (T)param : ConvertTo<T>(param));
4852
}
4953
}
5054

@@ -57,6 +61,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
5761
[UdfWrapper]
5862
internal class PicklingUdfWrapper<T1, T2, TResult>
5963
{
64+
[NonSerialized]
65+
private readonly bool?[] _sameT = new bool?[2];
66+
6067
private readonly Func<T1, T2, TResult> _func;
6168

6269
internal PicklingUdfWrapper(Func<T1, T2, TResult> func)
@@ -66,9 +73,11 @@ internal PicklingUdfWrapper(Func<T1, T2, TResult> func)
6673

6774
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
6875
{
76+
object param1 = input[argOffsets[0]];
77+
object param2 = input[argOffsets[1]];
6978
return _func(
70-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
71-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]));
79+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
80+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2));
7281
}
7382
}
7483

@@ -82,6 +91,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
8291
[UdfWrapper]
8392
internal class PicklingUdfWrapper<T1, T2, T3, TResult>
8493
{
94+
[NonSerialized]
95+
private readonly bool?[] _sameT = new bool?[3];
96+
8597
private readonly Func<T1, T2, T3, TResult> _func;
8698

8799
internal PicklingUdfWrapper(Func<T1, T2, T3, TResult> func)
@@ -91,10 +103,13 @@ internal PicklingUdfWrapper(Func<T1, T2, T3, TResult> func)
91103

92104
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
93105
{
106+
object param1 = input[argOffsets[0]];
107+
object param2 = input[argOffsets[1]];
108+
object param3 = input[argOffsets[2]];
94109
return _func(
95-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
96-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]),
97-
TypeConverter.ConvertTo<T3>(input[argOffsets[2]]));
110+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
111+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2),
112+
(_sameT[2] ??= param3 is T3) ? (T3)param3 : ConvertTo<T3>(param3));
98113
}
99114
}
100115

@@ -109,6 +124,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
109124
[UdfWrapper]
110125
internal class PicklingUdfWrapper<T1, T2, T3, T4, TResult>
111126
{
127+
[NonSerialized]
128+
private readonly bool?[] _sameT = new bool?[4];
129+
112130
private readonly Func<T1, T2, T3, T4, TResult> _func;
113131

114132
internal PicklingUdfWrapper(Func<T1, T2, T3, T4, TResult> func)
@@ -118,11 +136,15 @@ internal PicklingUdfWrapper(Func<T1, T2, T3, T4, TResult> func)
118136

119137
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
120138
{
139+
object param1 = input[argOffsets[0]];
140+
object param2 = input[argOffsets[1]];
141+
object param3 = input[argOffsets[2]];
142+
object param4 = input[argOffsets[3]];
121143
return _func(
122-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
123-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]),
124-
TypeConverter.ConvertTo<T3>(input[argOffsets[2]]),
125-
TypeConverter.ConvertTo<T4>(input[argOffsets[3]]));
144+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
145+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2),
146+
(_sameT[2] ??= param3 is T3) ? (T3)param3 : ConvertTo<T3>(param3),
147+
(_sameT[3] ??= param4 is T4) ? (T4)param4 : ConvertTo<T4>(param4));
126148
}
127149
}
128150

@@ -138,6 +160,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
138160
[UdfWrapper]
139161
internal class PicklingUdfWrapper<T1, T2, T3, T4, T5, TResult>
140162
{
163+
[NonSerialized]
164+
private readonly bool?[] _sameT = new bool?[5];
165+
141166
private readonly Func<T1, T2, T3, T4, T5, TResult> _func;
142167

143168
internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, TResult> func)
@@ -147,12 +172,17 @@ internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, TResult> func)
147172

148173
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
149174
{
175+
object param1 = input[argOffsets[0]];
176+
object param2 = input[argOffsets[1]];
177+
object param3 = input[argOffsets[2]];
178+
object param4 = input[argOffsets[3]];
179+
object param5 = input[argOffsets[4]];
150180
return _func(
151-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
152-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]),
153-
TypeConverter.ConvertTo<T3>(input[argOffsets[2]]),
154-
TypeConverter.ConvertTo<T4>(input[argOffsets[3]]),
155-
TypeConverter.ConvertTo<T5>(input[argOffsets[4]]));
181+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
182+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2),
183+
(_sameT[2] ??= param3 is T3) ? (T3)param3 : ConvertTo<T3>(param3),
184+
(_sameT[3] ??= param4 is T4) ? (T4)param4 : ConvertTo<T4>(param4),
185+
(_sameT[4] ??= param5 is T5) ? (T5)param5 : ConvertTo<T5>(param5));
156186
}
157187
}
158188

@@ -169,6 +199,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
169199
[UdfWrapper]
170200
internal class PicklingUdfWrapper<T1, T2, T3, T4, T5, T6, TResult>
171201
{
202+
[NonSerialized]
203+
private readonly bool?[] _sameT = new bool?[6];
204+
172205
private readonly Func<T1, T2, T3, T4, T5, T6, TResult> _func;
173206

174207
internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, TResult> func)
@@ -178,13 +211,19 @@ internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, TResult> func)
178211

179212
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
180213
{
214+
object param1 = input[argOffsets[0]];
215+
object param2 = input[argOffsets[1]];
216+
object param3 = input[argOffsets[2]];
217+
object param4 = input[argOffsets[3]];
218+
object param5 = input[argOffsets[4]];
219+
object param6 = input[argOffsets[5]];
181220
return _func(
182-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
183-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]),
184-
TypeConverter.ConvertTo<T3>(input[argOffsets[2]]),
185-
TypeConverter.ConvertTo<T4>(input[argOffsets[3]]),
186-
TypeConverter.ConvertTo<T5>(input[argOffsets[4]]),
187-
TypeConverter.ConvertTo<T6>(input[argOffsets[5]]));
221+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
222+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2),
223+
(_sameT[2] ??= param3 is T3) ? (T3)param3 : ConvertTo<T3>(param3),
224+
(_sameT[3] ??= param4 is T4) ? (T4)param4 : ConvertTo<T4>(param4),
225+
(_sameT[4] ??= param5 is T5) ? (T5)param5 : ConvertTo<T5>(param5),
226+
(_sameT[5] ??= param6 is T6) ? (T6)param6 : ConvertTo<T6>(param6));
188227
}
189228
}
190229

@@ -202,6 +241,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
202241
[UdfWrapper]
203242
internal class PicklingUdfWrapper<T1, T2, T3, T4, T5, T6, T7, TResult>
204243
{
244+
[NonSerialized]
245+
private readonly bool?[] _sameT = new bool?[7];
246+
205247
private readonly Func<T1, T2, T3, T4, T5, T6, T7, TResult> _func;
206248

207249
internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, T7, TResult> func)
@@ -211,14 +253,21 @@ internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, T7, TResult> func)
211253

212254
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
213255
{
256+
object param1 = input[argOffsets[0]];
257+
object param2 = input[argOffsets[1]];
258+
object param3 = input[argOffsets[2]];
259+
object param4 = input[argOffsets[3]];
260+
object param5 = input[argOffsets[4]];
261+
object param6 = input[argOffsets[5]];
262+
object param7 = input[argOffsets[6]];
214263
return _func(
215-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
216-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]),
217-
TypeConverter.ConvertTo<T3>(input[argOffsets[2]]),
218-
TypeConverter.ConvertTo<T4>(input[argOffsets[3]]),
219-
TypeConverter.ConvertTo<T5>(input[argOffsets[4]]),
220-
TypeConverter.ConvertTo<T6>(input[argOffsets[5]]),
221-
TypeConverter.ConvertTo<T7>(input[argOffsets[6]]));
264+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
265+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2),
266+
(_sameT[2] ??= param3 is T3) ? (T3)param3 : ConvertTo<T3>(param3),
267+
(_sameT[3] ??= param4 is T4) ? (T4)param4 : ConvertTo<T4>(param4),
268+
(_sameT[4] ??= param5 is T5) ? (T5)param5 : ConvertTo<T5>(param5),
269+
(_sameT[5] ??= param6 is T6) ? (T6)param6 : ConvertTo<T6>(param6),
270+
(_sameT[6] ??= param7 is T7) ? (T7)param7 : ConvertTo<T7>(param7));
222271
}
223272
}
224273

@@ -237,6 +286,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
237286
[UdfWrapper]
238287
internal class PicklingUdfWrapper<T1, T2, T3, T4, T5, T6, T7, T8, TResult>
239288
{
289+
[NonSerialized]
290+
private readonly bool?[] _sameT = new bool?[8];
291+
240292
private readonly Func<T1, T2, T3, T4, T5, T6, T7, T8, TResult> _func;
241293

242294
internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, T7, T8, TResult> func)
@@ -246,15 +298,23 @@ internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, T7, T8, TResult> func)
246298

247299
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
248300
{
301+
object param1 = input[argOffsets[0]];
302+
object param2 = input[argOffsets[1]];
303+
object param3 = input[argOffsets[2]];
304+
object param4 = input[argOffsets[3]];
305+
object param5 = input[argOffsets[4]];
306+
object param6 = input[argOffsets[5]];
307+
object param7 = input[argOffsets[6]];
308+
object param8 = input[argOffsets[7]];
249309
return _func(
250-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
251-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]),
252-
TypeConverter.ConvertTo<T3>(input[argOffsets[2]]),
253-
TypeConverter.ConvertTo<T4>(input[argOffsets[3]]),
254-
TypeConverter.ConvertTo<T5>(input[argOffsets[4]]),
255-
TypeConverter.ConvertTo<T6>(input[argOffsets[5]]),
256-
TypeConverter.ConvertTo<T7>(input[argOffsets[6]]),
257-
TypeConverter.ConvertTo<T8>(input[argOffsets[7]]));
310+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
311+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2),
312+
(_sameT[2] ??= param3 is T3) ? (T3)param3 : ConvertTo<T3>(param3),
313+
(_sameT[3] ??= param4 is T4) ? (T4)param4 : ConvertTo<T4>(param4),
314+
(_sameT[4] ??= param5 is T5) ? (T5)param5 : ConvertTo<T5>(param5),
315+
(_sameT[5] ??= param6 is T6) ? (T6)param6 : ConvertTo<T6>(param6),
316+
(_sameT[6] ??= param7 is T7) ? (T7)param7 : ConvertTo<T7>(param7),
317+
(_sameT[7] ??= param8 is T8) ? (T8)param8 : ConvertTo<T8>(param8));
258318
}
259319
}
260320

@@ -274,6 +334,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
274334
[UdfWrapper]
275335
internal class PicklingUdfWrapper<T1, T2, T3, T4, T5, T6, T7, T8, T9, TResult>
276336
{
337+
[NonSerialized]
338+
private readonly bool?[] _sameT = new bool?[9];
339+
277340
private readonly Func<T1, T2, T3, T4, T5, T6, T7, T8, T9, TResult> _func;
278341

279342
internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, T7, T8, T9, TResult> func)
@@ -282,16 +345,25 @@ internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, T7, T8, T9, TResult> fu
282345
}
283346
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
284347
{
348+
object param1 = input[argOffsets[0]];
349+
object param2 = input[argOffsets[1]];
350+
object param3 = input[argOffsets[2]];
351+
object param4 = input[argOffsets[3]];
352+
object param5 = input[argOffsets[4]];
353+
object param6 = input[argOffsets[5]];
354+
object param7 = input[argOffsets[6]];
355+
object param8 = input[argOffsets[7]];
356+
object param9 = input[argOffsets[8]];
285357
return _func(
286-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
287-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]),
288-
TypeConverter.ConvertTo<T3>(input[argOffsets[2]]),
289-
TypeConverter.ConvertTo<T4>(input[argOffsets[3]]),
290-
TypeConverter.ConvertTo<T5>(input[argOffsets[4]]),
291-
TypeConverter.ConvertTo<T6>(input[argOffsets[5]]),
292-
TypeConverter.ConvertTo<T7>(input[argOffsets[6]]),
293-
TypeConverter.ConvertTo<T8>(input[argOffsets[7]]),
294-
TypeConverter.ConvertTo<T9>(input[argOffsets[8]]));
358+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
359+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2),
360+
(_sameT[2] ??= param3 is T3) ? (T3)param3 : ConvertTo<T3>(param3),
361+
(_sameT[3] ??= param4 is T4) ? (T4)param4 : ConvertTo<T4>(param4),
362+
(_sameT[4] ??= param5 is T5) ? (T5)param5 : ConvertTo<T5>(param5),
363+
(_sameT[5] ??= param6 is T6) ? (T6)param6 : ConvertTo<T6>(param6),
364+
(_sameT[6] ??= param7 is T7) ? (T7)param7 : ConvertTo<T7>(param7),
365+
(_sameT[7] ??= param8 is T8) ? (T8)param8 : ConvertTo<T8>(param8),
366+
(_sameT[8] ??= param9 is T9) ? (T9)param9 : ConvertTo<T9>(param9));
295367
}
296368
}
297369

@@ -312,6 +384,9 @@ internal object Execute(int splitIndex, object[] input, int[] argOffsets)
312384
[UdfWrapper]
313385
internal class PicklingUdfWrapper<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, TResult>
314386
{
387+
[NonSerialized]
388+
private readonly bool?[] _sameT = new bool?[10];
389+
315390
private readonly Func<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, TResult> _func;
316391

317392
internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, TResult> func)
@@ -321,17 +396,27 @@ internal PicklingUdfWrapper(Func<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, TResul
321396

322397
internal object Execute(int splitIndex, object[] input, int[] argOffsets)
323398
{
399+
object param1 = input[argOffsets[0]];
400+
object param2 = input[argOffsets[1]];
401+
object param3 = input[argOffsets[2]];
402+
object param4 = input[argOffsets[3]];
403+
object param5 = input[argOffsets[4]];
404+
object param6 = input[argOffsets[5]];
405+
object param7 = input[argOffsets[6]];
406+
object param8 = input[argOffsets[7]];
407+
object param9 = input[argOffsets[8]];
408+
object param10 = input[argOffsets[9]];
324409
return _func(
325-
TypeConverter.ConvertTo<T1>(input[argOffsets[0]]),
326-
TypeConverter.ConvertTo<T2>(input[argOffsets[1]]),
327-
TypeConverter.ConvertTo<T3>(input[argOffsets[2]]),
328-
TypeConverter.ConvertTo<T4>(input[argOffsets[3]]),
329-
TypeConverter.ConvertTo<T5>(input[argOffsets[4]]),
330-
TypeConverter.ConvertTo<T6>(input[argOffsets[5]]),
331-
TypeConverter.ConvertTo<T7>(input[argOffsets[6]]),
332-
TypeConverter.ConvertTo<T8>(input[argOffsets[7]]),
333-
TypeConverter.ConvertTo<T9>(input[argOffsets[8]]),
334-
TypeConverter.ConvertTo<T10>(input[argOffsets[9]]));
410+
(_sameT[0] ??= param1 is T1) ? (T1)param1 : ConvertTo<T1>(param1),
411+
(_sameT[1] ??= param2 is T2) ? (T2)param2 : ConvertTo<T2>(param2),
412+
(_sameT[2] ??= param3 is T3) ? (T3)param3 : ConvertTo<T3>(param3),
413+
(_sameT[3] ??= param4 is T4) ? (T4)param4 : ConvertTo<T4>(param4),
414+
(_sameT[4] ??= param5 is T5) ? (T5)param5 : ConvertTo<T5>(param5),
415+
(_sameT[5] ??= param6 is T6) ? (T6)param6 : ConvertTo<T6>(param6),
416+
(_sameT[6] ??= param7 is T7) ? (T7)param7 : ConvertTo<T7>(param7),
417+
(_sameT[7] ??= param8 is T8) ? (T8)param8 : ConvertTo<T8>(param8),
418+
(_sameT[8] ??= param9 is T9) ? (T9)param9 : ConvertTo<T9>(param9),
419+
(_sameT[9] ??= param10 is T10) ? (T10)param10 : ConvertTo<T10>(param10));
335420
}
336421
}
337422
}

src/csharp/Microsoft.Spark/Utils/TypeConverter.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ internal static class TypeConverter
2020
/// <typeparam name="T">Type to convert to</typeparam>
2121
/// <param name="obj">The object to convert</param>
2222
/// <returns>Converted object.</returns>
23-
internal static T ConvertTo<T>(object obj) => obj is T t ? t : (T)Convert(obj, typeof(T));
23+
internal static T ConvertTo<T>(object obj) => (T)Convert(obj, typeof(T));
2424

2525
private static object Convert(object obj, Type toType)
2626
{

src/csharp/Microsoft.Spark/Utils/UdfSerDe.cs

+8-5
Original file line numberDiff line numberDiff line change
@@ -201,12 +201,15 @@ private static TargetData SerializeTarget(object target)
201201
BindingFlags.Public |
202202
BindingFlags.NonPublic))
203203
{
204-
fields.Add(new FieldData()
204+
if (!field.GetCustomAttributes(typeof(NonSerializedAttribute)).Any())
205205
{
206-
TypeData = SerializeType(field.FieldType),
207-
Name = field.Name,
208-
Value = field.GetValue(target)
209-
});
206+
fields.Add(new FieldData()
207+
{
208+
TypeData = SerializeType(field.FieldType),
209+
Name = field.Name,
210+
Value = field.GetValue(target)
211+
});
212+
}
210213
}
211214

212215
// Even when an UDF does not have any closure, GetFields() returns some fields

0 commit comments

Comments
 (0)