 import sys
 import timeit
 import re
+import platform
+import hashlib
+import os
+import subprocess
 
 import numpy as np
 import sklearn
-
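+# `itt` is assumed here to be the Python bindings for the Intel(R)
+# Instrumentation and Tracing Technology (ITT) API used with VTune(TM);
+# it exposes pause()/resume() so collection can be limited to the measured
+# region. The import is guarded because the module is optional.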
+try:
+    import itt
+    itt_module_installed = True
+except ImportError:
+    itt_module_installed = False
 
 def get_dtype(data):
     '''
@@ -159,6 +167,8 @@ def parse_args(parser, size=None, loop_types=(),
     parser.add_argument('--time-method', type=str, default='box_filter',
                         choices=('box_filter'),
                         help='Method used for time mesurements')
+    parser.add_argument('--box-filter-measurements-analysis', type=int, default=100,
+                        help='Maximum number of measurements in box filter (for analyzed stage)')
     parser.add_argument('--box-filter-measurements', type=int, default=100,
                         help='Maximum number of measurements in box filter')
     parser.add_argument('--inner-loops', default=100, type=int,
@@ -167,6 +177,8 @@ def parse_args(parser, size=None, loop_types=(),
     parser.add_argument('--outer-loops', default=100, type=int,
                         help='Maximum outer loop iterations '
                              '(we take the min over outer iterations)')
+    parser.add_argument('--time-limit-analysis', default=10., type=float,
+                        help='Target time to spend to benchmark (for analyzed stage)')
     parser.add_argument('--time-limit', default=10., type=float,
                         help='Target time to spend to benchmark')
     parser.add_argument('--goal-outer-loops', default=10,
@@ -186,6 +198,25 @@ def parse_args(parser, size=None, loop_types=(),
     parser.add_argument('--device', default='none', type=str,
                         choices=('host', 'cpu', 'gpu', 'none'),
                         help='Execution context device')
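+    # Profiling, instrumentation, and cache-control switches (all opt-in):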
+    parser.add_argument('--emon', default=False,
+                        action='store_true',
+                        help='Should emon profiling be started')
+    parser.add_argument('--vtune', default=False,
+                        action='store_true',
+                        help='Should vtune profiling be started')
+    parser.add_argument('--psrecord', default=False,
+                        action='store_true',
+                        help='Should psrecord profiling be started')
+    parser.add_argument('--ittpy', default=False,
+                        action='store_true',
+                        help='Should ittpy domains be integrated')
+    parser.add_argument('--sgx-gramine', default=False,
+                        action='store_true',
+                        help='Should benchmark run with Gramine & Intel(R) SGX')
+    parser.add_argument('--flush-caches', default=False,
+                        action='store_true',
+                        help='Should benchmark flush CPU caches after each run during measuring')
+    parser.add_argument('--target-stage', type=str, default='default', help='Select target stage for analysis.')
 
     for data in ['X', 'y']:
         for stage in ['train', 'test']:
@@ -201,6 +232,9 @@ def parse_args(parser, size=None, loop_types=(),
 
     params = parser.parse_args()
 
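+    # Pause ITT collection right after argument parsing; it is resumed only
+    # around the measured calls in time_box_filter below.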
+    if params.ittpy and itt_module_installed:
+        itt.pause()
+
     if not params.no_intel_optimized:
         try:
             from sklearnex import patch_sklearn
@@ -272,18 +306,68 @@ def prepare_daal_threads(num_threads=-1):
     return num_threads
 
 
-def measure_function_time(func, *args, params, **kwargs):
-    return time_box_filter(func, *args,
-                           n_meas=params.box_filter_measurements,
-                           time_limit=params.time_limit, **kwargs)
+def measure_function_time(func, *args, params, stage, **kwargs):
+    results = time_box_filter(func, *args, params=params, stage=stage, **kwargs)
+    return results
 
 
-def time_box_filter(func, *args, n_meas, time_limit, **kwargs):
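+# Read the last-level cache size reported by Linux sysfs (a value such as
+# "512K") and convert it to bytes. Assumes cache index3 is the LLC and that
+# the size string carries a 'K' suffix.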
+def detect_LLC_size():
+    with open('/sys/devices/system/cpu/cpu0/cache/index3/size', 'r') as f:
+        llc_size_str = f.readline().strip()
+        llc_size = int(llc_size_str[:-1]) * 1024
+    return llc_size
+
+
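+# Evict benchmark data from the CPU caches between measurements: stream a
+# random matrix larger than the LLC (cached in data/flush_data.npy after the
+# first call) through a few short KMeans fits so cache lines get overwritten.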
+def flush_caches():
+    flush_datafile = 'data/flush_data.npy'
+    if os.path.exists(flush_datafile):
+        with open(flush_datafile, 'rb') as f:
+            data = np.load(f).astype(np.double)
+    else:
+        data_size = detect_LLC_size() // 8 * 8  # size in doubles x8
+        columns_number = 100
+        rows_number = data_size // columns_number
+        data = np.random.rand(rows_number, columns_number).astype(np.double)
+        with open(flush_datafile, 'wb') as f:
+            np.save(f, data)
+
+    iterations_to_flush = 3
+    try:
+        from sklearnex.cluster import KMeans
+    except ImportError:
+        from sklearn.cluster import KMeans
+    for number_flush_iteration in range(iterations_to_flush):
+        model = KMeans(max_iter=3, tol=1e-7).fit(data)
+
+
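+# Box-filter timing: repeat the measured call until either n_meas samples have
+# been collected or the accumulated time exceeds time_limit. When
+# --target-stage is set, only the matching stage gets the full measurement
+# budget; all other stages are timed just once.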
+def time_box_filter(func, *args, params, stage, **kwargs):
+    flush_caches_flag = params.flush_caches
+    if params.target_stage != 'default':
+        if params.target_stage == stage:
+            time_limit = params.time_limit_analysis
+            n_meas = params.box_filter_measurements_analysis
+            is_the_target_stage = True
+        else:
+            time_limit = 0
+            n_meas = 1
+            is_the_target_stage = False
+    else:
+        time_limit = params.time_limit
+        n_meas = params.box_filter_measurements
+        is_the_target_stage = True
+
     times = []
     while len(times) < n_meas:
+        if flush_caches_flag:
+            flush_caches()
+
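+        # Limit profiler collection to the measured call: resume ITT just
+        # before it and pause again right after.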
+        if params.ittpy and is_the_target_stage and itt_module_installed:
+            itt.resume()
         t0 = timeit.default_timer()
         val = func(*args, **kwargs)
         t1 = timeit.default_timer()
+        if params.ittpy and is_the_target_stage and itt_module_installed:
+            itt.pause()
         times.append(t1 - t0)
         if sum(times) > time_limit:
             break
@@ -560,7 +644,9 @@ def print_output(library, algorithm, stages, params, functions,
                 result['algorithm_parameters']['init'] = 'random'
             result['algorithm_parameters'].pop('handle', None)
             output.append(result)
+        print('# Intel(R) Extension for Scikit-learn case result:')
         print(json.dumps(output, indent=4))
+        print('# Intel(R) Extension for Scikit-learn case finished.')
 
 
 def run_with_context(params, function):