Skip to content

Commit fb5575f

Browse files
committed
Add opportunity to collect performance data with VTune, emon, psrecord
1 parent 2c1fec5 commit fb5575f

25 files changed

+491
-88
lines changed

Diff for: bench.py

+92-6
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,18 @@
2020
import sys
2121
import timeit
2222
import re
23+
import platform
24+
import hashlib
25+
import os
26+
import subprocess
2327

2428
import numpy as np
2529
import sklearn
26-
30+
# ittpy is an optional dependency: it is only used for the itt.pause() /
# itt.resume() hooks. Record whether it could be imported so those hooks can
# be skipped when it is absent. Only ImportError is caught, so that
# KeyboardInterrupt / SystemExit are not silently swallowed.
try:
    import itt
    itt_module_installed = True
except ImportError:
    itt_module_installed = False
2735

2836
def get_dtype(data):
2937
'''
@@ -159,6 +167,8 @@ def parse_args(parser, size=None, loop_types=(),
159167
parser.add_argument('--time-method', type=str, default='box_filter',
160168
choices=('box_filter'),
161169
help='Method used for time mesurements')
170+
parser.add_argument('--box-filter-measurements-analysis', type=int, default=100,
171+
help='Maximum number of measurements in box filter (for analyzed stage)')
162172
parser.add_argument('--box-filter-measurements', type=int, default=100,
163173
help='Maximum number of measurements in box filter')
164174
parser.add_argument('--inner-loops', default=100, type=int,
@@ -167,6 +177,8 @@ def parse_args(parser, size=None, loop_types=(),
167177
parser.add_argument('--outer-loops', default=100, type=int,
168178
help='Maximum outer loop iterations '
169179
'(we take the min over outer iterations)')
180+
parser.add_argument('--time-limit-analysis', default=10., type=float,
181+
help='Target time to spend to benchmark (for analyzed stage)')
170182
parser.add_argument('--time-limit', default=10., type=float,
171183
help='Target time to spend to benchmark')
172184
parser.add_argument('--goal-outer-loops', default=10,
@@ -186,6 +198,25 @@ def parse_args(parser, size=None, loop_types=(),
186198
parser.add_argument('--device', default='none', type=str,
187199
choices=('host', 'cpu', 'gpu', 'none'),
188200
help='Execution context device')
201+
parser.add_argument('--emon', default=False,
202+
action='store_true',
203+
help='Should emon profiling be started')
204+
parser.add_argument('--vtune', default=False,
205+
action='store_true',
206+
help='Should vtune profiling be started')
207+
parser.add_argument('--psrecord', default=False,
208+
action='store_true',
209+
help='Should psrecord profiling be started')
210+
parser.add_argument('--ittpy', default=False,
211+
action='store_true',
212+
help='Should ittpy domains be integrated')
213+
parser.add_argument('--sgx-gramine', default=False,
214+
action='store_true',
215+
help='Should benchmark run with Gramine & Intel(R) SGX')
216+
parser.add_argument('--flush-caches', default=False,
217+
action='store_true',
218+
help='Should benchmark flush CPU caches after each run during measuring')
219+
parser.add_argument('--target-stage', type=str, default='default', help='Select target stage for analysis.')
189220

190221
for data in ['X', 'y']:
191222
for stage in ['train', 'test']:
@@ -201,6 +232,9 @@ def parse_args(parser, size=None, loop_types=(),
201232

202233
params = parser.parse_args()
203234

235+
if params.ittpy and itt_module_installed:
236+
itt.pause()
237+
204238
if not params.no_intel_optimized:
205239
try:
206240
from sklearnex import patch_sklearn
@@ -272,18 +306,68 @@ def prepare_daal_threads(num_threads=-1):
272306
return num_threads
273307

274308

275-
def measure_function_time(func, *args, params, **kwargs):
276-
return time_box_filter(func, *args,
277-
n_meas=params.box_filter_measurements,
278-
time_limit=params.time_limit, **kwargs)
309+
def measure_function_time(func, *args, params, stage, **kwargs):
    '''
    Time ``func`` by delegating to ``time_box_filter``.

    All positional and keyword arguments are forwarded unchanged, together
    with the benchmark ``params`` and the name of the ``stage`` being
    measured. Returns whatever ``time_box_filter`` returns.
    '''
    return time_box_filter(func, *args, params=params, stage=stage, **kwargs)
279312

280313

281-
def time_box_filter(func, *args, n_meas, time_limit, **kwargs):
314+
def detect_LLC_size():
    '''
    Return the size in bytes of cpu0's ``index3`` cache as reported by sysfs.

    The sysfs entry is read as a number followed by an optional unit suffix
    (e.g. ``32768K``). ``K``, ``M`` and ``G`` suffixes are interpreted as
    powers of 1024; a value without a recognised suffix is taken as a plain
    byte count.

    Raises
    ------
    OSError
        If the sysfs entry does not exist or cannot be read.
    ValueError
        If the content cannot be parsed as a size.
    '''
    with open('/sys/devices/system/cpu/cpu0/cache/index3/size', 'r') as f:
        llc_size_str = f.readline().strip()

    unit_multipliers = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3}
    multiplier = unit_multipliers.get(llc_size_str[-1:].upper())
    if multiplier is None:
        # No known unit suffix: do not drop the last character blindly.
        return int(llc_size_str)
    return int(llc_size_str[:-1]) * multiplier
319+
320+
321+
def flush_caches():
    '''
    Push previously used data out of the CPU caches.

    A large array of doubles is loaded from ``data/flush_data.npy`` (or
    generated and saved there on first use), then a short KMeans fit is run
    over it several times so that the array is streamed through the caches.
    The fitted models are discarded.
    '''
    flush_datafile = 'data/flush_data.npy'
    if os.path.exists(flush_datafile):
        with open(flush_datafile, 'rb') as f:
            data = np.load(f).astype(np.double)
    else:
        # The element count is the LLC size in bytes rounded down to a
        # multiple of 8, so the buffer of 8-byte doubles is about 8x the LLC.
        data_size = detect_LLC_size() // 8 * 8
        columns_number = 100
        rows_number = data_size // columns_number
        data = np.random.rand(rows_number, columns_number).astype(np.double)
        # The target directory may not exist yet on a fresh checkout.
        os.makedirs(os.path.dirname(flush_datafile), exist_ok=True)
        with open(flush_datafile, 'wb') as f:
            np.save(f, data)

    iterations_to_flush = 3
    # Prefer the accelerated implementation when it is installed.
    try:
        from sklearnex.cluster import KMeans
    except ImportError:
        from sklearn.cluster import KMeans
    for _ in range(iterations_to_flush):
        KMeans(max_iter=3, tol=1e-7).fit(data)
341+
342+
343+
def time_box_filter(func, *args, params, stage, **kwargs):
344+
flush_caches_flag = params.flush_caches
345+
if params.target_stage != 'default':
346+
if params.target_stage == stage:
347+
time_limit = params.time_limit_analysis
348+
n_meas = params.box_filter_measurements_analysis
349+
is_the_target_stage = True
350+
else:
351+
time_limit = 0
352+
n_meas = 1
353+
is_the_target_stage = False
354+
else:
355+
time_limit = params.time_limit
356+
n_meas = params.box_filter_measurements
357+
is_the_target_stage = True
358+
282359
times = []
283360
while len(times) < n_meas:
361+
if flush_caches_flag:
362+
flush_caches()
363+
364+
if params.ittpy and is_the_target_stage and itt_module_installed:
365+
itt.resume()
284366
t0 = timeit.default_timer()
285367
val = func(*args, **kwargs)
286368
t1 = timeit.default_timer()
369+
if params.ittpy and is_the_target_stage and itt_module_installed:
370+
itt.pause()
287371
times.append(t1 - t0)
288372
if sum(times) > time_limit:
289373
break
@@ -560,7 +644,9 @@ def print_output(library, algorithm, stages, params, functions,
560644
result['algorithm_parameters']['init'] = 'random'
561645
result['algorithm_parameters'].pop('handle', None)
562646
output.append(result)
647+
print('# Intel(R) Extension for Scikit-learn case result:')
563648
print(json.dumps(output, indent=4))
649+
print('# Intel(R) Extension for Scikit-learn case finished.')
564650

565651

566652
def run_with_context(params, function):

0 commit comments

Comments
 (0)