Skip to content

Commit 81ea614

Browse files
author
Satheesh Rajendran
committed
Add support for monitors
Add support for monitors to the framework. Monitors capture snapshots of system details by running user-provided commands at predefined intervals and storing the output in a file under test-reports, which can then be used for further processing. Usage: the `monitors` file in the basepath documents how to define a monitor instance. Running the tests with `--enable-monitors` makes the framework run the monitor threads in parallel with the tests and collect their output; the monitor threads are stopped at the end of the tests. An optional regular expression helps extract useful information into the final output file. Signed-off-by: Satheesh Rajendran <[email protected]>
1 parent 0eff377 commit 81ea614

File tree

4 files changed

+274
-0
lines changed

4 files changed

+274
-0
lines changed

OpTestConfiguration.py

+6
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,12 @@ def get_parser():
424424
misc_group.add_argument("--accept-unknown-args", default=False, action='store_true',
425425
help="Don't exit if we find unknown command line arguments")
426426

427+
monitorgroup = parser.add_argument_group('Monitor',
428+
'Monitor enable commands')
429+
monitorgroup.add_argument("--enable-monitors", help="If set, monitors will be enabled",
430+
action='store_true', default=False)
431+
monitorgroup.add_argument("--monitor-file", help="provide the monitors file, monitors given in the file will be enabled",
432+
default="./monitors")
427433
return parser
428434

429435

common/OpTestMonitor.py

+227
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
#!/usr/bin/env python3
2+
# OpenPOWER Automated Test Project
3+
#
4+
# Contributors Listed Below - COPYRIGHT 2019
5+
# [+] International Business Machines Corp.
6+
#
7+
#
8+
# Licensed under the Apache License, Version 2.0 (the "License");
9+
# you may not use this file except in compliance with the License.
10+
# You may obtain a copy of the License at
11+
#
12+
# http://www.apache.org/licenses/LICENSE-2.0
13+
#
14+
# Unless required by applicable law or agreed to in writing, software
15+
# distributed under the License is distributed on an "AS IS" BASIS,
16+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
17+
# implied. See the License for the specific language governing
18+
# permissions and limitations under the License.
19+
#
20+
21+
'''
22+
Monitor library
23+
---------------------
24+
This adds a support to add user defined monitors
25+
'''
26+
27+
import re
28+
import os
29+
import time
30+
import threading
31+
32+
import OpTestConfiguration
33+
from .OpTestSystem import OpSystemState
34+
from .Exceptions import CommandFailed
35+
36+
import OpTestLogger
37+
log = OpTestLogger.optest_logger_glob.get_logger(__name__)
38+
39+
40+
class monitorThread(threading.Thread):
    """
    Thread that runs one monitor command and records its output.

    For ``env == 'sut'`` the command is run through an SSH connection
    obtained from the host object whenever the system is in the OS state.
    The recorded output goes to a file named after the monitor inside
    ``conf.output``.

    params: cmd: dict describing the monitor
                 'cmd'     - command to run (required)
                 'freq'    - seconds between two runs, 0 (or less) runs the
                             command only once
                 'env'     - 'sut' to run inside the host; 'server' and
                             'bmc' are not implemented yet
                 'name'    - monitor name, by default the command with
                             spaces replaced by underscores
                 'pattern' - regular expression, the first match found in
                             the command output is recorded; when empty or
                             "*" the complete output is recorded
    """

    # Seconds between two checks of the system state / stop request while a
    # run-once monitor waits for the system to reach the OS state.
    IDLE_POLL = 1

    def __init__(self, cmd):
        threading.Thread.__init__(self)
        # .get() keeps dicts without optional keys (e.g. no 'pattern')
        # usable instead of failing with KeyError.
        self.env = cmd.get('env', 'sut')
        self.cmd = cmd['cmd']
        self.freq = int(cmd.get('freq') or 0)
        self.name = cmd.get('name') or self.cmd.replace(' ', '_')
        self.pattern = cmd.get('pattern') or "*"
        self._stop_event = threading.Event()
        self.conf = OpTestConfiguration.conf
        # TODO: consider adding all monitor output into separate folder
        self.host = self.conf.host()
        self.system = self.conf.system()
        self.console = None
        if self.env == 'sut':
            try:
                self.console = self.host.get_new_ssh_connection(self.name)
            except Exception as err:
                # might not be yet in OS state, run() retries the connection
                log.info("Monitor %s has no SSH console yet: %s",
                         self.name, err)
        elif self.env not in ('server', 'bmc'):
            log.warning("Unknown env given to run monitors, give either sut to "
                        "run inside host or server to run ipmi commands")

    @staticmethod
    def _compile_pattern(pattern):
        """
        Compile the monitor pattern.

        returns: compiled regular expression, or None when the pattern is
                 empty or the "*" wildcard (meaning: keep complete output)
        raises: re.error when the pattern is not a valid regular expression
        """
        if not pattern or pattern == "*":
            # A bare "*" is not a valid regular expression, treat it as the
            # "record everything" wildcard used in the monitors file.
            return None
        return re.compile(pattern)

    @staticmethod
    def _extract(regex, output):
        """
        Reduce the command output (list of lines) to the text to record.

        returns: complete output when regex is None, the first match of
                 regex otherwise, None when regex does not match
        """
        text = '\n'.join(output)
        if regex is None:
            return text
        matches = regex.findall(text)
        if not matches:
            return None
        # findall() yields tuples when the pattern has several groups
        return str(matches[0])

    def _sample(self, regex, fd):
        """
        Run the monitor command once and append the result to fd.

        returns: True when the command was attempted, False when no SSH
                 console could be obtained
        """
        if not self.console:
            log.warning('Reconnecting SSH console...')
            try:
                self.console = self.host.get_new_ssh_connection(self.name)
            except Exception as err:
                # keep the monitor alive, the next cycle tries again
                log.warning('Monitor %s failed to get SSH console: %s',
                            self.name, err)
                self.console = None
            if not self.console:
                return False
        try:
            output = self.console.run_command(self.cmd)
        except CommandFailed:
            log.warning('Monitor cmd failed to run %s', self.cmd)
            return True
        record = self._extract(regex, output)
        if record is not None:
            fd.write(record)
            fd.write('\n')
            # make the sample visible even if the run ends abruptly
            fd.flush()
        return True

    def run(self):
        log.info("Starting monitor %s", self.name)
        self.executed = False
        try:
            regex = self._compile_pattern(self.pattern)
        except re.error as err:
            log.warning("Invalid pattern %r for monitor %s (%s), recording "
                        "complete output", self.pattern, self.name, err)
            regex = None
        self.monitor_output = os.path.join(self.conf.output, self.name)
        periodic = self.freq > 0
        delay = self.freq if periodic else self.IDLE_POLL
        # 'with' closes the file on every exit path (stop, unsupported env,
        # unexpected exception).
        with open(self.monitor_output, "w+") as fd:
            if self.env != 'sut':
                # TODO: implement 'server' and 'bmc' monitors
                log.warning("Yet to implement")
                return
            while not self.is_stopped():
                if self.system.state == OpSystemState.OS:
                    attempted = self._sample(regex, fd)
                    if attempted and not periodic:
                        # FIXME: NEED add support for running long run cmds
                        self.executed = True
                        break
                # Waiting on the stop event (instead of sleeping) paces the
                # loop and lets stop() end the monitor without delay.
                self.wait(delay)

    def stop(self):
        """Ask the monitor to finish, run() returns at its next check."""
        log.info("Stopping monitor %s", self.name)
        self._stop_event.set()

    def is_stopped(self):
        """Return True once stop() was called."""
        return self._stop_event.is_set()

    def wait(self, delaysec=5):
        """
        Block for up to delaysec seconds or until stop() is called.

        returns: True when the monitor was asked to stop
        """
        return self._stop_event.wait(delaysec)
148+
149+
150+
class Monitors(object):
    """
    Collection of monitor threads built from a monitors file or from a
    single monitor dict.
    """

    # Values used for fields missing (or left empty) in a monitor definition
    DEFAULT_MONITOR = {'cmd': None,
                       'freq': 0,
                       'env': 'sut',
                       'name': None,
                       'pattern': None}

    def __init__(self, monitor_cmd_path=None, monitor_cmd=None):
        """
        Monitor class to create monitor threads
        params: monitor_cmd_path: file with monitor information, by default
                                  the 'monitors' file next to this module
                                  or in its parent directory is used
        params: monitor_cmd: dict type optional monitor, if given will take the
                             precedence over monitor_cmd_path argument,
                             can be used inside testcase, E:g:-
                             {'cmd': 'vmstat',
                              'freq': 2,
                              'env': 'sut',
                              'name': 'vmstat-1'}
        """
        self.conf = OpTestConfiguration.conf
        self.path = monitor_cmd_path if monitor_cmd_path else self._default_path()
        self.monitors = []
        # Optional and if given takes precedence
        if monitor_cmd:
            self.monitors.append(monitor_cmd)
        else:
            # the file only matters when no explicit monitor was given
            if not os.path.isfile(self.path):
                log.warning("Check the monitor command path, given path is not valid: %s", self.path)
            self.monitors = self.parse_monitors()
        self.host = self.conf.host()
        self.system = self.conf.system()
        self.monitorthreads = []

    @staticmethod
    def _default_path():
        """
        Locate the default 'monitors' file.

        The directory of this module is checked first, then its parent
        directory. NOTE(review): the parent is presumably the basepath
        where the 'monitors' file is kept - confirm against the repo layout.
        When neither exists the module directory candidate is returned.
        """
        here = os.path.dirname(os.path.abspath(__file__))
        candidates = [os.path.join(here, 'monitors'),
                      os.path.join(os.path.dirname(here), 'monitors')]
        for candidate in candidates:
            if os.path.isfile(candidate):
                return candidate
        return candidates[0]

    @staticmethod
    def _parse_line(line, defaults):
        """
        Turn one 'cmd,freq,env,name,pattern' line into a monitor dict.

        Missing or empty fields keep the value from defaults.
        raises: ValueError when the command is missing or the frequency is
                not an integer
        """
        # Split only on the first four commas so that a regular expression
        # containing commas (e.g. "\d{1,3}") is kept intact.
        fields = line.split(',', 4)
        entry = dict(defaults)
        for key, raw in zip(('cmd', 'freq', 'env', 'name'), fields):
            value = raw.strip()
            if value:
                entry[key] = value
        if not entry['cmd']:
            raise ValueError("missing command")
        entry['freq'] = int(entry['freq'])
        if len(fields) == 5:
            pattern = fields[4]
            # the documented format ends each line with a separator comma
            if pattern.endswith(','):
                pattern = pattern[:-1]
            if pattern:
                entry['pattern'] = pattern
        return entry

    def parse_monitors(self):
        """
        Read the monitors file given by self.path.

        Blank lines and lines starting with '#' are ignored, invalid lines
        are reported and skipped.
        returns: list of monitor dicts, empty when the file is not readable
        """
        monitor_list = []
        try:
            with open(self.path) as monitor_obj:
                monitor_content = monitor_obj.read().splitlines()
        except (OSError, UnicodeError) as err:
            log.warning("Error reading monitor cmd file %s: %s", self.path, err)
            return monitor_list
        for lineno, item in enumerate(monitor_content, 1):
            stripped = item.strip()
            if not stripped or stripped.startswith("#"):
                continue
            try:
                monitor_list.append(self._parse_line(item, self.DEFAULT_MONITOR))
            except ValueError as err:
                # report the bad entry instead of silently dropping the
                # rest of the file
                log.warning("Skipping invalid monitor at %s:%d: %s",
                            self.path, lineno, err)
        return monitor_list

    def create_monitor_threads(self):
        """Create one monitorThread per monitor, returns all the threads."""
        for prof in self.monitors:
            self.monitorthreads.append(monitorThread(prof))
        return self.monitorthreads

    def run(self):
        """Create and start the monitor threads."""
        self.create_monitor_threads()
        for thread in self.monitorthreads:
            thread.start()

    def stop(self):
        """Ask every monitor thread to stop."""
        for thread in self.monitorthreads:
            thread.stop()

    def join(self):
        """Wait until every monitor thread has finished."""
        for thread in self.monitorthreads:
            thread.join()

monitors

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Supported format to run monitors
2+
# command,frequency in seconds,where to run,name of monitor,regular expression
3+
# Eg:-
4+
# date,2,sut,date-1
5+
#
6+
# Above line will create a monitor which runs `date`
7+
# command every 2 seconds inside host using SSH session
8+
# and stores the output in a file named *date-1*.log
9+
# in respective test-reports folder.
10+
#
11+
# command: Any command that is available in the place where it runs.
12+
#
13+
# frequency in seconds: Takes any integer value, `0` is a special value
14+
# where given command itself will run in batch and no
15+
# need to run the command in intervals.
16+
#
17+
# where to run: Currently supports only in `sut` ie. Host for which
18+
# test is run.
19+
# TODO:-
20+
# server - runs commands in the server where optest runs.
21+
# bmc - runs commands inside bmc.
22+
#
23+
# name of monitor: Name to be used to represent the monitor
24+
# by default, command name is used.
25+
#
26+
# regular expression: To extract useful information from monitor output file.
27+
#
28+
#
29+
#
30+
#date,2,sut,date-1,*,
31+
#vmstat 1,0,sut,test1,*,
32+
#date,2,sut,,*,
33+
#lparstat 1,2,sut,,.*---\n([(\d+.\d+)\s+]+),

op-test

+8
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ from testcases import OpTestSensors
102102
from testcases import OpTestSwitchEndianSyscall
103103
from testcases import OpTestHostboot
104104
from testcases import OpTestExample
105+
from common.OpTestMonitor import Monitors
105106
import OpTestConfiguration
106107
import sys
107108
import time
@@ -952,6 +953,10 @@ try:
952953
OpTestConfiguration.conf.util.cleanup()
953954
sys.exit(exit_code)
954955

956+
# create monitor instances
957+
if OpTestConfiguration.conf.args.enable_monitors:
958+
monitor = Monitors(monitor_cmd_path=OpTestConfiguration.conf.args.monitor_file)
959+
monitor.run()
955960
if not res or (res and not (res.errors or res.failures)):
956961
res = run_tests(t, failfast=OpTestConfiguration.conf.args.failfast)
957962
else:
@@ -986,6 +991,9 @@ except Exception as e:
986991
exit_code = -1
987992
sys.exit(exit_code)
988993
finally:
994+
# stop monitor instances
995+
if OpTestConfiguration.conf.args.enable_monitors:
996+
monitor.stop()
989997
# Create a softlink to `latest` test results
990998
output = OpTestConfiguration.conf.logdir
991999
if not os.path.exists(output):

0 commit comments

Comments
 (0)