Skip to content

Commit ca86f29

Browse files
authored
Merge pull request #141 from tekaracybersolutions/ps_everywhere-enhancement
chg: adapt ps_everywhere analyzer to use JSONL format
2 parents 3533e41 + 65ef0e3 commit ca86f29

File tree

2 files changed

+241
-110
lines changed

2 files changed

+241
-110
lines changed
Lines changed: 240 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#! /usr/bin/env python3
22

3-
from sysdiagnose.utils.base import BaseAnalyserInterface
3+
from typing import Generator
4+
from sysdiagnose.utils.base import BaseAnalyserInterface, logger
45
from sysdiagnose.parsers.ps import PsParser
56
from sysdiagnose.parsers.psthread import PsThreadParser
67
from sysdiagnose.parsers.spindumpnosymbols import SpindumpNoSymbolsParser
@@ -12,122 +13,254 @@
1213

1314

1415
class PsEverywhereAnalyser(BaseAnalyserInterface):
16+
"""
17+
Analyser that gathers process information from multiple sources
18+
to build a comprehensive list of running processes across different system logs.
19+
"""
20+
1521
description = "List all processes we can find a bit everywhere."
16-
format = "json"
22+
format = "jsonl"
1723

1824
def __init__(self, config: dict, case_id: str):
1925
super().__init__(__file__, config, case_id)
2026
self.all_ps = set()
2127

22-
def execute(self):
23-
# the order of below is important: we want to have the most detailed information first
24-
# - first processes with full path and parameters
25-
# - then processes with full path and no parameters
26-
# - then processes no full path and no parameters
27-
28-
# processes with full path and parameters, no threads
29-
ps_json = PsParser(self.config, self.case_id).get_result()
30-
self.all_ps.update([p['command'] for p in ps_json])
31-
print(f"{len(self.all_ps)} entries after ps")
32-
33-
# processes with full path and parameters
34-
35-
psthread_json = PsThreadParser(self.config, self.case_id).get_result()
36-
self.all_ps.update([p['command'] for p in psthread_json])
37-
print(f"{len(self.all_ps)} entries after psthread")
38-
39-
# processes with full path, no parameters, with threads
40-
spindumpnosymbols_json = SpindumpNoSymbolsParser(self.config, self.case_id).get_result()
41-
for p in spindumpnosymbols_json:
42-
if 'process' not in p:
43-
continue
44-
try:
45-
self.add_if_full_command_is_not_in_set(p['path'])
46-
# all_ps.add(f"{p['path']}::#{len(p['threads'])}") # count is different than in taskinfo
47-
except KeyError:
48-
if p['process'] == 'kernel_task [0]':
49-
self.all_ps.add('/kernel') # is similar to the other formats
50-
else:
51-
self.add_if_full_command_is_not_in_set(p['process']) # backup uption to keep trace of this anomaly
52-
for t in p['threads']:
53-
try:
54-
self.add_if_full_command_is_not_in_set(f"{p['path']}::{t['thread_name']}")
55-
except KeyError:
56-
pass
57-
print(f"{len(self.all_ps)} entries after spindumpnosymbols")
58-
59-
# processes with full path, no parameters, no threads
60-
shutdownlogs_json = ShutdownLogsParser(self.config, self.case_id).get_result()
61-
for p in shutdownlogs_json:
62-
# not using 'path' but 'command', as the path being appended by the UUID will be counter productive to normalisation
63-
self.add_if_full_command_is_not_in_set(p['command'])
64-
print(f"{len(self.all_ps)} entries after shutdownlogs")
65-
66-
# processes with full path, no parameters, no threads
67-
logarchive_procs = set()
68-
for event in LogarchiveParser(self.config, self.case_id).get_result():
69-
try:
70-
logarchive_procs.add(event['process'])
71-
except KeyError:
72-
pass
73-
74-
for entry in logarchive_procs:
75-
self.add_if_full_command_is_not_in_set(entry)
76-
print(f"{len(self.all_ps)} entries after logarchive")
77-
78-
# processes with full path, no parameters, no threads
79-
uuid2path_json = UUID2PathParser(self.config, self.case_id).get_result()
80-
for item in uuid2path_json.values():
81-
self.add_if_full_command_is_not_in_set(item)
82-
print(f"{len(self.all_ps)} entries after uuid2path")
83-
84-
# processes no full path, no parameters, with threads
85-
taskinfo_json = TaskinfoParser(self.config, self.case_id).get_result()
86-
# p['name'] is the short version of COMMAND, so incompatible with the other formats.
87-
# on the other hand it may contain valuable stuff, so we use it in 2 formats
88-
# - name::#num_of_threads
89-
# - name::thread name
90-
for p in taskinfo_json:
91-
if 'name' not in p:
92-
continue
93-
self.add_if_full_path_is_not_in_set(p['name'])
94-
# add_if_full_path_is_not_in_set(f"{p['name']}::#{len(p['threads'])}") # count is different than in spindumpnosymbols
95-
for t in p['threads']:
96-
try:
97-
self.add_if_full_path_is_not_in_set(f"{p['name']}::{t['thread name']}")
98-
except KeyError:
99-
pass
100-
print(f"{len(self.all_ps)} entries after taskinfo")
101-
102-
# processes no full path, no parameters, no threads
103-
remotectl_dumpstate_json = RemotectlDumpstateParser(self.config, self.case_id).get_result()
104-
if remotectl_dumpstate_json:
105-
for p in remotectl_dumpstate_json['Local device']['Services']:
106-
self.add_if_full_path_is_not_in_set(p)
107-
108-
print(f"{len(self.all_ps)} entries after remotectl_dumpstate")
109-
110-
# TODO powerlogs - bundleID, ProcessName
111-
112-
self.all_ps = list(self.all_ps)
113-
self.all_ps.sort()
114-
return self.all_ps
115-
116-
def add_if_full_path_is_not_in_set(self, name: str):
28+
@staticmethod
29+
def _strip_flags(process: str) -> str:
30+
"""
31+
Extracts the base command by removing everything after the first space.
32+
33+
:param process: Full process command string.
34+
:return: Command string without flags.
35+
"""
36+
process, *_ = process.partition(' ')
37+
return process
38+
39+
def execute(self) -> Generator[dict, None, None]:
40+
"""
41+
Executes all extraction methods dynamically, ensuring that each extracted process is unique.
42+
43+
:yield: A dictionary containing process details from various sources.
44+
"""
45+
for func in dir(self):
46+
if func.startswith(f"_{self.__class__.__name__}__extract_ps_"):
47+
yield from getattr(self, func)() # Dynamically call extract methods
48+
49+
def __extract_ps_base_file(self) -> Generator[dict, None, None]:
50+
"""
51+
Extracts process data from ps.txt.
52+
53+
:return: A generator yielding dictionaries containing process details from ps.txt.
54+
"""
55+
entity_type = 'ps.txt'
56+
try:
57+
for p in PsParser(self.config, self.case_id).get_result():
58+
ps_event = {
59+
'process': self._strip_flags(p['command']),
60+
'timestamp': p['timestamp'],
61+
'datetime': p['datetime'],
62+
'source': entity_type
63+
}
64+
if self.add_if_full_command_is_not_in_set(ps_event['process']):
65+
yield ps_event
66+
except Exception as e:
67+
logger.exception(f"ERROR while extracting {entity_type} file. {e}")
68+
69+
def __extract_ps_thread_file(self) -> Generator[dict, None, None]:
70+
"""
71+
Extracts process data from psthread.txt.
72+
73+
:return: A generator yielding dictionaries containing process details from psthread.txt.
74+
"""
75+
entity_type = 'psthread.txt'
76+
try:
77+
for p in PsThreadParser(self.config, self.case_id).get_result():
78+
ps_event = {
79+
'process': self._strip_flags(p['command']),
80+
'timestamp': p['timestamp'],
81+
'datetime': p['datetime'],
82+
'source': entity_type
83+
}
84+
if self.add_if_full_command_is_not_in_set(ps_event['process']):
85+
yield ps_event
86+
except Exception as e:
87+
logger.exception(f"ERROR while extracting {entity_type} file. {e}")
88+
89+
def __extract_ps_spindump_nosymbols_file(self) -> Generator[dict, None, None]:
90+
"""
91+
Extracts process data from spindump-nosymbols.txt.
92+
93+
:return: A generator yielding dictionaries containing process and thread details from spindump-nosymbols.txt.
94+
"""
95+
entity_type = 'spindump-nosymbols.txt'
96+
try:
97+
for p in SpindumpNoSymbolsParser(self.config, self.case_id).get_result():
98+
if 'process' not in p:
99+
continue
100+
process_name = p.get('path', '/kernel' if p['process'] == 'kernel_task [0]' else p['process'])
101+
102+
if self.add_if_full_command_is_not_in_set(self._strip_flags(process_name)):
103+
yield {
104+
'process': self._strip_flags(process_name),
105+
'timestamp': p['timestamp'],
106+
'datetime': p['datetime'],
107+
'source': entity_type
108+
}
109+
110+
for t in p['threads']:
111+
try:
112+
thread_name = f"{self._strip_flags(process_name)}::{t['thread_name']}"
113+
if self.add_if_full_command_is_not_in_set(thread_name):
114+
yield {
115+
'process': thread_name,
116+
'timestamp': p['timestamp'],
117+
'datetime': p['datetime'],
118+
'source': entity_type
119+
}
120+
except KeyError:
121+
pass
122+
except Exception as e:
123+
logger.exception(f"ERROR while extracting {entity_type} file. {e}")
124+
125+
def __extract_ps_shutdownlogs(self) -> Generator[dict, None, None]:
126+
"""
127+
Extracts process data from shutdown logs.
128+
129+
:return: A generator yielding dictionaries containing process details from shutdown logs.
130+
"""
131+
entity_type = 'shutdown.logs'
132+
try:
133+
for p in ShutdownLogsParser(self.config, self.case_id).get_result():
134+
if self.add_if_full_command_is_not_in_set(self._strip_flags(p['command'])):
135+
yield {
136+
'process': self._strip_flags(p['command']),
137+
'timestamp': p['timestamp'],
138+
'datetime': p['datetime'],
139+
'source': entity_type
140+
}
141+
except Exception as e:
142+
logger.exception(f"ERROR while extracting {entity_type}. {e}")
143+
144+
def __extract_ps_logarchive(self) -> Generator[dict, None, None]:
145+
"""
146+
Extracts process data from logarchive.
147+
148+
:return: A generator yielding dictionaries containing process details from logarchive.
149+
"""
150+
entity_type = 'log archive'
151+
try:
152+
for p in LogarchiveParser(self.config, self.case_id).get_result():
153+
if self.add_if_full_command_is_not_in_set(self._strip_flags(p['process'])):
154+
yield {
155+
'process': self._strip_flags(p['process']),
156+
'timestamp': p['timestamp'],
157+
'datetime': p['datetime'],
158+
'source': entity_type
159+
}
160+
except Exception as e:
161+
logger.exception(f"ERROR while extracting {entity_type}. {e}")
162+
163+
def __extract_ps_uuid2path(self) -> Generator[dict, None, None]:
164+
"""
165+
Extracts process data from UUID2PathParser.
166+
167+
:return: A generator yielding process data from uuid2path.
168+
"""
169+
entity_type = 'uuid2path'
170+
try:
171+
for p in UUID2PathParser(self.config, self.case_id).get_result().values():
172+
if self.add_if_full_command_is_not_in_set(self._strip_flags(p)):
173+
yield {
174+
'process': self._strip_flags(p),
175+
'timestamp': None,
176+
'datetime': None,
177+
'source': entity_type
178+
}
179+
except Exception as e:
180+
logger.exception(f"ERROR while extracting {entity_type}. {e}")
181+
182+
def __extract_ps_taskinfo(self) -> Generator[dict, None, None]:
183+
"""
184+
Extracts process and thread information from TaskinfoParser.
185+
186+
:return: A generator yielding process and thread information from taskinfo.
187+
"""
188+
entity_type = 'taskinfo.txt'
189+
try:
190+
for p in TaskinfoParser(self.config, self.case_id).get_result():
191+
if 'name' not in p:
192+
continue
193+
194+
if self.add_if_full_path_is_not_in_set(self._strip_flags(p['name'])):
195+
yield {
196+
'process': self._strip_flags(p['name']),
197+
'timestamp': p['timestamp'],
198+
'datetime': p['datetime'],
199+
'source': entity_type
200+
}
201+
202+
for t in p['threads']:
203+
try:
204+
thread_name = f"{self._strip_flags(p['name'])}::{t['thread name']}"
205+
if self.add_if_full_path_is_not_in_set(thread_name):
206+
yield {
207+
'process': thread_name,
208+
'timestamp': p['timestamp'],
209+
'datetime': p['datetime'],
210+
'source': entity_type
211+
}
212+
except KeyError:
213+
pass
214+
except Exception as e:
215+
logger.exception(f"ERROR while extracting {entity_type}. {e}")
216+
217+
def __extract_ps_remotectl_dumpstate(self) -> Generator[dict, None, None]:
218+
"""
219+
Extracts process data from RemotectlDumpstateParser.
220+
221+
:return: A generator yielding process data from remotectl_dumpstate.txt.
222+
"""
223+
entity_type = 'remotectl_dumpstate.txt'
224+
try:
225+
remotectl_dumpstate_json = RemotectlDumpstateParser(self.config, self.case_id).get_result()
226+
if remotectl_dumpstate_json:
227+
for p in remotectl_dumpstate_json['Local device']['Services']:
228+
if self.add_if_full_path_is_not_in_set(self._strip_flags(p)):
229+
yield {
230+
'process': self._strip_flags(p),
231+
'timestamp': None,
232+
'datetime': None,
233+
'source': entity_type
234+
}
235+
except Exception as e:
236+
logger.exception(f"ERROR while extracting {entity_type}. {e}")
237+
238+
def add_if_full_path_is_not_in_set(self, name: str) -> bool:
239+
"""
240+
Ensures that a process path is unique before adding it to the shared set.
241+
242+
:param name: Process path name
243+
:return: True if the process was not in the set and was added, False otherwise.
244+
"""
117245
for item in self.all_ps:
118-
# no need to add it in the following cases
119246
if item.endswith(name):
120-
return
121-
if item.split('::').pop(0).endswith(name):
122-
return
123-
if '::' not in item and item.split(' ').pop(0).endswith(name):
124-
# this will break with commands that have a space, but looking at the data this should not happen often
125-
return
247+
return False
248+
if item.split('::')[0].endswith(name):
249+
return False
250+
if '::' not in item and item.split(' ')[0].endswith(name):
251+
return False # This covers cases with space-separated commands
126252
self.all_ps.add(name)
253+
return True
254+
255+
def add_if_full_command_is_not_in_set(self, name: str) -> bool:
256+
"""
257+
Ensures that a process command is unique before adding it to the shared set.
127258
128-
def add_if_full_command_is_not_in_set(self, name: str):
259+
:param name: Process command name
260+
:return: True if the process was not in the set and was added, False otherwise.
261+
"""
129262
for item in self.all_ps:
130263
if item.startswith(name):
131-
# no need to add it
132-
return
264+
return False
133265
self.all_ps.add(name)
266+
return True

tests/test_analysers_ps_everywhere.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,10 @@ def test_analyse_ps_everywhere(self):
1111
# run the analyser
1212
a = PsEverywhereAnalyser(self.sd.config, case_id=case_id)
1313
a.save_result(force=True)
14+
1415
self.assertTrue(os.path.isfile(a.output_file))
1516
self.assertTrue(os.path.getsize(a.output_file) > 0)
1617

17-
result = a.get_result()
18-
self.assertGreater(len(result), 0)
19-
2018

2119
if __name__ == '__main__':
2220
unittest.main()

0 commit comments

Comments
 (0)