|
1 | 1 | #! /usr/bin/env python3
|
2 | 2 |
|
3 |
| -from sysdiagnose.utils.base import BaseAnalyserInterface |
| 3 | +from typing import Generator |
| 4 | +from sysdiagnose.utils.base import BaseAnalyserInterface, logger |
4 | 5 | from sysdiagnose.parsers.ps import PsParser
|
5 | 6 | from sysdiagnose.parsers.psthread import PsThreadParser
|
6 | 7 | from sysdiagnose.parsers.spindumpnosymbols import SpindumpNoSymbolsParser
|
|
12 | 13 |
|
13 | 14 |
|
class PsEverywhereAnalyser(BaseAnalyserInterface):
    """
    Analyser that gathers process information from multiple sources
    to build a comprehensive list of running processes across different system logs.

    Every extractor yields dictionaries with the keys ``process``, ``timestamp``,
    ``datetime`` and ``source``. A name is only yielded the first time it is seen:
    all extractors share ``self.all_ps`` to filter out names already reported.
    """

    description = "List all processes we can find a bit everywhere."
    format = "jsonl"

    def __init__(self, config: dict, case_id: str):
        """
        :param config: Configuration handed over to the base analyser and to every parser.
        :param case_id: Identifier of the case, handed over to the base analyser and to every parser.
        """
        super().__init__(__file__, config, case_id)
        # Names ("process" and "process::thread") already reported by any extractor.
        self.all_ps = set()

    @staticmethod
    def _strip_flags(process: str) -> str:
        """
        Extracts the base command by removing everything after the first space.

        :param process: Full process command string.
        :return: Command string without flags.
        """
        return process.partition(' ')[0]

    @staticmethod
    def _build_event(process: str, record: dict, source: str) -> dict:
        """
        Builds the dictionary emitted for one process (or thread) of a parsed record.

        :param process: Name to report in the ``process`` field.
        :param record: Parsed record; must provide the ``timestamp`` and ``datetime`` keys.
        :param source: Label of the artefact the record comes from.
        :return: Event dictionary with ``process``, ``timestamp``, ``datetime`` and ``source``.
        :raises KeyError: If ``record`` has no ``timestamp`` or ``datetime`` key.
        """
        return {
            'process': process,
            'timestamp': record['timestamp'],
            'datetime': record['datetime'],
            'source': source
        }

    def execute(self) -> Generator[dict, None, None]:
        """
        Runs every extractor and yields each newly seen process exactly once.

        The order is significant because de-duplication compares a candidate with
        the names already recorded: sources with full paths must run before the
        sources that only know short names, otherwise a short name is recorded
        first and the matching full path is reported as a second entry.
        The extractors are therefore listed explicitly instead of being discovered
        through ``dir()``, which returns names in alphabetical order.

        :yield: A dictionary containing process details from various sources.
        """
        extractors = (
            # full path (parameters stripped)
            self.__extract_ps_base_file,
            self.__extract_ps_thread_file,
            # full path, with threads
            self.__extract_ps_spindump_nosymbols_file,
            # full path, no threads
            self.__extract_ps_shutdownlogs,
            self.__extract_ps_logarchive,
            self.__extract_ps_uuid2path,
            # no full path, with threads
            self.__extract_ps_taskinfo,
            # no full path, no threads
            self.__extract_ps_remotectl_dumpstate,
        )
        for extractor in extractors:
            yield from extractor()

    def __extract_ps_base_file(self) -> Generator[dict, None, None]:
        """
        Extracts process data from ps.txt.

        :return: A generator yielding dictionaries containing process details from ps.txt.
        """
        entity_type = 'ps.txt'
        try:
            for p in PsParser(self.config, self.case_id).get_result():
                ps_event = self._build_event(self._strip_flags(p['command']), p, entity_type)
                if self.add_if_full_command_is_not_in_set(ps_event['process']):
                    yield ps_event
        except Exception as e:
            # Best effort: a failing source is logged and must not stop the other extractors.
            logger.exception(f"ERROR while extracting {entity_type} file. {e}")

    def __extract_ps_thread_file(self) -> Generator[dict, None, None]:
        """
        Extracts process data from psthread.txt.

        :return: A generator yielding dictionaries containing process details from psthread.txt.
        """
        entity_type = 'psthread.txt'
        try:
            for p in PsThreadParser(self.config, self.case_id).get_result():
                ps_event = self._build_event(self._strip_flags(p['command']), p, entity_type)
                if self.add_if_full_command_is_not_in_set(ps_event['process']):
                    yield ps_event
        except Exception as e:
            logger.exception(f"ERROR while extracting {entity_type} file. {e}")

    def __extract_ps_spindump_nosymbols_file(self) -> Generator[dict, None, None]:
        """
        Extracts process data from spindump-nosymbols.txt.

        :return: A generator yielding dictionaries containing process and thread details from spindump-nosymbols.txt.
        """
        entity_type = 'spindump-nosymbols.txt'
        try:
            for p in SpindumpNoSymbolsParser(self.config, self.case_id).get_result():
                if 'process' not in p:
                    continue

                if 'path' in p:
                    process_name = p['path']
                elif p['process'] == 'kernel_task [0]':
                    process_name = '/kernel'  # same spelling as in the other formats
                else:
                    process_name = p['process']  # fallback so the entry is not lost
                process_name = self._strip_flags(process_name)

                if self.add_if_full_command_is_not_in_set(process_name):
                    yield self._build_event(process_name, p, entity_type)

                for t in p['threads']:
                    try:
                        thread_name = f"{process_name}::{t['thread_name']}"
                        if self.add_if_full_command_is_not_in_set(thread_name):
                            yield self._build_event(thread_name, p, entity_type)
                    except KeyError:
                        # Best effort: a thread entry lacking an expected key is skipped.
                        pass
        except Exception as e:
            logger.exception(f"ERROR while extracting {entity_type} file. {e}")

    def __extract_ps_shutdownlogs(self) -> Generator[dict, None, None]:
        """
        Extracts process data from shutdown logs.

        :return: A generator yielding dictionaries containing process details from shutdown logs.
        """
        entity_type = 'shutdown.logs'
        try:
            for p in ShutdownLogsParser(self.config, self.case_id).get_result():
                process_name = self._strip_flags(p['command'])
                if self.add_if_full_command_is_not_in_set(process_name):
                    yield self._build_event(process_name, p, entity_type)
        except Exception as e:
            logger.exception(f"ERROR while extracting {entity_type}. {e}")

    def __extract_ps_logarchive(self) -> Generator[dict, None, None]:
        """
        Extracts process data from logarchive.

        Events without a ``process`` key are skipped so that a single such event
        does not abort the extraction of the whole source.

        :return: A generator yielding dictionaries containing process details from logarchive.
        """
        entity_type = 'log archive'
        try:
            for p in LogarchiveParser(self.config, self.case_id).get_result():
                if 'process' not in p:
                    continue
                process_name = self._strip_flags(p['process'])
                if self.add_if_full_command_is_not_in_set(process_name):
                    yield self._build_event(process_name, p, entity_type)
        except Exception as e:
            logger.exception(f"ERROR while extracting {entity_type}. {e}")

    def __extract_ps_uuid2path(self) -> Generator[dict, None, None]:
        """
        Extracts process data from UUID2PathParser.

        :return: A generator yielding process data from uuid2path.
        """
        entity_type = 'uuid2path'
        try:
            for p in UUID2PathParser(self.config, self.case_id).get_result().values():
                process_name = self._strip_flags(p)
                if self.add_if_full_command_is_not_in_set(process_name):
                    # No time information is read from this source.
                    yield {
                        'process': process_name,
                        'timestamp': None,
                        'datetime': None,
                        'source': entity_type
                    }
        except Exception as e:
            logger.exception(f"ERROR while extracting {entity_type}. {e}")

    def __extract_ps_taskinfo(self) -> Generator[dict, None, None]:
        """
        Extracts process and thread information from TaskinfoParser.

        :return: A generator yielding process and thread information from taskinfo.
        """
        entity_type = 'taskinfo.txt'
        try:
            for p in TaskinfoParser(self.config, self.case_id).get_result():
                if 'name' not in p:
                    continue

                process_name = self._strip_flags(p['name'])
                if self.add_if_full_path_is_not_in_set(process_name):
                    yield self._build_event(process_name, p, entity_type)

                for t in p['threads']:
                    try:
                        thread_name = f"{process_name}::{t['thread name']}"
                        if self.add_if_full_path_is_not_in_set(thread_name):
                            yield self._build_event(thread_name, p, entity_type)
                    except KeyError:
                        # Best effort: a thread entry lacking an expected key is skipped.
                        pass
        except Exception as e:
            logger.exception(f"ERROR while extracting {entity_type}. {e}")

    def __extract_ps_remotectl_dumpstate(self) -> Generator[dict, None, None]:
        """
        Extracts process data from RemotectlDumpstateParser.

        :return: A generator yielding process data from remotectl_dumpstate.txt.
        """
        entity_type = 'remotectl_dumpstate.txt'
        try:
            remotectl_dumpstate_json = RemotectlDumpstateParser(self.config, self.case_id).get_result()
            if remotectl_dumpstate_json:
                for p in remotectl_dumpstate_json['Local device']['Services']:
                    process_name = self._strip_flags(p)
                    if self.add_if_full_path_is_not_in_set(process_name):
                        # No time information is read from this source.
                        yield {
                            'process': process_name,
                            'timestamp': None,
                            'datetime': None,
                            'source': entity_type
                        }
        except Exception as e:
            logger.exception(f"ERROR while extracting {entity_type}. {e}")

    def add_if_full_path_is_not_in_set(self, name: str) -> bool:
        """
        Ensures that a process path is unique before adding it to the shared set.

        ``name`` is considered already known when a recorded entry ends with it,
        either as a whole, in its part before ``::``, or (for entries without
        ``::``) in its part before the first space.

        :param name: Process path name
        :return: True if the process was not in the set and was added, False otherwise.
        """
        # Fast path: an exact match is also a suffix match, so the result is the
        # same as the scan below but avoids walking the set for repeated names.
        if name in self.all_ps:
            return False
        for item in self.all_ps:
            if item.endswith(name):
                return False
            if item.split('::')[0].endswith(name):
                return False
            if '::' not in item and item.split(' ')[0].endswith(name):
                return False  # This covers cases with space-separated commands
        self.all_ps.add(name)
        return True

    def add_if_full_command_is_not_in_set(self, name: str) -> bool:
        """
        Ensures that a process command is unique before adding it to the shared set.

        ``name`` is considered already known when a recorded entry starts with it.

        :param name: Process command name
        :return: True if the process was not in the set and was added, False otherwise.
        """
        # Fast path: an exact match is also a prefix match, so the result is the
        # same as the scan below but avoids walking the set for repeated names.
        if name in self.all_ps:
            return False
        if any(item.startswith(name) for item in self.all_ps):
            return False
        self.all_ps.add(name)
        return True
0 commit comments