- 
                Notifications
    
You must be signed in to change notification settings  - Fork 110
 
feature(aws): log fallback to ssm based access #12298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| 
          
            
          
           | 
    @@ -28,7 +28,8 @@ | |
| from dataclasses import dataclass | ||
| 
     | 
||
| import requests | ||
| from invoke.exceptions import UnexpectedExit | ||
| from invoke.exceptions import UnexpectedExit, Failure as InvokeFailure | ||
| from botocore.exceptions import ClientError | ||
| 
     | 
||
| import sdcm.monitorstack.ui as monitoring_ui | ||
| from sdcm.paths import SCYLLA_YAML_PATH, SCYLLA_PROPERTIES_PATH, SCYLLA_MANAGER_AGENT_YAML_PATH, \ | ||
| 
          
            
          
           | 
    @@ -65,6 +66,8 @@ | |
| from sdcm.utils.gce_utils import gce_public_addresses, gce_private_addresses | ||
| from sdcm.localhost import LocalHost | ||
| from sdcm.cloud_api_client import ScyllaCloudAPIClient | ||
| from sdcm.utils.aws_ssm_runner import SSMCommandRunner | ||
| from sdcm.remote.libssh2_client.exceptions import Failure as Libssh2_Failure | ||
| 
     | 
||
| LOGGER = logging.getLogger(__name__) | ||
| 
     | 
||
| 
          
            
          
           | 
    @@ -97,6 +100,17 @@ def distro(self): | |
| LOGGER.info("Detected Linux distribution: %s", _distro.name) | ||
| return _distro | ||
| 
     | 
||
@cached_property
def is_aws(self) -> bool:
    """Tell whether this node is described by an AWS-style instance record.

    The node counts as AWS when ``self._instance`` is a ``dict`` that holds
    a non-``None`` ``"InstanceId"`` entry.
    """
    instance = self._instance
    if not isinstance(instance, dict):
        return False
    return instance.get("InstanceId") is not None
| 
     | 
||
@cached_property
def aws_ssm_runner(self) -> Optional[SSMCommandRunner]:
    """Return an SSM command runner for this node, or ``None`` if unavailable.

    ``None`` is returned when the node is not an AWS instance (see
    ``is_aws``) or when the instance description carries no availability
    zone to derive the region from. Returning ``None`` instead of raising
    matters because callers read this property in a plain condition,
    outside any ``try`` block.
    """
    if not self.is_aws:
        return None

    instance_id = self._instance.get("InstanceId")
    # "Placement" may be missing or None in a partial instance description.
    placement = self._instance.get("Placement") or {}
    availability_zone = placement.get("AvailabilityZone")
    if not availability_zone:
        LOGGER.warning(
            "Cannot create SSM runner for instance %s: availability zone is unknown", instance_id)
        return None

    # Same slice as before: drop the last character of the zone name
    # (e.g. "us-east-1a" -> "us-east-1").
    # NOTE(review): zones whose name is not "<region><letter>" would need a
    # different derivation - confirm none are in use.
    region = availability_zone[:-1]
    return SSMCommandRunner(region_name=region, instance_id=instance_id)
| 
     | 
||
| @retrying(n=30, sleep_time=15, allowed_exceptions=(UnexpectedExit, Libssh2_UnexpectedExit,)) | ||
| def install_package(self, | ||
| package_name: str, | ||
| 
          
            
          
           | 
    @@ -217,18 +231,24 @@ class CommandLog(BaseLogEntity): | |
def collect(self, node, local_dst, remote_dst=None, local_search_path=None) -> Optional[str]:
    """Run ``self.cmd`` for ``node`` and place its output under ``local_dst``.

    :param node: node to collect from; must have a ``remoter``.
    :param local_dst: local directory that receives the log file.
    :param remote_dst: directory used to build the log file path passed to
        ``LogCollector.collect_log_remotely``.
    :param local_search_path: unused by this implementation.
    :return: path of the collected file as a string, or ``None`` when there
        is nothing to collect.
    """
    if not node or not node.remoter or remote_dst is None:
        return None

    remote_logfile, is_file_remote = LogCollector.collect_log_remotely(
        node=node,
        cmd=self.cmd,
        log_filename=os.path.join(remote_dst, self.name))
    if not remote_logfile:
        LOGGER.warning(
            "Nothing to collect. Command '%s' did not prepare log file on remote host '%s'", self.cmd, node.name)
        return None

    local_path = Path(local_dst) / Path(remote_logfile).name
    if is_file_remote:
        # The file lives on the node: fetch it over the remoter.
        LogCollector.receive_log(node=node,
                                 remote_log_path=remote_logfile,
                                 local_dir=local_dst,
                                 timeout=self.collect_timeout)
    else:
        # The file was already written on this machine: copy it locally.
        # copyfile() does not create missing directories, so make sure the
        # destination directory exists first.
        local_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(remote_logfile, str(local_path))
    return str(local_path)
| 
     | 
||
| 
     | 
||
| class FileLog(CommandLog): | ||
| 
          
            
          
           | 
    @@ -617,29 +637,76 @@ def create_local_storage_dir(self, base_local_dir): | |
def create_remote_storage_dir(self, node, path=''):
    """Create the per-node storage directory and return the directory to use.

    The directory is ``self.node_remote_dir/<path>`` (``path`` defaults to
    ``node.name``). It is created over SSH when the remoter is up, otherwise
    through the node's SSM runner when one is available.

    :param node: node on which the directory is created.
    :param path: sub-directory name; empty means ``node.name``.
    :return: the requested directory, or ``self.node_remote_dir`` when the
        ``mkdir`` command reported failure, the SSM call raised, or no
        transport is available.
    """
    if not path:
        path = node.name
    remote_dir = os.path.join(self.node_remote_dir, path)

    if node.remoter.is_up():
        try:
            result = node.remoter.run('mkdir -p {}'.format(remote_dir), ignore_status=True)
            if result.exited > 0:
                LOGGER.error(
                    'Remote storing folder not created.\n{}'.format(result))
                remote_dir = self.node_remote_dir
        except (Libssh2_Failure, InvokeFailure) as details:
            LOGGER.error("Error during creating remote directory %s", details)
    elif ssm_runner := node.aws_ssm_runner:
        try:
            ssm_result = ssm_runner.run('mkdir -p {}'.format(remote_dir), ignore_status=True)
            if not ssm_result.ok:
                LOGGER.error("SSM command failed for instance %s: mkdir", node._instance.get("InstanceId"))
                # Same fallback as the SSH branch: do not hand back a
                # directory that was not created.
                remote_dir = self.node_remote_dir
        except (ClientError, AttributeError) as e:
            LOGGER.error("Failed to run SSM command: %s", e)
            remote_dir = self.node_remote_dir
    else:
        # Neither SSH nor SSM is usable.
        remote_dir = self.node_remote_dir

    return remote_dir
| 
     | 
||
@staticmethod
def collect_log_remotely(node, cmd: str, log_filename: str) -> Tuple[Optional[str], bool]:
    """Run ``cmd`` for ``node`` and store its output in ``log_filename``.

    SSH is tried first: the command output is redirected to ``log_filename``
    on the node. When SSH is down or fails and the node exposes an SSM
    runner, the command is run through SSM and its output is saved to
    ``log_filename`` on the local machine instead.

    :param node: node to collect from.
    :param cmd: shell command whose output is the log.
    :param log_filename: path of the log file to produce.
    :return: ``(path, is_file_remote)``. ``path`` is ``log_filename`` on
        success and ``None`` otherwise. ``is_file_remote`` is ``False`` when
        the SSM fallback was attempted (the file is local) or the node has
        no remoter, and ``True`` otherwise.
    """
    if not node.remoter:
        return None, False

    is_file_remote = True
    # Start pessimistic: without this the final return raised
    # UnboundLocalError when SSH was unavailable and no SSM runner existed.
    ok = False

    if ssh_connected := node.remoter.is_up():
        try:
            collect_log_command = f"{cmd} > '{log_filename}' 2>&1"
            node.remoter.run(collect_log_command, ignore_status=True, verbose=True)
            result = node.remoter.run(f"test -f '{log_filename}'", ignore_status=True)
            ok = result.ok
        except (Libssh2_Failure, InvokeFailure):
            # Treat a failing SSH session like a closed one and try SSM.
            ssh_connected = False

    # Fall back to SSM, which is only available for AWS-based nodes.
    if not ssh_connected and (ssm_runner := node.aws_ssm_runner):
        LOGGER.info("Collecting Node %s via SSM: %s", node.name, log_filename)
        Path(log_filename).parent.mkdir(parents=True, exist_ok=True)

        # SSM runs the command and saves its output to a local file.
        is_file_remote = False

        try:
            collect_log_command = cmd
            ssm_result = ssm_runner.run_command_and_save_output(
                command=collect_log_command,
                local_output_file=log_filename,
                comment=f'Collect log {log_filename}',
                ignore_status=True
            )
            ok = ssm_result.ok
            if not ok:
                LOGGER.error("SSM command failed for instance %s: %s ",
                             node._instance.get("InstanceId"), collect_log_command)
                return None, is_file_remote
        except (ClientError, ImportError, AttributeError, TypeError, ValueError, KeyError, IndexError) as e:
            # ClientError is handled here as well, matching the SSM branch
            # of create_remote_storage_dir.
            LOGGER.error("Failed to run SSM command: %s", e)
            return None, is_file_remote

    return (log_filename if ok else None), is_file_remote
        
    
 | 
||
| 
     | 
||
| @staticmethod | ||
| def archive_log_remotely(node, log_filename: str, archive_name: Optional[str] = None) -> Optional[str]: | ||
| 
          
            
          
           | 
    ||
Uh oh!
There was an error while loading. Please reload this page.