diff --git a/CHANGELOG.md b/CHANGELOG.md index 91485ca..60877d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,19 +1,22 @@ -## [3.1.0] - 2024-11-x -### Changed -- Package management and deployment moved to Poetry -- Docker build process improved using multi-stage builds. The Dockerfile now doesn't contain any unnecessary files, and is much smaller. -- Refactor to separate GitLab client and Watchman processing into modules -- Refactor to implement python-gitlab library for GitLab API calls, instead of the custom client used previously. - - This change allows for more efficient and easier to read code, is more reliable, and also allows for enhancements to be added more easily in the future. - +## [3.1.0] - 2024-11-18 ### Added - Signatures now loaded into memory instead of being saved to disk. This allows for running on read-only filesystems. +- Ability to disable signatures by their ID in the watchman.conf config file. + - These signatures will not be used when running GitLab Watchman + - Signature IDs for each signature can be found in the Watchman Signatures repository - Tests for Docker build - Enhanced deduplication of findings - The same match should not be returned multiple times within the same scope. E.g. if a token is found in a commit, it should not be returned multiple times in the same commit. - All dates are now converted and logged in UTC - Unit tests added for models and utils +### Changed +- Package management and deployment moved to Poetry +- Docker build process improved using multi-stage builds. The Dockerfile now doesn't contain any unnecessary files, and is much smaller. +- Refactor to separate GitLab client and Watchman processing into modules +- Refactor to implement [python-gitlab](https://python-gitlab.readthedocs.io/) library for GitLab API calls, instead of the custom client used previously. 
+ - This change gives more efficient and easier to read code, is more reliable, and also allows for enhancements to be added more easily in the future. + ### Fixed - Error when searching wiki-blobs - There would often be failures when trying to find projects or groups associated with blobs. This is now fixed by adding logic to check if the blob is associated with a project or group, and get the correct information accordingly. diff --git a/README.md b/README.md index 02af30f..91c508d 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,18 @@ GitLab Watchman can enumerate potentially useful information from a GitLab insta ### Signatures GitLab Watchman uses custom YAML signatures to detect matches in GitLab. These signatures are pulled from the central [Watchman Signatures repository](https://github.com/PaperMtn/watchman-signatures). Slack Watchman automatically updates its signature base at runtime to ensure its using the latest signatures to detect secrets. +#### Suppressing Signatures +You can define signatures that you want to disable when running GitLab Watchman by adding their IDs to the `disabled_signatures` section of the `watchman.conf` file. For example: + +```yaml +gitlab_watchman: + disabled_signatures: + - tokens_generic_bearer_tokens + - tokens_generic_access_tokens +``` + +You can find the ID of a signature in the individual YAML files in the [Watchman Signatures repository](https://github.com/PaperMtn/watchman-signatures). + ### Logging GitLab Watchman gives the following logging options: @@ -106,6 +118,16 @@ You also need to provide the URL of your GitLab instance. #### Providing token & URL GitLab Watchman will get the GitLab token and URL from the environment variables `GITLAB_WATCHMAN_TOKEN` and `GITLAB_WATCHMAN_URL`. +### watchman.conf file +Configuration options can be passed in a file named `watchman.conf` which must be stored in your home directory. 
The file should follow the YAML format, and should look like below: +```yaml +gitlab_watchman: + disabled_signatures: + - tokens_generic_bearer_tokens + - tokens_generic_access_tokens +``` +GitLab Watchman will look for this file at runtime, and use the configuration options from here. + ## Installation You can install the latest stable version via pip: diff --git a/src/gitlab_watchman/__init__.py b/src/gitlab_watchman/__init__.py index 9ac18c7..3f0e63e 100644 --- a/src/gitlab_watchman/__init__.py +++ b/src/gitlab_watchman/__init__.py @@ -8,7 +8,9 @@ import traceback from dataclasses import dataclass from importlib import metadata -from typing import List +from typing import List, Dict, Any + +import yaml from gitlab_watchman import watchman_processor from gitlab_watchman.clients.gitlab_client import GitLabAPIClient @@ -19,7 +21,8 @@ GitLabWatchmanNotAuthorisedError, GitLabWatchmanAuthenticationError, ElasticsearchMissingError, - MissingEnvVarError + MissingEnvVarError, + MisconfiguredConfFileError ) from gitlab_watchman.loggers import ( JSONLogger, @@ -100,7 +103,7 @@ def perform_search(search_args: SearchArgs): search(search_args, sig, scope) -def validate_variables() -> bool: +def validate_variables() -> Dict[str, Any]: """ Validate whether GitLab Watchman environment variables have been set Returns: @@ -112,8 +115,30 @@ def validate_variables() -> bool: for var in required_vars: if var not in os.environ: raise MissingEnvVarError(var) + path = f'{os.path.expanduser("~")}/watchman.conf' + if os.path.exists(path): + try: + with open(path) as yaml_file: + conf_details = yaml.safe_load(yaml_file)['gitlab_watchman'] + return { + 'disabled_signatures': conf_details.get('disabled_signatures', []) + } + except Exception as e: + raise MisconfiguredConfFileError from e + return {} + + +def supress_disabled_signatures(signatures: List[signature.Signature], + disabled_signatures: List[str]) -> List[signature.Signature]: + """ Supress signatures that are disabled in the 
config file + Args: + signatures: List of signatures to filter + disabled_signatures: List of signatures to disable + Returns: + List of signatures with disabled signatures removed + """ - return True + return [sig for sig in signatures if sig.id not in disabled_signatures] # pylint: disable=too-many-locals, missing-function-docstring, global-variable-undefined @@ -183,7 +208,8 @@ def main(): OUTPUT_LOGGER = init_logger(logging_type, debug) - validate_variables() + config = validate_variables() + disabled_signatures = config.get('disabled_signatures', []) gitlab_client = watchman_processor.initiate_gitlab_connection( os.environ.get('GITLAB_WATCHMAN_TOKEN'), os.environ.get('GITLAB_WATCHMAN_URL')) @@ -204,6 +230,9 @@ def main(): OUTPUT_LOGGER.log('INFO', 'Downloading and importing signatures') signature_list = SignatureDownloader(OUTPUT_LOGGER).download_signatures() + if len(disabled_signatures) > 0: + signature_list = supress_disabled_signatures(signature_list, disabled_signatures) + OUTPUT_LOGGER.log('INFO', f'The following signatures have been suppressed: {disabled_signatures}') OUTPUT_LOGGER.log('SUCCESS', f'{len(signature_list)} signatures loaded') OUTPUT_LOGGER.log('INFO', f'{multiprocessing.cpu_count() - 1} cores being used') diff --git a/src/gitlab_watchman/clients/gitlab_client.py b/src/gitlab_watchman/clients/gitlab_client.py index 2b4b7a5..f71474b 100644 --- a/src/gitlab_watchman/clients/gitlab_client.py +++ b/src/gitlab_watchman/clients/gitlab_client.py @@ -42,9 +42,9 @@ def inner_function(*args, **kwargs): elif e.response_code == 500: pass else: - raise GitLabWatchmanGetObjectError(e.error_message, func) from e - except IndexError as e: - raise GitLabWatchmanGetObjectError('Object not found', func) from e + raise GitLabWatchmanGetObjectError(e.error_message, func, args) from e + except IndexError: + pass except Exception as e: raise e @@ -112,7 +112,7 @@ def get_user_by_username(self, username: str) -> Dict[str, Any] | None: 
GitLabWatchmanNotAuthorisedError: If the user is not authorized to access the resource GitlabWatchmanGetObjectError: If an error occurs while getting the object """ - return self.gitlab_client.users.list(username=username)[0].asdict() + return self.gitlab_client.users.list(username=username, active=False, blocked=True)[0].asdict() @exception_handler def get_settings(self) -> Dict[str, Any]: @@ -272,7 +272,7 @@ def get_group_members(self, group_id: str) -> List[Dict]: GitLabWatchmanNotAuthorisedError: If the user is not authorized to access the resource GitLabWatchmanGetObjectError: If an error occurs while getting the object """ - members = self.gitlab_client.groups.get(group_id).members.list(as_list=True) + members = self.gitlab_client.groups.get(group_id).members.list(as_list=True, get_all=True) return [member.asdict() for member in members] @exception_handler diff --git a/src/gitlab_watchman/exceptions.py b/src/gitlab_watchman/exceptions.py index a32d9a1..b45fd8c 100644 --- a/src/gitlab_watchman/exceptions.py +++ b/src/gitlab_watchman/exceptions.py @@ -36,8 +36,8 @@ class GitLabWatchmanGetObjectError(GitLabWatchmanError): """ Exception raised when an error occurs while getting a GitLab API object. """ - def __init__(self, error_message: str, func): - super().__init__(f'GitLab get object error: {error_message} - Function: {func.__name__}') + def __init__(self, error_message: str, func, arg): + super().__init__(f'GitLab get object error: {error_message} - Function: {func.__name__} - Arg: {arg}') self.error_message = error_message @@ -49,3 +49,12 @@ class GitLabWatchmanNotAuthorisedError(GitLabWatchmanError): def __init__(self, error_message: str, func): super().__init__(f'Not authorised: {error_message} - {func.__name__}') self.error_message = error_message + + +class MisconfiguredConfFileError(Exception): + """ Exception raised when the config file watchman.conf is missing. 
+ """ + + def __init__(self): + self.message = f"The file watchman.conf doesn't contain config details for GitLab Watchman" + super().__init__(self.message) diff --git a/src/gitlab_watchman/loggers.py b/src/gitlab_watchman/loggers.py index 30032c1..eb714a0 100644 --- a/src/gitlab_watchman/loggers.py +++ b/src/gitlab_watchman/loggers.py @@ -80,9 +80,9 @@ def log(self, if notify_type == "result": if scope == 'blobs': message = 'SCOPE: Blob' \ - f' AUTHOR: {message.get("commit").get("author_name")} - ' \ - f'{message.get("commit").get("author_email")}' \ f' COMMITTED: {message.get("commit").get("committed_date")} \n' \ + f' AUTHOR: {message.get("commit").get("author_name")} ' \ + f'EMAIL: {message.get("commit").get("author_email")}\n' \ f' FILENAME: {message.get("blob").get("basename")} \n' \ f' URL: {message.get("project").get("web_url")}/-/blob/{message.get("blob").get("ref")}/' \ f'{message.get("blob").get("filename")} \n' \ diff --git a/src/gitlab_watchman/models/signature.py b/src/gitlab_watchman/models/signature.py index 82d94b8..e311893 100644 --- a/src/gitlab_watchman/models/signature.py +++ b/src/gitlab_watchman/models/signature.py @@ -18,6 +18,7 @@ class Signature: They also contain regex patterns to validate data that is found""" name: str + id: str status: str author: str date: str | datetime.date | datetime.datetime @@ -33,6 +34,8 @@ class Signature: def __post_init__(self): if self.name and not isinstance(self.name, str): raise TypeError(f'Expected `name` to be of type str, received {type(self.name).__name__}') + if self.id and not isinstance(self.id, str): + raise TypeError(f'Expected `id` to be of type str, received {type(self.id).__name__}') if self.status and not isinstance(self.status, str): raise TypeError(f'Expected `status` to be of type str, received {type(self.status).__name__}') if self.author and not isinstance(self.author, str): @@ -65,6 +68,7 @@ def create_from_dict(signature_dict: Dict[str, Any]) -> Signature: return Signature( 
name=signature_dict.get('name'), + id=signature_dict.get('id'), status=signature_dict.get('status'), author=signature_dict.get('author'), date=signature_dict.get('date'), diff --git a/tests/unit/models/fixtures.py b/tests/unit/models/fixtures.py index 827ee6e..fdeef2b 100644 --- a/tests/unit/models/fixtures.py +++ b/tests/unit/models/fixtures.py @@ -476,6 +476,7 @@ class GitLabMockData: MOCK_SIGNATURE_DICT = { 'name': 'Akamai API Access Tokens', + 'id': 'akamai_api_access_tokens', 'status': 'enabled', 'author': 'PaperMtn', 'date': '2023-12-22', @@ -566,6 +567,7 @@ def mock_user(): def mock_wiki_blob(): return wiki_blob.create_from_dict(GitLabMockData.MOCK_WIKI_BLOB_DICT) + @pytest.fixture def mock_signature(): - return signature.create_from_dict(GitLabMockData.MOCK_SIGNATURE_DICT) \ No newline at end of file + return signature.create_from_dict(GitLabMockData.MOCK_SIGNATURE_DICT) diff --git a/tests/unit/models/test_unit_signature.py b/tests/unit/models/test_unit_signature.py index d8c2273..8cc9614 100644 --- a/tests/unit/models/test_unit_signature.py +++ b/tests/unit/models/test_unit_signature.py @@ -11,6 +11,7 @@ def test_signature_initialisation(mock_signature): # Test that the signature object has the correct attributes assert mock_signature.name == GitLabMockData.MOCK_SIGNATURE_DICT.get('name') + assert mock_signature.id == GitLabMockData.MOCK_SIGNATURE_DICT.get('id') assert mock_signature.status == GitLabMockData.MOCK_SIGNATURE_DICT.get('status') assert mock_signature.author == GitLabMockData.MOCK_SIGNATURE_DICT.get('author') assert mock_signature.date == GitLabMockData.MOCK_SIGNATURE_DICT.get('date') @@ -27,6 +28,12 @@ def test_field_type(): with pytest.raises(TypeError): test_signature = signature.create_from_dict(signature_dict) + # Test that correct error is raised when id is not a string + signature_dict = copy.deepcopy(GitLabMockData.MOCK_SIGNATURE_DICT) + signature_dict['id'] = 123 + with pytest.raises(TypeError): + test_signature = 
signature.create_from_dict(signature_dict) + # Test that correct error is raised when status is not a string signature_dict = copy.deepcopy(GitLabMockData.MOCK_SIGNATURE_DICT) signature_dict['status'] = 123