diff --git a/crates/zizmor/src/audit/artipacked.rs b/crates/zizmor/src/audit/artipacked.rs index fefe7b91..fad12055 100644 --- a/crates/zizmor/src/audit/artipacked.rs +++ b/crates/zizmor/src/audit/artipacked.rs @@ -5,7 +5,7 @@ use itertools::Itertools as _; use super::{Audit, AuditLoadError, audit_meta}; use crate::{ finding::{Confidence, Finding, Fix, Persona, Severity, location::Routable as _}, - models::{StepBodyCommon, StepCommon, uses::RepositoryUsesExt as _}, + models::{StepBodyCommon, StepCommon, uses::repository::RepositoryUsesExt as _}, state::AuditState, utils::split_patterns, }; diff --git a/crates/zizmor/src/audit/cache_poisoning.rs b/crates/zizmor/src/audit/cache_poisoning.rs index 182501cb..5e17eb71 100644 --- a/crates/zizmor/src/audit/cache_poisoning.rs +++ b/crates/zizmor/src/audit/cache_poisoning.rs @@ -9,6 +9,7 @@ use crate::finding::location::{Locatable as _, Routable}; use crate::finding::{Confidence, Finding, Fix, FixDisposition, Severity}; use crate::models::StepCommon; use crate::models::coordinate::{ActionCoordinate, ControlExpr, ControlFieldType, Toggle, Usage}; +use crate::models::uses::repository::RepositoryUsesPattern; use crate::models::workflow::{JobExt as _, NormalJob, Step, Steps}; use crate::state::AuditState; @@ -339,7 +340,7 @@ impl CachePoisoning { fn create_configurable_action_fix<'doc>( &self, - _uses_pattern: &crate::models::uses::RepositoryUsesPattern, + _uses_pattern: &RepositoryUsesPattern, control: &ControlExpr, step: &Step<'doc>, ) -> Option> { diff --git a/crates/zizmor/src/audit/impostor_commit.rs b/crates/zizmor/src/audit/impostor_commit.rs index c4fc598d..4d3f4940 100644 --- a/crates/zizmor/src/audit/impostor_commit.rs +++ b/crates/zizmor/src/audit/impostor_commit.rs @@ -13,7 +13,7 @@ use crate::{ config::Config, finding::{Confidence, Finding, Severity, location::Locatable as _}, github_api::{self, ComparisonStatus}, - models::{StepCommon, uses::RepositoryUsesExt as _, workflow::Workflow}, + models::{StepCommon, uses::repository::RepositoryUsesExt as _, workflow::Workflow}, state::AuditState, }; diff --git a/crates/zizmor/src/audit/known_vulnerable_actions.rs b/crates/zizmor/src/audit/known_vulnerable_actions.rs index d512becf..bed22897 100644 --- a/crates/zizmor/src/audit/known_vulnerable_actions.rs +++ b/crates/zizmor/src/audit/known_vulnerable_actions.rs @@ -13,7 +13,9 @@ use crate::{ config::Config, finding::{Confidence, Finding, Fix, Severity, location::Routable as _}, github_api, - models::{StepCommon, action::CompositeStep, uses::RepositoryUsesExt as _, workflow::Step}, + models::{ + StepCommon, action::CompositeStep, uses::repository::RepositoryUsesExt as _, workflow::Step, + }, state::AuditState, }; use yamlpatch::{Op, Patch}; diff --git a/crates/zizmor/src/audit/ref_confusion.rs b/crates/zizmor/src/audit/ref_confusion.rs index e569a4ae..1c293a3a 100644 --- a/crates/zizmor/src/audit/ref_confusion.rs +++ b/crates/zizmor/src/audit/ref_confusion.rs @@ -16,7 +16,7 @@ use crate::models::{StepCommon, action::CompositeStep}; use crate::{ finding::{Confidence, Severity}, github_api, - models::uses::RepositoryUsesExt as _, + models::uses::repository::RepositoryUsesExt as _, state::AuditState, }; diff --git a/crates/zizmor/src/audit/stale_action_refs.rs b/crates/zizmor/src/audit/stale_action_refs.rs index 8d7bb3d4..53303e79 100644 --- a/crates/zizmor/src/audit/stale_action_refs.rs +++ b/crates/zizmor/src/audit/stale_action_refs.rs @@ -9,7 +9,9 @@ use crate::{ config::Config, finding::{Confidence, Finding, Severity}, github_api, - models::{StepCommon, action::CompositeStep, uses::RepositoryUsesExt as _, workflow::Step}, + models::{ + StepCommon, action::CompositeStep, uses::repository::RepositoryUsesExt as _, workflow::Step, + }, state::AuditState, }; diff --git a/crates/zizmor/src/audit/template_injection.rs b/crates/zizmor/src/audit/template_injection.rs index 8c79460e..be9e4118 100644 --- a/crates/zizmor/src/audit/template_injection.rs +++ b/crates/zizmor/src/audit/template_injection.rs @@ -33,8 +33,8 @@ use crate::{ location::{Routable as _, SymbolicLocation}, }, models::{ - self, StepCommon, action::CompositeStep, inputs::Capability, uses::RepositoryUsesPattern, - workflow::Step, + self, StepCommon, action::CompositeStep, inputs::Capability, + uses::repository::RepositoryUsesPattern, workflow::Step, }, state::AuditState, utils::{self, DEFAULT_ENVIRONMENT_VARIABLES, ExtractedExpr, extract_fenced_expressions}, diff --git a/crates/zizmor/src/audit/unpinned_uses.rs b/crates/zizmor/src/audit/unpinned_uses.rs index d6a7b1c8..2133a64d 100644 --- a/crates/zizmor/src/audit/unpinned_uses.rs +++ b/crates/zizmor/src/audit/unpinned_uses.rs @@ -3,7 +3,7 @@ use github_actions_models::common::Uses; use super::{Audit, AuditLoadError, AuditState, audit_meta}; use crate::config::{Config, UsesPolicy}; use crate::finding::{Confidence, Finding, Persona, Severity}; -use crate::models::uses::RepositoryUsesPattern; +use crate::models::uses::repository::RepositoryUsesPattern; use crate::models::{StepCommon, action::CompositeStep, uses::UsesExt as _, workflow::Step}; pub(crate) struct UnpinnedUses; diff --git a/crates/zizmor/src/config.rs b/crates/zizmor/src/config.rs index 206501cc..6bbd7678 100644 --- a/crates/zizmor/src/config.rs +++ b/crates/zizmor/src/config.rs @@ -13,7 +13,7 @@ use crate::{ audit::{AuditCore, forbidden_uses::ForbiddenUses, unpinned_uses::UnpinnedUses}, finding::Finding, github_api::Client, - models::uses::RepositoryUsesPattern, + models::uses::repository::RepositoryUsesPattern, registry::input::RepoSlug, tips, }; diff --git a/crates/zizmor/src/models/coordinate.rs b/crates/zizmor/src/models/coordinate.rs index 48737f56..1825c7fc 100644 --- a/crates/zizmor/src/models/coordinate.rs +++ b/crates/zizmor/src/models/coordinate.rs @@ -20,7 +20,7 @@ use indexmap::IndexMap; use crate::utils::ExtractedExpr; -use super::{StepBodyCommon, StepCommon, uses::RepositoryUsesPattern}; +use super::{StepBodyCommon, StepCommon, uses::repository::RepositoryUsesPattern}; pub(crate) enum ActionCoordinate { Configurable { @@ -389,7 +389,7 @@ mod tests { use crate::{ models::{ coordinate::{ControlExpr, ControlFieldType, Toggle, Usage}, - uses::RepositoryUsesPattern, + uses::repository::RepositoryUsesPattern, workflow::{Job, Workflow}, }, registry::input::InputKey, diff --git a/crates/zizmor/src/models/uses.rs b/crates/zizmor/src/models/uses.rs index e70ec722..1d0648ae 100644 --- a/crates/zizmor/src/models/uses.rs +++ b/crates/zizmor/src/models/uses.rs @@ -1,216 +1,11 @@ //! Extension traits for the `Uses` APIs. -use std::{str::FromStr, sync::LazyLock}; +use github_actions_models::common::Uses; -use github_actions_models::common::{RepositoryUses, Uses}; -use regex::Regex; -use serde::Deserialize; +use crate::models::uses::repository::RepositoryUsesExt as _; -/// Matches all variants of [`RepositoryUsesPattern`] except `*`. -/// -/// TODO: Replace this with a real parser; this is ridiculous. -static REPOSITORY_USES_PATTERN: LazyLock = LazyLock::new(|| { - Regex::new( - r#"(?xmi) # verbose, multi-line mode, case-insensitive - ^ # start of line - ([\w-]+) # (1) owner - / # / - ([\w\.-]+|\*) # (2) repo or * - (?: # non-capturing group for optional subpath - / # / - ( # (3) subpath - [[[:graph:]]&&[^@\*]]+ # any non-space, non-@, non-* characters - | # OR - \* # * - ) # end of (3) subpath - )? # end of non-capturing group for optional subpath - (?: # non-capturing group for optional git ref - @ # @ - ([[[:graph:]]&&[^\*]]+) # (4) git ref (any non-space, non-* characters) - )? # end of non-capturing group for optional git ref - $ # end of line - "#, - ) - .unwrap() -}); - -/// Represents a pattern for matching repository `uses` references. -/// These patterns are ordered by specificity; more specific patterns -/// should be listed first. -#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] -pub(crate) enum RepositoryUsesPattern { - /// Matches exactly `owner/repo/subpath@ref`. - ExactWithRef { - owner: String, - repo: String, - subpath: Option, - git_ref: String, - }, - /// Matches exactly `owner/repo/subpath`. - ExactPath { - owner: String, - repo: String, - subpath: String, - }, - /// Matches exactly `owner/repo`. - ExactRepo { owner: String, repo: String }, - /// Matches `owner/repo/*` (i.e. any subpath under the given repo, including - /// the empty subpath). - InRepo { owner: String, repo: String }, - /// Matches `owner/*` (i.e. any repo under the given owner). - InOwner(String), - /// Matches any `owner/repo`. - Any, -} - -impl RepositoryUsesPattern { - pub(crate) fn matches(&self, uses: &RepositoryUses) -> bool { - match self { - RepositoryUsesPattern::ExactWithRef { - owner, - repo, - subpath, - git_ref, - } => { - uses.owner.eq_ignore_ascii_case(owner) - && uses.repo.eq_ignore_ascii_case(repo) - && uses.subpath == *subpath - && uses.git_ref.as_str() == git_ref - } - RepositoryUsesPattern::ExactPath { - owner, - repo, - subpath, - } => { - // TODO: Normalize the subpath here. - // This is nontrivial, since we need to normalize - // both leading slashes *and* arbitrary ./.. components. - // Utf8Path gets us part of the way there, but is - // platform dependent (i.e. will do the wrong thing - // if the platform separator is not /). - uses.owner.eq_ignore_ascii_case(owner) - && uses.repo.eq_ignore_ascii_case(repo) - && uses.subpath.as_deref().is_some_and(|s| s == subpath) - } - RepositoryUsesPattern::ExactRepo { owner, repo } => { - uses.owner.eq_ignore_ascii_case(owner) - && uses.repo.eq_ignore_ascii_case(repo) - && uses.subpath.is_none() - } - RepositoryUsesPattern::InRepo { owner, repo } => { - uses.owner.eq_ignore_ascii_case(owner) && uses.repo.eq_ignore_ascii_case(repo) - } - RepositoryUsesPattern::InOwner(owner) => uses.owner.eq_ignore_ascii_case(owner), - RepositoryUsesPattern::Any => true, - } - } -} - -impl FromStr for RepositoryUsesPattern { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - if s == "*" { - return Ok(RepositoryUsesPattern::Any); - } - - let caps = REPOSITORY_USES_PATTERN - .captures(s) - .ok_or_else(|| anyhow::anyhow!("invalid pattern: {s}"))?; - - let owner = &caps[1]; - let repo = &caps[2]; - let subpath = caps.get(3).map(|m| m.as_str()); - let git_ref = caps.get(4).map(|m| m.as_str()); - - match (owner, repo, subpath, git_ref) { - (owner, "*", None, None) => Ok(RepositoryUsesPattern::InOwner(owner.into())), - (owner, repo, None, None) => Ok(RepositoryUsesPattern::ExactRepo { - owner: owner.into(), - repo: repo.into(), - }), - (_, "*", Some(_), _) => Err(anyhow::anyhow!("invalid pattern: {s}")), - (owner, repo, Some("*"), None) => Ok(RepositoryUsesPattern::InRepo { - owner: owner.into(), - repo: repo.into(), - }), - (owner, repo, Some(subpath), None) => Ok(RepositoryUsesPattern::ExactPath { - owner: owner.into(), - repo: repo.into(), - subpath: subpath.into(), - }), - (_, _, Some("*"), Some(_)) => Err(anyhow::anyhow!("invalid pattern: {s}")), - (owner, repo, subpath, Some(git_ref)) => Ok(RepositoryUsesPattern::ExactWithRef { - owner: owner.into(), - repo: repo.into(), - subpath: subpath.map(|s| s.into()), - git_ref: git_ref.into(), - }), - } - } -} - -impl<'de> Deserialize<'de> for RepositoryUsesPattern { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - RepositoryUsesPattern::from_str(&s).map_err(serde::de::Error::custom) - } -} - -/// Useful APIs for interacting with `uses: owner/repo` clauses. -pub(crate) trait RepositoryUsesExt { - /// Returns whether this `uses:` clause matches the given pattern. - /// - /// This uses [`RepositoryUsesPattern`] under the hood, and follows the - /// same matching rules. - fn matches(&self, pattern: &str) -> bool; - - /// Returns whether this `uses:` clause has a `git` ref and, if so, - /// whether that ref is a commit ref. - /// - /// For example, `foo/bar@baz` returns false while `foo/bar@1234...` - /// returns true. - fn ref_is_commit(&self) -> bool; - - /// Returns the `git` ref for this `uses:`, if present. - fn commit_ref(&self) -> Option<&str>; - - /// Returns the *symbolic* `git` ref for this `uses`, if present. - /// - /// Commit refs (i.e. SHA refs) are not returned. - fn symbolic_ref(&self) -> Option<&str>; -} - -impl RepositoryUsesExt for RepositoryUses { - fn matches(&self, template: &str) -> bool { - let Ok(pat) = template.parse::() else { - return false; - }; - - pat.matches(self) - } - - fn ref_is_commit(&self) -> bool { - self.git_ref.len() == 40 && self.git_ref.chars().all(|c| c.is_ascii_hexdigit()) - } - - fn commit_ref(&self) -> Option<&str> { - match &self.git_ref { - git_ref if self.ref_is_commit() => Some(git_ref), - _ => None, - } - } - - fn symbolic_ref(&self) -> Option<&str> { - match &self.git_ref { - git_ref if !self.ref_is_commit() => Some(git_ref), - _ => None, - } - } -} +pub(crate) mod docker; +pub(crate) mod repository; /// Useful APIs for interacting with all kinds of `uses:` clauses. pub(crate) trait UsesExt { @@ -243,229 +38,3 @@ impl UsesExt for Uses { } } } - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use anyhow::anyhow; - use github_actions_models::common::Uses; - - use super::RepositoryUsesPattern; - - #[test] - fn test_repositoryusespattern_parse() { - for (pattern, expected) in [ - ("", None), // Invalid, empty - ("/", None), // Invalid, not well formed - ("//", None), // Invalid, not well formed - ("///", None), // Invalid, not well formed - ("owner", None), // Invalid, sho,uld be owner/* - ("**", None), // Invalid, should be * - ("*", Some(RepositoryUsesPattern::Any)), - ( - "owner/*", - Some(RepositoryUsesPattern::InOwner("owner".into())), - ), - ("owner/*/", None), // Invalid, should be owner/* - ("owner/*/foo", None), // Invalid, not well formed - ("owner/*/*", None), // Invalid, not well formed - ("*/foo", None), // Invalid, not well formed - ("owner/repo/**", None), // Invalid, not well formed. - ( - "owner/repo/*", - Some(RepositoryUsesPattern::InRepo { - owner: "owner".into(), - repo: "repo".into(), - }), - ), - ( - "owner/repo", - Some(RepositoryUsesPattern::ExactRepo { - owner: "owner".into(), - repo: "repo".into(), - }), - ), - ( - "owner/repo/subpath", - Some(RepositoryUsesPattern::ExactPath { - owner: "owner".into(), - repo: "repo".into(), - subpath: "subpath".into(), - }), - ), - // We don't do any subpath normalization at construction time. - ( - "owner/repo//", - Some(RepositoryUsesPattern::ExactPath { - owner: "owner".into(), - repo: "repo".into(), - subpath: "/".into(), - }), - ), - ( - "owner/repo/subpath/", - Some(RepositoryUsesPattern::ExactPath { - owner: "owner".into(), - repo: "repo".into(), - subpath: "subpath/".into(), - }), - ), - ( - "owner/repo/subpath/very/nested////and/literal", - Some(RepositoryUsesPattern::ExactPath { - owner: "owner".into(), - repo: "repo".into(), - subpath: "subpath/very/nested////and/literal".into(), - }), - ), - ( - "owner/repo@v1", - Some(RepositoryUsesPattern::ExactWithRef { - owner: "owner".into(), - repo: "repo".into(), - subpath: None, - git_ref: "v1".into(), - }), - ), - ( - "owner/repo/subpath@v1", - Some(RepositoryUsesPattern::ExactWithRef { - owner: "owner".into(), - repo: "repo".into(), - subpath: Some("subpath".into()), - git_ref: "v1".into(), - }), - ), - ( - "owner/repo@172239021f7ba04fe7327647b213799853a9eb89", - Some(RepositoryUsesPattern::ExactWithRef { - owner: "owner".into(), - repo: "repo".into(), - subpath: None, - git_ref: "172239021f7ba04fe7327647b213799853a9eb89".into(), - }), - ), - ( - "pypa/gh-action-pypi-publish@release/v1", - Some(RepositoryUsesPattern::ExactWithRef { - owner: "pypa".into(), - repo: "gh-action-pypi-publish".into(), - subpath: None, - git_ref: "release/v1".into(), - }), - ), - // Invalid: no wildcards allowed when refs are present. - ("owner/repo/*@v1", None), - ("owner/repo/*/subpath@v1", None), - ("owner/*/subpath@v1", None), - ("*/*/subpath@v1", None), - // Ref also cannot be a wildcard. - ("owner/repo@*", None), - ("owner/repo@**", None), - ("owner/repo@***", None), - ("owner/repo/subpath@*", None), - ("owner/*@*", None), - ("*@*", None), - ] { - let pattern = RepositoryUsesPattern::from_str(pattern).ok(); - assert_eq!(pattern, expected); - } - } - - #[test] - fn test_repositoryusespattern_ord() { - let mut patterns = vec![ - RepositoryUsesPattern::Any, - RepositoryUsesPattern::ExactRepo { - owner: "owner".into(), - repo: "repo".into(), - }, - RepositoryUsesPattern::InOwner("owner".into()), - ]; - - patterns.sort(); - - assert_eq!( - patterns, - vec![ - RepositoryUsesPattern::ExactRepo { - owner: "owner".into(), - repo: "repo".into() - }, - RepositoryUsesPattern::InOwner("owner".into()), - RepositoryUsesPattern::Any, - ] - ); - } - - #[test] - fn test_repositoryusespattern_matches() -> anyhow::Result<()> { - for (uses, pattern, matches) in [ - // OK: case-insensitive, except subpath and tag - ("actions/checkout@v3", "Actions/Checkout@v3", true), - ("actions/checkout/foo@v3", "Actions/Checkout/foo", true), - ("actions/checkout@v3", "actions/checkout@V3", false), - // NOT OK: owner/repo do not match - ("actions/checkout@v3", "foo/checkout", false), - ("actions/checkout@v3", "actions/bar", false), - // NOT OK: subpath does not match - ("actions/checkout/foo@v3", "actions/checkout@v3", false), - // NOT OK: template is more specific than `uses:` - ("actions/checkout@v3", "actions/checkout/foo@v3", false), - // owner/repo/subpath matches regardless of ref and casing - // but only when the subpath matches. - // the subpath must share the same case but might not be - // normalized - ("actions/checkout/foo@v3", "actions/checkout/foo", true), - ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo", true), - // TODO: See comment in `RepositoryUsesPattern::matches` - // ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo/", true), - // ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo//", true), - // ("ACTIONS/CHECKOUT//foo////@v3", "actions/checkout/foo", true), - // owner/repo matches regardless of ref and casing - // but does not match subpaths - ("ACTIONS/CHECKOUT@v3", "actions/checkout", true), - ("actions/checkout@v3", "actions/checkout", true), - ("actions/checkout/foo@v3", "actions/checkout", false), - ("actions/somethingelse@v3", "actions/checkout", false), - ("whatever/checkout@v3", "actions/checkout", false), - // owner/repo/* matches regardless of ref and casing - // including subpaths - // but does not match when owner diverges - ("ACTIONS/CHECKOUT@v3", "actions/checkout/*", true), - ("actions/checkout@v3", "actions/checkout/*", true), - ("actions/checkout/foo@v3", "actions/checkout/*", true), - ("actions/checkout/foo/bar@v3", "actions/checkout/*", true), - ("someoneelse/checkout@v3", "actions/checkout/*", false), - // owner/* matches regardless of ref, casing, and subpath - // but rejects when owner diverges - ("ACTIONS/CHECKOUT@v3", "actions/*", true), - ("actions/checkout@v3", "actions/*", true), - ("actions/checkout/foo@v3", "actions/*", true), - ("someoneelse/checkout@v3", "actions/*", false), - // * matches everything - ("actions/checkout@v3", "*", true), - ("actions/checkout/foo@v3", "*", true), - ("whatever/checkout@v3", "*", true), - // exact matches - ("actions/checkout@v3", "actions/checkout@v3", true), - ("actions/checkout/foo@v3", "actions/checkout/foo@v3", true), - ("actions/checkout/foo@v1", "actions/checkout/foo@v3", false), - ] { - let Ok(Uses::Repository(uses)) = Uses::from_str(uses) else { - return Err(anyhow!("invalid uses: {uses}")); - }; - - let pattern = RepositoryUsesPattern::from_str(pattern)?; - - assert_eq!( - pattern.matches(&uses), - matches, - "pattern: {pattern:?}, uses: {uses:?}, matches: {matches}" - ); - } - - Ok(()) - } -} diff --git a/crates/zizmor/src/models/uses/docker.rs b/crates/zizmor/src/models/uses/docker.rs new file mode 100644 index 00000000..881a4228 --- /dev/null +++ b/crates/zizmor/src/models/uses/docker.rs @@ -0,0 +1,45 @@ +//! Patterns for Docker images (including in `uses:` clauses) and corresponding extension traits. + +use std::sync::LazyLock; + +use regex::Regex; + +// static DOCKER_IMAGE_PATTERN: LazyLock = LazyLock::new(|| { +// Regex::new( +// r#"(?xmi) # verbose, multi-line mode, case-insensitive +// ^ # start of line +// (?: # start of optional non-capturing group for [registry/] +// (? # start of capturing group for [registry] +// localhost|\w+\.\w+|\w+:\d+ # match localhost, domain-like, or domain:port +// ) # end of capturing group for [registry] +// / # / +// )? # end of optional non-capturing group for [registry/] +// (?: + +// )? +// "#, +// ) +// .unwrap() +// }); + +/// Represents a pattern for matching Docker images. +/// +/// These patterns are used for both `uses:` clauses and for other +/// audits that match image references, e.g. `unpinned-images`. +pub(crate) enum DockerImagePattern { + /// Matches `[registry/]namespace/image`, i.e. a specific image. + ExactImage { + registry: Option, + namespace: String, + image: String, + }, + /// Matches `[registry/]namespace/*`, i.e. any image in the given namespace. + InNamespace { + registry: Option, + namespace: String, + }, + /// Matches `registry/*`, i.e. any image in the given registry. + InRegistry(String), + /// Matches any image. + Any, +} diff --git a/crates/zizmor/src/models/uses/repository.rs b/crates/zizmor/src/models/uses/repository.rs new file mode 100644 index 00000000..1da6292a --- /dev/null +++ b/crates/zizmor/src/models/uses/repository.rs @@ -0,0 +1,438 @@ +//! Patterns for repository `uses:` and corresponding extension traits. + +use github_actions_models::common::RepositoryUses; +use regex::Regex; +use serde::Deserialize; +use std::{str::FromStr, sync::LazyLock}; + +/// Matches all variants of [`RepositoryUsesPattern`] except `*`. +/// +/// TODO: Replace this with a real parser; this is ridiculous. +static REPOSITORY_USES_PATTERN: LazyLock = LazyLock::new(|| { + Regex::new( + r#"(?xmi) # verbose, multi-line mode, case-insensitive + ^ # start of line + ([\w-]+) # (1) owner + / # / + ([\w\.-]+|\*) # (2) repo or * + (?: # non-capturing group for optional subpath + / # / + ( # (3) subpath + [[[:graph:]]&&[^@\*]]+ # any non-space, non-@, non-* characters + | # OR + \* # * + ) # end of (3) subpath + )? # end of non-capturing group for optional subpath + (?: # non-capturing group for optional git ref + @ # @ + ([[[:graph:]]&&[^\*]]+) # (4) git ref (any non-space, non-* characters) + )? # end of non-capturing group for optional git ref + $ # end of line + "#, + ) + .unwrap() +}); + +/// Represents a pattern for matching repository `uses` references. +/// These patterns are ordered by specificity; more specific patterns +/// should be listed first. +#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] +pub(crate) enum RepositoryUsesPattern { + /// Matches exactly `owner/repo/subpath@ref`. + ExactWithRef { + owner: String, + repo: String, + subpath: Option, + git_ref: String, + }, + /// Matches exactly `owner/repo/subpath`. + ExactPath { + owner: String, + repo: String, + subpath: String, + }, + /// Matches exactly `owner/repo`. + ExactRepo { owner: String, repo: String }, + /// Matches `owner/repo/*` (i.e. any subpath under the given repo, including + /// the empty subpath). + InRepo { owner: String, repo: String }, + /// Matches `owner/*` (i.e. any repo under the given owner). + InOwner(String), + /// Matches any `owner/repo`. + Any, +} + +impl RepositoryUsesPattern { + pub(crate) fn matches(&self, uses: &RepositoryUses) -> bool { + match self { + RepositoryUsesPattern::ExactWithRef { + owner, + repo, + subpath, + git_ref, + } => { + uses.owner.eq_ignore_ascii_case(owner) + && uses.repo.eq_ignore_ascii_case(repo) + && uses.subpath == *subpath + && uses.git_ref.as_str() == git_ref + } + RepositoryUsesPattern::ExactPath { + owner, + repo, + subpath, + } => { + // TODO: Normalize the subpath here. + // This is nontrivial, since we need to normalize + // both leading slashes *and* arbitrary ./.. components. + // Utf8Path gets us part of the way there, but is + // platform dependent (i.e. will do the wrong thing + // if the platform separator is not /). + uses.owner.eq_ignore_ascii_case(owner) + && uses.repo.eq_ignore_ascii_case(repo) + && uses.subpath.as_deref().is_some_and(|s| s == subpath) + } + RepositoryUsesPattern::ExactRepo { owner, repo } => { + uses.owner.eq_ignore_ascii_case(owner) + && uses.repo.eq_ignore_ascii_case(repo) + && uses.subpath.is_none() + } + RepositoryUsesPattern::InRepo { owner, repo } => { + uses.owner.eq_ignore_ascii_case(owner) && uses.repo.eq_ignore_ascii_case(repo) + } + RepositoryUsesPattern::InOwner(owner) => uses.owner.eq_ignore_ascii_case(owner), + RepositoryUsesPattern::Any => true, + } + } +} + +impl FromStr for RepositoryUsesPattern { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + if s == "*" { + return Ok(RepositoryUsesPattern::Any); + } + + let caps = REPOSITORY_USES_PATTERN + .captures(s) + .ok_or_else(|| anyhow::anyhow!("invalid pattern: {s}"))?; + + let owner = &caps[1]; + let repo = &caps[2]; + let subpath = caps.get(3).map(|m| m.as_str()); + let git_ref = caps.get(4).map(|m| m.as_str()); + + match (owner, repo, subpath, git_ref) { + (owner, "*", None, None) => Ok(RepositoryUsesPattern::InOwner(owner.into())), + (owner, repo, None, None) => Ok(RepositoryUsesPattern::ExactRepo { + owner: owner.into(), + repo: repo.into(), + }), + (_, "*", Some(_), _) => Err(anyhow::anyhow!("invalid pattern: {s}")), + (owner, repo, Some("*"), None) => Ok(RepositoryUsesPattern::InRepo { + owner: owner.into(), + repo: repo.into(), + }), + (owner, repo, Some(subpath), None) => Ok(RepositoryUsesPattern::ExactPath { + owner: owner.into(), + repo: repo.into(), + subpath: subpath.into(), + }), + (_, _, Some("*"), Some(_)) => Err(anyhow::anyhow!("invalid pattern: {s}")), + (owner, repo, subpath, Some(git_ref)) => Ok(RepositoryUsesPattern::ExactWithRef { + owner: owner.into(), + repo: repo.into(), + subpath: subpath.map(|s| s.into()), + git_ref: git_ref.into(), + }), + } + } +} + +impl<'de> Deserialize<'de> for RepositoryUsesPattern { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + RepositoryUsesPattern::from_str(&s).map_err(serde::de::Error::custom) + } +} + +/// Useful APIs for interacting with `uses: owner/repo` clauses. +pub(crate) trait RepositoryUsesExt { + /// Returns whether this `uses:` clause matches the given pattern. + /// + /// This uses [`RepositoryUsesPattern`] under the hood, and follows the + /// same matching rules. + fn matches(&self, pattern: &str) -> bool; + + /// Returns whether this `uses:` clause has a `git` ref and, if so, + /// whether that ref is a commit ref. + /// + /// For example, `foo/bar@baz` returns false while `foo/bar@1234...` + /// returns true. + fn ref_is_commit(&self) -> bool; + + /// Returns the `git` ref for this `uses:`, if present. + fn commit_ref(&self) -> Option<&str>; + + /// Returns the *symbolic* `git` ref for this `uses`, if present. + /// + /// Commit refs (i.e. SHA refs) are not returned. + fn symbolic_ref(&self) -> Option<&str>; +} + +impl RepositoryUsesExt for RepositoryUses { + fn matches(&self, template: &str) -> bool { + let Ok(pat) = template.parse::() else { + return false; + }; + + pat.matches(self) + } + + fn ref_is_commit(&self) -> bool { + self.git_ref.len() == 40 && self.git_ref.chars().all(|c| c.is_ascii_hexdigit()) + } + + fn commit_ref(&self) -> Option<&str> { + match &self.git_ref { + git_ref if self.ref_is_commit() => Some(git_ref), + _ => None, + } + } + + fn symbolic_ref(&self) -> Option<&str> { + match &self.git_ref { + git_ref if !self.ref_is_commit() => Some(git_ref), + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use anyhow::anyhow; + use github_actions_models::common::Uses; + + use super::RepositoryUsesPattern; + + #[test] + fn test_repositoryusespattern_parse() { + for (pattern, expected) in [ + ("", None), // Invalid, empty + ("/", None), // Invalid, not well formed + ("//", None), // Invalid, not well formed + ("///", None), // Invalid, not well formed + ("owner", None), // Invalid, sho,uld be owner/* + ("**", None), // Invalid, should be * + ("*", Some(RepositoryUsesPattern::Any)), + ( + "owner/*", + Some(RepositoryUsesPattern::InOwner("owner".into())), + ), + ("owner/*/", None), // Invalid, should be owner/* + ("owner/*/foo", None), // Invalid, not well formed + ("owner/*/*", None), // Invalid, not well formed + ("*/foo", None), // Invalid, not well formed + ("owner/repo/**", None), // Invalid, not well formed. + ( + "owner/repo/*", + Some(RepositoryUsesPattern::InRepo { + owner: "owner".into(), + repo: "repo".into(), + }), + ), + ( + "owner/repo", + Some(RepositoryUsesPattern::ExactRepo { + owner: "owner".into(), + repo: "repo".into(), + }), + ), + ( + "owner/repo/subpath", + Some(RepositoryUsesPattern::ExactPath { + owner: "owner".into(), + repo: "repo".into(), + subpath: "subpath".into(), + }), + ), + // We don't do any subpath normalization at construction time. + ( + "owner/repo//", + Some(RepositoryUsesPattern::ExactPath { + owner: "owner".into(), + repo: "repo".into(), + subpath: "/".into(), + }), + ), + ( + "owner/repo/subpath/", + Some(RepositoryUsesPattern::ExactPath { + owner: "owner".into(), + repo: "repo".into(), + subpath: "subpath/".into(), + }), + ), + ( + "owner/repo/subpath/very/nested////and/literal", + Some(RepositoryUsesPattern::ExactPath { + owner: "owner".into(), + repo: "repo".into(), + subpath: "subpath/very/nested////and/literal".into(), + }), + ), + ( + "owner/repo@v1", + Some(RepositoryUsesPattern::ExactWithRef { + owner: "owner".into(), + repo: "repo".into(), + subpath: None, + git_ref: "v1".into(), + }), + ), + ( + "owner/repo/subpath@v1", + Some(RepositoryUsesPattern::ExactWithRef { + owner: "owner".into(), + repo: "repo".into(), + subpath: Some("subpath".into()), + git_ref: "v1".into(), + }), + ), + ( + "owner/repo@172239021f7ba04fe7327647b213799853a9eb89", + Some(RepositoryUsesPattern::ExactWithRef { + owner: "owner".into(), + repo: "repo".into(), + subpath: None, + git_ref: "172239021f7ba04fe7327647b213799853a9eb89".into(), + }), + ), + ( + "pypa/gh-action-pypi-publish@release/v1", + Some(RepositoryUsesPattern::ExactWithRef { + owner: "pypa".into(), + repo: "gh-action-pypi-publish".into(), + subpath: None, + git_ref: "release/v1".into(), + }), + ), + // Invalid: no wildcards allowed when refs are present. + ("owner/repo/*@v1", None), + ("owner/repo/*/subpath@v1", None), + ("owner/*/subpath@v1", None), + ("*/*/subpath@v1", None), + // Ref also cannot be a wildcard. + ("owner/repo@*", None), + ("owner/repo@**", None), + ("owner/repo@***", None), + ("owner/repo/subpath@*", None), + ("owner/*@*", None), + ("*@*", None), + ] { + let pattern = RepositoryUsesPattern::from_str(pattern).ok(); + assert_eq!(pattern, expected); + } + } + + #[test] + fn test_repositoryusespattern_ord() { + let mut patterns = vec![ + RepositoryUsesPattern::Any, + RepositoryUsesPattern::ExactRepo { + owner: "owner".into(), + repo: "repo".into(), + }, + RepositoryUsesPattern::InOwner("owner".into()), + ]; + + patterns.sort(); + + assert_eq!( + patterns, + vec![ + RepositoryUsesPattern::ExactRepo { + owner: "owner".into(), + repo: "repo".into() + }, + RepositoryUsesPattern::InOwner("owner".into()), + RepositoryUsesPattern::Any, + ] + ); + } + + #[test] + fn test_repositoryusespattern_matches() -> anyhow::Result<()> { + for (uses, pattern, matches) in [ + // OK: case-insensitive, except subpath and tag + ("actions/checkout@v3", "Actions/Checkout@v3", true), + ("actions/checkout/foo@v3", "Actions/Checkout/foo", true), + ("actions/checkout@v3", "actions/checkout@V3", false), + // NOT OK: owner/repo do not match + ("actions/checkout@v3", "foo/checkout", false), + ("actions/checkout@v3", "actions/bar", false), + // NOT OK: subpath does not match + ("actions/checkout/foo@v3", "actions/checkout@v3", false), + // NOT OK: template is more specific than `uses:` + ("actions/checkout@v3", "actions/checkout/foo@v3", false), + // owner/repo/subpath matches regardless of ref and casing + // but only when the subpath matches. + // the subpath must share the same case but might not be + // normalized + ("actions/checkout/foo@v3", "actions/checkout/foo", true), + ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo", true), + // TODO: See comment in `RepositoryUsesPattern::matches` + // ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo/", true), + // ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo//", true), + // ("ACTIONS/CHECKOUT//foo////@v3", "actions/checkout/foo", true), + // owner/repo matches regardless of ref and casing + // but does not match subpaths + ("ACTIONS/CHECKOUT@v3", "actions/checkout", true), + ("actions/checkout@v3", "actions/checkout", true), + ("actions/checkout/foo@v3", "actions/checkout", false), + ("actions/somethingelse@v3", "actions/checkout", false), + ("whatever/checkout@v3", "actions/checkout", false), + // owner/repo/* matches regardless of ref and casing + // including subpaths + // but does not match when owner diverges + ("ACTIONS/CHECKOUT@v3", "actions/checkout/*", true), + ("actions/checkout@v3", "actions/checkout/*", true), + ("actions/checkout/foo@v3", "actions/checkout/*", true), + ("actions/checkout/foo/bar@v3", "actions/checkout/*", true), + ("someoneelse/checkout@v3", "actions/checkout/*", false), + // owner/* matches regardless of ref, casing, and subpath + // but rejects when owner diverges + ("ACTIONS/CHECKOUT@v3", "actions/*", true), + ("actions/checkout@v3", "actions/*", true), + ("actions/checkout/foo@v3", "actions/*", true), + ("someoneelse/checkout@v3", "actions/*", false), + // * matches everything + ("actions/checkout@v3", "*", true), + ("actions/checkout/foo@v3", "*", true), + ("whatever/checkout@v3", "*", true), + // exact matches + ("actions/checkout@v3", "actions/checkout@v3", true), + ("actions/checkout/foo@v3", "actions/checkout/foo@v3", true), + ("actions/checkout/foo@v1", "actions/checkout/foo@v3", false), + ] { + let Ok(Uses::Repository(uses)) = Uses::from_str(uses) else { + return Err(anyhow!("invalid uses: {uses}")); + }; + + let pattern = RepositoryUsesPattern::from_str(pattern)?; + + assert_eq!( + pattern.matches(&uses), + matches, + "pattern: {pattern:?}, uses: {uses:?}, matches: {matches}" + ); + } + + Ok(()) + } +}