-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathquery_repos.sql
More file actions
48 lines (45 loc) · 1.83 KB
/
query_repos.sql
File metadata and controls
48 lines (45 loc) · 1.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
-- This query finds all non-archived repositories that contain one or more packages
-- with a 'STOP' or 'CAUTION' license. It returns all columns for each matching
-- repository, along with a count of its distinct affected packages.
-- A Common Table Expression (CTE) to first find and aggregate the affected repos
WITH AffectedRepoCounts AS (
SELECT
r.html_url,
-- Count the number of *distinct* packages that are affected
COUNT(DISTINCT p.name) AS affected_package_count
FROM
repos r
JOIN
-- Link repos to packages: based on your info, these are an exact match.
-- This is more accurate and much faster than the old 'LIKE' join.
packages p ON p.importing_repo = r.html_url
WHERE
-- Condition 1: Exclude archived repositories
r.is_archived != 'TRUE'
-- Condition 2: Filter for packages that have at least one 'STOP' or 'CAUTION' license
AND (EXISTS (
SELECT 1
FROM licenses l
WHERE
(l.allowed_status = 'STOP' OR l.allowed_status = 'CAUTION' OR l.allowed_status = 'UNCLASSIFIED' OR l.allowed_status = 'UNDETECTED')
-- This checks if the package's license string "mentions" the name of a bad license
AND p.licenses LIKE '%' || l.name || '%'
)
--)
GROUP BY
-- Group the results by repo to get the count per repo
r.html_url
)
-- Final SELECT statement
-- Get all details for the repos identified in the CTE
SELECT
r.*, -- Selects all columns from the 'repos' table
arc.affected_package_count
FROM
repos r
JOIN
-- Join the full repos table with our list of affected repos
AffectedRepoCounts arc ON r.html_url = arc.html_url
ORDER BY
-- Sort the results to see the most affected repos first
arc.affected_package_count DESC;