Skip to content

Commit 5f5b482

Browse files
authored
Fetch data for all PyPI packages (#41)
1 parent e3cb027 commit 5f5b482

14 files changed

+24089
-23991
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,7 @@ ENV/
9999

100100
# mypy
101101
.mypy_cache/
102+
103+
# Big unzipped files
104+
top-pypi-packages-30-days-all.csv
105+
top-pypi-packages-30-days-all.json

.pre-commit-config.yaml

+16-2
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,39 @@
11
repos:
2+
- repo: https://github.com/astral-sh/ruff-pre-commit
3+
rev: v0.8.0
4+
hooks:
5+
- id: ruff
6+
args: [--exit-non-zero-on-fix]
7+
8+
- repo: https://github.com/psf/black-pre-commit-mirror
9+
rev: 24.10.0
10+
hooks:
11+
- id: black
12+
213
- repo: https://github.com/pre-commit/pre-commit-hooks
314
rev: v5.0.0
415
hooks:
516
- id: check-added-large-files
17+
exclude: top-pypi-packages-30-days-all.*
618
- id: check-case-conflict
719
- id: check-merge-conflict
820
- id: check-json
21+
- id: check-toml
922
- id: check-yaml
23+
- id: debug-statements
1024
- id: end-of-file-fixer
1125
- id: forbid-submodules
1226
- id: trailing-whitespace
1327

1428
- repo: https://github.com/python-jsonschema/check-jsonschema
15-
rev: 0.29.3
29+
rev: 0.29.4
1630
hooks:
1731
- id: check-github-workflows
1832

19-
2033
- repo: meta
2134
hooks:
2235
- id: check-hooks-apply
2336
- id: check-useless-excludes
37+
2438
ci:
2539
autoupdate_schedule: quarterly

.ruff.toml

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
fix = true
2+
3+
lint.select = [
4+
"C4", # flake8-comprehensions
5+
"E", # pycodestyle
6+
"EM", # flake8-errmsg
7+
"F", # pyflakes
8+
"I", # isort
9+
"ICN", # flake8-import-conventions
10+
"ISC", # flake8-implicit-str-concat
11+
"LOG", # flake8-logging
12+
"PGH", # pygrep-hooks
13+
"PT", # flake8-pytest-style
14+
"PYI", # flake8-pyi
15+
"RUF022", # unsorted-dunder-all
16+
"RUF100", # unused noqa (yesqa)
17+
"S", # flake8-bandit
18+
"UP", # pyupgrade
19+
"W", # pycodestyle
20+
"YTT", # flake8-2020
21+
]
22+
lint.ignore = [
23+
"E203", # Whitespace before ':'
24+
"E221", # Multiple spaces before operator
25+
"E226", # Missing whitespace around arithmetic operator
26+
"E241", # Multiple spaces after ','
27+
"UP038", # Makes code slower and more verbose
28+
]
29+
lint.isort.required-imports = [ "from __future__ import annotations" ]

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ Old versions can be found in [releases](https://github.com/hugovk/top-pypi-packa
1818

1919
From cron, it runs pypinfo to dump JSON and commit back to this repo.
2020

21-
### Install jq
21+
### Install jq and zip
2222

2323
For example on Ubuntu 22.04:
2424

2525
```bash
26-
sudo apt-get install jq
26+
sudo apt-get install jq zip
2727
```
2828

2929
### Install and set up pypinfo

build.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,18 @@
44
set -e
55

66
# Timestamp for logs
7-
echo "$(date)"
7+
date
88

99
# Update
1010
git pull origin main
1111

1212
# Generate the files
1313
bash generate.sh
1414

15+
# Remove big unzipped files
16+
rm top-pypi-packages-30-days-all.csv
17+
rm top-pypi-packages-30-days-all.json
18+
1519
# Make output directory, don't fail if it exists
1620
# mkdir -p build
1721

deploy.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
set -e
88

99
# Gets commit hash as message
10-
REV=`git rev-parse HEAD`
10+
REV=$(git rev-parse HEAD)
1111

1212
# git checkout gh-pages # Step 3
1313

@@ -27,6 +27,6 @@ git push # Step 9
2727

2828
# CalVer YYYY.0M
2929
date=$(date '+%Y.%m')
30-
echo $date
30+
echo "$date"
3131
git tag -a "$date" -m "Release $date"
3232
git push --tags

generate.sh

+14-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,20 @@ python3 -m pip install -U pypinfo
1515
python3 -m pip --version
1616
/home/botuser/.local/bin/pypinfo --version
1717

18+
# Check if zip is installed
19+
if ! command -v zip &> /dev/null
20+
then
21+
echo "zip not found, consider: apt install zip"
22+
exit 1
23+
fi
24+
1825
# Generate and minify for 30 days
19-
/home/botuser/.local/bin/pypinfo --all --json --indent 0 --limit 8000 --days 30 "" project > top-pypi-packages-30-days.json
26+
/home/botuser/.local/bin/pypinfo --all --json --indent 0 --limit 10000000 --days 30 "" project > top-pypi-packages-30-days-all.json
27+
python3 trim.py > top-pypi-packages-30-days.json
2028
jq -c . < top-pypi-packages-30-days.json > top-pypi-packages-30-days.min.json
29+
echo 'download_count,project' > top-pypi-packages-30-days-all.csv
2130
echo 'download_count,project' > top-pypi-packages-30-days.csv
22-
jq -r '.rows[] | [.download_count, .project] | @csv' top-pypi-packages-30-days.json >> top-pypi-packages-30-days.csv
31+
jq -r '.rows[] | [.download_count, .project] | @csv' top-pypi-packages-30-days-all.json >> top-pypi-packages-30-days-all.csv
32+
jq -r '.rows[] | [.download_count, .project] | @csv' top-pypi-packages-30-days.json >> top-pypi-packages-30-days.csv
33+
zip top-pypi-packages-30-days-all.csv.zip top-pypi-packages-30-days-all.csv
34+
zip top-pypi-packages-30-days-all.json.zip top-pypi-packages-30-days-all.json

index.html

+2-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ <h2 id="changelog">Changelog</h2>
129129
<li>2021-07: Fetch data for 5,000 packages over only 30 days (<a href="https://github.com/hugovk/top-pypi-packages/pull/20">#20</a>)</li>
130130
<li>2021-09: Fetch data for 8,000 packages (<a href="https://github.com/hugovk/top-pypi-packages/pull/30">#30</a>)</li>
131131
<li>2024-05: Provide data in CSV in addition to JSON (<a href="https://github.com/hugovk/top-pypi-packages/issues/31">#31</a>)</li>
132-
<li>2024-11: Fetch data for all installers, not only pip (<a href="https://github.com/hugovk/top-pypi-packages/issues/39">#39</a>)</li>
132+
<li>2024-11: Fetch data for all PyPI packages (<a href="https://github.com/hugovk/top-pypi-packages/issues/41">#41</a>)
133+
and for all installers, not only pip (<a href="https://github.com/hugovk/top-pypi-packages/issues/39">#39</a>)</li>
133134
</ul>
134135
</div>
135136
<div class="col-sm-6">

top-pypi-packages-30-days-all.csv.zip

4.43 MB
Binary file not shown.
5.17 MB
Binary file not shown.

0 commit comments

Comments
 (0)