forked from natke/repostats
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter-docs-issues.py
50 lines (41 loc) · 2.09 KB
/
filter-docs-issues.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import csv
import json
import glob
from dateutil import parser
# Label that marks an issue as a documentation issue.
DOCS_LABEL = 'documentation'

# Column names of the generated CSV report.
CSV_HEADER = ['id', 'title','state','created','closed', 'url', 'Update Made']


def _has_docs_label(issue) -> bool:
    """Return True when any label on *issue* is named ``documentation``."""
    return any(label["name"] == DOCS_LABEL for label in issue["labels"])


def _timeline_shows_update(events) -> bool:
    """Return True when the timeline ties the issue to a commit or a pull request.

    An update counts as made when a ``closed`` or ``referenced`` event carries
    a non-null ``commit_id``, or when a ``cross-referenced`` event originates
    from an issue object that has a ``pull_request`` entry.
    """
    updated = False
    for event in events:
        event_type = event["event"]
        if event_type in ("closed", "referenced"):
            # .get(): an absent key is treated like an explicit null commit id
            # rather than aborting the whole run with a KeyError.
            commit_id = event.get("commit_id")
            print(commit_id)
            if commit_id is not None:
                updated = True
        elif event_type == "cross-referenced":
            source = event["source"]
            if source["type"] == "issue":
                referencing_issue = source["issue"]
                if "pull_request" in referencing_issue:
                    print(referencing_issue["pull_request"]["url"])
                    updated = True
    return updated


def _update_made(issue_number, timeline_dir) -> bool:
    """Load the saved timeline for *issue_number* and check it for an update.

    Returns False when no timeline file exists for the issue.
    """
    timeline_file = f'{timeline_dir}/timeline-{issue_number}.json'
    if not os.path.exists(timeline_file):
        return False
    with open(timeline_file, encoding="utf8") as g:
        return _timeline_shows_update(json.load(g))


def main(issues_pattern='data/issues-*.json',
         timeline_dir='data/json',
         output_path="data/docs-issues2.csv"):
    """Write a CSV report of documentation issues and whether an update was made.

    Args:
        issues_pattern: glob pattern matching the JSON files that each hold a
            list of issue objects.
        timeline_dir: directory holding the per-issue ``timeline-<number>.json``
            files.
        output_path: path of the CSV file to (over)write.

    Pull requests (objects with a ``pull_request`` key) and issues without the
    ``documentation`` label are skipped.
    """
    # The with-block guarantees the report is flushed and closed; utf8 lets
    # non-ASCII titles be written as text on any platform.
    with open(output_path, "w", newline='', encoding="utf8") as out:
        writer = csv.writer(out)
        writer.writerow(CSV_HEADER)
        for issue_file in sorted(glob.glob(issues_pattern)):
            with open(issue_file, encoding="utf8") as f:
                issues = json.load(f)
            for issue in issues:
                if "pull_request" in issue or not _has_docs_label(issue):
                    continue
                number = issue["number"]
                print(number)
                # Only closed issues can have been resolved by an update, so
                # the timeline is consulted for those alone.
                update_made = False
                if issue["closed_at"]:
                    update_made = _update_made(number, timeline_dir)
                # NOTE(review): open issues are reported too (with an empty
                # 'closed' column) - confirm this matches the intended report.
                # The title is written as str; encoding it to bytes would put
                # the literal b'...' repr into the CSV on Python 3.
                writer.writerow([number, issue["title"], issue["state"],
                                 issue["created_at"], issue["closed_at"],
                                 issue["url"], update_made])


if __name__ == "__main__":
    main()