Skip to content

Commit 1452df8

Browse files
authored
Merge pull request #509 from mapswipe/osm_api_to_osmcha
Osm api to osmcha
2 parents d4d4fa1 + 6b4d0fc commit 1452df8

File tree

5 files changed

+66
-9
lines changed

5 files changed

+66
-9
lines changed

.github/workflows/actions.yml

+1
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ jobs:
5959
POSTGRES_PASSWORD: test
6060
POSTGRES_USER: test
6161
POSTGRES_DB: test
62+
OSMCHA_API_KEY: ${{ secrets.OSMCHA_API_KEY }}
6263
run: |
6364
docker-compose run mapswipe_workers_creation python -m unittest discover --verbose --start-directory tests/unittests/
6465
docker-compose run mapswipe_workers_creation python -m unittest discover --verbose --start-directory tests/integration/

docker-compose.yaml

+3
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ services:
7373
SLACK_TOKEN: '${SLACK_TOKEN}'
7474
SLACK_CHANNEL: '${SLACK_CHANNEL}'
7575
SENTRY_DSN: '${SENTRY_DSN}'
76+
OSMCHA_API_KEY: '${OSMCHA_API_KEY}'
7677
depends_on:
7778
- postgres
7879
command: mapswipe_workers --verbose run --analysis_type=creation --schedule --time_interval=5
@@ -107,6 +108,7 @@ services:
107108
SLACK_TOKEN: '${SLACK_TOKEN}'
108109
SLACK_CHANNEL: '${SLACK_CHANNEL}'
109110
SENTRY_DSN: '${SENTRY_DSN}'
111+
OSMCHA_API_KEY: '${OSMCHA_API_KEY}'
110112
depends_on:
111113
- postgres
112114
command: mapswipe_workers --verbose run --analysis_type=generate-stats --schedule --time_interval=60
@@ -141,6 +143,7 @@ services:
141143
SLACK_TOKEN: '${SLACK_TOKEN}'
142144
SLACK_CHANNEL: '${SLACK_CHANNEL}'
143145
SENTRY_DSN: '${SENTRY_DSN}'
146+
OSMCHA_API_KEY: '${OSMCHA_API_KEY}'
144147
depends_on:
145148
- postgres
146149
command: mapswipe_workers --verbose run --analysis_type=firebase-to-postgres --schedule --time_interval=2

example.env

+3
Original file line number | Diff line number | Diff line change
@@ -28,3 +28,6 @@ SLACK_CHANNEL=
2828

2929
# sentry configuration
3030
SENTRY_DSN=
31+
32+
# osmcha configuration
33+
OSMCHA_API_KEY=

mapswipe_workers/mapswipe_workers/definitions.py

+2
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414

1515
OHSOME_API_LINK = "https://api.ohsome.org/v1/"
1616
OSM_API_LINK = "https://www.openstreetmap.org/api/0.6/"
17+
OSMCHA_API_LINK = "https://osmcha.org/api/v1/"
18+
OSMCHA_API_KEY = os.environ["OSMCHA_API_KEY"]
1719

1820
# number of geometries for project geometries
1921
MAX_INPUT_GEOMETRIES = 10

mapswipe_workers/mapswipe_workers/utils/api_calls.py

+57-9
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
from mapswipe_workers.definitions import (
88
OHSOME_API_LINK,
99
OSM_API_LINK,
10+
OSMCHA_API_KEY,
11+
OSMCHA_API_LINK,
1012
CustomError,
1113
logger,
1214
)
@@ -22,12 +24,16 @@ def remove_troublesome_chars(string: str):
2224
return string
2325

2426

25-
def retry_get(url, retries=3, timeout=4):
27+
def retry_get(url, retries=3, timeout=4, to_osmcha: bool = False):
    """Send a GET request to ``url`` through a session with a retry policy.

    Args:
        url: Address to request. The retry adapter is mounted for the
            ``https://`` prefix only.
        retries: Value passed as ``total`` to the ``Retry`` policy.
        timeout: Timeout handed to ``session.get``.
        to_osmcha: When true, an ``Authorization: Token <OSMCHA_API_KEY>``
            header is added to the request.

    Returns:
        The response object returned by ``session.get``.
    """
    # Collect the call arguments first so there is a single request call;
    # the header is only included when the request goes to OSMCha.
    request_kwargs = {"timeout": timeout}
    if to_osmcha:
        request_kwargs["headers"] = {"Authorization": f"Token {OSMCHA_API_KEY}"}

    retry_policy = Retry(total=retries)
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retry_policy))
        return session.get(url, **request_kwargs)
3137

3238

3339
def geojsonToFeatureCollection(geojson: dict) -> dict:
@@ -49,6 +55,29 @@ def chunks(arr, n_objects):
4955
]
5056

5157

58+
def query_osmcha(changeset_ids: list, changeset_results):
    """Fetch changeset metadata from OSMCha and store it in ``changeset_results``.

    Args:
        changeset_ids: Changeset ids requested in a single call; they are
            joined with commas into the ``ids`` query parameter.
        changeset_results: Dict that is updated in place. Every feature in
            the response is stored under its integer id with the keys
            ``username``, ``userid``, ``comment`` and ``editor``.

    Returns:
        The same ``changeset_results`` dict that was passed in.

    Raises:
        CustomError: If the response status code is not 200.
    """
    if not changeset_ids:
        # Nothing to look up; do not send a request with an empty ``ids`` filter.
        return changeset_results

    id_string = ",".join(map(str, changeset_ids))

    url = OSMCHA_API_LINK + f"changesets/?ids={id_string}"
    response = retry_get(url, to_osmcha=True)
    if response.status_code != 200:
        err = f"osmcha request failed: {response.status_code}"
        logger.warning(err)
        # Log the raw body instead of decoding it: an error response is not
        # guaranteed to be JSON, and a decoding failure at this point would
        # replace the CustomError raised below.
        logger.warning(response.text)
        raise CustomError(err)

    for feature in response.json()["features"]:
        properties = feature["properties"]
        changeset_results[int(feature["id"])] = {
            "username": remove_troublesome_chars(properties["user"]),
            "userid": properties["uid"],
            "comment": remove_troublesome_chars(properties["comment"]),
            "editor": remove_troublesome_chars(properties["editor"]),
        }

    return changeset_results
79+
80+
5281
def query_osm(changeset_ids: list, changeset_results):
5382
"""Get data from changesetId."""
5483
id_string = ",".join(map(str, changeset_ids))
@@ -77,14 +106,17 @@ def query_osm(changeset_ids: list, changeset_results):
77106
"username": remove_troublesome_chars(username),
78107
"userid": userid,
79108
"comment": remove_troublesome_chars(comment),
80-
"created_by": remove_troublesome_chars(created_by),
109+
"editor": remove_troublesome_chars(created_by),
81110
}
82111
return changeset_results
83112

84113

85114
def remove_noise_and_add_user_info(json: dict) -> dict:
86115
"""Delete unwanted information from properties."""
87116
logger.info("starting filtering and adding extra info")
117+
batch_size = 100
118+
119+
# remove noise
88120
changeset_results = {}
89121

90122
missing_rows = {
@@ -106,21 +138,37 @@ def remove_noise_and_add_user_info(json: dict) -> dict:
106138
changeset_results[new_properties["changesetId"]] = None
107139
feature["properties"] = new_properties
108140

141+
# add info
109142
len_osm = len(changeset_results.keys())
110-
batches = int(len(changeset_results.keys()) / 100) + 1
143+
batches = int(len(changeset_results.keys()) / batch_size) + 1
144+
logger.info(
145+
f"""{len_osm} changesets will be queried in roughly {batches} batches from osmCHA""" # noqa E501
146+
)
147+
148+
chunk_list = chunks(list(changeset_results.keys()), batch_size)
149+
for i, subset in enumerate(chunk_list):
150+
changeset_results = query_osmcha(subset, changeset_results)
151+
progress = round(100 * ((i + 1) / len(chunk_list)), 1)
152+
logger.info(f"finished query {i+1}/{len(chunk_list)}, {progress}")
153+
154+
missing_ids = [i for i, v in changeset_results.items() if v is None]
155+
chunk_list = chunks(missing_ids, batch_size)
156+
batches = int(len(missing_ids) / batch_size) + 1
111157
logger.info(
112-
f"""{len_osm} changesets will be queried in roughly {batches} batches"""
158+
f"""{len(missing_ids)} changesets where missing from osmCHA and are now queried via osmAPI in {batches} batches""" # noqa E501
113159
)
114-
chunk_list = chunks(list(changeset_results.keys()), 100)
115160
for i, subset in enumerate(chunk_list):
116161
changeset_results = query_osm(subset, changeset_results)
117162
progress = round(100 * ((i + 1) / len(chunk_list)), 1)
118163
logger.info(f"finished query {i+1}/{len(chunk_list)}, {progress}")
119164

120165
for feature in json["features"]:
121-
changeset = changeset_results[feature["properties"]["changesetId"]]
122-
for attribute_name in ["username", "comment", "created_by", "userid"]:
123-
feature["properties"][attribute_name] = changeset[attribute_name]
166+
changeset = changeset_results[int(feature["properties"]["changesetId"])]
167+
for attribute_name in ["username", "comment", "editor", "userid"]:
168+
if attribute_name == "userid":
169+
feature["properties"][attribute_name] = int(changeset[attribute_name])
170+
else:
171+
feature["properties"][attribute_name] = changeset[attribute_name]
124172

125173
logger.info("finished filtering and adding extra info")
126174
if any(x > 0 for x in missing_rows.values()):

0 commit comments

Comments
 (0)