7
7
from mapswipe_workers .definitions import (
8
8
OHSOME_API_LINK ,
9
9
OSM_API_LINK ,
10
+ OSMCHA_API_KEY ,
11
+ OSMCHA_API_LINK ,
10
12
CustomError ,
11
13
logger ,
12
14
)
@@ -22,12 +24,16 @@ def remove_troublesome_chars(string: str):
22
24
return string
23
25
24
26
25
def retry_get(url, retries=3, timeout=4, to_osmcha: bool = False):
    """Send a GET request through a session that retries failed attempts.

    An HTTPS adapter configured with ``Retry(total=retries)`` is mounted on a
    short-lived session. When ``to_osmcha`` is true the request carries an
    ``Authorization: Token <OSMCHA_API_KEY>`` header; otherwise no extra
    headers are passed. The response of ``session.get`` is returned as is.
    """
    request_kwargs = {"timeout": timeout}
    if to_osmcha:
        # Only requests flagged for OSMCha get the token header.
        request_kwargs["headers"] = {"Authorization": f"Token {OSMCHA_API_KEY}"}

    retry_policy = Retry(total=retries)
    with requests.Session() as http:
        http.mount("https://", HTTPAdapter(max_retries=retry_policy))
        return http.get(url, **request_kwargs)
31
37
32
38
33
39
def geojsonToFeatureCollection (geojson : dict ) -> dict :
@@ -49,6 +55,29 @@ def chunks(arr, n_objects):
49
55
]
50
56
51
57
58
def query_osmcha(changeset_ids: list, changeset_results: dict) -> dict:
    """Fetch changeset metadata from OSMCha and store it in changeset_results.

    Args:
        changeset_ids: Changeset ids to look up; they are joined with commas
            into the ``ids`` query parameter of a single request.
        changeset_results: Mapping that is updated in place. For every feature
            in the response, ``int(feature["id"])`` is mapped to a dict with
            the keys ``username``, ``userid``, ``comment`` and ``editor``.

    Returns:
        The same ``changeset_results`` mapping that was passed in.

    Raises:
        CustomError: If the response status code is not 200.
    """
    if not changeset_ids:
        # Nothing to look up; avoid sending a request with an empty id filter.
        return changeset_results

    id_string = ",".join(map(str, changeset_ids))
    url = OSMCHA_API_LINK + f"changesets/?ids={id_string}"
    response = retry_get(url, to_osmcha=True)

    if response.status_code != 200:
        err = f"osmcha request failed: {response.status_code}"
        logger.warning(err)
        # An error body is not guaranteed to be JSON (e.g. an HTML gateway
        # page); fall back to the raw text so the decode error does not mask
        # the CustomError raised below.
        try:
            error_body = response.json()
        except ValueError:
            error_body = response.text
        logger.warning(error_body)
        raise CustomError(err)

    # NOTE(review): only the "features" of this single response are read. If
    # the API paginates, ids beyond the first page stay untouched here;
    # confirm the page size covers the batch size used by the caller.
    for feature in response.json()["features"]:
        properties = feature["properties"]
        changeset_results[int(feature["id"])] = {
            "username": remove_troublesome_chars(properties["user"]),
            "userid": properties["uid"],
            "comment": remove_troublesome_chars(properties["comment"]),
            "editor": remove_troublesome_chars(properties["editor"]),
        }

    return changeset_results
79
+
80
+
52
81
def query_osm (changeset_ids : list , changeset_results ):
53
82
"""Get data from changesetId."""
54
83
id_string = "," .join (map (str , changeset_ids ))
@@ -77,14 +106,17 @@ def query_osm(changeset_ids: list, changeset_results):
77
106
"username" : remove_troublesome_chars (username ),
78
107
"userid" : userid ,
79
108
"comment" : remove_troublesome_chars (comment ),
80
- "created_by " : remove_troublesome_chars (created_by ),
109
+ "editor " : remove_troublesome_chars (created_by ),
81
110
}
82
111
return changeset_results
83
112
84
113
85
114
def remove_noise_and_add_user_info (json : dict ) -> dict :
86
115
"""Delete unwanted information from properties."""
87
116
logger .info ("starting filtering and adding extra info" )
117
+ batch_size = 100
118
+
119
+ # remove noise
88
120
changeset_results = {}
89
121
90
122
missing_rows = {
@@ -106,21 +138,37 @@ def remove_noise_and_add_user_info(json: dict) -> dict:
106
138
changeset_results [new_properties ["changesetId" ]] = None
107
139
feature ["properties" ] = new_properties
108
140
141
+ # add info
109
142
len_osm = len (changeset_results .keys ())
110
- batches = int (len (changeset_results .keys ()) / 100 ) + 1
143
+ batches = int (len (changeset_results .keys ()) / batch_size ) + 1
144
+ logger .info (
145
+ f"""{ len_osm } changesets will be queried in roughly { batches } batches from osmCHA""" # noqa E501
146
+ )
147
+
148
+ chunk_list = chunks (list (changeset_results .keys ()), batch_size )
149
+ for i , subset in enumerate (chunk_list ):
150
+ changeset_results = query_osmcha (subset , changeset_results )
151
+ progress = round (100 * ((i + 1 ) / len (chunk_list )), 1 )
152
+ logger .info (f"finished query { i + 1 } /{ len (chunk_list )} , { progress } " )
153
+
154
+ missing_ids = [i for i , v in changeset_results .items () if v is None ]
155
+ chunk_list = chunks (missing_ids , batch_size )
156
+ batches = int (len (missing_ids ) / batch_size ) + 1
111
157
logger .info (
112
- f"""{ len_osm } changesets will be queried in roughly { batches } batches"""
158
+ f"""{ len ( missing_ids ) } changesets where missing from osmCHA and are now queried via osmAPI in { batches } batches""" # noqa E501
113
159
)
114
- chunk_list = chunks (list (changeset_results .keys ()), 100 )
115
160
for i , subset in enumerate (chunk_list ):
116
161
changeset_results = query_osm (subset , changeset_results )
117
162
progress = round (100 * ((i + 1 ) / len (chunk_list )), 1 )
118
163
logger .info (f"finished query { i + 1 } /{ len (chunk_list )} , { progress } " )
119
164
120
165
for feature in json ["features" ]:
121
- changeset = changeset_results [feature ["properties" ]["changesetId" ]]
122
- for attribute_name in ["username" , "comment" , "created_by" , "userid" ]:
123
- feature ["properties" ][attribute_name ] = changeset [attribute_name ]
166
+ changeset = changeset_results [int (feature ["properties" ]["changesetId" ])]
167
+ for attribute_name in ["username" , "comment" , "editor" , "userid" ]:
168
+ if attribute_name == "userid" :
169
+ feature ["properties" ][attribute_name ] = int (changeset [attribute_name ])
170
+ else :
171
+ feature ["properties" ][attribute_name ] = changeset [attribute_name ]
124
172
125
173
logger .info ("finished filtering and adding extra info" )
126
174
if any (x > 0 for x in missing_rows .values ()):
0 commit comments