 import logging
 import math
 import requests
-import bs4
 import mechanicalsoup as ms
 from urllib.parse import urlencode
+from bs4 import BeautifulSoup
 from pycaching.area import Area
 from pycaching.cache import Cache
 from pycaching.util import Util
@@ -35,7 +35,8 @@ class Geocaching(object):
     _urls = {
         "login_page": _baseurl + "login/default.aspx",
         "cache_details": _baseurl + "geocache/{wp}",
-        "caches_nearest": _baseurl + "seek/nearest.aspx",
+        "search": _baseurl + "play/search",
+        "search_more": _baseurl + "play/search/more-results",
         "geocode": _baseurl + "api/geocode",
         "map": _tile_url + "map.details",
         "tile": _tile_url + "map.png",
@@ -132,105 +133,84 @@ def search(self, point, limit=0):
 
         logging.info("Searching at %s...", point)
 
-        page_num = 1
-        cache_num = 0
+        start_index = 0
         while True:
-            try: # try to load search page
-                page = self._search_get_page(point, page_num)
-            except requests.exceptions.ConnectionError as e:
-                raise StopIteration("Cannot load search page.") from e
-
-            for cache in page:
-                yield cache
-
-                cache_num += 1
-                if limit > 0 and cache_num >= limit:
-                    raise StopIteration()
-
-            page_num += 1
-
-    @login_needed
-    def _search_get_page(self, point, page_num):
-        """Returns one page of caches as a list.
+            # get one page
+            page = self._search_get_page(point, start_index)
 
-        Searches for a caches around a point and returns N-th page (specifiend by page argument)."""
-
-        assert isinstance(point, Point)
-        assert type(page_num) is int
-
-        logging.info("Fetching page %d.", page_num)
-
-        # assemble request
-        params = urlencode({"lat": point.latitude, "lng": point.longitude})
-        url = self._urls["caches_nearest"] + "?" + params
-
-        # we have to add POST for other pages than 1st
-        if page_num == 1:
-            post = None
-        else:
-            # TODO handle searching on second page without first
-            post = self._pagging_helpers
-            post["__EVENTTARGET"] = self._pagging_postbacks[page_num]
-            post["__EVENTARGUMENT"] = ""
+            if not page:
+                # result is empty - no more caches
+                raise StopIteration()
 
-        # make request
-        try:
-            root = self._browser.post(url, post).soup
-        except requests.exceptions.ConnectionError as e:
-            raise Error("Cannot load search page #{}.".format(page_num)) from e
+            # parse caches in result
+            for start_index, row in enumerate(BeautifulSoup(page).find_all("tr"), start_index):
 
-        # root of a few following elements
-        widget_general = root.find_all("td", "PageBuilderWidget")
+                if limit > 0 and start_index == limit:
+                    raise StopIteration()
 
-        # parse pagging widget
-        caches_total, page_num, page_count = [int(elm.text) for elm in widget_general[0].find_all("b")]
-        logging.debug("Found %d results. Showing page %d of %d.", caches_total, page_num, page_count)
+                # parse raw data
+                cache_details = row.find("span", "cache-details").text.split("|")
+                wp = cache_details[1].strip()
+
+                # create and fill cache object
+                c = Cache(wp, self)
+                c.cache_type = cache_details[0].strip()
+                c.name = row.find("span", "cache-name").text
+                c.found = row.find("img", title="Found It!") is not None
+                c.favorites = int(row.find(attrs={"data-column": "FavoritePoint"}).text)
+                c.state = not (row.get("class") and "disabled" in row.get("class"))
+                c.pm_only = row.find("td", "pm-upsell") is not None
+
+                if c.pm_only:
+                    # PM only caches don't have other attributes filled in
+                    yield c
+                    continue
 
-        # save search postbacks for future usage
-        if page_num == 1:
-            pagging_links = [_ for _ in widget_general[1].find_all("a") if _.get("id")]
-            self._pagging_postbacks = {int(link.text): link.get("href").split("'")[1] for link in pagging_links}
+                c.size = row.find(attrs={"data-column": "ContainerSize"}).text
+                c.difficulty = float(row.find(attrs={"data-column": "Difficulty"}).text)
+                c.terrain = float(row.find(attrs={"data-column": "Terrain"}).text)
+                c.hidden = Util.parse_date(row.find(attrs={"data-column": "PlaceDate"}).text)
+                c.author = row.find("span", "owner").text[3:]  # delete "by "
 
-        # other nescessary fields
-        self._pagging_helpers = {field["name"]: field["value"] for field in root.find_all("input", type="hidden")}
+                logging.debug("Cache parsed: %s", c)
+                yield c
 
-        # parse results table
-        data = root.find("table", "SearchResultsTable").find_all("tr", "Data")
-        return [self._search_parse_cache(c) for c in data]
+            start_index += 1
 
     @login_needed
-    def _search_parse_cache(self, root):
-        """Returns a Cache object parsed from BeautifulSoup Tag."""
+    def _search_get_page(self, point, start_index):
 
-        assert isinstance(root, bs4.Tag)
+        logging.debug("Loading page from start_index: %d", start_index)
 
-        # parse raw data
-        favorites = root.find("span", "favorite-rank")
-        typeLink, nameLink = root.find_all("a", "lnk")
-        pm_only = root.find("img", title="Premium Member Only Cache") is not None
-        direction, info, D_T, placed, last_found = root.find_all("span", "small")
-        found = root.find("img", title="Found It!") is not None
-        size = root.find("td", "AlignCenter").find("img")
-        author, wp, area = [t.strip() for t in info.text.split("|")]
+        if start_index == 0:
+            # first request has to load normal search page
+            logging.debug("Using normal search endpoint")
 
-        # create cache object
-        c = Cache(wp, self)
+            params = urlencode({"origin": point.format(None, "", "", "")})
+            url = self._urls["search"] + "?" + params
 
-        # prettify data
-        c.cache_type = typeLink.find("img").get(
-            "src").split("/")[-1].rsplit(".", 1)[0]  # filename of img[src]
-        c.name = nameLink.span.text.strip()
-        c.found = found
-        c.state = "Strike" not in nameLink.get("class")
-        c.size = size.get("src").split("/")[-1].rsplit(".", 1)[0]  # filename of img[src]
-        c.difficulty, c.terrain = list(map(float, D_T.text.split("/")))
-        c.hidden = Util.parse_date(placed.text)
-        c.author = author[3:]  # delete "by "
-        c.favorites = int(favorites.text)
-        c.pm_only = pm_only
+            # make request
+            try:
+                return str(self._browser.get(url).soup.find(id="geocaches"))
+            except requests.exceptions.ConnectionError as e:
+                raise Error("Cannot load search results.") from e
 
-        logging.debug("Cache parsed: %s", c)
-        return c
+        else:
+            # other requests can use AJAX endpoint
+            logging.debug("Using AJAX search endpoint")
+
+            params = urlencode({
+                "inputOrigin": point.format(None, "", "", ""),
+                "startIndex": start_index,
+                "originTreatment": 0
+            })
+            url = self._urls["search_more"] + "?" + params
+
+            # make request
+            try:
+                return self._browser.get(url).json()["HtmlString"].strip()
+            except requests.exceptions.ConnectionError as e:
+                raise Error("Cannot load search results.") from e
 
     def search_quick(self, area, precision=None, strict=False):
         """Get geocaches inside area, with approximate coordinates
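For context, here is a minimal usage sketch of the reworked search generator. It is not part of this diff: the `Point` import path and the `login()` call are assumptions based on the rest of pycaching, and the coordinates are placeholders.

```python
# Minimal usage sketch (assumptions: Point is importable as below and
# login() has already succeeded with valid credentials).
from pycaching.geocaching import Geocaching
from pycaching.point import Point

g = Geocaching()
g.login("username", "password")  # placeholder credentials

# search() yields Cache objects lazily: the first page comes from the
# play/search endpoint, later pages from play/search/more-results (AJAX).
for cache in g.search(Point(49.74, 13.38), limit=20):
    print(cache.name, cache.difficulty, cache.terrain)
```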
@@ -483,7 +463,7 @@ def load_cache(self, wp, destination=None):
 
         # prettify data
         c.name = name.text
-        c.cache_type = cache_type.split("/")[-1].rsplit(".", 1)[0]
+        c.cache_type = Cache.get_cache_type_by_img(cache_type)
         c.author = author.text
         c.hidden = Util.parse_date(hidden.text.split(":")[-1])
         c.location = Point.from_string(location.text)
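The replacement line calls `Cache.get_cache_type_by_img`, which lives in cache.py and is not shown in this diff. A plausible sketch of such a helper (an assumption for illustration, not the library's actual implementation) would extract the icon filename the same way the deleted one-liner did:

```python
import os

def get_cache_type_by_img(src):
    """Hypothetical helper: map a type-icon URL to a cache type key.

    Mirrors the deleted expression cache_type.split("/")[-1].rsplit(".", 1)[0],
    e.g. ".../WptTypes/2.gif" -> "2"; the real classmethod in cache.py may
    additionally translate that key into a human-readable type name.
    """
    return os.path.splitext(os.path.basename(src))[0]
```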