Commit 32c3616

Merge branch 'new-search'
2 parents 5d930f3 + 9ae5a8d

6 files changed: +141 -126 lines

README.rst

Lines changed: 3 additions & 3 deletions
@@ -11,8 +11,8 @@ Features
 - **login** to Geocaching.com
 - **search** caches

-  - normal search (up to 200 caches around any point)
-  - **NEW:** quick search (all caches inside some area)
+  - normal search (unlimited number of caches from any point)
+  - quick search (all caches inside some area)

 - **load cache** details by WP
@@ -52,7 +52,7 @@ Requirements
 ~~~~~~~~~~~~

 - Python >= 3.0 (3.4 required for running tests)
-- MechanicalSoup >= 0.2.0
+- MechanicalSoup >= 0.3.1
 - geopy >= 1.0.0
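For orientation, a minimal usage sketch of the two search modes listed above. The import paths, the login() call and the Rectangle helper are assumptions based on this repository's layout; the diff itself only shows the search internals:

    from pycaching import Geocaching
    from pycaching.point import Point     # import path assumed
    from pycaching.area import Rectangle  # helper assumed to live in pycaching.area

    gc = Geocaching()
    gc.login("username", "password")  # searching requires a logged-in session

    # normal search: a lazy generator, here capped at 20 caches
    for cache in gc.search(Point(49.2, 16.6), limit=20):
        print(cache.wp, cache.name)

    # quick search: all caches inside an area, approximate coordinates
    for cache in gc.search_quick(Rectangle(Point(49.2, 16.5), Point(49.3, 16.7))):
        print(cache.wp)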

pycaching/cache.py

Lines changed: 43 additions & 28 deletions
@@ -96,28 +96,27 @@ class Cache(object):
         "wirelessbeacon": "Wireless Beacon"
     }

+    # each key and value is a tuple of synonyms
     _possible_types = {
         # key is cache image url, used for parsing: http://www.geocaching.com/images/WptTypes/[KEY].gif
-        "2": "Traditional Cache",
-        "3": "Multi-cache",
-        "8": "Mystery Cache",
-        "__8": "Unknown Cache",  # same as Mystery, key not used
-        "5": "Letterbox hybrid",
-        "6": "Event Cache",
-        "mega": "Mega-Event Cache",
-        "giga": "Giga-Event Cache",
-        "earthcache": "Earthcache",
-        "137": "Earthcache",
-        "13": "Cache in Trash out Event",
-        "11": "Webcam Cache",
-        "4": "Virtual Cache",
-        "1858": "Wherigo Cache",
-        "10Years_32": "Lost and Found Event Cache",
-        "ape_32": "Project Ape Cache",
-        "HQ_32": "Groundspeak HQ",
-        "1304": "GPS Adventures Exhibit",
-        "4738": "Groundspeak Block Party",
-        "12": "Locationless (Reverse) Cache",
+        ("2", ): ("Traditional", ),
+        ("3", ): ("Multi-cache", ),
+        ("8", ): ("Mystery", "Unknown", ),
+        ("5", ): ("Letterbox hybrid", ),
+        ("6", ): ("Event", ),
+        ("mega", ): ("Mega-Event", ),
+        ("giga", ): ("Giga-Event", ),
+        ("137", "earthcache", ): ("Earthcache", ),
+        ("13", ): ("Cache in Trash out Event", "CITO", ),
+        ("11", ): ("Webcam", ),
+        ("4", ): ("Virtual", ),
+        ("1858", ): ("Wherigo", ),
+        ("10Years_32", ): ("Lost and Found Event", ),
+        ("ape_32", ): ("Project Ape", ),
+        ("HQ_32", ): ("Groundspeak HQ", ),
+        ("1304", ): ("GPS Adventures Exhibit", ),
+        ("4738", ): ("Groundspeak Block Party", ),
+        ("12", ): ("Locationless (Reverse)", ),
     }

     _possible_sizes = {
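The reshaped mapping turns both keys and values into tuples of synonyms: several image keys (e.g. "137" and "earthcache") and several human-readable names (e.g. "Mystery" and "Unknown") now resolve to one canonical type, the first element of the value tuple. A standalone sketch of that lookup with a trimmed copy of the mapping:

    _possible_types = {
        ("8", ): ("Mystery", "Unknown", ),
        ("137", "earthcache", ): ("Earthcache", ),
    }

    def canonical_type(name):
        # case-insensitive match against the name synonyms,
        # returning the canonical (first) one
        for synonyms in _possible_types.values():
            if name.lower() in (s.lower() for s in synonyms):
                return synonyms[0]
        raise ValueError("Cache type '{}' is not possible.".format(name))

    assert canonical_type("unknown") == "Mystery"
    assert canonical_type("Earthcache") == "Earthcache"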
@@ -225,14 +224,30 @@ def cache_type(self):

     @cache_type.setter
     def cache_type(self, cache_type):
+        cache_type = cache_type.replace(" Geocache", "")  # with space!
+        cache_type = cache_type.replace(" Cache", "")  # with space!
         cache_type = cache_type.strip()
-        cache_type = cache_type.replace("Geocache", "Cache")
-        if cache_type in self._possible_types.values():  # try to search in values
-            self._cache_type = cache_type
-        elif cache_type in self._possible_types.keys():  # not in values => it must be a key
-            self._cache_type = self._possible_types[cache_type]
-        else:
-            raise ValueError("Cache type '{}' is not possible.".format(cache_type))
+
+        # walk through each type and its synonyms
+        for key, value in self._possible_types.items():
+            for synonym in value:
+                if cache_type.lower() == synonym.lower():
+                    self._cache_type = self._possible_types[key][0]
+                    return
+
+        raise ValueError("Cache type '{}' is not possible.".format(cache_type))
+
+    @classmethod
+    def get_cache_type_by_img(cls, src):
+        """Returns cache type by its image src"""
+        # parse src (http://www.geocaching.com/images/WptTypes/[KEY].gif)
+        img_name = src.split("/")[-1].rsplit(".", 1)[0]
+
+        # walk through each key and its synonyms
+        for key in cls._possible_types.keys():
+            for synonym in key:
+                if img_name == synonym:
+                    return cls._possible_types[key][0]

     @property
     @lazy_loaded
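Both normalization paths now end at the same canonical names: the setter strips a trailing " Cache"/" Geocache" and matches case-insensitively against the name synonyms, while get_cache_type_by_img() extracts the file name from the icon URL and matches it against the key tuples (note it falls through and returns None for an unknown key, unlike the setter, which raises). The two string manipulations in isolation:

    # setter path: "Traditional Cache", "Traditional Geocache" and
    # "traditional" all normalize to the "Traditional" synonym
    raw = "Traditional Geocache"
    normalized = raw.replace(" Geocache", "").replace(" Cache", "").strip()
    assert normalized.lower() == "traditional"

    # image path: only the file name of the icon URL is the key
    src = "http://www.geocaching.com/images/WptTypes/137.gif"
    assert src.split("/")[-1].rsplit(".", 1)[0] == "137"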
@@ -311,7 +326,7 @@ def hidden(self, hidden):
         if type(hidden) is str:
             hidden = Util.parse_date(hidden)
         elif type(hidden) is not datetime.date:
-            raise ValueError("Passed object is not datetime.date instance nor string containing date.")
+            raise ValueError("Passed object is not a datetime.date instance nor a string containing a date.")
         self._hidden = hidden

     @property
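The hidden setter accepts either a datetime.date or a string that Util.parse_date understands. A self-contained sketch of the same validation, with a fixed-format strptime standing in for Util.parse_date (whose accepted formats are outside this diff):

    import datetime

    def validate_hidden(hidden):
        # mirrors the setter above: parse strings, pass dates through
        if type(hidden) is str:
            hidden = datetime.datetime.strptime(hidden, "%m/%d/%Y").date()
        elif type(hidden) is not datetime.date:
            raise ValueError("Passed object is not a datetime.date instance nor a string containing a date.")
        return hidden

    assert validate_hidden("1/30/2000") == datetime.date(2000, 1, 30)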

pycaching/geocaching.py

Lines changed: 67 additions & 87 deletions
@@ -3,9 +3,9 @@
 import logging
 import math
 import requests
-import bs4
 import mechanicalsoup as ms
 from urllib.parse import urlencode
+from bs4 import BeautifulSoup
 from pycaching.area import Area
 from pycaching.cache import Cache
 from pycaching.util import Util
@@ -35,7 +35,8 @@ class Geocaching(object):
     _urls = {
         "login_page": _baseurl + "login/default.aspx",
         "cache_details": _baseurl + "geocache/{wp}",
-        "caches_nearest": _baseurl + "seek/nearest.aspx",
+        "search": _baseurl + "play/search",
+        "search_more": _baseurl + "play/search/more-results",
         "geocode": _baseurl + "api/geocode",
         "map": _tile_url + "map.details",
         "tile": _tile_url + "map.png",
@@ -132,105 +133,84 @@ def search(self, point, limit=0):

         logging.info("Searching at %s...", point)

-        page_num = 1
-        cache_num = 0
+        start_index = 0
         while True:
-            try:  # try to load search page
-                page = self._search_get_page(point, page_num)
-            except requests.exceptions.ConnectionError as e:
-                raise StopIteration("Cannot load search page.") from e
-
-            for cache in page:
-                yield cache
-
-                cache_num += 1
-                if limit > 0 and cache_num >= limit:
-                    raise StopIteration()
-
-            page_num += 1
-
-    @login_needed
-    def _search_get_page(self, point, page_num):
-        """Returns one page of caches as a list.
+            # get one page
+            page = self._search_get_page(point, start_index)

-        Searches for a caches around a point and returns N-th page (specifiend by page argument)."""
-
-        assert isinstance(point, Point)
-        assert type(page_num) is int
-
-        logging.info("Fetching page %d.", page_num)
-
-        # assemble request
-        params = urlencode({"lat": point.latitude, "lng": point.longitude})
-        url = self._urls["caches_nearest"] + "?" + params
-
-        # we have to add POST for other pages than 1st
-        if page_num == 1:
-            post = None
-        else:
-            # TODO handle searching on second page without first
-            post = self._pagging_helpers
-            post["__EVENTTARGET"] = self._pagging_postbacks[page_num]
-            post["__EVENTARGUMENT"] = ""
+            if not page:
+                # result is empty - no more caches
+                raise StopIteration()

-        # make request
-        try:
-            root = self._browser.post(url, post).soup
-        except requests.exceptions.ConnectionError as e:
-            raise Error("Cannot load search page #{}.".format(page_num)) from e
+            # parse caches in result
+            for start_index, row in enumerate(BeautifulSoup(page).find_all("tr"), start_index):

-        # root of a few following elements
-        widget_general = root.find_all("td", "PageBuilderWidget")
+                if limit > 0 and start_index == limit:
+                    raise StopIteration()

-        # parse pagging widget
-        caches_total, page_num, page_count = [int(elm.text) for elm in widget_general[0].find_all("b")]
-        logging.debug("Found %d results. Showing page %d of %d.", caches_total, page_num, page_count)
+                # parse raw data
+                cache_details = row.find("span", "cache-details").text.split("|")
+                wp = cache_details[1].strip()
+
+                # create and fill cache object
+                c = Cache(wp, self)
+                c.cache_type = cache_details[0].strip()
+                c.name = row.find("span", "cache-name").text
+                c.found = row.find("img", title="Found It!") is not None
+                c.favorites = int(row.find(attrs={"data-column": "FavoritePoint"}).text)
+                c.state = not (row.get("class") and "disabled" in row.get("class"))
+                c.pm_only = row.find("td", "pm-upsell") is not None
+
+                if c.pm_only:
+                    # PM-only caches don't have the other attributes filled in
+                    yield c
+                    continue

-        # save search postbacks for future usage
-        if page_num == 1:
-            pagging_links = [_ for _ in widget_general[1].find_all("a") if _.get("id")]
-            self._pagging_postbacks = {int(link.text): link.get("href").split("'")[1] for link in pagging_links}
+                c.size = row.find(attrs={"data-column": "ContainerSize"}).text
+                c.difficulty = float(row.find(attrs={"data-column": "Difficulty"}).text)
+                c.terrain = float(row.find(attrs={"data-column": "Terrain"}).text)
+                c.hidden = Util.parse_date(row.find(attrs={"data-column": "PlaceDate"}).text)
+                c.author = row.find("span", "owner").text[3:]  # delete "by "

-        # other nescessary fields
-        self._pagging_helpers = {field["name"]: field["value"] for field in root.find_all("input", type="hidden")}
+                logging.debug("Cache parsed: %s", c)
+                yield c

-        # parse results table
-        data = root.find("table", "SearchResultsTable").find_all("tr", "Data")
-        return [self._search_parse_cache(c) for c in data]
+            start_index += 1

     @login_needed
-    def _search_parse_cache(self, root):
-        """Returns a Cache object parsed from BeautifulSoup Tag."""
+    def _search_get_page(self, point, start_index):

-        assert isinstance(root, bs4.Tag)
+        logging.debug("Loading page from start_index: %d", start_index)

-        # parse raw data
-        favorites = root.find("span", "favorite-rank")
-        typeLink, nameLink = root.find_all("a", "lnk")
-        pm_only = root.find("img", title="Premium Member Only Cache") is not None
-        direction, info, D_T, placed, last_found = root.find_all("span", "small")
-        found = root.find("img", title="Found It!") is not None
-        size = root.find("td", "AlignCenter").find("img")
-        author, wp, area = [t.strip() for t in info.text.split("|")]
+        if start_index == 0:
+            # first request has to load the normal search page
+            logging.debug("Using normal search endpoint")

-        # create cache object
-        c = Cache(wp, self)
+            params = urlencode({"origin": point.format(None, "", "", "")})
+            url = self._urls["search"] + "?" + params

-        # prettify data
-        c.cache_type = typeLink.find("img").get(
-            "src").split("/")[-1].rsplit(".", 1)[0]  # filename of img[src]
-        c.name = nameLink.span.text.strip()
-        c.found = found
-        c.state = "Strike" not in nameLink.get("class")
-        c.size = size.get("src").split("/")[-1].rsplit(".", 1)[0]  # filename of img[src]
-        c.difficulty, c.terrain = list(map(float, D_T.text.split("/")))
-        c.hidden = Util.parse_date(placed.text)
-        c.author = author[3:]  # delete "by "
-        c.favorites = int(favorites.text)
-        c.pm_only = pm_only
+            # make request
+            try:
+                return str(self._browser.get(url).soup.find(id="geocaches"))
+            except requests.exceptions.ConnectionError as e:
+                raise Error("Cannot load search results.") from e

-        logging.debug("Cache parsed: %s", c)
-        return c
+        else:
+            # other requests can use the AJAX endpoint
+            logging.debug("Using AJAX search endpoint")
+
+            params = urlencode({
+                "inputOrigin": point.format(None, "", "", ""),
+                "startIndex": start_index,
+                "originTreatment": 0
+            })
+            url = self._urls["search_more"] + "?" + params
+
+            # make request
+            try:
+                return self._browser.get(url).json()["HtmlString"].strip()
+            except requests.exceptions.ConnectionError as e:
+                raise Error("Cannot load search results.") from e

     def search_quick(self, area, precision=None, strict=False):
         """Get geocaches inside area, with approximate coordinates
@@ -483,7 +463,7 @@ def load_cache(self, wp, destination=None):

         # prettify data
         c.name = name.text
-        c.cache_type = cache_type.split("/")[-1].rsplit(".", 1)[0]
+        c.cache_type = Cache.get_cache_type_by_img(cache_type)
         c.author = author.text
         c.hidden = Util.parse_date(hidden.text.split(":")[-1])
         c.location = Point.from_string(location.text)
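load_cache() now routes the type icon through the shared Cache.get_cache_type_by_img() classmethod instead of re-doing the file-name split inline. The hidden date is still trimmed by hand; assuming the page labels it like "Hidden : 1/30/2000" (wording assumed), the split keeps just the date part for Util.parse_date:

    text = "Hidden : 1/30/2000"  # assumed label format
    assert text.split(":")[-1].strip() == "1/30/2000"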

setup.py

Lines changed: 2 additions & 2 deletions
@@ -9,7 +9,7 @@

 info = {
     "name": "pycaching",
-    "version": "3.1",  # PEP 386
+    "version": "3.1.1",  # PEP 386
     "author": "Tomas Bedrich",
     "author_email": "[email protected]",
     "url": "https://github.com/tomasbedrich/pycaching",

@@ -19,7 +19,7 @@
     "description": "Geocaching.com site crawler. Provides tools for searching, fetching caches and geocoding.",
     "long_description": long_description,
     "keywords": ["geocaching", "crawler", "geocache", "cache", "searching", "geocoding"],
-    "install_requires": ["MechanicalSoup >= 0.2.0", "geopy >= 1.0.0"],
+    "install_requires": ["MechanicalSoup >= 0.3.0", "geopy >= 1.0.0"],
     "test_suite": "test"
 }
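The "# PEP 386" comment refers to the version-ordering scheme the bump follows; under it 3.1.1 sorts after 3.1, which can be checked with pkg_resources (shipped with setuptools, which setup.py already depends on):

    from pkg_resources import parse_version

    assert parse_version("3.1.1") > parse_version("3.1")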

test/test_cache.py

Lines changed: 19 additions & 3 deletions
@@ -23,6 +23,10 @@ def test___str__(self):
     def test___eq__(self):
         self.assertEqual(self.c, Cache("GC12345", self.gc))

+    def test_geocaching(self):
+        with self.assertRaises(ValueError):
+            Cache("GC12345", None)
+
     def test_wp(self):
         self.assertEqual(self.c.wp, "GC12345")

@@ -34,7 +38,7 @@ def test_name(self):
         self.assertEqual(self.c.name, "Testing")

     def test_type(self):
-        self.assertEqual(self.c.cache_type, "Traditional Cache")
+        self.assertEqual(self.c.cache_type, "Traditional")

         with self.subTest("filter invalid"):
             with self.assertRaises(ValueError):

@@ -47,10 +51,14 @@ def test_location(self):
         self.c.location = "S 36 51.918 E 174 46.725"
         self.assertEqual(self.c.location, Point.from_string("S 36 51.918 E 174 46.725"))

-        with self.subTest("filter invalid"):
+        with self.subTest("filter invalid string"):
             with self.assertRaises(ValueError):
                 self.c.location = "somewhere"

+        with self.subTest("filter invalid types"):
+            with self.assertRaises(ValueError):
+                self.c.location = None
+
     def test_state(self):
         self.assertEqual(self.c.state, True)

@@ -88,17 +96,25 @@ def test_hidden(self):
         self.c.hidden = "1/30/2000"
         self.assertEqual(self.c.hidden, date(2000, 1, 30))

-        with self.subTest("filter invalid"):
+        with self.subTest("filter invalid string"):
             with self.assertRaises(ValueError):
                 self.c.hidden = "now"

+        with self.subTest("filter invalid types"):
+            with self.assertRaises(ValueError):
+                self.c.hidden = None
+
     def test_attributes(self):
         self.assertEqual(self.c.attributes, {"onehour": True, "kids": False, "available": True})

         with self.subTest("filter unknown"):
             self.c.attributes = {attr: True for attr in ["onehour", "xxx"]}
             self.assertEqual(self.c.attributes, {"onehour": True})

+        with self.subTest("filter invalid"):
+            with self.assertRaises(ValueError):
+                self.c.attributes = None
+
     def test_summary(self):
         self.assertEqual(self.c.summary, "text")
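The new negative tests lean on unittest's subTest(), available since Python 3.4 (matching the README note that 3.4 is required for running tests); each subTest is reported independently, so one failing input does not mask the rest. A minimal, self-contained illustration of the pattern:

    import unittest

    class SubTestDemo(unittest.TestCase):

        def test_filters(self):
            # each case is reported separately under its label
            for label, value in (("invalid string", "somewhere"), ("invalid types", None)):
                with self.subTest(label):
                    with self.assertRaises(ValueError):
                        raise ValueError(value)  # stands in for e.g. self.c.location = value

    if __name__ == "__main__":
        unittest.main()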
