-
Notifications
You must be signed in to change notification settings - Fork 298
/
Copy pathgoogle_search_artworks_parser.rb
56 lines (45 loc) · 1.25 KB
/
google_search_artworks_parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
require 'nokolexbor'
require 'open-uri'
require 'selenium-webdriver'
# Extracts the "artworks" items from a Google search results page.
#
# The page is opened in a Chrome session driven by Selenium, and the page
# source reported by the driver is parsed with Nokolexbor.
class GoogleSearchArtworksParser
  # Loads +location+ in the browser and extracts every artwork item.
  #
  # @param location [String] an absolute URL, or a path to a local HTML file
  #   (relative paths are resolved against the current working directory)
  # @return [Hash] +{ "artworks" => [ { "link", "name", "image", "extensions" }, ... ] }+
  def self.parse(location)
    driver.navigate.to(process_location(location))
    doc = Nokolexbor::HTML(driver.page_source)
    # presumably one ".iELo6" node per artwork item -- verify against current markup
    { "artworks" => process_objects(doc.css(".iELo6")) }
  end

  # NOTE: a bare `private` does not apply to `def self.` methods, so the
  # helpers below are (and remain) publicly callable. Treat them as internal.

  # Memoized Chrome WebDriver shared by every call to .parse.
  #
  # @return [Selenium::WebDriver::Driver]
  def self.driver
    @driver ||= Selenium::WebDriver.for(:chrome)
  end

  # Turns +location+ into something the browser can navigate to.
  #
  # @param location [String] URL or local file path
  # @return [String] +location+ unchanged when it is a URL, otherwise a
  #   "File://" URL pointing at the absolute path of the local file
  def self.process_location(location)
    return location if url?(location)

    # File.absolute_path keeps an already-absolute path intact and resolves a
    # relative one against Dir.pwd, so both kinds of local path work.
    "File://#{File.absolute_path(location)}"
  end

  # Converts every matched node into an artwork hash.
  #
  # @param objects [Enumerable] nodes matched by .parse
  # @return [Array<Hash>]
  def self.process_objects(objects)
    objects.map { |object| process_object(object) }
  end

  # Builds the hash describing a single artwork.
  #
  # "extensions" is only present when the item carries a non-empty
  # ".cxzHyb" element; "image" is nil when the item has no <img>.
  #
  # @param object [#css] node for one artwork item
  # @return [Hash]
  # @raise [NoMethodError] if the item has no <a> or no ".pgNMRc" name element
  def self.process_object(object)
    link_object = object.css('a').first
    artwork = {
      "link" => "https://www.google.com" + link_object['href'],
      "name" => link_object.css('.pgNMRc').first.content,
      "image" => get_image(link_object)
    }

    # The extensions element is optional, so guard against a missing node
    # instead of calling #content on nil.
    extensions = object.css('.cxzHyb').first&.content
    artwork['extensions'] = [extensions] unless extensions.nil? || extensions.empty?
    artwork
  end

  # Returns the image source of an item, preferring the "data-src"
  # attribute over "src".
  #
  # @param link_object [#css] the item's <a> node
  # @return [String, nil] nil when there is no <img> or neither attribute is set
  def self.get_image(link_object)
    image = link_object.css('img').first
    return nil if image.nil?

    image['data-src'] || image['src']
  end

  # Tells whether +string+ parses as a URI that has both a scheme and a host.
  #
  # @param string [String]
  # @return [Boolean] false for plain paths and for unparsable input
  def self.url?(string)
    uri = URI.parse(string)
    !uri.scheme.nil? && !uri.host.nil?
  rescue URI::InvalidURIError
    false
  end
end