Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 28 additions & 34 deletions lib/ingestors/taxila/surf_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,41 +28,35 @@ def read(url)
private

# Collects SURF agenda events and registers each one through add_event; failures are
# appended to @messages instead of being raised.
#
# NOTE(review): this listing comes from a rendered diff ("28 additions & 34 deletions"),
# so the body below appears to interleave two implementations -- a sitemap + JSON-LD
# scraper and an iCal feed parser -- and is not runnable as shown. Confirm against the
# real file before relying on any single line.
#
# @param url [String] source URL; only the sitemap-based line reads it, the iCal lines
#   use a fixed feed address instead
def process_surf(url)
# Sitemap variant: walk every urlset/url entry and keep only those whose loc contains '/en/agenda/'.
Hash.from_xml(Nokogiri::XML(open_url(url, raise: true)).to_s)['urlset']['url'].each do |event_page|
next unless event_page['loc'].include?('/en/agenda/')
# iCal variant: fetch the fixed agenda feed and parse it into Icalendar events.
ical_surf_url = "https://www.surf.nl/ical/surf-agenda.ics"
ical_events = Icalendar::Event.parse(open_url(ical_surf_url, raise: true).set_encoding('utf-8'))
# Events are keyed by title, so calendar entries sharing a summary collapse into one record.
events = {}
ical_events.each do |ical_event|
title = ical_event.summary.to_s
events[title] ||= OpenStruct.new
events[title].title = title
# The event URL is an anchor on the agenda page built from the parameterized title.
events[title].url = "https://www.surf.nl/agenda##{title.parameterize(separator: '_')}"
events[title].description ||= ical_event.description.to_s
# The strftime format carries no zone field, so the wall-clock value is re-read in Time.zone.
my_start = Time.zone.parse(ical_event.dtstart.strftime('%a, %d %b %Y %H:%M:%S'))
my_end = Time.zone.parse(ical_event.dtend.strftime('%a, %d %b %Y %H:%M:%S'))
events[title].start ||= my_start
events[title].end ||= my_end
# NOTE(review): set_default_times is called on an OpenStruct; its definition is not visible here -- confirm it exists.
events[title].set_default_times
events[title].venue ||= ical_event.location
events[title].source ||= 'SURF'
events[title].timezone ||= 'Amsterdam'

# Pause between page fetches, except in the test environment when the VCR cassette file exists.
sleep(1) unless Rails.env.test? and File.exist?('test/vcr_cassettes/ingestors/surf.yml')
# Pull the JSON-LD <script> nodes from the event page; pages without any are skipped.
data_json = Nokogiri::HTML5.parse(open_url(event_page['loc'], raise: false))&.css('script[type="application/ld+json"]')
next unless data_json.present? && data_json.length > 0

# Only the first JSON-LD node is parsed.
data = JSON.parse(data_json.first.text)
begin
# create new event
event = OpenStruct.new

# extract event details from the first @graph entry
attr = data['@graph'].first
event.title = convert_title attr['name']
event.url = attr['url']&.strip
event.description = convert_description attr['description']
event.start = attr['startDate']
event.end = attr['endDate']
event.set_default_times
# A location given as an array is flattened into one ' - '-separated string.
event.venue = if attr['location'].is_a?(Array)
attr['location'].join(' - ')
else
attr['location']
end
event.source = 'SURF'
event.online = true
event.timezone = 'Amsterdam'
event.target_audience = parse_audience(event.description)

# add event to events array
add_event(event)
# NOTE(review): rescue Exception also traps signals and SystemExit; StandardError is the usual choice.
rescue Exception => e
@messages << "Extract event fields failed with: #{e.message}"
end
# Widen the stored range so it spans the earliest start and latest end seen for this title.
events[title].start = [my_start, events[title].start].min
events[title].end = [my_end, events[title].end].max
# NOTE(review): same broad rescue as above; puts looks like leftover debug output -- confirm.
rescue Exception => e
puts e
@messages << "Extract event fields failed with: #{e.message}"
end
# Register every merged event; a failure for one event is recorded in @messages.
events.values.each do |event|
add_event(event)
rescue Exception => e
puts e
@messages << "Extract event fields failed with: #{e.message}"
end
end
end
Expand Down
19 changes: 10 additions & 9 deletions test/unit/ingestors/taxila/surf_ingestor_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@ class SurfIngestorTest < ActiveSupport::TestCase

test 'can ingest events from surf' do
source = @content_provider.sources.build(
url: 'https://www.surf.nl/sitemap.xml',
url: 'https://www.surf.nl/agenda',
method: 'surf',
enabled: true
)

ingestor = Ingestors::Taxila::SurfIngestor.new

# check event doesn't exist yet
new_title = 'Master class Privacy Assessment Framework'
new_url = 'https://www.surf.nl/en/agenda/masterclass-review-framework-privacy-apr-1'
new_title = 'SURF Onderwijsdagen 2026'
new_url = 'https://www.surf.nl/agenda#surf_onderwijsdagen_2026'
refute Event.where(title: new_title, url: new_url).any?

# run task
assert_difference 'Event.count', 52 do
assert_difference 'Event.count', 20 do
freeze_time(2019) do
VCR.use_cassette('ingestors/surf') do
ingestor.read(source.url)
Expand All @@ -36,9 +36,9 @@ class SurfIngestorTest < ActiveSupport::TestCase
end
end

assert_equal 52, ingestor.events.count
assert_equal 20, ingestor.events.count
assert ingestor.materials.empty?
assert_equal 52, ingestor.stats[:events][:added]
assert_equal 20, ingestor.stats[:events][:added]
assert_equal 0, ingestor.stats[:events][:updated]
assert_equal 0, ingestor.stats[:events][:rejected]

Expand All @@ -51,8 +51,9 @@ class SurfIngestorTest < ActiveSupport::TestCase
# check other fields
assert_equal 'Amsterdam', event.timezone
assert_equal 'SURF', event.source
assert event.online?
assert_equal Time.zone.parse('Thu, 25 Apr 2024 12:00:00.000000000 UTC +00:00'), event.start
assert_equal Time.zone.parse('Thu, 25 Apr 2024 12:00:00.000000000 UTC +00:00'), event.end
refute event.online?
assert_equal Time.zone.parse('Wed, 10 Nov 2026 09:00:00.000000000 UTC +00:00'), event.start
assert_equal Time.zone.parse('Thu, 11 Nov 2026 17:00:00.000000000 UTC +00:00'), event.end
assert_equal "Amare, Spuiplein 150, 2511 DG Den Haag", event.venue
end
end
Loading