Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/ingestors/ingestor_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def self.taxila_ingestors
Ingestors::Taxila::SenseIngestor,
Ingestors::Taxila::VuMaterialIngestor,
Ingestors::Taxila::RdnlIngestor,
Ingestors::Taxila::HanIngestor
Ingestors::Taxila::HanIngestor,
Ingestors::Taxila::CitizenScienceIngestor
]
end

Expand Down
97 changes: 97 additions & 0 deletions lib/ingestors/taxila/citizen_science_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# frozen_string_literal: true

require 'open-uri'
require 'csv'
require 'nokogiri'

module Ingestors
  module Taxila
    # Scrapes citizenscience.nl: upcoming events from the events overview page
    # and materials from the paginated resource / training-resource listings.
    #
    # Relies on helpers that are not defined in this file (`open_url`,
    # `add_event`, `add_material`, `@messages`, `set_default_times`) and on the
    # top-level `citizen_science_month_mapping` function.
    class CitizenScienceIngestor < Ingestor
      # Overview page listing the upcoming events.
      EVENTS_URL = 'https://citizenscience.nl/events/'

      # Paginated listings that are scraped for materials.
      MATERIAL_URLS = %w[
        https://citizenscience.nl/resources
        https://citizenscience.nl/training_resources
      ].freeze

      # Prefix put in front of the href found on each material card.
      MATERIAL_LINK_PREFIX = 'https://www.citizenscience.nl'

      # Number of pages (?page=1..N) fetched from each material listing.
      MATERIAL_PAGES = 3

      # When this cassette exists in the test environment, the politeness
      # delay between page requests is skipped.
      VCR_CASSETTE_PATH = 'test/vcr_cassettes/ingestors/citizen_science.yml'

      # NOTE(review): category is :events although this ingestor also adds
      # materials — confirm that this is intended.
      def self.config
        {
          key: 'citizen_science_event',
          title: 'CitizenScience Events API',
          category: :events
        }
      end

      # Runs both scraping steps. Failures are appended to @messages rather
      # than raised.
      #
      # @param url [String] forwarded to the steps, which currently ignore it
      #   and use the hard-coded citizenscience.nl URLs instead
      # @return [nil]
      def read(url)
        # Each step gets its own rescue so that a failure while scraping events
        # no longer prevents the materials from being scraped.
        %i[process_citizen_science_events process_citizen_science_materials].each do |step|
          send(step, url)
        rescue StandardError => e
          @messages << "#{self.class.name} failed with: #{e.message}"
        end

        nil
      end

      private

      # Adds one event per card found in the first row below the
      # "Aankomende evenementen" heading. A card that cannot be parsed is
      # reported in @messages and skipped.
      def process_citizen_science_events(_url)
        document = Nokogiri::HTML5.parse(open_url(EVENTS_URL, raise: true))
        heading = document.at_xpath("//h1[normalize-space(.)='Aankomende evenementen']")
        cards = heading&.ancestors('.container')&.first&.css('.row')&.first&.css('.card')

        # Any link of the chain above may be missing; report that explicitly
        # instead of failing with a NoMethodError on nil.
        if cards.nil?
          @messages << "#{self.class.name} failed with: " \
                       "could not find the 'Aankomende evenementen' section on #{EVENTS_URL}"
          return
        end

        cards.each do |card|
          add_event(build_event(card))
        rescue StandardError => e
          @messages << "Extract event fields failed with: #{e.message}"
        end
      end

      # Builds the event record for a single event card.
      # Raises when an expected element is missing from the card.
      def build_event(card)
        event = OpenStruct.new
        event.url = card.css('a.btn').first['href']
        event.title = card.css('p.project-name').text.strip
        date_text = card.css('.fa-calendar').first.parent.text.strip
        # Month abbreviations are rewritten before parsing (Mrt/Mei/Okt -> Mar/May/Oct).
        event.start = DateTime.parse(citizen_science_month_mapping(date_text))
        event.set_default_times
        event.venue = card.css('.fa-map-marker-alt').first.parent.text.strip
        event.description = card.css("div.half-content > p:not([style*='display: none']):not([hidden])")
                                .first.text.strip
        event.source = 'CitizenScience'
        event.timezone = 'Amsterdam'
        event
      end

      # Adds one material per `.project-card` found on the first MATERIAL_PAGES
      # pages of every listing in MATERIAL_URLS. A card that cannot be parsed is
      # reported in @messages and skipped.
      def process_citizen_science_materials(_url)
        MATERIAL_URLS.each do |listing_url|
          1.upto(MATERIAL_PAGES) do |page_number|
            sleep(1) unless skip_request_delay?
            page_url = "#{listing_url}?page=#{page_number}"
            cards = Nokogiri::HTML5.parse(open_url(page_url, raise: true)).css('.project-card')

            cards.each do |card|
              add_material(build_material(card))
            rescue StandardError => e
              @messages << "Extract material fields failed with: #{e.message}"
            end
          end
        end
      end

      # Builds the material record for a single project card.
      # Raises when an expected element is missing from the card.
      def build_material(card)
        name_node = card.css('h3.project-name').first
        material = OpenStruct.new
        # The link is read from the element that wraps the name heading.
        material.url = "#{MATERIAL_LINK_PREFIX}#{name_node.parent['href']}"
        material.title = name_node.text.strip
        material.description = card.css('.project-description').first.text.strip
        material
      end

      # True when running in the test environment with the cassette file present.
      def skip_request_delay?
        Rails.env.test? && File.exist?(VCR_CASSETTE_PATH)
      end
    end
  end
end

# Rewrites the Dutch month abbreviations Mrt, Mei and Okt to their English
# forms Mar, May and Oct. Matching is case-sensitive and every other part of
# the text is left untouched.
#
# @param str [String] text containing a date
# @return [String] a new string with the abbreviations substituted
def citizen_science_month_mapping(str)
  { 'Mrt' => 'Mar', 'Mei' => 'May', 'Okt' => 'Oct' }.reduce(str) do |text, (dutch, english)|
    text.gsub(dutch, english)
  end
end
90 changes: 90 additions & 0 deletions test/unit/ingestors/taxila/citizen_science_ingestor_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
require 'test_helper'

# Unit tests for Ingestors::Taxila::CitizenScienceIngestor. HTTP traffic is
# served from the "ingestors/citizen_science" VCR cassette.
class CitizenScienceIngestorTest < ActiveSupport::TestCase
  setup do
    @user = users(:regular_user)
    @content_provider = content_providers(:another_portal_provider)
    mock_ingestions
    mock_timezone
  end

  test 'can ingest events from citizen_science' do
    source = build_citizen_science_source
    ingestor = Ingestors::Taxila::CitizenScienceIngestor.new

    # the sample event must not exist before ingestion
    expected_title = 'High-Level Policy Event on the Sustainability of Citizen Science'
    expected_url = 'https://events.teams.microsoft.com/event/47581424-c548-4f07-928a-9fed358df416@659b3608-37a1-406b-9e1a-02c011decd3c'
    refute Event.where(title: expected_title, url: expected_url).any?

    # read and write while counting the newly created events
    assert_difference 'Event.count', 13 do
      ingest_from_cassette(ingestor, source)
    end

    assert_equal 13, ingestor.events.count
    { added: 13, updated: 0, rejected: 0 }.each do |outcome, expected|
      assert_equal expected, ingestor.stats[:events][outcome]
    end

    # the sample event now exists
    event = Event.where(title: expected_title, url: expected_url).first
    assert event
    assert_equal expected_title, event.title
    assert_equal expected_url, event.url

    # remaining fields
    assert_equal 'CitizenScience', event.source
    assert_equal 'Amsterdam', event.timezone
    # NOTE(review): 19 Feb 2026 falls on a Thursday, so the "Mon" label in the
    # strings below looks wrong; presumably the weekday is ignored when
    # parsing — confirm.
    assert_equal Time.zone.parse('Mon, 19 Feb 2026 09:00:00.000000000 UTC +00:00'), event.start
    assert_equal Time.zone.parse('Mon, 19 Feb 2026 10:00:00.000000000 UTC +00:00'), event.end
    assert_equal 'Online', event.venue
    assert event.online?
  end

  test 'can ingest materials from citizen_science' do
    source = build_citizen_science_source
    ingestor = Ingestors::Taxila::CitizenScienceIngestor.new

    # the sample material must not exist before ingestion
    expected_title = 'Naar een vaste plek voor burgerwetenschap in het netwerk van openbare bibliotheken'
    expected_url = 'https://www.citizenscience.nl/resource/475'
    refute Material.where(title: expected_title, url: expected_url).any?

    # read and write while counting the newly created materials
    assert_difference 'Material.count', 25 do
      ingest_from_cassette(ingestor, source)
    end

    assert_equal 33, ingestor.materials.count
    { added: 25, updated: 8, rejected: 0 }.each do |outcome, expected|
      assert_equal expected, ingestor.stats[:materials][outcome]
    end

    # the sample material now exists
    material = Material.where(title: expected_title, url: expected_url).first
    assert material
    assert_equal expected_title, material.title
    assert_equal expected_url, material.url
  end

  private

  # Builds (without saving) the source whose URL is handed to the ingestor.
  def build_citizen_science_source
    @content_provider.sources.build(
      url: 'https://citizenscience.nl/events/',
      method: 'citizen_science',
      enabled: true
    )
  end

  # Reads from the source URL and writes the results, inside freeze_time(2023)
  # and the recorded cassette.
  def ingest_from_cassette(ingestor, source)
    freeze_time(2023) do
      VCR.use_cassette("ingestors/citizen_science") do
        ingestor.read(source.url)
        ingestor.write(@user, @content_provider)
      end
    end
  end
end
339 changes: 339 additions & 0 deletions test/vcr_cassettes/ingestors/citizen_science.yml

Large diffs are not rendered by default.