Skip to content

Commit e5ea71c

Browse files
authored
Update definitions - May of 2024 (#408)
* chore!: change license to MIT
* chore: update versions
* fix: compiler errors and linter violations
* feature: allow for element matching to find page ID
* feature: split up element matching into textcontent and href
* fix: use puppeteer-extra to reduce misses
* chore: fix definitions
* fix: remove CBS cookieclicker
* fix: account for shadow root in Parool paywall
* fix: selector fixes for various media
1 parent cde76a0 commit e5ea71c

20 files changed

+1968
-2141
lines changed

.editorconfig

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
root = true
2+
3+
[*]
4+
end_of_line = lf
5+
charset = utf-8
6+
trim_trailing_whitespace = true
7+
insert_final_newline = true
8+
indent_style = space
9+
indent_size = 2
10+
11+
[*.{diff,md}]
12+
trim_trailing_whitespace = false

.eslintrc.json

+3-28
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,6 @@
11
{
2-
"extends": [
3-
"plugin:@typescript-eslint/recommended"
4-
],
5-
"parser": "@typescript-eslint/parser",
6-
"parserOptions": {
7-
"ecmaVersion": 2018,
8-
"sourceType": "module",
9-
"ecmaFeatures": {
10-
"legacyDecorators": true
11-
}
12-
},
2+
"extends": "@fdebijl",
133
"rules": {
14-
"no-console": 0,
15-
"no-plusplus": 0,
16-
"no-await-in-loop": 0,
17-
"max-len": [
18-
2,
19-
{
20-
"code": 250
21-
}
22-
],
23-
"object-curly-newline": 0,
24-
"no-trailing-spaces": 1,
25-
"@typescript-eslint/camelcase": 0,
26-
"prefer-const": 1
27-
},
28-
"env": {
29-
"node": true
4+
"@typescript-eslint/no-explicit-any": 0
305
}
31-
}
6+
}

.github/FUNDING.yml

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
# These are supported funding model platforms
2-
3-
github: [opentitles]
1+
github: [opentitles]

.github/workflows/status.yml

+7-4
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,22 @@ on:
44
pull_request:
55
types: [assigned, opened, synchronize, reopened]
66

7+
env:
8+
NODE_VERSION: 20
9+
710
jobs:
811
validate:
912
name: Validate
1013
runs-on: ubuntu-latest
1114
steps:
12-
- uses: actions/checkout@v3
15+
- uses: actions/checkout@v4
1316
with:
1417
# Check out the head ref instead of the PR branch that GitHub creates.
1518
ref: ${{ github.head_ref }}
16-
- name: Setup Node.js v14
17-
uses: actions/setup-node@v3
19+
- name: Setup Node
20+
uses: actions/setup-node@v4
1821
with:
19-
node-version: 14
22+
node-version: ${{ env.NODE_VERSION }}
2023
- name: Install and build
2124
run: |
2225
npm ci

LICENSE

+21-661
Large diffs are not rendered by default.

index.d.ts

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
declare module 'puppeteer-extra-plugin-stealth' {
2+
const content: any;
3+
export default content;
4+
}

media.json

+59-15
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,10 @@
111111
"page_id_location": "url",
112112
"page_id_query": "",
113113
"match_domains": ["telegraaf.nl"],
114-
"title_query": ["h1.ArticleTitleBlock__title"]
114+
"title_query": [
115+
"h1.ArticleTitleBlock__title",
116+
".Article__titleWrapper h1"
117+
]
115118
},
116119
{
117120
"name": "NUnl",
@@ -188,7 +191,13 @@
188191
"page_id_location": "url",
189192
"page_id_query": "",
190193
"match_domains": ["volkskrant.nl"],
191-
"title_query": [".artstyle__header-title", "h1.title", "h1.artstyle__header-title", ".h1-headline"]
194+
"title_query": [
195+
".artstyle__header-title",
196+
"h1.title",
197+
"h1.artstyle__header-title",
198+
".h1-headline",
199+
"[data-test-id=article-title]"
200+
]
192201
},
193202
{
194203
"name": "RTL",
@@ -208,7 +217,12 @@
208217
"page_id_location": "url",
209218
"page_id_query": "",
210219
"match_domains": ["rtlnieuws.nl", "bright.nl"],
211-
"title_query": ["div.article-title-width > h1.node-title", ".video-content > h1", "h1.article__title"]
220+
"title_query": [
221+
"div.article-title-width > h1.node-title",
222+
".video-content > h1",
223+
"h1.article__title",
224+
"[data-testid=hero-heading]"
225+
]
212226
},
213227
{
214228
"name": "Trouw",
@@ -220,7 +234,10 @@
220234
"page_id_location": "url",
221235
"page_id_query": "",
222236
"match_domains": ["trouw.nl"],
223-
"title_query": ["h1.artstyle__header-title"]
237+
"title_query": [
238+
"h1.artstyle__header-title",
239+
"[data-test-id=article-title]"
240+
]
224241
},
225242
{
226243
"name": "Parool",
@@ -243,7 +260,10 @@
243260
"page_id_location": "url",
244261
"page_id_query": "",
245262
"match_domains": ["parool.nl"],
246-
"title_query": [".artstyle__header-title"]
263+
"title_query": [
264+
".artstyle__header-title",
265+
"[data-test-id=article-title]"
266+
]
247267
},
248268
{
249269
"name": "Limburger",
@@ -259,7 +279,7 @@
259279
"page_id_location": "url",
260280
"page_id_query": "",
261281
"match_domains": ["limburger.nl"],
262-
"title_query": ["div > header.article__header > h1"]
282+
"title_query": ["div > header > h1"]
263283
},
264284
{
265285
"name": "FD",
@@ -271,7 +291,10 @@
271291
"page_id_location": "url",
272292
"page_id_query": "",
273293
"match_domains": ["fd.nl"],
274-
"title_query": [".head.full.social-quotable > h1"]
294+
"title_query": [
295+
".head.full.social-quotable > h1",
296+
"h1.heading"
297+
]
275298
},
276299
{
277300
"name": "HVNL",
@@ -387,7 +410,6 @@
387410
"cnn_topstories",
388411
"cnn_world",
389412
"cnn_us",
390-
"cnn_allpolitics",
391413
"cnn_tech",
392414
"cnn_health",
393415
"cnn_showbiz",
@@ -441,12 +463,13 @@
441463
"match_domains": ["cnn.com", "edition.cnn.com"],
442464
"title_query": [
443465
".l-container > h1.pg-headline",
444-
"h1.Article__title",
445-
"h1.PageHead__title",
466+
"h1.Article__title",
467+
"h1.PageHead__title",
446468
"h1.article-title",
447469
"h1.pg-headline",
448470
".pg-rail .el__video-collection__meta-wrapper > h1.media__video-headline",
449-
"h1.headline__text"
471+
"h1.headline__text",
472+
".headline > h1"
450473
]
451474
},
452475
{
@@ -504,7 +527,8 @@
504527
"title_query": [
505528
"h1[itemprop=\"headline\"]",
506529
"header h1",
507-
"h1[data-testid=\"headline\"]"
530+
"h1[data-testid=\"headline\"]",
531+
".article-headline > h1"
508532
]
509533
},
510534
{
@@ -659,7 +683,10 @@
659683
"page_id_location": "url",
660684
"page_id_query": "",
661685
"match_domains": ["time.com"],
662-
"title_query": ["h1.headline"]
686+
"title_query": [
687+
"h1.headline",
688+
"main h1"
689+
]
663690
}
664691
],
665692
"uk": [
@@ -679,11 +706,28 @@
679706
"page_id_query": "",
680707
"match_domains": ["theguardian.com"],
681708
"title_query": [
682-
"h1[itemprop=\"headline\"]",
683-
"h1 > span",
709+
"h1[itemprop=\"headline\"]",
710+
"h1 > span",
684711
"div[data-gu-name=\"headline\"] h1"
685712
]
686713
}
714+
],
715+
"be": [
716+
{
717+
"name": "VRT",
718+
"prefix": "https://www.vrt.be/vrtnws/",
719+
"suffix": ".rss.articles.xml",
720+
"feeds": [
721+
"nl",
722+
"en"
723+
],
724+
"id_container": "id",
725+
"id_mask": "p\\.[a-zA-Z0-9]{9}$",
726+
"page_id_location": "element_href",
727+
"page_id_query": "link[rel=shortlink]",
728+
"match_domains": ["vrt.be"],
729+
"title_query": [".vrt-title"]
730+
}
687731
]
688732
}
689733
}

0 commit comments

Comments
 (0)