-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrss-ntfy.py
executable file
·174 lines (142 loc) · 5.8 KB
/
rss-ntfy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env python3
## rss-ntfy.py --- Scrape RSS feeds and ntfy yourself.
# Copyright (c) 2023 Julian Orchard <[email protected]>
## Description:
# Really simple RSS feed scraper that looks at a list of URLs
# and sends a little ntfy about it.
# Mainly designed for Proxitok and Nitter.
## License:
# See /LICENSE file in the root of this repository.
## Code:
from bs4 import BeautifulSoup
from pathlib import Path
import os
import re
import requests
import yaml
# Folder next to this script (symlinks resolved) holding the follow lists.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + "/rss-ntfy/"

# NOTE: "config.yaml" is a relative path, so it is resolved against the
# current working directory, not against SCRIPT_DIR.
# The encoding is explicit so parsing does not depend on the locale.
with open("config.yaml", "r", encoding="UTF-8") as config_file_contents:
    CONFIG = yaml.safe_load(config_file_contents)

# TODO: This could also be abstracted to the config file, with a default value
NTFY_INSTANCE = "https://ntfy.sh/"
def ntfyr(message, ntfy_topic, timeout=30):
    '''
    Send a plain-text notification via ntfy, following the simple
    example on docs.ntfy.sh.

    message    -- text to send; formatted to a string and UTF-8 encoded.
    ntfy_topic -- topic name, appended to NTFY_INSTANCE to form the URL.
    timeout    -- seconds to wait for the server before requests raises
                  a timeout error, so an unreachable instance cannot
                  block the script forever.
    '''
    endpoint = f'{NTFY_INSTANCE}{ntfy_topic}'
    body = f"{message}".encode(encoding="UTF-8")
    requests.post(endpoint, data=body, timeout=timeout)
def ntfyr_complex(ntfy_topic, username, title, link, published, description,
                  timeout=30):
    '''
    Send a richer notification via ntfy by publishing a JSON body
    to NTFY_INSTANCE, as described at:
    https://docs.ntfy.sh/publish/

    ntfy_topic  -- topic the notification is published to.
    username    -- account name shown in the message.
    title       -- item title; when it is the empty string only
                   '<description> from <username>!' is sent.
    link        -- URL opened by the 'View!' action button.
    published   -- accepted for callers, but not used in the message.
    description -- short descriptor that starts the message text.
    timeout     -- seconds to wait for the server before requests
                   raises a timeout error, so an unreachable instance
                   cannot block the script forever.
    '''
    message_text = f"{description} from {username}"
    if title != "":
        message_text = f"{message_text}:\n\n{title}!"
    else:
        message_text = f"{message_text}!"

    payload = {
        "topic": f"{ntfy_topic}",
        "message": f"{message_text}",
        "actions": [{
            "action": "view",
            "label": "View!",
            "url": f"{link}",
        }],
    }
    requests.post(f"{NTFY_INSTANCE}", json=payload, timeout=timeout)
def get_user_list(user_list_file):
    '''
    Read the follow list into a list to iterate.

    The file is read as UTF-8, one user per line. Trailing
    whitespace (including the newline) is removed from every line,
    and lines that are empty afterwards are skipped, so a stray
    blank line does not turn into an empty username.

    Returns the users in file order.
    '''
    with open(user_list_file, encoding="UTF-8") as follow_file:
        stripped_lines = (line.rstrip() for line in follow_file)
        return [user for user in stripped_lines if user]
def handlebar_replace(input, replacement):
    '''
    Very simple Handlebar style replace:
    https://handlebarsjs.com

    Takes the input URL and replaces every {{ custom }} part with
    the replacement, which will be the current user part.

    Each placeholder is matched on its own (non-greedy), so text
    between two placeholders is kept. The replacement is inserted
    literally: backslashes or group references in it are not
    interpreted as a regex template.

    Returns the input unchanged when it has no placeholder.
    '''
    # The parameter is still called 'input' (shadowing the builtin) so
    # existing keyword callers keep working.
    # A function replacement makes re.sub skip template processing.
    return re.sub(r"\{\{.*?\}\}", lambda _match: replacement, input)
def check_file_list_exists(file_list):
    '''
    Takes a list of files, checks if they exist,
    creates them (empty) if they do not!

    Missing parent folders are created as well, so a file in a
    folder that does not exist yet no longer raises
    FileNotFoundError. Files that already exist keep their content.

    I'm using this function instead of just relying on
    'w+', because we 'r+' the History file, at one point,
    and 'r+' doesn't create the file if it doesn't exist
    (unlike 'w+' and 'a+').
    '''
    for file in file_list:
        target = Path(file)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.touch(exist_ok=True)
def mkdirp(folder):
    '''
    Create folder and any missing parent folders, like 'mkdir -p'.
    A folder that already exists is not an error.
    '''
    target = Path(folder)
    target.mkdir(exist_ok=True, parents=True)
# Seconds to wait on a feed before giving up, so one dead instance
# cannot stall the whole run.
_FEED_TIMEOUT = 30


def _adopt_legacy_hist(legacy_hist, service_hist):
    '''
    Move a History file from the old, mis-joined location
    ('<cache><service>_hist') to service_hist, so links that were
    already sent are not sent a second time.
    '''
    if os.path.isfile(legacy_hist) and not os.path.exists(service_hist):
        try:
            os.replace(legacy_hist, service_hist)
        except OSError:
            # Best effort only: the worst case is an empty History.
            pass


def _read_hist(service_hist):
    'Return the set of links already recorded in the History file.'
    with open(service_hist, "r", encoding="UTF-8") as hist_file:
        return {line.strip() for line in hist_file if line.strip()}


def main():
    '''
    For every service in CONFIG, fetch the feed of every followed
    user and send an ntfy for each item whose link is not in that
    service's History file yet; sent links are appended to it.

    History files live in '<cache>/rss-ntfy/', where <cache> is
    $XDG_CACHE_HOME when set and non-empty, else '~/.cache'.

    Any error while handling one user is reported through ntfyr on
    the service's topic, and the run carries on with the next user.

    This article by Matthew Wimberly got me along the right lines with things:
    https://codeburst.io/building-an-rss-feed-scraper-with-python-73715ca06e1f
    '''
    # Logfile Save Location (an empty XDG_CACHE_HOME counts as unset)
    log_location = os.environ.get("XDG_CACHE_HOME") or f"{Path.home()}/.cache"
    hist_dir = f"{log_location}/rss-ntfy/"
    mkdirp(hist_dir)

    for service in CONFIG.values():
        # Follow File and History File
        user_list_file = f"{SCRIPT_DIR}{service['service']}-follow-list.txt"
        service_hist = f"{hist_dir}{service['service']}_hist"
        # Earlier versions joined the path without the folder.
        _adopt_legacy_hist(f"{log_location}{service['service']}_hist",
                           service_hist)
        check_file_list_exists([user_list_file, service_hist])

        # Instance, Topic, Descriptor
        instance = f"{service['rss-url']}"
        ntfy_topic = f"{service['ntfy-topic']}"
        descriptor = service['descriptor']

        # Alternative Tags Input
        item_tag = service.get("item-alt", "item")
        title_tag = service.get("title-alt", "title")
        link_tag = service.get("link-alt", "link")
        date_tag = service.get("pubdate-alt", "pubDate")

        # Read the History once per service and compare whole links;
        # a substring test would treat '.../1' as seen once '.../10'
        # has been stored.
        seen_links = _read_hist(service_hist)

        # TODO: Rename everything with 'user', as it's more generally an
        #       account? Not sure if account is the best name, either.
        for username in get_user_list(user_list_file):
            current_instance = handlebar_replace(instance, username)
            name_tag = service.get("name-alt", username)

            try:
                req = requests.get(current_instance, timeout=_FEED_TIMEOUT)
                rss_content = BeautifulSoup(req.content, "lxml-xml")

                for article in rss_content.find_all(item_tag):
                    link = article.find(link_tag).text.strip()
                    # Empty links were never notified before either.
                    if not link or link in seen_links:
                        continue

                    title = article.find(title_tag).text
                    published = article.find(date_tag).text

                    # If we need a different name from the username,
                    # handle that here.
                    if name_tag != username:
                        name = article.find(name_tag).text
                    else:
                        name = username

                    # Notify first: a link is only recorded once its
                    # notification went out without an error.
                    ntfyr_complex(ntfy_topic,
                                  name,
                                  title,
                                  link,
                                  published,
                                  descriptor)
                    with open(service_hist, "a", encoding="UTF-8") as hist_file:
                        hist_file.write(f"{link}\n")
                    seen_links.add(link)

            except Exception as e:
                # Per-user boundary: report and move on to the next user.
                # TODO: Just use the ntfy JSON request format
                ntfyr(f"Error with scraping {username}, '{e}'.", ntfy_topic)
# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()