|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Reddit Wallscraper |
| 4 | +Course: CS 41 |
| 5 | +Name: Chris Moffit and Elizabeth Fitzgerald |
| 6 | +SUNet: cmoffitt and elizfitz |
| 7 | +
|
Scrapes a subreddit's JSON listing and downloads direct-link images from
posts scoring over 500 into wallpapers/<width>x<height>/ folders, using a
pickle file of previously seen image bytes to avoid duplicate downloads.
| 9 | +""" |
| 10 | +# import utils |
| 11 | +import requests |
| 12 | +import sys |
| 13 | +import re |
| 14 | +import os |
| 15 | +import pickle |
| 16 | + |
# Uses requests module to query reddit for the json listing of a subreddit
def query(subreddit):
    """Fetch the front-page JSON listing for a subreddit.

    Exits the program with status 1 on any network failure or when the
    subreddit does not exist.

    :param subreddit: name of the subreddit (e.g. "wallpapers")
    :return: parsed JSON dict for the subreddit listing
    """
    url = "https://reddit.com/r/" + subreddit + ".json"
    print(url)
    # Reddit rejects the default requests User-Agent, so identify the script.
    headers = {'User-Agent': "Wallscraper Script by @cmoffitt"}

    # Make request and catch exceptions; the timeout keeps the script from
    # hanging indefinitely on an unresponsive server.
    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        print("Http Error:", errh)
        sys.exit(1)
    except requests.exceptions.ConnectionError:
        print("Error Connecting: No internet connection")
        sys.exit(1)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
        sys.exit(1)
    except requests.exceptions.RequestException as err:
        print("OOps: Something Else", err)
        sys.exit(1)

    # raise_for_status() exits above on any 4xx/5xx, so the response is
    # guaranteed successful here.
    json_data = r.json()

    # Check if valid subreddit (nonexistent subreddits still return JSON,
    # just with an empty listing).
    if not isValidSubreddit(json_data):
        print("Not a valid subreddit. Please try again.")
        sys.exit(1)

    return json_data
| 57 | + |
# Class defining one reddit post
class RedditPost:
    """One post from a subreddit JSON listing, with wallpaper download support."""

    # Attributes copied out of the raw JSON post object; anything missing
    # is stored as None.
    _ATTRS = ["subreddit", "is_self", "ups", "post_hint", "title", "downs",
              "score", "url", "domain", "permalink", "created_utc",
              "num_comments", "preview", "name", "over_18"]

    def __init__(self, data):
        """Build the post's attribute dict from a raw JSON post object.

        :param data: one element of the listing's data.children array
        """
        attrs = {}
        for key in self._ATTRS:
            try:
                attrs[key] = data["data"][key]
            except (KeyError, TypeError):
                # Missing or malformed field: default to None rather than crash.
                attrs[key] = None
        self.data = attrs

    def download(self):
        """Download the post's image to wallpapers/<width>x<height>/<title>.png.

        Does nothing for non-image links. Previously downloaded image bytes
        are remembered in seen_wallpapers.pickle so duplicates are skipped.
        """
        url = self.data["url"]
        # Only download direct image links (url may also be None).
        if not url or (".jpg" not in url and ".png" not in url):
            return

        # Build a filesystem-safe name: drop [tags], then every
        # non-alphanumeric character (which also removes spaces).
        name = re.sub(r'\[.*\]', '', self.data["title"])
        name = re.sub(r'[^a-zA-Z0-9]', '', name)
        # NOTE(review): assumes image posts always carry a "preview" with at
        # least one source image — confirm against the reddit JSON schema.
        source = self.data["preview"]["images"][0]["source"]
        path = "wallpapers/" + str(source["width"]) + "x" + str(source["height"]) + "/"
        filename = name + ".png"

        if not os.path.exists(path):
            os.makedirs(path)

        # The raw image bytes double as the duplicate-detection key.
        img_data = requests.get(url, timeout=10).content

        # Load previously seen images; start fresh on the first run instead
        # of crashing when the pickle file does not exist yet.
        try:
            with open("seen_wallpapers.pickle", 'rb') as f:
                seen_wallpapers = pickle.load(f)
        except FileNotFoundError:
            seen_wallpapers = []

        if img_data not in seen_wallpapers:
            seen_wallpapers.append(img_data)
            with open("seen_wallpapers.pickle", 'wb') as f:
                pickle.dump(seen_wallpapers, f)
            # Save the image itself.
            with open(os.path.join(path, filename), 'wb') as out:
                out.write(img_data)

    def __str__(self):
        # Format: RedditPost({title} ({score}): {url})
        return ("RedditPost({" + self.data["title"] + "} ({"
                + str(self.data["score"]) + "}): {" + self.data["url"] + "})")
| 117 | + |
| 118 | + |
| 119 | + |
# Checks if valid subreddit by making sure the json dict object is properly
# filled with contents
def isValidSubreddit(json_data):
    """Return True when the listing contains at least one post.

    Reddit returns JSON even for nonexistent subreddits, but with a
    'dist' (post count) of zero.
    """
    return json_data['data']['dist'] != 0
| 126 | + |
| 127 | + |
def main(subreddit):
    """Scrape a subreddit listing and download images from high-scoring posts.

    Prints the total post count and how many posts scored over 500.

    :param subreddit: name of the subreddit to scrape
    """
    listing = query(subreddit)

    posts = [RedditPost(child) for child in listing['data']['children']]

    score_count = 0  # posts with a score above 500
    for post in posts:
        score = post.data["score"]
        # score is None when the JSON post lacked that field; comparing
        # None > 500 would raise TypeError, so skip those posts.
        if score is not None and score > 500:
            score_count += 1
            post.download()

    print("There were " + str(len(posts)) + " posts.")
    print(str(score_count) + " of those posts had a score over 500.")
| 146 | + |
if __name__ == '__main__':
    # Require the subreddit name argument; exit with a usage message instead
    # of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        print("Usage: python3 wallscraper.py <subreddit>")
        sys.exit(1)
    main(sys.argv[1])