Skip to content

Commit cf2b6be

Browse files
committed
google slides to pdf
1 parent 7979659 commit cf2b6be

File tree

1 file changed

+176
-14
lines changed

1 file changed

+176
-14
lines changed

slides.py

+176-14
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,14 @@
3939

4040

4141
import io
42+
import json
4243
import os
4344

4445
from google.auth.transport.requests import Request
4546
from google.oauth2.credentials import Credentials
4647
from google_auth_oauthlib.flow import InstalledAppFlow
4748
from googleapiclient.discovery import build
48-
from googleapiclient.http import MediaIoBaseDownload
49+
from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload
4950

5051

5152
def get_credentials():
@@ -73,25 +74,22 @@ def get_credentials():
7374
return creds
7475

7576

76-
def download_as_pdf(service, file_id, output_filename):
77+
def download_as_pdf(service, file_id, output_file_path):
    """
    Export a Google Drive file as a PDF and save it to a local file.

    :param service: Authorized Google Drive service instance.
    :param file_id: ID of the Drive file to export.
    :param output_file_path: Local path the PDF bytes are written to.
    """
    pdf_buffer = io.BytesIO()
    export_request = service.files().export_media(
        fileId=file_id, mimeType="application/pdf"
    )
    downloader = MediaIoBaseDownload(pdf_buffer, export_request)

    print(f"Download PDF to {output_file_path}...")
    finished = False
    while finished is False:
        progress, finished = downloader.next_chunk()
        print(f"Download {int(progress.progress() * 100)}%.")

    # The export is collected in memory first and written out only once the
    # download loop has completed.
    with open(output_file_path, "wb") as pdf_file:
        pdf_file.write(pdf_buffer.getbuffer())
8790

8891

89-
# Load credentials
90-
creds = Credentials.from_authorized_user_file("secrets/token.json")
91-
service = build("drive", "v3", credentials=creds)
92-
93-
94-
def find_file_id_by_path(service, path):
92+
def find_item_id_by_path(service, path):
9593
folder_id = "root" # start from the root
9694
for name in path.split("/"):
9795
if not name:
@@ -108,19 +106,183 @@ def find_file_id_by_path(service, path):
108106
files = response.get("files", [])
109107

110108
if not files:
111-
raise Exception(f"No such file/dir named {name} in path {path}")
109+
print(f"No such file/dir named {name} in path {path}")
110+
return None
112111

113112
# Assuming the first found file/folder is the correct one
114113
folder_id = files[0]["id"]
115114

116115
return folder_id
117116

118117

119-
if __name__ == "__main__":
120-
path = "CV Course/slides/Intro to Computer Vision"
118+
def generate_current_slides_info(service, folder_path):
    """
    Retrieves all Google Slides files along with their last modified times within a specified directory on Google Drive.

    :param service: Authorized Google Drive service instance.
    :param folder_path: The path of the directory to search in, e.g., "CV Course/slides".
    :return: A dict mapping each file ID to its metadata dict (id, name, modifiedTime).
        Empty when the folder cannot be resolved or holds no Google Slides files.
    """
    folder_id = find_item_id_by_path(service, folder_path)
    if folder_id is None:
        # The path lookup returns None for a missing folder; querying with
        # that value would search for a parent literally named 'None'.
        return {}

    slides_mime_type = "application/vnd.google-apps.presentation"
    # trashed=false keeps decks sitting in the bin from being exported.
    query = (
        f"'{folder_id}' in parents and mimeType='{slides_mime_type}' "
        "and trashed=false"
    )

    slides_info_dict = {}
    page_token = None
    while True:
        response = (
            service.files()
            .list(
                q=query,
                spaces="drive",
                fields="nextPageToken, files(id, name, modifiedTime)",
                pageSize=100,  # Adjust pageSize as needed
                pageToken=page_token,
            )
            .execute()
        )
        for item in response.get("files", []):
            slides_info_dict[item["id"]] = item

        # Keep requesting pages until the API stops handing back a token, so
        # folders with more than one page of decks are listed completely.
        page_token = response.get("nextPageToken")
        if not page_token:
            break

    return slides_info_dict
144+
145+
146+
def write_slides_info_to_drive(service, folder_path, slides_files, json_filename):
    """
    Writes data about Google Slides files to a JSON file and updates or uploads it to a specified directory on Google Drive.

    :param service: Authorized Google Drive service instance.
    :param folder_path: "/"-separated Drive path of the folder holding the JSON file.
    :param slides_files: JSON-serializable data to store.
    :param json_filename: Name used for both the temporary local file and the Drive file.
    :raises FileNotFoundError: If the file has to be created but folder_path
        cannot be resolved on Drive.
    """
    # Write the data to a JSON file locally
    with open(json_filename, "w") as json_file:
        json.dump(slides_files, json_file, indent=4)

    try:
        # Drive paths are split on "/" by the lookup helper, so build the path
        # with "/" explicitly instead of the OS-specific separator.
        drive_json_path = f"{folder_path.rstrip('/')}/{json_filename}"
        drive_json_id = find_item_id_by_path(service, drive_json_path)

        media = MediaFileUpload(json_filename, mimetype="application/json")

        if drive_json_id is None:
            # File doesn't exist, create a new one inside the target folder
            folder_id = find_item_id_by_path(service, folder_path)
            if folder_id is None:
                raise FileNotFoundError(
                    f"Google Drive folder not found: {folder_path}"
                )
            file_metadata = {"name": json_filename, "parents": [folder_id]}
            created_file = (
                service.files()
                .create(body=file_metadata, media_body=media, fields="id")
                .execute()
            )
            print(
                f"Uploaded {json_filename} to Google Drive with ID: {created_file.get('id')}"
            )
        else:
            # File exists, update it; 'parents' is not sent for an update
            file_metadata = {"name": json_filename}
            updated_file = (
                service.files()
                .update(
                    fileId=drive_json_id,
                    body=file_metadata,
                    media_body=media,
                    fields="id",
                )
                .execute()
            )
            print(
                f"Updated {json_filename} in Google Drive with ID: {updated_file.get('id')}"
            )
    finally:
        # Clean up the local file even when the lookup or upload fails
        os.remove(json_filename)
199+
200+
201+
def read_json_from_drive(service, file_id):
    """
    Downloads a JSON file from Google Drive and parses it.

    :param service: Authorized Google Drive service instance.
    :param file_id: The ID of the JSON file to read data from.
    :return: The parsed JSON content.
    """
    buffer = io.BytesIO()
    media_request = service.files().get_media(fileId=file_id)
    downloader = MediaIoBaseDownload(buffer, media_request)

    # Pull chunks until the downloader reports completion.
    while True:
        _, finished = downloader.next_chunk()
        if finished:
            break

    # Parse the downloaded bytes directly instead of rewinding the buffer.
    return json.loads(buffer.getvalue())
223+
224+
225+
def has_file_changed(old_info, new_info):
    """
    Check if the file's modified time in the new info is more recent than in the old info.

    :param old_info: Metadata dict from the previous run; read via "modifiedTime".
    :param new_info: Metadata dict from the current listing; read via "modifiedTime".
    :return: True when the new timestamp is greater than the old one, or when
        either record has no "modifiedTime" (the state is unknown, so the file
        is treated as changed). False otherwise.
    """
    old_modified_time = old_info.get("modifiedTime")
    new_modified_time = new_info.get("modifiedTime")

    # A missing timestamp would make the comparison below raise TypeError
    # (str vs None); err on the side of reporting a change.
    if old_modified_time is None or new_modified_time is None:
        return True

    # NOTE(review): plain string comparison; this orders correctly only if both
    # timestamps share one fixed-width format (presumably the UTC RFC 3339
    # strings returned by Drive) - confirm.
    return new_modified_time > old_modified_time
232+
233+
234+
def get_old_slides_info(folder_path, json_filename, service):
    """
    Loads the slides info stored on Google Drive by a previous run.

    :param folder_path: "/"-separated Drive path of the folder holding the JSON file.
    :param json_filename: Name of the JSON file inside that folder.
    :param service: Authorized Google Drive service instance.
    :return: The parsed content of the JSON file, or an empty dict when the
        file cannot be found on Drive.
    """
    # Drive paths are split on "/" by the lookup helper, so join with "/"
    # explicitly rather than with the OS-specific separator.
    drive_json_path = f"{folder_path.rstrip('/')}/{json_filename}"
    drive_json_id = find_item_id_by_path(service, drive_json_path)
    if drive_json_id is None:
        return {}

    # The stored JSON is returned as parsed; no further conversion is applied.
    return read_json_from_drive(service, drive_json_id)
245+
246+
247+
def generate_changed_slides_as_pdfs(
    service, last_slides_info, slides_info, out_folder_path
):
    """
    Downloads as PDF every slide deck that is new or has changed since the last run.

    :param service: Authorized Google Drive service instance.
    :param last_slides_info: Dict of file ID -> metadata recorded by the previous run.
    :param slides_info: Dict of file ID -> metadata for the current Drive state.
    :param out_folder_path: Local directory the PDFs are written to; created if missing.
    """
    # Opening the output file fails when the directory is absent, so make sure
    # it exists up front. An empty path means the current directory.
    if out_folder_path:
        os.makedirs(out_folder_path, exist_ok=True)

    for slide_id, slide_data in slides_info.items():
        # Skip only decks that were seen before and have not been modified;
        # unseen decks are always downloaded.
        if slide_id in last_slides_info and not has_file_changed(
            last_slides_info[slide_id], slide_data
        ):
            continue

        output_filename = f"{slide_data['name']}.pdf"
        download_as_pdf(
            service, slide_id, os.path.join(out_folder_path, output_filename)
        )
        print(f"Updated slide downloaded as PDF: {output_filename}")
261+
262+
263+
def connect_to_google_drive_service():
    """
    Builds a Google Drive v3 service client from the credentials returned by get_credentials().

    :return: The Drive service instance produced by build().
    """
    return build("drive", "v3", credentials=get_credentials())
267+
268+
269+
if __name__ == "__main__":
    # Where the decks live on Drive, the name of the state file kept next to
    # them, and the local folder receiving the exported PDFs.
    GOOGLE_DRIVE_SLIDES_FOLDER_PATH = "CV Course/slides"
    JSON_FILENAME = "last_slides_info.json"
    PDF_OUT_FOLDER_PATH = "/home/yoni/Desktop/AI_is_Math/lectures"

    service = connect_to_google_drive_service()

    # State recorded by the previous run (empty dict when there is none).
    last_slides_info = get_old_slides_info(
        folder_path=GOOGLE_DRIVE_SLIDES_FOLDER_PATH,
        json_filename=JSON_FILENAME,
        service=service,
    )

    # Current state of the slides folder on Drive.
    slides_info = generate_current_slides_info(
        service=service, folder_path=GOOGLE_DRIVE_SLIDES_FOLDER_PATH
    )

    # Export whatever is new or modified, then persist the current state so
    # the next run compares against it.
    generate_changed_slides_as_pdfs(
        service=service,
        last_slides_info=last_slides_info,
        slides_info=slides_info,
        out_folder_path=PDF_OUT_FOLDER_PATH,
    )
    write_slides_info_to_drive(
        service=service,
        folder_path=GOOGLE_DRIVE_SLIDES_FOLDER_PATH,
        slides_files=slides_info,
        json_filename=JSON_FILENAME,
    )

0 commit comments

Comments
 (0)