39
39
40
40
41
41
import io
42
+ import json
42
43
import os
43
44
44
45
from google .auth .transport .requests import Request
45
46
from google .oauth2 .credentials import Credentials
46
47
from google_auth_oauthlib .flow import InstalledAppFlow
47
48
from googleapiclient .discovery import build
48
- from googleapiclient .http import MediaIoBaseDownload
49
+ from googleapiclient .http import MediaFileUpload , MediaIoBaseDownload
49
50
50
51
51
52
def get_credentials ():
@@ -73,25 +74,22 @@ def get_credentials():
73
74
return creds
74
75
75
76
76
def download_as_pdf(service, file_id, output_file_path):
    """
    Exports a Google Drive file as a PDF and saves it to a local file.

    :param service: Authorized Google Drive service instance.
    :param file_id: The ID of the Drive file to export.
    :param output_file_path: Local path the PDF bytes are written to.
    """
    export_request = service.files().export_media(
        fileId=file_id, mimeType="application/pdf"
    )
    # The export is collected in memory and written to disk in one go at the end.
    pdf_buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(pdf_buffer, export_request)

    print(f"Download PDF to {output_file_path}...")
    finished = False
    while not finished:
        progress, finished = downloader.next_chunk()
        print("Download %d%%." % int(progress.progress() * 100))

    with open(output_file_path, "wb") as pdf_file:
        pdf_file.write(pdf_buffer.getbuffer())
87
90
88
91
89
- # Load credentials
90
- creds = Credentials .from_authorized_user_file ("secrets/token.json" )
91
- service = build ("drive" , "v3" , credentials = creds )
92
-
93
-
94
- def find_file_id_by_path (service , path ):
92
+ def find_item_id_by_path (service , path ):
95
93
folder_id = "root" # start from the root
96
94
for name in path .split ("/" ):
97
95
if not name :
@@ -108,19 +106,183 @@ def find_file_id_by_path(service, path):
108
106
files = response .get ("files" , [])
109
107
110
108
if not files :
111
- raise Exception (f"No such file/dir named { name } in path { path } " )
109
+ print (f"No such file/dir named { name } in path { path } " )
110
+ return None
112
111
113
112
# Assuming the first found file/folder is the correct one
114
113
folder_id = files [0 ]["id" ]
115
114
116
115
return folder_id
117
116
118
117
119
- if __name__ == "__main__" :
120
- path = "CV Course/slides/Intro to Computer Vision"
118
def generate_current_slides_info(service, folder_path):
    """
    Retrieves all Google Slides files along with their last modified times within a specified directory on Google Drive.

    :param service: Authorized Google Drive service instance.
    :param folder_path: The path of the directory to search in, e.g., "CV Course/slides".
    :return: A dict mapping each file ID to a dict holding the "id", "name" and
        "modifiedTime" of that Google Slides file.
    :raises FileNotFoundError: If folder_path cannot be resolved on Google Drive.
    """
    folder_id = find_item_id_by_path(service, folder_path)
    if folder_id is None:
        # find_item_id_by_path signals a missing path with None; building the
        # query from it would search for the literal parent "None".
        raise FileNotFoundError(f"Google Drive folder not found: {folder_path}")

    slides_mime_type = "application/vnd.google-apps.presentation"
    query = f"'{folder_id}' in parents and mimeType='{slides_mime_type}'"

    slides_info_dict = {}
    page_token = None
    # Follow nextPageToken so folders with more than one page of results are
    # listed completely instead of being cut off after the first page.
    while True:
        response = (
            service.files()
            .list(
                q=query,
                spaces="drive",
                fields="nextPageToken, files(id, name, modifiedTime)",
                pageSize=100,  # Adjust pageSize as needed
                pageToken=page_token,
            )
            .execute()
        )
        for item in response.get("files", []):
            slides_info_dict[item["id"]] = item

        page_token = response.get("nextPageToken")
        if not page_token:
            break

    return slides_info_dict
144
+
145
+
146
def write_slides_info_to_drive(service, folder_path, slides_files, json_filename):
    """
    Writes data about Google Slides files to a JSON file and updates or uploads it to a specified directory on Google Drive.

    :param service: Authorized Google Drive service instance.
    :param folder_path: Drive path of the folder holding the JSON file, e.g., "CV Course/slides".
    :param slides_files: JSON-serializable data to store.
    :param json_filename: Name of the JSON file on Drive; the same name is used
        for a temporary local copy that is removed before returning.
    :raises FileNotFoundError: If the file has to be created and folder_path
        cannot be resolved on Google Drive.
    """
    # Serialize first so a non-serializable payload fails before any file is
    # created locally.
    payload = json.dumps(slides_files, indent=4)

    # Drive paths are "/"-separated (find_item_id_by_path splits on "/"), so
    # they must not be built with os.path.join, which uses the OS separator.
    drive_json_path = f"{folder_path.rstrip('/')}/{json_filename}"

    # Write the data to a JSON file locally
    with open(json_filename, "w") as json_file:
        json_file.write(payload)

    try:
        # Check if the JSON file already exists in the specified folder
        drive_json_id = find_item_id_by_path(service, drive_json_path)
        media = MediaFileUpload(json_filename, mimetype="application/json")

        if drive_json_id is None:
            # File doesn't exist, create a new one inside the target folder
            folder_id = find_item_id_by_path(service, folder_path)
            if folder_id is None:
                raise FileNotFoundError(
                    f"Google Drive folder not found: {folder_path}"
                )
            file_metadata = {"name": json_filename, "parents": [folder_id]}
            created_file = (
                service.files()
                .create(body=file_metadata, media_body=media, fields="id")
                .execute()
            )
            print(
                f"Uploaded {json_filename} to Google Drive with ID: {created_file.get('id')}"
            )
        else:
            # File exists, update it; 'parents' is not needed for an update
            file_metadata = {"name": json_filename}
            updated_file = (
                service.files()
                .update(
                    fileId=drive_json_id,
                    body=file_metadata,
                    media_body=media,
                    fields="id",
                )
                .execute()
            )
            print(
                f"Updated {json_filename} in Google Drive with ID: {updated_file.get('id')}"
            )
    finally:
        # Clean up the local file even when the lookup or upload fails
        os.remove(json_filename)
199
+
200
+
201
def read_json_from_drive(service, file_id):
    """
    Downloads a JSON file from Google Drive and returns its parsed content.

    :param service: Authorized Google Drive service instance.
    :param file_id: The ID of the JSON file to read data from.
    :return: The data read from the JSON file.
    """
    media_request = service.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, media_request)

    # Keep pulling chunks until the downloader reports completion; the
    # progress object is not needed here.
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()

    # getvalue() returns the whole downloaded payload regardless of the
    # current stream position.
    return json.loads(buffer.getvalue())
223
+
224
+
225
def has_file_changed(old_info, new_info):
    """
    Check if the file's modified time in the new info is more recent than in the old info.

    :param old_info: Dict describing the file as previously recorded.
    :param new_info: Dict describing the file as currently listed.
    :return: True if new_info's "modifiedTime" is greater than old_info's, or
        if either dict has no "modifiedTime" (the change status is then
        unknown, so the file is treated as changed).
    """
    old_modified_time = old_info.get("modifiedTime")
    new_modified_time = new_info.get("modifiedTime")

    # Without both timestamps a comparison is impossible (None > str raises
    # TypeError); err on the side of reporting a change.
    if old_modified_time is None or new_modified_time is None:
        return True

    # NOTE(review): values are compared as plain strings; this assumes both
    # timestamps share one fixed-width, same-timezone format so that string
    # order matches chronological order - confirm against the Drive API.
    return new_modified_time > old_modified_time
232
+
233
+
234
def get_old_slides_info(folder_path, json_filename, service):
    """
    Loads the slides info stored in a JSON file on Google Drive.

    :param folder_path: Drive path of the folder holding the JSON file, e.g., "CV Course/slides".
    :param json_filename: Name of the JSON file inside that folder.
    :param service: Authorized Google Drive service instance.
    :return: The parsed content of the JSON file, or an empty dict if the file
        cannot be found on Google Drive.
    """
    # Drive paths are "/"-separated (find_item_id_by_path splits on "/"), so
    # they must not be built with os.path.join, which uses the OS separator.
    drive_json_path = f"{folder_path.rstrip('/')}/{json_filename}"

    drive_json_id = find_item_id_by_path(service, drive_json_path)
    if drive_json_id is None:
        return {}

    return read_json_from_drive(service, drive_json_id)
245
+
246
+
247
def generate_changed_slides_as_pdfs(
    service, last_slides_info, slides_info, out_folder_path
):
    """
    Downloads as PDF every slides file that is new or has changed since the last recorded state.

    :param service: Authorized Google Drive service instance.
    :param last_slides_info: Dict of previously recorded slides info, keyed by file ID.
    :param slides_info: Dict of current slides info, keyed by file ID; each
        value must provide a "name".
    :param out_folder_path: Local folder the PDFs are written to, as "<name>.pdf".
    """
    for slide_id, slide_data in slides_info.items():
        # Skip only files that were seen before and have not changed since.
        if slide_id in last_slides_info and not has_file_changed(
            last_slides_info[slide_id], slide_data
        ):
            continue

        # A path separator inside the slide name would point the output at a
        # (usually non-existent) subdirectory, so replace it.
        safe_name = slide_data["name"].replace("/", "_").replace(os.sep, "_")
        output_filename = f"{safe_name}.pdf"
        download_as_pdf(
            service, slide_id, os.path.join(out_folder_path, output_filename)
        )
        print(f"Updated slide downloaded as PDF: {output_filename}")
261
+
262
+
263
def connect_to_google_drive_service():
    """
    Builds a Google Drive v3 service instance using the credentials returned by get_credentials().

    :return: The service object created by build().
    """
    return build("drive", "v3", credentials=get_credentials())
267
+
268
+
269
if __name__ == "__main__":
    # Drive folder holding the slides, the name of the state file kept in that
    # folder, and the local folder receiving the exported PDFs.
    GOOGLE_DRIVE_SLIDES_FOLDER_PATH = "CV Course/slides"
    JSON_FILENAME = "last_slides_info.json"
    PDF_OUT_FOLDER_PATH = "/home/yoni/Desktop/AI_is_Math/lectures"

    drive_service = connect_to_google_drive_service()

    # State recorded by the previous run (empty if there is none yet).
    previous_info = get_old_slides_info(
        folder_path=GOOGLE_DRIVE_SLIDES_FOLDER_PATH,
        json_filename=JSON_FILENAME,
        service=drive_service,
    )

    # Current state of the slides folder.
    current_info = generate_current_slides_info(
        drive_service, GOOGLE_DRIVE_SLIDES_FOLDER_PATH
    )

    # Export whatever is new or modified, then record the current state.
    generate_changed_slides_as_pdfs(
        drive_service, previous_info, current_info, PDF_OUT_FOLDER_PATH
    )
    write_slides_info_to_drive(
        drive_service, GOOGLE_DRIVE_SLIDES_FOLDER_PATH, current_info, JSON_FILENAME
    )
0 commit comments