"""Fetch YouTube video statistics for a list of videos and write them to CSV.

Reads ``tempList.txt`` (one video per line, comma-separated: id,title,description),
queries the YouTube Data API v3 ``videos`` endpoint for each id, and writes one
row per video to ``videoStats.csv`` with like/dislike counts, recording
location, and tags.
"""
import csv
import json
import re
import string
import urllib.request

# NOTE(security): hard-coded API key committed to source control — rotate this
# key and load it from an environment variable or config file instead.
API_KEY = 'AIzaSyC4C3gzSSErzmc2FeUTleQqZGzw8-z-d6w'

API_URL = ('https://www.googleapis.com/youtube/v3/videos'
           '?part=snippet,statistics,recordingDetails&id=')

# Strips ASCII punctuation characters.
PUNCT_RE = re.compile('[%s]' % re.escape(string.punctuation))
# Strips http(s)/www URLs.  (The original pattern had an unescaped dot in
# `www.`, which also matched strings like "wwwXfoo".)
URL_RE = re.compile(r'http\S+|www\.\S+')


def clean_text(text):
    """Return *text* with URLs removed first, then all punctuation stripped."""
    return PUNCT_RE.sub('', URL_RE.sub('', text))


def fetch_stats(video_id):
    """Query the YouTube API for *video_id* and return the parsed JSON dict.

    Raises OSError (urllib.error.URLError / HTTPError) on network or quota
    failures.
    """
    with urllib.request.urlopen(API_URL + video_id + '&key=' + API_KEY) as resp:
        return json.load(resp)


def main():
    # Each line of tempList.txt is: id,title,description
    with open('tempList.txt', encoding='utf-8') as f:
        videos = [line.rstrip('\n').split(',') for line in f]

    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open('videoStats.csv', 'w', newline='', encoding='utf-8') as out:
        writer = csv.writer(out)
        writer.writerow(['Id', 'Title', 'Description', 'LikeCount',
                         'DislikeCount', 'Location (latitude, longitude)',
                         'Tags (; delimited string)'])

        for index, vid in enumerate(videos):
            try:
                stats = fetch_stats(vid[0])
                print(vid[0])
            except OSError:
                # Most likely the API key hit its daily quota.  Report which
                # video failed and stop: the original code fell through here
                # and reused the PREVIOUS iteration's `stats` (or hit a
                # NameError on the first), and `l.index(vid[0])` raised
                # ValueError because the list holds [id, title, desc] rows.
                print('API key ran out')
                print(index)
                break

            if not stats['items']:
                # Video deleted/private: emit a placeholder row and move on.
                writer.writerow([vid[0], vid[1], vid[2], 0, 0, '', ''])
                continue

            item = stats['items'][0]
            like_count = item['statistics'].get('likeCount', 0)
            dislike_count = item['statistics'].get('dislikeCount', 0)

            # Location is only present for videos with recordingDetails.
            location = ''
            loc = item.get('recordingDetails', {}).get('location', {})
            if 'latitude' in loc:
                location = '%s;%s' % (loc['latitude'], loc['longitude'])

            tags = ''
            if 'tags' in item['snippet']:
                tags = ';'.join(clean_text(t) for t in item['snippet']['tags'])

            writer.writerow([vid[0], clean_text(vid[1]), clean_text(vid[2]),
                             like_count, dislike_count, location, tags])


if __name__ == '__main__':
    main()