-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocessFacebook2.py
More file actions
executable file
·228 lines (175 loc) · 12.8 KB
/
processFacebook2.py
File metadata and controls
executable file
·228 lines (175 loc) · 12.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
from processUnis import *
import sys
sys.path.append("/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages")
import facebook
import pickle
from collections import Counter
def getUserIDs():
    """Split the user IDs of all known comments into coded and uncoded groups.

    The IDs of the coded comments are read from the first "|"-separated
    column of ``StatsForCommentsWithoutUserID.csv`` (the header row is
    skipped).  All comments come from ``processComment()``, which is indexed
    here as ``allComments[uni][commentID]['userID']``.

    Returns:
        A ``(codedCommentIDict, restCommentIDict)`` tuple of dicts, each
        mapping comment ID to user ID: the first for comments listed in the
        CSV, the second for every other comment.
    """
    allComments = processComment()

    # Read through a context manager so the handle is closed, and keep the
    # IDs in a set so the membership test below does not rescan a list for
    # every single comment.
    with open("StatsForCommentsWithoutUserID.csv") as statsFile:
        codedIDs = set(row.split("|")[0] for row in statsFile.readlines()[1:])

    codedCommentIDict, restCommentIDict = {}, {}
    for uni in allComments:
        for commentID, comment in allComments[uni].items():
            if commentID in codedIDs:
                codedCommentIDict[commentID] = comment['userID']
            else:
                restCommentIDict[commentID] = comment['userID']
    return codedCommentIDict, restCommentIDict
def assignUserIDs(commentIDict):
    """Write ``StatsForComments.csv`` with a userID column spliced into each row.

    Every data row of ``StatsForCommentsWithoutUserID.csv`` (header skipped)
    is copied to the output with the user ID inserted between the part that
    precedes the last 12 "|" separators and the part that follows the first
    4 "|" separators.

    Args:
        commentIDict: dict mapping comment ID (first column of each row) to
            the user ID string to insert.

    Raises:
        KeyError: if a row's comment ID is missing from ``commentIDict``.
    """
    header = 'commentID|postID|uni|post|userID| username| date|time|comment|wordCount|numLikes|isViable|isMean|isProsocial|Taboo|Stigma|isCoded\n'
    # Both files are handled by context managers so the output is flushed and
    # closed even when a lookup fails part-way through.
    with open("StatsForComments.csv", "w") as f:
        f.write(header)
        with open("StatsForCommentsWithoutUserID.csv") as source:
            rows = source.readlines()[1:]
        for line in rows:
            # Head is cut from the right and tail from the left, exactly as
            # before, so rows with extra separators are spliced the same way.
            first = line.rsplit("|", 12)[0]
            second = line.split("|", 4)[-1]
            userID = commentIDict[line.split("|", 1)[0]]
            f.write(first + "|" + userID + "|" + second)
#commentIDict = getUserIDs()
#assignUserIDs()
def assignGenderCodedComments(accessToken=None):
    """Look up a gender label for every distinct user in ``StatsForComments.csv``.

    User IDs are taken from the fifth "|"-separated column of each data row
    (header skipped) and each one is fetched with ``graph.get_object``.

    Args:
        accessToken: optional Graph API access token.  When omitted, the
            token that was previously hard-coded here is used so existing
            zero-argument callers keep working.

    Returns:
        A tuple ``(genderDict, countUS, countUM, countNP, others,
        detailedEntries)`` where

        * ``genderDict`` maps user ID to the fetched ``gender`` value, or to
          ``"unmentioned"`` (has a first/last name but no gender),
          ``"notPerson"`` (no first/last name) or ``"unScraped"`` (the
          lookup raised);
        * ``countUS``, ``countUM``, ``countNP`` count the unScraped,
          unmentioned and notPerson cases;
        * ``others`` holds the raw details of the notPerson entries;
        * ``detailedEntries`` holds the raw details of gendered entries that
          carry keys beyond the basic profile fields.
    """
    if accessToken is None:
        # NOTE(review): a credential embedded in source should be revoked and
        # supplied from outside the file; it is kept only as the default.
        accessToken = "CAAGljQ0ymaQBAFRbTseLLCtDwwC1HZCvcCDcHZA120WFB58F02QqCDSpFRPsfMrChUPiZCTaHOQrWvJhWGhpZApY4JITwVPZAb7rrrNfba9ZBY0PJfDfpkfApLlZBdPSyWnGQO4HfMmrDBfdGB4sUHXxxm7qKzrZAksUduARzLE41PSvKFNIaYxqTB4DDtF6tFzNHqAJmSMyYHZAZBeaLAgbxvXYXWtBKwdTAZD"
    graph = facebook.GraphAPI(accessToken)

    with open("StatsForComments.csv") as statsFile:
        userIDs = set(row.split("|")[4] for row in statsFile.readlines()[1:])

    simpleKeys = set([u'username', u'first_name', u'last_name', u'link', u'name', u'locale', u'gender', u'id', u'updated_time'])
    genderDict = {}
    others = []
    detailedEntries = []
    countUS = countUM = countNP = 0

    for userID in userIDs:
        try:
            fbDetails = graph.get_object(userID)
            if 'first_name' in fbDetails or 'last_name' in fbDetails:
                # I.e. if it is a person
                if "gender" in fbDetails:
                    genderDict[userID] = fbDetails["gender"]
                    # Keep the whole record when it has more than the
                    # basic profile fields.
                    if set(fbDetails.keys()) - simpleKeys:
                        detailedEntries.append(fbDetails)
                else:
                    countUM += 1
                    genderDict[userID] = "unmentioned"
            else:
                countNP += 1
                others.append(fbDetails)
                genderDict[userID] = "notPerson"
        except Exception:
            # Best effort: any failed lookup is recorded rather than fatal.
            # Unlike a bare ``except``, Ctrl-C and SystemExit still propagate.
            countUS += 1
            genderDict[userID] = "unScraped"
    return genderDict, countUS, countUM, countNP, others, detailedEntries
'''
[{u'category': u'Community', u'username': u'OLCCgull', u'about': u'Defender of justice at Lewis & Clark College, protecting students everywhere from the dangers of underage alcohol consumption.', u'talking_about_count': 1, u'name': u'OLCC-gull', u'has_added_app': False, u'can_post': True, u'link': u'https://www.facebook.com/OLCCgull', u'likes': 137, u'parking': {u'street': 0, u'lot': 0, u'valet': 0}, u'is_community_page': False, u'were_here_count': 0, u'checkins': 0, u'id': u'214115452099605', u'is_published': True}, {u'category': u'Community', u'username': u'TexasAMConfessions20', u'about': u'Post yours 100% ANONYMOUSLY. Forever Free: \nhttps://docs.google.com/forms/d/13EViaeAbmGoHprAt1vSH46SDBfxPz4rISpttBin62NE/viewform', u'talking_about_count': 1805, u'description': u'This page, run for and by students, is not officially affiliated with Texas A&M University or the Texas A&M System. \n\nFans of the page may post confessions anonymously and for free without charge. Likewise students may view, add or make comments, and engage in dialogue freely and without charge. \n\nThe page reserves the right to remove or block any content which violates the terms and conditions of Facebook or at request of poster.', u'has_added_app': False, u'can_post': True, u'cover': {u'source': u'https://scontent-a.xx.fbcdn.net/hphotos-xfp1/t31.0-8/s720x720/1501434_674492252589934_518311603_o.jpg', u'cover_id': 674492252589934, u'offset_x': 0, u'offset_y': 50}, u'name': u'Ag Confessions', u'website': u'https://docs.google.com/forms/d/13EViaeAbmGoHprAt1vSH46SDBfxPz4rISpttBin62NE/viewform', u'link': u'https://www.facebook.com/TexasAMConfessions20', u'likes': 10276, u'parking': {u'street': 0, u'lot': 0, u'valet': 0}, u'is_community_page': False, u'were_here_count': 0, u'checkins': 0, u'id': u'527813577257803', u'is_published': True}, {u'category': u'Community', u'username': u'peppconfessions', u'about': u'Even a small private Christian school has its secrets... 
Share your confessions here at http://tiny.cc/7lc3cx', u'talking_about_count': 312, u'description': u'A page for the Pepperdine University community to post their confessions anonymously.\n\nThis page is in no way affiliated with Pepperdine University or its staff.', u'has_added_app': False, u'can_post': False, u'cover': {u'source': u'https://fbcdn-sphotos-g-a.akamaihd.net/hphotos-ak-xfp1/v/t1.0-9/p180x540/994045_1426044207612466_1153793135_n.jpg?oh=1167e935f72c7003deb72570e6d3e183&oe=549BE671&__gda__=1419406570_71a4109da6e755203f3c519cee6b0b6c', u'cover_id': 1426044207612466, u'offset_x': 0, u'offset_y': 55}, u'name': u'Pepperdine University Confessions', u'link': u'https://www.facebook.com/peppconfessions', u'likes': 1083, u'parking': {u'street': 0, u'lot': 0, u'valet': 0}, u'is_community_page': False, u'were_here_count': 0, u'checkins': 0, u'id': u'1426040870946133', u'is_published': True}, {u'category': u'Community', u'username': u'LCConfessions', u'about': u"C'mon, Lewis & Clark, we all have secrets. Let 'em out here, anonymously. It's cathartic! Click this link to fill out the form: http://tinyurl.com/LCconfessions", u'talking_about_count': 25, u'description': u"A disclaimer:\nThis page is in no way associated with Lewis & Clark College's faculty, administrators, and staff; it is student-run and posts student submissions. Its posts are not intended to make objective statements about the college, and do not necessarily reflect the views of the page's administrator or of the entire student body. This is just our sounding board. \n\nLONG STORY SHORT: Don't take this page too seriously.\n\nAnother disclaimer:\nLewis & Clark is no ordinary school, so this is no ordinary confessions page. We do not post content that personally identifies anyone (even in a joking or inoffensive manner), and we don't post anything mean-spirited or that insults a particular social group on campus. 
This is a safe space.\nIf you really want your secret posted, try to make it personal--a secret that is yours to give out. Bonus points if it's hilarious. Secrets about something you saw are also fine, but remember to keep identifying details vague.\nWe still don't post all the good confessions we get, just for the sake of not spamming everyone's newsfeed. But rest assured, we read them, and we love them.\n\nHappy confessing!\n\n\nEmail the admin: lewisandclarkconfessions@gmail.com", u'has_added_app': False, u'can_post': True, u'cover': {u'source': u'https://fbcdn-sphotos-c-a.akamaihd.net/hphotos-ak-xfp1/t31.0-8/s720x720/903074_155770951255982_235011593_o.jpg', u'cover_id': 155770951255982, u'offset_x': 0, u'offset_y': 59}, u'name': u'Lewis & Clark Confessions', u'link': u'https://www.facebook.com/LCConfessions', u'likes': 1306, u'parking': {u'street': 0, u'lot': 0, u'valet': 0}, u'is_community_page': False, u'were_here_count': 0, u'checkins': 0, u'id': u'134331240066620', u'is_published': True}, {u'category': u'Community', u'username': u'petroliumengineeringbuildingstatueisreallyhot', u'about': u'Once there was a fake confessions page, but all that changed when the fire nation attacked. \nPage for funny posts, trolls, and pranks. 
Cstat based.', u'talking_about_count': 3, u'name': u'Texas A&M: Department of Trolls', u'has_added_app': False, u'can_post': True, u'cover': {u'source': u'https://fbcdn-sphotos-a-a.akamaihd.net/hphotos-ak-xfp1/v/t1.0-9/10389249_326711990837859_4680051574179118442_n.jpg?oh=e49b6c100c5a9ed92fa4ccb1ddc309df&oe=5494F680&__gda__=1419176969_17bdee9ce41050adbcb8b0e2d1acf543', u'cover_id': 326711990837859, u'offset_x': 0, u'offset_y': 0}, u'link': u'https://www.facebook.com/petroliumengineeringbuildingstatueisreallyhot', u'likes': 321, u'parking': {u'street': 0, u'lot': 0, u'valet': 0}, u'is_community_page': False, u'were_here_count': 0, u'checkins': 0, u'id': u'230522747123451', u'is_published': True}, {u'category': u'Cause', u'username': u'NYUSecrets', u'about': u"Share your secrets. Let's build a community--once and for all.\nFAQ: http://nyusecrets.tumblr.com/post/86017930330/frequently-asked-questions", u'talking_about_count': 7330, u'description': u"NYU is a strange place sometimes. It's hard to find a community. It's hard to find a place to fit in. If you're struggling with your time here, this is the place to vent. Share your secrets. Let's build a community--once and for all. Completely unaffiliated with New York University.\n", u'has_added_app': False, u'can_post': True, u'cover': {u'source': u'https://scontent-a.xx.fbcdn.net/hphotos-xap1/t31.0-8/s720x720/1912396_763842263651300_8650989269152821172_o.jpg', u'cover_id': 763842263651300, u'offset_x': 0, u'offset_y': 0}, u'mission': u'To give NYU a community. ', u'name': u'NYU Secrets', u'link': u'https://www.facebook.com/NYUSecrets', u'likes': 30920, u'parking': {u'street': 0, u'lot': 0, u'valet': 0}, u'is_community_page': False, u'were_here_count': 0, u'checkins': 0, u'id': u'455528264482703', u'is_published': True}]
'''
def assignGenderStats():
    """Fetch a gender mapping via ``assignGender``; nothing is returned."""
    # NOTE(review): assignGender, as defined in this file, requires
    # (selectedComments, postID, graph), so this zero-argument call looks
    # like it raises TypeError -- confirm which function was intended.
    genderDict = assignGender()
def selectCodedPosts():
    """Partition the comment lists of all posts by presence in ``StatsForPosts.csv``.

    Post IDs are read from the first "|"-separated column of
    ``StatsForPosts.csv`` (header skipped).  ``processCommentList()``
    supplies the comments, indexed here as ``comments[uni][postID]``.

    Returns:
        A ``(selectedComments, restComments)`` tuple of dicts keyed by post
        ID: the first holds posts listed in the CSV, the second all others.
    """
    # Context manager closes the handle; a set makes the per-post membership
    # test constant-time instead of a scan over every CSV row.
    with open("StatsForPosts.csv") as postsFile:
        listedPostIDs = set(row.split("|")[0] for row in postsFile.readlines()[1:])

    comments = processCommentList()
    selectedComments = {}
    restComments = {}
    for uni in comments:
        for postID, postComments in comments[uni].items():
            if postID in listedPostIDs:
                selectedComments[postID] = postComments
            else:
                restComments[postID] = postComments
    return selectedComments, restComments
def assignGender(selectedComments, postID, graph):
    """Count male and female commenters on one post.

    Args:
        selectedComments: dict mapping post ID to an iterable of comment
            dicts, each with a ``"userID"`` key.
        postID: the post whose commenters are counted.
        graph: object exposing ``get_object(userID)`` that returns the
            user's details as a mapping.

    Returns:
        ``(male, female)`` counts.  A commenter is counted only when the
        details contain both ``"gender"`` and ``"first_name"``; lookups that
        raise are skipped.  ``(0, 0)`` when the post is not in
        ``selectedComments``.
    """
    male = female = 0
    if postID not in selectedComments:
        return male, female

    for comment in selectedComments[postID]:
        userID = comment["userID"]
        try:
            fbDetails = graph.get_object(userID)
            if "gender" in fbDetails and 'first_name' in fbDetails:
                # "female" must be tested first because it contains "male".
                if "female" in fbDetails["gender"]:
                    female += 1
                elif "male" in fbDetails["gender"]:
                    male += 1
        except Exception:
            # Skip users that cannot be fetched or parsed; unlike a bare
            # ``except`` this lets Ctrl-C and SystemExit through.
            continue
    return male, female
def assignGenderID(userID, graph):
    """Return the gender label for a single user.

    Args:
        userID: identifier passed to ``graph.get_object``.
        graph: object exposing ``get_object(userID)`` that returns the
            user's details as a mapping.

    Returns:
        ``"Female"`` or ``"Male"`` according to the ``"gender"`` entry of the
        fetched details, otherwise the string ``"None"`` (no gender entry,
        an unrecognised value, or the lookup raised).
    """
    try:
        fbDetails = graph.get_object(userID)
        if "gender" in fbDetails:
            gender = fbDetails["gender"]
            # "female" must be tested first because it contains "male".
            if "female" in gender:
                return "Female"
            if "male" in gender:
                return "Male"
    except Exception:
        # Best effort: a failed lookup is reported as "None".  Unlike a bare
        # ``except`` this lets Ctrl-C and SystemExit through.
        pass
    return "None"
def detectGenderCodedPosts(apiKey):
    """Re-scrape genders for previously unresolved users and pickle the results.

    Reads ``StatsForPosts.csv`` and ``userGender2.pkl``; writes
    ``userGender4.pkl`` and ``userIDict4.pkl``.

    Args:
        apiKey: passed to ``facebook.GraphAPI``.

    Returns:
        ``(selectedComments, userGender, userIDict)``: the comment dict taken
        from ``selectCodedPosts()``, the user ID -> gender label mapping built
        by ``assignGenderID``, and the post ID -> distinct user IDs mapping.
    """
    # NOTE(review): index 1 is the second dict returned by selectCodedPosts,
    # i.e. posts NOT listed in StatsForPosts.csv; the loop below looks up IDs
    # read from that same file, so the membership test may never succeed --
    # confirm whether index 0 was intended.
    selectedComments = selectCodedPosts()[1]
    print "here in selected"
    lines = open("StatsForPosts.csv").readlines()[1:]
    graph = facebook.GraphAPI(apiKey)
    userIDs, userIDict = [],{}
    userGender = {}
    # NOTE(review): `lines` already excludes the first row, so this second
    # [1:] also drops the first data row -- confirm that is intended.
    for line in lines[1:]:
        col = line.split("|")
        postID = col[0]
        isCoded = col[11]
        # Rows whose 12th column contains "Y" are skipped.
        if "Y" in isCoded:
            continue
        ids = []
        if postID in selectedComments.keys():
            for comment in selectedComments[postID]:
                userIDs += [comment["userID"]]
                ids += [comment["userID"]]
        # De-duplicate the commenters of this post.
        ids = list(set(ids))
        userIDict[postID] = ids
    print "here"
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # safe if userGender2.pkl is trusted.  The handle is also never closed.
    scrapedIDict = pickle.load(open('userGender2.pkl', 'rb'))
    # Only users whose stored label contains "None" are looked up again.
    remaining = []
    for userID in scrapedIDict.keys():
        if "None" in scrapedIDict[userID]:
            remaining += [userID]
    # This replaces the userIDs collected in the loop above.
    userIDs = remaining
    for userID in userIDs:
        userGender[userID] = assignGenderID(userID,graph)
        print userID, userGender[userID]
    output = open('userGender4.pkl', 'wb')
    pickle.dump(userGender, output)
    output.close()
    output = open('userIDict4.pkl', 'wb')
    pickle.dump(userIDict, output)
    output.close()
    return selectedComments, userGender, userIDict
def _tallyCommenterGenders(userIDs, userGender):
    """Count the gender labels that ``userGender`` holds for ``userIDs``."""
    labels = []
    for userID in userIDs:
        label = userGender[userID]
        if isinstance(label, (list, tuple)):
            labels.extend(label)
        else:
            # A label is a single string such as "Male".  The previous
            # ``list += label`` split it into characters, so the
            # Male/Female/None counts were always zero.
            labels.append(label)
    return Counter(labels)


def assignGenderCodedPosts():
    """Write ``StatsForCodedPostsGender.csv`` with per-post commenter gender counts.

    Each processed row of ``StatsForPostsCoded.csv`` is copied to the output
    with three extra "|"-separated columns: the number of commenters labelled
    ``"Male"``, ``"Female"`` and ``"None"``.  Labels come from
    ``userGender.pkl`` (user ID -> label) and the commenters of each post
    from ``userIDict.pkl`` (post ID -> user IDs); posts absent from the first
    dict returned by ``selectCodedPosts()`` get zero counts.

    Raises:
        KeyError: if a selected post is missing from ``userIDict.pkl`` or one
            of its users is missing from ``userGender.pkl``.
    """
    selectedComments = selectCodedPosts()[0]
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # safe while these pickles are produced locally by this project.
    with open('userGender.pkl', 'rb') as pickled:
        userGender = pickle.load(pickled)
    with open('userIDict.pkl', 'rb') as pickled:
        userIDict = pickle.load(pickled)

    header = 'PostID | uni | Size | Religious | State | Politics | Tuition | Post | numLikes | numComments | wordCount | isCoded | Taboo | Stigma | isQuestion | questionType | Loneliness | Stress | Victim|No. of Male Commentors|No. of Female Commentors|No. of None Commentors\n'
    # The context manager guarantees the output is flushed and closed.
    with open("StatsForCodedPostsGender.csv", "w") as f:
        f.write(header)
        with open("StatsForPostsCoded.csv") as codedFile:
            lines = codedFile.readlines()[1:]
        # NOTE(review): `lines` already excludes the first row, so this
        # second [1:] also drops the first data row -- kept as is; confirm
        # that it is intended.
        for line in lines[1:]:
            postID = line.split("|")[0]
            commenters = userIDict[postID] if postID in selectedComments else []
            gender = _tallyCommenterGenders(commenters, userGender)
            print(gender)
            # Counter yields 0 for labels that never occurred.
            f.write(line.replace("\n", "") + "|" + str(gender["Male"]) + "|" + str(gender["Female"]) + "|" + str(gender["None"]) + "\n")
# Runs at module load (there is no __main__ guard): the first command-line
# argument is handed to detectGenderCodedPosts as its apiKey, so loading the
# module without that argument raises IndexError.
detectGenderCodedPosts(sys.argv[1])