-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtwitter_api.py
More file actions
127 lines (89 loc) · 3.03 KB
/
twitter_api.py
File metadata and controls
127 lines (89 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# --- Imports and Twitter API authentication --------------------------------
from cgi import test  # NOTE(review): appears unused anywhere below, and the cgi module is deprecated (removed in Python 3.13) -- confirm and drop.
import tweepy
import configparser
import pandas as pd
import numpy as np
from emot.emo_unicode import UNICODE_EMOJI  # emoji -> ":description:" mapping used by convert_emojis()
import re
from nltk.tokenize import wordpunct_tokenize
#read configs
# Credentials live in config.ini under a [twitter] section; keep that file
# out of version control.
config = configparser.ConfigParser()
config.read('config.ini')
api_key = config['twitter']['api_key']
api_key_secret = config['twitter']['api_key_secret']
access_token = config['twitter']['access_token']
access_token_secret = config['twitter']['access_token_secret']
#authentication
# OAuth 1.0a user-context authentication: app keys first, then the
# user access token pair.
auth = tweepy.OAuth1UserHandler(api_key,api_key_secret)
auth.set_access_token(access_token,access_token_secret)
api = tweepy.API(auth)
#q = success also gives us the tweets to be searched
# Fetch up to n English tweets matching the query; "-filter:links" and
# "-filter:retweets" exclude link-bearing tweets and retweets, and
# tweet_mode="extended" returns the untruncated text in "full_text".
# NOTE(review): n=1,600,000 far exceeds standard API rate limits -- the
# cursor will stop (or stall on rate limiting) well before that; confirm
# the intended sample size.
n=1600000
tweets=tweepy.Cursor(api.search_tweets,q="peace -filter:links -filter:retweets",lang="en",tweet_mode="extended").items(n)
# Materialize the raw JSON payload of every tweet so it can be indexed
# repeatedly below.
tweets_json = [tweet._json for tweet in tweets]
x = len(tweets_json)
# To save the data as collected from twitter.
# BUG FIX: the original opened "basic_data.json" in "w" mode inside the
# loop, truncating the file on every iteration so only the LAST tweet was
# ever saved (and concatenated dumps would not be valid JSON anyway).
# Dump the whole list once instead.
import json
with open("basic_data.json", "w") as outfile:
    json.dump(tweets_json, outfile)
# Empty frame with the Sentiment140-style column layout, filled in below.
df = pd.DataFrame(columns = ['target','ids','date','flag','user','text'])
def convert_emojis(text, emoji_map=None):
    """Replace each emoji in *text* with its space-padded textual description.

    Commas and colons are stripped from the description and every underscore
    in the resulting text is turned into a space (e.g. ":red_heart:" becomes
    " red heart "), so descriptions tokenize as ordinary words downstream.

    Parameters
    ----------
    text : str
        Input string, possibly containing emoji characters.
    emoji_map : dict, optional
        Mapping of emoji -> ":description:" strings. Defaults to
        emot's UNICODE_EMOJI table (backward compatible).

    Returns
    -------
    str
        The text with emojis replaced and underscores converted to spaces.
    """
    if emoji_map is None:
        emoji_map = UNICODE_EMOJI
    for emoji in emoji_map:
        # Pad with spaces so the description does not fuse with neighbours.
        desc = emoji_map[emoji].replace(",", "").replace(":", "")
        text = text.replace(emoji, " " + desc + " ")
    # PERF FIX: the original ran this full-text replace once per emoji in
    # the map (thousands of redundant passes). A single pass at the end
    # produces the identical final string.
    return text.replace("_", " ")
# Column-wise extraction from the raw tweet payloads.
# NOTE(review): "user_id" actually holds the TWEET id ("id" field of the
# status, not of the user) -- name kept for compatibility; confirm intent.
cleaned_texts = [convert_emojis(t["full_text"]) for t in tweets_json]
tweet_ids = [t["id"] for t in tweets_json]
screen_names = [t["user"]["name"] for t in tweets_json]
# Rebuild the timestamp as "<weekday month day time> PDT <year>" by
# splicing out the "+0000" offset from Twitter's created_at string.
created = [t["created_at"][:-10] + " PDT " + t["created_at"][-5:] for t in tweets_json]
artext = pd.Series(cleaned_texts)
timing = pd.Series(created)
user_name = pd.Series(screen_names)
user_id = pd.Series(tweet_ids)
df["text"] = artext
df["ids"] = user_id
df["date"] = timing
df["user"] = user_name
# Constant placeholder column (Sentiment140 convention for "no query").
df["flag"] = "NO_QUERY"
# Work on a copy so the raw DataFrame is preserved.
test1 = df.copy()
test1 = test1.apply(lambda col: col.astype(str).str.lower())
#replace all the non-alphabetic elements with 1 and keep
# letters, dot and exclamation mark, because a lot of words are attached
# to ! or . as a form of expression.
# BUG FIX: the original class "[^A-za-z.!+ ]" used the range A-z, which
# also spans the ASCII punctuation [ \ ] ^ _ ` sitting between 'Z' and
# 'a', so those characters were wrongly kept. "A-Za-z" matches letters
# only; raw string avoids escape surprises.
test1["text"] = test1["text"].apply(lambda s: re.sub(r"[^A-Za-z.!+ ]", '1', s))
# Keep only purely alphabetic tokens, rejoined with single spaces; this
# drops the '1' placeholders and any token they touched.
test1["text"] = test1["text"].apply(
    lambda s: " ".join(w for w in wordpunct_tokenize(s.strip()) if w.isalpha())
)
x = len(test1["text"])
#TextBlob api
# Label each cleaned tweet with the Sentiment140 target encoding:
# 0 = negative, 2 = neutral (or empty text), 4 = positive, based on the
# mean sentence polarity reported by TextBlob.
from textblob import TextBlob
labels = []
for i in range(x):
    blob = TextBlob(test1["text"][i])
    sentences = blob.sentences
    if not sentences:
        # Nothing to score (text was emptied by cleaning) -> neutral.
        labels.append(2)
        continue
    mean_polarity = sum(s.sentiment.polarity for s in sentences) / len(sentences)
    if mean_polarity > 0:
        labels.append(4)
    elif mean_polarity < 0:
        labels.append(0)
    else:
        labels.append(2)
sent = pd.Series(labels, dtype="string")
test1["target"] = sent
print(test1)
test1.to_csv('testntrain.csv')