-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
129 lines (110 loc) · 3.94 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
@author: Soumil Baldota
"""
import os
import json
import streamlit as st
import plotly
import pandas as pd
import numpy as np
import pandas as pd
import re
from textblob import TextBlob
from wordcloud import WordCloud
import seaborn as sns
import matplotlib.pyplot as plt
import cufflinks as cf
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.io as pio
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import subprocess
import sys
# Colour palette shared by the bar and pie traces built in
# categorical_variable_summary (passed as the marker colours of both).
constraints = [
    '#B34D22',
    '#EBE00C',
    '#1FEB0C',
    '#0C92EB',
    '#EB0CD5',
]

# NOTE(review): runs at import time; looks like leftover debug output -- confirm
# whether anything relies on it before removing.
print("hello world")
def categorical_variable_summary(df, column_name):
    """Build a two-panel plotly figure summarising one categorical column.

    The left panel ('Countplot') is a bar chart of how many rows hold each
    distinct value of ``df[column_name]``; the right panel ('Percentage') is
    a pie chart of the same counts.  Both traces take their colours from the
    module-level ``constraints`` palette.

    Args:
        df: DataFrame that contains ``column_name``.
        column_name: Column to summarise; also used as the figure title and
            as the name of both traces.

    Returns:
        The plotly figure holding the bar and pie subplots.
    """
    # Count once and reuse the result for every trace argument instead of
    # recomputing value_counts() for each one.
    counts = df[column_name].value_counts()
    frequencies = counts.values.tolist()

    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=('Countplot', 'Percentage'),
        # A pie trace needs a 'domain' cell; the bar chart uses a normal xy cell.
        specs=[[{"type": "xy"}, {'type': 'domain'}]],
    )

    bar_trace = go.Bar(
        y=frequencies,
        # Stringify the categories so numeric values (e.g. ratings) are drawn
        # as discrete labels.
        x=[str(category) for category in counts.index],
        text=frequencies,
        textfont=dict(size=14),
        name=column_name,
        textposition='auto',
        showlegend=False,
        marker=dict(
            color=constraints,
            line=dict(color='#DBE6EC', width=1),
        ),
    )
    fig.add_trace(bar_trace, row=1, col=1)

    pie_trace = go.Pie(
        labels=counts.index,
        values=counts.values,
        textfont=dict(size=18),
        textposition='auto',
        showlegend=False,
        name=column_name,
        marker=dict(colors=constraints),
    )
    fig.add_trace(pie_trace, row=1, col=2)

    fig.update_layout(
        title={
            'text': column_name,
            'y': 0.9,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        template='plotly_white',
    )
    return fig
def _sentiment_label(score):
    """Map a VADER score dict to 'Negative', 'Positive' or 'Neutral'."""
    negative = score['neg']
    positive = score['pos']
    if negative > positive:
        return "Negative"
    if positive > negative:
        return "Positive"
    return "Neutral"


def analyser(car='a', dataset='a'):
    """Load one car's reviews and chart their ratings and sentiment.

    Reads ``./data/{dataset}/{car}.json``, falling back to
    ``./{dataset}/{car}.json`` when the first path does not exist.  The
    parsing loop expects the JSON document to be an object whose keys are
    integer-like rating strings and whose values are lists of review texts.

    Args:
        car: File stem of the review file to load.
        dataset: Directory name of the review source.

    Returns:
        A two-element list of plotly figures: the summary of the 'overall'
        rating column followed by the summary of the 'sentiment' column.

    Raises:
        FileNotFoundError: If neither candidate path exists.
        ValueError: If a rating key cannot be converted to ``int``.
    """
    path = rf"./data/{dataset}/{car}.json"
    if not os.path.exists(path):
        path = rf"./{dataset}/{car}.json"

    # Context manager so the handle is closed even if JSON parsing fails.
    with open(path, encoding="utf-8") as handle:
        data = json.load(handle)

    rows = []
    for rating, reviews in data.items():
        rows.extend((review, int(rating)) for review in reviews)
    df = pd.DataFrame(rows, columns=['reviews', 'overall'])

    # Normalise the text: every non-letter character becomes a space, then
    # everything is lower-cased.
    df["reviews"] = df["reviews"].map(lambda x: re.sub("[^a-zA-Z]", ' ', str(x)))
    df["reviews"] = df["reviews"].str.lower()

    # TextBlob polarity ranges from -1.0 (negative) to 1.0 (positive);
    # subjectivity ranges from 0.0 (objective) to 1.0 (subjective).
    blob_scores = [TextBlob(text).sentiment for text in df['reviews']]
    df['polarity'] = [blob.polarity for blob in blob_scores]
    df['subjectivity'] = [blob.subjectivity for blob in blob_scores]

    # Build the whole column at once so it exists even when there are no
    # reviews, instead of creating it implicitly row by row.
    sent_analyzer = SentimentIntensityAnalyzer()
    df['sentiment'] = [
        _sentiment_label(sent_analyzer.polarity_scores(text))
        for text in df['reviews']
    ]

    return [
        categorical_variable_summary(df, 'overall'),
        categorical_variable_summary(df, 'sentiment'),
    ]
def main():
    """Render the Streamlit page for picking a car/dataset and viewing charts.

    Shows two select boxes (car model and review dataset) and two buttons:
    'scrape(cardekho)' runs ``cardekho_scraper.py`` with the current Python
    interpreter, and 'analyse' calls ``analyser`` for the selection and draws
    the figures it returns.
    """
    st.title('Mahindra Data Analysis')

    car = st.selectbox(
        'Select name of the car',
        [
            "thar", "scorpio", "xuv700",
            "scorpio-classic", "xuv300",
            "bolero", "bolero-neo", "xuv400-ev",
            "kuv-100-nxt", "alturas-g4", "marazzo",
        ],
    )
    dataset = st.selectbox(
        'Select dataset to analyse',
        [
            "cardekho",
            "carwale",
        ],
    )

    if st.button('scrape(cardekho)'):
        # The script path is relative, so it is resolved against the current
        # working directory of the Streamlit process.
        result = subprocess.run([sys.executable, 'cardekho_scraper.py'])
        # Surface a failed scrape instead of silently ignoring the exit code.
        if result.returncode != 0:
            st.error(
                f'cardekho_scraper.py exited with status {result.returncode}'
            )

    if st.button('analyse'):
        try:
            plots = analyser(dataset=dataset, car=car)
        except FileNotFoundError:
            # No JSON file for this car/dataset pair: show a readable message
            # rather than a raw traceback.
            st.error(
                f'No review data found for "{car}" in the "{dataset}" dataset.'
            )
        else:
            for figure in plots:
                st.plotly_chart(figure, use_container_width=True)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()