forked from Hongtao-Lin/Car-Info
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtendency-4_9.py
85 lines (72 loc) · 1.63 KB
/
tendency-4_9.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#coding=utf8
#print ", ".join(seg_list)
import MySQLdb
import sys
import jieba
import os
import re
import xlrd
import xlwt
import jieba.posseg as pseg
reload(sys)
sys.setdefaultencoding('utf8')
stopwordsfile=open('stopwords.txt','r')
stopwords=stopwordsfile.read().split()
print stopwords[3].decode('utf8')
seg_list = jieba.cut("他来到了网易杭研大厦".decode("utf-8"))
T=[]
S=[]
S1=[]
S2=[]
S3=[]
sql=''
output = xlwt.Workbook()
tables=[]
tables.append(output.add_sheet("test",cell_overwrite_ok=True))
tables[0].write(0,0,'test1')
count=0
row=1
volumn=1
i=0
try:
conn=MySQLdb.connect(host='127.0.0.1',user='root',passwd='15980ptpt',db='mysql',port=3306)
cur=conn.cursor()
conn.set_character_set('utf8')
cur.execute('SET NAMES utf8;')
cur.execute('SET CHARACTER SET utf8;')
cur.execute('SET character_set_connection=utf8;')
sql='select advantage,shortcoming,other from comments where other!="";'
count=cur.execute(sql)
result=cur.fetchmany(count)
print 'OK',count
for r in result:
S+=r[0].split('。')
S+=r[1].split('。')
S+=r[2].split('。')
for s in S:
S1+=s.split(',')
for s in S1:
S2+=s.split('?')
for s in S2:
S3+=s.split(';')
row=1
volumn=2
for s in S3:
volumn=4
if s!='':
tables[0].write(row,volumn,s.decode('utf8'))
seg_list = pseg.cut(s.decode("utf-8"))
volumn=5
for w in seg_list:
tables[0].write(row,volumn,w.word)
volumn+=1
tables[0].write(row,volumn,w.flag)
volumn+=1
row+=1
print len(S3)
cur.close()
conn.commit()
conn.close()
except MySQLdb.Error,e:
print "Mysql Error %d: %s" % (e.args[0], e.args[1])
output.save('zhutijixing.xls')