-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtweet_world.py
executable file
·84 lines (60 loc) · 2 KB
/
tweet_world.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#! /usr/bin/python
__author__ = '[email protected]'
# Patrick Trinkle
# Summer 2011
#
# ---...
#
import os
import re
import sys
import sqlite3
import datetime
sys.path.append("tweetlib")
import tweetdatabase as td
def addPlaces(current_places, location):
    """Tally one geo string into the per-coordinate counter.

    ``location`` is expected to look like ``"<lat>, <long>"``.  When it does,
    the entry of ``current_places`` keyed by ``"<long> <lat>"`` (both values
    rendered with ``%f``) is incremented, starting at 1 the first time a
    coordinate is seen.

    Strings that do not have the ``"a, b"`` shape, or whose two parts cannot
    be parsed as floats, are ignored.

    Args:
        current_places: dict mapping ``"long lat"`` strings to counts;
            mutated in place.
        location: the geo string to parse.
    """
    latlong_m = re.search(r"(.+?), (.+?)$", location)
    if not latlong_m:
        return

    try:
        lat = float(latlong_m.group(1))
        longitude = float(latlong_m.group(2))
    except ValueError:
        # The shape matched but the pieces are not numbers (e.g. "foo, bar").
        return

    # Going through float normalises the text, so "1.5, 2" and "1.50, 2.0"
    # are counted under the same key.
    key = "%f %f" % (longitude, lat)
    current_places[key] = current_places.get(key, 0) + 1
def usage():
    """Print the command-line synopsis for this script to stdout."""
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("usage: %s <database> <out_file>" % sys.argv[0])
def main():
    """Count geotagged tweets per coordinate and write the totals to a file.

    Reads the non-null ``geo`` values from the ``tweets`` and ``s_tweets``
    tables of the SQLite database named by ``sys.argv[1]``, tallies them with
    ``addPlaces``, and writes one ``<long> <lat> <count>`` line per distinct
    coordinate to the file named by ``sys.argv[2]``.  The elapsed wall-clock
    time is printed at the end.

    Prints the usage text and exits with status -1 when the script is not
    given exactly two arguments.
    """
    # Did they provide the correct args?
    if len(sys.argv) != 3:
        usage()
        sys.exit(-1)

    start_time = datetime.datetime.now()

    # --------------------------------------------------------------------------
    # Parse the parameters.
    database = sys.argv[1]
    output_file = sys.argv[2]

    queries = (
        "select geo from tweets where geo is not null",
        "select geo from s_tweets where geo is not null",
    )

    # --------------------------------------------------------------------------
    # Search the database file for geo values.
    places = {}
    conn = sqlite3.connect(database)
    try:
        # sqlite3.Row lets the column be read by name below.
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        for query in queries:
            for row in c.execute(query):
                addPlaces(places, row['geo'])
    finally:
        # Release the database handle even if a query or a row fails.
        conn.close()

    # --------------------------------------------------------------------------
    # Done.
    with open(output_file, "w") as fout:
        fout.write("# long lat count\n")
        for place in places:
            fout.write("%s %d\n" % (place, places[place]))

    # One portable print call; the double space reproduces the output of the
    # former two-statement Python 2 form.
    print("total runtime:  %s" % (datetime.datetime.now() - start_time,))
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()