Skip to content

Commit 8a69b96

Browse files
committed
burst reader
Former-commit-id: 60dfc0a8566f703e348ed76609e867ae5af52c3a
1 parent 806aeb8 commit 8a69b96

File tree

1 file changed

+36
-0
lines changed

1 file changed

+36
-0
lines changed

Diff for: apps/read_burst.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
2+
Spark application that reads a CloudBurst sequence file and converts it
3+
into text for review and analysis.
4+
"""
5+
6+
##########################################################################
7+
## Imports
8+
##########################################################################
9+
10+
import sys
11+
12+
from brisera.records import record_from_bytes
13+
from pyspark import SparkConf, SparkContext
14+
15+
PROG_NAME = "read_burst.py"
16+
APP_NAME = "CloudBurst Reader"
17+
18+
if __name__ == "__main__":
    # Script entry point: read a sequence file with Spark, decode every value
    # with record_from_bytes, and save the resulting pairs as text.

    # Exactly two positional arguments are required: the input sequence file
    # and the output path.
    if len(sys.argv) != 3:
        sys.stderr.write("Usage: %s sequences.br output\n" % PROG_NAME)
        sys.exit(-1)

    infile = sys.argv[1]
    outpath = sys.argv[2]

    conf = SparkConf().setAppName(APP_NAME)
    sc = SparkContext(conf=conf)

    try:
        # Parenthesised single-argument form prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("Converting Sequence File %s to Text using directory %s" % (infile, outpath))

        # Each element of the RDD is a (key, value) pair.
        sequences = sc.sequenceFile(infile)

        # Index into the pair instead of using a tuple-unpacking lambda
        # (``lambda (k, v): ...``), which is a SyntaxError on Python 3.
        # NOTE(review): record_from_bytes presumably decodes the raw value
        # bytes into a record object -- confirm against brisera.records.
        sequences = sequences.map(lambda kv: (kv[0], record_from_bytes(kv[1])))

        sequences.saveAsTextFile(outpath)
    finally:
        # Always release the Spark context, even if the job fails part-way.
        sc.stop()
35+
36+

0 commit comments

Comments
 (0)