File tree 1 file changed +36
-0
lines changed
1 file changed +36
-0
lines changed Original file line number Diff line number Diff line change
1
+ """
2
+ Spark application that reads a CloudBurst sequence file and converts it
3
+ into text for review and analysis.
4
+ """
5
+
6
+ ##########################################################################
7
+ ## Imports
8
+ ##########################################################################
9
+
10
+ import sys
11
+
12
+ from brisera .records import record_from_bytes
13
+ from pyspark import SparkConf , SparkContext
14
+
15
# Script name interpolated into the usage message below.
PROG_NAME = "read_burst.py"
# Application name registered with Spark through SparkConf.setAppName.
APP_NAME = "CloudBurst Reader"

if __name__ == "__main__":

    # Exactly two positional arguments are required: the input sequence
    # file and the output location handed to saveAsTextFile.
    if len(sys.argv) != 3:
        sys.stderr.write("Usage: %s sequences.br output\n " % PROG_NAME)
        sys.exit(-1)

    infile = sys.argv[1]
    outpath = sys.argv[2]

    conf = SparkConf().setAppName(APP_NAME)
    sc = SparkContext(conf=conf)

    try:
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Converting Sequence File %s to Text using directory %s" % (infile, outpath))

        # sequenceFile yields (key, value) pairs. The value is passed through
        # record_from_bytes (imported from brisera.records; presumably it
        # decodes the raw bytes into a record -- confirm against that module).
        # Indexing the pair replaces tuple-parameter unpacking, which
        # Python 3 rejects (PEP 3113).
        sequences = sc.sequenceFile(infile)
        sequences = sequences.map(lambda kv: (kv[0], record_from_bytes(kv[1])))
        sequences.saveAsTextFile(outpath)
    finally:
        # Release the Spark context even if the job fails part-way through.
        sc.stop()
35
+
36
+
You can’t perform that action at this time.
0 commit comments