-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: CacheRdd.java
56 lines (49 loc) · 1.85 KB
/
CacheRdd.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package base;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import java.util.Collections;
public class CacheRdd {
    /**
     * Demonstrates RDD caching: the source RDD is materialized once via
     * {@code cache()} and reused by two downstream branches, which are then
     * combined with {@code zip}.
     *
     * DAG shape:
     * <pre>
     *   map -> plus  --\
     *      \-> multi --+-> zip
     * </pre>
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources guarantees the session is stopped even if a job fails;
        // SparkSession implements Closeable (close() delegates to stop()).
        try (SparkSession spark = SparkSession
                .builder()
                .appName("test")
                .master("local[2]")
                .getOrCreate()) {
            JavaSparkContext javaSparkContext = new JavaSparkContext(spark.sparkContext());
            // Without cache(), the source RDD would be recomputed once per downstream
            // branch (plus and multi), so the "1." line would print twice.
            JavaRDD<Integer> map = javaSparkContext.parallelize(Collections.singletonList(1))
                    .map(i -> {
                        // executes once, thanks to cache() below
                        System.out.println("1. [" + Thread.currentThread() + "]: " + i);
                        return i;
                    })
                    .cache();
            JavaRDD<Integer> plus = map.map(i -> {
                // executes once
                System.out.println("2.1 [" + Thread.currentThread() + "]: " + i);
                return i + 1;
            });
            JavaRDD<Integer> multi = map.map(i -> {
                // executes once
                System.out.println("2.2 [" + Thread.currentThread() + "]: " + i);
                return i * 2;
            });
            // zip pairs elements positionally; both sides have the same single partition
            // and element count because they derive from the same cached parent.
            JavaRDD<Integer> zip = plus.zip(multi).map(tuple -> {
                // executes once
                System.out.println("3. [" + Thread.currentThread() + "]: " + (tuple._1() + tuple._2()));
                return tuple._1() + tuple._2();
            });
            // collect() triggers the whole lineage; expected single element: (1+1) + (1*2) = 4
            zip.collect().forEach(System.out::println);
        }
    }
}