-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathS3SingleSpillShuffleMapOutputWriter.scala
65 lines (56 loc) · 2.18 KB
/
S3SingleSpillShuffleMapOutputWriter.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
//
// Copyright 2023- IBM Inc. All rights reserved
// SPDX-License-Identifier: Apache 2.0
//
package org.apache.spark.shuffle
import org.apache.spark.TaskContext
import org.apache.spark.internal.Logging
import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter
import org.apache.spark.shuffle.helper.{S3ShuffleDispatcher, S3ShuffleHelper}
import org.apache.spark.storage.ShuffleDataBlockId
import org.apache.spark.util.Utils
import java.io.{File, FileInputStream}
import java.nio.file.{Files, Path}
/**
 * [[SingleSpillShuffleMapOutputWriter]] that publishes the single spill file of a map task as the
 * shuffle data block identified by `(shuffleId, mapId, NOOP_REDUCE_ID)`.
 *
 * When the dispatcher reports a local root the spill file is moved into place with NIO;
 * otherwise its bytes are copied into the stream returned by `dispatcher.createBlock`.
 * Afterwards the checksums (only if `dispatcher.checksumEnabled`) and the partition lengths are
 * written through [[S3ShuffleHelper]].
 *
 * @param shuffleId id of the shuffle the map output belongs to
 * @param mapId     id of the map output being written
 */
class S3SingleSpillShuffleMapOutputWriter(shuffleId: Int, mapId: Long)
    extends SingleSpillShuffleMapOutputWriter
    with Logging {

  private lazy val dispatcher = S3ShuffleDispatcher.get

  /**
   * Transfers `mapSpillFile` to the shuffle store and records its metadata.
   *
   * @param mapSpillFile     spill file holding the map output
   * @param partitionLengths per-partition lengths; their sum is logged as the number of bytes
   *                         written and the array is handed to
   *                         `S3ShuffleHelper.writePartitionLengths`
   * @param checksums        per-partition checksums, written only when
   *                         `dispatcher.checksumEnabled` is set
   */
  override def transferMapSpillFile(
      mapSpillFile: File,
      partitionLengths: Array[Long],
      checksums: Array[Long]
  ): Unit = {
    val block = ShuffleDataBlockId(shuffleId, mapId, IndexShuffleBlockResolver.NOOP_REDUCE_ID)
    if (dispatcher.rootIsLocal) {
      moveSpillFile(mapSpillFile, block, partitionLengths.sum)
    } else {
      copySpillFile(mapSpillFile, block)
    }
    // The metadata is written only after the data block has been transferred.
    if (dispatcher.checksumEnabled) {
      S3ShuffleHelper.writeChecksum(shuffleId, mapId, checksums)
    }
    S3ShuffleHelper.writePartitionLengths(shuffleId, mapId, partitionLengths)
  }

  /** Moves the spill file to the block's path with NIO and logs timing statistics. */
  private def moveSpillFile(mapSpillFile: File, block: ShuffleDataBlockId, bytes: Long): Unit = {
    val startNanos = System.nanoTime()
    val path = dispatcher.getPath(block)
    // Use the decoded path: getRawPath keeps percent-escapes (e.g. "%20" for a space), which
    // would make NIO look for a literally different file name than the directory created below.
    val fileDestination = path.toUri.getPath
    val dir = path.getParent
    if (!dispatcher.fs.exists(dir)) {
      dispatcher.fs.mkdirs(dir)
    }
    Files.move(mapSpillFile.toPath, Path.of(fileDestination))
    val elapsedNanos = System.nanoTime() - startNanos

    val tc = TaskContext.get()
    val sId = tc.stageId()
    val sAt = tc.stageAttemptNumber()
    val t = elapsedNanos / 1000000
    // Derive the bandwidth from the nanosecond measurement: a move frequently completes in less
    // than one millisecond, and dividing by the truncated millisecond value would then report
    // Infinity (or NaN for an empty file).
    val bw =
      if (elapsedNanos > 0) bytes.toDouble / (elapsedNanos.toDouble / 1e9) / (1024 * 1024)
      else 0.0
    logInfo(
      s"Statistics: Stage ${sId}.${sAt} TID ${tc.taskAttemptId()} -- " +
        s"Writing ${block.name} ${bytes} took ${t} ms (${bw} MiB/s)"
    )
  }

  /** Copies the spill file into the stream created by the dispatcher for `block`. */
  private def copySpillFile(mapSpillFile: File, block: ShuffleDataBlockId): Unit = {
    val in = new FileInputStream(mapSpillFile)
    val out =
      try {
        new S3MeasureOutputStream(dispatcher.createBlock(block), block.name)
      } catch {
        case e: Throwable =>
          // copyStream never runs in this case, so nothing else would close the input stream.
          try in.close()
          catch { case closeError: Throwable => e.addSuppressed(closeError) }
          throw e
      }
    // closeStreams = true: copyStream is asked to close both streams.
    Utils.copyStream(in, out, closeStreams = true)
  }
}