Skip to content

Commit d936588

Browse files
committed
Attempt to fix race conditions with non-atomic file copy for worker jars
We were copying a file used by multiple threads directly to its destination. Problem is that copying is not an atomic action, so we could end up in states where the file wasn't correct when it was used. This should avoid that issue by first copying the file to a temp file and then using an atomic move to move the file to the destination used by other threads.
1 parent b60d0f6 commit d936588

File tree

1 file changed

+27
-9
lines changed

1 file changed

+27
-9
lines changed

src/main/scala/higherkindness/rules_scala/workers/common/AnnexScalaInstance.scala

+27-9
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,22 @@ package workers.common
44
import xsbti.compile.ScalaInstance
55
import java.io.File
66
import java.net.URLClassLoader
7-
import java.nio.file.{FileAlreadyExistsException, Files, Path, Paths}
7+
import java.nio.file.{FileAlreadyExistsException, Files, Path, Paths, StandardCopyOption}
88
import java.util.Properties
99
import java.util.concurrent.ConcurrentHashMap
1010
import scala.collection.immutable.TreeMap
11+
import scala.util.control.NonFatal
1112

1213
object AnnexScalaInstance {
1314
// See the comment on getAnnexScalaInstance as to why this is necessary
1415
private val instanceCache: ConcurrentHashMap[Set[Path], AnnexScalaInstance] =
1516
new ConcurrentHashMap[Set[Path], AnnexScalaInstance]()
1617

18+
// The worker will use this directory to store temp files in order to better perform
19+
// atomic file copies.
20+
private val tmpWorkerJarDir = Paths.get("annex-tmp-worker-jars")
21+
Files.createDirectories(tmpWorkerJarDir)
22+
1723
/**
1824
* We only need to care about minimizing the number of AnnexScalaInstances we create if things are being run as a
1925
* worker. Otherwise just create the AnnexScalaInstance and be done with it because the process won't be long lived.
@@ -106,18 +112,30 @@ object AnnexScalaInstance {
106112
// This should only happen once per compiler version, so it shouldn't happen often.
107113
workRequestJarToWorkerJar.foreach { case (workRequestJar, workerJar) =>
108114
this.synchronized {
109-
// Check for existence of the file just in case another request is also writing these jars
110-
// Copying a file is not atomic, so we don't want to end up in a funky state where two
111-
// copies of the same file happen at the same time and cause something bad to happen.
112-
if (!Files.exists(workerJar)) {
115+
// Do a more atomic copy of a file by creating a temp file and then moving
116+
// the temp file to the destination. We can do a move atomically, but cannot do
117+
// a copy atomically. Copying risks the file existing at the destination in a
118+
// partially completed state.
119+
if (Files.notExists(workerJar)) {
120+
var tmpWorkerJar: Option[Path] = None
113121
try {
122+
tmpWorkerJar = Some(Files.createTempFile(tmpWorkerJarDir, workerJar.getFileName.toString, "tmp"))
123+
Files.copy(workRequestJar, tmpWorkerJar.get, StandardCopyOption.REPLACE_EXISTING)
124+
114125
Files.createDirectories(workerJar.getParent())
115-
Files.copy(workRequestJar, workerJar)
126+
Files.move(tmpWorkerJar.get, workerJar, StandardCopyOption.ATOMIC_MOVE)
116127
} catch {
117-
// We do not care if the file already exists
118-
case _: FileAlreadyExistsException => {}
119-
case e: Throwable => throw new Exception("Error adding file to instance cache", e)
128+
case NonFatal(e) =>
129+
throw new Exception(s"Error copying worker jar: ${workerJar}", e)
130+
} finally {
131+
tmpWorkerJar.foreach { tmpWorkerJar =>
132+
Files.deleteIfExists(tmpWorkerJar)
133+
}
120134
}
135+
} else if (!Files.exists(workerJar)) {
136+
// Files.exists is not the complement of Files.notExists because both return false
137+
// when the existence of the file cannot be determined.
138+
throw new Exception(s"Cannot determine existence of worker jar: ${workerJar}")
121139
}
122140
}
123141
}

0 commit comments

Comments
 (0)