From 8ec1b3755fc167645608d5570e618e999a287c9b Mon Sep 17 00:00:00 2001 From: James Judd Date: Tue, 8 Apr 2025 09:50:38 -0600 Subject: [PATCH 1/2] Use Bazel version with more multiplex sandbox fixes --- .bazelversion | 2 +- MODULE.bazel.lock | 2 +- tests/.bazelversion | 2 +- tests/MODULE.bazel.lock | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bazelversion b/.bazelversion index 1140becc..b97e4f50 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -lucidsoftware/8.1.999 +lucidsoftware/8.1.10002 diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index 4e36212c..5900fd4f 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -482,7 +482,7 @@ }, "@@rules_python+//python/private/pypi:pip.bzl%pip_internal": { "general": { - "bzlTransitiveDigest": "UT27gbYR4rXUHlRTZenxY7C2WW8iKjlj00ldj987Kyc=", + "bzlTransitiveDigest": "EK5D3F9+USljOVep0G7Rpc2ijdnDojNojbg+fkrdHfs=", "usagesDigest": "OLoIStnzNObNalKEMRq99FqenhPGLFZ5utVLV4sz7OI=", "recordedFileInputs": { "@@rules_python+//tools/publish/requirements_darwin.txt": "2994136eab7e57b083c3de76faf46f70fad130bc8e7360a7fed2b288b69e79dc", diff --git a/tests/.bazelversion b/tests/.bazelversion index 1140becc..b97e4f50 100644 --- a/tests/.bazelversion +++ b/tests/.bazelversion @@ -1 +1 @@ -lucidsoftware/8.1.999 +lucidsoftware/8.1.10002 diff --git a/tests/MODULE.bazel.lock b/tests/MODULE.bazel.lock index cb887ec6..a271dd2c 100644 --- a/tests/MODULE.bazel.lock +++ b/tests/MODULE.bazel.lock @@ -337,7 +337,7 @@ }, "@@rules_python+//python/private/pypi:pip.bzl%pip_internal": { "general": { - "bzlTransitiveDigest": "UT27gbYR4rXUHlRTZenxY7C2WW8iKjlj00ldj987Kyc=", + "bzlTransitiveDigest": "EK5D3F9+USljOVep0G7Rpc2ijdnDojNojbg+fkrdHfs=", "usagesDigest": "OLoIStnzNObNalKEMRq99FqenhPGLFZ5utVLV4sz7OI=", "recordedFileInputs": { "@@rules_python+//tools/publish/requirements_darwin.txt": "2994136eab7e57b083c3de76faf46f70fad130bc8e7360a7fed2b288b69e79dc", From 24d2bc220f1f3be5717f860081009052cb4e6231 Mon Sep 17 00:00:00 2001 From: James Judd Date: Tue, 8 Apr 2025 09:53:38 -0600 Subject: [PATCH 2/2] Attempt to fix race conditions with non-atomic file copy for worker jars We were copying a file used by multiple threads directly to its destination. Problem is that copying is not an atomic action, so we could end up in states where the file wasn't correct when it was used. This should avoid that issue by first copying the file to a temp file and then using an atomic move to move the file to the destination used by other threads. --- .../workers/common/AnnexScalaInstance.scala | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/main/scala/higherkindness/rules_scala/workers/common/AnnexScalaInstance.scala b/src/main/scala/higherkindness/rules_scala/workers/common/AnnexScalaInstance.scala index 2ddc9489..c949ef5d 100644 --- a/src/main/scala/higherkindness/rules_scala/workers/common/AnnexScalaInstance.scala +++ b/src/main/scala/higherkindness/rules_scala/workers/common/AnnexScalaInstance.scala @@ -4,16 +4,22 @@ package workers.common import xsbti.compile.ScalaInstance import java.io.File import java.net.URLClassLoader -import java.nio.file.{FileAlreadyExistsException, Files, Path, Paths} +import java.nio.file.{FileAlreadyExistsException, Files, Path, Paths, StandardCopyOption} import java.util.Properties import java.util.concurrent.ConcurrentHashMap import scala.collection.immutable.TreeMap +import scala.util.control.NonFatal object AnnexScalaInstance { // See the comment on getAnnexScalaInstance as to why this is necessary private val instanceCache: ConcurrentHashMap[Set[Path], AnnexScalaInstance] = new ConcurrentHashMap[Set[Path], AnnexScalaInstance]() + // The worker will use this directory to store temp files in order to better perform + // atomic file copies. + private val tmpWorkerJarDir = Paths.get("annex-tmp-worker-jars") + Files.createDirectories(tmpWorkerJarDir) + /** * We only need to care about minimizing the number of AnnexScalaInstances we create if things are being run as a * worker. Otherwise just create the AnnexScalaInstance and be done with it because the process won't be long lived. @@ -106,18 +112,30 @@ object AnnexScalaInstance { // This should only happen once per compiler version, so it shouldn't happen often. workRequestJarToWorkerJar.foreach { case (workRequestJar, workerJar) => this.synchronized { - // Check for existence of the file just in case another request is also writing these jars - // Copying a file is not atomic, so we don't want to end up in a funky state where two - // copies of the same file happen at the same time and cause something bad to happen. - if (!Files.exists(workerJar)) { + // Do a more atomic copy of a file by creating a temp file and then moving + // the temp file to the destination. We can do a move atomically, but cannot do + // a copy atomically. Copying risks the file existing at the destination in a + // partially completed state. + if (Files.notExists(workerJar)) { + var tmpWorkerJar: Option[Path] = None try { + tmpWorkerJar = Some(Files.createTempFile(tmpWorkerJarDir, workerJar.getFileName.toString, "tmp")) + Files.copy(workRequestJar, tmpWorkerJar.get, StandardCopyOption.REPLACE_EXISTING) + Files.createDirectories(workerJar.getParent()) - Files.copy(workRequestJar, workerJar) + Files.move(tmpWorkerJar.get, workerJar, StandardCopyOption.ATOMIC_MOVE) } catch { - // We do not care if the file already exists - case _: FileAlreadyExistsException => {} - case e: Throwable => throw new Exception("Error adding file to instance cache", e) + case NonFatal(e) => + throw new Exception(s"Error copying worker jar: ${workerJar}", e) + } finally { + tmpWorkerJar.foreach { tmpWorkerJar => + Files.deleteIfExists(tmpWorkerJar) + } } + } else if (!Files.exists(workerJar)) { + // Files.exists is not the complement of Files.notExists because both return false + // when the existence of the file cannot be determined. + throw new Exception(s"Cannot determine existence of worker jar: ${workerJar}") } } }