Skip to content

Commit 58cffcc

Browse files
committed
Add use_cache option to create_files
1 parent 3ba0fa6 commit 58cffcc

File tree

2 files changed

+184
-56
lines changed

2 files changed

+184
-56
lines changed

src/fileio.jl

+104-47
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,78 @@
11
# This file is a part of ParallelProcessingTools.jl, licensed under the MIT License (MIT).
22

3+
"""
    ParallelProcessingTools.split_basename_ext(file_basename_with_ext::AbstractString)

Splits a filename (given without its directory path) into a basename without
file extension and the file extension. Returns a tuple `(basename_noext, ext)`.

The extension starts at the first `'.'` in the name, so multi-part extensions
like `.tar.gz` are kept together. If the name contains no `'.'`, `ext` is the
empty string.

Example:

```
ParallelProcessingTools.split_basename_ext("myfile.tar.gz") == ("myfile", ".tar.gz")
```
"""
function split_basename_ext(bn_ext::AbstractString)
    ext_startpos = findfirst('.', bn_ext)
    if isnothing(ext_startpos)
        return (bn_ext, "")
    end
    # String indices are not contiguous for multi-byte characters, so
    # `ext_startpos - 1` may not be a valid index. Step back to the previous
    # valid index instead (this is 0 if the name starts with a dot, which
    # yields an empty basename).
    bn_endpos = prevind(bn_ext, ext_startpos)
    bn = bn_ext[firstindex(bn_ext):bn_endpos]
    ext = bn_ext[ext_startpos:end]
    return (bn, ext)
end
20+
321

422
"""
    ParallelProcessingTools.tmp_filename(fname::AbstractString)
    ParallelProcessingTools.tmp_filename(fname::AbstractString, dir::AbstractString)

Returns a temporary filename derived from `fname`: the basename of `fname`
with an underscore and a random tag inserted in front of the file extension.

With a single argument the returned path lies in the directory of `fname`;
with two arguments it lies in `dir`.

The temporary file itself is *not* created, only its filename (including the
directory path) is returned.
"""
function tmp_filename end

function tmp_filename(fname::AbstractString, dir::AbstractString)
    stem, suffix = split_basename_ext(basename(fname))
    unique_tag = _rand_fname_tag()
    return joinpath(dir, string(stem, "_", unique_tag, suffix))
end

function tmp_filename(fname::AbstractString)
    return tmp_filename(fname, dirname(fname))
end
44+
2545
# Returns a random 8-character tag made up of ASCII digits and letters.
function _rand_fname_tag()
    alphabet = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    return String(rand(alphabet, 8))
end
2646

2747

2848
"""
2949
function create_files(
3050
body, filenames::AbstractString...;
3151
create_dirs::Bool = true, overwrite::Bool = true, delete_on_error::Bool=true,
52+
use_cache::Bool = true, cache_dir::AbstractString = tempdir(), verbose::Bool = true
3253
)
3354
3455
Creates `filenames` in an atomic fashion.
3556
36-
Creates temporary files in the same directories as `filenames`, then
57+
Creates temporary files, then
3758
calls `body(temporary_filenames...)`. If `body` returns successfully,
3859
the files `temporary_filenames` are renamed to `filenames`. If `body` throws
3960
an exception, the temporary files are either deleted (if `delete_on_error` is
4061
`true`) or left in place (e.g. for debugging purposes).
4162
63+
If `use_cache` is `true`, the `temporary_filenames` are created in
64+
`cache_dir` and then atomically moved to `filenames`, otherwise, they are
65+
created next to `filenames` (in the same directories).
66+
4267
If `create_dirs` is `true`, directories are created if necessary.
4368
4469
If all of the files already exist and `overwrite` is `false`, takes no action
4570
(or, if the file is created by other code running in parallel, while `body` is
4671
running, does not overwrite it).
4772
73+
If `verbose` is `true`, uses log-level `Logging.Info` to log file creation,
74+
otherwise `Logging.Debug`.
75+
4876
Throws an error if only some of the files exist and `overwrite` is `false`.
4977
5078
Returns `nothing`.
@@ -59,75 +87,104 @@ end
5987
```
6088
"""
6189
function create_files(
62-
body, filenames::AbstractString...;
63-
create_dirs::Bool = true, overwrite::Bool = true, delete_on_error::Bool=true
90+
@nospecialize(body), @nospecialize(filenames::AbstractString...);
91+
create_dirs::Bool = true, overwrite::Bool = true, delete_on_error::Bool=true,
92+
use_cache::Bool = true, cache_dir::AbstractString = tempdir(),
93+
verbose::Bool = true
6494
)
65-
tmp_filenames = String[]
66-
completed_filenames = String[]
95+
loglevel = verbose ? Info : Debug
96+
97+
target_fnames = String[filenames...] # Fix type
98+
staging_fnames = String[]
99+
writeto_fnames = String[]
100+
completed_fnames = String[]
67101

68-
pre_existing = isfile.(filenames)
102+
pre_existing = isfile.(target_fnames)
69103
if any(pre_existing)
70104
if all(pre_existing)
71105
if !overwrite
72-
@info "Files $filenames already exist, nothing to do."
106+
@logmsg loglevel "Files $target_fnames already exist, nothing to do."
73107
return nothing
74108
end
75109
else
76-
!overwrite && throw(ErrorException("Only some of $filenames exist but not allowed to overwrite"))
110+
!overwrite && throw(ErrorException("Only some of $target_fnames exist but not allowed to overwrite"))
77111
end
78112
end
79113

80-
dirs = dirname.(filenames)
81-
for dir in dirs
82-
if !isdir(dir) && create_dirs
83-
mkpath(dir)
84-
@info "Created directory $dir."
114+
dirs = dirname.(target_fnames)
115+
if create_dirs
116+
for dir in dirs
117+
if !isdir(dir) && create_dirs
118+
mkpath(dir)
119+
@logmsg loglevel "Created directory $dir."
120+
end
121+
end
122+
123+
if use_cache && !isdir(cache_dir)
124+
mkpath(cache_dir)
125+
@logmsg loglevel "Created cache directory $cache_dir."
85126
end
86127
end
87128

88129
try
89-
for fname in filenames
90-
tmp_fname = tmp_filename(fname)
91-
@assert !isfile(tmp_fname)
92-
push!(tmp_filenames, tmp_fname)
93-
end
130+
staging_fnames = tmp_filename.(target_fnames)
131+
@assert !any(isfile, staging_fnames)
132+
133+
writeto_fnames = use_cache ? tmp_filename.(target_fnames, Ref(cache_dir)) : staging_fnames
134+
@assert !any(isfile, writeto_fnames)
94135

95-
body(tmp_filenames...)
136+
@debug "Creating intermediate files $writeto_fnames."
137+
body(writeto_fnames...)
96138

97-
post_body_existing = isfile.(filenames)
139+
post_body_existing = isfile.(target_fnames)
98140
if any(post_body_existing)
99141
if all(post_body_existing)
100142
if !overwrite
101-
@info "Files $filenames already exist, won't replace."
143+
@logmsg loglevel "Files $target_fnames already exist, won't replace."
102144
return nothing
103145
end
104146
else
105-
!overwrite && throw(ErrorException("Only some of $filenames exist but not allowed to replace files"))
147+
!overwrite && throw(ErrorException("Only some of $target_fnames exist but not allowed to replace files"))
106148
end
107149
end
108-
150+
109151
try
110-
for (tmp_fname, fname) in zip(tmp_filenames, filenames)
111-
mv(tmp_fname, fname; force=true)
112-
@assert isfile(fname)
113-
push!(completed_filenames, fname)
152+
if use_cache
153+
for (writeto_fn, staging_fn) in zip(writeto_fnames, staging_fnames)
154+
@assert writeto_fn != staging_fn
155+
@debug "Moving file \"$writeto_fn\" to \"$staging_fn\"."
156+
isfile(writeto_fn) || error("Expected file \"$writeto_fn\" to exist, but it doesn't.")
157+
mv(writeto_fn, staging_fn; force=true)
158+
isfile(staging_fn) || error("Tried to move file \"$writeto_fn\" to \"$staging_fn\", but \"$staging_fn\" doesn't exist.")
159+
end
160+
end
161+
for (staging_fn, target_fn) in zip(staging_fnames, target_fnames)
162+
@assert staging_fn != target_fn
163+
@debug "Renaming file \"$staging_fn\" to \"$target_fn\"."
164+
isfile(staging_fn) || error("Expected file \"$staging_fn\" to exist, but it doesn't.")
165+
mv(staging_fn, target_fn; force=true)
166+
isfile(target_fn) || error("Tried to rename file \"$staging_fn\" to \"$target_fn\", but \"$target_fn\" doesn't exist.")
167+
push!(completed_fnames, target_fn)
114168
end
115-
@info "Successfully created files $filenames."
169+
@logmsg loglevel "Created files $target_fnames."
116170
catch
117-
if !isempty(completed_filenames)
118-
@error "Failed to rename some temporary files to final filenames, removing $completed_filenames"
119-
for fname in completed_filenames
171+
if !isempty(completed_fnames)
172+
@error "Failed to rename some temporary files to final filenames, removing $completed_fnames"
173+
for fname in completed_fnames
120174
rm(fname; force=true)
121175
end
122176
end
123177
rethrow()
124178
end
125179

126-
@assert all(fn -> !isfile(fn), tmp_filenames)
180+
@assert all(fn -> !isfile(fn), staging_fnames)
127181
finally
128182
if delete_on_error
129-
for tmp_fname in tmp_filenames
130-
isfile(tmp_fname) && rm(tmp_fname; force=true);
183+
for writeto_fn in writeto_fnames
184+
isfile(writeto_fn) && rm(writeto_fn; force=true);
185+
end
186+
for staging_fn in staging_fnames
187+
isfile(staging_fn) && rm(staging_fn; force=true);
131188
end
132189
end
133190
end

test/test_fileio.jl

+80-9
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,91 @@
33
using Test
44
using ParallelProcessingTools
55

6+
using ParallelProcessingTools: split_basename_ext, tmp_filename
7+
8+
# Enable debug-level log output from ParallelProcessingTools while these tests
# run. `nothing` records that JULIA_DEBUG was not set at all, so that it can be
# removed again afterwards instead of being left behind as an empty string.
old_julia_debug = get(ENV, "JULIA_DEBUG", nothing)
ENV["JULIA_DEBUG"] = if isnothing(old_julia_debug) || isempty(old_julia_debug)
    "ParallelProcessingTools"
else
    old_julia_debug * ",ParallelProcessingTools"
end

try
    @testset "fileio" begin
        @testset "split_basename_ext" begin
            @test @inferred(split_basename_ext("foo_bar baz.tar.gz")) == ("foo_bar baz", ".tar.gz")
        end

        @testset "tmp_filename" begin
            dir = joinpath("foo", "bar")
            tmpdir = joinpath(tempdir(), "somedir")
            bn = "test.tar.gz"
            fn = joinpath(dir, bn)

            # Default: temporary file name lives next to the original file.
            @test @inferred(tmp_filename(fn)) isa AbstractString
            let tmpfn = @inferred tmp_filename(fn)
                @test dirname(tmpfn) == dir
                tmp_bn, tmp_ex = split_basename_ext(basename(tmpfn))
                @test startswith(tmp_bn, "test_")
                @test tmp_ex == ".tar.gz"
            end

            # Explicit directory: temporary file name lives in `tmpdir`.
            @test @inferred(tmp_filename(fn, tmpdir)) isa AbstractString
            let tmpfn = @inferred tmp_filename(fn, tmpdir)
                @test dirname(tmpfn) == tmpdir
                tmp_bn, tmp_ex = split_basename_ext(basename(tmpfn))
                @test startswith(tmp_bn, "test_")
                @test tmp_ex == ".tar.gz"
            end
        end

        # One test set per mode, named so that failures show which mode broke.
        @testset "create_files (use_cache = $use_cache)" for use_cache in (false, true)
            mktempdir() do dir
                data1 = "Hello"
                data2 = "World"

                fn1 = joinpath(dir, "targetdir", "hello.txt")
                fn2 = joinpath(dir, "targetdir", "world.txt")

                # Target directory does not exist yet and will not be created,
                # so creating the files has to fail:
                @test_throws Union{SystemError, Base.IOError} create_files(fn1, fn2, use_cache = use_cache, create_dirs = false, verbose = true) do fn1, fn2
                    write(fn1, data1); write(fn2, data2)
                end

                # Test atomicity, fail in between writing files:
                @test_throws ErrorException create_files(fn1, fn2, use_cache = use_cache, verbose = true) do fn1, fn2
                    write(fn1, data1)
                    error("Some error")
                    write(fn2, data2)
                end
                @test !isfile(fn1) && !isfile(fn2)

                # Will create:
                create_files(fn1, fn2, use_cache = use_cache, verbose = true) do fn1, fn2
                    write(fn1, data1); write(fn2, data2)
                end
                @test read(fn1, String) == data1 && read(fn2, String) == data2

                # Modify the target files:
                write(fn1, "dummy content"); write(fn2, "dummy content");

                # Won't overwrite:
                create_files(fn1, fn2, use_cache = use_cache, overwrite = false, verbose = true) do fn1, fn2
                    write(fn1, data1); write(fn2, data2)
                end
                @test read(fn1, String) != data1 && read(fn2, String) != data2

                # Will overwrite:
                create_files(fn1, fn2, use_cache = use_cache, verbose = true) do fn1, fn2
                    write(fn1, data1); write(fn2, data2)
                end
                @test read(fn1, String) == data1 && read(fn2, String) == data2
            end
        end
    end
finally
    # Restore the previous logging configuration even if the tests throw.
    if isnothing(old_julia_debug)
        delete!(ENV, "JULIA_DEBUG")
    else
        ENV["JULIA_DEBUG"] = old_julia_debug
    end
end

nothing

0 commit comments

Comments
 (0)