|
17 | 17 |
|
18 | 18 | package com.spotify.scio.util;
|
19 | 19 |
|
| 20 | +import com.google.common.util.concurrent.Futures; |
| 21 | +import com.google.common.util.concurrent.ListenableFuture; |
| 22 | +import com.google.common.util.concurrent.ListeningExecutorService; |
| 23 | +import com.google.common.util.concurrent.MoreExecutors; |
20 | 24 | import java.io.FileNotFoundException;
|
21 | 25 | import java.io.IOException;
|
22 | 26 | import java.io.Serializable;
|
|
28 | 32 | import java.nio.file.Path;
|
29 | 33 | import java.nio.file.Paths;
|
30 | 34 | import java.nio.file.StandardOpenOption;
|
31 |
| -import java.util.ArrayList; |
32 | 35 | import java.util.List;
|
33 |
| -import java.util.Map; |
34 |
| -import java.util.concurrent.ExecutionException; |
| 36 | +import java.util.concurrent.*; |
| 37 | +import java.util.stream.Collectors; |
35 | 38 | import org.apache.beam.sdk.io.FileSystems;
|
36 | 39 | import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
|
37 | 40 | import org.apache.beam.sdk.io.fs.ResourceId;
|
38 | 41 | import org.apache.beam.sdk.options.PipelineOptions;
|
| 42 | +import org.apache.beam.sdk.transforms.DoFn; |
39 | 43 | import org.apache.beam.sdk.util.MimeTypes;
|
40 | 44 | import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Charsets;
|
41 | 45 | import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions;
|
|
46 | 50 | import org.slf4j.Logger;
|
47 | 51 | import org.slf4j.LoggerFactory;
|
48 | 52 |
|
49 |
| -/** |
50 |
| - * A utility class for handling remote file systems designed to be used in a {@link |
51 |
| - * org.apache.beam.sdk.transforms.DoFn}. |
52 |
| - */ |
| 53 | +/** A utility class for handling remote file systems designed to be used in a {@link DoFn}. */ |
53 | 54 | public class RemoteFileUtil implements Serializable {
|
54 | 55 |
|
55 | 56 | private static final Logger LOG = LoggerFactory.getLogger(RemoteFileUtil.class);
|
56 | 57 |
|
57 | 58 | private static final int CONCURRENCY_LEVEL = Runtime.getRuntime().availableProcessors() * 4;
|
58 | 59 | private static final int HASH_LENGTH = 8;
|
59 | 60 |
|
| 61 | + private static final ListeningExecutorService executor = |
| 62 | + MoreExecutors.listeningDecorator( |
| 63 | + MoreExecutors.getExitingExecutorService( |
| 64 | + (ThreadPoolExecutor) Executors.newFixedThreadPool(CONCURRENCY_LEVEL))); |
| 65 | + |
60 | 66 | // Mapping of remote sources to local destinations
|
61 | 67 | private static final LoadingCache<URI, Path> paths =
|
62 | 68 | CacheBuilder.newBuilder()
|
@@ -105,21 +111,20 @@ public Path download(URI src) {
|
105 | 111 | * @return {@link Path}s to the downloaded local files.
|
106 | 112 | */
|
107 | 113 | public List<Path> download(List<URI> srcs) {
|
| 114 | + List<ListenableFuture<Path>> futures = |
| 115 | + srcs.stream() |
| 116 | + .map(uri -> executor.submit(() -> paths.get(uri))) |
| 117 | + .collect(Collectors.toList()); |
108 | 118 | try {
|
109 |
| - Map<URI, Path> results = paths.getAll(srcs); |
110 |
| - List<Path> paths = new ArrayList<>(srcs.size()); |
111 |
| - for (URI src : srcs) { |
112 |
| - paths.add(results.get(src)); |
113 |
| - } |
114 |
| - return paths; |
115 |
| - } catch (ExecutionException e) { |
| 119 | + return Futures.allAsList(futures).get(); |
| 120 | + } catch (InterruptedException | ExecutionException e) { |
116 | 121 | throw new RuntimeException(e);
|
117 | 122 | }
|
118 | 123 | }
|
119 | 124 |
|
120 | 125 | /** Delete a single downloaded local file. */
|
121 | 126 | public void delete(URI src) {
|
122 |
| - Path dst = null; |
| 127 | + Path dst; |
123 | 128 | try {
|
124 | 129 | dst = paths.get(src);
|
125 | 130 | } catch (ExecutionException e) {
|
|
0 commit comments