-use ara_parser::parser;
+use std::fs;
+use std::path::PathBuf;
+use std::thread;
+
+use ara_parser::tree::Tree;
 use ara_parser::tree::TreeMap;
-use ara_source::loader;
+use ara_reporting::Report;
+use ara_source::source::Source;
 use ara_source::SourceMap;
 
 use crate::config::Config;
+use crate::error::Error;
+use crate::hash::FxHasher;
+use crate::serializer::BincodeSerializer;
+use crate::source::SourceFilesCollector;
+use crate::tree::TreeBuilder;
 
 pub mod config;
+pub mod error;
+pub(crate) mod hash;
+pub mod logger;
+pub(crate) mod serializer;
+pub mod source;
+pub(crate) mod tree;
+
+pub(crate) const ARA_SOURCE_EXTENSION: &str = "ara";
+pub(crate) const ARA_DEFINITION_EXTENSION: &str = "d.ara";
+pub(crate) const ARA_CACHED_SOURCE_EXTENSION: &str = "ara.cache";
 
+#[derive(Debug)]
 pub struct Forest {
     pub source: SourceMap,
     pub tree: TreeMap,
 }
 
-pub struct Parser {
-    pub config: Config,
+impl Forest {
+    pub fn new(source: SourceMap, tree: TreeMap) -> Self {
+        Self { source, tree }
+    }
+}
+
+pub struct Parser<'a> {
+    pub config: &'a Config,
+    tree_builder: TreeBuilder<'a>,
 }
 
-impl Parser {
-    pub fn new(config: Config) -> Self {
-        Self { config }
+impl<'a> Parser<'a> {
+    pub fn new(config: &'a Config) -> Self {
+        let tree_builder = TreeBuilder::new(
+            config,
+            Box::new(FxHasher::new()),
+            Box::new(BincodeSerializer::new()),
+        );
+
+        Self {
+            config,
+            tree_builder,
+        }
     }
 
-    pub fn parse(&self) -> Result<Forest, String> {
-        let mut threads = Vec::with_capacity(self.config.threads);
-
-        let source_map = loader::load_directories(&self.config.root, {
-            let mut directories = self.config.definitions.clone();
-            directories.push(self.config.source.clone());
-
-            directories
-        })
-        .expect("Failed to load source map");
-
-        // split the sources into N chunks, where N is the number of threads
-        let chunk_size = source_map.sources.len() / self.config.threads;
-        let chunks: Vec<Vec<ara_source::source::Source>> = source_map
-            .sources
-            .chunks(chunk_size)
-            .map(|chunk| chunk.to_vec())
-            .collect();
-
-        for chunk in chunks {
-            threads.push(std::thread::spawn(move || {
-                let map = SourceMap::new(chunk);
-                parser::parse_map(&map)
-            }));
+    pub fn parse(&self) -> Result<Forest, Box<Report>> {
+        self.init_logger().map_err(|error| Box::new(error.into()))?;
+
+        let (sources, trees) =
+            thread::scope(|scope| -> Result<(Vec<Source>, Vec<Tree>), Box<Report>> {
+                self.create_cache_dir()
+                    .map_err(|error| Box::new(error.into()))?;
+
+                let files = SourceFilesCollector::new(self.config)
+                    .collect()
+                    .map_err(|error| Box::new(error.into()))?;
+
+                if files.is_empty() {
+                    return Ok((Vec::new(), Vec::new()));
+                }
+
+                let threads_count = self.threads_count(files.len());
+                let chunks = files
+                    .chunks(files.len() / threads_count)
+                    .map(Vec::from)
+                    .collect::<Vec<Vec<PathBuf>>>();
+
+                let mut threads = Vec::with_capacity(threads_count);
+                for chunk in chunks.into_iter() {
+                    threads.push(scope.spawn(
+                        move || -> Result<Vec<(Source, Tree)>, Box<Report>> {
+                            let mut source_tree = Vec::with_capacity(chunk.len());
+                            for source_path in chunk {
+                                let (source, tree) = self
+                                    .tree_builder
+                                    .build(&source_path)
+                                    .map_err(|error| match error {
+                                        Error::ParseError(report) => report,
+                                        _ => Box::new(error.into()),
+                                    })?;
+                                source_tree.push((source, tree));
+                            }
+
+                            Ok(source_tree)
+                        },
+                    ));
+                }
+
+                let mut result = Vec::new();
+                for handle in threads {
+                    result.extend(handle.join().unwrap()?);
+                }
+                let (sources, trees) = result.into_iter().unzip();
+
+                Ok((sources, trees))
+            })?;
+
+        Ok(Forest::new(SourceMap::new(sources), TreeMap::new(trees)))
+    }
+
+    fn threads_count(&self, files_len: usize) -> usize {
+        if self.config.threads > files_len {
+            files_len
+        } else {
+            self.config.threads
         }
+    }
+
+    fn create_cache_dir(&self) -> Result<(), Error> {
+        if self.config.cache.is_some() {
+            fs::create_dir_all(self.config.cache.as_ref().unwrap())?;
+        }
+
+        Ok(())
+    }
 
-        let mut results = vec![];
-        for thread in threads {
-            results.push(thread.join().unwrap());
+    fn init_logger(&self) -> Result<(), Error> {
+        if self.config.logger.is_some() {
+            self.config.logger.as_ref().unwrap().init()?
         }
 
-        todo!("
-            the implementation above is just a placeholder
-
-            the idea is to:
-            1. load the source map
-            2. split the source map into N chunks, where N is the number of threads
-            3. spawn N threads, each of which parses a chunk of the source map
-            4. in each thread, iterate over the sources in the chunk and:
-                first we need to check if the source is present in the cache, if yes, load the cached tree,
-                and check if the hash of the source matches the hash of the cached tree, if yes, return the cached tree,
-                otherwise, parse the source and save the tree to the cache
-                if the source is not present in the cache, parse the source and save the tree to the cache.
-                If the parser failed, return the report immediately and do not continue
-            5. join the threads and collect the results
-                If any of the threads failed, return the report immediately and do not continue
-            6. merge the results into a single forest
-            7. return the forest
-        ");
+        Ok(())
     }
 }
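The deleted todo! note laid out the caching plan: hash each source file, reuse the cached tree when the hash still matches, otherwise parse the file and write the tree back to the cache, and stop with the report as soon as any parse fails. In the new code that work is delegated to TreeBuilder::build together with FxHasher and BincodeSerializer, none of which are shown in this diff. The sketch below is therefore only an illustration of such a hash-keyed lookup under assumed names (hash_source, cache_path, load_or_parse, and the parse closure are all invented), and it simplifies the scheme by encoding the hash in the cache file name instead of storing it inside the entry.

// Rough sketch only: the real TreeBuilder lives in src/tree.rs and is not
// part of this diff. Every identifier below is hypothetical.
use std::fs;
use std::hash::{Hash, Hasher};
use std::io;
use std::path::{Path, PathBuf};

// Hash the raw source text; stands in for the crate's FxHasher wrapper.
fn hash_source(contents: &str) -> u64 {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    contents.hash(&mut hasher);
    hasher.finish()
}

// Key the cache entry by the source hash, e.g. "<cache_dir>/<hash>.ara.cache".
fn cache_path(cache_dir: &Path, source_hash: u64) -> PathBuf {
    cache_dir.join(format!("{source_hash:016x}.ara.cache"))
}

// Return the cached payload when the source text has not changed, otherwise
// produce it with `parse` (parsing plus serialization) and persist it.
fn load_or_parse(
    cache_dir: &Path,
    source_path: &Path,
    parse: impl Fn(&str) -> Vec<u8>,
) -> io::Result<Vec<u8>> {
    let contents = fs::read_to_string(source_path)?;
    let path = cache_path(cache_dir, hash_source(&contents));

    // Cache hit: the file name embeds the source hash, so an existing file
    // means this exact source text was parsed before.
    if path.exists() {
        return fs::read(&path);
    }

    // Cache miss (or a stale entry under a different hash): parse and store.
    let payload = parse(&contents);
    fs::write(&path, &payload)?;
    Ok(payload)
}

A real build step would presumably return the Source together with a deserialized Tree and surface parse failures as a Report rather than opaque bytes, which is what parse() above expects from tree_builder.build.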
|
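One detail of the work splitting in parse() is worth spelling out: threads_count clamps the configured thread count to the number of files, but the chunk size is the floor of files.len() / threads_count, so when the file count is not an exact multiple, slice::chunks yields more chunks, and therefore more scoped threads, than threads_count; thread::scope still joins every handle before returning. The standalone snippet below, using a made-up file list, illustrates that arithmetic.

// Illustrates the chunking arithmetic used by parse(); the file list is
// fabricated, only the chunk-size computation mirrors the diff above.
use std::path::PathBuf;

fn main() {
    let files: Vec<PathBuf> = (0..10)
        .map(|i| PathBuf::from(format!("src/{i}.ara")))
        .collect();
    let configured_threads: usize = 4;

    // Same clamp as Parser::threads_count: never use more threads than files.
    let threads_count = configured_threads.min(files.len());

    // Floor division: 10 / 4 = 2, so chunks(2) produces 5 chunks of 2 files,
    // i.e. one chunk (and one spawned thread) more than threads_count here.
    let chunks: Vec<Vec<PathBuf>> = files
        .chunks(files.len() / threads_count)
        .map(Vec::from)
        .collect();

    assert_eq!(threads_count, 4);
    assert_eq!(chunks.len(), 5);
    assert!(chunks.iter().all(|chunk| chunk.len() == 2));
}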
0 commit comments
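Finally, the repeated Box::new(error.into()) calls imply that the crate's Error type (src/error.rs, also outside this diff) converts into a Report, and that a parse failure already carries the parser's own boxed Report, which the match in the spawned closure passes through untouched. A hypothetical shape consistent with that usage, not the crate's actual definition, might look like this:

// Hypothetical sketch of what src/error.rs could contain, inferred from how
// parse() above uses Error; the real definitions are not part of this diff.
use ara_reporting::Report;

pub enum Error {
    // A parse failure already carries the parser's boxed report.
    ParseError(Box<Report>),
    // Everything else: I/O while collecting files or writing the cache, etc.
    IoError(std::io::Error),
}

impl From<std::io::Error> for Error {
    fn from(error: std::io::Error) -> Self {
        Error::IoError(error)
    }
}

// Allows `Box::new(error.into())` in parse() to turn any Error into a Report.
impl From<Error> for Report {
    fn from(error: Error) -> Self {
        match error {
            Error::ParseError(report) => *report,
            // How a Report is built from a plain message is an assumption
            // about ara_reporting's API, so it is left unimplemented here.
            Error::IoError(_) => todo!("build a Report describing the error"),
        }
    }
}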