diff --git a/cli/src/commands/dump.rs b/cli/src/commands/dump.rs index c150b7b3c..9c9387c96 100644 --- a/cli/src/commands/dump.rs +++ b/cli/src/commands/dump.rs @@ -20,6 +20,8 @@ enum SupportedModules { Elf, Pe, Dotnet, + Olecf, + Vba, } #[derive(Debug, Clone, ValueEnum)] @@ -111,6 +113,12 @@ pub fn exec_dump(args: &ArgMatches) -> anyhow::Result<()> { if !requested_modules.contains(&&SupportedModules::Pe) { module_output.pe = MessageField::none() } + if !requested_modules.contains(&&SupportedModules::Olecf) { + module_output.olecf = MessageField::none() + } + if !requested_modules.contains(&&SupportedModules::Vba) { + module_output.vba = MessageField::none() + } } else { // Module was not specified, only show those that produced meaningful // results, the rest are cleared out. @@ -131,6 +139,12 @@ pub fn exec_dump(args: &ArgMatches) -> anyhow::Result<()> { if !module_output.pe.is_pe() { module_output.pe = MessageField::none() } + if !module_output.olecf.is_olecf() { + module_output.olecf = MessageField::none() + } + if !module_output.vba.has_macros() { + module_output.vba = MessageField::none() + } } match output_format { diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 0e690b948..823f3fc30 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -146,6 +146,9 @@ magic-module = [ # The `math` module. math-module = [] +# The `olecf` module +olecf-module = [] + # The `pe` module parses PE files. pe-module = [ "dep:const-oid", @@ -182,6 +185,9 @@ text-module = [ # conditions of a rule to check against other epoch time. time-module = [] +# The `vba` module +vba-module = [] + # Features that are enabled by default. default = [ "constant-folding", @@ -194,10 +200,12 @@ default = [ "macho-module", "math-module", "hash-module", + "olecf-module", "pe-module", "string-module", "time-module", "lnk-module", + "vba-module", "test_proto2-module", "test_proto3-module", ] @@ -260,6 +268,7 @@ x509-parser = { workspace = true, optional = true } yansi = { workspace = true } yara-x-macros = { workspace = true } yara-x-parser = { workspace = true, features = ["serde"] } +zip = { workspace = true } lingua = { version = "1.6.2", optional = true, default-features = false, features = ["english", "german", "french", "spanish"] } diff --git a/lib/fuzz/Cargo.toml b/lib/fuzz/Cargo.toml index 0bd8d4fe6..79360e92c 100644 --- a/lib/fuzz/Cargo.toml +++ b/lib/fuzz/Cargo.toml @@ -47,6 +47,12 @@ path = "fuzz_targets/dotnet_parser.rs" test = false doc = false +[[bin]] +name = "vba_parser" +path = "fuzz_targets/vba_parser.rs" +test = false +doc = false + [[bin]] name = "rule_compiler" path = "fuzz_targets/rule_compiler.rs" diff --git a/lib/fuzz/fuzz_targets/vba_parser.rs b/lib/fuzz/fuzz_targets/vba_parser.rs new file mode 100644 index 000000000..343258f3f --- /dev/null +++ b/lib/fuzz/fuzz_targets/vba_parser.rs @@ -0,0 +1,6 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + let _ = yara_x::mods::invoke::(data); +}); diff --git a/lib/src/modules/add_modules.rs b/lib/src/modules/add_modules.rs index a9a528088..9d9fdd56d 100644 --- a/lib/src/modules/add_modules.rs +++ b/lib/src/modules/add_modules.rs @@ -18,6 +18,8 @@ add_module!(modules, "macho", macho, "macho.Macho", Some("macho"), Some(macho::_ add_module!(modules, "magic", magic, "magic.Magic", Some("magic"), Some(magic::__main__ as MainFn)); #[cfg(feature = "math-module")] add_module!(modules, "math", math, "math.Math", Some("math"), Some(math::__main__ as MainFn)); +#[cfg(feature = "olecf-module")] +add_module!(modules, "olecf", olecf, 
"olecf.Olecf", Some("olecf"), Some(olecf::__main__ as MainFn)); #[cfg(feature = "pe-module")] add_module!(modules, "pe", pe, "pe.PE", Some("pe"), Some(pe::__main__ as MainFn)); #[cfg(feature = "string-module")] @@ -30,4 +32,6 @@ add_module!(modules, "test_proto3", test_proto3, "test_proto3.TestProto3", Some( add_module!(modules, "text", text, "text.Text", Some("text"), Some(text::__main__ as MainFn)); #[cfg(feature = "time-module")] add_module!(modules, "time", time, "time.Time", Some("time"), Some(time::__main__ as MainFn)); +#[cfg(feature = "vba-module")] +add_module!(modules, "vba", vba, "vba.Vba", Some("vba"), Some(vba::__main__ as MainFn)); } \ No newline at end of file diff --git a/lib/src/modules/mod.rs b/lib/src/modules/mod.rs index d776d48b7..7b2f83895 100644 --- a/lib/src/modules/mod.rs +++ b/lib/src/modules/mod.rs @@ -174,6 +174,24 @@ pub mod mods { /// Data structure returned by the `macho` module. pub use super::protos::macho::Macho; + /// Data structures defined by the `olecf` module. + /// + /// The main structure produced by the module is [`olecf:Olecf`]. The rest + /// of them are used by one or more fields in the main structure. + /// + pub use super::protos::olecf; + /// Data structure returned by the `olecf` module. + pub use super::protos::olecf::Olecf; + + /// Data structures defined by the `vba` module. + /// + /// The main structure produced by the module is [`vba::Vba`]. The rest + /// of them are used by one or more fields in the main structure. + /// + pub use super::protos::vba; + /// Data structure returned by the `macho` module. + pub use super::protos::vba::Vba; + /// Data structures defined by the `pe` module. /// /// The main structure produced by the module is [`pe::PE`]. The rest @@ -268,6 +286,8 @@ pub mod mods { info.dotnet = protobuf::MessageField(invoke::(data)); info.macho = protobuf::MessageField(invoke::(data)); info.lnk = protobuf::MessageField(invoke::(data)); + info.olecf = protobuf::MessageField(invoke::(data)); + info.vba = protobuf::MessageField(invoke::(data)); info } diff --git a/lib/src/modules/modules.rs b/lib/src/modules/modules.rs index 7113eeaa0..e75a4c52a 100644 --- a/lib/src/modules/modules.rs +++ b/lib/src/modules/modules.rs @@ -17,6 +17,8 @@ mod macho; mod magic; #[cfg(feature = "math-module")] mod math; +#[cfg(feature = "olecf-module")] +mod olecf; #[cfg(feature = "pe-module")] mod pe; #[cfg(feature = "string-module")] @@ -28,4 +30,6 @@ mod test_proto3; #[cfg(feature = "text-module")] mod text; #[cfg(feature = "time-module")] -mod time; \ No newline at end of file +mod time; +#[cfg(feature = "vba-module")] +mod vba; \ No newline at end of file diff --git a/lib/src/modules/olecf/mod.rs b/lib/src/modules/olecf/mod.rs new file mode 100644 index 000000000..298a03319 --- /dev/null +++ b/lib/src/modules/olecf/mod.rs @@ -0,0 +1,40 @@ +/*! YARA module that parses OLE Compound File Binary Format files. + +The OLE CF format (also known as Compound File Binary Format or CFBF) is a +container format used by many Microsoft file formats including DOC, XLS, PPT, +and MSI. This module specializes in parsing OLE CF files and extracting +metadata about their structure and contents. 
+ +Read more about the Compound File Binary File format here: +https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b +*/ + +use crate::modules::prelude::*; +use crate::modules::protos::olecf::*; + +pub mod parser; + +#[module_main] +fn main(data: &[u8], _meta: Option<&[u8]>) -> Olecf { + let mut olecf = Olecf::new(); + + match parser::OLECFParser::new(data) { + Ok(parser) => { + olecf.set_is_olecf(parser.is_valid_header()); + olecf.streams = parser + .get_streams() + .map(|(name, entry)| { + let mut s = Stream::new(); + s.set_name(name.to_string()); + s.set_size(entry.size); + s + }) + .collect(); + } + Err(_) => { + olecf.set_is_olecf(false); + } + } + + olecf +} diff --git a/lib/src/modules/olecf/parser.rs b/lib/src/modules/olecf/parser.rs new file mode 100644 index 000000000..21e0074b0 --- /dev/null +++ b/lib/src/modules/olecf/parser.rs @@ -0,0 +1,449 @@ +use std::collections::HashMap; + +use nom::multi::fold_many_m_n; +use nom::{ + bytes::complete::take, + combinator::verify, + error::{Error as NomError, ErrorKind}, + number::complete::{le_u16, le_u32}, + sequence::tuple, + IResult, +}; + +const OLECF_SIGNATURE: &[u8] = + &[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; +const SECTOR_SHIFT: u16 = 9; +const MINI_SECTOR_SHIFT: u16 = 6; +const DIRECTORY_ENTRY_SIZE: u64 = 128; + +// Directory Entry Types +const STORAGE_TYPE: u8 = 1; +const STREAM_TYPE: u8 = 2; +const ROOT_STORAGE_TYPE: u8 = 5; + +// Special sectors +const ENDOFCHAIN: u32 = 0xFFFFFFFE; +const MAX_REGULAR_SECTOR: u32 = 0xFFFFFFFA; + +pub struct OLECFParser<'a> { + data: &'a [u8], + sector_size: usize, + mini_sector_size: usize, + fat_sectors: Vec, + directory_sectors: Vec, + mini_fat_sectors: Vec, + dir_entries: HashMap, + mini_stream_start: u32, + mini_stream_size: u64, +} + +pub struct DirectoryEntry { + pub name: String, + pub size: u64, + pub start_sector: u32, + pub stream_type: u8, +} + +impl<'a> OLECFParser<'a> { + pub fn new(data: &'a [u8]) -> Result { + let mut parser = OLECFParser { + data, + sector_size: 1 << SECTOR_SHIFT, + mini_sector_size: 1 << MINI_SECTOR_SHIFT, + fat_sectors: Vec::new(), + directory_sectors: Vec::new(), + mini_fat_sectors: Vec::new(), + dir_entries: HashMap::new(), + mini_stream_start: 0, + mini_stream_size: 0, + }; + + match parser.parse(data) { + Ok((_rest, ())) => Ok(parser), + Err(_) => Err("Failed to parse OLECF data"), + } + } + + fn parse(&mut self, input: &'a [u8]) -> IResult<&'a [u8], ()> { + let (input, ()) = self.parse_header(input)?; + self.parse_directory(input) + } + + /// Parses the Compound File Header. 
+ /// + /// [MS-CFB] Section 2.2 + fn parse_header(&mut self, input: &'a [u8]) -> IResult<&'a [u8], ()> { + let ( + input, + ( + _signature, + _clsid, + _minor_version, + _major_version, + _byte_order, + _sector_shift, + _mini_sector_shift, + _reserved, + _num_dir_sectors, + num_fat_sectors, + first_dir_sector, + _transaction_sig_num, + _mini_stream_cutoff_size, + first_mini_fat, + mini_fat_count, + _first_difat_sector, + _difat_count, + ), + ) = tuple(( + verify(take(8_usize), |sig: &[u8]| sig == OLECF_SIGNATURE), + take(16usize), // CLSID, + le_u16, // minor_version + le_u16, // major_version + verify(le_u16, |byte_order| *byte_order == 0xFFFE), + le_u16, // sector_shift + le_u16, // mini_sector_shift + take(6usize), // reserved + le_u32, // num_dir_sectors + le_u32, // num_fat_sectors + le_u32, // first_dir_sector + le_u32, // transaction_sig_num + le_u32, // mini_stream_cutoff_size + le_u32, // first_mini_fat + le_u32, // mini_fat_count + le_u32, // _first_difat_sector + le_u32, // _difat_count + ))(input)?; + + // Parse the first 109 DIFAT entries, which are contained in the + // header sector. + let (input, _) = fold_many_m_n( + 0, + 109, + le_u32, + || {}, + |_, sector| { + if sector < MAX_REGULAR_SECTOR { + self.fat_sectors.push(sector); + } + }, + )(input)?; + + // (C) Directory chain + if first_dir_sector < MAX_REGULAR_SECTOR { + self.directory_sectors = self.follow_chain(first_dir_sector); + } else { + return Err(nom::Err::Error(NomError::new( + input, + ErrorKind::Verify, + ))); + } + + // (D) MiniFAT chain + if mini_fat_count > 0 && first_mini_fat < MAX_REGULAR_SECTOR { + self.mini_fat_sectors = self.follow_chain(first_mini_fat); + } + + // (E) If no FAT sectors but num_fat_sectors != 0 => error + if self.fat_sectors.is_empty() && num_fat_sectors > 0 { + return Err(nom::Err::Error(NomError::new( + input, + ErrorKind::Verify, + ))); + } + + Ok((input, ())) + } + + fn parse_directory(&mut self, _input: &'a [u8]) -> IResult<&'a [u8], ()> { + if self.directory_sectors.is_empty() { + return Err(nom::Err::Error(NomError::new( + _input, + ErrorKind::Verify, + ))); + } + + for §or in &self.directory_sectors { + let mut entry_offset = 0; + + while entry_offset + DIRECTORY_ENTRY_SIZE as usize + <= self.sector_size + { + let abs_offset = self.sector_to_offset(sector) + entry_offset; + if abs_offset + DIRECTORY_ENTRY_SIZE as usize > self.data.len() + { + break; + } + if let Ok(entry) = self.read_directory_entry(abs_offset) { + if entry.stream_type == ROOT_STORAGE_TYPE { + self.mini_stream_start = entry.start_sector; + self.mini_stream_size = entry.size; + } + if entry.stream_type == STORAGE_TYPE + || entry.stream_type == STREAM_TYPE + || entry.stream_type == ROOT_STORAGE_TYPE + { + self.dir_entries.insert(entry.name.clone(), entry); + } + } + entry_offset += DIRECTORY_ENTRY_SIZE as usize; + } + } + + Ok((_input, ())) + } + + pub fn is_valid_header(&self) -> bool { + self.data.len() >= OLECF_SIGNATURE.len() + && &self.data[..OLECF_SIGNATURE.len()] == OLECF_SIGNATURE + } + + pub fn get_stream_names(&self) -> Result, &'static str> { + if self.dir_entries.is_empty() { + return Err("No streams found"); + } + Ok(self.dir_entries.keys().cloned().collect()) + } + + pub fn get_stream_size( + &self, + stream_name: &str, + ) -> Result { + self.dir_entries + .get(stream_name) + .map(|e| e.size) + .ok_or("Stream not found") + } + + pub fn get_streams( + &self, + ) -> impl Iterator { + self.dir_entries.iter().map(|(name, entry)| (name.as_str(), entry)) + } + + pub fn get_stream_data( + &self, + 
stream_name: &str, + ) -> Result, &'static str> { + let entry = + self.dir_entries.get(stream_name).ok_or("Stream not found")?; + + if entry.size < 4096 && entry.stream_type != ROOT_STORAGE_TYPE { + self.get_mini_stream_data(entry.start_sector, entry.size) + } else { + self.get_regular_stream_data(entry.start_sector, entry.size) + } + } + + fn sector_to_offset(&self, sector: u32) -> usize { + // The first sector begins at offset 512 + 512 + (sector as usize * self.sector_size) + } + + fn read_sector(&self, sector: u32) -> Result<&[u8], &'static str> { + let offset = self.sector_to_offset(sector); + if offset + self.sector_size > self.data.len() { + return Err("Sector read out of bounds"); + } + Ok(&self.data[offset..offset + self.sector_size]) + } + + fn get_fat_entry(&self, sector: u32) -> Result { + let entry_index = sector as usize; + let entries_per_sector = self.sector_size / 4; + let fat_sector_index = entry_index / entries_per_sector; + if fat_sector_index >= self.fat_sectors.len() { + return Err("FAT entry sector index out of range"); + } + let fat_sector = self.fat_sectors[fat_sector_index]; + let fat = self.read_sector(fat_sector)?; + let fat_entry_offset = (entry_index % entries_per_sector) * 4; + parse_u32_at(fat, fat_entry_offset) + } + + fn follow_chain(&self, start_sector: u32) -> Vec { + let mut chain = Vec::new(); + let mut current = start_sector; + + loop { + // Ensure that the current sector is a valid one. + if current > MAX_REGULAR_SECTOR { + break; + } + + // Prevent cycles by keeping track of visited sectors + if chain.contains(¤t) { + // We've seen this sector before - it's a cycle + break; + } + + chain.push(current); + + // Now current is the next entry in the chain. + current = match self.get_fat_entry(current) { + Err(_) => break, + Ok(n) if n == ENDOFCHAIN => break, + Ok(n) => n, + }; + } + + chain + } + + fn read_directory_entry( + &self, + offset: usize, + ) -> Result { + if offset + 128 > self.data.len() { + return Err("Incomplete directory entry"); + } + + let name_len = parse_u16_at(self.data, offset + 64)? 
as usize; + if !(2..=64).contains(&name_len) { + return Err("Invalid name length"); + } + + let name_bytes = &self.data[offset..offset + name_len]; + let filtered: Vec = + name_bytes.iter().copied().filter(|&b| b != 0).collect(); + let name = String::from_utf8_lossy(&filtered).to_string(); + + let stream_type = self.data[offset + 66]; + let start_sector = parse_u32_at(self.data, offset + 116)?; + let size_32 = parse_u32_at(self.data, offset + 120)?; + let size = size_32 as u64; + + Ok(DirectoryEntry { name, size, start_sector, stream_type }) + } + + fn get_regular_stream_data( + &self, + start_sector: u32, + size: u64, + ) -> Result, &'static str> { + let mut data = Vec::with_capacity(size as usize); + let mut current_sector = start_sector; + let mut total_read = 0; + + while current_sector < MAX_REGULAR_SECTOR && total_read < size as usize + { + let sector_data = self.read_sector(current_sector)?; + let bytes_to_read = + std::cmp::min(self.sector_size, size as usize - total_read); + + data.extend_from_slice(§or_data[..bytes_to_read]); + total_read += bytes_to_read; + + if total_read < size as usize { + let next = self.get_fat_entry(current_sector)?; + if next == ENDOFCHAIN || next >= MAX_REGULAR_SECTOR { + break; + } + current_sector = next; + } + } + + if data.len() != size as usize { + return Err("Incomplete stream data"); + } + + Ok(data) + } + + fn get_root_mini_stream_data(&self) -> Result, &'static str> { + self.get_regular_stream_data( + self.mini_stream_start, + self.mini_stream_size, + ) + } + + fn get_minifat_entry( + &self, + mini_sector: u32, + ) -> Result { + if self.mini_fat_sectors.is_empty() { + return Ok(ENDOFCHAIN); + } + + let entry_index = mini_sector as usize; + let entries_per_sector = self.sector_size / 4; + let fat_sector_index = entry_index / entries_per_sector; + if fat_sector_index >= self.mini_fat_sectors.len() { + return Ok(ENDOFCHAIN); + } + let sector = self.mini_fat_sectors[fat_sector_index]; + let fat = self.read_sector(sector)?; + let offset = (entry_index % entries_per_sector) * 4; + parse_u32_at(fat, offset) + } + + fn get_mini_stream_data( + &self, + start_mini_sector: u32, + size: u64, + ) -> Result, &'static str> { + if self.mini_stream_size == 0 { + return Err("No mini stream present"); + } + + let mini_stream_data = self.get_root_mini_stream_data()?; + let mini_data_len = mini_stream_data.len(); + + let mut data = Vec::with_capacity(size as usize); + let mut current = start_mini_sector; + + while current < MAX_REGULAR_SECTOR && data.len() < size as usize { + let mini_offset = current as usize * self.mini_sector_size; + if mini_offset >= mini_data_len { + return Err("Mini stream offset out of range"); + } + + let bytes_to_read = std::cmp::min( + self.mini_sector_size, + size as usize - data.len(), + ); + if mini_offset + bytes_to_read > mini_data_len { + return Err("Mini stream extends beyond available data"); + } + + data.extend_from_slice( + &mini_stream_data[mini_offset..mini_offset + bytes_to_read], + ); + + if data.len() < size as usize { + let next = self.get_minifat_entry(current)?; + if next == ENDOFCHAIN || next >= MAX_REGULAR_SECTOR { + break; + } + current = next; + } + } + + if data.len() != size as usize { + return Err("Incomplete mini stream data"); + } + + Ok(data) + } +} + +fn parse_u16_at(data: &[u8], offset: usize) -> Result { + if offset + 2 > data.len() { + return Err("Buffer too small for u16"); + } + let slice = &data[offset..offset + 2]; + match le_u16::<&[u8], NomError<&[u8]>>(slice) { + Ok((_, val)) => Ok(val), + Err(_) => 
Err("Failed to parse u16"), + } +} + +fn parse_u32_at(data: &[u8], offset: usize) -> Result { + if offset + 4 > data.len() { + return Err("Buffer too small for u32"); + } + let slice = &data[offset..offset + 4]; + match le_u32::<&[u8], NomError<&[u8]>>(slice) { + Ok((_, val)) => Ok(val), + Err(_) => Err("Failed to parse u32"), + } +} diff --git a/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out b/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out new file mode 100644 index 000000000..e1947b31e --- /dev/null +++ b/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out @@ -0,0 +1,16 @@ +olecf: + is_olecf: true + stream_names: + - "CompObj" + - "1Table" + - "SummaryInformation" + - "Root Entry" + - "WordDocument" + - "DocumentSummaryInformation" + stream_sizes: + - 114 + - 7273 + - 4096 + - 128 + - 4096 + - 4096 \ No newline at end of file diff --git a/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip b/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip new file mode 100644 index 000000000..2236f51f3 Binary files /dev/null and b/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip differ diff --git a/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.out b/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.out new file mode 100644 index 000000000..8867215a1 --- /dev/null +++ b/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.out @@ -0,0 +1,2 @@ +olecf: + is_olecf: true \ No newline at end of file diff --git a/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.zip b/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.zip new file mode 100644 index 000000000..2e6f897de Binary files /dev/null and b/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.zip differ diff --git a/lib/src/modules/protos/mods.proto b/lib/src/modules/protos/mods.proto index 486d8d96d..94bdd4a9f 100644 --- a/lib/src/modules/protos/mods.proto +++ b/lib/src/modules/protos/mods.proto @@ -6,6 +6,8 @@ import "elf.proto"; import "pe.proto"; import "lnk.proto"; import "macho.proto"; +import "olecf.proto"; +import "vba.proto"; package mods; @@ -16,4 +18,6 @@ message Modules { optional dotnet.Dotnet dotnet = 3; optional macho.Macho macho = 4; optional lnk.Lnk lnk = 5; + optional olecf.Olecf olecf = 6; + optional vba.Vba vba = 7; } \ No newline at end of file diff --git a/lib/src/modules/protos/olecf.proto b/lib/src/modules/protos/olecf.proto new file mode 100644 index 000000000..4dd56724a --- /dev/null +++ b/lib/src/modules/protos/olecf.proto @@ -0,0 +1,23 @@ +syntax = "proto2"; +import "yara.proto"; + +package olecf; + +option (yara.module_options) = { + name : "olecf" + root_message: "olecf.Olecf" + rust_module: "olecf" + cargo_feature: "olecf-module" +}; + +message Olecf { + // True if file is an OLE CF file. + required bool is_olecf = 1; + // Streams contained in the OLE CF file. 
+ repeated Stream streams = 2; +} + +message Stream { + required string name = 1; + required uint64 size = 2; +} \ No newline at end of file diff --git a/lib/src/modules/protos/vba.proto b/lib/src/modules/protos/vba.proto new file mode 100644 index 000000000..e23a054c4 --- /dev/null +++ b/lib/src/modules/protos/vba.proto @@ -0,0 +1,37 @@ +syntax = "proto2"; +import "yara.proto"; + +package vba; + +option (yara.module_options) = { + name: "vba" + root_message: "vba.Vba" + rust_module: "vba" + cargo_feature: "vba-module" +}; + +message Vba { + // True if VBA macros are present + optional bool has_macros = 1; + + // Names of VBA macro modules found + repeated string module_names = 2; + + // Type of each module (standard, class, form) + repeated string module_types = 3; + + // The actual VBA code for each module + repeated string module_codes = 4; + + // Project metadata + message ProjectInfo { + optional string name = 1; + optional string version = 2; + repeated string references = 3; + + // Additional metadata + optional int32 module_count = 4; + optional bool is_compressed = 5; + } + optional ProjectInfo project_info = 5; +} \ No newline at end of file diff --git a/lib/src/modules/vba/mod.rs b/lib/src/modules/vba/mod.rs new file mode 100644 index 000000000..b40232ebd --- /dev/null +++ b/lib/src/modules/vba/mod.rs @@ -0,0 +1,215 @@ +/*! YARA module that extracts VBA (Visual Basic for Applications) macros from Office documents. + +Read more about the VBA file format specification here: + https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-ovba/575462ba-bf67-4190-9fac-c275523c75fc +*/ + +use crate::modules::prelude::*; +use crate::modules::protos::vba::vba::ProjectInfo; +use crate::modules::protos::vba::*; +use protobuf::MessageField; +use std::collections::HashMap; +use std::io::Cursor; +use std::io::Read; +use zip::ZipArchive; + +mod parser; +use parser::{ModuleType, VbaProject}; + +#[derive(Debug)] +struct VbaExtractor<'a> { + data: &'a [u8], +} + +impl<'a> VbaExtractor<'a> { + fn new(data: &'a [u8]) -> Self { + Self { data } + } + + fn is_zip(&self) -> bool { + self.data.starts_with(&[0x50, 0x4B, 0x03, 0x04]) + } + + fn read_stream( + &self, + ole_parser: &crate::modules::olecf::parser::OLECFParser, + name: &str, + ) -> Result, &'static str> { + let size = ole_parser.get_stream_size(name)? 
as usize; + + // Skip empty streams + if size == 0 { + return Err("Stream is empty"); + } + + let data = ole_parser.get_stream_data(name)?; + + Ok(data) + } + + fn extract_from_ole(&self) -> Result { + let ole_parser = + crate::modules::olecf::parser::OLECFParser::new(self.data)?; + let stream_names = ole_parser.get_stream_names()?; + + let mut vba_dir = None; + let mut modules = HashMap::new(); + let mut project_streams = Vec::new(); + + // First process the dir stream + if let Some(dir_name) = + stream_names.iter().find(|n| n.to_lowercase().trim() == "dir") + { + if let Ok(data) = self.read_stream(&ole_parser, dir_name) { + vba_dir = Some(data); + } + } + + // Then process other streams + for name in &stream_names { + let lowercase_name = name.to_lowercase(); + + if lowercase_name != "dir" { + if lowercase_name.contains("module") + || lowercase_name.contains("thisdocument") + || lowercase_name.ends_with(".bas") + || lowercase_name.ends_with(".cls") + || lowercase_name.ends_with(".frm") + { + if let Ok(data) = self.read_stream(&ole_parser, name) { + if !data.is_empty() { + modules.insert(name.clone(), data); + } + } + } else if lowercase_name.contains("project") + && !lowercase_name.contains("_vba_project") + { + if let Ok(data) = self.read_stream(&ole_parser, name) { + project_streams.push(data); + } + } + } + } + + // Always try the dir stream first if we found it + if let Some(dir_data) = vba_dir { + parser::VbaProject::parse(&dir_data, modules) + } else { + Err("No VBA directory stream found") + } + } + + fn extract_from_zip(&self) -> Result { + let reader = Cursor::new(&self.data); + let mut archive = ZipArchive::new(reader) + .map_err(|_| "Failed to read ZIP archive")?; + + // Search for potential VBA project files + let vba_project_names = [ + "word/vbaProject.bin", + "xl/vbaProject.bin", + "ppt/vbaProject.bin", + "vbaProject.bin", + ]; + + for name in &vba_project_names { + match archive.by_name(name) { + Ok(mut file) => { + let mut contents = Vec::new(); + file.read_to_end(&mut contents) + .map_err(|_| "Failed to read vbaProject.bin")?; + + // Parse as OLE + let ole_parser = + crate::modules::olecf::parser::OLECFParser::new( + &contents, + )?; + let stream_names = ole_parser.get_stream_names()?; + + let mut vba_dir = None; + let mut modules = HashMap::new(); + + for stream_name in &stream_names { + let _stream_size = + ole_parser.get_stream_size(stream_name)?; + + if stream_name.starts_with("dir") { + if let Ok(data) = + self.read_stream(&ole_parser, stream_name) + { + if !data.is_empty() { + vba_dir = Some(data); + } + } + } + } + + // Process other streams + for name in &stream_names { + if let Ok(data) = self.read_stream(&ole_parser, name) { + if !data.is_empty() { + modules.insert(name.clone(), data); + } + } + } + + // Use dir stream if found, otherwise fail + if let Some(dir_data) = vba_dir { + return parser::VbaProject::parse(&dir_data, modules); + } + } + Err(_) => continue, + } + } + + Err("No VBA project found in ZIP") + } +} + +#[module_main] +fn main(data: &[u8], _meta: Option<&[u8]>) -> Vba { + let mut vba = Vba::new(); + vba.has_macros = Some(false); + + let extractor = VbaExtractor::new(data); + + let project_result = if extractor.is_zip() { + extractor.extract_from_zip() + } else { + extractor.extract_from_ole() + }; + + match project_result { + Ok(project) => { + vba.has_macros = Some(true); + + let mut project_info = ProjectInfo::new(); + project_info.name = Some(project.info.name.clone()); + project_info.version = Some(project.info.version.clone()); + 
project_info.references.clone_from(&project.info.references); + + // Add metadata + let module_count = project.modules.len() as i32; + project_info.module_count = Some(module_count); + project_info.is_compressed = Some(true); + + vba.project_info = MessageField::some(project_info); + + // Process modules + for module in project.modules.values() { + vba.module_names.push(module.name.clone()); + vba.module_types.push(match module.module_type { + ModuleType::Standard => "Standard".to_string(), + ModuleType::Class => "Class".to_string(), + ModuleType::Unknown => "Unknown".to_string(), + }); + vba.module_codes.push(module.code.clone()); + } + } + Err(_) => { + vba.has_macros = Some(false); + } + } + + vba +} diff --git a/lib/src/modules/vba/parser.rs b/lib/src/modules/vba/parser.rs new file mode 100644 index 000000000..7cf40677c --- /dev/null +++ b/lib/src/modules/vba/parser.rs @@ -0,0 +1,661 @@ +use std::collections::HashMap; +use nom::{ + number::complete::{le_u16, le_u32}, +}; + +pub enum ModuleType { + Standard, + Class, + Unknown, +} + +pub struct ProjectInfo { + pub name: String, + pub version: String, + pub references: Vec, +} + +pub struct VbaModule { + pub name: String, + pub code: String, + pub module_type: ModuleType, +} + +pub struct VbaProject { + pub modules: HashMap, + pub info: ProjectInfo, +} + +impl VbaProject { + fn copytoken_help(difference: usize) -> (u16, u16, u32, u16) { + let bit_count = (difference as f64).log2().ceil() as u32; + let bit_count = bit_count.max(4); + let length_mask = 0xFFFF >> bit_count; + let offset_mask = !length_mask; + let maximum_length = (0xFFFF >> bit_count) + 3; + + (length_mask, offset_mask, bit_count, maximum_length) + } + + pub fn decompress_stream(compressed: &[u8]) -> Result, &'static str> { + if compressed.is_empty() { + return Err("Empty input buffer"); + } + + if compressed[0] != 0x01 { + return Err("Invalid signature byte"); + } + + let mut decompressed = Vec::new(); + let mut current = 1; // Skip signature byte + + while current < compressed.len() { + // We need 2 bytes for the chunk header + if current + 2 > compressed.len() { + return Err("Incomplete chunk header"); + } + + let chunk_header = u16::from_le_bytes( + compressed[current..current+2].try_into().map_err(|_| "Failed to parse chunk header")? + ); + let chunk_size = (chunk_header & 0x0FFF) as usize + 3; + let chunk_is_compressed = (chunk_header & 0x8000) != 0; + + current += 2; + + if chunk_is_compressed && chunk_size > 4095 { + return Err("CompressedChunkSize > 4095 but CompressedChunkFlag == 1"); + } + if !chunk_is_compressed && chunk_size != 4095 { + return Err("CompressedChunkSize != 4095 but CompressedChunkFlag == 0"); + } + + let chunk_end = std::cmp::min(compressed.len(), current + chunk_size); + + if !chunk_is_compressed { + if current + 4096 > compressed.len() { + return Err("Incomplete uncompressed chunk"); + } + decompressed.extend_from_slice(&compressed[current..current + 4096]); + current += 4096; + continue; + } + + let decompressed_chunk_start = decompressed.len(); + + while current < chunk_end { + let flag_byte = compressed[current]; + current += 1; + + for bit_index in 0..8 { + if current >= chunk_end { + break; + } + + if (flag_byte & (1 << bit_index)) == 0 { + decompressed.push(compressed[current]); + current += 1; + } else { + if current + 2 > compressed.len() { + return Err("Incomplete copy token"); + } + + let copy_token = u16::from_le_bytes( + compressed[current..current+2].try_into().map_err(|_| "Failed to parse copy token")? 
+ ); + let (length_mask, offset_mask, bit_count, _) = + Self::copytoken_help(decompressed.len() - decompressed_chunk_start); + + let length = (copy_token & length_mask) + 3; + let temp1 = copy_token & offset_mask; + let temp2 = 16 - bit_count; + let offset = (temp1 >> temp2) + 1; + + if offset as usize > decompressed.len() { + return Err("Invalid copy token offset"); + } + + let copy_source = decompressed.len() - offset as usize; + for i in 0..length { + let source_idx = copy_source + i as usize; + if source_idx >= decompressed.len() { + return Err("Copy token source out of bounds"); + } + decompressed.push(decompressed[source_idx]); + } + current += 2; + } + } + } + } + + Ok(decompressed) + } + + fn parse_u16(input: &[u8]) -> Result<(&[u8], u16), &'static str> { + le_u16::<&[u8], nom::error::Error<&[u8]>>(input) + .map_err(|_nom_err| "Failed to parse u16") + } + + fn parse_u32(input: &[u8]) -> Result<(&[u8], u32), &'static str> { + le_u32::<&[u8], nom::error::Error<&[u8]>>(input) + .map_err(|_nom_err| "Failed to parse u32") + } + + fn parse_bytes(input: &[u8], len: usize) -> Result<(&[u8], &[u8]), &'static str> { + if input.len() < len { + Err("Not enough bytes to parse the requested slice") + } else { + Ok((&input[len..], &input[..len])) + } + } + + pub fn parse(compressed_dir_stream: &[u8], module_streams: HashMap>) -> Result { + let dir_stream = Self::decompress_stream(compressed_dir_stream)?; + + // Our 'input' will move forward as we parse + let mut _input = &dir_stream[..]; + + // -- PROJECTSYSKIND Record + let (rest, syskind_id) = Self::parse_u16(_input)?; _input = rest; + if syskind_id != 0x0001 { + return Err("Invalid SYSKIND_ID"); + } + let (rest, syskind_size) = Self::parse_u32(_input)?; _input = rest; + if syskind_size != 0x0004 { + return Err("Invalid SYSKIND_SIZE"); + } + let (rest, _syskind) = Self::parse_u32(_input)?; _input = rest; + + // -- PROJECTLCID Record + let (rest, lcid_id) = Self::parse_u16(_input)?; _input = rest; + if lcid_id != 0x0002 { + return Err("Invalid LCID_ID"); + } + let (rest, lcid_size) = Self::parse_u32(_input)?; _input = rest; + if lcid_size != 0x0004 { + return Err("Invalid LCID_SIZE"); + } + let (rest, lcid) = Self::parse_u32(_input)?; _input = rest; + if lcid != 0x409 { + return Err("Invalid LCID"); + } + + // -- PROJECTLCIDINVOKE Record + let (rest, lcid_invoke_id) = Self::parse_u16(_input)?; _input = rest; + if lcid_invoke_id != 0x0014 { + return Err("Invalid LCIDINVOKE_ID"); + } + let (rest, lcid_invoke_size) = Self::parse_u32(_input)?; _input = rest; + if lcid_invoke_size != 0x0004 { + return Err("Invalid LCIDINVOKE_SIZE"); + } + let (rest, lcid_invoke) = Self::parse_u32(_input)?; _input = rest; + if lcid_invoke != 0x409 { + return Err("Invalid LCIDINVOKE"); + } + + // -- PROJECTCODEPAGE Record + let (rest, codepage_id) = Self::parse_u16(_input)?; _input = rest; + if codepage_id != 0x0003 { + return Err("Invalid CODEPAGE_ID"); + } + let (rest, codepage_size) = Self::parse_u32(_input)?; _input = rest; + if codepage_size != 0x0002 { + return Err("Invalid CODEPAGE_SIZE"); + } + let (rest, _codepage) = Self::parse_u16(_input)?; _input = rest; + + // -- PROJECTNAME Record + let (rest, name_id) = Self::parse_u16(_input)?; _input = rest; + if name_id != 0x0004 { + return Err("Invalid NAME_ID"); + } + let (rest, name_size) = Self::parse_u32(_input)?; _input = rest; + let name_size = name_size as usize; + if !(1..=128).contains(&name_size) { + return Err("Project name not in valid range"); + } + let (rest, name_bytes) = Self::parse_bytes(rest, 
name_size)?; + let project_name = String::from_utf8_lossy(name_bytes).to_string(); + _input = rest; + + // -- PROJECTDOCSTRING Record + let (rest, doc_id) = Self::parse_u16(_input)?; _input = rest; + if doc_id != 0x0005 { + return Err("Invalid DOCSTRING_ID"); + } + let (rest, doc_size) = Self::parse_u32(_input)?; _input = rest; + let doc_size = doc_size as usize; + let (rest, _doc_string) = Self::parse_bytes(rest, doc_size)?; + _input = rest; + let (rest, doc_reserved) = Self::parse_u16(_input)?; _input = rest; + if doc_reserved != 0x0040 { + return Err("Invalid DOCSTRING_Reserved"); + } + let (rest, doc_unicode_size) = Self::parse_u32(_input)?; _input = rest; + let doc_unicode_size = doc_unicode_size as usize; + if doc_unicode_size % 2 != 0 { + return Err("DOCSTRING_Unicode size not even"); + } + let (rest, _doc_unicode) = Self::parse_bytes(rest, doc_unicode_size)?; + _input = rest; + + // -- PROJECTHELPFILEPATH Record + let (rest, helpfile_id) = Self::parse_u16(_input)?; _input = rest; + if helpfile_id != 0x0006 { + return Err("Invalid HELPFILEPATH_ID"); + } + let (rest, helpfile_size1) = Self::parse_u32(_input)?; _input = rest; + let helpfile_size1 = helpfile_size1 as usize; + if helpfile_size1 > 260 { + return Err("Help file path 1 too long"); + } + let (rest, helpfile1) = Self::parse_bytes(rest, helpfile_size1)?; + _input = rest; + let (rest, helpfile_reserved) = Self::parse_u16(_input)?; _input = rest; + if helpfile_reserved != 0x003D { + return Err("Invalid HELPFILEPATH_Reserved"); + } + let (rest, helpfile_size2) = Self::parse_u32(_input)?; _input = rest; + let helpfile_size2 = helpfile_size2 as usize; + if helpfile_size2 != helpfile_size1 { + return Err("Help file sizes don't match"); + } + let (rest, helpfile2) = Self::parse_bytes(rest, helpfile_size2)?; + _input = rest; + if helpfile1 != helpfile2 { + return Err("Help files don't match"); + } + + // -- PROJECTHELPCONTEXT Record + let (rest, helpcontext_id) = Self::parse_u16(_input)?; _input = rest; + if helpcontext_id != 0x0007 { + return Err("Invalid HELPCONTEXT_ID"); + } + let (rest, helpcontext_size) = Self::parse_u32(_input)?; _input = rest; + if helpcontext_size != 0x0004 { + return Err("Invalid HELPCONTEXT_SIZE"); + } + let (rest, _helpcontext) = Self::parse_u32(_input)?; _input = rest; + + // -- PROJECTLIBFLAGS Record + let (rest, libflags_id) = Self::parse_u16(_input)?; _input = rest; + if libflags_id != 0x0008 { + return Err("Invalid LIBFLAGS_ID"); + } + let (rest, libflags_size) = Self::parse_u32(_input)?; _input = rest; + if libflags_size != 0x0004 { + return Err("Invalid LIBFLAGS_SIZE"); + } + let (rest, libflags) = Self::parse_u32(_input)?; _input = rest; + if libflags != 0x0000 { + return Err("Invalid LIBFLAGS"); + } + + // -- PROJECTVERSION Record + let (rest, version_id) = Self::parse_u16(_input)?; _input = rest; + if version_id != 0x0009 { + return Err("Invalid VERSION_ID"); + } + let (rest, version_reserved) = Self::parse_u32(_input)?; _input = rest; + if version_reserved != 0x0004 { + return Err("Invalid VERSION_Reserved"); + } + let (rest, version_major) = Self::parse_u32(_input)?; _input = rest; + let (rest, version_minor) = Self::parse_u16(_input)?; _input = rest; + + // -- PROJECTCONSTANTS Record + let (rest, constants_id) = Self::parse_u16(_input)?; _input = rest; + if constants_id != 0x000C { + return Err("Invalid CONSTANTS_ID"); + } + let (rest, constants_size) = Self::parse_u32(_input)?; _input = rest; + let constants_size = constants_size as usize; + if constants_size > 1015 { + return Err("Constants 
size too large"); + } + let (rest, _constants) = Self::parse_bytes(rest, constants_size)?; + _input = rest; + let (rest, constants_reserved) = Self::parse_u16(_input)?; _input = rest; + if constants_reserved != 0x003C { + return Err("Invalid CONSTANTS_Reserved"); + } + let (rest, constants_unicode_size) = Self::parse_u32(_input)?; _input = rest; + let constants_unicode_size = constants_unicode_size as usize; + if constants_unicode_size % 2 != 0 { + return Err("Constants unicode size not even"); + } + let (rest, _constants_unicode) = Self::parse_bytes(rest, constants_unicode_size)?; + _input = rest; + + // -- Parse references until we hit PROJECTMODULES_Id = 0x000F + let mut references = Vec::new(); + let mut last_check; + loop { + let (rest2, check) = match Self::parse_u16(_input) { + Ok(x) => x, + Err(_) => return Err("Could not parse reference type (u16)"), + }; + _input = rest2; + last_check = check; + + if check == 0x000F { + // That means we reached PROJECTMODULES_Id + break; + } + + match check { + 0x0016 => { + // REFERENCE Name + let (rest2, name_size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, name_bytes) = Self::parse_bytes(_input, name_size as usize)?; + _input = rest2; + let name = String::from_utf8_lossy(name_bytes).to_string(); + references.push(name); + + let (rest2, reserved) = Self::parse_u16(_input)?; _input = rest2; + if reserved != 0x003E { + return Err("Invalid REFERENCE_Reserved"); + } + let (rest2, unicode_size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _name_unicode) = Self::parse_bytes(_input, unicode_size as usize)?; + _input = rest2; + }, + 0x0033 => { + // REFERENCEORIGINAL + let (rest2, size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _libid) = Self::parse_bytes(_input, size as usize)?; + _input = rest2; + }, + 0x002F => { + // REFERENCECONTROL + let (rest2, size_twiddled) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _twiddled) = Self::parse_bytes(_input, size_twiddled as usize)?; + _input = rest2; + + let (rest2, reserved1) = Self::parse_u32(_input)?; _input = rest2; + if reserved1 != 0x0000 { + return Err("Invalid REFERENCECONTROL_Reserved1"); + } + let (rest2, reserved2) = Self::parse_u16(_input)?; _input = rest2; + if reserved2 != 0x0000 { + return Err("Invalid REFERENCECONTROL_Reserved2"); + } + + // Possibly an optional name record + let (maybe_rest, maybe_check2) = match Self::parse_u16(_input) { + Ok(x) => x, + Err(_) => return Err("Failed to read optional name or reserved3"), + }; + + if maybe_check2 == 0x0016 { + // This means we have a name record + _input = maybe_rest; + let (rest2, name_size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _name) = Self::parse_bytes(_input, name_size as usize)?; + _input = rest2; + + let (rest2, reserved) = Self::parse_u16(_input)?; _input = rest2; + if reserved != 0x003E { + return Err("Invalid REFERENCECONTROL_NameRecord_Reserved"); + } + let (rest2, unicode_size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _name_unicode) = Self::parse_bytes(_input, unicode_size as usize)?; + _input = rest2; + + // Next we parse the next 0x0030 + let (rest2, reserved3) = Self::parse_u16(_input)?; _input = rest2; + if reserved3 != 0x0030 { + return Err("Invalid REFERENCECONTROL_Reserved3"); + } + } else { + // No name record, so maybe_check2 is actually reserved3 + _input = maybe_rest; + if maybe_check2 != 0x0030 { + return Err("Invalid REFERENCECONTROL_Reserved3"); + } + } + + let (rest2, size_extended) = Self::parse_u32(_input)?; _input = rest2; + 
let (rest2, size_libid) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _libid) = Self::parse_bytes(_input, size_libid as usize)?; + _input = rest2; + let (rest2, _reserved4) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _reserved5) = Self::parse_u16(_input)?; _input = rest2; + let (rest2, _original_typelib) = Self::parse_bytes(_input, 16)?; + _input = rest2; + let (rest2, _cookie) = Self::parse_u32(_input)?; _input = rest2; + let _ = size_extended; // just to avoid unused var warnings + }, + 0x000D => { + // REFERENCEREGISTERED + let (rest2, _size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, libid_size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _libid) = Self::parse_bytes(_input, libid_size as usize)?; + _input = rest2; + let (rest2, reserved1) = Self::parse_u32(_input)?; _input = rest2; + if reserved1 != 0x0000 { + return Err("Invalid REFERENCEREGISTERED_Reserved1"); + } + let (rest2, reserved2) = Self::parse_u16(_input)?; _input = rest2; + if reserved2 != 0x0000 { + return Err("Invalid REFERENCEREGISTERED_Reserved2"); + } + }, + 0x000E => { + // REFERENCEPROJECT + let (rest2, _size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, libid_abs_size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _libid_abs) = Self::parse_bytes(_input, libid_abs_size as usize)?; + _input = rest2; + let (rest2, libid_rel_size) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _libid_rel) = Self::parse_bytes(_input, libid_rel_size as usize)?; + _input = rest2; + let (rest2, _major) = Self::parse_u32(_input)?; _input = rest2; + let (rest2, _minor) = Self::parse_u16(_input)?; _input = rest2; + }, + _ => return Err("Invalid reference type"), + } + } + + if last_check != 0x000F { + return Err("Invalid PROJECTMODULES_Id"); + } + + let (rest, modules_size) = Self::parse_u32(_input)?; _input = rest; + if modules_size != 0x0002 { + return Err("Invalid PROJECTMODULES_Size"); + } + + let (rest, modules_count) = Self::parse_u16(_input)?; _input = rest; + + let (rest, cookie_id) = Self::parse_u16(_input)?; _input = rest; + if cookie_id != 0x0013 { + return Err("Invalid ProjectCookie_Id"); + } + + let (rest, cookie_size) = Self::parse_u32(_input)?; _input = rest; + if cookie_size != 0x0002 { + return Err("Invalid ProjectCookie_Size"); + } + + let (rest, _cookie) = Self::parse_u16(_input)?; + _input = rest; + + // -- Parse each module + let mut modules = HashMap::new(); + for _ in 0..modules_count { + // MODULENAME record + let (rest2, module_id) = Self::parse_u16(_input)?; + _input = rest2; + if module_id != 0x0019 { + return Err("Invalid MODULENAME_Id"); + } + + let (rest2, module_name_size) = Self::parse_u32(_input)?; + _input = rest2; + let (rest2, name_bytes) = Self::parse_bytes(_input, module_name_size as usize)?; + _input = rest2; + let module_name = String::from_utf8_lossy(name_bytes).to_string(); + + let mut module_type = ModuleType::Unknown; + let mut stream_name = String::new(); + let mut module_offset = 0u32; + + // Read all sections until we get the terminator 0x002B + loop { + let (rest2, section_id) = match Self::parse_u16(_input) { + Ok(x) => x, + Err(_) => return Err("Failed to parse module section ID"), + }; + _input = rest2; + + match section_id { + 0x0047 => { + // MODULENAMEUNICODE + let (rest3, unicode_size) = Self::parse_u32(_input)?; + _input = rest3; + let (rest3, _unicode_name) = Self::parse_bytes(_input, unicode_size as usize)?; + _input = rest3; + }, + 0x001A => { + // MODULESTREAMNAME + let (rest3, stream_size) = 
Self::parse_u32(_input)?; + _input = rest3; + let (rest3, stream_bytes) = Self::parse_bytes(_input, stream_size as usize)?; + _input = rest3; + stream_name = String::from_utf8_lossy(stream_bytes).to_string(); + + let (rest3, reserved) = Self::parse_u16(_input)?; + _input = rest3; + if reserved != 0x0032 { + return Err("Invalid STREAMNAME_Reserved"); + } + + let (rest3, unicode_size) = Self::parse_u32(_input)?; + _input = rest3; + let (rest3, _unicode_name) = Self::parse_bytes(_input, unicode_size as usize)?; + _input = rest3; + }, + 0x001C => { + // MODULEDOCSTRING + let (rest3, doc_size) = Self::parse_u32(_input)?; + _input = rest3; + let (rest3, _doc_string) = Self::parse_bytes(_input, doc_size as usize)?; + _input = rest3; + + let (rest3, reserved) = Self::parse_u16(_input)?; + _input = rest3; + if reserved != 0x0048 { + return Err("Invalid DOCSTRING_Reserved"); + } + + let (rest3, unicode_size) = Self::parse_u32(_input)?; + _input = rest3; + let (rest3, _unicode_doc) = Self::parse_bytes(_input, unicode_size as usize)?; + _input = rest3; + }, + 0x0031 => { + // MODULEOFFSET + let (rest3, offset_size) = Self::parse_u32(_input)?; + _input = rest3; + if offset_size != 0x0004 { + return Err("Invalid OFFSET_Size"); + } + let (rest3, offset) = Self::parse_u32(_input)?; + module_offset = offset; + _input = rest3; + }, + 0x001E => { + // MODULEHELPCONTEXT + let (rest3, help_size) = Self::parse_u32(_input)?; + _input = rest3; + if help_size != 0x0004 { + return Err("Invalid HELPCONTEXT_Size"); + } + let (rest3, _help_context) = Self::parse_u32(_input)?; + _input = rest3; + }, + 0x002C => { + // MODULECOOKIE + let (rest3, cookie_size) = Self::parse_u32(_input)?; + _input = rest3; + if cookie_size != 0x0002 { + return Err("Invalid COOKIE_Size"); + } + let (rest3, _cookie) = Self::parse_u16(_input)?; + _input = rest3; + }, + 0x0021 => { + // Module is Standard + module_type = ModuleType::Standard; + let (rest3, _reserved) = Self::parse_u32(_input)?; + _input = rest3; + }, + 0x0022 => { + // Module is Class + module_type = ModuleType::Class; + let (rest3, _reserved) = Self::parse_u32(_input)?; + _input = rest3; + }, + 0x0025 => { + // MODULEREADONLY + let (rest3, reserved) = Self::parse_u32(_input)?; + _input = rest3; + if reserved != 0x0000 { + return Err("Invalid READONLY_Reserved"); + } + }, + 0x0028 => { + // MODULEPRIVATE + let (rest3, reserved) = Self::parse_u32(_input)?; + _input = rest3; + if reserved != 0x0000 { + return Err("Invalid PRIVATE_Reserved"); + } + }, + 0x002B => { + // TERMINATOR + let (rest3, reserved) = Self::parse_u32(_input)?; + if reserved != 0x0000 { + return Err("Invalid MODULE_Reserved"); + } + _input = rest3; + break; + }, + _ => return Err("Invalid module section ID"), + } + } + + // Retrieve module code + if let Some(module_data) = module_streams.get(&stream_name) { + if module_offset as usize >= module_data.len() { + return Err("Invalid module offset"); + } + let code_data = &module_data[module_offset as usize..]; + if !code_data.is_empty() { + let decompressed = Self::decompress_stream(code_data)?; + let code = String::from_utf8_lossy(&decompressed).to_string(); + modules.insert( + module_name.clone(), + VbaModule { + name: module_name, + code, + module_type, + } + ); + } + } + } + + Ok(VbaProject { + modules, + info: ProjectInfo { + name: project_name, + version: format!("{}.{}", version_major, version_minor), + references, + }, + }) + } +} \ No newline at end of file diff --git 
a/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.out b/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.out new file mode 100644 index 000000000..408dca385 --- /dev/null +++ b/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.out @@ -0,0 +1,17 @@ +vba: + has_macros: true + module_names: + - "ThisDocument" + module_types: + - "Class" + module_code: + - "Attribute VB_Name = \"ThisDocument\"\r\nAttribute VB_Base = \"1Normal.ThisDocument\"\r\nAttribute VB_GlobalNameSpace = False\r\nAttribute VB_Creatable = False\r\nAttribute VB_PredeclaredId = True\r\nAttribute VB_Exposed = True\r\nAttribute VB_TemplateDerived = True\r\nAttribute VB_Customizable = True\r\n\r\nPrivate Sub Document_New()\r\n MsgBox \"Hello, world!\"\r\nEnd Sub\r\n" + project_info: + name: "Project" + version: "1769106437.10" + references: + - "stdole" + - "Normal" + - "Office" + module_count: 1 + is_compressed: true \ No newline at end of file diff --git a/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.zip b/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.zip new file mode 100644 index 000000000..d12d5ee98 Binary files /dev/null and b/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.zip differ diff --git a/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out b/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out new file mode 100644 index 000000000..a1340cbc3 --- /dev/null +++ b/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out @@ -0,0 +1,2 @@ +vba: + has_macros: false \ No newline at end of file diff --git a/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip b/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip new file mode 100644 index 000000000..2236f51f3 Binary files /dev/null and b/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip differ diff --git a/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.out b/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.out new file mode 100644 index 000000000..408dca385 --- /dev/null +++ b/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.out @@ -0,0 +1,17 @@ +vba: + has_macros: true + module_names: + - "ThisDocument" + module_types: + - "Class" + module_code: + - "Attribute VB_Name = \"ThisDocument\"\r\nAttribute VB_Base = \"1Normal.ThisDocument\"\r\nAttribute VB_GlobalNameSpace = False\r\nAttribute VB_Creatable = False\r\nAttribute VB_PredeclaredId = True\r\nAttribute VB_Exposed = True\r\nAttribute VB_TemplateDerived = True\r\nAttribute VB_Customizable = True\r\n\r\nPrivate Sub Document_New()\r\n MsgBox \"Hello, world!\"\r\nEnd Sub\r\n" + project_info: + name: "Project" + version: "1769106437.10" + references: + - "stdole" + - "Normal" + - "Office" + module_count: 1 + is_compressed: true \ No newline at end of file diff --git 
a/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.zip b/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.zip new file mode 100644 index 000000000..0d1468330 Binary files /dev/null and b/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.zip differ
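---
Usage sketches (illustrative only, not part of the patch).

The new modules plug into the same `mods` entry points as the existing ones, so their protobuf output can be obtained directly from Rust. A minimal sketch, assuming the `yara_x::mods::invoke::<T>` helper that the `vba_parser` fuzz target above exercises and the rust-protobuf accessors generated from olecf.proto and vba.proto; the input path is a placeholder:

fn main() {
    // Placeholder path: any OLE2 document, or an OOXML file carrying a
    // vbaProject.bin, exercises both modules.
    let data = std::fs::read("sample.doc").expect("cannot read input file");

    // `invoke::<T>` runs a single module over the buffer and returns its
    // protobuf output.
    if let Some(olecf) = yara_x::mods::invoke::<yara_x::mods::Olecf>(&data) {
        println!("is_olecf: {}", olecf.is_olecf());
        for stream in &olecf.streams {
            println!("  stream {:?}: {} bytes", stream.name(), stream.size());
        }
    }

    if let Some(vba) = yara_x::mods::invoke::<yara_x::mods::Vba>(&data) {
        println!("has_macros: {}", vba.has_macros());
        for name in &vba.module_names {
            println!("  VBA module: {name}");
        }
    }
}

As with the existing targets, the new fuzz binary should be runnable with the usual `cargo fuzz run vba_parser`.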
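At the rule level, the fields declared in olecf.proto and vba.proto are reachable under the module names registered in add_modules.rs. A hedged sketch of compiling and scanning such a rule, assuming the crate's existing top-level `yara_x::compile` and `yara_x::Scanner` API:

fn main() {
    let data = std::fs::read("sample.doc").expect("cannot read input file");

    // Boolean module fields can be used directly in a condition.
    let rules = yara_x::compile(
        r#"
        import "olecf"
        import "vba"

        rule ole_doc_with_macros {
            condition:
                olecf.is_olecf and vba.has_macros
        }
        "#,
    )
    .expect("rule does not compile");

    let mut scanner = yara_x::Scanner::new(&rules);
    let results = scanner.scan(&data).expect("scan failed");
    println!("matching rules: {}", results.matching_rules().count());
}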
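For reviewers unfamiliar with the CFB layout, the arithmetic behind `sector_to_offset` and `get_fat_entry` is plain fixed-size addressing after the 512-byte header sector. A tiny worked example with assumed values (not taken from the test files):

fn main() {
    const SECTOR_SIZE: usize = 512; // 1 << SECTOR_SHIFT in the parser

    // Sector numbering starts after the 512-byte header, so sector 3
    // begins at byte 512 + 3 * 512 = 2048.
    let sector: u32 = 3;
    let file_offset = 512 + sector as usize * SECTOR_SIZE;

    // Each FAT sector holds 512 / 4 = 128 chain entries, so sector 3's
    // next-sector entry is the fourth u32 of FAT sector 0.
    let entries_per_fat_sector = SECTOR_SIZE / 4;
    let fat_sector_index = sector as usize / entries_per_fat_sector;
    let fat_entry_offset = (sector as usize % entries_per_fat_sector) * 4;

    assert_eq!((file_offset, fat_sector_index, fat_entry_offset), (2048, 0, 12));
}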
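The least obvious part of the VBA side is the MS-OVBA compressed container handled by `decompress_stream`. The split computed by `copytoken_help` is easiest to see with concrete numbers; the token value below is illustrative, not taken from the sample documents:

fn main() {
    // With 16 bytes already produced in the current chunk,
    // ceil(log2(16)) = 4, so a CopyToken spends its top 4 bits on the
    // offset field and its low 12 bits on the length field.
    let difference: usize = 16;
    let bit_count = ((difference as f64).log2().ceil() as u32).max(4);
    let length_mask: u16 = 0xFFFF >> bit_count; // 0x0FFF
    let offset_mask: u16 = !length_mask;        // 0xF000

    // Token 0x7000: length field 0x000, offset field 0x7, decoded exactly
    // as in `decompress_stream` above.
    let token: u16 = 0x7000;
    let length = (token & length_mask) + 3;                       // copy 3 bytes
    let offset = ((token & offset_mask) >> (16 - bit_count)) + 1; // from 8 bytes back
    assert_eq!((length, offset), (3, 8));
}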