1use std::collections::BTreeMap;
7use std::io::Read;
8use std::path::PathBuf;
9
10use byteorder::{LittleEndian, ReadBytesExt};
11use log::{debug, log_enabled, warn, Level};
12
13use crate::cfb::{Cfb, XlsEncoding};
14use crate::utils::read_u16;
15
16#[derive(Debug)]
18pub enum VbaError {
19 Cfb(crate::cfb::CfbError),
21 Io(std::io::Error),
23
24 ModuleNotFound(String),
26 Unknown {
28 typ: &'static str,
30 val: u16,
32 },
33 LibId,
35 InvalidRecordId {
37 expected: u16,
39 found: u16,
41 },
42}
43
44from_err!(crate::cfb::CfbError, VbaError, Cfb);
45from_err!(std::io::Error, VbaError, Io);
46
47impl std::fmt::Display for VbaError {
48 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49 match self {
50 VbaError::Io(e) => write!(f, "I/O error: {}", e),
51 VbaError::Cfb(e) => write!(f, "Cfb error: {}", e),
52
53 VbaError::ModuleNotFound(e) => write!(f, "Cannot find module '{}'", e),
54 VbaError::Unknown { typ, val } => write!(f, "Unknown {} '{:X}'", typ, val),
55 VbaError::LibId => write!(f, "Unexpected libid format"),
56 VbaError::InvalidRecordId { expected, found } => write!(
57 f,
58 "Invalid record id: expecting {:X} found {:X}",
59 expected, found
60 ),
61 }
62 }
63}
64
65impl std::error::Error for VbaError {
66 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
67 match self {
68 VbaError::Io(e) => Some(e),
69 VbaError::Cfb(e) => Some(e),
70 _ => None,
71 }
72 }
73}
74
75#[allow(dead_code)]
77#[derive(Clone)]
78pub struct VbaProject {
79 references: Vec<Reference>,
80 modules: BTreeMap<String, Vec<u8>>,
81 encoding: XlsEncoding,
82}
83
84impl VbaProject {
85 pub fn new<R: Read>(r: &mut R, len: usize) -> Result<VbaProject, VbaError> {
89 let mut cfb = Cfb::new(r, len)?;
90 VbaProject::from_cfb(r, &mut cfb)
91 }
92
93 pub fn from_cfb<R: Read>(r: &mut R, cfb: &mut Cfb) -> Result<VbaProject, VbaError> {
95 let stream = cfb.get_stream("dir", r)?;
97 let stream = crate::cfb::decompress_stream(&stream)?;
98 let stream = &mut &*stream;
99
100 let encoding = read_dir_information(stream)?;
102
103 let refs = Reference::from_stream(stream, &encoding)?;
105
106 let mods: Vec<Module> = read_modules(stream, &encoding)?;
108
109 let modules: BTreeMap<String, Vec<u8>> = mods
111 .into_iter()
112 .map(|m| {
113 cfb.get_stream(&m.stream_name, r).and_then(|s| {
114 crate::cfb::decompress_stream(&s[m.text_offset..]).map(move |s| (m.name, s))
115 })
116 })
117 .collect::<Result<_, _>>()?;
118
119 Ok(VbaProject {
120 references: refs,
121 modules,
122 encoding,
123 })
124 }
125
126 pub fn get_references(&self) -> &[Reference] {
128 &self.references
129 }
130
131 pub fn get_module_names(&self) -> Vec<&str> {
133 self.modules.keys().map(|k| &**k).collect()
134 }
135
136 pub fn get_module(&self, name: &str) -> Result<String, VbaError> {
160 debug!("read module {}", name);
161 let data = self.get_module_raw(name)?;
162 Ok(self.encoding.decode_all(data))
163 }
164
165 pub fn get_module_raw(&self, name: &str) -> Result<&[u8], VbaError> {
167 match self.modules.get(name) {
168 Some(m) => Ok(&**m),
169 None => Err(VbaError::ModuleNotFound(name.into())),
170 }
171 }
172}
173
/// A reference listed in a VBA project.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Reference {
    /// Name of the reference.
    pub name: String,
    /// Description of the reference; starts out equal to `name` and is
    /// replaced when a libid carrying a description is parsed.
    pub description: String,
    /// Path of the reference; empty until one is found in the stream.
    pub path: PathBuf,
}
184
185impl Reference {
186 pub fn is_missing(&self) -> bool {
188 !self.path.exists()
189 }
190
191 fn from_stream(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Reference>, VbaError> {
193 debug!("read all references metadata");
194
195 let mut references = Vec::new();
196 let mut reference = Reference {
197 name: "".to_string(),
198 description: "".to_string(),
199 path: "".into(),
200 };
201
202 loop {
203 let check = stream.read_u16::<LittleEndian>();
204 match check? {
205 0x000F => {
206 if !reference.name.is_empty() {
208 references.push(reference);
209 }
210 break;
211 }
212 0x0016 => {
213 if !reference.name.is_empty() {
215 references.push(reference);
216 }
217 let name = read_variable_record(stream, 1)?;
218 let name = encoding.decode_all(name);
219 reference = Reference {
220 name: name.clone(),
221 description: name,
222 path: "".into(),
223 };
224 check_variable_record(0x003E, stream)?; }
226 0x0033 => {
227 reference.set_libid(stream, encoding)?;
229 }
230 0x002F => {
231 *stream = &stream[4..]; reference.set_libid(stream, encoding)?;
234
235 *stream = &stream[6..];
236 match stream.read_u16::<LittleEndian>()? {
237 0x0016 => {
238 read_variable_record(stream, 1)?; check_variable_record(0x003E, stream)?; check_record(0x0030, stream)?;
242 }
243 0x0030 => (),
244 e => {
245 return Err(VbaError::Unknown {
246 typ: "token in reference control",
247 val: e,
248 });
249 }
250 }
251 *stream = &stream[4..];
252 reference.set_libid(stream, encoding)?;
253 *stream = &stream[26..];
254 }
255 0x000D => {
256 *stream = &stream[4..];
258 reference.set_libid(stream, encoding)?;
259 *stream = &stream[6..];
260 }
261 0x000E => {
262 *stream = &stream[4..];
264 let absolute = read_variable_record(stream, 1)?; {
266 let absolute = encoding.decode_all(absolute);
267 reference.path = if let Some(stripped) = absolute.strip_prefix("*\\C") {
268 stripped.into()
269 } else {
270 absolute.into()
271 };
272 }
273 read_variable_record(stream, 1)?; *stream = &stream[6..];
275 }
276 c => {
277 return Err(VbaError::Unknown {
278 typ: "check id",
279 val: c,
280 });
281 }
282 }
283 }
284
285 debug!("references: {:#?}", references);
286 Ok(references)
287 }
288
289 fn set_libid(&mut self, stream: &mut &[u8], encoding: &XlsEncoding) -> Result<(), VbaError> {
290 let libid = read_variable_record(stream, 1)?; if libid.is_empty() || libid.ends_with(b"##") {
292 return Ok(());
293 }
294 let libid = encoding.decode_all(libid);
295 let mut parts = libid.rsplit('#');
296 match (parts.next(), parts.next()) {
297 (Some(desc), Some(path)) => {
298 self.description = desc.into();
299 if !path.is_empty() && self.path.as_os_str().is_empty() {
301 self.path = path.into();
302 }
303 Ok(())
304 }
305 _ => Err(VbaError::LibId),
306 }
307 }
308}
309
/// Metadata of one module, as read from the `dir` stream.
#[derive(Debug, Clone, Default)]
struct Module {
    /// Module name; used as the key under which its content is stored.
    name: String,
    /// Name of the compound-file stream that holds the module.
    stream_name: String,
    /// Byte offset inside that stream where the compressed text starts.
    text_offset: usize,
}
318
319fn read_dir_information(stream: &mut &[u8]) -> Result<XlsEncoding, VbaError> {
320 debug!("read dir header");
321
322 *stream = &stream[10..];
324
325 if read_u16(&stream[0..2]) == 0x004A {
327 *stream = &stream[10..];
328 }
329
330 *stream = &stream[20..];
332
333 let encoding = XlsEncoding::from_codepage(read_u16(&stream[6..8]))?;
335 *stream = &stream[8..];
336
337 check_variable_record(0x0004, stream)?;
339
340 check_variable_record(0x0005, stream)?;
342 check_variable_record(0x0040, stream)?; check_variable_record(0x0006, stream)?;
346 check_variable_record(0x003D, stream)?;
347
348 *stream = &stream[32..];
350
351 check_variable_record(0x000C, stream)?;
353 check_variable_record(0x003C, stream)?; Ok(encoding)
356}
357
358fn read_modules(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Module>, VbaError> {
359 debug!("read all modules metadata");
360 *stream = &stream[4..];
361
362 let module_len = stream.read_u16::<LittleEndian>()? as usize;
363
364 *stream = &stream[8..]; let mut modules = Vec::with_capacity(module_len);
366
367 for _ in 0..module_len {
368 let name = check_variable_record(0x0019, stream)?;
370 let name = encoding.decode_all(name);
371
372 check_variable_record(0x0047, stream)?; let stream_name = check_variable_record(0x001A, stream)?; let stream_name = encoding.decode_all(stream_name);
376
377 check_variable_record(0x0032, stream)?; check_variable_record(0x001C, stream)?; check_variable_record(0x0048, stream)?; check_record(0x0031, stream)?;
383 *stream = &stream[4..];
384 let offset = stream.read_u32::<LittleEndian>()? as usize;
385
386 check_record(0x001E, stream)?;
388 *stream = &stream[8..];
389
390 check_record(0x002C, stream)?;
392 *stream = &stream[6..];
393
394 match stream.read_u16::<LittleEndian>()? {
395 0x0021 |
396 0x0022 => (),
397 e => return Err(VbaError::Unknown { typ: "module typ", val: e }),
398 }
399
400 loop {
401 *stream = &stream[4..]; match stream.read_u16::<LittleEndian>() {
403 Ok(0x0025) | Ok(0x0028) => (),
404 Ok(0x002B) => break,
405 Ok(e) => return Err(VbaError::Unknown { typ: "record id", val: e }),
406 Err(e) => return Err(VbaError::Io(e)),
407 }
408 }
409 *stream = &stream[4..]; modules.push(Module {
412 name,
413 stream_name,
414 text_offset: offset,
415 });
416 }
417
418 Ok(modules)
419}
420
421fn read_variable_record<'a>(r: &mut &'a [u8], mult: usize) -> Result<&'a [u8], VbaError> {
425 let len = r.read_u32::<LittleEndian>()? as usize * mult;
426 let (read, next) = r.split_at(len);
427 *r = next;
428 Ok(read)
429}
430
431fn check_variable_record<'a>(id: u16, r: &mut &'a [u8]) -> Result<&'a [u8], VbaError> {
433 check_record(id, r)?;
434 let record = read_variable_record(r, 1)?;
435 if log_enabled!(Level::Warn) && record.len() > 100_000 {
436 warn!(
437 "record id {} as a suspicious huge length of {} (hex: {:x})",
438 id,
439 record.len(),
440 record.len() as u32
441 );
442 }
443 Ok(record)
444}
445
446fn check_record(id: u16, r: &mut &[u8]) -> Result<(), VbaError> {
448 debug!("check record {:x}", id);
449 let record_id = r.read_u16::<LittleEndian>()?;
450 if record_id != id {
451 Err(VbaError::InvalidRecordId {
452 expected: id,
453 found: record_id,
454 })
455 } else {
456 Ok(())
457 }
458}