calamine/
vba.rs

1//! Parse vbaProject.bin file
2//!
3//! Retranscription from:
4//! https://github.com/unixfreak0037/officeparser/blob/master/officeparser.py
5
6use std::collections::BTreeMap;
7use std::io::Read;
8use std::path::PathBuf;
9
10use byteorder::{LittleEndian, ReadBytesExt};
11use log::{debug, log_enabled, warn, Level};
12
13use crate::cfb::{Cfb, XlsEncoding};
14use crate::utils::read_u16;
15
16/// A VBA specific error enum
17#[derive(Debug)]
18pub enum VbaError {
19    /// Error comes from a cfb parsing
20    Cfb(crate::cfb::CfbError),
21    /// Io error
22    Io(std::io::Error),
23
24    /// Cannot find module
25    ModuleNotFound(String),
26    /// Generic unknown u16 value
27    Unknown {
28        /// error type
29        typ: &'static str,
30        /// value found
31        val: u16,
32    },
33    /// Invalid libid format
34    LibId,
35    /// Invalid record id
36    InvalidRecordId {
37        /// expected record id
38        expected: u16,
39        /// record if found
40        found: u16,
41    },
42}
43
44from_err!(crate::cfb::CfbError, VbaError, Cfb);
45from_err!(std::io::Error, VbaError, Io);
46
47impl std::fmt::Display for VbaError {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            VbaError::Io(e) => write!(f, "I/O error: {}", e),
51            VbaError::Cfb(e) => write!(f, "Cfb error: {}", e),
52
53            VbaError::ModuleNotFound(e) => write!(f, "Cannot find module '{}'", e),
54            VbaError::Unknown { typ, val } => write!(f, "Unknown {} '{:X}'", typ, val),
55            VbaError::LibId => write!(f, "Unexpected libid format"),
56            VbaError::InvalidRecordId { expected, found } => write!(
57                f,
58                "Invalid record id: expecting {:X} found {:X}",
59                expected, found
60            ),
61        }
62    }
63}
64
65impl std::error::Error for VbaError {
66    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
67        match self {
68            VbaError::Io(e) => Some(e),
69            VbaError::Cfb(e) => Some(e),
70            _ => None,
71        }
72    }
73}
74
75/// A struct for managing VBA reading
76#[allow(dead_code)]
77#[derive(Clone)]
78pub struct VbaProject {
79    references: Vec<Reference>,
80    modules: BTreeMap<String, Vec<u8>>,
81    encoding: XlsEncoding,
82}
83
84impl VbaProject {
85    /// Create a new `VbaProject` out of the vbaProject.bin `ZipFile` or xls file
86    ///
87    /// Starts reading project metadata (header, directories, sectors and minisectors).
88    pub fn new<R: Read>(r: &mut R, len: usize) -> Result<VbaProject, VbaError> {
89        let mut cfb = Cfb::new(r, len)?;
90        VbaProject::from_cfb(r, &mut cfb)
91    }
92
93    /// Creates a new `VbaProject` out of a Compound File Binary and the corresponding reader
94    pub fn from_cfb<R: Read>(r: &mut R, cfb: &mut Cfb) -> Result<VbaProject, VbaError> {
95        // dir stream
96        let stream = cfb.get_stream("dir", r)?;
97        let stream = crate::cfb::decompress_stream(&stream)?;
98        let stream = &mut &*stream;
99
100        // read dir information record (not used)
101        let encoding = read_dir_information(stream)?;
102
103        // array of REFERENCE records
104        let refs = Reference::from_stream(stream, &encoding)?;
105
106        // modules
107        let mods: Vec<Module> = read_modules(stream, &encoding)?;
108
109        // read all modules
110        let modules: BTreeMap<String, Vec<u8>> = mods
111            .into_iter()
112            .map(|m| {
113                cfb.get_stream(&m.stream_name, r).and_then(|s| {
114                    crate::cfb::decompress_stream(&s[m.text_offset..]).map(move |s| (m.name, s))
115                })
116            })
117            .collect::<Result<_, _>>()?;
118
119        Ok(VbaProject {
120            references: refs,
121            modules,
122            encoding,
123        })
124    }
125
126    /// Gets the list of `Reference`s
127    pub fn get_references(&self) -> &[Reference] {
128        &self.references
129    }
130
131    /// Gets the list of `Module` names
132    pub fn get_module_names(&self) -> Vec<&str> {
133        self.modules.keys().map(|k| &**k).collect()
134    }
135
136    /// Reads module content and tries to convert to utf8
137    ///
138    /// While it works most of the time, the modules are MBCS encoding and the conversion
139    /// may fail. If this is the case you should revert to `read_module_raw` as there is
140    /// no built in decoding provided in this crate
141    ///
142    /// # Examples
143    /// ```
144    /// use calamine::{Reader, open_workbook, Xlsx};
145    ///
146    /// # let path = format!("{}/tests/vba.xlsm", env!("CARGO_MANIFEST_DIR"));
147    /// let mut xl: Xlsx<_> = open_workbook(path).expect("Cannot find excel file");
148    /// if let Some(Ok(mut vba)) = xl.vba_project() {
149    ///     let vba = vba.to_mut();
150    ///     let modules = vba.get_module_names().into_iter()
151    ///                      .map(|s| s.to_string()).collect::<Vec<_>>();
152    ///     for m in modules {
153    ///         println!("Module {}:", m);
154    ///         println!("{}", vba.get_module(&m)
155    ///                           .expect(&format!("cannot read {:?} module", m)));
156    ///     }
157    /// }
158    /// ```
159    pub fn get_module(&self, name: &str) -> Result<String, VbaError> {
160        debug!("read module {}", name);
161        let data = self.get_module_raw(name)?;
162        Ok(self.encoding.decode_all(data))
163    }
164
165    /// Reads module content (MBCS encoded) and output it as-is (binary output)
166    pub fn get_module_raw(&self, name: &str) -> Result<&[u8], VbaError> {
167        match self.modules.get(name) {
168            Some(m) => Ok(&**m),
169            None => Err(VbaError::ModuleNotFound(name.into())),
170        }
171    }
172}
173
/// A reference declared by a VBA project
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Reference {
    /// reference name
    pub name: String,
    /// description; equal to the name until a libid provides one
    pub description: String,
    /// location of the reference as stored in the project (may be empty)
    pub path: PathBuf,
}
184
185impl Reference {
186    /// Check if the reference location is accessible
187    pub fn is_missing(&self) -> bool {
188        !self.path.exists()
189    }
190
191    /// Gets the list of references from the dir_stream relevant part
192    fn from_stream(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Reference>, VbaError> {
193        debug!("read all references metadata");
194
195        let mut references = Vec::new();
196        let mut reference = Reference {
197            name: "".to_string(),
198            description: "".to_string(),
199            path: "".into(),
200        };
201
202        loop {
203            let check = stream.read_u16::<LittleEndian>();
204            match check? {
205                0x000F => {
206                    // termination of references array
207                    if !reference.name.is_empty() {
208                        references.push(reference);
209                    }
210                    break;
211                }
212                0x0016 => {
213                    // REFERENCENAME
214                    if !reference.name.is_empty() {
215                        references.push(reference);
216                    }
217                    let name = read_variable_record(stream, 1)?;
218                    let name = encoding.decode_all(name);
219                    reference = Reference {
220                        name: name.clone(),
221                        description: name,
222                        path: "".into(),
223                    };
224                    check_variable_record(0x003E, stream)?; // unicode
225                }
226                0x0033 => {
227                    // REFERENCEORIGINAL (followed by REFERENCECONTROL)
228                    reference.set_libid(stream, encoding)?;
229                }
230                0x002F => {
231                    // REFERENCECONTROL
232                    *stream = &stream[4..]; // SizeTwiddled: len of total ref control
233                    reference.set_libid(stream, encoding)?;
234
235                    *stream = &stream[6..];
236                    match stream.read_u16::<LittleEndian>()? {
237                        0x0016 => {
238                            // optional name record extended
239                            read_variable_record(stream, 1)?; // name extended
240                            check_variable_record(0x003E, stream)?; // name extended unicode
241                            check_record(0x0030, stream)?;
242                        }
243                        0x0030 => (),
244                        e => {
245                            return Err(VbaError::Unknown {
246                                typ: "token in reference control",
247                                val: e,
248                            });
249                        }
250                    }
251                    *stream = &stream[4..];
252                    reference.set_libid(stream, encoding)?;
253                    *stream = &stream[26..];
254                }
255                0x000D => {
256                    // REFERENCEREGISTERED
257                    *stream = &stream[4..];
258                    reference.set_libid(stream, encoding)?;
259                    *stream = &stream[6..];
260                }
261                0x000E => {
262                    // REFERENCEPROJECT
263                    *stream = &stream[4..];
264                    let absolute = read_variable_record(stream, 1)?; // project libid absolute
265                    {
266                        let absolute = encoding.decode_all(absolute);
267                        reference.path = if let Some(stripped) = absolute.strip_prefix("*\\C") {
268                            stripped.into()
269                        } else {
270                            absolute.into()
271                        };
272                    }
273                    read_variable_record(stream, 1)?; // project libid relative
274                    *stream = &stream[6..];
275                }
276                c => {
277                    return Err(VbaError::Unknown {
278                        typ: "check id",
279                        val: c,
280                    });
281                }
282            }
283        }
284
285        debug!("references: {:#?}", references);
286        Ok(references)
287    }
288
289    fn set_libid(&mut self, stream: &mut &[u8], encoding: &XlsEncoding) -> Result<(), VbaError> {
290        let libid = read_variable_record(stream, 1)?; //libid twiddled
291        if libid.is_empty() || libid.ends_with(b"##") {
292            return Ok(());
293        }
294        let libid = encoding.decode_all(libid);
295        let mut parts = libid.rsplit('#');
296        match (parts.next(), parts.next()) {
297            (Some(desc), Some(path)) => {
298                self.description = desc.into();
299                // use original path if already set
300                if !path.is_empty() && self.path.as_os_str().is_empty() {
301                    self.path = path.into();
302                }
303                Ok(())
304            }
305            _ => Err(VbaError::LibId),
306        }
307    }
308}
309
/// Metadata of a vba module, as read from the dir stream
#[derive(Debug, Clone, Default)]
struct Module {
    /// module name as it appears in vba project
    name: String,
    /// name of the stream that stores the module
    stream_name: String,
    /// offset in that stream at which the compressed module text starts
    text_offset: usize,
}
318
319fn read_dir_information(stream: &mut &[u8]) -> Result<XlsEncoding, VbaError> {
320    debug!("read dir header");
321
322    // PROJECTSYSKIND
323    *stream = &stream[10..];
324
325    // PROJECTCOMPATVERSION (optional)
326    if read_u16(&stream[0..2]) == 0x004A {
327        *stream = &stream[10..];
328    }
329
330    // PROJECTLCID and PROJECTLCIDINVOKE Records
331    *stream = &stream[20..];
332
333    // PROJECT Codepage
334    let encoding = XlsEncoding::from_codepage(read_u16(&stream[6..8]))?;
335    *stream = &stream[8..];
336
337    // PROJECTNAME Record
338    check_variable_record(0x0004, stream)?;
339
340    // PROJECTDOCSTRING Record
341    check_variable_record(0x0005, stream)?;
342    check_variable_record(0x0040, stream)?; // unicode
343
344    // PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7
345    check_variable_record(0x0006, stream)?;
346    check_variable_record(0x003D, stream)?;
347
348    // PROJECTHELPCONTEXT PROJECTLIBFLAGS and PROJECTVERSION Records
349    *stream = &stream[32..];
350
351    // PROJECTCONSTANTS Record
352    check_variable_record(0x000C, stream)?;
353    check_variable_record(0x003C, stream)?; // unicode
354
355    Ok(encoding)
356}
357
358fn read_modules(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Module>, VbaError> {
359    debug!("read all modules metadata");
360    *stream = &stream[4..];
361
362    let module_len = stream.read_u16::<LittleEndian>()? as usize;
363
364    *stream = &stream[8..]; // PROJECTCOOKIE record
365    let mut modules = Vec::with_capacity(module_len);
366
367    for _ in 0..module_len {
368        // name
369        let name = check_variable_record(0x0019, stream)?;
370        let name = encoding.decode_all(name);
371
372        check_variable_record(0x0047, stream)?; // unicode
373
374        let stream_name = check_variable_record(0x001A, stream)?; // stream name
375        let stream_name = encoding.decode_all(stream_name);
376
377        check_variable_record(0x0032, stream)?; // stream name unicode
378        check_variable_record(0x001C, stream)?; // doc string
379        check_variable_record(0x0048, stream)?; // doc string unicode
380
381        // offset
382        check_record(0x0031, stream)?;
383        *stream = &stream[4..];
384        let offset = stream.read_u32::<LittleEndian>()? as usize;
385
386        // help context
387        check_record(0x001E, stream)?;
388        *stream = &stream[8..];
389
390        // cookie
391        check_record(0x002C, stream)?;
392        *stream = &stream[6..];
393
394        match stream.read_u16::<LittleEndian>()? {
395            0x0021 /* procedural module */ |
396            0x0022 /* document, class or designer module */ => (),
397            e => return Err(VbaError::Unknown { typ: "module typ", val: e }),
398        }
399
400        loop {
401            *stream = &stream[4..]; // reserved
402            match stream.read_u16::<LittleEndian>() {
403                Ok(0x0025) /* readonly */ | Ok(0x0028) /* private */ => (),
404                Ok(0x002B) => break,
405                Ok(e) => return Err(VbaError::Unknown { typ: "record id", val: e }),
406                Err(e) => return Err(VbaError::Io(e)),
407            }
408        }
409        *stream = &stream[4..]; // reserved
410
411        modules.push(Module {
412            name,
413            stream_name,
414            text_offset: offset,
415        });
416    }
417
418    Ok(modules)
419}
420
421/// Reads a variable length record
422///
423/// `mult` is a multiplier of the length (e.g 2 when parsing XLWideString)
424fn read_variable_record<'a>(r: &mut &'a [u8], mult: usize) -> Result<&'a [u8], VbaError> {
425    let len = r.read_u32::<LittleEndian>()? as usize * mult;
426    let (read, next) = r.split_at(len);
427    *r = next;
428    Ok(read)
429}
430
431/// Check that next record matches `id` and returns a variable length record
432fn check_variable_record<'a>(id: u16, r: &mut &'a [u8]) -> Result<&'a [u8], VbaError> {
433    check_record(id, r)?;
434    let record = read_variable_record(r, 1)?;
435    if log_enabled!(Level::Warn) && record.len() > 100_000 {
436        warn!(
437            "record id {} as a suspicious huge length of {} (hex: {:x})",
438            id,
439            record.len(),
440            record.len() as u32
441        );
442    }
443    Ok(record)
444}
445
446/// Check that next record matches `id`
447fn check_record(id: u16, r: &mut &[u8]) -> Result<(), VbaError> {
448    debug!("check record {:x}", id);
449    let record_id = r.read_u16::<LittleEndian>()?;
450    if record_id != id {
451        Err(VbaError::InvalidRecordId {
452            expected: id,
453            found: record_id,
454        })
455    } else {
456        Ok(())
457    }
458}