calamine/
xlsb.rs

1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::io::{BufReader, Read, Seek};
4use std::string::String;
5
6use log::debug;
7
8use encoding_rs::UTF_16LE;
9use quick_xml::events::attributes::Attribute;
10use quick_xml::events::Event;
11use quick_xml::name::QName;
12use quick_xml::Reader as XmlReader;
13use zip::read::{ZipArchive, ZipFile};
14use zip::result::ZipError;
15
16use crate::formats::{
17    builtin_format_by_code, detect_custom_number_format, format_excel_f64, CellFormat,
18};
19use crate::utils::{push_column, read_f64, read_i32, read_u16, read_u32, read_usize};
20use crate::vba::VbaProject;
21use crate::{
22    Cell, CellErrorType, DataType, Metadata, Range, Reader, Sheet, SheetType, SheetVisible,
23};
24
/// A Xlsb specific error
///
/// Groups the errors wrapped from the underlying readers (io, zip, xml, vba)
/// together with the errors raised while decoding the binary records.
#[derive(Debug)]
pub enum XlsbError {
    /// Io error
    Io(std::io::Error),
    /// Zip error
    Zip(zip::result::ZipError),
    /// Xml error
    Xml(quick_xml::Error),
    /// Xml attribute error
    XmlAttr(quick_xml::events::attributes::AttrError),
    /// Vba error
    Vba(crate::vba::VbaError),

    /// Mismatch value
    Mismatch {
        /// description of what was expected
        expected: &'static str,
        /// value found instead (displayed in hexadecimal)
        found: u16,
    },
    /// File not found (holds the path that was looked up in the archive)
    FileNotFound(String),
    /// Invalid formula, stack length too short
    StackLen,

    /// Unsupported type
    UnsupportedType(u16),
    /// Unsupported etpg
    Etpg(u8),
    /// Unsupported iftab
    IfTab(usize),
    /// Unsupported BErr
    BErr(u8),
    /// Unsupported Ptg
    Ptg(u8),
    /// Unsupported cell error code
    CellError(u8),
    /// Wide str length too long
    WideStr {
        /// number of bytes the wide str needs (4-byte length prefix included)
        ws_len: usize,
        /// number of bytes actually available in the buffer
        buf_len: usize,
    },
    /// Unrecognized data
    Unrecognized {
        /// name of the field or data type that could not be interpreted
        typ: &'static str,
        /// value found, rendered as text
        val: String,
    },
}
78
// Conversions into `XlsbError` generated by the crate's `from_err!` macro
// (defined outside this file; presumably it emits `From` impls, which is what
// allows `?` on these error types). `XmlAttr` and `Vba` have no such
// conversion here: they are wrapped explicitly with `map_err` at their call sites.
from_err!(std::io::Error, XlsbError, Io);
from_err!(zip::result::ZipError, XlsbError, Zip);
from_err!(quick_xml::Error, XlsbError, Xml);
82
83impl std::fmt::Display for XlsbError {
84    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85        match self {
86            XlsbError::Io(e) => write!(f, "I/O error: {}", e),
87            XlsbError::Zip(e) => write!(f, "Zip error: {}", e),
88            XlsbError::Xml(e) => write!(f, "Xml error: {}", e),
89            XlsbError::XmlAttr(e) => write!(f, "Xml attribute error: {}", e),
90            XlsbError::Vba(e) => write!(f, "Vba error: {}", e),
91            XlsbError::Mismatch { expected, found } => {
92                write!(f, "Expecting {}, got {:X}", expected, found)
93            }
94            XlsbError::FileNotFound(file) => write!(f, "File not found: '{}'", file),
95            XlsbError::StackLen => write!(f, "Invalid stack length"),
96            XlsbError::UnsupportedType(t) => write!(f, "Unsupported type {:X}", t),
97            XlsbError::Etpg(t) => write!(f, "Unsupported etpg {:X}", t),
98            XlsbError::IfTab(t) => write!(f, "Unsupported iftab {:X}", t),
99            XlsbError::BErr(t) => write!(f, "Unsupported BErr {:X}", t),
100            XlsbError::Ptg(t) => write!(f, "Unsupported Ptf {:X}", t),
101            XlsbError::CellError(t) => write!(f, "Unsupported Cell Error code {:X}", t),
102            XlsbError::WideStr { ws_len, buf_len } => write!(
103                f,
104                "Wide str length exceeds buffer length ({} > {})",
105                ws_len, buf_len
106            ),
107            XlsbError::Unrecognized { typ, val } => write!(f, "Unrecognized {}: {}", typ, val),
108        }
109    }
110}
111
112impl std::error::Error for XlsbError {
113    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
114        match self {
115            XlsbError::Io(e) => Some(e),
116            XlsbError::Zip(e) => Some(e),
117            XlsbError::Xml(e) => Some(e),
118            XlsbError::Vba(e) => Some(e),
119            _ => None,
120        }
121    }
122}
123
/// A Xlsb reader
///
/// Wraps the zip archive of the workbook together with the lookup tables that
/// are loaded once when the reader is created.
pub struct Xlsb<RS> {
    /// Zip archive holding the workbook parts
    zip: ZipArchive<RS>,
    /// Sheet names resolved from the BrtExternSheet record, handed to the formula parser
    extern_sheets: Vec<String>,
    /// `(sheet name, path inside the archive)` pairs
    sheets: Vec<(String, String)>,
    /// Shared strings, indexed by BrtCellIsst cells
    strings: Vec<String>,
    /// Cell (number) formats
    formats: Vec<CellFormat>,
    /// Lowest bit of the first BrtWbProp byte; forwarded to `format_excel_f64`
    /// (presumably selects the 1904 date system -- confirm against the formats module)
    is_1904: bool,
    /// Sheets and defined names exposed through `Reader::metadata`
    metadata: Metadata,
    /// `(file extension, raw bytes)` of the pictures found under `xl/media`
    #[cfg(feature = "picture")]
    pictures: Option<Vec<(String, Vec<u8>)>>,
}
137
138impl<RS: Read + Seek> Xlsb<RS> {
139    /// MS-XLSB
140    fn read_relationships(&mut self) -> Result<BTreeMap<Vec<u8>, String>, XlsbError> {
141        let mut relationships = BTreeMap::new();
142        match self.zip.by_name("xl/_rels/workbook.bin.rels") {
143            Ok(f) => {
144                let mut xml = XmlReader::from_reader(BufReader::new(f));
145                xml.check_end_names(false)
146                    .trim_text(false)
147                    .check_comments(false)
148                    .expand_empty_elements(true);
149                let mut buf: Vec<u8> = Vec::with_capacity(64);
150
151                loop {
152                    match xml.read_event_into(&mut buf) {
153                        Ok(Event::Start(ref e)) if e.name() == QName(b"Relationship") => {
154                            let mut id = None;
155                            let mut target = None;
156                            for a in e.attributes() {
157                                match a.map_err(XlsbError::XmlAttr)? {
158                                    Attribute {
159                                        key: QName(b"Id"),
160                                        value: v,
161                                    } => {
162                                        id = Some(v.to_vec());
163                                    }
164                                    Attribute {
165                                        key: QName(b"Target"),
166                                        value: v,
167                                    } => {
168                                        target = Some(xml.decoder().decode(&v)?.into_owned());
169                                    }
170                                    _ => (),
171                                }
172                            }
173                            if let (Some(id), Some(target)) = (id, target) {
174                                relationships.insert(id, target);
175                            }
176                        }
177                        Ok(Event::Eof) => break,
178                        Err(e) => return Err(XlsbError::Xml(e)),
179                        _ => (),
180                    }
181                    buf.clear();
182                }
183            }
184            Err(ZipError::FileNotFound) => (),
185            Err(e) => return Err(XlsbError::Zip(e)),
186        }
187        Ok(relationships)
188    }
189
190    /// MS-XLSB 2.1.7.50 Styles
191    fn read_styles(&mut self) -> Result<(), XlsbError> {
192        let mut iter = match RecordIter::from_zip(&mut self.zip, "xl/styles.bin") {
193            Ok(iter) => iter,
194            Err(_) => return Ok(()), // it is fine if path does not exists
195        };
196        let mut buf = Vec::with_capacity(1024);
197        let mut number_formats = BTreeMap::new();
198
199        loop {
200            match iter.read_type()? {
201                0x0267 => {
202                    // BrtBeginFmts
203                    let _len = iter.fill_buffer(&mut buf)?;
204                    let len = read_usize(&buf);
205
206                    for _ in 0..len {
207                        let _ = iter.next_skip_blocks(0x002C, &[], &mut buf)?; // BrtFmt
208                        let fmt_code = read_u16(&buf);
209                        let fmt_str = wide_str(&buf[2..], &mut 0)?;
210                        number_formats
211                            .insert(fmt_code, detect_custom_number_format(fmt_str.as_ref()));
212                    }
213                }
214                0x0269 => {
215                    // BrtBeginCellXFs
216                    let _len = iter.fill_buffer(&mut buf)?;
217                    let len = read_usize(&buf);
218                    for _ in 0..len {
219                        let _ = iter.next_skip_blocks(0x002F, &[], &mut buf)?; // BrtXF
220                        let fmt_code = read_u16(&buf[2..4]);
221                        match builtin_format_by_code(fmt_code) {
222                            CellFormat::DateTime => self.formats.push(CellFormat::DateTime),
223                            CellFormat::TimeDelta => self.formats.push(CellFormat::TimeDelta),
224                            CellFormat::Other => {
225                                self.formats.push(
226                                    number_formats
227                                        .get(&fmt_code)
228                                        .copied()
229                                        .unwrap_or(CellFormat::Other),
230                                );
231                            }
232                        }
233                    }
234                    // BrtBeginCellXFs is always present and always after BrtBeginFmts
235                    break;
236                }
237                _ => (),
238            }
239            buf.clear();
240        }
241
242        Ok(())
243    }
244
245    /// MS-XLSB 2.1.7.45
246    fn read_shared_strings(&mut self) -> Result<(), XlsbError> {
247        let mut iter = match RecordIter::from_zip(&mut self.zip, "xl/sharedStrings.bin") {
248            Ok(iter) => iter,
249            Err(_) => return Ok(()), // it is fine if path does not exists
250        };
251        let mut buf = Vec::with_capacity(1024);
252
253        let _ = iter.next_skip_blocks(0x009F, &[], &mut buf)?; // BrtBeginSst
254        let len = read_usize(&buf[4..8]);
255
256        // BrtSSTItems
257        for _ in 0..len {
258            let _ = iter.next_skip_blocks(
259                0x0013,
260                &[
261                    (0x0023, Some(0x0024)), // future
262                ],
263                &mut buf,
264            )?; // BrtSSTItem
265            self.strings.push(wide_str(&buf[1..], &mut 0)?.into_owned());
266        }
267        Ok(())
268    }
269
270    /// MS-XLSB 2.1.7.61
271    fn read_workbook(
272        &mut self,
273        relationships: &BTreeMap<Vec<u8>, String>,
274    ) -> Result<(), XlsbError> {
275        let mut iter = RecordIter::from_zip(&mut self.zip, "xl/workbook.bin")?;
276        let mut buf = Vec::with_capacity(1024);
277
278        loop {
279            match iter.read_type()? {
280                0x0099 => {
281                    let _ = iter.fill_buffer(&mut buf)?;
282                    self.is_1904 = &buf[0] & 0x1 != 0;
283                } // BrtWbProp
284                0x009C => {
285                    // BrtBundleSh
286                    let len = iter.fill_buffer(&mut buf)?;
287                    let rel_len = read_u32(&buf[8..len]);
288                    if rel_len != 0xFFFF_FFFF {
289                        let rel_len = rel_len as usize * 2;
290                        let relid = &buf[12..12 + rel_len];
291                        // converts utf16le to utf8 for BTreeMap search
292                        let relid = UTF_16LE.decode(relid).0;
293                        let path = format!("xl/{}", relationships[relid.as_bytes()]);
294                        // ST_SheetState
295                        let visible = match read_u32(&buf) {
296                            0 => SheetVisible::Visible,
297                            1 => SheetVisible::Hidden,
298                            2 => SheetVisible::VeryHidden,
299                            v => {
300                                return Err(XlsbError::Unrecognized {
301                                    typ: "BoundSheet8:hsState",
302                                    val: v.to_string(),
303                                })
304                            }
305                        };
306                        let typ = match path.split('/').nth(1) {
307                            Some("worksheets") => SheetType::WorkSheet,
308                            Some("chartsheets") => SheetType::ChartSheet,
309                            Some("dialogsheets") => SheetType::DialogSheet,
310                            _ => {
311                                return Err(XlsbError::Unrecognized {
312                                    typ: "BoundSheet8:dt",
313                                    val: path.to_string(),
314                                })
315                            }
316                        };
317                        let name = wide_str(&buf[12 + rel_len..len], &mut 0)?;
318                        self.metadata.sheets.push(Sheet {
319                            name: name.to_string(),
320                            typ,
321                            visible,
322                        });
323                        self.sheets.push((name.into_owned(), path));
324                    };
325                }
326                0x0090 => break, // BrtEndBundleShs
327                _ => (),
328            }
329            buf.clear();
330        }
331
332        // BrtName
333        let mut defined_names = Vec::new();
334        loop {
335            let typ = iter.read_type()?;
336            match typ {
337                0x016A => {
338                    // BrtExternSheet
339                    let _len = iter.fill_buffer(&mut buf)?;
340                    let cxti = read_u32(&buf[..4]) as usize;
341                    if cxti < 1_000_000 {
342                        self.extern_sheets.reserve(cxti);
343                    }
344                    let sheets = &self.sheets;
345                    let extern_sheets = buf[4..]
346                        .chunks(12)
347                        .map(|xti| {
348                            match read_i32(&xti[4..8]) {
349                                -2 => "#ThisWorkbook",
350                                -1 => "#InvalidWorkSheet",
351                                p if p >= 0 && (p as usize) < sheets.len() => &sheets[p as usize].0,
352                                _ => "#Unknown",
353                            }
354                            .to_string()
355                        })
356                        .take(cxti)
357                        .collect();
358                    self.extern_sheets = extern_sheets;
359                }
360                0x0027 => {
361                    // BrtName
362                    let len = iter.fill_buffer(&mut buf)?;
363                    let mut str_len = 0;
364                    let name = wide_str(&buf[9..len], &mut str_len)?.into_owned();
365                    let rgce_len = read_u32(&buf[9 + str_len..]) as usize;
366                    let rgce = &buf[13 + str_len..13 + str_len + rgce_len];
367                    let formula = parse_formula(rgce, &self.extern_sheets, &defined_names)?;
368                    defined_names.push((name, formula));
369                }
370                0x009D | 0x0225 | 0x018D | 0x0180 | 0x009A | 0x0252 | 0x0229 | 0x009B | 0x0084 => {
371                    // record supposed to happen AFTER BrtNames
372                    self.metadata.names = defined_names;
373                    return Ok(());
374                }
375                _ => debug!("Unsupported type {:X}", typ),
376            }
377        }
378    }
379
380    fn worksheet_range_from_path(&mut self, path: &str) -> Result<Range<DataType>, XlsbError> {
381        let mut iter = RecordIter::from_zip(&mut self.zip, &path)?;
382        let mut buf = Vec::with_capacity(1024);
383        let formats = &self.formats;
384        // BrtWsDim
385        let _ = iter.next_skip_blocks(
386            0x0094,
387            &[
388                (0x0081, None), // BrtBeginSheet
389                (0x0093, None), // BrtWsProp
390            ],
391            &mut buf,
392        )?;
393        let (start, end) = parse_dimensions(&buf[..16]);
394        let len = (end.0 - start.0 + 1) * (end.1 - start.1 + 1);
395        let mut cells = if len < 1_000_000 {
396            Vec::with_capacity(len as usize)
397        } else {
398            Vec::new()
399        };
400
401        // BrtBeginSheetData
402        let _ = iter.next_skip_blocks(
403            0x0091,
404            &[
405                (0x0085, Some(0x0086)), // Views
406                (0x0025, Some(0x0026)), // AC blocks
407                (0x01E5, None),         // BrtWsFmtInfo
408                (0x0186, Some(0x0187)), // Col Infos
409            ],
410            &mut buf,
411        )?;
412
413        // Initialization: first BrtRowHdr
414        let mut typ: u16;
415        let mut row = 0u32;
416
417        // loop until end of sheet
418        loop {
419            typ = iter.read_type()?;
420            let _ = iter.fill_buffer(&mut buf)?;
421
422            let value = match typ {
423                // 0x0001 => continue, // DataType::Empty, // BrtCellBlank
424                0x0002 => {
425                    // BrtCellRk MS-XLSB 2.5.122
426                    let d100 = (buf[8] & 1) != 0;
427                    let is_int = (buf[8] & 2) != 0;
428                    buf[8] &= 0xFC;
429
430                    if is_int {
431                        let v = (read_i32(&buf[8..12]) >> 2) as i64;
432                        if d100 {
433                            let v = (v as f64) / 100.0;
434                            format_excel_f64(v, cell_format(formats, &buf), self.is_1904)
435                        } else {
436                            DataType::Int(v)
437                        }
438                    } else {
439                        let mut v = [0u8; 8];
440                        v[4..].copy_from_slice(&buf[8..12]);
441                        let v = read_f64(&v);
442                        let v = if d100 { v / 100.0 } else { v };
443                        format_excel_f64(v, cell_format(formats, &buf), self.is_1904)
444                    }
445                }
446                0x0003 => {
447                    let error = match buf[8] {
448                        0x00 => CellErrorType::Null,
449                        0x07 => CellErrorType::Div0,
450                        0x0F => CellErrorType::Value,
451                        0x17 => CellErrorType::Ref,
452                        0x1D => CellErrorType::Name,
453                        0x24 => CellErrorType::Num,
454                        0x2A => CellErrorType::NA,
455                        0x2B => CellErrorType::GettingData,
456                        c => return Err(XlsbError::CellError(c)),
457                    };
458                    // BrtCellError
459                    DataType::Error(error)
460                }
461                0x0004 | 0x000A => DataType::Bool(buf[8] != 0), // BrtCellBool or BrtFmlaBool
462                0x0005 | 0x0009 => {
463                    let v = read_f64(&buf[8..16]);
464                    format_excel_f64(v, cell_format(formats, &buf), self.is_1904)
465                } // BrtCellReal or BrtFmlaNum
466                0x0006 | 0x0008 => DataType::String(wide_str(&buf[8..], &mut 0)?.into_owned()), // BrtCellSt or BrtFmlaString
467                0x0007 => {
468                    // BrtCellIsst
469                    let isst = read_usize(&buf[8..12]);
470                    DataType::String(self.strings[isst].clone())
471                }
472                0x0000 => {
473                    // BrtRowHdr
474                    row = read_u32(&buf);
475                    if row > 0x0010_0000 {
476                        return Ok(Range::from_sparse(cells)); // invalid row
477                    }
478                    continue;
479                }
480                0x0092 => return Ok(Range::from_sparse(cells)), // BrtEndSheetData
481                _ => continue, // anything else, ignore and try next, without changing idx
482            };
483
484            let col = read_u32(&buf);
485            match value {
486                DataType::Empty => (),
487                DataType::String(s) if s.is_empty() => (),
488                value => cells.push(Cell::new((row, col), value)),
489            }
490        }
491    }
492
493    fn worksheet_formula_from_path(&mut self, path: String) -> Result<Range<String>, XlsbError> {
494        let mut iter = RecordIter::from_zip(&mut self.zip, &path)?;
495        let mut buf = Vec::with_capacity(1024);
496
497        // BrtWsDim
498        let _ = iter.next_skip_blocks(
499            0x0094,
500            &[
501                (0x0081, None), // BrtBeginSheet
502                (0x0093, None), // BrtWsProp
503            ],
504            &mut buf,
505        )?;
506        let (start, end) = parse_dimensions(&buf[..16]);
507        let mut cells = Vec::new();
508        if start.0 <= end.0 && start.1 <= end.1 {
509            let rows = (end.0 - start.0 + 1) as usize;
510            let cols = (end.1 - start.1 + 1) as usize;
511            let len = rows.saturating_mul(cols);
512            if len < 1_000_000 {
513                cells.reserve(len);
514            }
515        }
516
517        // BrtBeginSheetData
518        let _ = iter.next_skip_blocks(
519            0x0091,
520            &[
521                (0x0085, Some(0x0086)), // Views
522                (0x0025, Some(0x0026)), // AC blocks
523                (0x01E5, None),         // BrtWsFmtInfo
524                (0x0186, Some(0x0187)), // Col Infos
525            ],
526            &mut buf,
527        )?;
528
529        // Initialization: first BrtRowHdr
530        let mut typ: u16;
531        let mut row = 0u32;
532
533        // loop until end of sheet
534        loop {
535            typ = iter.read_type()?;
536            let _ = iter.fill_buffer(&mut buf)?;
537
538            let value = match typ {
539                // 0x0001 => continue, // DataType::Empty, // BrtCellBlank
540                0x0008 => {
541                    // BrtFmlaString
542                    let cch = read_u32(&buf[8..]) as usize;
543                    let formula = &buf[14 + cch * 2..];
544                    let cce = read_u32(formula) as usize;
545                    let rgce = &formula[4..4 + cce];
546                    parse_formula(rgce, &self.extern_sheets, &self.metadata.names)?
547                }
548                0x0009 => {
549                    // BrtFmlaNum
550                    let formula = &buf[18..];
551                    let cce = read_u32(formula) as usize;
552                    let rgce = &formula[4..4 + cce];
553                    parse_formula(rgce, &self.extern_sheets, &self.metadata.names)?
554                }
555                0x000A | 0x000B => {
556                    // BrtFmlaBool | BrtFmlaError
557                    let formula = &buf[11..];
558                    let cce = read_u32(formula) as usize;
559                    let rgce = &formula[4..4 + cce];
560                    parse_formula(rgce, &self.extern_sheets, &self.metadata.names)?
561                }
562                0x0000 => {
563                    // BrtRowHdr
564                    row = read_u32(&buf);
565                    if row > 0x0010_0000 {
566                        return Ok(Range::from_sparse(cells)); // invalid row
567                    }
568                    continue;
569                }
570                0x0092 => return Ok(Range::from_sparse(cells)), // BrtEndSheetData
571                _ => continue, // anything else, ignore and try next, without changing idx
572            };
573
574            let col = read_u32(&buf);
575            if !value.is_empty() {
576                cells.push(Cell::new((row, col), value));
577            }
578        }
579    }
580
581    #[cfg(feature = "picture")]
582    fn read_pictures(&mut self) -> Result<(), XlsbError> {
583        let mut pics = Vec::new();
584        for i in 0..self.zip.len() {
585            let mut zfile = self.zip.by_index(i)?;
586            let zname = zfile.name().to_owned();
587            if zname.starts_with("xl/media") {
588                let name_ext: Vec<&str> = zname.split(".").collect();
589                if let Some(ext) = name_ext.last() {
590                    if [
591                        "emf", "wmf", "pict", "jpeg", "jpg", "png", "dib", "gif", "tiff", "eps",
592                        "bmp", "wpg",
593                    ]
594                    .contains(ext)
595                    {
596                        let mut buf: Vec<u8> = Vec::new();
597                        zfile.read_to_end(&mut buf)?;
598                        pics.push((ext.to_string(), buf));
599                    }
600                }
601            }
602        }
603        if !pics.is_empty() {
604            self.pictures = Some(pics);
605        }
606        Ok(())
607    }
608}
609
610impl<RS: Read + Seek> Reader<RS> for Xlsb<RS> {
611    type Error = XlsbError;
612
613    fn new(reader: RS) -> Result<Self, XlsbError> {
614        let mut xlsb = Xlsb {
615            zip: ZipArchive::new(reader)?,
616            sheets: Vec::new(),
617            strings: Vec::new(),
618            extern_sheets: Vec::new(),
619            formats: Vec::new(),
620            is_1904: false,
621            metadata: Metadata::default(),
622            #[cfg(feature = "picture")]
623            pictures: None,
624        };
625        xlsb.read_shared_strings()?;
626        xlsb.read_styles()?;
627        let relationships = xlsb.read_relationships()?;
628        xlsb.read_workbook(&relationships)?;
629        #[cfg(feature = "picture")]
630        xlsb.read_pictures()?;
631
632        Ok(xlsb)
633    }
634
635    fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, XlsbError>> {
636        self.zip.by_name("xl/vbaProject.bin").ok().map(|mut f| {
637            let len = f.size() as usize;
638            VbaProject::new(&mut f, len)
639                .map(Cow::Owned)
640                .map_err(XlsbError::Vba)
641        })
642    }
643
    /// Returns the workbook metadata (sheet list and defined names) that was
    /// collected while reading the workbook part.
    fn metadata(&self) -> &Metadata {
        &self.metadata
    }
647
648    /// MS-XLSB 2.1.7.62
649    fn worksheet_range(&mut self, name: &str) -> Option<Result<Range<DataType>, XlsbError>> {
650        let path = match self.sheets.iter().find(|&(n, _)| n == name) {
651            Some((_, path)) => path.clone(),
652            None => return None,
653        };
654        Some(self.worksheet_range_from_path(&path))
655    }
656
657    /// MS-XLSB 2.1.7.62
658    fn worksheet_formula(&mut self, name: &str) -> Option<Result<Range<String>, XlsbError>> {
659        let path = match self.sheets.iter().find(|&(n, _)| n == name) {
660            Some((_, path)) => path.clone(),
661            None => return None,
662        };
663        Some(self.worksheet_formula_from_path(path))
664    }
665
666    /// MS-XLSB 2.1.7.62
667    fn worksheets(&mut self) -> Vec<(String, Range<DataType>)> {
668        let sheets = self.sheets.clone();
669        sheets
670            .into_iter()
671            .filter_map(|(name, path)| {
672                let ws = self.worksheet_range_from_path(&path).ok()?;
673                Some((name, ws))
674            })
675            .collect()
676    }
677
678    #[cfg(feature = "picture")]
679    fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>> {
680        self.pictures.to_owned()
681    }
682}
683
/// Cursor over the binary records of one part of the archive.
///
/// A record is read in two steps: `read_type` for its type, then
/// `fill_buffer` for its length-prefixed payload.
struct RecordIter<'a> {
    /// One byte scratch buffer used by `read_u8`
    b: [u8; 1],
    /// Buffered reader over the zip entry being decoded
    r: BufReader<ZipFile<'a>>,
}
688
689impl<'a> RecordIter<'a> {
690    fn from_zip<RS: Read + Seek>(
691        zip: &'a mut ZipArchive<RS>,
692        path: &str,
693    ) -> Result<RecordIter<'a>, XlsbError> {
694        match zip.by_name(path) {
695            Ok(f) => Ok(RecordIter {
696                r: BufReader::new(f),
697                b: [0],
698            }),
699            Err(ZipError::FileNotFound) => Err(XlsbError::FileNotFound(path.into())),
700            Err(e) => Err(XlsbError::Zip(e)),
701        }
702    }
703
704    fn read_u8(&mut self) -> Result<u8, std::io::Error> {
705        self.r.read_exact(&mut self.b)?;
706        Ok(self.b[0])
707    }
708
709    /// Read next type, until we have no future record
710    fn read_type(&mut self) -> Result<u16, std::io::Error> {
711        let b = self.read_u8()?;
712        let typ = if (b & 0x80) == 0x80 {
713            (b & 0x7F) as u16 + (((self.read_u8()? & 0x7F) as u16) << 7)
714        } else {
715            b as u16
716        };
717        Ok(typ)
718    }
719
720    fn fill_buffer(&mut self, buf: &mut Vec<u8>) -> Result<usize, std::io::Error> {
721        let mut b = self.read_u8()?;
722        let mut len = (b & 0x7F) as usize;
723        for i in 1..4 {
724            if (b & 0x80) == 0 {
725                break;
726            }
727            b = self.read_u8()?;
728            len += ((b & 0x7F) as usize) << (7 * i);
729        }
730        if buf.len() < len {
731            *buf = vec![0; len];
732        }
733
734        self.r.read_exact(&mut buf[..len])?;
735        Ok(len)
736    }
737
738    /// Reads next type, and discard blocks between `start` and `end`
739    fn next_skip_blocks(
740        &mut self,
741        record_type: u16,
742        bounds: &[(u16, Option<u16>)],
743        buf: &mut Vec<u8>,
744    ) -> Result<usize, XlsbError> {
745        loop {
746            let typ = self.read_type()?;
747            let len = self.fill_buffer(buf)?;
748            if typ == record_type {
749                return Ok(len);
750            }
751            if let Some(end) = bounds.iter().find(|b| b.0 == typ).and_then(|b| b.1) {
752                while self.read_type()? != end {
753                    let _ = self.fill_buffer(buf)?;
754                }
755                let _ = self.fill_buffer(buf)?;
756            }
757        }
758    }
759}
760
761fn wide_str<'a>(buf: &'a [u8], str_len: &mut usize) -> Result<Cow<'a, str>, XlsbError> {
762    let len = read_u32(buf) as usize;
763    if buf.len() < 4 + len * 2 {
764        return Err(XlsbError::WideStr {
765            ws_len: 4 + len * 2,
766            buf_len: buf.len(),
767        });
768    }
769    *str_len = 4 + len * 2;
770    let s = &buf[4..*str_len];
771    Ok(UTF_16LE.decode(s).0)
772}
773
774fn parse_dimensions(buf: &[u8]) -> ((u32, u32), (u32, u32)) {
775    (
776        (read_u32(&buf[0..4]), read_u32(&buf[8..12])),
777        (read_u32(&buf[4..8]), read_u32(&buf[12..16])),
778    )
779}
780
781/// Formula parsing
782///
783/// [MS-XLSB 2.2.2]
784/// [MS-XLSB 2.5.97]
785///
786/// See Ptg [2.5.97.16]
787fn parse_formula(
788    mut rgce: &[u8],
789    sheets: &[String],
790    names: &[(String, String)],
791) -> Result<String, XlsbError> {
792    if rgce.is_empty() {
793        return Ok(String::new());
794    }
795
796    let mut stack = Vec::new();
797    let mut formula = String::with_capacity(rgce.len());
798    while !rgce.is_empty() {
799        let ptg = rgce[0];
800        rgce = &rgce[1..];
801        match ptg {
802            0x3a | 0x5a | 0x7a => {
803                // PtgRef3d
804                let ixti = read_u16(&rgce[0..2]);
805                stack.push(formula.len());
806                formula.push_str(&sheets[ixti as usize]);
807                formula.push('!');
808                // TODO: check with relative columns
809                formula.push('$');
810                push_column(read_u16(&rgce[6..8]) as u32, &mut formula);
811                formula.push('$');
812                formula.push_str(&format!("{}", read_u32(&rgce[2..6]) + 1));
813                rgce = &rgce[8..];
814            }
815            0x3b | 0x5b | 0x7b => {
816                // PtgArea3d
817                let ixti = read_u16(&rgce[0..2]);
818                stack.push(formula.len());
819                formula.push_str(&sheets[ixti as usize]);
820                formula.push('!');
821                // TODO: check with relative columns
822                formula.push('$');
823                push_column(read_u16(&rgce[10..12]) as u32, &mut formula);
824                formula.push('$');
825                formula.push_str(&format!("{}", read_u32(&rgce[2..6]) + 1));
826                formula.push(':');
827                formula.push('$');
828                push_column(read_u16(&rgce[12..14]) as u32, &mut formula);
829                formula.push('$');
830                formula.push_str(&format!("{}", read_u32(&rgce[6..10]) + 1));
831                rgce = &rgce[14..];
832            }
833            0x3c | 0x5c | 0x7c => {
834                // PtfRefErr3d
835                let ixti = read_u16(&rgce[0..2]);
836                stack.push(formula.len());
837                formula.push_str(&sheets[ixti as usize]);
838                formula.push('!');
839                formula.push_str("#REF!");
840                rgce = &rgce[8..];
841            }
842            0x3d | 0x5d | 0x7d => {
843                // PtgAreaErr3d
844                let ixti = read_u16(&rgce[0..2]);
845                stack.push(formula.len());
846                formula.push_str(&sheets[ixti as usize]);
847                formula.push('!');
848                formula.push_str("#REF!");
849                rgce = &rgce[14..];
850            }
851            0x01 => {
852                // PtgExp: array/shared formula, ignore
853                debug!("ignoring PtgExp array/shared formula");
854                stack.push(formula.len());
855                rgce = &rgce[4..];
856            }
857            0x03..=0x11 => {
858                // binary operation
859                let e2 = stack.pop().ok_or(XlsbError::StackLen)?;
860                let e2 = formula.split_off(e2);
861                // imaginary 'e1' will actually already be the start of the binary op
862                let op = match ptg {
863                    0x03 => "+",
864                    0x04 => "-",
865                    0x05 => "*",
866                    0x06 => "/",
867                    0x07 => "^",
868                    0x08 => "&",
869                    0x09 => "<",
870                    0x0A => "<=",
871                    0x0B => "=",
872                    0x0C => ">",
873                    0x0D => ">=",
874                    0x0E => "<>",
875                    0x0F => " ",
876                    0x10 => ",",
877                    0x11 => ":",
878                    _ => unreachable!(),
879                };
880                formula.push_str(op);
881                formula.push_str(&e2);
882            }
883            0x12 => {
884                let e = stack.last().ok_or(XlsbError::StackLen)?;
885                formula.insert(*e, '+');
886            }
887            0x13 => {
888                let e = stack.last().ok_or(XlsbError::StackLen)?;
889                formula.insert(*e, '-');
890            }
891            0x14 => {
892                formula.push('%');
893            }
894            0x15 => {
895                let e = stack.last().ok_or(XlsbError::StackLen)?;
896                formula.insert(*e, '(');
897                formula.push(')');
898            }
899            0x16 => {
900                stack.push(formula.len());
901            }
902            0x17 => {
903                stack.push(formula.len());
904                formula.push('\"');
905                let cch = read_u16(&rgce[0..2]) as usize;
906                formula.push_str(&UTF_16LE.decode(&rgce[2..2 + 2 * cch]).0);
907                formula.push('\"');
908                rgce = &rgce[2 + 2 * cch..];
909            }
910            0x18 => {
911                stack.push(formula.len());
912                let eptg = rgce[0];
913                rgce = &rgce[1..];
914                match eptg {
915                    0x19 => rgce = &rgce[12..],
916                    0x1D => rgce = &rgce[4..],
917                    e => return Err(XlsbError::Etpg(e)),
918                }
919            }
920            0x19 => {
921                let eptg = rgce[0];
922                rgce = &rgce[1..];
923                match eptg {
924                    0x01 | 0x02 | 0x08 | 0x20 | 0x21 | 0x40 | 0x41 | 0x80 => rgce = &rgce[2..],
925                    0x04 => rgce = &rgce[10..],
926                    0x10 => {
927                        rgce = &rgce[2..];
928                        let e = stack.last().ok_or(XlsbError::StackLen)?;
929                        let e = formula.split_off(*e);
930                        formula.push_str("SUM(");
931                        formula.push_str(&e);
932                        formula.push(')');
933                    }
934                    e => return Err(XlsbError::Etpg(e)),
935                }
936            }
937            0x1C => {
938                stack.push(formula.len());
939                let err = rgce[0];
940                rgce = &rgce[1..];
941                match err {
942                    0x00 => formula.push_str("#NULL!"),
943                    0x07 => formula.push_str("#DIV/0!"),
944                    0x0F => formula.push_str("#VALUE!"),
945                    0x17 => formula.push_str("#REF!"),
946                    0x1D => formula.push_str("#NAME?"),
947                    0x24 => formula.push_str("#NUM!"),
948                    0x2A => formula.push_str("#N/A"),
949                    0x2B => formula.push_str("#GETTING_DATA"),
950                    e => return Err(XlsbError::BErr(e)),
951                }
952            }
953            0x1D => {
954                stack.push(formula.len());
955                formula.push_str(if rgce[0] == 0 { "FALSE" } else { "TRUE" });
956                rgce = &rgce[1..];
957            }
958            0x1E => {
959                stack.push(formula.len());
960                formula.push_str(&format!("{}", read_u16(rgce)));
961                rgce = &rgce[2..];
962            }
963            0x1F => {
964                stack.push(formula.len());
965                formula.push_str(&format!("{}", read_f64(rgce)));
966                rgce = &rgce[8..];
967            }
968            0x20 | 0x40 | 0x60 => {
969                // PtgArray: ignore
970                stack.push(formula.len());
971                rgce = &rgce[14..];
972            }
973            0x21 | 0x22 | 0x41 | 0x42 | 0x61 | 0x62 => {
974                let (iftab, argc) = match ptg {
975                    0x22 | 0x42 | 0x62 => {
976                        let iftab = read_u16(&rgce[1..]) as usize;
977                        let argc = rgce[0] as usize;
978                        rgce = &rgce[3..];
979                        (iftab, argc)
980                    }
981                    _ => {
982                        let iftab = read_u16(rgce) as usize;
983                        if iftab > crate::utils::FTAB_LEN {
984                            return Err(XlsbError::IfTab(iftab));
985                        }
986                        rgce = &rgce[2..];
987                        let argc = crate::utils::FTAB_ARGC[iftab] as usize;
988                        (iftab, argc)
989                    }
990                };
991                if stack.len() < argc {
992                    return Err(XlsbError::StackLen);
993                }
994                if argc > 0 {
995                    let args_start = stack.len() - argc;
996                    let mut args = stack.split_off(args_start);
997                    let start = args[0];
998                    for s in &mut args {
999                        *s -= start;
1000                    }
1001                    let fargs = formula.split_off(start);
1002                    stack.push(formula.len());
1003                    args.push(fargs.len());
1004                    formula.push_str(crate::utils::FTAB[iftab]);
1005                    formula.push('(');
1006                    for w in args.windows(2) {
1007                        formula.push_str(&fargs[w[0]..w[1]]);
1008                        formula.push(',');
1009                    }
1010                    formula.pop();
1011                    formula.push(')');
1012                } else {
1013                    stack.push(formula.len());
1014                    formula.push_str(crate::utils::FTAB[iftab]);
1015                    formula.push_str("()");
1016                }
1017            }
1018            0x23 | 0x43 | 0x63 => {
1019                let iname = read_u32(rgce) as usize - 1; // one-based
1020                stack.push(formula.len());
1021                if let Some(name) = names.get(iname) {
1022                    formula.push_str(&name.0);
1023                }
1024                rgce = &rgce[4..];
1025            }
1026            0x24 | 0x44 | 0x64 => {
1027                let row = read_u32(rgce) + 1;
1028                let col = [rgce[4], rgce[5] & 0x3F];
1029                let col = read_u16(&col);
1030                stack.push(formula.len());
1031                if rgce[5] & 0x80 != 0x80 {
1032                    formula.push('$');
1033                }
1034                push_column(col as u32, &mut formula);
1035                if rgce[5] & 0x40 != 0x40 {
1036                    formula.push('$');
1037                }
1038                formula.push_str(&format!("{}", row));
1039                rgce = &rgce[6..];
1040            }
1041            0x25 | 0x45 | 0x65 => {
1042                stack.push(formula.len());
1043                formula.push('$');
1044                push_column(read_u16(&rgce[8..10]) as u32, &mut formula);
1045                formula.push('$');
1046                formula.push_str(&format!("{}", read_u32(&rgce[0..4]) + 1));
1047                formula.push(':');
1048                formula.push('$');
1049                push_column(read_u16(&rgce[10..12]) as u32, &mut formula);
1050                formula.push('$');
1051                formula.push_str(&format!("{}", read_u32(&rgce[4..8]) + 1));
1052                rgce = &rgce[12..];
1053            }
1054            0x2A | 0x4A | 0x6A => {
1055                stack.push(formula.len());
1056                formula.push_str("#REF!");
1057                rgce = &rgce[6..];
1058            }
1059            0x2B | 0x4B | 0x6B => {
1060                stack.push(formula.len());
1061                formula.push_str("#REF!");
1062                rgce = &rgce[12..];
1063            }
1064            0x29 | 0x49 | 0x69 => {
1065                let cce = read_u16(rgce) as usize;
1066                rgce = &rgce[2..];
1067                let f = parse_formula(&rgce[..cce], sheets, names)?;
1068                stack.push(formula.len());
1069                formula.push_str(&f);
1070                rgce = &rgce[cce..];
1071            }
1072            0x39 | 0x59 | 0x79 => {
1073                // TODO: external workbook ... ignore this formula ...
1074                stack.push(formula.len());
1075                formula.push_str("EXTERNAL_WB_NAME");
1076                rgce = &rgce[6..];
1077            }
1078            _ => return Err(XlsbError::Ptg(ptg)),
1079        }
1080    }
1081
1082    if stack.len() == 1 {
1083        Ok(formula)
1084    } else {
1085        Err(XlsbError::StackLen)
1086    }
1087}
1088
1089fn cell_format<'a>(formats: &'a [CellFormat], buf: &[u8]) -> Option<&'a CellFormat> {
1090    // Parses a Cell (MS-XLSB 2.5.9) and determines if it references a Date format
1091
1092    // iStyleRef is stored as a 24bit integer starting at the fifth byte
1093    let style_ref = u32::from_le_bytes([buf[4], buf[5], buf[6], 0]);
1094
1095    formats.get(style_ref as usize)
1096}