calamine/
xlsx.rs

1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::io::BufReader;
4use std::io::{Read, Seek};
5use std::str::FromStr;
6
7use log::warn;
8use quick_xml::events::attributes::{Attribute, Attributes};
9use quick_xml::events::{BytesStart, Event};
10use quick_xml::name::QName;
11use quick_xml::Reader as XmlReader;
12use zip::read::{ZipArchive, ZipFile};
13use zip::result::ZipError;
14
15use crate::formats::{
16    builtin_format_by_id, detect_custom_number_format, format_excel_f64, CellFormat,
17};
18use crate::vba::VbaProject;
19use crate::{
20    Cell, CellErrorType, CellType, DataType, Metadata, Range, Reader, Sheet, SheetType,
21    SheetVisible, Table,
22};
23
24type XlsReader<'a> = XmlReader<BufReader<ZipFile<'a>>>;
25
26/// Maximum number of rows allowed in an xlsx file
27pub const MAX_ROWS: u32 = 1_048_576;
28
29/// Maximum number of columns allowed in an xlsx file
30pub const MAX_COLUMNS: u32 = 16_384;
31
32/// An enum for Xlsx specific errors
33#[derive(Debug)]
34pub enum XlsxError {
35    /// Io error
36    Io(std::io::Error),
37    /// Zip error
38    Zip(zip::result::ZipError),
39    /// Vba error
40    Vba(crate::vba::VbaError),
41    /// Xml error
42    Xml(quick_xml::Error),
43    /// Xml attribute error
44    XmlAttr(quick_xml::events::attributes::AttrError),
45    /// Parse error
46    Parse(std::string::ParseError),
47    /// Float error
48    ParseFloat(std::num::ParseFloatError),
49    /// ParseInt error
50    ParseInt(std::num::ParseIntError),
51
52    /// Unexpected end of xml
53    XmlEof(&'static str),
54    /// Unexpected node
55    UnexpectedNode(&'static str),
56    /// File not found
57    FileNotFound(String),
58    /// Relationship not found
59    RelationshipNotFound,
60    /// Expecting alphanumeric character
61    Alphanumeric(u8),
62    /// Numeric column
63    NumericColumn(u8),
64    /// Wrong dimension count
65    DimensionCount(usize),
66    /// Cell 't' attribute error
67    CellTAttribute(String),
68    /// There is no column component in the range string
69    RangeWithoutColumnComponent,
70    /// There is no row component in the range string
71    RangeWithoutRowComponent,
72    /// Unexpected error
73    Unexpected(&'static str),
74    /// Unrecognized data
75    Unrecognized {
76        /// data type
77        typ: &'static str,
78        /// value found
79        val: String,
80    },
81    /// Cell error
82    CellError(String),
83}
84
85from_err!(std::io::Error, XlsxError, Io);
86from_err!(zip::result::ZipError, XlsxError, Zip);
87from_err!(crate::vba::VbaError, XlsxError, Vba);
88from_err!(quick_xml::Error, XlsxError, Xml);
89from_err!(std::string::ParseError, XlsxError, Parse);
90from_err!(std::num::ParseFloatError, XlsxError, ParseFloat);
91from_err!(std::num::ParseIntError, XlsxError, ParseInt);
92
93impl std::fmt::Display for XlsxError {
94    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95        match self {
96            XlsxError::Io(e) => write!(f, "I/O error: {}", e),
97            XlsxError::Zip(e) => write!(f, "Zip error: {}", e),
98            XlsxError::Xml(e) => write!(f, "Xml error: {}", e),
99            XlsxError::XmlAttr(e) => write!(f, "Xml attribute error: {}", e),
100            XlsxError::Vba(e) => write!(f, "Vba error: {}", e),
101            XlsxError::Parse(e) => write!(f, "Parse string error: {}", e),
102            XlsxError::ParseInt(e) => write!(f, "Parse integer error: {}", e),
103            XlsxError::ParseFloat(e) => write!(f, "Parse float error: {}", e),
104
105            XlsxError::XmlEof(e) => write!(f, "Unexpected end of xml, expecting '</{}>'", e),
106            XlsxError::UnexpectedNode(e) => write!(f, "Expecting '{}' node", e),
107            XlsxError::FileNotFound(e) => write!(f, "File not found '{}'", e),
108            XlsxError::RelationshipNotFound => write!(f, "Relationship not found"),
109            XlsxError::Alphanumeric(e) => {
110                write!(f, "Expecting alphanumeric character, got {:X}", e)
111            }
112            XlsxError::NumericColumn(e) => write!(
113                f,
114                "Numeric character is not allowed for column name, got {}",
115                e
116            ),
117            XlsxError::DimensionCount(e) => {
118                write!(f, "Range dimension must be lower than 2. Got {}", e)
119            }
120            XlsxError::CellTAttribute(e) => write!(f, "Unknown cell 't' attribute: {:?}", e),
121            XlsxError::RangeWithoutColumnComponent => {
122                write!(f, "Range is missing the expected column component.")
123            }
124            XlsxError::RangeWithoutRowComponent => {
125                write!(f, "Range is missing the expected row component.")
126            }
127            XlsxError::Unexpected(e) => write!(f, "{}", e),
128            XlsxError::Unrecognized { typ, val } => write!(f, "Unrecognized {}: {}", typ, val),
129            XlsxError::CellError(e) => write!(f, "Unsupported cell error value '{}'", e),
130        }
131    }
132}
133
134impl std::error::Error for XlsxError {
135    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
136        match self {
137            XlsxError::Io(e) => Some(e),
138            XlsxError::Zip(e) => Some(e),
139            XlsxError::Xml(e) => Some(e),
140            XlsxError::Vba(e) => Some(e),
141            XlsxError::Parse(e) => Some(e),
142            XlsxError::ParseInt(e) => Some(e),
143            XlsxError::ParseFloat(e) => Some(e),
144            _ => None,
145        }
146    }
147}
148
149impl FromStr for CellErrorType {
150    type Err = XlsxError;
151    fn from_str(s: &str) -> Result<Self, Self::Err> {
152        match s {
153            "#DIV/0!" => Ok(CellErrorType::Div0),
154            "#N/A" => Ok(CellErrorType::NA),
155            "#NAME?" => Ok(CellErrorType::Name),
156            "#NULL!" => Ok(CellErrorType::Null),
157            "#NUM!" => Ok(CellErrorType::Num),
158            "#REF!" => Ok(CellErrorType::Ref),
159            "#VALUE!" => Ok(CellErrorType::Value),
160            _ => Err(XlsxError::CellError(s.into())),
161        }
162    }
163}
164
165type Tables = Option<Vec<(String, String, Vec<String>, Dimensions)>>;
166
167/// A struct representing xml zipped excel file
168/// Xlsx, Xlsm, Xlam
169pub struct Xlsx<RS> {
170    zip: ZipArchive<RS>,
171    /// Shared strings
172    strings: Vec<String>,
173    /// Sheets paths
174    sheets: Vec<(String, String)>,
175    /// Tables: Name, Sheet, Columns, Data dimensions
176    tables: Tables,
177    /// Cell (number) formats
178    formats: Vec<CellFormat>,
179    /// 1904 datetime system
180    is_1904: bool,
181    /// Metadata
182    metadata: Metadata,
183    /// Pictures
184    #[cfg(feature = "picture")]
185    pictures: Option<Vec<(String, Vec<u8>)>>,
186}
187
188impl<RS: Read + Seek> Xlsx<RS> {
189    fn read_shared_strings(&mut self) -> Result<(), XlsxError> {
190        let mut xml = match xml_reader(&mut self.zip, "xl/sharedStrings.xml") {
191            None => return Ok(()),
192            Some(x) => x?,
193        };
194        let mut buf = Vec::with_capacity(1024);
195        loop {
196            buf.clear();
197            match xml.read_event_into(&mut buf) {
198                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"si" => {
199                    if let Some(s) = read_string(&mut xml, e.name())? {
200                        self.strings.push(s);
201                    }
202                }
203                Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sst" => break,
204                Ok(Event::Eof) => return Err(XlsxError::XmlEof("sst")),
205                Err(e) => return Err(XlsxError::Xml(e)),
206                _ => (),
207            }
208        }
209        Ok(())
210    }
211
212    fn read_styles(&mut self) -> Result<(), XlsxError> {
213        let mut xml = match xml_reader(&mut self.zip, "xl/styles.xml") {
214            None => return Ok(()),
215            Some(x) => x?,
216        };
217
218        let mut number_formats = BTreeMap::new();
219
220        let mut buf = Vec::with_capacity(1024);
221        let mut inner_buf = Vec::with_capacity(1024);
222        loop {
223            buf.clear();
224            match xml.read_event_into(&mut buf) {
225                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"numFmts" => loop {
226                    inner_buf.clear();
227                    match xml.read_event_into(&mut inner_buf) {
228                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"numFmt" => {
229                            let mut id = Vec::new();
230                            let mut format = String::new();
231                            for a in e.attributes() {
232                                match a.map_err(XlsxError::XmlAttr)? {
233                                    Attribute {
234                                        key: QName(b"numFmtId"),
235                                        value: v,
236                                    } => id.extend_from_slice(&v),
237                                    Attribute {
238                                        key: QName(b"formatCode"),
239                                        value: v,
240                                    } => format = xml.decoder().decode(&v)?.into_owned(),
241                                    _ => (),
242                                }
243                            }
244                            if !format.is_empty() {
245                                number_formats.insert(id, format);
246                            }
247                        }
248                        Ok(Event::End(ref e)) if e.local_name().as_ref() == b"numFmts" => break,
249                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("numFmts")),
250                        Err(e) => return Err(XlsxError::Xml(e)),
251                        _ => (),
252                    }
253                },
254                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"cellXfs" => loop {
255                    inner_buf.clear();
256                    match xml.read_event_into(&mut inner_buf) {
257                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"xf" => {
258                            self.formats.push(
259                                e.attributes()
260                                    .filter_map(|a| a.ok())
261                                    .find(|a| a.key == QName(b"numFmtId"))
262                                    .map_or(CellFormat::Other, |a| {
263                                        match number_formats.get(&*a.value) {
264                                            Some(fmt) => detect_custom_number_format(fmt),
265                                            None => builtin_format_by_id(&a.value),
266                                        }
267                                    }),
268                            );
269                        }
270                        Ok(Event::End(ref e)) if e.local_name().as_ref() == b"cellXfs" => break,
271                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("cellXfs")),
272                        Err(e) => return Err(XlsxError::Xml(e)),
273                        _ => (),
274                    }
275                },
276                Ok(Event::End(ref e)) if e.local_name().as_ref() == b"styleSheet" => break,
277                Ok(Event::Eof) => return Err(XlsxError::XmlEof("styleSheet")),
278                Err(e) => return Err(XlsxError::Xml(e)),
279                _ => (),
280            }
281        }
282        Ok(())
283    }
284
285    fn read_workbook(
286        &mut self,
287        relationships: &BTreeMap<Vec<u8>, String>,
288    ) -> Result<(), XlsxError> {
289        let mut xml = match xml_reader(&mut self.zip, "xl/workbook.xml") {
290            None => return Ok(()),
291            Some(x) => x?,
292        };
293        let mut defined_names = Vec::new();
294        let mut buf = Vec::with_capacity(1024);
295        let mut val_buf = Vec::with_capacity(1024);
296        loop {
297            buf.clear();
298            match xml.read_event_into(&mut buf) {
299                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"sheet" => {
300                    let mut name = String::new();
301                    let mut path = String::new();
302                    let mut visible = SheetVisible::Visible;
303                    for a in e.attributes() {
304                        let a = a.map_err(XlsxError::XmlAttr)?;
305                        match a {
306                            Attribute {
307                                key: QName(b"name"),
308                                ..
309                            } => {
310                                name = a.decode_and_unescape_value(&xml)?.to_string();
311                            }
312                            Attribute {
313                                key: QName(b"state"),
314                                ..
315                            } => {
316                                visible = match a.decode_and_unescape_value(&xml)?.as_ref() {
317                                    "visible" => SheetVisible::Visible,
318                                    "hidden" => SheetVisible::Hidden,
319                                    "veryHidden" => SheetVisible::VeryHidden,
320                                    v => {
321                                        return Err(XlsxError::Unrecognized {
322                                            typ: "sheet:state",
323                                            val: v.to_string(),
324                                        })
325                                    }
326                                }
327                            }
328                            Attribute {
329                                key: QName(b"r:id"),
330                                value: v,
331                            }
332                            | Attribute {
333                                key: QName(b"relationships:id"),
334                                value: v,
335                            } => {
336                                let r = &relationships
337                                    .get(&*v)
338                                    .ok_or(XlsxError::RelationshipNotFound)?[..];
339                                // target may have pre-prended "/xl/" or "xl/" path;
340                                // strip if present
341                                path = if r.starts_with("/xl/") {
342                                    r[1..].to_string()
343                                } else if r.starts_with("xl/") {
344                                    r.to_string()
345                                } else {
346                                    format!("xl/{}", r)
347                                };
348                            }
349                            _ => (),
350                        }
351                    }
352                    let typ = match path.split('/').nth(1) {
353                        Some("worksheets") => SheetType::WorkSheet,
354                        Some("chartsheets") => SheetType::ChartSheet,
355                        Some("dialogsheets") => SheetType::DialogSheet,
356                        _ => {
357                            return Err(XlsxError::Unrecognized {
358                                typ: "sheet:type",
359                                val: path.to_string(),
360                            })
361                        }
362                    };
363                    self.metadata.sheets.push(Sheet {
364                        name: name.to_string(),
365                        typ,
366                        visible,
367                    });
368                    self.sheets.push((name, path));
369                }
370                Ok(Event::Start(ref e)) if e.name().as_ref() == b"workbookPr" => {
371                    self.is_1904 = match e.try_get_attribute("date1904")? {
372                        Some(c) => ["1", "true"].contains(
373                            &c.decode_and_unescape_value(&xml)
374                                .map_err(XlsxError::Xml)?
375                                .as_ref(),
376                        ),
377                        None => false,
378                    };
379                }
380                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"definedName" => {
381                    if let Some(a) = e
382                        .attributes()
383                        .filter_map(std::result::Result::ok)
384                        .find(|a| a.key == QName(b"name"))
385                    {
386                        let name = a.decode_and_unescape_value(&xml)?.to_string();
387                        val_buf.clear();
388                        let mut value = String::new();
389                        loop {
390                            match xml.read_event_into(&mut val_buf)? {
391                                Event::Text(t) => value.push_str(&t.unescape()?),
392                                Event::End(end) if end.name() == e.name() => break,
393                                Event::Eof => return Err(XlsxError::XmlEof("workbook")),
394                                _ => (),
395                            }
396                        }
397                        defined_names.push((name, value));
398                    }
399                }
400                Ok(Event::End(ref e)) if e.local_name().as_ref() == b"workbook" => break,
401                Ok(Event::Eof) => return Err(XlsxError::XmlEof("workbook")),
402                Err(e) => return Err(XlsxError::Xml(e)),
403                _ => (),
404            }
405        }
406        self.metadata.names = defined_names;
407        Ok(())
408    }
409
410    fn read_relationships(&mut self) -> Result<BTreeMap<Vec<u8>, String>, XlsxError> {
411        let mut xml = match xml_reader(&mut self.zip, "xl/_rels/workbook.xml.rels") {
412            None => {
413                return Err(XlsxError::FileNotFound(
414                    "xl/_rels/workbook.xml.rels".to_string(),
415                ));
416            }
417            Some(x) => x?,
418        };
419        let mut relationships = BTreeMap::new();
420        let mut buf = Vec::with_capacity(64);
421        loop {
422            buf.clear();
423            match xml.read_event_into(&mut buf) {
424                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"Relationship" => {
425                    let mut id = Vec::new();
426                    let mut target = String::new();
427                    for a in e.attributes() {
428                        match a.map_err(XlsxError::XmlAttr)? {
429                            Attribute {
430                                key: QName(b"Id"),
431                                value: v,
432                            } => id.extend_from_slice(&v),
433                            Attribute {
434                                key: QName(b"Target"),
435                                value: v,
436                            } => target = xml.decoder().decode(&v)?.into_owned(),
437                            _ => (),
438                        }
439                    }
440                    relationships.insert(id, target);
441                }
442                Ok(Event::End(ref e)) if e.local_name().as_ref() == b"Relationships" => break,
443                Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
444                Err(e) => return Err(XlsxError::Xml(e)),
445                _ => (),
446            }
447        }
448        Ok(relationships)
449    }
450
451    // sheets must be added before this is called!!
452    fn read_table_metadata(&mut self) -> Result<(), XlsxError> {
453        for (sheet_name, sheet_path) in &self.sheets {
454            let last_folder_index = sheet_path.rfind('/').expect("should be in a folder");
455            let (base_folder, file_name) = sheet_path.split_at(last_folder_index);
456            let rel_path = format!("{}/_rels{}.rels", base_folder, file_name);
457
458            let mut table_locations = Vec::new();
459            let mut buf = Vec::with_capacity(64);
460            // we need another mutable borrow of self.zip later so we enclose this borrow within braces
461            {
462                let mut xml = match xml_reader(&mut self.zip, &rel_path) {
463                    None => continue,
464                    Some(x) => x?,
465                };
466                loop {
467                    buf.clear();
468                    match xml.read_event_into(&mut buf) {
469                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"Relationship" => {
470                            let mut id = Vec::new();
471                            let mut target = String::new();
472                            let mut table_type = false;
473                            for a in e.attributes() {
474                                match a.map_err(XlsxError::XmlAttr)? {
475                                    Attribute {
476                                        key: QName(b"Id"),
477                                        value: v,
478                                    } => id.extend_from_slice(&v),
479                                    Attribute {
480                                        key: QName(b"Target"),
481                                        value: v,
482                                    } => target = xml.decoder().decode(&v)?.into_owned(),
483                                    Attribute {
484                                        key: QName(b"Type"),
485                                        value: v,
486                                    } => table_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/table"[..],
487                                    _ => (),
488                                }
489                            }
490                            if table_type {
491                                if target.starts_with("../") {
492                                    // this is an incomplete implementation, but should be good enough for excel
493                                    let new_index =
494                                        base_folder.rfind('/').expect("Must be a parent folder");
495                                    let full_path = format!(
496                                        "{}{}",
497                                        base_folder[..new_index].to_owned(),
498                                        target[2..].to_owned()
499                                    );
500                                    table_locations.push(full_path);
501                                } else if target.is_empty() { // do nothing
502                                } else {
503                                    table_locations.push(target);
504                                }
505                            }
506                        }
507                        Ok(Event::End(ref e)) if e.local_name().as_ref() == b"Relationships" => {
508                            break
509                        }
510                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
511                        Err(e) => return Err(XlsxError::Xml(e)),
512                        _ => (),
513                    }
514                }
515            }
516            let mut new_tables = Vec::new();
517            for table_file in table_locations {
518                let mut xml = match xml_reader(&mut self.zip, &table_file) {
519                    None => continue,
520                    Some(x) => x?,
521                };
522                let mut column_names = Vec::new();
523                let mut table_meta = InnerTableMetadata::new();
524                loop {
525                    buf.clear();
526                    match xml.read_event_into(&mut buf) {
527                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"table" => {
528                            for a in e.attributes() {
529                                match a.map_err(XlsxError::XmlAttr)? {
530                                    Attribute {
531                                        key: QName(b"displayName"),
532                                        value: v,
533                                    } => {
534                                        table_meta.display_name =
535                                            xml.decoder().decode(&v)?.into_owned()
536                                    }
537                                    Attribute {
538                                        key: QName(b"ref"),
539                                        value: v,
540                                    } => {
541                                        table_meta.ref_cells =
542                                            xml.decoder().decode(&v)?.into_owned()
543                                    }
544                                    Attribute {
545                                        key: QName(b"headerRowCount"),
546                                        value: v,
547                                    } => {
548                                        table_meta.header_row_count =
549                                            xml.decoder().decode(&v)?.parse()?
550                                    }
551                                    Attribute {
552                                        key: QName(b"insertRow"),
553                                        value: v,
554                                    } => table_meta.insert_row = *v != b"0"[..],
555                                    Attribute {
556                                        key: QName(b"totalsRowCount"),
557                                        value: v,
558                                    } => {
559                                        table_meta.totals_row_count =
560                                            xml.decoder().decode(&v)?.parse()?
561                                    }
562                                    _ => (),
563                                }
564                            }
565                        }
566                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"tableColumn" => {
567                            for a in e.attributes().flatten() {
568                                if let Attribute {
569                                    key: QName(b"name"),
570                                    value: v,
571                                } = a
572                                {
573                                    column_names.push(xml.decoder().decode(&v)?.into_owned())
574                                }
575                            }
576                        }
577                        Ok(Event::End(ref e)) if e.local_name().as_ref() == b"table" => break,
578                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("Table")),
579                        Err(e) => return Err(XlsxError::Xml(e)),
580                        _ => (),
581                    }
582                }
583                let mut dims = get_dimension(table_meta.ref_cells.as_bytes())?;
584                if table_meta.header_row_count != 0 {
585                    dims.start.0 += table_meta.header_row_count;
586                }
587                if table_meta.totals_row_count != 0 {
588                    dims.end.0 -= table_meta.header_row_count;
589                }
590                if table_meta.insert_row {
591                    dims.end.0 -= 1;
592                }
593                new_tables.push((
594                    table_meta.display_name,
595                    sheet_name.clone(),
596                    column_names,
597                    dims,
598                ));
599            }
600            if let Some(tables) = &mut self.tables {
601                tables.append(&mut new_tables);
602            } else {
603                self.tables = Some(new_tables);
604            }
605        }
606        Ok(())
607    }
608
609    /// Read pictures
610    #[cfg(feature = "picture")]
611    fn read_pictures(&mut self) -> Result<(), XlsxError> {
612        let mut pics = Vec::new();
613        for i in 0..self.zip.len() {
614            let mut zfile = self.zip.by_index(i)?;
615            let zname = zfile.name().to_owned();
616            if zname.starts_with("xl/media") {
617                let name_ext: Vec<&str> = zname.split(".").collect();
618                if let Some(ext) = name_ext.last() {
619                    if [
620                        "emf", "wmf", "pict", "jpeg", "jpg", "png", "dib", "gif", "tiff", "eps",
621                        "bmp", "wpg",
622                    ]
623                    .contains(ext)
624                    {
625                        let mut buf: Vec<u8> = Vec::new();
626                        zfile.read_to_end(&mut buf)?;
627                        pics.push((ext.to_string(), buf));
628                    }
629                }
630            }
631        }
632        if !pics.is_empty() {
633            self.pictures = Some(pics);
634        }
635        Ok(())
636    }
637
638    /// Load the tables from
639    pub fn load_tables(&mut self) -> Result<(), XlsxError> {
640        if self.tables.is_none() {
641            self.read_table_metadata()
642        } else {
643            Ok(())
644        }
645    }
646
647    /// Get the names of all the tables
648    pub fn table_names(&self) -> Vec<&String> {
649        self.tables
650            .as_ref()
651            .expect("Tables must be loaded before they are referenced")
652            .iter()
653            .map(|(name, ..)| name)
654            .collect()
655    }
656    /// Get the names of all the tables in a sheet
657    pub fn table_names_in_sheet(&self, sheet_name: &str) -> Vec<&String> {
658        self.tables
659            .as_ref()
660            .expect("Tables must be loaded before they are referenced")
661            .iter()
662            .filter(|(_, sheet, ..)| sheet == sheet_name)
663            .map(|(name, ..)| name)
664            .collect()
665    }
666
667    /// Get the table by name
668    // TODO: If retrieving multiple tables from a single sheet, get tables by sheet will be more efficient
669    pub fn table_by_name(
670        &mut self,
671        table_name: &str,
672    ) -> Option<Result<Table<DataType>, XlsxError>> {
673        let match_table_meta = self
674            .tables
675            .as_ref()
676            .expect("Tables must be loaded before they are referenced")
677            .iter()
678            .find(|(table, ..)| table == table_name)?;
679        let name = match_table_meta.0.to_owned();
680        let sheet_name = match_table_meta.1.clone();
681        let columns = match_table_meta.2.clone();
682        let start_dim = match_table_meta.3.start;
683        let end_dim = match_table_meta.3.end;
684        let r_range = self.worksheet_range(&sheet_name)?;
685        match r_range {
686            Ok(range) => {
687                let tbl_rng = range.range(start_dim, end_dim);
688                Some(Ok(Table {
689                    name,
690                    sheet_name,
691                    columns,
692                    data: tbl_rng,
693                }))
694            }
695            Err(e) => Some(Err(e)),
696        }
697    }
698}
699
700struct InnerTableMetadata {
701    display_name: String,
702    ref_cells: String,
703    header_row_count: u32,
704    insert_row: bool,
705    totals_row_count: u32,
706}
707
708impl InnerTableMetadata {
709    fn new() -> Self {
710        Self {
711            display_name: String::new(),
712            ref_cells: String::new(),
713            header_row_count: 1,
714            insert_row: false,
715            totals_row_count: 0,
716        }
717    }
718}
719
720fn worksheet<T, F>(
721    strings: &[String],
722    formats: &[CellFormat],
723    mut xml: XlsReader<'_>,
724    read_data: &mut F,
725) -> Result<Range<T>, XlsxError>
726where
727    T: CellType,
728    F: FnMut(
729        &[String],
730        &[CellFormat],
731        &mut XlsReader<'_>,
732        &mut Vec<Cell<T>>,
733    ) -> Result<(), XlsxError>,
734{
735    let mut cells = Vec::with_capacity(1024);
736    let mut buf = Vec::with_capacity(1024);
737    'xml: loop {
738        buf.clear();
739        match xml.read_event_into(&mut buf) {
740            Ok(Event::Start(ref e)) => {
741                match e.local_name().as_ref() {
742                    b"dimension" => {
743                        for a in e.attributes() {
744                            if let Attribute {
745                                key: QName(b"ref"),
746                                value: rdim,
747                            } = a.map_err(XlsxError::XmlAttr)?
748                            {
749                                let len = get_dimension(&rdim)?.len();
750                                if len < 1_000_000 {
751                                    // it is unlikely to have more than that
752                                    // there may be of empty cells
753                                    cells.reserve(len as usize);
754                                }
755                                continue 'xml;
756                            }
757                        }
758                        return Err(XlsxError::UnexpectedNode("dimension"));
759                    }
760                    b"sheetData" => {
761                        read_data(strings, formats, &mut xml, &mut cells)?;
762                        break;
763                    }
764                    _ => (),
765                }
766            }
767            Ok(Event::Eof) => break,
768            Err(e) => return Err(XlsxError::Xml(e)),
769            _ => (),
770        }
771    }
772    Ok(Range::from_sparse(cells))
773}
774
775impl<RS: Read + Seek> Reader<RS> for Xlsx<RS> {
776    type Error = XlsxError;
777
778    fn new(reader: RS) -> Result<Self, XlsxError> {
779        let mut xlsx = Xlsx {
780            zip: ZipArchive::new(reader)?,
781            strings: Vec::new(),
782            formats: Vec::new(),
783            is_1904: false,
784            sheets: Vec::new(),
785            tables: None,
786            metadata: Metadata::default(),
787            #[cfg(feature = "picture")]
788            pictures: None,
789        };
790        xlsx.read_shared_strings()?;
791        xlsx.read_styles()?;
792        let relationships = xlsx.read_relationships()?;
793        xlsx.read_workbook(&relationships)?;
794        #[cfg(feature = "picture")]
795        xlsx.read_pictures()?;
796
797        Ok(xlsx)
798    }
799
800    fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, XlsxError>> {
801        self.zip.by_name("xl/vbaProject.bin").ok().map(|mut f| {
802            let len = f.size() as usize;
803            VbaProject::new(&mut f, len)
804                .map(Cow::Owned)
805                .map_err(XlsxError::Vba)
806        })
807    }
808
809    fn metadata(&self) -> &Metadata {
810        &self.metadata
811    }
812
813    fn worksheet_range(&mut self, name: &str) -> Option<Result<Range<DataType>, XlsxError>> {
814        let xml = match self.sheets.iter().find(|&(n, _)| n == name) {
815            Some((_, path)) => xml_reader(&mut self.zip, path),
816            None => return None,
817        };
818        let is_1904 = self.is_1904;
819        let strings = &self.strings;
820        let formats = &self.formats;
821        xml.map(|xml| {
822            worksheet(strings, formats, xml?, &mut |s, f, xml, cells| {
823                read_sheet_data(xml, s, f, cells, is_1904)
824            })
825        })
826    }
827
828    fn worksheet_formula(&mut self, name: &str) -> Option<Result<Range<String>, XlsxError>> {
829        let xml = match self.sheets.iter().find(|&(n, _)| n == name) {
830            Some((_, path)) => xml_reader(&mut self.zip, path),
831            None => return None,
832        };
833
834        let strings = &self.strings;
835        let formats = &self.formats;
836        xml.map(|xml| {
837            worksheet(strings, formats, xml?, &mut |_, _, xml, cells| {
838                read_sheet(xml, cells, &mut |cells, xml, e, pos, _| {
839                    match e.local_name().as_ref() {
840                        b"is" | b"v" => {
841                            xml.read_to_end_into(e.name(), &mut Vec::new())?;
842                        }
843                        b"f" => {
844                            let mut f_buf = Vec::with_capacity(512);
845                            let mut f = String::new();
846                            loop {
847                                match xml.read_event_into(&mut f_buf)? {
848                                    Event::Text(t) => f.push_str(&t.unescape()?),
849                                    Event::End(end) if end.name() == e.name() => break,
850                                    Event::Eof => return Err(XlsxError::XmlEof("f")),
851                                    _ => (),
852                                }
853                                f_buf.clear();
854                            }
855                            if !f.is_empty() {
856                                cells.push(Cell::new(pos, f));
857                            }
858                        }
859                        _ => return Err(XlsxError::UnexpectedNode("v, f, or is")),
860                    }
861                    Ok(())
862                })
863            })
864        })
865    }
866
867    fn worksheets(&mut self) -> Vec<(String, Range<DataType>)> {
868        let is_1904 = self.is_1904;
869        self.sheets
870            .clone()
871            .into_iter()
872            .filter_map(|(name, path)| {
873                let xml = xml_reader(&mut self.zip, &path)?.ok()?;
874                let range = worksheet(
875                    &self.strings,
876                    &self.formats,
877                    xml,
878                    &mut |s, f, xml, cells| read_sheet_data(xml, s, f, cells, is_1904),
879                )
880                .ok()?;
881                Some((name, range))
882            })
883            .collect()
884    }
885
886    #[cfg(feature = "picture")]
887    fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>> {
888        self.pictures.to_owned()
889    }
890}
891
892fn xml_reader<'a, RS: Read + Seek>(
893    zip: &'a mut ZipArchive<RS>,
894    path: &str,
895) -> Option<Result<XlsReader<'a>, XlsxError>> {
896    match zip.by_name(path) {
897        Ok(f) => {
898            let mut r = XmlReader::from_reader(BufReader::new(f));
899            r.check_end_names(false)
900                .trim_text(false)
901                .check_comments(false)
902                .expand_empty_elements(true);
903            Some(Ok(r))
904        }
905        Err(ZipError::FileNotFound) => None,
906        Err(e) => Some(Err(e.into())),
907    }
908}
909
910/// search through an Element's attributes for the named one
911fn get_attribute<'a>(atts: Attributes<'a>, n: QName) -> Result<Option<&'a [u8]>, XlsxError> {
912    for a in atts {
913        match a {
914            Ok(Attribute {
915                key,
916                value: Cow::Borrowed(value),
917            }) if key == n => return Ok(Some(value)),
918            Err(e) => return Err(XlsxError::XmlAttr(e)),
919            _ => {} // ignore other attributes
920        }
921    }
922    Ok(None)
923}
924
925fn read_sheet<T, F>(
926    xml: &mut XlsReader<'_>,
927    cells: &mut Vec<Cell<T>>,
928    push_cell: &mut F,
929) -> Result<(), XlsxError>
930where
931    T: CellType,
932    F: FnMut(
933        &mut Vec<Cell<T>>,
934        &mut XlsReader<'_>,
935        &BytesStart<'_>,
936        (u32, u32),
937        &BytesStart<'_>,
938    ) -> Result<(), XlsxError>,
939{
940    let mut buf = Vec::with_capacity(1024);
941    let mut cell_buf = Vec::with_capacity(1024);
942
943    let mut row_index = 0;
944    let mut col_index = 0;
945
946    loop {
947        buf.clear();
948        match xml.read_event_into(&mut buf) {
949            Ok(Event::Start(ref row_element)) if row_element.local_name().as_ref() == b"row" => {
950                let attribute = get_attribute(row_element.attributes(), QName(b"r"))?;
951                if let Some(range) = attribute {
952                    let row = get_row(range)?;
953                    row_index = row;
954                }
955            }
956            Ok(Event::End(ref row_element)) if row_element.local_name().as_ref() == b"row" => {
957                row_index += 1;
958                col_index = 0;
959            }
960            Ok(Event::Start(ref c_element)) if c_element.local_name().as_ref() == b"c" => {
961                let attribute = get_attribute(c_element.attributes(), QName(b"r"))?;
962
963                let pos = if let Some(range) = attribute {
964                    let (row, col) = get_row_column(range)?;
965                    col_index = col;
966                    (row, col)
967                } else {
968                    (row_index, col_index)
969                };
970
971                loop {
972                    cell_buf.clear();
973                    match xml.read_event_into(&mut cell_buf) {
974                        Ok(Event::Start(ref e)) => push_cell(cells, xml, e, pos, c_element)?,
975                        Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break,
976                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")),
977                        Err(e) => return Err(XlsxError::Xml(e)),
978                        _ => (),
979                    }
980                }
981                col_index += 1;
982            }
983            Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sheetData" => return Ok(()),
984            Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")),
985            Err(e) => return Err(XlsxError::Xml(e)),
986            _ => (),
987        }
988    }
989}
990
991/// read sheetData node
992fn read_sheet_data(
993    xml: &mut XlsReader<'_>,
994    strings: &[String],
995    formats: &[CellFormat],
996    cells: &mut Vec<Cell<DataType>>,
997    is_1904: bool,
998) -> Result<(), XlsxError> {
999    /// read the contents of a <v> cell
1000    fn read_value(
1001        v: String,
1002        strings: &[String],
1003        formats: &[CellFormat],
1004        c_element: &BytesStart<'_>,
1005        is_1904: bool,
1006    ) -> Result<DataType, XlsxError> {
1007        let cell_format = match get_attribute(c_element.attributes(), QName(b"s")) {
1008            Ok(Some(style)) => {
1009                let id: usize = std::str::from_utf8(style).unwrap_or("0").parse()?;
1010                formats.get(id)
1011            }
1012            _ => Some(&CellFormat::Other),
1013        };
1014
1015        match get_attribute(c_element.attributes(), QName(b"t"))? {
1016            Some(b"s") => {
1017                // shared string
1018                let idx: usize = v.parse()?;
1019                Ok(DataType::String(strings[idx].clone()))
1020            }
1021            Some(b"b") => {
1022                // boolean
1023                Ok(DataType::Bool(v != "0"))
1024            }
1025            Some(b"e") => {
1026                // error
1027                Ok(DataType::Error(v.parse()?))
1028            }
1029            Some(b"d") => {
1030                // date
1031                Ok(DataType::DateTimeIso(v))
1032            }
1033            Some(b"str") => {
1034                // see http://officeopenxml.com/SScontentOverview.php
1035                // str - refers to formula cells
1036                // * <c .. t='v' .. > indicates calculated value (this case)
1037                // * <c .. t='f' .. > to the formula string (ignored case
1038                // TODO: Fully support a DataType::Formula representing both Formula string &
1039                // last calculated value?
1040                //
1041                // NB: the result of a formula may not be a numeric value (=A3&" "&A4).
1042                // We do try an initial parse as Float for utility, but fall back to a string
1043                // representation if that fails
1044                v.parse().map(DataType::Float).or(Ok(DataType::String(v)))
1045            }
1046            Some(b"n") => {
1047                // n - number
1048                if v.is_empty() {
1049                    Ok(DataType::Empty)
1050                } else {
1051                    v.parse()
1052                        .map(|n| format_excel_f64(n, cell_format, is_1904))
1053                        .map_err(XlsxError::ParseFloat)
1054                }
1055            }
1056            None => {
1057                // If type is not known, we try to parse as Float for utility, but fall back to
1058                // String if this fails.
1059                v.parse()
1060                    .map(|n| format_excel_f64(n, cell_format, is_1904))
1061                    .or(Ok(DataType::String(v)))
1062            }
1063            Some(b"is") => {
1064                // this case should be handled in outer loop over cell elements, in which
1065                // case read_inline_str is called instead. Case included here for completeness.
1066                Err(XlsxError::Unexpected(
1067                    "called read_value on a cell of type inlineStr",
1068                ))
1069            }
1070            Some(t) => {
1071                let t = std::str::from_utf8(t).unwrap_or("<utf8 error>").to_string();
1072                Err(XlsxError::CellTAttribute(t))
1073            }
1074        }
1075    }
1076
1077    read_sheet(xml, cells, &mut |cells, xml, e, pos, c_element| {
1078        match e.local_name().as_ref() {
1079            b"is" => {
1080                // inlineStr
1081                if let Some(s) = read_string(xml, e.name())? {
1082                    cells.push(Cell::new(pos, DataType::String(s)));
1083                }
1084            }
1085            b"v" => {
1086                // value
1087                let mut v = String::new();
1088                let mut v_buf = Vec::new();
1089                loop {
1090                    v_buf.clear();
1091                    match xml.read_event_into(&mut v_buf)? {
1092                        Event::Text(t) => v.push_str(&t.unescape()?),
1093                        Event::End(end) if end.name() == e.name() => break,
1094                        Event::Eof => return Err(XlsxError::XmlEof("v")),
1095                        _ => (),
1096                    }
1097                }
1098                match read_value(v, strings, formats, c_element, is_1904)? {
1099                    DataType::Empty => (),
1100                    v => cells.push(Cell::new(pos, v)),
1101                }
1102            }
1103            b"f" => {
1104                xml.read_to_end_into(e.name(), &mut Vec::new())?;
1105            }
1106            _n => return Err(XlsxError::UnexpectedNode("v, f, or is")),
1107        }
1108        Ok(())
1109    })
1110}
1111
1112#[derive(Debug, PartialEq)]
1113struct Dimensions {
1114    start: (u32, u32),
1115    end: (u32, u32),
1116}
1117
1118impl Dimensions {
1119    fn len(&self) -> u64 {
1120        (self.end.0 - self.start.0 + 1) as u64 * (self.end.1 - self.start.1 + 1) as u64
1121    }
1122}
1123
1124/// converts a text representation (e.g. "A6:G67") of a dimension into integers
1125/// - top left (row, column),
1126/// - bottom right (row, column)
1127fn get_dimension(dimension: &[u8]) -> Result<Dimensions, XlsxError> {
1128    let parts: Vec<_> = dimension
1129        .split(|c| *c == b':')
1130        .map(get_row_column)
1131        .collect::<Result<Vec<_>, XlsxError>>()?;
1132
1133    match parts.len() {
1134        0 => Err(XlsxError::DimensionCount(0)),
1135        1 => Ok(Dimensions {
1136            start: parts[0],
1137            end: parts[0],
1138        }),
1139        2 => {
1140            let rows = parts[1].0 - parts[0].0;
1141            let columns = parts[1].1 - parts[0].1;
1142            if rows > MAX_ROWS {
1143                warn!(
1144                    "xlsx has more than maximum number of rows ({} > {})",
1145                    rows, MAX_ROWS
1146                );
1147            }
1148            if columns > MAX_COLUMNS {
1149                warn!(
1150                    "xlsx has more than maximum number of columns ({} > {})",
1151                    columns, MAX_COLUMNS
1152                );
1153            }
1154            Ok(Dimensions {
1155                start: parts[0],
1156                end: parts[1],
1157            })
1158        }
1159        len => Err(XlsxError::DimensionCount(len)),
1160    }
1161}
1162
1163/// Converts a text range name into its position (row, column) (0 based index).
1164/// If the row or column component in the range is missing, an Error is returned.
1165fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> {
1166    let (row, col) = get_row_and_optional_column(range)?;
1167    let col = col.ok_or(XlsxError::RangeWithoutColumnComponent)?;
1168    Ok((row, col))
1169}
1170
1171/// Converts a text row name into its position (0 based index).
1172/// If the row component in the range is missing, an Error is returned.
1173/// If the text row name also contains a column component, it is ignored.
1174fn get_row(range: &[u8]) -> Result<u32, XlsxError> {
1175    get_row_and_optional_column(range).map(|(row, _)| row)
1176}
1177
1178/// Converts a text range name into its position (row, column) (0 based index).
1179/// If the row component in the range is missing, an Error is returned.
1180/// If the column component in the range is missing, an None is returned for the column.
1181fn get_row_and_optional_column(range: &[u8]) -> Result<(u32, Option<u32>), XlsxError> {
1182    let (mut row, mut col) = (0, 0);
1183    let mut pow = 1;
1184    let mut readrow = true;
1185    for c in range.iter().rev() {
1186        match *c {
1187            c @ b'0'..=b'9' => {
1188                if readrow {
1189                    row += ((c - b'0') as u32) * pow;
1190                    pow *= 10;
1191                } else {
1192                    return Err(XlsxError::NumericColumn(c));
1193                }
1194            }
1195            c @ b'A'..=b'Z' => {
1196                if readrow {
1197                    pow = 1;
1198                    readrow = false;
1199                }
1200                col += ((c - b'A') as u32 + 1) * pow;
1201                pow *= 26;
1202            }
1203            c @ b'a'..=b'z' => {
1204                if readrow {
1205                    pow = 1;
1206                    readrow = false;
1207                }
1208                col += ((c - b'a') as u32 + 1) * pow;
1209                pow *= 26;
1210            }
1211            _ => return Err(XlsxError::Alphanumeric(*c)),
1212        }
1213    }
1214    let row = row
1215        .checked_sub(1)
1216        .ok_or(XlsxError::RangeWithoutRowComponent)?;
1217    Ok((row, col.checked_sub(1)))
1218}
1219
1220/// attempts to read either a simple or richtext string
1221fn read_string(
1222    xml: &mut XlsReader<'_>,
1223    QName(closing): QName,
1224) -> Result<Option<String>, XlsxError> {
1225    let mut buf = Vec::with_capacity(1024);
1226    let mut val_buf = Vec::with_capacity(1024);
1227    let mut rich_buffer: Option<String> = None;
1228    let mut is_phonetic_text = false;
1229    loop {
1230        buf.clear();
1231        match xml.read_event_into(&mut buf) {
1232            Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"r" => {
1233                if rich_buffer.is_none() {
1234                    // use a buffer since richtext has multiples <r> and <t> for the same cell
1235                    rich_buffer = Some(String::new());
1236                }
1237            }
1238            Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"rPh" => {
1239                is_phonetic_text = true;
1240            }
1241            Ok(Event::End(ref e)) if e.local_name().as_ref() == closing => {
1242                return Ok(rich_buffer);
1243            }
1244            Ok(Event::End(ref e)) if e.local_name().as_ref() == b"rPh" => {
1245                is_phonetic_text = false;
1246            }
1247            Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"t" && !is_phonetic_text => {
1248                val_buf.clear();
1249                let mut value = String::new();
1250                loop {
1251                    match xml.read_event_into(&mut val_buf)? {
1252                        Event::Text(t) => value.push_str(&t.unescape()?),
1253                        Event::End(end) if end.name() == e.name() => break,
1254                        Event::Eof => return Err(XlsxError::XmlEof("t")),
1255                        _ => (),
1256                    }
1257                }
1258                if let Some(ref mut s) = rich_buffer {
1259                    s.push_str(&value);
1260                } else {
1261                    // consume any remaining events up to expected closing tag
1262                    xml.read_to_end_into(QName(closing), &mut val_buf)?;
1263                    return Ok(Some(value));
1264                }
1265            }
1266            Ok(Event::Eof) => return Err(XlsxError::XmlEof("")),
1267            Err(e) => return Err(XlsxError::Xml(e)),
1268            _ => (),
1269        }
1270    }
1271}
1272
1273#[test]
1274fn test_dimensions() {
1275    assert_eq!(get_row_column(b"A1").unwrap(), (0, 0));
1276    assert_eq!(get_row_column(b"C107").unwrap(), (106, 2));
1277    assert_eq!(
1278        get_dimension(b"C2:D35").unwrap(),
1279        Dimensions {
1280            start: (1, 2),
1281            end: (34, 3)
1282        }
1283    );
1284    assert_eq!(
1285        get_dimension(b"A1:XFD1048576").unwrap(),
1286        Dimensions {
1287            start: (0, 0),
1288            end: (1_048_575, 16_383),
1289        }
1290    );
1291}
1292
1293#[test]
1294fn test_dimension_length() {
1295    assert_eq!(get_dimension(b"A1:Z99").unwrap().len(), 2_574);
1296    assert_eq!(
1297        get_dimension(b"A1:XFD1048576").unwrap().len(),
1298        17_179_869_184
1299    );
1300}
1301
1302#[test]
1303fn test_parse_error() {
1304    assert_eq!(
1305        CellErrorType::from_str("#DIV/0!").unwrap(),
1306        CellErrorType::Div0
1307    );
1308    assert_eq!(CellErrorType::from_str("#N/A").unwrap(), CellErrorType::NA);
1309    assert_eq!(
1310        CellErrorType::from_str("#NAME?").unwrap(),
1311        CellErrorType::Name
1312    );
1313    assert_eq!(
1314        CellErrorType::from_str("#NULL!").unwrap(),
1315        CellErrorType::Null
1316    );
1317    assert_eq!(
1318        CellErrorType::from_str("#NUM!").unwrap(),
1319        CellErrorType::Num
1320    );
1321    assert_eq!(
1322        CellErrorType::from_str("#REF!").unwrap(),
1323        CellErrorType::Ref
1324    );
1325    assert_eq!(
1326        CellErrorType::from_str("#VALUE!").unwrap(),
1327        CellErrorType::Value
1328    );
1329}