1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::io::BufReader;
4use std::io::{Read, Seek};
5use std::str::FromStr;
6
7use log::warn;
8use quick_xml::events::attributes::{Attribute, Attributes};
9use quick_xml::events::{BytesStart, Event};
10use quick_xml::name::QName;
11use quick_xml::Reader as XmlReader;
12use zip::read::{ZipArchive, ZipFile};
13use zip::result::ZipError;
14
15use crate::formats::{
16 builtin_format_by_id, detect_custom_number_format, format_excel_f64, CellFormat,
17};
18use crate::vba::VbaProject;
19use crate::{
20 Cell, CellErrorType, CellType, DataType, Metadata, Range, Reader, Sheet, SheetType,
21 SheetVisible, Table,
22};
23
24type XlsReader<'a> = XmlReader<BufReader<ZipFile<'a>>>;
25
26pub const MAX_ROWS: u32 = 1_048_576;
28
29pub const MAX_COLUMNS: u32 = 16_384;
31
32#[derive(Debug)]
34pub enum XlsxError {
35 Io(std::io::Error),
37 Zip(zip::result::ZipError),
39 Vba(crate::vba::VbaError),
41 Xml(quick_xml::Error),
43 XmlAttr(quick_xml::events::attributes::AttrError),
45 Parse(std::string::ParseError),
47 ParseFloat(std::num::ParseFloatError),
49 ParseInt(std::num::ParseIntError),
51
52 XmlEof(&'static str),
54 UnexpectedNode(&'static str),
56 FileNotFound(String),
58 RelationshipNotFound,
60 Alphanumeric(u8),
62 NumericColumn(u8),
64 DimensionCount(usize),
66 CellTAttribute(String),
68 RangeWithoutColumnComponent,
70 RangeWithoutRowComponent,
72 Unexpected(&'static str),
74 Unrecognized {
76 typ: &'static str,
78 val: String,
80 },
81 CellError(String),
83}
84
85from_err!(std::io::Error, XlsxError, Io);
86from_err!(zip::result::ZipError, XlsxError, Zip);
87from_err!(crate::vba::VbaError, XlsxError, Vba);
88from_err!(quick_xml::Error, XlsxError, Xml);
89from_err!(std::string::ParseError, XlsxError, Parse);
90from_err!(std::num::ParseFloatError, XlsxError, ParseFloat);
91from_err!(std::num::ParseIntError, XlsxError, ParseInt);
92
93impl std::fmt::Display for XlsxError {
94 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95 match self {
96 XlsxError::Io(e) => write!(f, "I/O error: {}", e),
97 XlsxError::Zip(e) => write!(f, "Zip error: {}", e),
98 XlsxError::Xml(e) => write!(f, "Xml error: {}", e),
99 XlsxError::XmlAttr(e) => write!(f, "Xml attribute error: {}", e),
100 XlsxError::Vba(e) => write!(f, "Vba error: {}", e),
101 XlsxError::Parse(e) => write!(f, "Parse string error: {}", e),
102 XlsxError::ParseInt(e) => write!(f, "Parse integer error: {}", e),
103 XlsxError::ParseFloat(e) => write!(f, "Parse float error: {}", e),
104
105 XlsxError::XmlEof(e) => write!(f, "Unexpected end of xml, expecting '</{}>'", e),
106 XlsxError::UnexpectedNode(e) => write!(f, "Expecting '{}' node", e),
107 XlsxError::FileNotFound(e) => write!(f, "File not found '{}'", e),
108 XlsxError::RelationshipNotFound => write!(f, "Relationship not found"),
109 XlsxError::Alphanumeric(e) => {
110 write!(f, "Expecting alphanumeric character, got {:X}", e)
111 }
112 XlsxError::NumericColumn(e) => write!(
113 f,
114 "Numeric character is not allowed for column name, got {}",
115 e
116 ),
117 XlsxError::DimensionCount(e) => {
118 write!(f, "Range dimension must be lower than 2. Got {}", e)
119 }
120 XlsxError::CellTAttribute(e) => write!(f, "Unknown cell 't' attribute: {:?}", e),
121 XlsxError::RangeWithoutColumnComponent => {
122 write!(f, "Range is missing the expected column component.")
123 }
124 XlsxError::RangeWithoutRowComponent => {
125 write!(f, "Range is missing the expected row component.")
126 }
127 XlsxError::Unexpected(e) => write!(f, "{}", e),
128 XlsxError::Unrecognized { typ, val } => write!(f, "Unrecognized {}: {}", typ, val),
129 XlsxError::CellError(e) => write!(f, "Unsupported cell error value '{}'", e),
130 }
131 }
132}
133
134impl std::error::Error for XlsxError {
135 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
136 match self {
137 XlsxError::Io(e) => Some(e),
138 XlsxError::Zip(e) => Some(e),
139 XlsxError::Xml(e) => Some(e),
140 XlsxError::Vba(e) => Some(e),
141 XlsxError::Parse(e) => Some(e),
142 XlsxError::ParseInt(e) => Some(e),
143 XlsxError::ParseFloat(e) => Some(e),
144 _ => None,
145 }
146 }
147}
148
149impl FromStr for CellErrorType {
150 type Err = XlsxError;
151 fn from_str(s: &str) -> Result<Self, Self::Err> {
152 match s {
153 "#DIV/0!" => Ok(CellErrorType::Div0),
154 "#N/A" => Ok(CellErrorType::NA),
155 "#NAME?" => Ok(CellErrorType::Name),
156 "#NULL!" => Ok(CellErrorType::Null),
157 "#NUM!" => Ok(CellErrorType::Num),
158 "#REF!" => Ok(CellErrorType::Ref),
159 "#VALUE!" => Ok(CellErrorType::Value),
160 _ => Err(XlsxError::CellError(s.into())),
161 }
162 }
163}
164
165type Tables = Option<Vec<(String, String, Vec<String>, Dimensions)>>;
166
167pub struct Xlsx<RS> {
170 zip: ZipArchive<RS>,
171 strings: Vec<String>,
173 sheets: Vec<(String, String)>,
175 tables: Tables,
177 formats: Vec<CellFormat>,
179 is_1904: bool,
181 metadata: Metadata,
183 #[cfg(feature = "picture")]
185 pictures: Option<Vec<(String, Vec<u8>)>>,
186}
187
188impl<RS: Read + Seek> Xlsx<RS> {
189 fn read_shared_strings(&mut self) -> Result<(), XlsxError> {
190 let mut xml = match xml_reader(&mut self.zip, "xl/sharedStrings.xml") {
191 None => return Ok(()),
192 Some(x) => x?,
193 };
194 let mut buf = Vec::with_capacity(1024);
195 loop {
196 buf.clear();
197 match xml.read_event_into(&mut buf) {
198 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"si" => {
199 if let Some(s) = read_string(&mut xml, e.name())? {
200 self.strings.push(s);
201 }
202 }
203 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sst" => break,
204 Ok(Event::Eof) => return Err(XlsxError::XmlEof("sst")),
205 Err(e) => return Err(XlsxError::Xml(e)),
206 _ => (),
207 }
208 }
209 Ok(())
210 }
211
212 fn read_styles(&mut self) -> Result<(), XlsxError> {
213 let mut xml = match xml_reader(&mut self.zip, "xl/styles.xml") {
214 None => return Ok(()),
215 Some(x) => x?,
216 };
217
218 let mut number_formats = BTreeMap::new();
219
220 let mut buf = Vec::with_capacity(1024);
221 let mut inner_buf = Vec::with_capacity(1024);
222 loop {
223 buf.clear();
224 match xml.read_event_into(&mut buf) {
225 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"numFmts" => loop {
226 inner_buf.clear();
227 match xml.read_event_into(&mut inner_buf) {
228 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"numFmt" => {
229 let mut id = Vec::new();
230 let mut format = String::new();
231 for a in e.attributes() {
232 match a.map_err(XlsxError::XmlAttr)? {
233 Attribute {
234 key: QName(b"numFmtId"),
235 value: v,
236 } => id.extend_from_slice(&v),
237 Attribute {
238 key: QName(b"formatCode"),
239 value: v,
240 } => format = xml.decoder().decode(&v)?.into_owned(),
241 _ => (),
242 }
243 }
244 if !format.is_empty() {
245 number_formats.insert(id, format);
246 }
247 }
248 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"numFmts" => break,
249 Ok(Event::Eof) => return Err(XlsxError::XmlEof("numFmts")),
250 Err(e) => return Err(XlsxError::Xml(e)),
251 _ => (),
252 }
253 },
254 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"cellXfs" => loop {
255 inner_buf.clear();
256 match xml.read_event_into(&mut inner_buf) {
257 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"xf" => {
258 self.formats.push(
259 e.attributes()
260 .filter_map(|a| a.ok())
261 .find(|a| a.key == QName(b"numFmtId"))
262 .map_or(CellFormat::Other, |a| {
263 match number_formats.get(&*a.value) {
264 Some(fmt) => detect_custom_number_format(fmt),
265 None => builtin_format_by_id(&a.value),
266 }
267 }),
268 );
269 }
270 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"cellXfs" => break,
271 Ok(Event::Eof) => return Err(XlsxError::XmlEof("cellXfs")),
272 Err(e) => return Err(XlsxError::Xml(e)),
273 _ => (),
274 }
275 },
276 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"styleSheet" => break,
277 Ok(Event::Eof) => return Err(XlsxError::XmlEof("styleSheet")),
278 Err(e) => return Err(XlsxError::Xml(e)),
279 _ => (),
280 }
281 }
282 Ok(())
283 }
284
285 fn read_workbook(
286 &mut self,
287 relationships: &BTreeMap<Vec<u8>, String>,
288 ) -> Result<(), XlsxError> {
289 let mut xml = match xml_reader(&mut self.zip, "xl/workbook.xml") {
290 None => return Ok(()),
291 Some(x) => x?,
292 };
293 let mut defined_names = Vec::new();
294 let mut buf = Vec::with_capacity(1024);
295 let mut val_buf = Vec::with_capacity(1024);
296 loop {
297 buf.clear();
298 match xml.read_event_into(&mut buf) {
299 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"sheet" => {
300 let mut name = String::new();
301 let mut path = String::new();
302 let mut visible = SheetVisible::Visible;
303 for a in e.attributes() {
304 let a = a.map_err(XlsxError::XmlAttr)?;
305 match a {
306 Attribute {
307 key: QName(b"name"),
308 ..
309 } => {
310 name = a.decode_and_unescape_value(&xml)?.to_string();
311 }
312 Attribute {
313 key: QName(b"state"),
314 ..
315 } => {
316 visible = match a.decode_and_unescape_value(&xml)?.as_ref() {
317 "visible" => SheetVisible::Visible,
318 "hidden" => SheetVisible::Hidden,
319 "veryHidden" => SheetVisible::VeryHidden,
320 v => {
321 return Err(XlsxError::Unrecognized {
322 typ: "sheet:state",
323 val: v.to_string(),
324 })
325 }
326 }
327 }
328 Attribute {
329 key: QName(b"r:id"),
330 value: v,
331 }
332 | Attribute {
333 key: QName(b"relationships:id"),
334 value: v,
335 } => {
336 let r = &relationships
337 .get(&*v)
338 .ok_or(XlsxError::RelationshipNotFound)?[..];
339 path = if r.starts_with("/xl/") {
342 r[1..].to_string()
343 } else if r.starts_with("xl/") {
344 r.to_string()
345 } else {
346 format!("xl/{}", r)
347 };
348 }
349 _ => (),
350 }
351 }
352 let typ = match path.split('/').nth(1) {
353 Some("worksheets") => SheetType::WorkSheet,
354 Some("chartsheets") => SheetType::ChartSheet,
355 Some("dialogsheets") => SheetType::DialogSheet,
356 _ => {
357 return Err(XlsxError::Unrecognized {
358 typ: "sheet:type",
359 val: path.to_string(),
360 })
361 }
362 };
363 self.metadata.sheets.push(Sheet {
364 name: name.to_string(),
365 typ,
366 visible,
367 });
368 self.sheets.push((name, path));
369 }
370 Ok(Event::Start(ref e)) if e.name().as_ref() == b"workbookPr" => {
371 self.is_1904 = match e.try_get_attribute("date1904")? {
372 Some(c) => ["1", "true"].contains(
373 &c.decode_and_unescape_value(&xml)
374 .map_err(XlsxError::Xml)?
375 .as_ref(),
376 ),
377 None => false,
378 };
379 }
380 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"definedName" => {
381 if let Some(a) = e
382 .attributes()
383 .filter_map(std::result::Result::ok)
384 .find(|a| a.key == QName(b"name"))
385 {
386 let name = a.decode_and_unescape_value(&xml)?.to_string();
387 val_buf.clear();
388 let mut value = String::new();
389 loop {
390 match xml.read_event_into(&mut val_buf)? {
391 Event::Text(t) => value.push_str(&t.unescape()?),
392 Event::End(end) if end.name() == e.name() => break,
393 Event::Eof => return Err(XlsxError::XmlEof("workbook")),
394 _ => (),
395 }
396 }
397 defined_names.push((name, value));
398 }
399 }
400 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"workbook" => break,
401 Ok(Event::Eof) => return Err(XlsxError::XmlEof("workbook")),
402 Err(e) => return Err(XlsxError::Xml(e)),
403 _ => (),
404 }
405 }
406 self.metadata.names = defined_names;
407 Ok(())
408 }
409
410 fn read_relationships(&mut self) -> Result<BTreeMap<Vec<u8>, String>, XlsxError> {
411 let mut xml = match xml_reader(&mut self.zip, "xl/_rels/workbook.xml.rels") {
412 None => {
413 return Err(XlsxError::FileNotFound(
414 "xl/_rels/workbook.xml.rels".to_string(),
415 ));
416 }
417 Some(x) => x?,
418 };
419 let mut relationships = BTreeMap::new();
420 let mut buf = Vec::with_capacity(64);
421 loop {
422 buf.clear();
423 match xml.read_event_into(&mut buf) {
424 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"Relationship" => {
425 let mut id = Vec::new();
426 let mut target = String::new();
427 for a in e.attributes() {
428 match a.map_err(XlsxError::XmlAttr)? {
429 Attribute {
430 key: QName(b"Id"),
431 value: v,
432 } => id.extend_from_slice(&v),
433 Attribute {
434 key: QName(b"Target"),
435 value: v,
436 } => target = xml.decoder().decode(&v)?.into_owned(),
437 _ => (),
438 }
439 }
440 relationships.insert(id, target);
441 }
442 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"Relationships" => break,
443 Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
444 Err(e) => return Err(XlsxError::Xml(e)),
445 _ => (),
446 }
447 }
448 Ok(relationships)
449 }
450
451 fn read_table_metadata(&mut self) -> Result<(), XlsxError> {
453 for (sheet_name, sheet_path) in &self.sheets {
454 let last_folder_index = sheet_path.rfind('/').expect("should be in a folder");
455 let (base_folder, file_name) = sheet_path.split_at(last_folder_index);
456 let rel_path = format!("{}/_rels{}.rels", base_folder, file_name);
457
458 let mut table_locations = Vec::new();
459 let mut buf = Vec::with_capacity(64);
460 {
462 let mut xml = match xml_reader(&mut self.zip, &rel_path) {
463 None => continue,
464 Some(x) => x?,
465 };
466 loop {
467 buf.clear();
468 match xml.read_event_into(&mut buf) {
469 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"Relationship" => {
470 let mut id = Vec::new();
471 let mut target = String::new();
472 let mut table_type = false;
473 for a in e.attributes() {
474 match a.map_err(XlsxError::XmlAttr)? {
475 Attribute {
476 key: QName(b"Id"),
477 value: v,
478 } => id.extend_from_slice(&v),
479 Attribute {
480 key: QName(b"Target"),
481 value: v,
482 } => target = xml.decoder().decode(&v)?.into_owned(),
483 Attribute {
484 key: QName(b"Type"),
485 value: v,
486 } => table_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/table"[..],
487 _ => (),
488 }
489 }
490 if table_type {
491 if target.starts_with("../") {
492 let new_index =
494 base_folder.rfind('/').expect("Must be a parent folder");
495 let full_path = format!(
496 "{}{}",
497 base_folder[..new_index].to_owned(),
498 target[2..].to_owned()
499 );
500 table_locations.push(full_path);
501 } else if target.is_empty() { } else {
503 table_locations.push(target);
504 }
505 }
506 }
507 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"Relationships" => {
508 break
509 }
510 Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
511 Err(e) => return Err(XlsxError::Xml(e)),
512 _ => (),
513 }
514 }
515 }
516 let mut new_tables = Vec::new();
517 for table_file in table_locations {
518 let mut xml = match xml_reader(&mut self.zip, &table_file) {
519 None => continue,
520 Some(x) => x?,
521 };
522 let mut column_names = Vec::new();
523 let mut table_meta = InnerTableMetadata::new();
524 loop {
525 buf.clear();
526 match xml.read_event_into(&mut buf) {
527 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"table" => {
528 for a in e.attributes() {
529 match a.map_err(XlsxError::XmlAttr)? {
530 Attribute {
531 key: QName(b"displayName"),
532 value: v,
533 } => {
534 table_meta.display_name =
535 xml.decoder().decode(&v)?.into_owned()
536 }
537 Attribute {
538 key: QName(b"ref"),
539 value: v,
540 } => {
541 table_meta.ref_cells =
542 xml.decoder().decode(&v)?.into_owned()
543 }
544 Attribute {
545 key: QName(b"headerRowCount"),
546 value: v,
547 } => {
548 table_meta.header_row_count =
549 xml.decoder().decode(&v)?.parse()?
550 }
551 Attribute {
552 key: QName(b"insertRow"),
553 value: v,
554 } => table_meta.insert_row = *v != b"0"[..],
555 Attribute {
556 key: QName(b"totalsRowCount"),
557 value: v,
558 } => {
559 table_meta.totals_row_count =
560 xml.decoder().decode(&v)?.parse()?
561 }
562 _ => (),
563 }
564 }
565 }
566 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"tableColumn" => {
567 for a in e.attributes().flatten() {
568 if let Attribute {
569 key: QName(b"name"),
570 value: v,
571 } = a
572 {
573 column_names.push(xml.decoder().decode(&v)?.into_owned())
574 }
575 }
576 }
577 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"table" => break,
578 Ok(Event::Eof) => return Err(XlsxError::XmlEof("Table")),
579 Err(e) => return Err(XlsxError::Xml(e)),
580 _ => (),
581 }
582 }
583 let mut dims = get_dimension(table_meta.ref_cells.as_bytes())?;
584 if table_meta.header_row_count != 0 {
585 dims.start.0 += table_meta.header_row_count;
586 }
587 if table_meta.totals_row_count != 0 {
588 dims.end.0 -= table_meta.header_row_count;
589 }
590 if table_meta.insert_row {
591 dims.end.0 -= 1;
592 }
593 new_tables.push((
594 table_meta.display_name,
595 sheet_name.clone(),
596 column_names,
597 dims,
598 ));
599 }
600 if let Some(tables) = &mut self.tables {
601 tables.append(&mut new_tables);
602 } else {
603 self.tables = Some(new_tables);
604 }
605 }
606 Ok(())
607 }
608
609 #[cfg(feature = "picture")]
611 fn read_pictures(&mut self) -> Result<(), XlsxError> {
612 let mut pics = Vec::new();
613 for i in 0..self.zip.len() {
614 let mut zfile = self.zip.by_index(i)?;
615 let zname = zfile.name().to_owned();
616 if zname.starts_with("xl/media") {
617 let name_ext: Vec<&str> = zname.split(".").collect();
618 if let Some(ext) = name_ext.last() {
619 if [
620 "emf", "wmf", "pict", "jpeg", "jpg", "png", "dib", "gif", "tiff", "eps",
621 "bmp", "wpg",
622 ]
623 .contains(ext)
624 {
625 let mut buf: Vec<u8> = Vec::new();
626 zfile.read_to_end(&mut buf)?;
627 pics.push((ext.to_string(), buf));
628 }
629 }
630 }
631 }
632 if !pics.is_empty() {
633 self.pictures = Some(pics);
634 }
635 Ok(())
636 }
637
638 pub fn load_tables(&mut self) -> Result<(), XlsxError> {
640 if self.tables.is_none() {
641 self.read_table_metadata()
642 } else {
643 Ok(())
644 }
645 }
646
647 pub fn table_names(&self) -> Vec<&String> {
649 self.tables
650 .as_ref()
651 .expect("Tables must be loaded before they are referenced")
652 .iter()
653 .map(|(name, ..)| name)
654 .collect()
655 }
656 pub fn table_names_in_sheet(&self, sheet_name: &str) -> Vec<&String> {
658 self.tables
659 .as_ref()
660 .expect("Tables must be loaded before they are referenced")
661 .iter()
662 .filter(|(_, sheet, ..)| sheet == sheet_name)
663 .map(|(name, ..)| name)
664 .collect()
665 }
666
667 pub fn table_by_name(
670 &mut self,
671 table_name: &str,
672 ) -> Option<Result<Table<DataType>, XlsxError>> {
673 let match_table_meta = self
674 .tables
675 .as_ref()
676 .expect("Tables must be loaded before they are referenced")
677 .iter()
678 .find(|(table, ..)| table == table_name)?;
679 let name = match_table_meta.0.to_owned();
680 let sheet_name = match_table_meta.1.clone();
681 let columns = match_table_meta.2.clone();
682 let start_dim = match_table_meta.3.start;
683 let end_dim = match_table_meta.3.end;
684 let r_range = self.worksheet_range(&sheet_name)?;
685 match r_range {
686 Ok(range) => {
687 let tbl_rng = range.range(start_dim, end_dim);
688 Some(Ok(Table {
689 name,
690 sheet_name,
691 columns,
692 data: tbl_rng,
693 }))
694 }
695 Err(e) => Some(Err(e)),
696 }
697 }
698}
699
700struct InnerTableMetadata {
701 display_name: String,
702 ref_cells: String,
703 header_row_count: u32,
704 insert_row: bool,
705 totals_row_count: u32,
706}
707
708impl InnerTableMetadata {
709 fn new() -> Self {
710 Self {
711 display_name: String::new(),
712 ref_cells: String::new(),
713 header_row_count: 1,
714 insert_row: false,
715 totals_row_count: 0,
716 }
717 }
718}
719
720fn worksheet<T, F>(
721 strings: &[String],
722 formats: &[CellFormat],
723 mut xml: XlsReader<'_>,
724 read_data: &mut F,
725) -> Result<Range<T>, XlsxError>
726where
727 T: CellType,
728 F: FnMut(
729 &[String],
730 &[CellFormat],
731 &mut XlsReader<'_>,
732 &mut Vec<Cell<T>>,
733 ) -> Result<(), XlsxError>,
734{
735 let mut cells = Vec::with_capacity(1024);
736 let mut buf = Vec::with_capacity(1024);
737 'xml: loop {
738 buf.clear();
739 match xml.read_event_into(&mut buf) {
740 Ok(Event::Start(ref e)) => {
741 match e.local_name().as_ref() {
742 b"dimension" => {
743 for a in e.attributes() {
744 if let Attribute {
745 key: QName(b"ref"),
746 value: rdim,
747 } = a.map_err(XlsxError::XmlAttr)?
748 {
749 let len = get_dimension(&rdim)?.len();
750 if len < 1_000_000 {
751 cells.reserve(len as usize);
754 }
755 continue 'xml;
756 }
757 }
758 return Err(XlsxError::UnexpectedNode("dimension"));
759 }
760 b"sheetData" => {
761 read_data(strings, formats, &mut xml, &mut cells)?;
762 break;
763 }
764 _ => (),
765 }
766 }
767 Ok(Event::Eof) => break,
768 Err(e) => return Err(XlsxError::Xml(e)),
769 _ => (),
770 }
771 }
772 Ok(Range::from_sparse(cells))
773}
774
775impl<RS: Read + Seek> Reader<RS> for Xlsx<RS> {
776 type Error = XlsxError;
777
778 fn new(reader: RS) -> Result<Self, XlsxError> {
779 let mut xlsx = Xlsx {
780 zip: ZipArchive::new(reader)?,
781 strings: Vec::new(),
782 formats: Vec::new(),
783 is_1904: false,
784 sheets: Vec::new(),
785 tables: None,
786 metadata: Metadata::default(),
787 #[cfg(feature = "picture")]
788 pictures: None,
789 };
790 xlsx.read_shared_strings()?;
791 xlsx.read_styles()?;
792 let relationships = xlsx.read_relationships()?;
793 xlsx.read_workbook(&relationships)?;
794 #[cfg(feature = "picture")]
795 xlsx.read_pictures()?;
796
797 Ok(xlsx)
798 }
799
800 fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, XlsxError>> {
801 self.zip.by_name("xl/vbaProject.bin").ok().map(|mut f| {
802 let len = f.size() as usize;
803 VbaProject::new(&mut f, len)
804 .map(Cow::Owned)
805 .map_err(XlsxError::Vba)
806 })
807 }
808
809 fn metadata(&self) -> &Metadata {
810 &self.metadata
811 }
812
813 fn worksheet_range(&mut self, name: &str) -> Option<Result<Range<DataType>, XlsxError>> {
814 let xml = match self.sheets.iter().find(|&(n, _)| n == name) {
815 Some((_, path)) => xml_reader(&mut self.zip, path),
816 None => return None,
817 };
818 let is_1904 = self.is_1904;
819 let strings = &self.strings;
820 let formats = &self.formats;
821 xml.map(|xml| {
822 worksheet(strings, formats, xml?, &mut |s, f, xml, cells| {
823 read_sheet_data(xml, s, f, cells, is_1904)
824 })
825 })
826 }
827
828 fn worksheet_formula(&mut self, name: &str) -> Option<Result<Range<String>, XlsxError>> {
829 let xml = match self.sheets.iter().find(|&(n, _)| n == name) {
830 Some((_, path)) => xml_reader(&mut self.zip, path),
831 None => return None,
832 };
833
834 let strings = &self.strings;
835 let formats = &self.formats;
836 xml.map(|xml| {
837 worksheet(strings, formats, xml?, &mut |_, _, xml, cells| {
838 read_sheet(xml, cells, &mut |cells, xml, e, pos, _| {
839 match e.local_name().as_ref() {
840 b"is" | b"v" => {
841 xml.read_to_end_into(e.name(), &mut Vec::new())?;
842 }
843 b"f" => {
844 let mut f_buf = Vec::with_capacity(512);
845 let mut f = String::new();
846 loop {
847 match xml.read_event_into(&mut f_buf)? {
848 Event::Text(t) => f.push_str(&t.unescape()?),
849 Event::End(end) if end.name() == e.name() => break,
850 Event::Eof => return Err(XlsxError::XmlEof("f")),
851 _ => (),
852 }
853 f_buf.clear();
854 }
855 if !f.is_empty() {
856 cells.push(Cell::new(pos, f));
857 }
858 }
859 _ => return Err(XlsxError::UnexpectedNode("v, f, or is")),
860 }
861 Ok(())
862 })
863 })
864 })
865 }
866
867 fn worksheets(&mut self) -> Vec<(String, Range<DataType>)> {
868 let is_1904 = self.is_1904;
869 self.sheets
870 .clone()
871 .into_iter()
872 .filter_map(|(name, path)| {
873 let xml = xml_reader(&mut self.zip, &path)?.ok()?;
874 let range = worksheet(
875 &self.strings,
876 &self.formats,
877 xml,
878 &mut |s, f, xml, cells| read_sheet_data(xml, s, f, cells, is_1904),
879 )
880 .ok()?;
881 Some((name, range))
882 })
883 .collect()
884 }
885
886 #[cfg(feature = "picture")]
887 fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>> {
888 self.pictures.to_owned()
889 }
890}
891
892fn xml_reader<'a, RS: Read + Seek>(
893 zip: &'a mut ZipArchive<RS>,
894 path: &str,
895) -> Option<Result<XlsReader<'a>, XlsxError>> {
896 match zip.by_name(path) {
897 Ok(f) => {
898 let mut r = XmlReader::from_reader(BufReader::new(f));
899 r.check_end_names(false)
900 .trim_text(false)
901 .check_comments(false)
902 .expand_empty_elements(true);
903 Some(Ok(r))
904 }
905 Err(ZipError::FileNotFound) => None,
906 Err(e) => Some(Err(e.into())),
907 }
908}
909
910fn get_attribute<'a>(atts: Attributes<'a>, n: QName) -> Result<Option<&'a [u8]>, XlsxError> {
912 for a in atts {
913 match a {
914 Ok(Attribute {
915 key,
916 value: Cow::Borrowed(value),
917 }) if key == n => return Ok(Some(value)),
918 Err(e) => return Err(XlsxError::XmlAttr(e)),
919 _ => {} }
921 }
922 Ok(None)
923}
924
925fn read_sheet<T, F>(
926 xml: &mut XlsReader<'_>,
927 cells: &mut Vec<Cell<T>>,
928 push_cell: &mut F,
929) -> Result<(), XlsxError>
930where
931 T: CellType,
932 F: FnMut(
933 &mut Vec<Cell<T>>,
934 &mut XlsReader<'_>,
935 &BytesStart<'_>,
936 (u32, u32),
937 &BytesStart<'_>,
938 ) -> Result<(), XlsxError>,
939{
940 let mut buf = Vec::with_capacity(1024);
941 let mut cell_buf = Vec::with_capacity(1024);
942
943 let mut row_index = 0;
944 let mut col_index = 0;
945
946 loop {
947 buf.clear();
948 match xml.read_event_into(&mut buf) {
949 Ok(Event::Start(ref row_element)) if row_element.local_name().as_ref() == b"row" => {
950 let attribute = get_attribute(row_element.attributes(), QName(b"r"))?;
951 if let Some(range) = attribute {
952 let row = get_row(range)?;
953 row_index = row;
954 }
955 }
956 Ok(Event::End(ref row_element)) if row_element.local_name().as_ref() == b"row" => {
957 row_index += 1;
958 col_index = 0;
959 }
960 Ok(Event::Start(ref c_element)) if c_element.local_name().as_ref() == b"c" => {
961 let attribute = get_attribute(c_element.attributes(), QName(b"r"))?;
962
963 let pos = if let Some(range) = attribute {
964 let (row, col) = get_row_column(range)?;
965 col_index = col;
966 (row, col)
967 } else {
968 (row_index, col_index)
969 };
970
971 loop {
972 cell_buf.clear();
973 match xml.read_event_into(&mut cell_buf) {
974 Ok(Event::Start(ref e)) => push_cell(cells, xml, e, pos, c_element)?,
975 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break,
976 Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")),
977 Err(e) => return Err(XlsxError::Xml(e)),
978 _ => (),
979 }
980 }
981 col_index += 1;
982 }
983 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sheetData" => return Ok(()),
984 Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")),
985 Err(e) => return Err(XlsxError::Xml(e)),
986 _ => (),
987 }
988 }
989}
990
991fn read_sheet_data(
993 xml: &mut XlsReader<'_>,
994 strings: &[String],
995 formats: &[CellFormat],
996 cells: &mut Vec<Cell<DataType>>,
997 is_1904: bool,
998) -> Result<(), XlsxError> {
999 fn read_value(
1001 v: String,
1002 strings: &[String],
1003 formats: &[CellFormat],
1004 c_element: &BytesStart<'_>,
1005 is_1904: bool,
1006 ) -> Result<DataType, XlsxError> {
1007 let cell_format = match get_attribute(c_element.attributes(), QName(b"s")) {
1008 Ok(Some(style)) => {
1009 let id: usize = std::str::from_utf8(style).unwrap_or("0").parse()?;
1010 formats.get(id)
1011 }
1012 _ => Some(&CellFormat::Other),
1013 };
1014
1015 match get_attribute(c_element.attributes(), QName(b"t"))? {
1016 Some(b"s") => {
1017 let idx: usize = v.parse()?;
1019 Ok(DataType::String(strings[idx].clone()))
1020 }
1021 Some(b"b") => {
1022 Ok(DataType::Bool(v != "0"))
1024 }
1025 Some(b"e") => {
1026 Ok(DataType::Error(v.parse()?))
1028 }
1029 Some(b"d") => {
1030 Ok(DataType::DateTimeIso(v))
1032 }
1033 Some(b"str") => {
1034 v.parse().map(DataType::Float).or(Ok(DataType::String(v)))
1045 }
1046 Some(b"n") => {
1047 if v.is_empty() {
1049 Ok(DataType::Empty)
1050 } else {
1051 v.parse()
1052 .map(|n| format_excel_f64(n, cell_format, is_1904))
1053 .map_err(XlsxError::ParseFloat)
1054 }
1055 }
1056 None => {
1057 v.parse()
1060 .map(|n| format_excel_f64(n, cell_format, is_1904))
1061 .or(Ok(DataType::String(v)))
1062 }
1063 Some(b"is") => {
1064 Err(XlsxError::Unexpected(
1067 "called read_value on a cell of type inlineStr",
1068 ))
1069 }
1070 Some(t) => {
1071 let t = std::str::from_utf8(t).unwrap_or("<utf8 error>").to_string();
1072 Err(XlsxError::CellTAttribute(t))
1073 }
1074 }
1075 }
1076
1077 read_sheet(xml, cells, &mut |cells, xml, e, pos, c_element| {
1078 match e.local_name().as_ref() {
1079 b"is" => {
1080 if let Some(s) = read_string(xml, e.name())? {
1082 cells.push(Cell::new(pos, DataType::String(s)));
1083 }
1084 }
1085 b"v" => {
1086 let mut v = String::new();
1088 let mut v_buf = Vec::new();
1089 loop {
1090 v_buf.clear();
1091 match xml.read_event_into(&mut v_buf)? {
1092 Event::Text(t) => v.push_str(&t.unescape()?),
1093 Event::End(end) if end.name() == e.name() => break,
1094 Event::Eof => return Err(XlsxError::XmlEof("v")),
1095 _ => (),
1096 }
1097 }
1098 match read_value(v, strings, formats, c_element, is_1904)? {
1099 DataType::Empty => (),
1100 v => cells.push(Cell::new(pos, v)),
1101 }
1102 }
1103 b"f" => {
1104 xml.read_to_end_into(e.name(), &mut Vec::new())?;
1105 }
1106 _n => return Err(XlsxError::UnexpectedNode("v, f, or is")),
1107 }
1108 Ok(())
1109 })
1110}
1111
1112#[derive(Debug, PartialEq)]
1113struct Dimensions {
1114 start: (u32, u32),
1115 end: (u32, u32),
1116}
1117
1118impl Dimensions {
1119 fn len(&self) -> u64 {
1120 (self.end.0 - self.start.0 + 1) as u64 * (self.end.1 - self.start.1 + 1) as u64
1121 }
1122}
1123
1124fn get_dimension(dimension: &[u8]) -> Result<Dimensions, XlsxError> {
1128 let parts: Vec<_> = dimension
1129 .split(|c| *c == b':')
1130 .map(get_row_column)
1131 .collect::<Result<Vec<_>, XlsxError>>()?;
1132
1133 match parts.len() {
1134 0 => Err(XlsxError::DimensionCount(0)),
1135 1 => Ok(Dimensions {
1136 start: parts[0],
1137 end: parts[0],
1138 }),
1139 2 => {
1140 let rows = parts[1].0 - parts[0].0;
1141 let columns = parts[1].1 - parts[0].1;
1142 if rows > MAX_ROWS {
1143 warn!(
1144 "xlsx has more than maximum number of rows ({} > {})",
1145 rows, MAX_ROWS
1146 );
1147 }
1148 if columns > MAX_COLUMNS {
1149 warn!(
1150 "xlsx has more than maximum number of columns ({} > {})",
1151 columns, MAX_COLUMNS
1152 );
1153 }
1154 Ok(Dimensions {
1155 start: parts[0],
1156 end: parts[1],
1157 })
1158 }
1159 len => Err(XlsxError::DimensionCount(len)),
1160 }
1161}
1162
1163fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> {
1166 let (row, col) = get_row_and_optional_column(range)?;
1167 let col = col.ok_or(XlsxError::RangeWithoutColumnComponent)?;
1168 Ok((row, col))
1169}
1170
1171fn get_row(range: &[u8]) -> Result<u32, XlsxError> {
1175 get_row_and_optional_column(range).map(|(row, _)| row)
1176}
1177
1178fn get_row_and_optional_column(range: &[u8]) -> Result<(u32, Option<u32>), XlsxError> {
1182 let (mut row, mut col) = (0, 0);
1183 let mut pow = 1;
1184 let mut readrow = true;
1185 for c in range.iter().rev() {
1186 match *c {
1187 c @ b'0'..=b'9' => {
1188 if readrow {
1189 row += ((c - b'0') as u32) * pow;
1190 pow *= 10;
1191 } else {
1192 return Err(XlsxError::NumericColumn(c));
1193 }
1194 }
1195 c @ b'A'..=b'Z' => {
1196 if readrow {
1197 pow = 1;
1198 readrow = false;
1199 }
1200 col += ((c - b'A') as u32 + 1) * pow;
1201 pow *= 26;
1202 }
1203 c @ b'a'..=b'z' => {
1204 if readrow {
1205 pow = 1;
1206 readrow = false;
1207 }
1208 col += ((c - b'a') as u32 + 1) * pow;
1209 pow *= 26;
1210 }
1211 _ => return Err(XlsxError::Alphanumeric(*c)),
1212 }
1213 }
1214 let row = row
1215 .checked_sub(1)
1216 .ok_or(XlsxError::RangeWithoutRowComponent)?;
1217 Ok((row, col.checked_sub(1)))
1218}
1219
1220fn read_string(
1222 xml: &mut XlsReader<'_>,
1223 QName(closing): QName,
1224) -> Result<Option<String>, XlsxError> {
1225 let mut buf = Vec::with_capacity(1024);
1226 let mut val_buf = Vec::with_capacity(1024);
1227 let mut rich_buffer: Option<String> = None;
1228 let mut is_phonetic_text = false;
1229 loop {
1230 buf.clear();
1231 match xml.read_event_into(&mut buf) {
1232 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"r" => {
1233 if rich_buffer.is_none() {
1234 rich_buffer = Some(String::new());
1236 }
1237 }
1238 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"rPh" => {
1239 is_phonetic_text = true;
1240 }
1241 Ok(Event::End(ref e)) if e.local_name().as_ref() == closing => {
1242 return Ok(rich_buffer);
1243 }
1244 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"rPh" => {
1245 is_phonetic_text = false;
1246 }
1247 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"t" && !is_phonetic_text => {
1248 val_buf.clear();
1249 let mut value = String::new();
1250 loop {
1251 match xml.read_event_into(&mut val_buf)? {
1252 Event::Text(t) => value.push_str(&t.unescape()?),
1253 Event::End(end) if end.name() == e.name() => break,
1254 Event::Eof => return Err(XlsxError::XmlEof("t")),
1255 _ => (),
1256 }
1257 }
1258 if let Some(ref mut s) = rich_buffer {
1259 s.push_str(&value);
1260 } else {
1261 xml.read_to_end_into(QName(closing), &mut val_buf)?;
1263 return Ok(Some(value));
1264 }
1265 }
1266 Ok(Event::Eof) => return Err(XlsxError::XmlEof("")),
1267 Err(e) => return Err(XlsxError::Xml(e)),
1268 _ => (),
1269 }
1270 }
1271}
1272
1273#[test]
1274fn test_dimensions() {
1275 assert_eq!(get_row_column(b"A1").unwrap(), (0, 0));
1276 assert_eq!(get_row_column(b"C107").unwrap(), (106, 2));
1277 assert_eq!(
1278 get_dimension(b"C2:D35").unwrap(),
1279 Dimensions {
1280 start: (1, 2),
1281 end: (34, 3)
1282 }
1283 );
1284 assert_eq!(
1285 get_dimension(b"A1:XFD1048576").unwrap(),
1286 Dimensions {
1287 start: (0, 0),
1288 end: (1_048_575, 16_383),
1289 }
1290 );
1291}
1292
1293#[test]
1294fn test_dimension_length() {
1295 assert_eq!(get_dimension(b"A1:Z99").unwrap().len(), 2_574);
1296 assert_eq!(
1297 get_dimension(b"A1:XFD1048576").unwrap().len(),
1298 17_179_869_184
1299 );
1300}
1301
1302#[test]
1303fn test_parse_error() {
1304 assert_eq!(
1305 CellErrorType::from_str("#DIV/0!").unwrap(),
1306 CellErrorType::Div0
1307 );
1308 assert_eq!(CellErrorType::from_str("#N/A").unwrap(), CellErrorType::NA);
1309 assert_eq!(
1310 CellErrorType::from_str("#NAME?").unwrap(),
1311 CellErrorType::Name
1312 );
1313 assert_eq!(
1314 CellErrorType::from_str("#NULL!").unwrap(),
1315 CellErrorType::Null
1316 );
1317 assert_eq!(
1318 CellErrorType::from_str("#NUM!").unwrap(),
1319 CellErrorType::Num
1320 );
1321 assert_eq!(
1322 CellErrorType::from_str("#REF!").unwrap(),
1323 CellErrorType::Ref
1324 );
1325 assert_eq!(
1326 CellErrorType::from_str("#VALUE!").unwrap(),
1327 CellErrorType::Value
1328 );
1329}