1use std::borrow::Cow;
8use std::collections::{BTreeMap, HashMap};
9use std::io::{BufReader, Read, Seek};
10
11use quick_xml::events::attributes::Attributes;
12use quick_xml::events::Event;
13use quick_xml::name::QName;
14use quick_xml::Reader as XmlReader;
15use zip::read::{ZipArchive, ZipFile};
16use zip::result::ZipError;
17
18use crate::vba::VbaProject;
19use crate::{DataType, Metadata, Range, Reader, Sheet, SheetType, SheetVisible};
20use std::marker::PhantomData;
21
/// MIME type that the archive's `mimetype` entry must contain for the file to be accepted.
const MIMETYPE: &[u8] = b"application/vnd.oasis.opendocument.spreadsheet";
23
/// XML reader over a buffered entry of the zip archive.
type OdsReader<'a> = XmlReader<BufReader<ZipFile<'a>>>;
25
/// Errors that can occur while opening or parsing an OpenDocument spreadsheet.
#[derive(Debug)]
pub enum OdsError {
    /// I/O error.
    Io(std::io::Error),
    /// Error reported by the `zip` crate while reading the archive.
    Zip(zip::result::ZipError),
    /// XML error reported by `quick_xml`.
    Xml(quick_xml::Error),
    /// XML attribute error reported by `quick_xml`.
    XmlAttr(quick_xml::events::attributes::AttrError),
    /// String parsing error.
    Parse(std::string::ParseError),
    /// Integer parsing error (e.g. a repeat-count attribute).
    ParseInt(std::num::ParseIntError),
    /// Float parsing error (e.g. an `office:value` attribute).
    ParseFloat(std::num::ParseFloatError),
    /// Boolean parsing error (e.g. a `table:display` attribute).
    ParseBool(std::str::ParseBoolError),

    /// The `mimetype` entry does not hold the expected MIME type; carries the bytes read.
    InvalidMime(Vec<u8>),
    /// A required file is missing from the archive; carries its name.
    FileNotFound(&'static str),
    /// The XML ended while the named node was still expected.
    Eof(&'static str),
    /// An unexpected XML event was encountered.
    Mismatch {
        /// What the parser expected.
        expected: &'static str,
        /// Debug rendering of what was found instead.
        found: String,
    },
}
60
// `from_err!` is a macro defined elsewhere in the crate.
// NOTE(review): each invocation presumably implements `From<source> for OdsError` through the
// named variant, which is what lets `?` convert these error types in the functions below —
// confirm against the macro definition.
from_err!(std::io::Error, OdsError, Io);
from_err!(zip::result::ZipError, OdsError, Zip);
from_err!(quick_xml::Error, OdsError, Xml);
from_err!(std::string::ParseError, OdsError, Parse);
from_err!(std::num::ParseFloatError, OdsError, ParseFloat);
66
67impl std::fmt::Display for OdsError {
68 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69 match self {
70 OdsError::Io(e) => write!(f, "I/O error: {}", e),
71 OdsError::Zip(e) => write!(f, "Zip error: {:?}", e),
72 OdsError::Xml(e) => write!(f, "Xml error: {}", e),
73 OdsError::XmlAttr(e) => write!(f, "Xml attribute error: {}", e),
74 OdsError::Parse(e) => write!(f, "Parse string error: {}", e),
75 OdsError::ParseInt(e) => write!(f, "Parse integer error: {}", e),
76 OdsError::ParseFloat(e) => write!(f, "Parse float error: {}", e),
77 OdsError::ParseBool(e) => write!(f, "Parse bool error: {}", e),
78 OdsError::InvalidMime(mime) => write!(f, "Invalid MIME type: {:?}", mime),
79 OdsError::FileNotFound(file) => write!(f, "'{}' file not found in archive", file),
80 OdsError::Eof(node) => write!(f, "Expecting '{}' node, found end of xml file", node),
81 OdsError::Mismatch { expected, found } => {
82 write!(f, "Expecting '{}', found '{}'", expected, found)
83 }
84 }
85 }
86}
87
88impl std::error::Error for OdsError {
89 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
90 match self {
91 OdsError::Io(e) => Some(e),
92 OdsError::Zip(e) => Some(e),
93 OdsError::Xml(e) => Some(e),
94 OdsError::Parse(e) => Some(e),
95 OdsError::ParseInt(e) => Some(e),
96 OdsError::ParseFloat(e) => Some(e),
97 _ => None,
98 }
99 }
100}
101
/// An OpenDocument Spreadsheet (ODS) document, parsed in full when it is constructed.
pub struct Ods<RS> {
    /// Cell values and formulas of every sheet, keyed by sheet name.
    sheets: BTreeMap<String, (Range<DataType>, Range<String>)>,
    /// Sheet list and defined names collected while parsing `content.xml`.
    metadata: Metadata,
    /// Ties the struct to the reader type `RS` without storing a reader.
    marker: PhantomData<RS>,
    /// Embedded pictures as `(extension, bytes)` pairs; `None` when none were found.
    #[cfg(feature = "picture")]
    pictures: Option<Vec<(String, Vec<u8>)>>,
}
114
115impl<RS> Reader<RS> for Ods<RS>
116where
117 RS: Read + Seek,
118{
119 type Error = OdsError;
120
121 fn new(reader: RS) -> Result<Self, OdsError> {
122 let mut zip = ZipArchive::new(reader)?;
123
124 match zip.by_name("mimetype") {
126 Ok(mut f) => {
127 let mut buf = [0u8; 46];
128 f.read_exact(&mut buf)?;
129 if &buf[..] != MIMETYPE {
130 return Err(OdsError::InvalidMime(buf.to_vec()));
131 }
132 }
133 Err(ZipError::FileNotFound) => return Err(OdsError::FileNotFound("mimetype")),
134 Err(e) => return Err(OdsError::Zip(e)),
135 }
136
137 #[cfg(feature = "picture")]
138 let pictures = read_pictures(&mut zip)?;
139
140 let Content {
141 sheets,
142 sheets_metadata,
143 defined_names,
144 } = parse_content(zip)?;
145 let metadata = Metadata {
146 sheets: sheets_metadata,
147 names: defined_names,
148 };
149
150 Ok(Ods {
151 marker: PhantomData,
152 metadata,
153 sheets,
154 #[cfg(feature = "picture")]
155 pictures,
156 })
157 }
158
159 fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, OdsError>> {
161 None
162 }
163
164 fn metadata(&self) -> &Metadata {
166 &self.metadata
167 }
168
169 fn worksheet_range(&mut self, name: &str) -> Option<Result<Range<DataType>, OdsError>> {
171 self.sheets.get(name).map(|r| Ok(r.0.to_owned()))
172 }
173
174 fn worksheets(&mut self) -> Vec<(String, Range<DataType>)> {
175 self.sheets
176 .iter()
177 .map(|(name, (range, _formula))| (name.to_owned(), range.clone()))
178 .collect()
179 }
180
181 fn worksheet_formula(&mut self, name: &str) -> Option<Result<Range<String>, OdsError>> {
183 self.sheets.get(name).map(|r| Ok(r.1.to_owned()))
184 }
185
186 #[cfg(feature = "picture")]
187 fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>> {
188 self.pictures.to_owned()
189 }
190}
191
/// Everything `parse_content` extracts from `content.xml`.
struct Content {
    /// Cell values and formulas of every sheet, keyed by sheet name.
    sheets: BTreeMap<String, (Range<DataType>, Range<String>)>,
    /// One entry per sheet, in the order the sheets were encountered.
    sheets_metadata: Vec<Sheet>,
    /// `(name, formula)` pairs read from `table:named-expressions`.
    defined_names: Vec<(String, String)>,
}
197
198fn parse_content<RS: Read + Seek>(mut zip: ZipArchive<RS>) -> Result<Content, OdsError> {
200 let mut reader = match zip.by_name("content.xml") {
201 Ok(f) => {
202 let mut r = XmlReader::from_reader(BufReader::new(f));
203 r.check_end_names(false)
204 .trim_text(false)
205 .check_comments(false)
206 .expand_empty_elements(true);
207 r
208 }
209 Err(ZipError::FileNotFound) => return Err(OdsError::FileNotFound("content.xml")),
210 Err(e) => return Err(OdsError::Zip(e)),
211 };
212 let mut buf = Vec::with_capacity(1024);
213 let mut sheets = BTreeMap::new();
214 let mut defined_names = Vec::new();
215 let mut sheets_metadata = Vec::new();
216 let mut styles = HashMap::new();
217 let mut style_name: Option<String> = None;
218 loop {
219 match reader.read_event_into(&mut buf) {
220 Ok(Event::Start(ref e)) if e.name() == QName(b"style:style") => {
221 style_name = e
222 .try_get_attribute(b"style:name")?
223 .map(|a| a.decode_and_unescape_value(&reader))
224 .transpose()
225 .map_err(OdsError::Xml)?
226 .map(|x| x.to_string())
227 }
228 Ok(Event::Start(ref e))
229 if style_name.clone().is_some() && e.name() == QName(b"style:table-properties") =>
230 {
231 let visible = match e.try_get_attribute(b"table:display")? {
232 Some(a) => match a
233 .decode_and_unescape_value(&reader)
234 .map_err(OdsError::Xml)?
235 .parse()
236 .map_err(OdsError::ParseBool)?
237 {
238 true => SheetVisible::Visible,
239 false => SheetVisible::Hidden,
240 },
241 None => SheetVisible::Visible,
242 };
243 styles.insert(style_name.clone(), visible);
244 }
245 Ok(Event::Start(ref e)) if e.name() == QName(b"table:table") => {
246 let visible = styles
247 .get(
248 &e.try_get_attribute(b"table:style-name")?
249 .map(|a| a.decode_and_unescape_value(&reader))
250 .transpose()
251 .map_err(OdsError::Xml)?
252 .map(|x| x.to_string()),
253 )
254 .map(|v| v.to_owned())
255 .unwrap_or(SheetVisible::Visible);
256 if let Some(ref a) = e
257 .attributes()
258 .filter_map(|a| a.ok())
259 .find(|a| a.key == QName(b"table:name"))
260 {
261 let name = a
262 .decode_and_unescape_value(&reader)
263 .map_err(OdsError::Xml)?
264 .to_string();
265 let (range, formulas) = read_table(&mut reader)?;
266 sheets_metadata.push(Sheet {
267 name: name.clone(),
268 typ: SheetType::WorkSheet,
269 visible,
270 });
271 sheets.insert(name, (range, formulas));
272 }
273 }
274 Ok(Event::Start(ref e)) if e.name() == QName(b"table:named-expressions") => {
275 defined_names = read_named_expressions(&mut reader)?;
276 }
277 Ok(Event::Eof) => break,
278 Err(e) => return Err(OdsError::Xml(e)),
279 _ => (),
280 }
281 buf.clear();
282 }
283 Ok(Content {
284 sheets,
285 sheets_metadata,
286 defined_names,
287 })
288}
289
290fn read_table(reader: &mut OdsReader<'_>) -> Result<(Range<DataType>, Range<String>), OdsError> {
291 let mut cells = Vec::new();
292 let mut rows_repeats = Vec::new();
293 let mut formulas = Vec::new();
294 let mut cols = Vec::new();
295 let mut buf = Vec::with_capacity(1024);
296 let mut row_buf = Vec::with_capacity(1024);
297 let mut cell_buf = Vec::with_capacity(1024);
298 cols.push(0);
299 loop {
300 match reader.read_event_into(&mut buf) {
301 Ok(Event::Start(ref e)) if e.name() == QName(b"table:table-row") => {
302 let row_repeats = match e.try_get_attribute(b"table:number-rows-repeated")? {
303 Some(c) => c
304 .decode_and_unescape_value(reader)
305 .map_err(OdsError::Xml)?
306 .parse()
307 .map_err(OdsError::ParseInt)?,
308 None => 1,
309 };
310 read_row(
311 reader,
312 &mut row_buf,
313 &mut cell_buf,
314 &mut cells,
315 &mut formulas,
316 )?;
317 cols.push(cells.len());
318 rows_repeats.push(row_repeats);
319 }
320 Ok(Event::End(ref e)) if e.name() == QName(b"table:table") => break,
321 Err(e) => return Err(OdsError::Xml(e)),
322 Ok(_) => (),
323 }
324 buf.clear();
325 }
326 Ok((
327 get_range(cells, &cols, &rows_repeats),
328 get_range(formulas, &cols, &rows_repeats),
329 ))
330}
331
/// Returns `true` when no cell of `row` differs from `T::default()`.
///
/// An empty slice counts as an empty row.
fn is_empty_row<T: Default + Clone + PartialEq>(row: &[T]) -> bool {
    let has_content = row.iter().any(|cell| *cell != T::default());
    !has_content
}
335
336fn get_range<T: Default + Clone + PartialEq>(
337 mut cells: Vec<T>,
338 cols: &[usize],
339 rows_repeats: &[usize],
340) -> Range<T> {
341 let mut row_min = None;
343 let mut row_max = 0;
344 let mut col_min = usize::MAX;
345 let mut col_max = 0;
346 let mut first_empty_rows_repeated = 0;
347 {
348 for (i, w) in cols.windows(2).enumerate() {
349 let row = &cells[w[0]..w[1]];
350 if let Some(p) = row.iter().position(|c| c != &T::default()) {
351 if row_min.is_none() {
352 row_min = Some(i);
353 first_empty_rows_repeated =
354 rows_repeats.iter().take(i).sum::<usize>().saturating_sub(i);
355 }
356 row_max = i;
357 if p < col_min {
358 col_min = p;
359 }
360 if let Some(p) = row.iter().rposition(|c| c != &T::default()) {
361 if p > col_max {
362 col_max = p;
363 }
364 }
365 }
366 }
367 }
368 let row_min = match row_min {
369 Some(min) => min,
370 _ => return Range::default(),
371 };
372
373 let cells_len = (row_max + 1 - row_min) * (col_max + 1 - col_min);
375 {
376 let mut new_cells = Vec::with_capacity(cells_len);
377 let empty_cells = vec![T::default(); col_max + 1];
378 let mut empty_row_repeats = 0;
379 for (w, row_repeats) in cols
380 .windows(2)
381 .skip(row_min)
382 .take(row_max + 1)
383 .zip(rows_repeats.iter().skip(row_min).take(row_max + 1))
384 {
385 let row = &cells[w[0]..w[1]];
386 let row_repeats = *row_repeats;
387
388 if is_empty_row(row) {
389 empty_row_repeats = row_repeats;
390 continue;
391 }
392
393 if empty_row_repeats > 0 {
394 row_max = row_max + empty_row_repeats - 1;
395 for _ in 0..empty_row_repeats {
396 new_cells.extend_from_slice(&empty_cells);
397 }
398 empty_row_repeats = 0;
399 };
400
401 if row_repeats > 1 {
402 row_max = row_max + row_repeats - 1;
403 };
404
405 for _ in 0..row_repeats {
406 match row.len().cmp(&(col_max + 1)) {
407 std::cmp::Ordering::Less => {
408 new_cells.extend_from_slice(&row[col_min..]);
409 new_cells.extend_from_slice(&empty_cells[row.len()..]);
410 }
411 std::cmp::Ordering::Equal => {
412 new_cells.extend_from_slice(&row[col_min..]);
413 }
414 std::cmp::Ordering::Greater => {
415 new_cells.extend_from_slice(&row[col_min..=col_max]);
416 }
417 }
418 }
419 }
420 cells = new_cells;
421 }
422 let row_min = row_min + first_empty_rows_repeated;
423 let row_max = row_max + first_empty_rows_repeated;
424 Range {
425 start: (row_min as u32, col_min as u32),
426 end: (row_max as u32, col_max as u32),
427 inner: cells,
428 }
429}
430
431fn read_row(
432 reader: &mut OdsReader<'_>,
433 row_buf: &mut Vec<u8>,
434 cell_buf: &mut Vec<u8>,
435 cells: &mut Vec<DataType>,
436 formulas: &mut Vec<String>,
437) -> Result<(), OdsError> {
438 let mut empty_col_repeats = 0;
439 loop {
440 row_buf.clear();
441 match reader.read_event_into(row_buf) {
442 Ok(Event::Start(ref e))
443 if e.name() == QName(b"table:table-cell")
444 || e.name() == QName(b"table:covered-table-cell") =>
445 {
446 let mut repeats = 1;
447 for a in e.attributes() {
448 let a = a.map_err(OdsError::XmlAttr)?;
449 if a.key == QName(b"table:number-columns-repeated") {
450 repeats = reader
451 .decoder()
452 .decode(&a.value)?
453 .parse()
454 .map_err(OdsError::ParseInt)?;
455 break;
456 }
457 }
458
459 let (value, formula, is_closed) = get_datatype(reader, e.attributes(), cell_buf)?;
460
461 for _ in 0..empty_col_repeats {
462 cells.push(DataType::Empty);
463 formulas.push("".to_string());
464 }
465 empty_col_repeats = 0;
466
467 if value.is_empty() && formula.is_empty() {
468 empty_col_repeats = repeats;
469 } else {
470 for _ in 0..repeats {
471 cells.push(value.clone());
472 formulas.push(formula.clone());
473 }
474 }
475 if !is_closed {
476 reader.read_to_end_into(e.name(), cell_buf)?;
477 }
478 }
479 Ok(Event::End(ref e)) if e.name() == QName(b"table:table-row") => break,
480 Err(e) => return Err(OdsError::Xml(e)),
481 Ok(e) => {
482 return Err(OdsError::Mismatch {
483 expected: "table-cell",
484 found: format!("{:?}", e),
485 });
486 }
487 }
488 }
489 Ok(())
490}
491
492fn get_datatype(
496 reader: &mut OdsReader<'_>,
497 atts: Attributes<'_>,
498 buf: &mut Vec<u8>,
499) -> Result<(DataType, String, bool), OdsError> {
500 let mut is_string = false;
501 let mut is_value_set = false;
502 let mut val = DataType::Empty;
503 let mut formula = String::new();
504 for a in atts {
505 let a = a.map_err(OdsError::XmlAttr)?;
506 match a.key {
507 QName(b"office:value") if !is_value_set => {
508 let v = reader.decoder().decode(&a.value)?;
509 val = DataType::Float(v.parse().map_err(OdsError::ParseFloat)?);
510 is_value_set = true;
511 }
512 QName(b"office:string-value" | b"office:date-value" | b"office:time-value")
513 if !is_value_set =>
514 {
515 let attr = a
516 .decode_and_unescape_value(reader)
517 .map_err(OdsError::Xml)?
518 .to_string();
519 val = match a.key {
520 QName(b"office:date-value") => DataType::DateTimeIso(attr),
521 QName(b"office:time-value") => DataType::DurationIso(attr),
522 _ => DataType::String(attr),
523 };
524 is_value_set = true;
525 }
526 QName(b"office:boolean-value") if !is_value_set => {
527 let b = &*a.value == b"TRUE" || &*a.value == b"true";
528 val = DataType::Bool(b);
529 is_value_set = true;
530 }
531 QName(b"office:value-type") if !is_value_set => is_string = &*a.value == b"string",
532 QName(b"table:formula") => {
533 formula = a
534 .decode_and_unescape_value(reader)
535 .map_err(OdsError::Xml)?
536 .to_string();
537 }
538 _ => (),
539 }
540 }
541 if !is_value_set && is_string {
542 let mut s = String::new();
545 let mut first_paragraph = true;
546 loop {
547 buf.clear();
548 match reader.read_event_into(buf) {
549 Ok(Event::Text(ref e)) => {
550 s.push_str(&e.unescape()?);
551 }
552 Ok(Event::End(ref e))
553 if e.name() == QName(b"table:table-cell")
554 || e.name() == QName(b"table:covered-table-cell") =>
555 {
556 return Ok((DataType::String(s), formula, true));
557 }
558 Ok(Event::Start(ref e)) if e.name() == QName(b"text:p") => {
559 if first_paragraph {
560 first_paragraph = false;
561 } else {
562 s.push('\n');
563 }
564 }
565 Ok(Event::Start(ref e)) if e.name() == QName(b"text:s") => {
566 let count = match e.try_get_attribute("text:c")? {
567 Some(c) => c
568 .decode_and_unescape_value(reader)
569 .map_err(OdsError::Xml)?
570 .parse()
571 .map_err(OdsError::ParseInt)?,
572 None => 1,
573 };
574 for _ in 0..count {
575 s.push(' ');
576 }
577 }
578 Err(e) => return Err(OdsError::Xml(e)),
579 Ok(Event::Eof) => return Err(OdsError::Eof("table:table-cell")),
580 _ => (),
581 }
582 }
583 } else {
584 Ok((val, formula, false))
585 }
586}
587
588fn read_named_expressions(reader: &mut OdsReader<'_>) -> Result<Vec<(String, String)>, OdsError> {
589 let mut defined_names = Vec::new();
590 let mut buf = Vec::with_capacity(512);
591 loop {
592 buf.clear();
593 match reader.read_event_into(&mut buf) {
594 Ok(Event::Start(ref e))
595 if e.name() == QName(b"table:named-range")
596 || e.name() == QName(b"table:named-expression") =>
597 {
598 let mut name = String::new();
599 let mut formula = String::new();
600 for a in e.attributes() {
601 let a = a.map_err(OdsError::XmlAttr)?;
602 match a.key {
603 QName(b"table:name") => {
604 name = a
605 .decode_and_unescape_value(reader)
606 .map_err(OdsError::Xml)?
607 .to_string();
608 }
609 QName(b"table:cell-range-address" | b"table:expression") => {
610 formula = a
611 .decode_and_unescape_value(reader)
612 .map_err(OdsError::Xml)?
613 .to_string();
614 }
615 _ => (),
616 }
617 }
618 defined_names.push((name, formula));
619 }
620 Ok(Event::End(ref e))
621 if e.name() == QName(b"table:named-range")
622 || e.name() == QName(b"table:named-expression") => {}
623 Ok(Event::End(ref e)) if e.name() == QName(b"table:named-expressions") => break,
624 Err(e) => return Err(OdsError::Xml(e)),
625 Ok(e) => {
626 return Err(OdsError::Mismatch {
627 expected: "table:named-expressions",
628 found: format!("{:?}", e),
629 });
630 }
631 }
632 }
633 Ok(defined_names)
634}
635
/// Collects the pictures stored in the archive.
///
/// Every entry whose name starts with `Pictures` and ends with one of the known image
/// extensions is read in full. The result holds `(extension, bytes)` pairs, or `None` when
/// no such entry exists.
///
/// # Errors
/// Propagates zip and I/O errors raised while opening or reading an entry.
#[cfg(feature = "picture")]
fn read_pictures<RS: Read + Seek>(
    zip: &mut ZipArchive<RS>,
) -> Result<Option<Vec<(String, Vec<u8>)>>, OdsError> {
    /// Extensions, compared case-sensitively, that identify a picture entry.
    const PICTURE_EXTENSIONS: [&str; 12] = [
        "emf", "wmf", "pict", "jpeg", "jpg", "png", "dib", "gif", "tiff", "eps", "bmp", "wpg",
    ];

    let mut pics = Vec::new();
    for i in 0..zip.len() {
        let mut zfile = zip.by_index(i)?;
        let zname = zfile.name().to_owned();
        if !zname.starts_with("Pictures") {
            continue;
        }
        // Text after the last '.', or the whole name when it contains no dot.
        let ext = zname.rsplit('.').next().unwrap_or(zname.as_str());
        if PICTURE_EXTENSIONS.contains(&ext) {
            let mut buf: Vec<u8> = Vec::new();
            zfile.read_to_end(&mut buf)?;
            pics.push((ext.to_string(), buf));
        }
    }
    if pics.is_empty() {
        Ok(None)
    } else {
        Ok(Some(pics))
    }
}