1use std::borrow::Cow;
2use std::cmp::min;
3use std::collections::BTreeMap;
4use std::convert::TryInto;
5use std::fmt::Write;
6use std::io::{Read, Seek, SeekFrom};
7use std::marker::PhantomData;
8
9use log::debug;
10
11use crate::cfb::{Cfb, XlsEncoding};
12use crate::formats::{
13 builtin_format_by_code, detect_custom_number_format, format_excel_f64, format_excel_i64,
14 CellFormat,
15};
16#[cfg(feature = "picture")]
17use crate::utils::read_usize;
18use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32};
19use crate::vba::VbaProject;
20use crate::{
21 Cell, CellErrorType, DataType, Metadata, Range, Reader, Sheet, SheetType, SheetVisible,
22};
23
/// Errors that can be raised while reading an xls workbook.
#[derive(Debug)]
pub enum XlsError {
    /// An I/O error reported by the underlying reader.
    Io(std::io::Error),
    /// An error raised by the `cfb` module.
    Cfb(crate::cfb::CfbError),
    /// An error raised by the `vba` module.
    Vba(crate::vba::VbaError),

    /// A formula token needed an operand but the formula stack was too short.
    StackLen,
    /// A field held a value the parser does not recognize.
    Unrecognized {
        /// Name of the field or structure being decoded.
        typ: &'static str,
        /// The value that was not recognized.
        val: u8,
    },
    /// The workbook is password protected.
    Password,
    /// A record or field did not have the length the parser requires.
    Len {
        /// Length the parser required.
        expected: usize,
        /// Length that was actually found.
        found: usize,
        /// Name of the record or field being decoded.
        typ: &'static str,
    },
    /// The continuation records ran out while skipping or reading data.
    ContinueRecordTooShort,
    /// The stream ended before the named item could be read.
    EoStream(&'static str),

    /// A formula did not reduce to exactly one expression.
    InvalidFormula {
        /// Number of entries left on the formula stack.
        stack_size: usize,
    },
    /// A formula referenced a function index that is out of range.
    IfTab(usize),
    /// A formula contained an attribute token of an unsupported kind.
    Etpg(u8),
    /// The workbook has no VBA project.
    NoVba,
    /// A drawing ("art") record was invalid; the payload names what was wrong.
    #[cfg(feature = "picture")]
    Art(&'static str),
}
74
// Conversions from the wrapped error types into the matching `XlsError` variant,
// generated by the crate's `from_err!` macro (presumably `From` impls, since `?`
// is applied to these error types in functions returning `XlsError` — confirm
// against the macro definition).
from_err!(std::io::Error, XlsError, Io);
from_err!(crate::cfb::CfbError, XlsError, Cfb);
from_err!(crate::vba::VbaError, XlsError, Vba);
78
79impl std::fmt::Display for XlsError {
80 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81 match self {
82 XlsError::Io(e) => write!(f, "I/O error: {}", e),
83 XlsError::Cfb(e) => write!(f, "Cfb error: {}", e),
84 XlsError::Vba(e) => write!(f, "Vba error: {}", e),
85 XlsError::StackLen => write!(f, "Invalid stack length"),
86 XlsError::Unrecognized { typ, val } => write!(f, "Unrecognized {}: 0x{:0X}", typ, val),
87 XlsError::Password => write!(f, "Workbook is password protected"),
88 XlsError::Len {
89 expected,
90 found,
91 typ,
92 } => write!(
93 f,
94 "Invalid {} length, expected {} maximum, found {}",
95 typ, expected, found
96 ),
97 XlsError::ContinueRecordTooShort => write!(
98 f,
99 "Continued record too short while reading extended string"
100 ),
101 XlsError::EoStream(s) => write!(f, "End of stream '{}'", s),
102 XlsError::InvalidFormula { stack_size } => {
103 write!(f, "Invalid formula (stack size: {})", stack_size)
104 }
105 XlsError::IfTab(iftab) => write!(f, "Invalid iftab {:X}", iftab),
106 XlsError::Etpg(etpg) => write!(f, "Invalid etpg {:X}", etpg),
107 XlsError::NoVba => write!(f, "No VBA project"),
108 #[cfg(feature = "picture")]
109 XlsError::Art(s) => write!(f, "Invalid art record '{}'", s),
110 }
111 }
112}
113
114impl std::error::Error for XlsError {
115 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
116 match self {
117 XlsError::Io(e) => Some(e),
118 XlsError::Cfb(e) => Some(e),
119 XlsError::Vba(e) => Some(e),
120 _ => None,
121 }
122 }
123}
124
/// Options that adjust how an xls workbook is parsed.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct XlsOptions {
    /// Codepage used to decode strings.
    ///
    /// When set, it replaces the default codepage (1200) and the codepage
    /// stored in the workbook (record `0x0042`) is ignored.
    pub force_codepage: Option<u16>,
}
139
/// A parsed xls workbook.
pub struct Xls<RS> {
    /// Parsed sheets keyed by sheet name: the cell values and the cell formulas.
    sheets: BTreeMap<String, (Range<DataType>, Range<String>)>,
    /// VBA project, loaded when the file has a `_VBA_PROJECT_CUR` directory.
    vba: Option<VbaProject>,
    /// Sheet descriptions and defined names collected while parsing.
    metadata: Metadata,
    /// Ties the type to its reader type without storing a reader.
    marker: PhantomData<RS>,
    /// Parsing options supplied by the caller.
    options: XlsOptions,
    /// Cell format of each XF record, in the order the records were read.
    formats: Vec<CellFormat>,
    /// Set when record `0x0022` holds the value 1.
    is_1904: bool,
    /// Extracted pictures as (file extension, raw bytes) pairs, if any were found.
    #[cfg(feature = "picture")]
    pictures: Option<Vec<(String, Vec<u8>)>>,
}
152
153impl<RS: Read + Seek> Xls<RS> {
154 pub fn new_with_options(mut reader: RS, options: XlsOptions) -> Result<Self, XlsError> {
170 let mut cfb = {
171 let offset_end = reader.seek(SeekFrom::End(0))? as usize;
172 reader.seek(SeekFrom::Start(0))?;
173 Cfb::new(&mut reader, offset_end)?
174 };
175
176 debug!("cfb loaded");
177
178 let vba = if cfb.has_directory("_VBA_PROJECT_CUR") {
180 Some(VbaProject::from_cfb(&mut reader, &mut cfb)?)
181 } else {
182 None
183 };
184
185 debug!("vba ok");
186
187 let mut xls = Xls {
188 sheets: BTreeMap::new(),
189 vba,
190 marker: PhantomData,
191 metadata: Metadata::default(),
192 options,
193 is_1904: false,
194 formats: Vec::new(),
195 #[cfg(feature = "picture")]
196 pictures: None,
197 };
198
199 xls.parse_workbook(reader, cfb)?;
200
201 debug!("xls parsed");
202
203 Ok(xls)
204 }
205}
206
207impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
208 type Error = XlsError;
209
210 fn new(reader: RS) -> Result<Self, XlsError> {
211 Self::new_with_options(reader, XlsOptions::default())
212 }
213
214 fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, XlsError>> {
215 self.vba.as_ref().map(|vba| Ok(Cow::Borrowed(vba)))
216 }
217
218 fn metadata(&self) -> &Metadata {
220 &self.metadata
221 }
222
223 fn worksheet_range(&mut self, name: &str) -> Option<Result<Range<DataType>, XlsError>> {
224 self.sheets.get(name).map(|r| Ok(r.0.clone()))
225 }
226
227 fn worksheets(&mut self) -> Vec<(String, Range<DataType>)> {
228 self.sheets
229 .iter()
230 .map(|(name, (data, _))| (name.to_owned(), data.clone()))
231 .collect()
232 }
233
234 fn worksheet_formula(&mut self, name: &str) -> Option<Result<Range<String>, XlsError>> {
235 self.sheets.get(name).map(|r| Ok(r.1.clone()))
236 }
237
238 #[cfg(feature = "picture")]
239 fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>> {
240 self.pictures.to_owned()
241 }
242}
243
/// One 6-byte entry read from record `0x0017`; formulas and defined names use
/// it to find the sheet they refer to.
#[derive(Debug, Clone, Copy)]
struct Xti {
    /// Bytes 0..2 of the entry (currently unused).
    _isup_book: u16,
    /// Bytes 2..4 of the entry; used as an index into the list of sheet names.
    itab_first: i16,
    /// Bytes 4..6 of the entry (currently unused).
    _itab_last: i16,
}
250
251impl<RS: Read + Seek> Xls<RS> {
252 fn parse_workbook(&mut self, mut reader: RS, mut cfb: Cfb) -> Result<(), XlsError> {
253 let stream = cfb
255 .get_stream("Workbook", &mut reader)
256 .or_else(|_| cfb.get_stream("Book", &mut reader))?;
257
258 let mut sheet_names = Vec::new();
259 let mut strings = Vec::new();
260 let mut defined_names = Vec::new();
261 let mut xtis = Vec::new();
262 let mut formats = BTreeMap::new();
263 let mut xfs = Vec::new();
264 let codepage = self.options.force_codepage.unwrap_or(1200);
265 let mut encoding = XlsEncoding::from_codepage(codepage)?;
266 #[cfg(feature = "picture")]
267 let mut draw_group: Vec<u8> = Vec::new();
268 {
269 let wb = &stream;
270 let records = RecordIter { stream: wb };
271 for record in records {
272 let mut r = record?;
273 match r.typ {
274 0x0012 if read_u16(r.data) != 0 => return Err(XlsError::Password),
275 0x0042 => {
277 if self.options.force_codepage.is_none() {
278 encoding = XlsEncoding::from_codepage(read_u16(r.data))?
279 }
280 }
281 0x013D => {
282 let sheet_len = r.data.len() / 2;
283 sheet_names.reserve(sheet_len);
284 self.metadata.sheets.reserve(sheet_len);
285 }
286 0x0022 => {
288 if read_u16(r.data) == 1 {
289 self.is_1904 = true
290 }
291 }
292 0x041E => {
294 let (idx, format) = parse_format(&mut r, &encoding)?;
295 formats.insert(idx, format);
296 }
297 0x00E0 => {
299 xfs.push(parse_xf(&r)?);
300 }
301 0x0085 => {
303 let (pos, sheet) = parse_sheet_metadata(&mut r, &encoding)?;
304 self.metadata.sheets.push(sheet.clone());
305 sheet_names.push((pos, sheet.name)); }
307 0x0018 => {
308 let cch = r.data[3] as usize;
310 let cce = read_u16(&r.data[4..]) as usize;
311 let mut name = String::new();
312 read_unicode_string_no_cch(&encoding, &r.data[14..], &cch, &mut name);
313 let rgce = &r.data[r.data.len() - cce..];
314 let formula = parse_defined_names(rgce)?;
315 defined_names.push((name, formula));
316 }
317 0x0017 => {
318 let cxti = read_u16(r.data) as usize;
320 xtis.extend(r.data[2..].chunks(6).take(cxti).map(|xti| Xti {
321 _isup_book: read_u16(&xti[..2]),
322 itab_first: read_i16(&xti[2..4]),
323 _itab_last: read_i16(&xti[4..]),
324 }));
325 }
326 0x00FC => strings = parse_sst(&mut r, &encoding)?, #[cfg(feature = "picture")]
328 0x00EB => {
329 draw_group.extend(r.data);
331 if let Some(cont) = r.cont {
332 draw_group.extend(cont.iter().flat_map(|v| *v));
333 }
334 }
335 0x000A => break, _ => (),
337 }
338 }
339 }
340
341 self.formats = xfs
342 .into_iter()
343 .map(|fmt| match formats.get(&fmt) {
344 Some(s) => *s,
345 _ => builtin_format_by_code(fmt),
346 })
347 .collect();
348
349 debug!("formats: {:?}", self.formats);
350
351 let defined_names = defined_names
352 .into_iter()
353 .map(|(name, (i, mut f))| {
354 if let Some(i) = i {
355 let sh = xtis
356 .get(i)
357 .and_then(|xti| sheet_names.get(xti.itab_first as usize))
358 .map_or("#REF", |sh| &sh.1);
359 f = format!("{sh}!{f}");
360 }
361 (name, f)
362 })
363 .collect::<Vec<_>>();
364
365 debug!("defined_names: {:?}", defined_names);
366
367 let mut sheets = BTreeMap::new();
368 let fmla_sheet_names = sheet_names
369 .iter()
370 .map(|(_, n)| n.clone())
371 .collect::<Vec<_>>();
372 for (pos, name) in sheet_names {
373 let sh = &stream[pos..];
374 let records = RecordIter { stream: sh };
375 let mut cells = Vec::new();
376 let mut formulas = Vec::new();
377 let mut fmla_pos = (0, 0);
378 for record in records {
379 let r = record?;
380 match r.typ {
381 0x0200 => {
383 let Dimensions { start, end } = parse_dimensions(r.data)?;
384 let rows = (end.0 - start.0 + 1) as usize;
385 let cols = (end.1 - start.1 + 1) as usize;
386 cells.reserve(rows.saturating_mul(cols));
387 }
388 0x0203 => cells.push(parse_number(r.data, &self.formats, self.is_1904)?), 0x0204 => cells.extend(parse_label(r.data, &encoding)?), 0x0205 => cells.push(parse_bool_err(r.data)?), 0x0207 => {
393 let val = DataType::String(parse_string(r.data, &encoding)?);
395 cells.push(Cell::new(fmla_pos, val))
396 }
397 0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), 0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), 0x00BD => parse_mul_rk(r.data, &mut cells, &self.formats, self.is_1904)?, 0x000A => break, 0x0006 => {
402 if r.data.len() < 20 {
404 return Err(XlsError::Len {
405 expected: 20,
406 found: r.data.len(),
407 typ: "Formula",
408 });
409 }
410 let row = read_u16(r.data);
411 let col = read_u16(&r.data[2..]);
412 fmla_pos = (row as u32, col as u32);
413 if let Some(val) = parse_formula_value(&r.data[6..14])? {
414 cells.push(Cell::new(fmla_pos, val));
417 }
418 let fmla = parse_formula(
419 &r.data[20..],
420 &fmla_sheet_names,
421 &defined_names,
422 &xtis,
423 &encoding,
424 )
425 .unwrap_or_else(|e| {
426 debug!("{}", e);
427 format!(
428 "Unrecognised formula \
429 for cell ({}, {}): {:?}",
430 row, col, e
431 )
432 });
433 formulas.push(Cell::new(fmla_pos, fmla));
434 }
435 _ => (),
436 }
437 }
438 let range = Range::from_sparse(cells);
439 let formula = Range::from_sparse(formulas);
440 sheets.insert(name, (range, formula));
441 }
442
443 self.sheets = sheets;
444 self.metadata.names = defined_names;
445
446 #[cfg(feature = "picture")]
447 if !draw_group.is_empty() {
448 let pics = parse_pictures(&draw_group)?;
449 if !pics.is_empty() {
450 self.pictures = Some(pics);
451 }
452 }
453
454 Ok(())
455 }
456}
457
458fn parse_sheet_metadata(
460 r: &mut Record<'_>,
461 encoding: &XlsEncoding,
462) -> Result<(usize, Sheet), XlsError> {
463 let pos = read_u32(r.data) as usize;
464 let visible = match r.data[4] & 0b0011_1111 {
465 0x00 => SheetVisible::Visible,
466 0x01 => SheetVisible::Hidden,
467 0x02 => SheetVisible::VeryHidden,
468 e => {
469 return Err(XlsError::Unrecognized {
470 typ: "BoundSheet8:hsState",
471 val: e,
472 });
473 }
474 };
475 let typ = match r.data[5] {
476 0x00 => SheetType::WorkSheet,
477 0x01 => SheetType::MacroSheet,
478 0x02 => SheetType::ChartSheet,
479 0x06 => SheetType::Vba,
480 e => {
481 return Err(XlsError::Unrecognized {
482 typ: "BoundSheet8:dt",
483 val: e,
484 });
485 }
486 };
487 r.data = &r.data[6..];
488 let name = parse_short_string(r, encoding)?;
489 let sheet_name = name
490 .as_bytes()
491 .iter()
492 .cloned()
493 .filter(|b| *b != 0)
494 .collect::<Vec<_>>();
495 let name = String::from_utf8(sheet_name).unwrap();
496 Ok((pos, Sheet { name, visible, typ }))
497}
498
499fn parse_number(
500 r: &[u8],
501 formats: &[CellFormat],
502 is_1904: bool,
503) -> Result<Cell<DataType>, XlsError> {
504 if r.len() < 14 {
505 return Err(XlsError::Len {
506 typ: "number",
507 expected: 14,
508 found: r.len(),
509 });
510 }
511 let row = read_u16(r) as u32;
512 let col = read_u16(&r[2..]) as u32;
513 let v = read_f64(&r[6..]);
514 let format = formats.get(read_u16(&r[4..]) as usize);
515
516 Ok(Cell::new((row, col), format_excel_f64(v, format, is_1904)))
517}
518
519fn parse_bool_err(r: &[u8]) -> Result<Cell<DataType>, XlsError> {
520 if r.len() < 8 {
521 return Err(XlsError::Len {
522 typ: "BoolErr",
523 expected: 8,
524 found: r.len(),
525 });
526 }
527 let row = read_u16(r);
528 let col = read_u16(&r[2..]);
529 let pos = (row as u32, col as u32);
530 match r[7] {
531 0x00 => Ok(Cell::new(pos, DataType::Bool(r[6] != 0))),
532 0x01 => Ok(Cell::new(pos, parse_err(r[6])?)),
533 e => Err(XlsError::Unrecognized {
534 typ: "fError",
535 val: e,
536 }),
537 }
538}
539
540fn parse_err(e: u8) -> Result<DataType, XlsError> {
541 match e {
542 0x00 => Ok(DataType::Error(CellErrorType::Null)),
543 0x07 => Ok(DataType::Error(CellErrorType::Div0)),
544 0x0F => Ok(DataType::Error(CellErrorType::Value)),
545 0x17 => Ok(DataType::Error(CellErrorType::Ref)),
546 0x1D => Ok(DataType::Error(CellErrorType::Name)),
547 0x24 => Ok(DataType::Error(CellErrorType::Num)),
548 0x2A => Ok(DataType::Error(CellErrorType::NA)),
549 0x2B => Ok(DataType::Error(CellErrorType::GettingData)),
550 e => Err(XlsError::Unrecognized {
551 typ: "error",
552 val: e,
553 }),
554 }
555}
556
557fn parse_rk(r: &[u8], formats: &[CellFormat], is_1904: bool) -> Result<Cell<DataType>, XlsError> {
558 if r.len() < 10 {
559 return Err(XlsError::Len {
560 typ: "rk",
561 expected: 10,
562 found: r.len(),
563 });
564 }
565 let row = read_u16(r);
566 let col = read_u16(&r[2..]);
567
568 Ok(Cell::new(
569 (row as u32, col as u32),
570 rk_num(&r[4..10], formats, is_1904),
571 ))
572}
573
574fn parse_mul_rk(
575 r: &[u8],
576 cells: &mut Vec<Cell<DataType>>,
577 formats: &[CellFormat],
578 is_1904: bool,
579) -> Result<(), XlsError> {
580 if r.len() < 6 {
581 return Err(XlsError::Len {
582 typ: "rk",
583 expected: 6,
584 found: r.len(),
585 });
586 }
587
588 let row = read_u16(r);
589 let col_first = read_u16(&r[2..]);
590 let col_last = read_u16(&r[r.len() - 2..]);
591
592 if r.len() != 6 + 6 * (col_last - col_first + 1) as usize {
593 return Err(XlsError::Len {
594 typ: "rk",
595 expected: 6 + 6 * (col_last - col_first + 1) as usize,
596 found: r.len(),
597 });
598 }
599
600 let mut col = col_first as u32;
601
602 for rk in r[4..r.len() - 2].chunks(6) {
603 cells.push(Cell::new((row as u32, col), rk_num(rk, formats, is_1904)));
604 col += 1;
605 }
606 Ok(())
607}
608
609fn rk_num(rk: &[u8], formats: &[CellFormat], is_1904: bool) -> DataType {
610 let d100 = (rk[2] & 1) != 0;
611 let is_int = (rk[2] & 2) != 0;
612 let format = formats.get(read_u16(rk) as usize);
613
614 let mut v = [0u8; 8];
615 v[4..].copy_from_slice(&rk[2..]);
616 v[4] &= 0xFC;
617 if is_int {
618 let v = (read_i32(&v[4..8]) >> 2) as i64;
619 if d100 && v % 100 != 0 {
620 format_excel_f64(v as f64 / 100.0, format, is_1904)
621 } else {
622 format_excel_i64(if d100 { v / 100 } else { v }, format, is_1904)
623 }
624 } else {
625 let v = read_f64(&v);
626 format_excel_f64(if d100 { v / 100.0 } else { v }, format, is_1904)
627 }
628}
629
630fn parse_short_string(r: &mut Record<'_>, encoding: &XlsEncoding) -> Result<String, XlsError> {
632 if r.data.len() < 2 {
633 return Err(XlsError::Len {
634 typ: "short string",
635 expected: 2,
636 found: r.data.len(),
637 });
638 }
639 let cch = r.data[0] as usize;
640 let high_byte = r.data[1] & 0x1 != 0;
641 r.data = &r.data[2..];
642 let mut s = String::with_capacity(cch);
643 let _ = encoding.decode_to(r.data, cch, &mut s, Some(high_byte));
644 Ok(s)
645}
646
647fn parse_string(r: &[u8], encoding: &XlsEncoding) -> Result<String, XlsError> {
649 if r.len() < 4 {
650 return Err(XlsError::Len {
651 typ: "string",
652 expected: 4,
653 found: r.len(),
654 });
655 }
656 let cch = read_u16(r) as usize;
657 let high_byte = r[2] & 0x1 != 0;
658 let mut s = String::with_capacity(cch);
659 let _ = encoding.decode_to(&r[3..], cch, &mut s, Some(high_byte));
660 Ok(s)
661}
662
663fn parse_label(r: &[u8], encoding: &XlsEncoding) -> Result<Option<Cell<DataType>>, XlsError> {
664 if r.len() < 6 {
665 return Err(XlsError::Len {
666 typ: "label",
667 expected: 6,
668 found: r.len(),
669 });
670 }
671 let row = read_u16(r);
672 let col = read_u16(&r[2..]);
673 let _ixfe = read_u16(&r[4..]);
674 return Ok(Some(Cell::new(
675 (row as u32, col as u32),
676 DataType::String(parse_string(&r[6..], encoding)?),
677 )));
678}
679
680fn parse_label_sst(r: &[u8], strings: &[String]) -> Result<Option<Cell<DataType>>, XlsError> {
681 if r.len() < 10 {
682 return Err(XlsError::Len {
683 typ: "label sst",
684 expected: 10,
685 found: r.len(),
686 });
687 }
688 let row = read_u16(r);
689 let col = read_u16(&r[2..]);
690 let i = read_u32(&r[6..]) as usize;
691 if let Some(s) = strings.get(i) {
692 if !s.is_empty() {
693 return Ok(Some(Cell::new(
694 (row as u32, col as u32),
695 DataType::String(s.clone()),
696 )));
697 }
698 }
699 Ok(None)
700}
701
/// Cell range declared by a sheet, as (row, column) pairs.
struct Dimensions {
    /// First row and first column of the range.
    start: (u32, u32),
    /// Last row and last column of the range (inclusive).
    end: (u32, u32),
}
706
707fn parse_dimensions(r: &[u8]) -> Result<Dimensions, XlsError> {
708 let (rf, rl, cf, cl) = match r.len() {
709 10 => (
710 read_u16(&r[0..2]) as u32,
711 read_u16(&r[2..4]) as u32,
712 read_u16(&r[4..6]) as u32,
713 read_u16(&r[6..8]) as u32,
714 ),
715 14 => (
716 read_u32(&r[0..4]),
717 read_u32(&r[4..8]),
718 read_u16(&r[8..10]) as u32,
719 read_u16(&r[10..12]) as u32,
720 ),
721 _ => {
722 return Err(XlsError::Len {
723 typ: "dimensions",
724 expected: 14,
725 found: r.len(),
726 });
727 }
728 };
729 if 1 <= rl && 1 <= cl {
730 Ok(Dimensions {
731 start: (rf, cf),
732 end: (rl - 1, cl - 1),
733 })
734 } else {
735 Ok(Dimensions {
736 start: (rf, cf),
737 end: (rf, cf),
738 })
739 }
740}
741
742fn parse_sst(r: &mut Record<'_>, encoding: &XlsEncoding) -> Result<Vec<String>, XlsError> {
743 if r.data.len() < 8 {
744 return Err(XlsError::Len {
745 typ: "sst",
746 expected: 8,
747 found: r.data.len(),
748 });
749 }
750 let len: usize = read_i32(&r.data[4..8]).try_into().unwrap();
751 let mut sst = Vec::with_capacity(len);
752 r.data = &r.data[8..];
753
754 for _ in 0..len {
755 sst.push(read_rich_extended_string(r, encoding)?);
756 }
757 Ok(sst)
758}
759
760fn parse_xf(r: &Record<'_>) -> Result<u16, XlsError> {
764 if r.data.len() < 4 {
765 return Err(XlsError::Len {
766 typ: "xf",
767 expected: 4,
768 found: r.data.len(),
769 });
770 }
771
772 Ok(read_u16(&r.data[2..]))
773}
774
775fn parse_format(r: &mut Record<'_>, encoding: &XlsEncoding) -> Result<(u16, CellFormat), XlsError> {
779 if r.data.len() < 4 {
780 return Err(XlsError::Len {
781 typ: "format",
782 expected: 4,
783 found: r.data.len(),
784 });
785 }
786
787 let idx = read_u16(r.data);
788
789 let cch = read_u16(&r.data[2..]) as usize;
790 let high_byte = r.data[4] & 0x1 != 0;
791 r.data = &r.data[5..];
792 let mut s = String::with_capacity(cch);
793 encoding.decode_to(r.data, cch, &mut s, Some(high_byte));
794
795 Ok((idx, detect_custom_number_format(&s)))
796}
797
798fn read_rich_extended_string(
802 r: &mut Record<'_>,
803 encoding: &XlsEncoding,
804) -> Result<String, XlsError> {
805 if r.data.is_empty() && !r.continue_record() || r.data.len() < 3 {
806 return Err(XlsError::Len {
807 typ: "rich extended string",
808 expected: 3,
809 found: r.data.len(),
810 });
811 }
812
813 let cch = read_u16(r.data) as usize;
814 let flags = r.data[2];
815
816 r.data = &r.data[3..];
817
818 let high_byte = flags & 0x1 != 0;
819
820 let mut c_run = 0;
822
823 let mut cb_ext_rst = 0;
825
826 if flags & 0x8 != 0 {
828 c_run = read_u16(r.data) as usize;
829 r.data = &r.data[2..];
830 }
831
832 if flags & 0x4 != 0 {
834 cb_ext_rst = read_i32(r.data) as usize;
835 r.data = &r.data[4..];
836 }
837
838 let s = read_dbcs(encoding, cch, r, high_byte)?;
840
841 r.skip(c_run * 4)?;
843
844 r.skip(cb_ext_rst)?;
846
847 Ok(s)
848}
849
850fn read_dbcs(
851 encoding: &XlsEncoding,
852 mut len: usize,
853 r: &mut Record<'_>,
854 mut high_byte: bool,
855) -> Result<String, XlsError> {
856 let mut s = String::with_capacity(len);
857 while len > 0 {
858 let (l, at) = encoding.decode_to(r.data, len, &mut s, Some(high_byte));
859 r.data = &r.data[at..];
860 len -= l;
861 if len > 0 {
862 if r.continue_record() {
863 high_byte = r.data[0] & 0x1 != 0;
864 r.data = &r.data[1..];
865 } else {
866 return Err(XlsError::EoStream("dbcs"));
867 }
868 }
869 }
870 Ok(s)
871}
872
873fn read_unicode_string_no_cch(encoding: &XlsEncoding, buf: &[u8], len: &usize, s: &mut String) {
874 encoding.decode_to(&buf[1..=*len], *len, s, Some(buf[0] & 0x1 != 0));
875}
876
/// One record of the workbook stream, borrowed from the stream's bytes.
struct Record<'a> {
    /// Record type identifier (first 2 bytes of the record header).
    typ: u16,
    /// Payload still to be consumed; parsers advance this slice as they read.
    data: &'a [u8],
    /// Payloads of the `0x003C` records that immediately follow this record,
    /// in order; `None` when there are none.
    cont: Option<Vec<&'a [u8]>>,
}
882
883impl<'a> Record<'a> {
884 fn continue_record(&mut self) -> bool {
885 match self.cont {
886 None => false,
887 Some(ref mut v) => {
888 if v.is_empty() {
889 false
890 } else {
891 self.data = v.remove(0);
892 true
893 }
894 }
895 }
896 }
897
898 fn skip(&mut self, mut len: usize) -> Result<(), XlsError> {
899 while len > 0 {
900 if self.data.is_empty() && !self.continue_record() {
901 return Err(XlsError::ContinueRecordTooShort);
902 }
903 let l = min(len, self.data.len());
904 let (_, next) = self.data.split_at(l);
905 self.data = next;
906 len -= l;
907 }
908 Ok(())
909 }
910}
911
/// Iterator over the records of a stream; yields each record together with
/// the `0x003C` records that directly follow it.
struct RecordIter<'a> {
    /// Bytes not yet consumed.
    stream: &'a [u8],
}
915
916impl<'a> Iterator for RecordIter<'a> {
917 type Item = Result<Record<'a>, XlsError>;
918 fn next(&mut self) -> Option<Self::Item> {
919 if self.stream.len() < 4 {
920 return if self.stream.is_empty() {
921 None
922 } else {
923 Some(Err(XlsError::EoStream("record type and length")))
924 };
925 }
926 let t = read_u16(self.stream);
927 let mut len = read_u16(&self.stream[2..]) as usize;
928 if self.stream.len() < len + 4 {
929 return Some(Err(XlsError::EoStream("record length")));
930 }
931 let (data, next) = self.stream.split_at(len + 4);
932 self.stream = next;
933 let d = &data[4..];
934
935 let cont = if next.len() > 4 && read_u16(next) == 0x003C {
937 let mut cont = Vec::new();
938 while self.stream.len() > 4 && read_u16(self.stream) == 0x003C {
939 len = read_u16(&self.stream[2..]) as usize;
940 if self.stream.len() < len + 4 {
941 return Some(Err(XlsError::EoStream("continue record length")));
942 }
943 let sp = self.stream.split_at(len + 4);
944 cont.push(&sp.0[4..]);
945 self.stream = sp.1;
946 }
947 Some(cont)
948 } else {
949 None
950 };
951
952 Some(Ok(Record {
953 typ: t,
954 data: d,
955 cont,
956 }))
957 }
958}
959
960fn parse_defined_names(rgce: &[u8]) -> Result<(Option<usize>, String), XlsError> {
964 if rgce.is_empty() {
965 return Ok((None, "empty rgce".to_string()));
967 }
968 let ptg = rgce[0];
969 let res = match ptg {
970 0x3a | 0x5a | 0x7a => {
971 let ixti = read_u16(&rgce[1..3]) as usize;
973 let mut f = String::new();
974 f.push('$');
976 push_column(read_u16(&rgce[5..7]) as u32, &mut f);
977 f.push('$');
978 f.push_str(&format!("{}", read_u16(&rgce[3..5]) as u32 + 1));
979 (Some(ixti), f)
980 }
981 0x3b | 0x5b | 0x7b => {
982 let ixti = read_u16(&rgce[1..3]) as usize;
984 let mut f = String::new();
985 f.push('$');
987 push_column(read_u16(&rgce[7..9]) as u32, &mut f);
988 f.push('$');
989 f.push_str(&format!("{}", read_u16(&rgce[3..5]) as u32 + 1));
990 f.push(':');
991 f.push('$');
992 push_column(read_u16(&rgce[9..11]) as u32, &mut f);
993 f.push('$');
994 f.push_str(&format!("{}", read_u16(&rgce[5..7]) as u32 + 1));
995 (Some(ixti), f)
996 }
997 0x3c | 0x5c | 0x7c | 0x3d | 0x5d | 0x7d => {
998 let ixti = read_u16(&rgce[1..3]) as usize;
1000 (Some(ixti), "#REF!".to_string())
1001 }
1002 _ => (None, format!("Unsupported ptg: {:x}", ptg)),
1003 };
1004 Ok(res)
1005}
1006
1007fn parse_formula(
1011 mut rgce: &[u8],
1012 sheets: &[String],
1013 names: &[(String, String)],
1014 xtis: &[Xti],
1015 encoding: &XlsEncoding,
1016) -> Result<String, XlsError> {
1017 let mut stack = Vec::new();
1018 let mut formula = String::with_capacity(rgce.len());
1019 let cce = read_u16(rgce) as usize;
1020 rgce = &rgce[2..2 + cce];
1021 while !rgce.is_empty() {
1022 let ptg = rgce[0];
1023 rgce = &rgce[1..];
1024 match ptg {
1025 0x3a | 0x5a | 0x7a => {
1026 let ixti = read_u16(&rgce[0..2]);
1028 let rowu = read_u16(&rgce[2..]);
1029 let colu = read_u16(&rgce[4..]);
1030 let sh = xtis
1031 .get(ixti as usize)
1032 .and_then(|xti| sheets.get(xti.itab_first as usize))
1033 .map_or("#REF", |sh| sh);
1034 stack.push(formula.len());
1035 formula.push_str(sh);
1036 formula.push('!');
1037 let col = colu << 2; if colu & 2 != 0 {
1039 formula.push('$');
1040 }
1041 push_column(col as u32, &mut formula);
1042 if colu & 1 != 0 {
1043 formula.push('$');
1044 }
1045 write!(&mut formula, "{}", rowu + 1).unwrap();
1046 rgce = &rgce[6..];
1047 }
1048 0x3b | 0x5b | 0x7b => {
1049 let ixti = read_u16(&rgce[0..2]);
1051 stack.push(formula.len());
1052 formula.push_str(sheets.get(ixti as usize).map_or("#REF", |s| &**s));
1053 formula.push('!');
1054 formula.push('$');
1056 push_column(read_u16(&rgce[6..8]) as u32, &mut formula);
1057 write!(&mut formula, "${}:$", read_u16(&rgce[2..4]) as u32 + 1).unwrap();
1058 push_column(read_u16(&rgce[8..10]) as u32, &mut formula);
1059 write!(&mut formula, "${}", read_u16(&rgce[4..6]) as u32 + 1).unwrap();
1060 rgce = &rgce[10..];
1061 }
1062 0x3c | 0x5c | 0x7c => {
1063 let ixti = read_u16(&rgce[0..2]);
1065 stack.push(formula.len());
1066 formula.push_str(sheets.get(ixti as usize).map_or("#REF", |s| &**s));
1067 formula.push('!');
1068 formula.push_str("#REF!");
1069 rgce = &rgce[6..];
1070 }
1071 0x3d | 0x5d | 0x7d => {
1072 let ixti = read_u16(&rgce[0..2]);
1074 stack.push(formula.len());
1075 formula.push_str(sheets.get(ixti as usize).map_or("#REF", |s| &**s));
1076 formula.push('!');
1077 formula.push_str("#REF!");
1078 rgce = &rgce[10..];
1079 }
1080 0x01 => {
1081 debug!("ignoring PtgExp array/shared formula");
1083 stack.push(formula.len());
1084 rgce = &rgce[4..];
1085 }
1086 0x03..=0x11 => {
1087 let e2 = stack.pop().ok_or(XlsError::StackLen)?;
1089 let op = match ptg {
1091 0x03 => "+",
1092 0x04 => "-",
1093 0x05 => "*",
1094 0x06 => "/",
1095 0x07 => "^",
1096 0x08 => "&",
1097 0x09 => "<",
1098 0x0A => "<=",
1099 0x0B => "=",
1100 0x0C => ">",
1101 0x0D => ">=",
1102 0x0E => "<>",
1103 0x0F => " ",
1104 0x10 => ",",
1105 0x11 => ":",
1106 _ => unreachable!(),
1107 };
1108 let e2 = formula.split_off(e2);
1109 write!(&mut formula, "{}{}", op, e2).unwrap();
1110 }
1111 0x12 => {
1112 let e = stack.last().ok_or(XlsError::StackLen)?;
1113 formula.insert(*e, '+');
1114 }
1115 0x13 => {
1116 let e = stack.last().ok_or(XlsError::StackLen)?;
1117 formula.insert(*e, '-');
1118 }
1119 0x14 => {
1120 formula.push('%');
1121 }
1122 0x15 => {
1123 let e = stack.last().ok_or(XlsError::StackLen)?;
1124 formula.insert(*e, '(');
1125 formula.push(')');
1126 }
1127 0x16 => {
1128 stack.push(formula.len());
1129 }
1130 0x17 => {
1131 stack.push(formula.len());
1132 formula.push('\"');
1133 let cch = rgce[0] as usize;
1134 read_unicode_string_no_cch(encoding, &rgce[1..], &cch, &mut formula);
1135 formula.push('\"');
1136 rgce = &rgce[2 + cch..];
1137 }
1138 0x18 => {
1139 rgce = &rgce[5..];
1140 }
1141 0x19 => {
1142 let etpg = rgce[0];
1143 rgce = &rgce[1..];
1144 match etpg {
1145 0x01 | 0x02 | 0x08 | 0x20 | 0x21 => rgce = &rgce[2..],
1146 0x04 => {
1147 let n = read_u16(&rgce[..2]) as usize + 1;
1149 rgce = &rgce[2 + 2 * n..]; }
1151 0x10 => {
1152 rgce = &rgce[2..];
1153 let e = *stack.last().ok_or(XlsError::StackLen)?;
1154 let e = formula.split_off(e);
1155 write!(&mut formula, "SUM({})", e).unwrap();
1156 }
1157 0x40 | 0x41 => {
1158 let e = *stack.last().ok_or(XlsError::StackLen)?;
1160 let space = match rgce[0] {
1161 0x00 | 0x02 | 0x04 | 0x06 => ' ',
1162 0x01 | 0x03 | 0x05 => '\r',
1163 val => {
1164 return Err(XlsError::Unrecognized {
1165 typ: "PtgAttrSpaceType",
1166 val,
1167 });
1168 }
1169 };
1170 let cch = rgce[1];
1171 for _ in 0..cch {
1172 formula.insert(e, space);
1173 }
1174 rgce = &rgce[2..];
1175 }
1176 e => return Err(XlsError::Etpg(e)),
1177 }
1178 }
1179 0x1C => {
1180 stack.push(formula.len());
1181 let err = rgce[0];
1182 rgce = &rgce[1..];
1183 match err {
1184 0x00 => formula.push_str("#NULL!"),
1185 0x07 => formula.push_str("#DIV/0!"),
1186 0x0F => formula.push_str("#VALUE!"),
1187 0x17 => formula.push_str("#REF!"),
1188 0x1D => formula.push_str("#NAME?"),
1189 0x24 => formula.push_str("#NUM!"),
1190 0x2A => formula.push_str("#N/A"),
1191 0x2B => formula.push_str("#GETTING_DATA"),
1192 e => {
1193 return Err(XlsError::Unrecognized {
1194 typ: "BErr",
1195 val: e,
1196 });
1197 }
1198 }
1199 }
1200 0x1D => {
1201 stack.push(formula.len());
1202 formula.push_str(if rgce[0] == 0 { "FALSE" } else { "TRUE" });
1203 rgce = &rgce[1..];
1204 }
1205 0x1E => {
1206 stack.push(formula.len());
1207 write!(&mut formula, "{}", read_u16(rgce)).unwrap();
1208 rgce = &rgce[2..];
1209 }
1210 0x1F => {
1211 stack.push(formula.len());
1212 write!(&mut formula, "{}", read_f64(rgce)).unwrap();
1213 rgce = &rgce[8..];
1214 }
1215 0x20 | 0x40 | 0x60 => {
1216 stack.push(formula.len());
1218 formula.push_str("{PtgArray}");
1219 rgce = &rgce[7..];
1220 }
1221 0x21 | 0x22 | 0x41 | 0x42 | 0x61 | 0x62 => {
1222 let (iftab, argc) = match ptg {
1223 0x22 | 0x42 | 0x62 => {
1224 let iftab = read_u16(&rgce[1..]) as usize;
1225 let argc = rgce[0] as usize;
1226 rgce = &rgce[3..];
1227 (iftab, argc)
1228 }
1229 _ => {
1230 let iftab = read_u16(rgce) as usize;
1231 if iftab > crate::utils::FTAB_LEN {
1232 return Err(XlsError::IfTab(iftab));
1233 }
1234 rgce = &rgce[2..];
1235 let argc = crate::utils::FTAB_ARGC[iftab] as usize;
1236 (iftab, argc)
1237 }
1238 };
1239 if stack.len() < argc {
1240 return Err(XlsError::StackLen);
1241 }
1242 if argc > 0 {
1243 let args_start = stack.len() - argc;
1244 let mut args = stack.split_off(args_start);
1245 let start = args[0];
1246 for s in &mut args {
1247 *s -= start;
1248 }
1249 let fargs = formula.split_off(start);
1250 stack.push(formula.len());
1251 args.push(fargs.len());
1252 formula.push_str(
1253 crate::utils::FTAB
1254 .get(iftab)
1255 .ok_or(XlsError::IfTab(iftab))?,
1256 );
1257 formula.push('(');
1258 for w in args.windows(2) {
1259 formula.push_str(&fargs[w[0]..w[1]]);
1260 formula.push(',');
1261 }
1262 formula.pop();
1263 formula.push(')');
1264 } else {
1265 stack.push(formula.len());
1266 formula.push_str(crate::utils::FTAB[iftab]);
1267 formula.push_str("()");
1268 }
1269 }
1270 0x23 | 0x43 | 0x63 => {
1271 let iname = read_u32(rgce) as usize - 1; stack.push(formula.len());
1273 formula.push_str(names.get(iname).map_or("#REF!", |n| &*n.0));
1274 rgce = &rgce[4..];
1275 }
1276 0x24 | 0x44 | 0x64 => {
1277 stack.push(formula.len());
1278 let row = read_u16(rgce) + 1;
1279 let col = read_u16(&[rgce[2], rgce[3] & 0x3F]);
1280 if rgce[3] & 0x80 != 0x80 {
1281 formula.push('$');
1282 }
1283 push_column(col as u32, &mut formula);
1284 if rgce[3] & 0x40 != 0x40 {
1285 formula.push('$');
1286 }
1287 formula.push_str(&format!("{}", row));
1288 rgce = &rgce[4..];
1289 }
1290 0x25 | 0x45 | 0x65 => {
1291 stack.push(formula.len());
1292 formula.push('$');
1293 push_column(read_u16(&rgce[4..6]) as u32, &mut formula);
1294 write!(&mut formula, "${}:$", read_u16(&rgce[0..2]) as u32 + 1).unwrap();
1295 push_column(read_u16(&rgce[6..8]) as u32, &mut formula);
1296 write!(&mut formula, "${}", read_u16(&rgce[2..4]) as u32 + 1).unwrap();
1297 rgce = &rgce[8..];
1298 }
1299 0x2A | 0x4A | 0x6A => {
1300 stack.push(formula.len());
1301 formula.push_str("#REF!");
1302 rgce = &rgce[4..];
1303 }
1304 0x2B | 0x4B | 0x6B => {
1305 stack.push(formula.len());
1306 formula.push_str("#REF!");
1307 rgce = &rgce[8..];
1308 }
1309 0x39 | 0x59 => {
1310 stack.push(formula.len());
1312 formula.push_str("[PtgNameX]");
1313 rgce = &rgce[6..];
1314 }
1315 _ => {
1316 return Err(XlsError::Unrecognized {
1317 typ: "ptg",
1318 val: ptg,
1319 });
1320 }
1321 }
1322 }
1323 if stack.len() == 1 {
1324 Ok(formula)
1325 } else {
1326 Err(XlsError::InvalidFormula {
1327 stack_size: stack.len(),
1328 })
1329 }
1330}
1331
1332fn parse_formula_value(r: &[u8]) -> Result<Option<DataType>, XlsError> {
1334 match *r {
1335 [0x00, .., 0xFF, 0xFF] => Ok(None),
1337 [0x01, _, b, .., 0xFF, 0xFF] => Ok(Some(DataType::Bool(b != 0))),
1338 [0x02, _, e, .., 0xFF, 0xFF] => parse_err(e).map(Some),
1339 [0x03, _, .., 0xFF, 0xFF] => Ok(Some(DataType::String("".to_string()))),
1341 [e, .., 0xFF, 0xFF] => Err(XlsError::Unrecognized {
1342 typ: "error",
1343 val: e,
1344 }),
1345 _ => Ok(Some(DataType::Float(read_f64(r)))),
1346 }
1347}
1348
/// One record of the drawing data, borrowed from the drawing group bytes.
#[cfg(feature = "picture")]
struct ArtRecord<'a> {
    /// Upper 12 bits of the first 2 header bytes.
    instance: u16,
    /// Record type (header bytes 2..4); always `0xF000` or above.
    typ: u16,
    /// Record payload, without the 8-byte header.
    data: &'a [u8],
}
1356
/// Iterator over consecutive drawing records in a byte stream.
#[cfg(feature = "picture")]
struct ArtRecordIter<'a> {
    /// Bytes not yet consumed.
    stream: &'a [u8],
}
1361
#[cfg(feature = "picture")]
impl<'a> Iterator for ArtRecordIter<'a> {
    type Item = Result<ArtRecord<'a>, XlsError>;

    /// Reads the next drawing record: an 8-byte header (version/instance,
    /// type, payload length) followed by the payload.
    ///
    /// Yields `None` at the end of the stream, `XlsError::EoStream` when the
    /// header or payload is truncated and `XlsError::Art` when the type is
    /// below `0xF000`.
    fn next(&mut self) -> Option<Self::Item> {
        const HEADER_LEN: usize = 8;

        if self.stream.is_empty() {
            return None;
        }
        if self.stream.len() < HEADER_LEN {
            return Some(Err(XlsError::EoStream("art record header")));
        }

        // The low 4 bits of the first field are not used here.
        let instance = read_u16(self.stream) >> 4;
        let typ = read_u16(&self.stream[2..]);
        if typ < 0xF000 {
            return Some(Err(XlsError::Art("type range 0xF000 - 0xFFFF")));
        }
        let payload_len = read_usize(&self.stream[4..]);
        if self.stream.len() < payload_len + HEADER_LEN {
            return Some(Err(XlsError::EoStream("art record length")));
        }
        let (record, rest) = self.stream.split_at(payload_len + HEADER_LEN);
        self.stream = rest;

        Some(Ok(ArtRecord {
            instance,
            typ,
            data: &record[HEADER_LEN..],
        }))
    }
}
1394
/// Extracts the pictures stored in drawing data.
///
/// Container records (`0xF000`, `0xF001`) and the picture wrapper record
/// (`0xF007`) are searched recursively. Each picture record yields a
/// (file extension, raw bytes) pair, where the bytes start after a header
/// whose size depends on the record type and instance.
///
/// # Errors
///
/// Returns `XlsError::Art` when a picture record has an unexpected instance
/// or is shorter than its header, and propagates errors from the record
/// iterator.
#[cfg(feature = "picture")]
fn parse_pictures(stream: &[u8]) -> Result<Vec<(String, Vec<u8>)>, XlsError> {
    let mut pics = Vec::new();
    let records = ArtRecordIter { stream };
    for record in records {
        let r = record?;
        match r.typ {
            // Containers: the payload is itself a sequence of records.
            0xF000 | 0xF001 => pics.extend(parse_pictures(r.data)?),
            // Wrapper: a 36-byte header, then a name whose length is stored
            // at byte 33, then the embedded picture record.
            0xF007 => {
                let name_len = *r
                    .data
                    .get(33)
                    .ok_or(XlsError::Art("record 0xF007 too short"))?
                    as usize;
                let embedded = r
                    .data
                    .get(36 + name_len..)
                    .ok_or(XlsError::Art("record 0xF007 too short"))?;
                pics.extend(parse_pictures(embedded)?);
            }
            0xF01A | 0xF01B | 0xF01C | 0xF01D | 0xF01E | 0xF01F | 0xF029 | 0xF02A => {
                // The instance comes from the file: an unexpected value is an
                // input error, not a programming error, so it must not panic.
                let (ext, skip) = match (r.typ, r.instance) {
                    (0xF01A, 0x3D4) => ("emf", 50usize),
                    (0xF01A, 0x3D5) => ("emf", 66),
                    (0xF01B, 0x216) => ("wmf", 50),
                    (0xF01B, 0x217) => ("wmf", 66),
                    (0xF01C, 0x542) => ("pict", 50),
                    (0xF01C, 0x543) => ("pict", 66),
                    (0xF01D | 0xF02A, 0x46A | 0x6E2) => ("jpg", 17),
                    (0xF01D | 0xF02A, 0x46B | 0x6E3) => ("jpg", 33),
                    (0xF01E, 0x6E0) => ("png", 17),
                    (0xF01E, 0x6E1) => ("png", 33),
                    (0xF01F, 0x7A8) => ("dib", 17),
                    (0xF01F, 0x7A9) => ("dib", 33),
                    (0xF029, 0x6E4) => ("tiff", 17),
                    (0xF029, 0x6E5) => ("tiff", 33),
                    _ => return Err(XlsError::Art("picture instance not supported")),
                };
                let bytes = r
                    .data
                    .get(skip..)
                    .ok_or(XlsError::Art("picture record too short"))?;
                pics.push((ext.to_string(), bytes.to_vec()));
            }
            _ => {}
        }
    }
    Ok(pics)
}