zip/
read.rs

1//! Types for reading ZIP archives
2
3#[cfg(feature = "aes-crypto")]
4use crate::aes::{AesReader, AesReaderValid};
5use crate::compression::{CompressionMethod, Decompressor};
6use crate::cp437::FromCp437;
7use crate::crc32::Crc32Reader;
8use crate::extra_fields::{ExtendedTimestamp, ExtraField, Ntfs};
9use crate::read::zip_archive::{Shared, SharedBuilder};
10use crate::result::{ZipError, ZipResult};
11use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod};
12use crate::types::{
13    AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
14    ZipLocalEntryBlock,
15};
16use crate::write::SimpleFileOptions;
17use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
18use crate::ZIP64_BYTES_THR;
19use indexmap::IndexMap;
20use std::borrow::Cow;
21use std::ffi::OsStr;
22use std::fs::create_dir_all;
23use std::io::{self, copy, prelude::*, sink, SeekFrom};
24use std::mem;
25use std::mem::size_of;
26use std::ops::Deref;
27use std::path::{Component, Path, PathBuf};
28use std::sync::{Arc, OnceLock};
29
30mod config;
31
32pub use config::*;
33
34/// Provides high level API for reading from a stream.
35pub(crate) mod stream;
36
37#[cfg(feature = "lzma")]
38pub(crate) mod lzma;
39
40pub(crate) mod magic_finder;
41
42// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
43pub(crate) mod zip_archive {
44    use indexmap::IndexMap;
45    use std::sync::Arc;
46
47    /// Extract immutable data from `ZipArchive` to make it cheap to clone
48    #[derive(Debug)]
49    pub(crate) struct Shared {
50        pub(crate) files: IndexMap<Box<str>, super::ZipFileData>,
51        pub(super) offset: u64,
52        pub(super) dir_start: u64,
53        // This isn't yet used anywhere, but it is here for use cases in the future.
54        #[allow(dead_code)]
55        pub(super) config: super::Config,
56        pub(crate) comment: Box<[u8]>,
57        pub(crate) zip64_comment: Option<Box<[u8]>>,
58    }
59
60    #[derive(Debug)]
61    pub(crate) struct SharedBuilder {
62        pub(crate) files: Vec<super::ZipFileData>,
63        pub(super) offset: u64,
64        pub(super) dir_start: u64,
65        // This isn't yet used anywhere, but it is here for use cases in the future.
66        #[allow(dead_code)]
67        pub(super) config: super::Config,
68    }
69
70    impl SharedBuilder {
71        pub fn build(self, comment: Box<[u8]>, zip64_comment: Option<Box<[u8]>>) -> Shared {
72            let mut index_map = IndexMap::with_capacity(self.files.len());
73            self.files.into_iter().for_each(|file| {
74                index_map.insert(file.file_name.clone(), file);
75            });
76            Shared {
77                files: index_map,
78                offset: self.offset,
79                dir_start: self.dir_start,
80                config: self.config,
81                comment,
82                zip64_comment,
83            }
84        }
85    }
86
87    /// ZIP archive reader
88    ///
89    /// At the moment, this type is cheap to clone if this is the case for the
90    /// reader it uses. However, this is not guaranteed by this crate and it may
91    /// change in the future.
92    ///
93    /// ```no_run
94    /// use std::io::prelude::*;
95    /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
96    ///     use zip::HasZipMetadata;
97    ///     let mut zip = zip::ZipArchive::new(reader)?;
98    ///
99    ///     for i in 0..zip.len() {
100    ///         let mut file = zip.by_index(i)?;
101    ///         println!("Filename: {}", file.name());
102    ///         std::io::copy(&mut file, &mut std::io::stdout())?;
103    ///     }
104    ///
105    ///     Ok(())
106    /// }
107    /// ```
108    #[derive(Clone, Debug)]
109    pub struct ZipArchive<R> {
110        pub(super) reader: R,
111        pub(super) shared: Arc<Shared>,
112    }
113}
114
115#[cfg(feature = "aes-crypto")]
116use crate::aes::PWD_VERIFY_LENGTH;
117use crate::extra_fields::UnicodeExtraField;
118use crate::result::ZipError::{InvalidArchive, InvalidPassword};
119use crate::spec::is_dir;
120use crate::types::ffi::{S_IFLNK, S_IFREG};
121use crate::unstable::{path_to_string, LittleEndianReadExt};
122pub use zip_archive::ZipArchive;
123
/// Decrypting wrapper over the raw (still compressed) byte stream of one entry.
#[allow(clippy::large_enum_variant)]
pub(crate) enum CryptoReader<'a> {
    /// No encryption; bytes pass through unchanged.
    Plaintext(io::Take<&'a mut dyn Read>),
    /// Legacy ZipCrypto stream cipher, already password-validated.
    ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>),
    #[cfg(feature = "aes-crypto")]
    /// WinZip-style AES decryption; `vendor_version` records AE-1 vs AE-2.
    Aes {
        reader: AesReaderValid<io::Take<&'a mut dyn Read>>,
        vendor_version: AesVendorVersion,
    },
}
134
135impl Read for CryptoReader<'_> {
136    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
137        match self {
138            CryptoReader::Plaintext(r) => r.read(buf),
139            CryptoReader::ZipCrypto(r) => r.read(buf),
140            #[cfg(feature = "aes-crypto")]
141            CryptoReader::Aes { reader: r, .. } => r.read(buf),
142        }
143    }
144
145    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
146        match self {
147            CryptoReader::Plaintext(r) => r.read_to_end(buf),
148            CryptoReader::ZipCrypto(r) => r.read_to_end(buf),
149            #[cfg(feature = "aes-crypto")]
150            CryptoReader::Aes { reader: r, .. } => r.read_to_end(buf),
151        }
152    }
153
154    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
155        match self {
156            CryptoReader::Plaintext(r) => r.read_to_string(buf),
157            CryptoReader::ZipCrypto(r) => r.read_to_string(buf),
158            #[cfg(feature = "aes-crypto")]
159            CryptoReader::Aes { reader: r, .. } => r.read_to_string(buf),
160        }
161    }
162}
163
impl<'a> CryptoReader<'a> {
    /// Consumes this decoder, returning the underlying reader.
    pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
        match self {
            CryptoReader::Plaintext(r) => r,
            CryptoReader::ZipCrypto(r) => r.into_inner(),
            #[cfg(feature = "aes-crypto")]
            CryptoReader::Aes { reader: r, .. } => r.into_inner(),
        }
    }

    /// Returns `true` if the data is encrypted using AE2.
    ///
    /// `make_reader` forwards this flag to `Crc32Reader`.
    pub const fn is_ae2_encrypted(&self) -> bool {
        // With AES support compiled in, inspect the vendor version recorded
        // on the variant; without it AES entries cannot exist, so the answer
        // is always false. The cfg-gated `return` keeps this a const fn.
        #[cfg(feature = "aes-crypto")]
        return matches!(
            self,
            CryptoReader::Aes {
                vendor_version: AesVendorVersion::Ae2,
                ..
            }
        );
        #[cfg(not(feature = "aes-crypto"))]
        false
    }
}
189
/// Error returned when a `ZipFileReader` is used after its reader was taken.
#[cold]
fn invalid_state<T>() -> io::Result<T> {
    let err = io::Error::new(
        io::ErrorKind::Other,
        "ZipFileReader was in an invalid state",
    );
    Err(err)
}
197
/// State machine for the reader of a single archive entry.
pub(crate) enum ZipFileReader<'a> {
    /// The reader has been taken; any further read is an error.
    NoReader,
    /// Raw entry bytes, neither decrypted nor decompressed.
    Raw(io::Take<&'a mut dyn Read>),
    /// Full pipeline: decrypt, buffer, decompress, then CRC32-check.
    Compressed(Box<Crc32Reader<Decompressor<io::BufReader<CryptoReader<'a>>>>>),
}
203
204impl Read for ZipFileReader<'_> {
205    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
206        match self {
207            ZipFileReader::NoReader => invalid_state(),
208            ZipFileReader::Raw(r) => r.read(buf),
209            ZipFileReader::Compressed(r) => r.read(buf),
210        }
211    }
212
213    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
214        match self {
215            ZipFileReader::NoReader => invalid_state(),
216            ZipFileReader::Raw(r) => r.read_exact(buf),
217            ZipFileReader::Compressed(r) => r.read_exact(buf),
218        }
219    }
220
221    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
222        match self {
223            ZipFileReader::NoReader => invalid_state(),
224            ZipFileReader::Raw(r) => r.read_to_end(buf),
225            ZipFileReader::Compressed(r) => r.read_to_end(buf),
226        }
227    }
228
229    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
230        match self {
231            ZipFileReader::NoReader => invalid_state(),
232            ZipFileReader::Raw(r) => r.read_to_string(buf),
233            ZipFileReader::Compressed(r) => r.read_to_string(buf),
234        }
235    }
236}
237
238impl<'a> ZipFileReader<'a> {
239    fn into_inner(self) -> io::Result<io::Take<&'a mut dyn Read>> {
240        match self {
241            ZipFileReader::NoReader => invalid_state(),
242            ZipFileReader::Raw(r) => Ok(r),
243            ZipFileReader::Compressed(r) => {
244                Ok(r.into_inner().into_inner().into_inner().into_inner())
245            }
246        }
247    }
248}
249
/// A struct for reading a zip file
pub struct ZipFile<'a> {
    // Entry metadata; borrowed from the archive, or owned.
    pub(crate) data: Cow<'a, ZipFileData>,
    // Decryption/decompression pipeline for the entry's bytes.
    pub(crate) reader: ZipFileReader<'a>,
}

/// A struct for reading and seeking a zip file
pub struct ZipFileSeek<'a, R> {
    data: Cow<'a, ZipFileData>,
    reader: ZipFileSeekReader<'a, R>,
}

// Reader backing `ZipFileSeek`; currently only a raw byte window exists.
enum ZipFileSeekReader<'a, R> {
    Raw(SeekableTake<'a, R>),
}

// A seekable window over `inner`: starts at `inner_starting_offset`, exposes
// at most `length` bytes, with `current_offset` tracking the position inside
// the window.
struct SeekableTake<'a, R> {
    inner: &'a mut R,
    inner_starting_offset: u64,
    length: u64,
    current_offset: u64,
}
272
273impl<'a, R: Seek> SeekableTake<'a, R> {
274    pub fn new(inner: &'a mut R, length: u64) -> io::Result<Self> {
275        let inner_starting_offset = inner.stream_position()?;
276        Ok(Self {
277            inner,
278            inner_starting_offset,
279            length,
280            current_offset: 0,
281        })
282    }
283}
284
impl<R: Seek> Seek for SeekableTake<'_, R> {
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        // Translate the request into an offset relative to the start of the
        // window; `None` means the signed arithmetic over/underflowed.
        let offset = match pos {
            SeekFrom::Start(offset) => Some(offset),
            SeekFrom::End(offset) => self.length.checked_add_signed(offset),
            SeekFrom::Current(offset) => self.current_offset.checked_add_signed(offset),
        };
        match offset {
            None => Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "invalid seek to a negative or overflowing position",
            )),
            Some(offset) => {
                // Seeking beyond the window clamps to its end.
                let clamped_offset = std::cmp::min(self.length, offset);
                let new_inner_offset = self
                    .inner
                    .seek(SeekFrom::Start(self.inner_starting_offset + clamped_offset))?;
                self.current_offset = new_inner_offset - self.inner_starting_offset;
                // NOTE(review): this returns the inner reader's absolute
                // position, not the position within the window (which would
                // be `self.current_offset`) — confirm callers expect that.
                Ok(new_inner_offset)
            }
        }
    }
}
308
309impl<R: Read> Read for SeekableTake<'_, R> {
310    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
311        let written = self
312            .inner
313            .take(self.length - self.current_offset)
314            .read(buf)?;
315        self.current_offset += written as u64;
316        Ok(written)
317    }
318}
319
320pub(crate) fn make_writable_dir_all<T: AsRef<Path>>(outpath: T) -> Result<(), ZipError> {
321    create_dir_all(outpath.as_ref())?;
322    #[cfg(unix)]
323    {
324        // Dirs must be writable until all normal files are extracted
325        use std::os::unix::fs::PermissionsExt;
326        std::fs::set_permissions(
327            outpath.as_ref(),
328            std::fs::Permissions::from_mode(
329                0o700 | std::fs::metadata(outpath.as_ref())?.permissions().mode(),
330            ),
331        )?;
332    }
333    Ok(())
334}
335
336pub(crate) fn find_content<'a>(
337    data: &ZipFileData,
338    reader: &'a mut (impl Read + Seek),
339) -> ZipResult<io::Take<&'a mut dyn Read>> {
340    // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result!
341    let data_start = match data.data_start.get() {
342        Some(data_start) => *data_start,
343        None => find_data_start(data, reader)?,
344    };
345
346    reader.seek(SeekFrom::Start(data_start))?;
347    Ok((reader as &mut dyn Read).take(data.compressed_size))
348}
349
350fn find_content_seek<'a, R: Read + Seek>(
351    data: &ZipFileData,
352    reader: &'a mut R,
353) -> ZipResult<SeekableTake<'a, R>> {
354    // Parse local header
355    let data_start = find_data_start(data, reader)?;
356    reader.seek(SeekFrom::Start(data_start))?;
357
358    // Explicit Ok and ? are needed to convert io::Error to ZipError
359    Ok(SeekableTake::new(reader, data.compressed_size)?)
360}
361
362fn find_data_start(
363    data: &ZipFileData,
364    reader: &mut (impl Read + Seek + Sized),
365) -> Result<u64, ZipError> {
366    // Go to start of data.
367    reader.seek(SeekFrom::Start(data.header_start))?;
368
369    // Parse static-sized fields and check the magic value.
370    let block = ZipLocalEntryBlock::parse(reader)?;
371
372    // Calculate the end of the local header from the fields we just parsed.
373    let variable_fields_len =
374        // Each of these fields must be converted to u64 before adding, as the result may
375        // easily overflow a u16.
376        block.file_name_length as u64 + block.extra_field_length as u64;
377    let data_start =
378        data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
379
380    // Set the value so we don't have to read it again.
381    match data.data_start.set(data_start) {
382        Ok(()) => (),
383        // If the value was already set in the meantime, ensure it matches (this is probably
384        // unnecessary).
385        Err(_) => {
386            debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
387        }
388    }
389
390    Ok(data_start)
391}
392
/// Builds the decryption layer for one entry's raw byte stream.
///
/// `aes_info` is `Some` when the entry's metadata marks it as AES-encrypted.
/// Returns `InvalidPassword` when no password is supplied for an encrypted
/// entry, and propagates validation failures from the crypto readers.
#[allow(clippy::too_many_arguments)]
pub(crate) fn make_crypto_reader<'a>(
    data: &ZipFileData,
    reader: io::Take<&'a mut dyn Read>,
    password: Option<&[u8]>,
    aes_info: Option<(AesMode, AesVendorVersion, CompressionMethod)>,
) -> ZipResult<CryptoReader<'a>> {
    #[allow(deprecated)]
    {
        if let CompressionMethod::Unsupported(_) = data.compression_method {
            return unsupported_zip_error("Compression method not supported");
        }
    }

    let reader = match (password, aes_info) {
        #[cfg(not(feature = "aes-crypto"))]
        (Some(_), Some(_)) => {
            return Err(ZipError::UnsupportedArchive(
                "AES encrypted files cannot be decrypted without the aes-crypto feature.",
            ))
        }
        #[cfg(feature = "aes-crypto")]
        (Some(password), Some((aes_mode, vendor_version, _))) => CryptoReader::Aes {
            reader: AesReader::new(reader, aes_mode, data.compressed_size).validate(password)?,
            vendor_version,
        },
        (Some(password), None) => {
            // ZipCrypto: when a data descriptor is in use, validate the
            // password against the MS-DOS timestamp; otherwise against the
            // stored CRC32 — presumably because the CRC in the local header
            // is unreliable with a data descriptor (TODO confirm).
            let mut last_modified_time = data.last_modified_time;
            if !data.using_data_descriptor {
                last_modified_time = None;
            }
            let validator = if let Some(last_modified_time) = last_modified_time {
                ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart())
            } else {
                ZipCryptoValidator::PkzipCrc32(data.crc32)
            };
            CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?)
        }
        // Encrypted entry but no password supplied.
        (None, Some(_)) => return Err(InvalidPassword),
        (None, None) => CryptoReader::Plaintext(reader),
    };
    Ok(reader)
}
436
437pub(crate) fn make_reader(
438    compression_method: CompressionMethod,
439    crc32: u32,
440    reader: CryptoReader,
441) -> ZipResult<ZipFileReader> {
442    let ae2_encrypted = reader.is_ae2_encrypted();
443
444    Ok(ZipFileReader::Compressed(Box::new(Crc32Reader::new(
445        Decompressor::new(io::BufReader::new(reader), compression_method)?,
446        crc32,
447        ae2_encrypted,
448    ))))
449}
450
/// Creates a symlink at `outpath` pointing at `target` (raw bytes from the
/// archive), with per-platform handling; on platforms without symlink
/// support the target path is written into a regular file instead.
pub(crate) fn make_symlink<T>(
    outpath: &Path,
    target: &[u8],
    #[allow(unused)] existing_files: &IndexMap<Box<str>, T>,
) -> ZipResult<()> {
    let Ok(target_str) = std::str::from_utf8(target) else {
        return Err(ZipError::InvalidArchive("Invalid UTF-8 as symlink target"));
    };

    #[cfg(not(any(unix, windows)))]
    {
        // No symlink support: materialize the link as a file holding the
        // target path bytes.
        use std::fs::File;
        let output = File::create(outpath);
        output?.write_all(target)?;
    }
    #[cfg(unix)]
    {
        std::os::unix::fs::symlink(Path::new(&target_str), outpath)?;
    }
    #[cfg(windows)]
    {
        // Windows distinguishes file and directory symlinks. Decide from the
        // archive's own entries first, falling back to on-disk metadata.
        let target = Path::new(OsStr::new(&target_str));
        let target_is_dir_from_archive =
            existing_files.contains_key(target_str) && is_dir(target_str);
        let target_is_dir = if target_is_dir_from_archive {
            true
        } else if let Ok(meta) = std::fs::metadata(target) {
            meta.is_dir()
        } else {
            false
        };
        if target_is_dir {
            std::os::windows::fs::symlink_dir(target, outpath)?;
        } else {
            std::os::windows::fs::symlink_file(target, outpath)?;
        }
    }
    Ok(())
}
490
/// Key facts extracted from the end-of-central-directory record(s), used to
/// drive parsing of the central directory itself.
#[derive(Debug)]
pub(crate) struct CentralDirectoryInfo {
    // Offset of the first byte of archive data (non-zero when the zip is
    // preceded by other content, e.g. a self-extractor or shebang).
    pub(crate) archive_offset: u64,
    // Absolute offset of the first central directory entry.
    pub(crate) directory_start: u64,
    pub(crate) number_of_files: usize,
    pub(crate) disk_number: u32,
    pub(crate) disk_with_central_directory: u32,
}
499
impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
    type Error = ZipError;

    fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
        // Prefer the ZIP64 EOCD when present (after sanity-checking it);
        // otherwise fall back to the classic EOCD's narrower fields.
        let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
            match &value.eocd64 {
                Some(DataAndPosition { data: eocd64, .. }) => {
                    if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
                        return Err(InvalidArchive(
                        "ZIP64 footer indicates more files on this disk than in the whole archive",
                    ));
                    } else if eocd64.version_needed_to_extract > eocd64.version_made_by {
                        return Err(InvalidArchive(
                        "ZIP64 footer indicates a new version is needed to extract this archive than the \
                                 version that wrote it",
                    ));
                    }
                    (
                        eocd64.central_directory_offset,
                        eocd64.number_of_files as usize,
                        eocd64.disk_number,
                        eocd64.disk_with_central_directory,
                    )
                }
                _ => (
                    value.eocd.data.central_directory_offset as u64,
                    value.eocd.data.number_of_files_on_this_disk as usize,
                    value.eocd.data.disk_number as u32,
                    value.eocd.data.disk_with_central_directory as u32,
                ),
            };

        // The recorded CD offset is relative to the start of the zip data;
        // rebase it by the archive offset, rejecting overflow.
        let directory_start = relative_cd_offset
            .checked_add(value.archive_offset)
            .ok_or(InvalidArchive("Invalid central directory size or offset"))?;

        Ok(Self {
            archive_offset: value.archive_offset,
            directory_start,
            number_of_files,
            disk_number,
            disk_with_central_directory,
        })
    }
}
545
546impl<R> ZipArchive<R> {
547    pub(crate) fn from_finalized_writer(
548        files: IndexMap<Box<str>, ZipFileData>,
549        comment: Box<[u8]>,
550        zip64_comment: Option<Box<[u8]>>,
551        reader: R,
552        central_start: u64,
553    ) -> ZipResult<Self> {
554        let initial_offset = match files.first() {
555            Some((_, file)) => file.header_start,
556            None => central_start,
557        };
558        let shared = Arc::new(Shared {
559            files,
560            offset: initial_offset,
561            dir_start: central_start,
562            config: Config {
563                archive_offset: ArchiveOffset::Known(initial_offset),
564            },
565            comment,
566            zip64_comment,
567        });
568        Ok(Self { reader, shared })
569    }
570
571    /// Total size of the files in the archive, if it can be known. Doesn't include directories or
572    /// metadata.
573    pub fn decompressed_size(&self) -> Option<u128> {
574        let mut total = 0u128;
575        for file in self.shared.files.values() {
576            if file.using_data_descriptor {
577                return None;
578            }
579            total = total.checked_add(file.uncompressed_size as u128)?;
580        }
581        Some(total)
582    }
583}
584
585impl<R: Read + Seek> ZipArchive<R> {
    /// Copies this archive's file data (everything before the central
    /// directory) into `w`, and returns the entries' metadata with header and
    /// data offsets rebased onto `w`'s position at the time of the call.
    pub(crate) fn merge_contents<W: Write + Seek>(
        &mut self,
        mut w: W,
    ) -> ZipResult<IndexMap<Box<str>, ZipFileData>> {
        if self.shared.files.is_empty() {
            return Ok(IndexMap::new());
        }
        let mut new_files = self.shared.files.clone();
        /* The first file header will probably start at the beginning of the file, but zip doesn't
         * enforce that, and executable zips like PEX files will have a shebang line so will
         * definitely be greater than 0.
         *
         * assert_eq!(0, new_files[0].header_start); // Avoid this.
         */

        let first_new_file_header_start = w.stream_position()?;

        /* Push back file header starts for all entries in the covered files. */
        new_files.values_mut().try_for_each(|f| {
            /* This is probably the only really important thing to change. */
            f.header_start = f
                .header_start
                .checked_add(first_new_file_header_start)
                .ok_or(InvalidArchive(
                    "new header start from merge would have been too large",
                ))?;
            /* This is only ever used internally to cache metadata lookups (it's not part of the
             * zip spec), and 0 is the sentinel value. */
            f.central_header_start = 0;
            /* This is an atomic variable so it can be updated from another thread in the
             * implementation (which is good!). */
            if let Some(old_data_start) = f.data_start.take() {
                let new_data_start = old_data_start
                    .checked_add(first_new_file_header_start)
                    .ok_or(InvalidArchive(
                        "new data start from merge would have been too large",
                    ))?;
                f.data_start.get_or_init(|| new_data_start);
            }
            Ok::<_, ZipError>(())
        })?;

        /* Rewind to the beginning of the file.
         *
         * NB: we *could* decide to start copying from new_files[0].header_start instead, which
         * would avoid copying over e.g. any pex shebangs or other file contents that start before
         * the first zip file entry. However, zip files actually shouldn't care about garbage data
         * in *between* real entries, since the central directory header records the correct start
         * location of each, and keeping track of that math is more complicated logic that will only
         * rarely be used, since most zips that get merged together are likely to be produced
         * specifically for that purpose (and therefore are unlikely to have a shebang or other
         * preface). Finally, this preserves any data that might actually be useful.
         */
        self.reader.rewind()?;
        /* Find the end of the file data. */
        let length_to_read = self.shared.dir_start;
        /* Produce a Read that reads bytes up until the start of the central directory header.
         * This "as &mut dyn Read" trick is used elsewhere to avoid having to clone the underlying
         * handle, which it really shouldn't need to anyway. */
        let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read);
        /* Copy over file data from source archive directly. */
        io::copy(&mut limited_raw, &mut w)?;

        /* Return the files we've just written to the data stream. */
        Ok(new_files)
    }
652
    /// Get the directory start offset and number of files. This is done in a
    /// separate function to ease the control flow design.
    pub(crate) fn get_metadata(config: Config, reader: &mut R) -> ZipResult<Shared> {
        // End of the probed region, initially set to the end of the file
        let file_len = reader.seek(io::SeekFrom::End(0))?;
        let mut end_exclusive = file_len;

        // Retry loop: each failed EOCD candidate shrinks the probed region,
        // and the search itself errors out once no candidate remains.
        loop {
            // Find the EOCD and possibly EOCD64 entries and determine the archive offset.
            let cde = spec::find_central_directory(
                reader,
                config.archive_offset,
                end_exclusive,
                file_len,
            )?;

            // Turn EOCD into internal representation.
            let Ok(shared) = CentralDirectoryInfo::try_from(&cde)
                .and_then(|info| Self::read_central_header(info, config, reader))
            else {
                // The next EOCD candidate should start before the current one.
                end_exclusive = cde.eocd.position;
                continue;
            };

            return Ok(shared.build(
                cde.eocd.data.zip_file_comment,
                cde.eocd64.map(|v| v.data.extensible_data_sector),
            ));
        }
    }
684
685    fn read_central_header(
686        dir_info: CentralDirectoryInfo,
687        config: Config,
688        reader: &mut R,
689    ) -> Result<SharedBuilder, ZipError> {
690        // If the parsed number of files is greater than the offset then
691        // something fishy is going on and we shouldn't trust number_of_files.
692        let file_capacity = if dir_info.number_of_files > dir_info.directory_start as usize {
693            0
694        } else {
695            dir_info.number_of_files
696        };
697
698        if dir_info.disk_number != dir_info.disk_with_central_directory {
699            return unsupported_zip_error("Support for multi-disk files is not implemented");
700        }
701
702        if file_capacity.saturating_mul(size_of::<ZipFileData>()) > isize::MAX as usize {
703            return unsupported_zip_error("Oversized central directory");
704        }
705
706        let mut files = Vec::with_capacity(file_capacity);
707        reader.seek(SeekFrom::Start(dir_info.directory_start))?;
708        for _ in 0..dir_info.number_of_files {
709            let file = central_header_to_zip_file(reader, &dir_info)?;
710            files.push(file);
711        }
712
713        Ok(SharedBuilder {
714            files,
715            offset: dir_info.archive_offset,
716            dir_start: dir_info.directory_start,
717            config,
718        })
719    }
720
    /// Returns the verification value and salt for the AES encryption of the file
    ///
    /// It fails if the file number is invalid.
    ///
    /// # Returns
    ///
    /// - None if the file is not encrypted with AES
    #[cfg(feature = "aes-crypto")]
    pub fn get_aes_verification_key_and_salt(
        &mut self,
        file_number: usize,
    ) -> ZipResult<Option<AesInfo>> {
        let (_, data) = self
            .shared
            .files
            .get_index(file_number)
            .ok_or(ZipError::FileNotFound)?;

        // Limit the reader to this entry's compressed bytes before parsing
        // the AES header fields.
        let limit_reader = find_content(data, &mut self.reader)?;
        match data.aes_mode {
            None => Ok(None),
            Some((aes_mode, _, _)) => {
                let (verification_value, salt) =
                    AesReader::new(limit_reader, aes_mode, data.compressed_size)
                        .get_verification_value_and_salt()?;
                let aes_info = AesInfo {
                    aes_mode,
                    verification_value,
                    salt,
                };
                Ok(Some(aes_info))
            }
        }
    }
755
756    /// Read a ZIP archive, collecting the files it contains.
757    ///
758    /// This uses the central directory record of the ZIP file, and ignores local file headers.
759    ///
760    /// A default [`Config`] is used.
761    pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
762        Self::with_config(Default::default(), reader)
763    }
764
765    /// Read a ZIP archive providing a read configuration, collecting the files it contains.
766    ///
767    /// This uses the central directory record of the ZIP file, and ignores local file headers.
768    pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
769        let shared = Self::get_metadata(config, &mut reader)?;
770
771        Ok(ZipArchive {
772            reader,
773            shared: shared.into(),
774        })
775    }
776
777    /// Extract a Zip archive into a directory, overwriting files if they
778    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. Symbolic links are only
779    /// created and followed if the target is within the destination directory (this is checked
780    /// conservatively using [`std::fs::canonicalize`]).
781    ///
782    /// Extraction is not atomic. If an error is encountered, some of the files
783    /// may be left on disk. However, on Unix targets, no newly-created directories with part but
784    /// not all of their contents extracted will be readable, writable or usable as process working
785    /// directories by any non-root user except you.
786    ///
787    /// On Unix and Windows, symbolic links are extracted correctly. On other platforms such as
788    /// WebAssembly, symbolic links aren't supported, so they're extracted as normal files
789    /// containing the target path in UTF-8.
790    pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
791        self.extract_internal(directory, None::<fn(&Path) -> bool>)
792    }
793
    /// Extracts a Zip archive into a directory in the same fashion as
    /// [`ZipArchive::extract`], but detects a "root" directory in the archive
    /// (a single top-level directory that contains the rest of the archive's
    /// entries) and extracts its contents directly.
    ///
    /// For a sensible default `filter`, you can use [`root_dir_common_filter`].
    /// For a custom `filter`, see [`RootDirFilter`].
    ///
    /// See [`ZipArchive::root_dir`] for more information on how the root
    /// directory is detected and the meaning of the `filter` parameter.
    ///
    /// ## Example
    ///
    /// Imagine a Zip archive with the following structure:
    ///
    /// ```text
    /// root/file1.txt
    /// root/file2.txt
    /// root/sub/file3.txt
    /// root/sub/subsub/file4.txt
    /// ```
    ///
    /// If the archive is extracted to `foo` using [`ZipArchive::extract`],
    /// the resulting directory structure will be:
    ///
    /// ```text
    /// foo/root/file1.txt
    /// foo/root/file2.txt
    /// foo/root/sub/file3.txt
    /// foo/root/sub/subsub/file4.txt
    /// ```
    ///
    /// If the archive is extracted to `foo` using
    /// [`ZipArchive::extract_unwrapped_root_dir`], the resulting directory
    /// structure will be:
    ///
    /// ```text
    /// foo/file1.txt
    /// foo/file2.txt
    /// foo/sub/file3.txt
    /// foo/sub/subsub/file4.txt
    /// ```
    ///
    /// ## Example - No Root Directory
    ///
    /// Imagine a Zip archive with the following structure:
    ///
    /// ```text
    /// root/file1.txt
    /// root/file2.txt
    /// root/sub/file3.txt
    /// root/sub/subsub/file4.txt
    /// other/file5.txt
    /// ```
    ///
    /// Due to the presence of the `other` directory,
    /// [`ZipArchive::extract_unwrapped_root_dir`] will extract this in the same
    /// fashion as [`ZipArchive::extract`] as there is now no "root directory."
    pub fn extract_unwrapped_root_dir<P: AsRef<Path>>(
        &mut self,
        directory: P,
        root_dir_filter: impl RootDirFilter,
    ) -> ZipResult<()> {
        // Same extraction machinery as `extract`, with root-dir detection on.
        self.extract_internal(directory, Some(root_dir_filter))
    }
859
860    fn extract_internal<P: AsRef<Path>>(
861        &mut self,
862        directory: P,
863        root_dir_filter: Option<impl RootDirFilter>,
864    ) -> ZipResult<()> {
865        use std::fs;
866
867        create_dir_all(&directory)?;
868        let directory = directory.as_ref().canonicalize()?;
869
870        let root_dir = root_dir_filter
871            .and_then(|filter| {
872                self.root_dir(&filter)
873                    .transpose()
874                    .map(|root_dir| root_dir.map(|root_dir| (root_dir, filter)))
875            })
876            .transpose()?;
877
878        // If we have a root dir, simplify the path components to be more
879        // appropriate for passing to `safe_prepare_path`
880        let root_dir = root_dir
881            .as_ref()
882            .map(|(root_dir, filter)| {
883                crate::path::simplified_components(root_dir)
884                    .ok_or_else(|| {
885                        // Should be unreachable
886                        debug_assert!(false, "Invalid root dir path");
887
888                        InvalidArchive("Invalid root dir path")
889                    })
890                    .map(|root_dir| (root_dir, filter))
891            })
892            .transpose()?;
893
894        #[cfg(unix)]
895        let mut files_by_unix_mode = Vec::new();
896
897        for i in 0..self.len() {
898            let mut file = self.by_index(i)?;
899
900            let mut outpath = directory.clone();
901            file.safe_prepare_path(directory.as_ref(), &mut outpath, root_dir.as_ref())?;
902
903            let symlink_target = if file.is_symlink() && (cfg!(unix) || cfg!(windows)) {
904                let mut target = Vec::with_capacity(file.size() as usize);
905                file.read_to_end(&mut target)?;
906                Some(target)
907            } else {
908                if file.is_dir() {
909                    crate::read::make_writable_dir_all(&outpath)?;
910                    continue;
911                }
912                None
913            };
914
915            drop(file);
916
917            if let Some(target) = symlink_target {
918                make_symlink(&outpath, &target, &self.shared.files)?;
919                continue;
920            }
921            let mut file = self.by_index(i)?;
922            let mut outfile = fs::File::create(&outpath)?;
923            io::copy(&mut file, &mut outfile)?;
924            #[cfg(unix)]
925            {
926                // Check for real permissions, which we'll set in a second pass
927                if let Some(mode) = file.unix_mode() {
928                    files_by_unix_mode.push((outpath.clone(), mode));
929                }
930            }
931        }
932        #[cfg(unix)]
933        {
934            use std::cmp::Reverse;
935            use std::os::unix::fs::PermissionsExt;
936
937            if files_by_unix_mode.len() > 1 {
938                // Ensure we update children's permissions before making a parent unwritable
939                files_by_unix_mode.sort_by_key(|(path, _)| Reverse(path.clone()));
940            }
941            for (path, mode) in files_by_unix_mode.into_iter() {
942                fs::set_permissions(&path, fs::Permissions::from_mode(mode))?;
943            }
944        }
945        Ok(())
946    }
947
948    /// Number of files contained in this zip.
949    pub fn len(&self) -> usize {
950        self.shared.files.len()
951    }
952
953    /// Get the starting offset of the zip central directory.
954    pub fn central_directory_start(&self) -> u64 {
955        self.shared.dir_start
956    }
957
958    /// Whether this zip archive contains no files
959    pub fn is_empty(&self) -> bool {
960        self.len() == 0
961    }
962
963    /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
964    ///
965    /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
966    /// of that prepended data.
967    pub fn offset(&self) -> u64 {
968        self.shared.offset
969    }
970
971    /// Get the comment of the zip archive.
972    pub fn comment(&self) -> &[u8] {
973        &self.shared.comment
974    }
975
976    /// Get the ZIP64 comment of the zip archive, if it is ZIP64.
977    pub fn zip64_comment(&self) -> Option<&[u8]> {
978        self.shared.zip64_comment.as_deref()
979    }
980
981    /// Returns an iterator over all the file and directory names in this archive.
982    pub fn file_names(&self) -> impl Iterator<Item = &str> {
983        self.shared.files.keys().map(|s| s.as_ref())
984    }
985
986    /// Search for a file entry by name, decrypt with given password
987    ///
988    /// # Warning
989    ///
990    /// The implementation of the cryptographic algorithms has not
991    /// gone through a correctness review, and you should assume it is insecure:
992    /// passwords used with this API may be compromised.
993    ///
994    /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
995    /// to check for a 1/256 chance that the password is correct.
996    /// There are many passwords out there that will also pass the validity checks
997    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
998    /// due to its fairly primitive approach to cryptography.
999    pub fn by_name_decrypt(&mut self, name: &str, password: &[u8]) -> ZipResult<ZipFile> {
1000        self.by_name_with_optional_password(name, Some(password))
1001    }
1002
1003    /// Search for a file entry by name
1004    pub fn by_name(&mut self, name: &str) -> ZipResult<ZipFile> {
1005        self.by_name_with_optional_password(name, None)
1006    }
1007
1008    /// Get the index of a file entry by name, if it's present.
1009    #[inline(always)]
1010    pub fn index_for_name(&self, name: &str) -> Option<usize> {
1011        self.shared.files.get_index_of(name)
1012    }
1013
1014    /// Get the index of a file entry by path, if it's present.
1015    #[inline(always)]
1016    pub fn index_for_path<T: AsRef<Path>>(&self, path: T) -> Option<usize> {
1017        self.index_for_name(&path_to_string(path))
1018    }
1019
1020    /// Get the name of a file entry, if it's present.
1021    #[inline(always)]
1022    pub fn name_for_index(&self, index: usize) -> Option<&str> {
1023        self.shared
1024            .files
1025            .get_index(index)
1026            .map(|(name, _)| name.as_ref())
1027    }
1028
1029    /// Search for a file entry by name and return a seekable object.
1030    pub fn by_name_seek(&mut self, name: &str) -> ZipResult<ZipFileSeek<R>> {
1031        self.by_index_seek(self.index_for_name(name).ok_or(ZipError::FileNotFound)?)
1032    }
1033
1034    /// Search for a file entry by index and return a seekable object.
1035    pub fn by_index_seek(&mut self, index: usize) -> ZipResult<ZipFileSeek<R>> {
1036        let reader = &mut self.reader;
1037        self.shared
1038            .files
1039            .get_index(index)
1040            .ok_or(ZipError::FileNotFound)
1041            .and_then(move |(_, data)| {
1042                let seek_reader = match data.compression_method {
1043                    CompressionMethod::Stored => {
1044                        ZipFileSeekReader::Raw(find_content_seek(data, reader)?)
1045                    }
1046                    _ => {
1047                        return Err(ZipError::UnsupportedArchive(
1048                            "Seekable compressed files are not yet supported",
1049                        ))
1050                    }
1051                };
1052                Ok(ZipFileSeek {
1053                    reader: seek_reader,
1054                    data: Cow::Borrowed(data),
1055                })
1056            })
1057    }
1058
1059    fn by_name_with_optional_password<'a>(
1060        &'a mut self,
1061        name: &str,
1062        password: Option<&[u8]>,
1063    ) -> ZipResult<ZipFile<'a>> {
1064        let Some(index) = self.shared.files.get_index_of(name) else {
1065            return Err(ZipError::FileNotFound);
1066        };
1067        self.by_index_with_optional_password(index, password)
1068    }
1069
1070    /// Get a contained file by index, decrypt with given password
1071    ///
1072    /// # Warning
1073    ///
1074    /// The implementation of the cryptographic algorithms has not
1075    /// gone through a correctness review, and you should assume it is insecure:
1076    /// passwords used with this API may be compromised.
1077    ///
1078    /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
1079    /// to check for a 1/256 chance that the password is correct.
1080    /// There are many passwords out there that will also pass the validity checks
1081    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1082    /// due to its fairly primitive approach to cryptography.
1083    pub fn by_index_decrypt(
1084        &mut self,
1085        file_number: usize,
1086        password: &[u8],
1087    ) -> ZipResult<ZipFile<'_>> {
1088        self.by_index_with_optional_password(file_number, Some(password))
1089    }
1090
1091    /// Get a contained file by index
1092    pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
1093        self.by_index_with_optional_password(file_number, None)
1094    }
1095
1096    /// Get a contained file by index without decompressing it
1097    pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
1098        let reader = &mut self.reader;
1099        let (_, data) = self
1100            .shared
1101            .files
1102            .get_index(file_number)
1103            .ok_or(ZipError::FileNotFound)?;
1104        Ok(ZipFile {
1105            reader: ZipFileReader::Raw(find_content(data, reader)?),
1106            data: Cow::Borrowed(data),
1107        })
1108    }
1109
1110    fn by_index_with_optional_password(
1111        &mut self,
1112        file_number: usize,
1113        mut password: Option<&[u8]>,
1114    ) -> ZipResult<ZipFile<'_>> {
1115        let (_, data) = self
1116            .shared
1117            .files
1118            .get_index(file_number)
1119            .ok_or(ZipError::FileNotFound)?;
1120
1121        match (password, data.encrypted) {
1122            (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
1123            (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
1124            _ => {}
1125        }
1126        let limit_reader = find_content(data, &mut self.reader)?;
1127
1128        let crypto_reader = make_crypto_reader(data, limit_reader, password, data.aes_mode)?;
1129
1130        Ok(ZipFile {
1131            data: Cow::Borrowed(data),
1132            reader: make_reader(data.compression_method, data.crc32, crypto_reader)?,
1133        })
1134    }
1135
    /// Find the "root directory" of an archive if it exists, filtering out
    /// irrelevant entries when searching.
    ///
    /// Our definition of a "root directory" is a single top-level directory
    /// that contains the rest of the archive's entries. This is useful for
    /// extracting archives that contain a single top-level directory that
    /// you want to "unwrap" and extract directly.
    ///
    /// Returns `Ok(None)` when no unique root directory exists: multiple
    /// distinct top-level entries, a top-level non-directory entry, or an
    /// entry whose name cannot be interpreted as a safe enclosed path.
    ///
    /// For a sensible default filter, you can use [`root_dir_common_filter`].
    /// For a custom filter, see [`RootDirFilter`].
    pub fn root_dir(&self, filter: impl RootDirFilter) -> ZipResult<Option<PathBuf>> {
        // Candidate root directory, refined as every entry is scanned.
        let mut root_dir: Option<PathBuf> = None;

        for i in 0..self.len() {
            let (_, file) = self
                .shared
                .files
                .get_index(i)
                .ok_or(ZipError::FileNotFound)?;

            // An entry with no safe enclosed name means we can't reason about
            // the archive's layout; report "no root directory".
            let path = match file.enclosed_name() {
                Some(path) => path,
                None => return Ok(None),
            };

            // Entries rejected by the filter don't participate in detection.
            if !filter(&path) {
                continue;
            }

            // Merge `$path` into the candidate: bail out with `None` on a
            // conflict, otherwise record/keep it. Note: both arms `continue`
            // the enclosing `for` loop.
            macro_rules! replace_root_dir {
                ($path:ident) => {
                    match &mut root_dir {
                        Some(root_dir) => {
                            if *root_dir != $path {
                                // We've found multiple root directories,
                                // abort.
                                return Ok(None);
                            } else {
                                continue;
                            }
                        }

                        None => {
                            root_dir = Some($path.into());
                            continue;
                        }
                    }
                };
            }

            // If this entry is located at the root of the archive...
            if path.components().count() == 1 {
                if file.is_dir() {
                    // If it's a directory, it could be the root directory.
                    replace_root_dir!(path);
                } else {
                    // If it's anything else, this archive does not have a
                    // root directory.
                    return Ok(None);
                }
            }

            // Find the root directory for this entry: walk up to its
            // top-most ancestor (the first path component).
            let mut path = path.as_path();
            while let Some(parent) = path.parent().filter(|path| *path != Path::new("")) {
                path = parent;
            }

            replace_root_dir!(path);
        }

        Ok(root_dir)
    }
1209
    /// Unwrap and return the inner reader object
    ///
    /// The position of the reader is undefined.
    pub fn into_inner(self) -> R {
        // Consumes the archive; the cached central-directory data is dropped.
        self.reader
    }
1216}
1217
/// Holds the AES information of a file in the zip archive
#[derive(Debug)]
#[cfg(feature = "aes-crypto")]
pub struct AesInfo {
    /// The AES encryption mode (AES-128, AES-192 or AES-256)
    pub aes_mode: AesMode,
    /// The verification key (`PWD_VERIFY_LENGTH` bytes used for the quick password check)
    pub verification_value: [u8; PWD_VERIFY_LENGTH],
    /// The salt used for key derivation
    pub salt: Vec<u8>,
}
1229
1230const fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
1231    Err(ZipError::UnsupportedArchive(detail))
1232}
1233
1234/// Parse a central directory entry to collect the information for the file.
1235pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
1236    reader: &mut R,
1237    central_directory: &CentralDirectoryInfo,
1238) -> ZipResult<ZipFileData> {
1239    let central_header_start = reader.stream_position()?;
1240
1241    // Parse central header
1242    let block = ZipCentralEntryBlock::parse(reader)?;
1243
1244    let file = central_header_to_zip_file_inner(
1245        reader,
1246        central_directory.archive_offset,
1247        central_header_start,
1248        block,
1249    )?;
1250
1251    let central_header_end = reader.stream_position()?;
1252
1253    if file.header_start >= central_directory.directory_start {
1254        return Err(InvalidArchive(
1255            "A local file entry can't start after the central directory",
1256        ));
1257    }
1258
1259    let data_start = find_data_start(&file, reader)?;
1260
1261    if data_start > central_directory.directory_start {
1262        return Err(InvalidArchive(
1263            "File data can't start after the central directory",
1264        ));
1265    }
1266
1267    reader.seek(SeekFrom::Start(central_header_end))?;
1268    Ok(file)
1269}
1270
#[inline]
/// Read exactly `len` bytes from `reader` into a freshly allocated boxed
/// slice, failing with `UnexpectedEof` if the input is too short.
fn read_variable_length_byte_field<R: Read>(reader: &mut R, len: usize) -> io::Result<Box<[u8]>> {
    let mut buf = vec![0u8; len];
    reader.read_exact(&mut buf)?;
    Ok(buf.into_boxed_slice())
}
1277
/// Parse a central directory entry to collect the information for the file.
///
/// The fixed-size `block` must already be parsed; this function reads the
/// variable-length name/extra-data/comment fields that follow it from
/// `reader`. `archive_offset` is added to the entry's stored local-header
/// offset so it becomes relative to the start of the reader, and
/// `central_header_start` is recorded in the returned [`ZipFileData`].
fn central_header_to_zip_file_inner<R: Read>(
    reader: &mut R,
    archive_offset: u64,
    central_header_start: u64,
    block: ZipCentralEntryBlock,
) -> ZipResult<ZipFileData> {
    let ZipCentralEntryBlock {
        // magic,
        version_made_by,
        // version_to_extract,
        flags,
        compression_method,
        last_mod_time,
        last_mod_date,
        crc32,
        compressed_size,
        uncompressed_size,
        file_name_length,
        extra_field_length,
        file_comment_length,
        // disk_number,
        // internal_file_attributes,
        external_file_attributes,
        offset,
        ..
    } = block;

    // General-purpose bit flags (APPNOTE 4.4.4).
    let encrypted = flags & 1 == 1; // bit 0: entry is encrypted
    let is_utf8 = flags & (1 << 11) != 0; // bit 11: name/comment are UTF-8
    let using_data_descriptor = flags & (1 << 3) != 0; // bit 3: sizes/CRC stored in a trailing data descriptor

    // The three variable-length fields directly follow the fixed-size block.
    let file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?;
    let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?;
    let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?;
    // Non-UTF-8 names and comments are decoded as CP437 (the ZIP default).
    let file_name: Box<str> = match is_utf8 {
        true => String::from_utf8_lossy(&file_name_raw).into(),
        false => file_name_raw.clone().from_cp437(),
    };
    let file_comment: Box<str> = match is_utf8 {
        true => String::from_utf8_lossy(&file_comment_raw).into(),
        false => file_comment_raw.from_cp437(),
    };

    // Construct the result
    let mut result = ZipFileData {
        system: System::from((version_made_by >> 8) as u8),
        /* NB: this strips the top 8 bits! */
        version_made_by: version_made_by as u8,
        encrypted,
        using_data_descriptor,
        is_utf8,
        compression_method: CompressionMethod::parse_from_u16(compression_method),
        compression_level: None,
        last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
        crc32,
        compressed_size: compressed_size.into(),
        uncompressed_size: uncompressed_size.into(),
        file_name,
        file_name_raw,
        extra_field: Some(Arc::new(extra_field.to_vec())),
        central_extra_field: None,
        file_comment,
        header_start: offset.into(),
        extra_data_start: None,
        central_header_start,
        data_start: OnceLock::new(),
        external_attributes: external_file_attributes,
        large_file: false,
        aes_mode: None,
        aes_extra_data_start: 0,
        extra_fields: Vec::new(),
    };
    // Extra-field records may rewrite sizes/offsets (ZIP64), AES info,
    // timestamps, etc. I/O errors here are deliberately swallowed so a
    // truncated extra field leaves `result` as parsed so far.
    match parse_extra_field(&mut result) {
        Ok(stripped_extra_field) => {
            result.extra_field = stripped_extra_field;
        }
        Err(ZipError::Io(..)) => {}
        Err(e) => return Err(e),
    }

    // The AES "compression method" is only meaningful together with the
    // 0x9901 extra-data record that carries the real method and key size.
    let aes_enabled = result.compression_method == CompressionMethod::AES;
    if aes_enabled && result.aes_mode.is_none() {
        return Err(InvalidArchive(
            "AES encryption without AES extra data field",
        ));
    }

    // Account for shifted zip offsets.
    result.header_start = result
        .header_start
        .checked_add(archive_offset)
        .ok_or(InvalidArchive("Archive header is too large"))?;

    Ok(result)
}
1374
/// Parse every record in `file.extra_field`, updating `file` from each one.
///
/// Records that request removal (currently only the ZIP64 record, see
/// [`parse_single_extra_field`]) are spliced out of the returned buffer;
/// `Ok(None)` means no extra data remains after removal.
///
/// NOTE(review): each removal splices from the *original* buffer, so the
/// result is only correct if at most one record asks to be removed — TODO
/// confirm duplicate ZIP64 records cannot reach this point.
pub(crate) fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<Option<Arc<Vec<u8>>>> {
    let Some(ref extra_field) = file.extra_field else {
        return Ok(None);
    };
    let extra_field = extra_field.clone();
    let mut processed_extra_field = extra_field.clone();
    let len = extra_field.len();
    let mut reader = io::Cursor::new(&**extra_field);

    /* TODO: codify this structure into Zip64ExtraFieldBlock fields! */
    let mut position = reader.position() as usize;
    while (position) < len {
        let old_position = position;
        // `remove == true` means the record just parsed should not be kept.
        let remove = parse_single_extra_field(file, &mut reader, position as u64, false)?;
        position = reader.position() as usize;
        if remove {
            // Splice the record's bytes out of the stored extra-field data.
            let remaining = len - (position - old_position);
            if remaining == 0 {
                return Ok(None);
            }
            let mut new_extra_field = Vec::with_capacity(remaining);
            new_extra_field.extend_from_slice(&extra_field[0..old_position]);
            new_extra_field.extend_from_slice(&extra_field[position..]);
            processed_extra_field = Arc::new(new_extra_field);
        }
    }
    Ok(Some(processed_extra_field))
}
1403
/// Parse one extra-field record (the 2-byte id and 2-byte length are read
/// here) and update `file` accordingly.
///
/// `bytes_already_read` is this record's offset within the whole extra-field
/// buffer (recorded as `aes_extra_data_start` for AES records). When
/// `disallow_zip64` is set, a ZIP64 (0x0001) record is rejected outright.
///
/// Returns `Ok(true)` if the caller should strip this record from the stored
/// extra-field data (only the ZIP64 record), `Ok(false)` otherwise.
pub(crate) fn parse_single_extra_field<R: Read>(
    file: &mut ZipFileData,
    reader: &mut R,
    bytes_already_read: u64,
    disallow_zip64: bool,
) -> ZipResult<bool> {
    let kind = reader.read_u16_le()?;
    let len = reader.read_u16_le()?;
    match kind {
        // Zip64 extended information extra field
        0x0001 => {
            if disallow_zip64 {
                return Err(InvalidArchive(
                    "Can't write a custom field using the ZIP64 ID",
                ));
            }
            file.large_file = true;
            let mut consumed_len = 0;
            // Each 64-bit value is present either when the corresponding
            // 32-bit header field was saturated (== ZIP64_BYTES_THR), or when
            // the record is long enough (>= 24) to hold all three.
            if len >= 24 || file.uncompressed_size == spec::ZIP64_BYTES_THR {
                file.uncompressed_size = reader.read_u64_le()?;
                consumed_len += size_of::<u64>();
            }
            if len >= 24 || file.compressed_size == spec::ZIP64_BYTES_THR {
                file.compressed_size = reader.read_u64_le()?;
                consumed_len += size_of::<u64>();
            }
            if len >= 24 || file.header_start == spec::ZIP64_BYTES_THR {
                file.header_start = reader.read_u64_le()?;
                consumed_len += size_of::<u64>();
            }
            // Skip whatever the record declares beyond what we consumed.
            let Some(leftover_len) = (len as usize).checked_sub(consumed_len) else {
                return Err(InvalidArchive("ZIP64 extra-data field is the wrong length"));
            };
            reader.read_exact(&mut vec![0u8; leftover_len])?;
            // ZIP64 records are stripped from the stored extra data.
            return Ok(true);
        }
        0x000a => {
            // NTFS extra field
            file.extra_fields
                .push(ExtraField::Ntfs(Ntfs::try_from_reader(reader, len)?));
        }
        0x9901 => {
            // AES
            if len != 7 {
                return Err(ZipError::UnsupportedArchive(
                    "AES extra data field has an unsupported length",
                ));
            }
            let vendor_version = reader.read_u16_le()?;
            let vendor_id = reader.read_u16_le()?;
            let mut out = [0u8];
            reader.read_exact(&mut out)?;
            let aes_mode = out[0];
            // The real compression method is stored inside the AES record.
            let compression_method = CompressionMethod::parse_from_u16(reader.read_u16_le()?);

            // 0x4541 is "AE" in little-endian byte order (WinZip AES spec).
            if vendor_id != 0x4541 {
                return Err(InvalidArchive("Invalid AES vendor"));
            }
            let vendor_version = match vendor_version {
                0x0001 => AesVendorVersion::Ae1,
                0x0002 => AesVendorVersion::Ae2,
                _ => return Err(InvalidArchive("Invalid AES vendor version")),
            };
            match aes_mode {
                0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version, compression_method)),
                0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version, compression_method)),
                0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version, compression_method)),
                _ => return Err(InvalidArchive("Invalid AES encryption strength")),
            };
            file.compression_method = compression_method;
            file.aes_extra_data_start = bytes_already_read;
        }
        0x5455 => {
            // extended timestamp
            // https://libzip.org/specifications/extrafld.txt
            file.extra_fields.push(ExtraField::ExtendedTimestamp(
                ExtendedTimestamp::try_from_reader(reader, len)?,
            ));
        }
        0x6375 => {
            // Info-ZIP Unicode Comment Extra Field
            // APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
            // Only replaces the comment when the embedded CRC matches the
            // current comment bytes (`unwrap_valid`).
            file.file_comment = String::from_utf8(
                UnicodeExtraField::try_from_reader(reader, len)?
                    .unwrap_valid(file.file_comment.as_bytes())?
                    .into_vec(),
            )?
            .into();
        }
        0x7075 => {
            // Info-ZIP Unicode Path Extra Field
            // APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
            file.file_name_raw = UnicodeExtraField::try_from_reader(reader, len)?
                .unwrap_valid(&file.file_name_raw)?;
            file.file_name =
                String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
            file.is_utf8 = true;
        }
        _ => {
            // Other fields are ignored; consume their declared length.
            reader.read_exact(&mut vec![0u8; len as usize])?;
        }
    }
    Ok(false)
}
1510
/// A trait for exposing file metadata inside the zip.
pub trait HasZipMetadata {
    /// Get the file metadata: the parsed [`ZipFileData`] record backing this entry.
    fn get_metadata(&self) -> &ZipFileData;
}
1516
1517/// Methods for retrieving information on zip files
1518impl<'a> ZipFile<'a> {
1519    pub(crate) fn take_raw_reader(&mut self) -> io::Result<io::Take<&'a mut dyn Read>> {
1520        mem::replace(&mut self.reader, ZipFileReader::NoReader).into_inner()
1521    }
1522
1523    /// Get the version of the file
1524    pub fn version_made_by(&self) -> (u8, u8) {
1525        (
1526            self.get_metadata().version_made_by / 10,
1527            self.get_metadata().version_made_by % 10,
1528        )
1529    }
1530
1531    /// Get the name of the file
1532    ///
1533    /// # Warnings
1534    ///
1535    /// It is dangerous to use this name directly when extracting an archive.
1536    /// It may contain an absolute path (`/etc/shadow`), or break out of the
1537    /// current directory (`../runtime`). Carelessly writing to these paths
1538    /// allows an attacker to craft a ZIP archive that will overwrite critical
1539    /// files.
1540    ///
1541    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
1542    /// as a safe path.
1543    pub fn name(&self) -> &str {
1544        &self.get_metadata().file_name
1545    }
1546
1547    /// Get the name of the file, in the raw (internal) byte representation.
1548    ///
1549    /// The encoding of this data is currently undefined.
1550    pub fn name_raw(&self) -> &[u8] {
1551        &self.get_metadata().file_name_raw
1552    }
1553
    /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
    /// removes a leading '/' and removes '..' parts.
    #[deprecated(
        since = "0.5.7",
        note = "by stripping `..`s from the path, the meaning of paths can change.
                `mangled_name` can be used if this behaviour is desirable"
    )]
    pub fn sanitized_name(&self) -> PathBuf {
        // Deprecated alias: identical to `mangled_name` (see deprecation note).
        self.mangled_name()
    }
1564
1565    /// Rewrite the path, ignoring any path components with special meaning.
1566    ///
1567    /// - Absolute paths are made relative
1568    /// - [`ParentDir`]s are ignored
1569    /// - Truncates the filename at a NULL byte
1570    ///
1571    /// This is appropriate if you need to be able to extract *something* from
1572    /// any archive, but will easily misrepresent trivial paths like
1573    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
1574    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
1575    ///
1576    /// [`ParentDir`]: `PathBuf::Component::ParentDir`
1577    pub fn mangled_name(&self) -> PathBuf {
1578        self.get_metadata().file_name_sanitized()
1579    }
1580
1581    /// Ensure the file path is safe to use as a [`Path`].
1582    ///
1583    /// - It can't contain NULL bytes
1584    /// - It can't resolve to a path outside the current directory
1585    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
1586    /// - It can't be an absolute path
1587    ///
1588    /// This will read well-formed ZIP files correctly, and is resistant
1589    /// to path-based exploits. It is recommended over
1590    /// [`ZipFile::mangled_name`].
1591    pub fn enclosed_name(&self) -> Option<PathBuf> {
1592        self.get_metadata().enclosed_name()
1593    }
1594
    /// Delegates to `ZipFileData::simplified_components` on this entry's
    /// metadata; `None` signals the path could not be simplified safely.
    pub(crate) fn simplified_components(&self) -> Option<Vec<&OsStr>> {
        self.get_metadata().simplified_components()
    }
1598
    /// Prepare the path for extraction by creating necessary missing directories and checking for symlinks to be contained within the base path.
    ///
    /// `base_path` parameter is assumed to be canonicalized.
    ///
    /// # Errors
    ///
    /// Returns `InvalidArchive` when the entry path cannot be simplified, when
    /// a symlink chain exceeds the fixed follow limit, or when a symlink target
    /// does not inherently stay inside `base_path`.
    pub(crate) fn safe_prepare_path(
        &self,
        base_path: &Path,
        outpath: &mut PathBuf,
        root_dir: Option<&(Vec<&OsStr>, impl RootDirFilter)>,
    ) -> ZipResult<()> {
        let components = self
            .simplified_components()
            .ok_or(InvalidArchive("Invalid file path"))?;

        // When a root directory is being stripped, remove its leading
        // components from this entry's path before extraction.
        let components = match root_dir {
            Some((root_dir, filter)) => match components.strip_prefix(&**root_dir) {
                Some(components) => components,

                // In this case, we expect that the file was not in the root
                // directory, but was filtered out when searching for the
                // root directory.
                None => {
                    // We could technically find ourselves at this code
                    // path if the user provides an unstable or
                    // non-deterministic `filter` function.
                    //
                    // If debug assertions are on, we should panic here.
                    // Otherwise, the safest thing to do here is to just
                    // extract as-is.
                    debug_assert!(
                        !filter(&PathBuf::from_iter(components.iter())),
                        "Root directory filter should not match at this point"
                    );

                    // Extract as-is.
                    &components[..]
                }
            },

            None => &components[..],
        };

        let components_len = components.len();

        // Walk the path one component at a time so every intermediate
        // directory can be created and vetted for symlinks.
        for (is_last, component) in components
            .iter()
            .copied()
            .enumerate()
            .map(|(i, c)| (i == components_len - 1, c))
        {
            // we can skip the target directory itself because the base path is assumed to be "trusted" (if the user say extract to a symlink we can follow it)
            outpath.push(component);

            // check if the path is a symlink, the target must be _inherently_ within the directory
            // The loop re-checks after each link is resolved, up to 5 levels deep.
            for limit in (0..5u8).rev() {
                let meta = match std::fs::symlink_metadata(&outpath) {
                    Ok(meta) => meta,
                    Err(e) if e.kind() == io::ErrorKind::NotFound => {
                        // Nothing exists here yet: create intermediate
                        // directories; the final component is the caller's job.
                        if !is_last {
                            crate::read::make_writable_dir_all(&outpath)?;
                        }
                        break;
                    }
                    Err(e) => return Err(e.into()),
                };

                if !meta.is_symlink() {
                    break;
                }

                // `limit` counts down; hitting 0 means too many chained links.
                if limit == 0 {
                    return Err(InvalidArchive("Extraction followed a symlink too deep"));
                }

                // note that we cannot accept links that do not inherently resolve to a path inside the directory to prevent:
                // - disclosure of unrelated path exists (no check for a path exist and then ../ out)
                // - issues with file-system specific path resolution (case sensitivity, etc)
                let target = std::fs::read_link(&outpath)?;

                if !crate::path::simplified_components(&target)
                    .ok_or(InvalidArchive("Invalid symlink target path"))?
                    .starts_with(
                        &crate::path::simplified_components(base_path)
                            .ok_or(InvalidArchive("Invalid base path"))?,
                    )
                {
                    // Fallback check: walk base_path and the target in lockstep.
                    // The target is accepted only if it matches base_path
                    // component-for-component and then continues with normal
                    // (or `.`) components below it.
                    let is_absolute_enclosed = base_path
                        .components()
                        .map(Some)
                        .chain(std::iter::once(None))
                        .zip(target.components().map(Some).chain(std::iter::repeat(None)))
                        .all(|(a, b)| match (a, b) {
                            // both components are normal
                            (Some(Component::Normal(a)), Some(Component::Normal(b))) => a == b,
                            // both components consumed fully
                            (None, None) => true,
                            // target consumed fully but base path is not
                            (Some(_), None) => false,
                            // base path consumed fully but target is not (and normal)
                            (None, Some(Component::CurDir | Component::Normal(_))) => true,
                            _ => false,
                        });

                    if !is_absolute_enclosed {
                        return Err(InvalidArchive("Symlink is not inherently safe"));
                    }
                }

                // NOTE(review): an accepted target looks effectively absolute and
                // under `base_path`, so `push` replaces `outpath` wholesale and the
                // loop re-checks the resolved location. Confirm a relative target
                // cannot be accepted here (e.g. via a relative `base_path`), since
                // it would be appended after the link component instead.
                outpath.push(target);
            }
        }
        Ok(())
    }
1711
1712    /// Get the comment of the file
1713    pub fn comment(&self) -> &str {
1714        &self.get_metadata().file_comment
1715    }
1716
1717    /// Get the compression method used to store the file
1718    pub fn compression(&self) -> CompressionMethod {
1719        self.get_metadata().compression_method
1720    }
1721
1722    /// Get if the files is encrypted or not
1723    pub fn encrypted(&self) -> bool {
1724        self.data.encrypted
1725    }
1726
1727    /// Get the size of the file, in bytes, in the archive
1728    pub fn compressed_size(&self) -> u64 {
1729        self.get_metadata().compressed_size
1730    }
1731
1732    /// Get the size of the file, in bytes, when uncompressed
1733    pub fn size(&self) -> u64 {
1734        self.get_metadata().uncompressed_size
1735    }
1736
1737    /// Get the time the file was last modified
1738    pub fn last_modified(&self) -> Option<DateTime> {
1739        self.data.last_modified_time
1740    }
1741    /// Returns whether the file is actually a directory
1742    pub fn is_dir(&self) -> bool {
1743        is_dir(self.name())
1744    }
1745
1746    /// Returns whether the file is actually a symbolic link
1747    pub fn is_symlink(&self) -> bool {
1748        self.unix_mode()
1749            .is_some_and(|mode| mode & S_IFLNK == S_IFLNK)
1750    }
1751
1752    /// Returns whether the file is a normal file (i.e. not a directory or symlink)
1753    pub fn is_file(&self) -> bool {
1754        !self.is_dir() && !self.is_symlink()
1755    }
1756
1757    /// Get unix mode for the file
1758    pub fn unix_mode(&self) -> Option<u32> {
1759        self.get_metadata().unix_mode()
1760    }
1761
1762    /// Get the CRC32 hash of the original file
1763    pub fn crc32(&self) -> u32 {
1764        self.get_metadata().crc32
1765    }
1766
1767    /// Get the extra data of the zip header for this file
1768    pub fn extra_data(&self) -> Option<&[u8]> {
1769        self.get_metadata()
1770            .extra_field
1771            .as_ref()
1772            .map(|v| v.deref().deref())
1773    }
1774
1775    /// Get the starting offset of the data of the compressed file
1776    pub fn data_start(&self) -> u64 {
1777        *self.data.data_start.get().unwrap()
1778    }
1779
1780    /// Get the starting offset of the zip header for this file
1781    pub fn header_start(&self) -> u64 {
1782        self.get_metadata().header_start
1783    }
1784    /// Get the starting offset of the zip header in the central directory for this file
1785    pub fn central_header_start(&self) -> u64 {
1786        self.get_metadata().central_header_start
1787    }
1788
1789    /// Get the [`SimpleFileOptions`] that would be used to write this file to
1790    /// a new zip archive.
1791    pub fn options(&self) -> SimpleFileOptions {
1792        let mut options = SimpleFileOptions::default()
1793            .large_file(self.compressed_size().max(self.size()) > ZIP64_BYTES_THR)
1794            .compression_method(self.compression())
1795            .unix_permissions(self.unix_mode().unwrap_or(0o644) | S_IFREG)
1796            .last_modified_time(
1797                self.last_modified()
1798                    .filter(|m| m.is_valid())
1799                    .unwrap_or_else(DateTime::default_for_write),
1800            );
1801
1802        options.normalize();
1803        options
1804    }
1805}
1806
1807/// Methods for retrieving information on zip files
1808impl ZipFile<'_> {
1809    /// iterate through all extra fields
1810    pub fn extra_data_fields(&self) -> impl Iterator<Item = &ExtraField> {
1811        self.data.extra_fields.iter()
1812    }
1813}
1814
1815impl HasZipMetadata for ZipFile<'_> {
1816    fn get_metadata(&self) -> &ZipFileData {
1817        self.data.as_ref()
1818    }
1819}
1820
1821impl Read for ZipFile<'_> {
1822    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1823        self.reader.read(buf)
1824    }
1825
1826    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
1827        self.reader.read_exact(buf)
1828    }
1829
1830    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
1831        self.reader.read_to_end(buf)
1832    }
1833
1834    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
1835        self.reader.read_to_string(buf)
1836    }
1837}
1838
1839impl<R: Read> Read for ZipFileSeek<'_, R> {
1840    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1841        match &mut self.reader {
1842            ZipFileSeekReader::Raw(r) => r.read(buf),
1843        }
1844    }
1845}
1846
1847impl<R: Seek> Seek for ZipFileSeek<'_, R> {
1848    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
1849        match &mut self.reader {
1850            ZipFileSeekReader::Raw(r) => r.seek(pos),
1851        }
1852    }
1853}
1854
1855impl<R> HasZipMetadata for ZipFileSeek<'_, R> {
1856    fn get_metadata(&self) -> &ZipFileData {
1857        self.data.as_ref()
1858    }
1859}
1860
1861impl Drop for ZipFile<'_> {
1862    fn drop(&mut self) {
1863        // self.data is Owned, this reader is constructed by a streaming reader.
1864        // In this case, we want to exhaust the reader so that the next file is accessible.
1865        if let Cow::Owned(_) = self.data {
1866            // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
1867            if let Ok(mut inner) = self.take_raw_reader() {
1868                let _ = copy(&mut inner, &mut sink());
1869            }
1870        }
1871    }
1872}
1873
1874/// Read ZipFile structures from a non-seekable reader.
1875///
1876/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
1877/// as some information will be missing when reading this manner.
1878///
1879/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
1880/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
1881/// is encountered. No more files should be read after this.
1882///
1883/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
1884/// the structure is done.
1885///
1886/// Missing fields are:
1887/// * `comment`: set to an empty string
1888/// * `data_start`: set to 0
1889/// * `external_attributes`: `unix_mode()`: will return None
pub fn read_zipfile_from_stream<R: Read>(reader: &mut R) -> ZipResult<Option<ZipFile<'_>>> {
    // We can't use the typical ::parse() method, as we follow separate code paths depending on the
    // "magic" value (since the magic value will be from the central directory header if we've
    // finished iterating over all the actual files).
    /* TODO: smallvec? */

    // Read one fixed-size local-entry block; the magic is inspected before the
    // rest of the header is byte-order converted.
    let mut block = ZipLocalEntryBlock::zeroed();
    reader.read_exact(block.as_bytes_mut())?;

    match block.magic().from_le() {
        spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (),
        // A central-directory signature means all file entries are exhausted.
        spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
        _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR),
    }

    let block = block.from_le();

    // Consumes the variable-length fields that follow the fixed block from the
    // same stream, so read order matters here.
    let mut result = ZipFileData::from_local_block(block, reader)?;

    // Extra-field parsing is best-effort: I/O errors there are deliberately
    // swallowed, any other parse error is fatal.
    match parse_extra_field(&mut result) {
        Ok(..) | Err(ZipError::Io(..)) => {}
        Err(e) => return Err(e),
    }

    // Bound the reader to the declared compressed size so the caller (and the
    // Drop impl) cannot read past this entry's data.
    let limit_reader = (reader as &mut dyn Read).take(result.compressed_size);

    let result_crc32 = result.crc32;
    let result_compression_method = result.compression_method;
    let crypto_reader = make_crypto_reader(&result, limit_reader, None, None)?;

    Ok(Some(ZipFile {
        data: Cow::Owned(result),
        reader: make_reader(result_compression_method, result_crc32, crypto_reader)?,
    }))
}
1925
/// A filter that determines whether an entry should be ignored when searching
/// for the root directory of a Zip archive.
///
/// Returns `true` if the entry should be considered, and `false` if it should
/// be ignored.
///
/// See [`root_dir_common_filter`] for a sensible default filter.
pub trait RootDirFilter: Fn(&Path) -> bool {}
// Blanket impl: any `Fn(&Path) -> bool` closure or function is a valid filter.
impl<F: Fn(&Path) -> bool> RootDirFilter for F {}
1935
1936/// Common filters when finding the root directory of a Zip archive.
1937///
1938/// This filter is a sensible default for most use cases and filters out common
1939/// system files that are usually irrelevant to the contents of the archive.
1940///
1941/// Currently, the filter ignores:
1942/// - `/__MACOSX/`
1943/// - `/.DS_Store`
1944/// - `/Thumbs.db`
1945///
1946/// **This function is not guaranteed to be stable and may change in future versions.**
1947///
1948/// # Example
1949///
1950/// ```rust
1951/// # use std::path::Path;
1952/// assert!(zip::read::root_dir_common_filter(Path::new("foo.txt")));
1953/// assert!(!zip::read::root_dir_common_filter(Path::new(".DS_Store")));
1954/// assert!(!zip::read::root_dir_common_filter(Path::new("Thumbs.db")));
1955/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX")));
1956/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX/foo.txt")));
1957/// ```
pub fn root_dir_common_filter(path: &Path) -> bool {
    // Files only ignored when they sit at the archive root.
    const COMMON_FILTER_ROOT_FILES: &[&str] = &[".DS_Store", "Thumbs.db"];

    // Anything under the macOS resource-fork directory is ignored outright.
    if path.starts_with("__MACOSX") {
        return false;
    }

    let is_root_entry = path.components().count() == 1;
    let is_common_system_file = path.file_name().is_some_and(|file_name| {
        COMMON_FILTER_ROOT_FILES
            .iter()
            .any(|candidate| OsStr::new(candidate) == file_name)
    });

    !(is_root_entry && is_common_system_file)
}
1978
#[cfg(test)]
mod test {
    use crate::result::ZipResult;
    use crate::write::SimpleFileOptions;
    use crate::CompressionMethod::Stored;
    use crate::{ZipArchive, ZipWriter};
    use std::io::{Cursor, Read, Write};
    use tempfile::TempDir;

    // An archive whose central-directory offset is inconsistent must be
    // rejected when opened.
    #[test]
    fn invalid_offset() {
        use super::ZipArchive;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/invalid_offset.zip"));
        let reader = ZipArchive::new(Cursor::new(v));
        assert!(reader.is_err());
    }

    #[test]
    fn invalid_offset2() {
        use super::ZipArchive;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/invalid_offset2.zip"));
        let reader = ZipArchive::new(Cursor::new(v));
        assert!(reader.is_err());
    }

    #[test]
    fn zip64_with_leading_junk() {
        use super::ZipArchive;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/zip64_demo.zip"));
        let reader = ZipArchive::new(Cursor::new(v)).unwrap();
        assert_eq!(reader.len(), 1);
    }

    #[test]
    fn zip_contents() {
        use super::ZipArchive;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
        assert_eq!(reader.comment(), b"");
        assert_eq!(reader.by_index(0).unwrap().central_header_start(), 77);
    }

    // Drive the streaming API until it reports the central directory (None).
    #[test]
    fn zip_read_streaming() {
        use super::read_zipfile_from_stream;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
        let mut reader = Cursor::new(v);
        loop {
            if read_zipfile_from_stream(&mut reader).unwrap().is_none() {
                break;
            }
        }
    }

    // Cloned archives must read independently from the same backing buffer.
    #[test]
    fn zip_clone() {
        use super::ZipArchive;
        use std::io::Read;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
        let mut reader1 = ZipArchive::new(Cursor::new(v)).unwrap();
        let mut reader2 = reader1.clone();

        let mut file1 = reader1.by_index(0).unwrap();
        let mut file2 = reader2.by_index(0).unwrap();

        let t = file1.last_modified().unwrap();
        assert_eq!(
            (
                t.year(),
                t.month(),
                t.day(),
                t.hour(),
                t.minute(),
                t.second()
            ),
            (1980, 1, 1, 0, 0, 0)
        );

        let mut buf1 = [0; 5];
        let mut buf2 = [0; 5];
        let mut buf3 = [0; 5];
        let mut buf4 = [0; 5];

        // Interleaved reads: each clone keeps its own cursor.
        file1.read_exact(&mut buf1).unwrap();
        file2.read_exact(&mut buf2).unwrap();
        file1.read_exact(&mut buf3).unwrap();
        file2.read_exact(&mut buf4).unwrap();

        assert_eq!(buf1, buf2);
        assert_eq!(buf3, buf4);
        assert_ne!(buf1, buf3);
    }

    #[test]
    fn file_and_dir_predicates() {
        use super::ZipArchive;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/files_and_dirs.zip"));
        let mut zip = ZipArchive::new(Cursor::new(v)).unwrap();

        for i in 0..zip.len() {
            let zip_file = zip.by_index(i).unwrap();
            let full_name = zip_file.enclosed_name().unwrap();
            let file_name = full_name.file_name().unwrap().to_str().unwrap();
            assert!(
                (file_name.starts_with("dir") && zip_file.is_dir())
                    || (file_name.starts_with("file") && zip_file.is_file())
            );
        }
    }

    #[test]
    fn zip64_magic_in_filenames() {
        let files = vec![
            include_bytes!("../tests/data/zip64_magic_in_filename_1.zip").to_vec(),
            include_bytes!("../tests/data/zip64_magic_in_filename_2.zip").to_vec(),
            include_bytes!("../tests/data/zip64_magic_in_filename_3.zip").to_vec(),
            include_bytes!("../tests/data/zip64_magic_in_filename_4.zip").to_vec(),
            include_bytes!("../tests/data/zip64_magic_in_filename_5.zip").to_vec(),
        ];
        // Although we don't allow adding files whose names contain the ZIP64 CDB-end or
        // CDB-end-locator signatures, we still read them when they aren't genuinely ambiguous.
        for file in files {
            ZipArchive::new(Cursor::new(file)).unwrap();
        }
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        use super::ZipArchive;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        ));
        let reader = ZipArchive::new(Cursor::new(v));
        assert!(reader.is_err() || reader.unwrap().is_empty());
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        use super::ZipArchive;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        ));
        let reader = ZipArchive::new(Cursor::new(v));
        assert!(reader.is_err());
    }

    #[cfg(feature = "deflate64")]
    #[test]
    fn deflate64_index_out_of_bounds() -> std::io::Result<()> {
        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!(
            "../tests/data/raw_deflate64_index_out_of_bounds.zip"
        ));
        let mut reader = ZipArchive::new(Cursor::new(v))?;
        std::io::copy(&mut reader.by_index(0)?, &mut std::io::sink()).expect_err("Invalid file");
        Ok(())
    }

    #[cfg(feature = "deflate64")]
    #[test]
    fn deflate64_not_enough_space() {
        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/deflate64_issue_25.zip"));
        ZipArchive::new(Cursor::new(v)).expect_err("Invalid file");
    }

    #[cfg(feature = "_deflate-any")]
    #[test]
    fn test_read_with_data_descriptor() {
        use std::io::Read;

        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/data_descriptor.zip"));
        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
        let mut decompressed = [0u8; 16];
        let mut file = reader.by_index(0).unwrap();
        assert_eq!(file.read(&mut decompressed).unwrap(), 12);
    }

    // Symlink entries must be detected and recreated as real symlinks on disk.
    #[test]
    fn test_is_symlink() -> std::io::Result<()> {
        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/symlink.zip"));
        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
        assert!(reader.by_index(0).unwrap().is_symlink());
        let tempdir = TempDir::with_prefix("test_is_symlink")?;
        reader.extract(&tempdir).unwrap();
        assert!(tempdir.path().join("bar").is_symlink());
        Ok(())
    }

    #[test]
    #[cfg(feature = "_deflate-any")]
    fn test_utf8_extra_field() {
        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/chinese.zip"));
        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
        reader.by_name("七个房间.txt").unwrap();
    }

    #[test]
    fn test_utf8() {
        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/linux-7z.zip"));
        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
        reader.by_name("你好.txt").unwrap();
    }

    #[test]
    fn test_utf8_2() {
        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/windows-7zip.zip"));
        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
        reader.by_name("你好.txt").unwrap();
    }

    // Round-trips 65536 entries, crossing the u16 entry-count boundary.
    #[test]
    fn test_64k_files() -> ZipResult<()> {
        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
        let options = SimpleFileOptions {
            compression_method: Stored,
            ..Default::default()
        };
        for i in 0..=u16::MAX {
            let file_name = format!("{i}.txt");
            writer.start_file(&*file_name, options)?;
            writer.write_all(i.to_string().as_bytes())?;
        }

        let mut reader = ZipArchive::new(writer.finish()?)?;
        for i in 0..=u16::MAX {
            let expected_name = format!("{i}.txt");
            let expected_contents = i.to_string();
            let expected_contents = expected_contents.as_bytes();
            let mut file = reader.by_name(&expected_name)?;
            let mut contents = Vec::with_capacity(expected_contents.len());
            file.read_to_end(&mut contents)?;
            assert_eq!(contents, expected_contents);
            drop(file);
            contents.clear();
            let mut file = reader.by_index(i as usize)?;
            file.read_to_end(&mut contents)?;
            assert_eq!(contents, expected_contents);
        }
        Ok(())
    }

    /// Symlinks being extracted shouldn't be followed out of the destination directory.
    #[test]
    fn test_cannot_symlink_outside_destination() -> ZipResult<()> {
        use std::fs::create_dir;

        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
        writer.add_symlink("symlink/", "../dest-sibling/", SimpleFileOptions::default())?;
        writer.start_file("symlink/dest-file", SimpleFileOptions::default())?;
        let mut reader = writer.finish_into_readable()?;
        let dest_parent =
            TempDir::with_prefix("read__test_cannot_symlink_outside_destination").unwrap();
        let dest_sibling = dest_parent.path().join("dest-sibling");
        create_dir(&dest_sibling)?;
        let dest = dest_parent.path().join("dest");
        create_dir(&dest)?;
        // Extraction must fail and must not have written through the symlink.
        assert!(reader.extract(dest).is_err());
        assert!(!dest_sibling.join("dest-file").exists());
        Ok(())
    }

    #[test]
    fn test_can_create_destination() -> ZipResult<()> {
        let mut v = Vec::new();
        v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
        let mut reader = ZipArchive::new(Cursor::new(v))?;
        let dest = TempDir::with_prefix("read__test_can_create_destination").unwrap();
        reader.extract(&dest)?;
        assert!(dest.path().join("mimetype").exists());
        Ok(())
    }
}