Skip to content

Commit c934c82

Browse files
committed Jun 22, 2024
fix: Some archives with over u16::MAX files were handled incorrectly or slowly (#189)
1 parent 8bb3be0 commit c934c82

File tree

1 file changed

+119
-56
lines changed

1 file changed

+119
-56
lines changed
 

‎src/read.rs

+119 -56
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,6 @@ pub(crate) mod zip_archive {
6969
pub(crate) files: Vec<super::ZipFileData>,
7070
pub(super) offset: u64,
7171
pub(super) dir_start: u64,
72-
pub(super) dir_end: u64,
7372
// This isn't yet used anywhere, but it is here for use cases in the future.
7473
#[allow(dead_code)]
7574
pub(super) config: super::Config,
@@ -405,9 +404,11 @@ pub(crate) fn make_reader(
405404
pub(crate) struct CentralDirectoryInfo {
406405
pub(crate) archive_offset: u64,
407406
pub(crate) directory_start: u64,
407+
pub(crate) cde_position: u64,
408408
pub(crate) number_of_files: usize,
409409
pub(crate) disk_number: u32,
410410
pub(crate) disk_with_central_directory: u32,
411+
pub(crate) is_zip64: bool,
411412
}
412413

413414
impl<R> ZipArchive<R> {
@@ -560,6 +561,8 @@ impl<R: Read + Seek> ZipArchive<R> {
560561
number_of_files,
561562
disk_number: footer.disk_number as u32,
562563
disk_with_central_directory: footer.disk_with_central_directory as u32,
564+
cde_position: cde_start_pos,
565+
is_zip64: false
563566
})
564567
}
565568

@@ -662,6 +665,8 @@ impl<R: Read + Seek> ZipArchive<R> {
662665
number_of_files: footer64.number_of_files as usize,
663666
disk_number: footer64.disk_number,
664667
disk_with_central_directory: footer64.disk_with_central_directory,
668+
cde_position: cde_start_pos,
669+
is_zip64: true,
665670
})
666671
}
667672
}).collect();
@@ -674,56 +679,93 @@ impl<R: Read + Seek> ZipArchive<R> {
674679
config: Config,
675680
reader: &mut R,
676681
) -> ZipResult<(Zip32CentralDirectoryEnd, Shared)> {
677-
let mut invalid_errors = Vec::new();
678-
let mut unsupported_errors = Vec::new();
682+
let mut invalid_errors_32 = Vec::new();
683+
let mut unsupported_errors_32 = Vec::new();
684+
let mut invalid_errors_64 = Vec::new();
685+
let mut unsupported_errors_64 = Vec::new();
679686
let mut ok_results = Vec::new();
680687
let cde_locations = spec::Zip32CentralDirectoryEnd::find_and_parse(reader)?;
681-
cde_locations
682-
.into_vec()
683-
.into_iter()
684-
.for_each(|(footer, cde_start_pos)| {
688+
cde_locations.into_vec().into_iter().for_each(|(footer, cde_start_pos)| {
685689
let zip32_result =
686690
Self::get_directory_info_zip32(&config, reader, &footer, cde_start_pos);
687691
Self::sort_result(
688-
zip32_result
689-
.and_then(|result| Self::read_central_header(result, config, reader)),
690-
&mut invalid_errors,
691-
&mut unsupported_errors,
692+
zip32_result,
693+
&mut invalid_errors_32,
694+
&mut unsupported_errors_32,
692695
&mut ok_results,
693696
&footer,
694697
);
698+
let mut inner_results = Vec::with_capacity(1);
695699
// Check if file has a zip64 footer
696-
if let Ok(zip64_footers) =
697-
Self::get_directory_info_zip64(&config, reader, &footer, cde_start_pos)
700+
let zip64_vec_result =
701+
Self::get_directory_info_zip64(&config, reader, &footer, cde_start_pos);
702+
Self::sort_result(
703+
zip64_vec_result,
704+
&mut invalid_errors_64,
705+
&mut unsupported_errors_64,
706+
&mut inner_results,
707+
&(),
708+
);
709+
inner_results.into_iter().for_each(|(_, results)| {
710+
results.into_iter().for_each(|result| {
711+
Self::sort_result(
712+
result,
713+
&mut invalid_errors_64,
714+
&mut unsupported_errors_64,
715+
&mut ok_results,
716+
&footer,
717+
);
718+
});
719+
});
720+
}
721+
);
722+
ok_results.sort_by_key(|(_, result)| (
723+
!result.is_zip64, // try ZIP64 first
724+
u64::MAX - result.cde_position, // try the last one first
725+
));
726+
let mut best_result = None;
727+
for (footer, result) in ok_results {
728+
let mut inner_result = Vec::with_capacity(1);
729+
let is_zip64 = result.is_zip64;
730+
Self::sort_result(
731+
Self::read_central_header(result, config, reader),
732+
if is_zip64 {
733+
&mut invalid_errors_64
734+
} else {
735+
&mut invalid_errors_32
736+
},
737+
if is_zip64 {
738+
&mut unsupported_errors_64
739+
} else {
740+
&mut unsupported_errors_32
741+
},
742+
&mut inner_result,
743+
&()
744+
);
745+
if let Some((_, shared)) = inner_result.into_iter().next() {
746+
if shared.files.len() == footer.number_of_files as usize
747+
|| (is_zip64 && footer.number_of_files == ZIP64_ENTRY_THR as u16)
698748
{
699-
zip64_footers
700-
.into_iter()
701-
.map(|result| {
702-
result.and_then(|dir_info| {
703-
Self::read_central_header(dir_info, config, reader)
704-
})
705-
})
706-
.for_each(|result| {
707-
Self::sort_result(
708-
result,
709-
&mut invalid_errors,
710-
&mut unsupported_errors,
711-
&mut ok_results,
712-
&footer,
713-
)
714-
});
749+
best_result = Some((footer, shared));
750+
break;
751+
} else {
752+
if is_zip64 {
753+
&mut invalid_errors_64
754+
} else {
755+
&mut invalid_errors_32
756+
}.push(InvalidArchive("wrong number of files"))
715757
}
716-
});
717-
if ok_results.is_empty() {
718-
return Err(unsupported_errors
719-
.into_iter()
720-
.next()
721-
.unwrap_or_else(|| invalid_errors.into_iter().next().unwrap()));
758+
}
722759
}
723-
let (footer, shared) = ok_results
724-
.into_iter()
725-
.max_by_key(|(_, shared)| (shared.dir_end, u64::MAX - shared.dir_start))
726-
.unwrap();
760+
let Some((footer, shared)) = best_result else {
761+
return Err(unsupported_errors_32.into_iter()
762+
.chain(unsupported_errors_64.into_iter())
763+
.chain(invalid_errors_32.into_iter())
764+
.chain(invalid_errors_64.into_iter())
765+
766+
.next()
767+
.unwrap());
768+
};
727769
reader.seek(io::SeekFrom::Start(shared.dir_start))?;
728770
Ok((Rc::try_unwrap(footer).unwrap(), shared.build()))
729771
}
@@ -749,37 +791,27 @@ impl<R: Read + Seek> ZipArchive<R> {
749791
let file = central_header_to_zip_file(reader, dir_info.archive_offset)?;
750792
files.push(file);
751793
}
752-
let dir_end = reader.stream_position()?;
753794
Ok(SharedBuilder {
754795
files,
755796
offset: dir_info.archive_offset,
756797
dir_start: dir_info.directory_start,
757-
dir_end,
758798
config,
759799
})
760800
}
761801

762-
fn sort_result(
763-
result: Result<SharedBuilder, ZipError>,
802+
fn sort_result<T, U: Clone>(
803+
result: ZipResult<T>,
764804
invalid_errors: &mut Vec<ZipError>,
765805
unsupported_errors: &mut Vec<ZipError>,
766-
ok_results: &mut Vec<(Rc<Zip32CentralDirectoryEnd>, SharedBuilder)>,
767-
footer: &Rc<Zip32CentralDirectoryEnd>,
806+
ok_results: &mut Vec<(U, T)>,
807+
footer: &U,
768808
) {
769809
match result {
770810
Err(ZipError::UnsupportedArchive(e)) => {
771811
unsupported_errors.push(ZipError::UnsupportedArchive(e))
772812
}
773813
Err(e) => invalid_errors.push(e),
774-
Ok(o) => {
775-
if o.files.len() == footer.number_of_files as usize
776-
|| footer.number_of_files == ZIP64_ENTRY_THR as u16
777-
{
778-
ok_results.push((footer.clone(), o))
779-
} else {
780-
invalid_errors.push(InvalidArchive("wrong number of files"))
781-
}
782-
}
814+
Ok(o) => ok_results.push((footer.clone(), o)),
783815
}
784816
}
785817

@@ -1659,9 +1691,12 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Opt
16591691

16601692
#[cfg(test)]
16611693
mod test {
1662-
use crate::ZipArchive;
1663-
use std::io::Cursor;
1694+
use crate::{ZipArchive, ZipWriter};
1695+
use std::io::{Cursor, Read, Write};
16641696
use tempdir::TempDir;
1697+
use crate::CompressionMethod::Stored;
1698+
use crate::result::ZipResult;
1699+
use crate::write::SimpleFileOptions;
16651700

16661701
#[test]
16671702
fn invalid_offset() {
@@ -1893,4 +1928,32 @@ mod test {
18931928
let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
18941929
reader.by_name("你好.txt").unwrap();
18951930
}
1931+
1932+
#[test]
1933+
fn test_64k_files() -> ZipResult<()> {
1934+
let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
1935+
let options = SimpleFileOptions {compression_method: Stored, ..Default::default()};
1936+
for i in 0..=u16::MAX {
1937+
let file_name = format!("{i}.txt");
1938+
writer.start_file(&*file_name, options)?;
1939+
writer.write_all(i.to_string().as_bytes())?;
1940+
}
1941+
1942+
let mut reader = ZipArchive::new(writer.finish()?)?;
1943+
for i in 0..=u16::MAX {
1944+
let expected_name = format!("{i}.txt");
1945+
let expected_contents = i.to_string();
1946+
let expected_contents = expected_contents.as_bytes();
1947+
let mut file = reader.by_name(&expected_name)?;
1948+
let mut contents = Vec::with_capacity(expected_contents.len());
1949+
file.read_to_end(&mut contents)?;
1950+
assert_eq!(contents, expected_contents);
1951+
drop(file);
1952+
contents.clear();
1953+
let mut file = reader.by_index(i as usize)?;
1954+
file.read_to_end(&mut contents)?;
1955+
assert_eq!(contents, expected_contents);
1956+
}
1957+
Ok(())
1958+
}
18961959
}

0 commit comments

Comments
 (0)
Please sign in to comment.