// Copyright 2011 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ #include #include #include #include #include #include "base/files/file.h" #include "base/files/file_path.h" #include "base/functional/callback.h" #include "base/memory/weak_ptr.h" #include "base/numerics/safe_conversions.h" #include "base/time/time.h" #if defined(USE_SYSTEM_MINIZIP) #include #else #include "third_party/zlib/contrib/minizip/unzip.h" #endif namespace zip { // A delegate interface used to stream out an entry; see // ZipReader::ExtractCurrentEntry. class WriterDelegate { public: virtual ~WriterDelegate() {} // Invoked once before any data is streamed out to pave the way (e.g., to open // the output file). Return false on failure to cancel extraction. virtual bool PrepareOutput() { return true; } // Invoked to write the next chunk of data. Return false on failure to cancel // extraction. virtual bool WriteBytes(const char* data, int num_bytes) { return true; } // Sets the last-modified time of the data. virtual void SetTimeModified(const base::Time& time) {} // Called with the POSIX file permissions of the data; POSIX implementations // may apply some of the permissions (for example, the executable bit) to the // output file. virtual void SetPosixFilePermissions(int mode) {} // Called if an error occurred while extracting the file. The WriterDelegate // can then remove and clean up the partially extracted data. virtual void OnError() {} }; // This class is used for reading ZIP archives. A typical use case of this class // is to scan entries in a ZIP archive and extract them. The code will look // like: // // ZipReader reader; // if (!reader.Open(zip_path)) { // // Cannot open // return; // } // // while (const ZipReader::entry* entry = reader.Next()) { // auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path); // if (!reader.ExtractCurrentEntry(writer)) { // // Cannot extract // return; // } // } // // if (!reader.ok()) { // // Error while enumerating entries // return; // } // class ZipReader { public: // A callback that is called when the operation is successful. using SuccessCallback = base::OnceClosure; // A callback that is called when the operation fails. using FailureCallback = base::OnceClosure; // A callback that is called periodically during the operation with the number // of bytes that have been processed so far. using ProgressCallback = base::RepeatingCallback; // A callback that is called periodically during the operation with the number // of bytes that have been processed since the previous call (i.e. delta). using ListenerCallback = base::RepeatingCallback; // Information of an entry (file or directory) in a ZIP archive. struct Entry { // Path of this entry, in its original encoding as it is stored in the ZIP // archive. The encoding is not specified here. It might or might not be // UTF-8, and the caller needs to use other means to determine the encoding // if it wants to interpret this path correctly. std::string path_in_original_encoding; // Path of the entry, converted to Unicode. This path is relative (eg // "foo/bar.txt"). Absolute paths (eg "/foo/bar.txt") or paths containing // ".." or "." components (eg "../foo/bar.txt") are converted to safe // relative paths. Eg: // (In ZIP) -> (Entry.path) // /foo/bar -> ROOT/foo/bar // ../a -> UP/a // ./a -> DOT/a base::FilePath path; // Size of the original uncompressed file, or 0 if the entry is a directory. // This value should not be trusted, because it is stored as metadata in the // ZIP archive and can be different from the real uncompressed size. int64_t original_size; // Last modified time. If the timestamp stored in the ZIP archive is not // valid, the Unix epoch will be returned. // // The timestamp stored in the ZIP archive uses the MS-DOS date and time // format. // // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx // // As such the following limitations apply: // * Only years from 1980 to 2107 can be represented. // * The timestamp has a 2-second resolution. // * There is no timezone information, so the time is interpreted as UTC. base::Time last_modified; // True if the entry is a directory. // False if the entry is a file. bool is_directory = false; // True if the entry path cannot be converted to a safe relative path. This // happens if a file entry (not a directory) has a filename "." or "..". bool is_unsafe = false; // True if the file content is encrypted. bool is_encrypted = false; // True if the encryption scheme is AES. bool uses_aes_encryption = false; // Entry POSIX permissions (POSIX systems only). int posix_mode; }; ZipReader(); ZipReader(const ZipReader&) = delete; ZipReader& operator=(const ZipReader&) = delete; ~ZipReader(); // Opens the ZIP archive specified by |zip_path|. Returns true on // success. bool Open(const base::FilePath& zip_path); // Opens the ZIP archive referred to by the platform file |zip_fd|, without // taking ownership of |zip_fd|. Returns true on success. bool OpenFromPlatformFile(base::PlatformFile zip_fd); // Opens the zip data stored in |data|. This class uses a weak reference to // the given sring while extracting files, i.e. the caller should keep the // string until it finishes extracting files. bool OpenFromString(const std::string& data); // Closes the currently opened ZIP archive. This function is called in the // destructor of the class, so you usually don't need to call this. void Close(); // Sets the encoding of entry paths in the ZIP archive. // By default, paths are assumed to be in UTF-8. void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); } // Sets the decryption password that will be used to decrypt encrypted file in // the ZIP archive. void SetPassword(std::string password) { password_ = std::move(password); } // Gets the next entry. Returns null if there is no more entry, or if an error // occurred while scanning entries. The returned Entry is owned by this // ZipReader, and is valid until Next() is called again or until this // ZipReader is closed. // // This function should be called before operations over the current entry // like ExtractCurrentEntryToFile(). // // while (const ZipReader::Entry* entry = reader.Next()) { // // Do something with the current entry here. // ... // } // // // Finished scanning entries. // // Check if the scanning stopped because of an error. // if (!reader.ok()) { // // There was an error. // ... // } const Entry* Next(); // Returns true if the enumeration of entries was successful, or false if it // stopped because of an error. bool ok() const { return ok_; } // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, // starting from the beginning of the entry. // // Returns true if the entire file was extracted without error. // // Precondition: Next() returned a non-null Entry. bool ExtractCurrentEntry(WriterDelegate* delegate, uint64_t num_bytes_to_extract = std::numeric_limits::max()) const; // Extracts the current entry to |delegate|, starting from the beginning // of the entry, calling |listener_callback| regularly with the number of // bytes extracted. // // Returns true if the entire file was extracted without error. // // Precondition: Next() returned a non-null Entry. bool ExtractCurrentEntryWithListener( WriterDelegate* delegate, ListenerCallback listener_callback) const; // Asynchronously extracts the current entry to the given output file path. If // the current entry is a directory it just creates the directory // synchronously instead. // // |success_callback| will be called on success and |failure_callback| will be // called on failure. |progress_callback| will be called at least once. // Callbacks will be posted to the current MessageLoop in-order. // // Precondition: Next() returned a non-null Entry. void ExtractCurrentEntryToFilePathAsync( const base::FilePath& output_file_path, SuccessCallback success_callback, FailureCallback failure_callback, ProgressCallback progress_callback); // Extracts the current entry into memory. If the current entry is a // directory, |*output| is set to the empty string. If the current entry is a // file, |*output| is filled with its contents. // // The value in |Entry::original_size| cannot be trusted, so the real size of // the uncompressed contents can be different. |max_read_bytes| limits the // amount of memory used to carry the entry. // // Returns true if the entire content is read without error. If the content is // bigger than |max_read_bytes|, this function returns false and |*output| is // filled with |max_read_bytes| of data. If an error occurs, this function // returns false and |*output| contains the content extracted so far, which // might be garbage data. // // Precondition: Next() returned a non-null Entry. bool ExtractCurrentEntryToString(uint64_t max_read_bytes, std::string* output) const; bool ExtractCurrentEntryToString(std::string* output) const { return ExtractCurrentEntryToString( base::checked_cast(output->max_size()), output); } // Returns the number of entries in the ZIP archive. // // Precondition: one of the Open() methods returned true. int num_entries() const { return num_entries_; } private: // Common code used both in Open and OpenFromFd. bool OpenInternal(); // Resets the internal state. void Reset(); // Opens the current entry in the ZIP archive. On success, returns true and // updates the current entry state |entry_|. // // Note that there is no matching CloseEntry(). The current entry state is // reset automatically as needed. bool OpenEntry(); // Normalizes the given path passed as UTF-16 string piece. Sets entry_.path, // entry_.is_directory and entry_.is_unsafe. void Normalize(base::StringPiece16 in); // Runs the ListenerCallback at a throttled rate. void ReportProgress(ListenerCallback listener_callback, uint64_t bytes) const; // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, // starting from the beginning of the entry calling |listener_callback| if // its supplied. // // Returns true if the entire file was extracted without error. // // Precondition: Next() returned a non-null Entry. bool ExtractCurrentEntry(WriterDelegate* delegate, ListenerCallback listener_callback, uint64_t num_bytes_to_extract = std::numeric_limits::max()) const; // Extracts a chunk of the file to the target. Will post a task for the next // chunk and success/failure/progress callbacks as necessary. void ExtractChunk(base::File target_file, SuccessCallback success_callback, FailureCallback failure_callback, ProgressCallback progress_callback, const int64_t offset); std::string encoding_; std::string password_; unzFile zip_file_; int num_entries_; int next_index_; bool reached_end_; bool ok_; Entry entry_; // Next time to report progress. mutable base::TimeTicks next_progress_report_time_ = base::TimeTicks::Now(); // Progress time delta. // TODO(crbug.com/953256) Add this as parameter to the unzip options. base::TimeDelta progress_period_ = base::Milliseconds(1000); // Number of bytes read since last progress report callback executed. mutable uint64_t delta_bytes_read_ = 0; base::WeakPtrFactory weak_ptr_factory_{this}; }; // A writer delegate that writes to a given File. It is recommended that this // file be initially empty. class FileWriterDelegate : public WriterDelegate { public: // Constructs a FileWriterDelegate that manipulates |file|. The delegate will // not own |file|, therefore the caller must guarantee |file| will outlive the // delegate. explicit FileWriterDelegate(base::File* file); // Constructs a FileWriterDelegate that takes ownership of |file|. explicit FileWriterDelegate(base::File owned_file); FileWriterDelegate(const FileWriterDelegate&) = delete; FileWriterDelegate& operator=(const FileWriterDelegate&) = delete; ~FileWriterDelegate() override; // Returns true if the file handle passed to the constructor is valid. bool PrepareOutput() override; // Writes |num_bytes| bytes of |data| to the file, returning false on error or // if not all bytes could be written. bool WriteBytes(const char* data, int num_bytes) override; // Sets the last-modified time of the data. void SetTimeModified(const base::Time& time) override; // On POSIX systems, sets the file to be executable if the source file was // executable. void SetPosixFilePermissions(int mode) override; // Empties the file to avoid leaving garbage data in it. void OnError() override; // Gets the number of bytes written into the file. int64_t file_length() { return file_length_; } protected: // The delegate can optionally own the file it modifies, in which case // owned_file_ is set and file_ is an alias for owned_file_. base::File owned_file_; // The file the delegate modifies. base::File* const file_ = &owned_file_; int64_t file_length_ = 0; }; // A writer delegate that creates and writes a file at a given path. This does // not overwrite any existing file. class FilePathWriterDelegate : public FileWriterDelegate { public: explicit FilePathWriterDelegate(base::FilePath output_file_path); FilePathWriterDelegate(const FilePathWriterDelegate&) = delete; FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete; ~FilePathWriterDelegate() override; // Creates the output file and any necessary intermediate directories. Does // not overwrite any existing file, and returns false if the output file // cannot be created because another file conflicts with it. bool PrepareOutput() override; // Deletes the output file. void OnError() override; private: const base::FilePath output_file_path_; }; } // namespace zip #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_