d4066a4ac5
Change the way zip archives are handled. This is necessary to deal with very large (~1GB) APK files, for which our current approach of mapping the entire file falls over. We now do the classic scavenger hunt for the End Of Central Directory magic on a buffer of data read from the file, instead of a memory-mapped section. We use what we find to create a map that covers the Central Directory only. If the caller is interested in unpacking the file contents, we have to do an additional file read to discover the size of the Local File Header section so we can skip past it. This is based on Change I745fb15abb in the dalvik tree. Both implementations share a common ancestry, but the cost of unifying them outweighs the benefits of wrapping C calls. Change-Id: Iddacb50fe913917c2845708a530872d65fdbe620
240 lines
7.3 KiB
C++
240 lines
7.3 KiB
C++
/*
|
|
* Copyright (C) 2007 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
//
|
|
// Read-only access to Zip archives, with minimal heap allocation.
|
|
//
|
|
// This is similar to the more-complete ZipFile class, but no attempt
|
|
// has been made to make them interchangeable. This class operates under
|
|
// a very different set of assumptions and constraints.
|
|
//
|
|
#ifndef __LIBS_ZIPFILERO_H
|
|
#define __LIBS_ZIPFILERO_H
|
|
|
|
#include "Errors.h"
|
|
#include "FileMap.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
|
|
namespace android {
|
|
|
|
/*
|
|
* Trivial typedef to ensure that ZipEntryRO is not treated as a simple
|
|
* integer. We use NULL to indicate an invalid value.
|
|
*/
|
|
typedef void* ZipEntryRO;
|
|
|
|
/*
|
|
* Open a Zip archive for reading.
|
|
*
|
|
* We want "open" and "find entry by name" to be fast operations, and we
|
|
* want to use as little memory as possible. We memory-map the file,
|
|
* and load a hash table with pointers to the filenames (which aren't
|
|
* null-terminated). The other fields are at a fixed offset from the
|
|
* filename, so we don't need to extract those (but we do need to byte-read
|
|
* and endian-swap them every time we want them).
|
|
*
|
|
* To speed comparisons when doing a lookup by name, we could make the mapping
|
|
* "private" (copy-on-write) and null-terminate the filenames after verifying
|
|
* the record structure. However, this requires a private mapping of
|
|
* every page that the Central Directory touches. Easier to tuck a copy
|
|
* of the string length into the hash table entry.
|
|
*/
|
|
class ZipFileRO {
|
|
public:
|
|
ZipFileRO()
|
|
: mFd(-1), mFileName(NULL), mFileLength(-1),
|
|
mDirectoryMap(NULL),
|
|
mNumEntries(-1), mDirectoryOffset(-1),
|
|
mHashTableSize(-1), mHashTable(NULL)
|
|
{}
|
|
~ZipFileRO() {
|
|
free(mHashTable);
|
|
if (mDirectoryMap)
|
|
mDirectoryMap->release();
|
|
if (mFd >= 0)
|
|
close(mFd);
|
|
if (mFileName)
|
|
free(mFileName);
|
|
}
|
|
|
|
/*
|
|
* Open an archive.
|
|
*/
|
|
status_t open(const char* zipFileName);
|
|
|
|
/*
|
|
* Find an entry, by name. Returns the entry identifier, or NULL if
|
|
* not found.
|
|
*
|
|
* If two entries have the same name, one will be chosen at semi-random.
|
|
*/
|
|
ZipEntryRO findEntryByName(const char* fileName) const;
|
|
|
|
/*
|
|
* Return the #of entries in the Zip archive.
|
|
*/
|
|
int getNumEntries(void) const {
|
|
return mNumEntries;
|
|
}
|
|
|
|
/*
|
|
* Return the Nth entry. Zip file entries are not stored in sorted
|
|
* order, and updated entries may appear at the end, so anyone walking
|
|
* the archive needs to avoid making ordering assumptions. We take
|
|
* that further by returning the Nth non-empty entry in the hash table
|
|
* rather than the Nth entry in the archive.
|
|
*
|
|
* Valid values are [0..numEntries).
|
|
*
|
|
* [This is currently O(n). If it needs to be fast we can allocate an
|
|
* additional data structure or provide an iterator interface.]
|
|
*/
|
|
ZipEntryRO findEntryByIndex(int idx) const;
|
|
|
|
/*
|
|
* Copy the filename into the supplied buffer. Returns 0 on success,
|
|
* -1 if "entry" is invalid, or the filename length if it didn't fit. The
|
|
* length, and the returned string, include the null-termination.
|
|
*/
|
|
int getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) const;
|
|
|
|
/*
|
|
* Get the vital stats for an entry. Pass in NULL pointers for anything
|
|
* you don't need.
|
|
*
|
|
* "*pOffset" holds the Zip file offset of the entry's data.
|
|
*
|
|
* Returns "false" if "entry" is bogus or if the data in the Zip file
|
|
* appears to be bad.
|
|
*/
|
|
bool getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
|
|
size_t* pCompLen, off_t* pOffset, long* pModWhen, long* pCrc32) const;
|
|
|
|
/*
|
|
* Create a new FileMap object that maps a subset of the archive. For
|
|
* an uncompressed entry this effectively provides a pointer to the
|
|
* actual data, for a compressed entry this provides the input buffer
|
|
* for inflate().
|
|
*/
|
|
FileMap* createEntryFileMap(ZipEntryRO entry) const;
|
|
|
|
/*
|
|
* Uncompress the data into a buffer. Depending on the compression
|
|
* format, this is either an "inflate" operation or a memcpy.
|
|
*
|
|
* Use "uncompLen" from getEntryInfo() to determine the required
|
|
* buffer size.
|
|
*
|
|
* Returns "true" on success.
|
|
*/
|
|
bool uncompressEntry(ZipEntryRO entry, void* buffer) const;
|
|
|
|
/*
|
|
* Uncompress the data to an open file descriptor.
|
|
*/
|
|
bool uncompressEntry(ZipEntryRO entry, int fd) const;
|
|
|
|
/* Zip compression methods we support */
|
|
enum {
|
|
kCompressStored = 0, // no compression
|
|
kCompressDeflated = 8, // standard deflate
|
|
};
|
|
|
|
/*
|
|
* Utility function: uncompress deflated data, buffer to buffer.
|
|
*/
|
|
static bool inflateBuffer(void* outBuf, const void* inBuf,
|
|
size_t uncompLen, size_t compLen);
|
|
|
|
/*
|
|
* Utility function: uncompress deflated data, buffer to fd.
|
|
*/
|
|
static bool inflateBuffer(int fd, const void* inBuf,
|
|
size_t uncompLen, size_t compLen);
|
|
|
|
/*
|
|
* Some basic functions for raw data manipulation. "LE" means
|
|
* Little Endian.
|
|
*/
|
|
static inline unsigned short get2LE(const unsigned char* buf) {
|
|
return buf[0] | (buf[1] << 8);
|
|
}
|
|
static inline unsigned long get4LE(const unsigned char* buf) {
|
|
return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
|
|
}
|
|
|
|
private:
|
|
/* these are private and not defined */
|
|
ZipFileRO(const ZipFileRO& src);
|
|
ZipFileRO& operator=(const ZipFileRO& src);
|
|
|
|
/* locate and parse the central directory */
|
|
bool mapCentralDirectory(void);
|
|
|
|
/* parse the archive, prepping internal structures */
|
|
bool parseZipArchive(void);
|
|
|
|
/* add a new entry to the hash table */
|
|
void addToHash(const char* str, int strLen, unsigned int hash);
|
|
|
|
/* compute string hash code */
|
|
static unsigned int computeHash(const char* str, int len);
|
|
|
|
/* convert a ZipEntryRO back to a hash table index */
|
|
int entryToIndex(const ZipEntryRO entry) const;
|
|
|
|
/*
|
|
* One entry in the hash table.
|
|
*/
|
|
typedef struct HashEntry {
|
|
const char* name;
|
|
unsigned short nameLen;
|
|
//unsigned int hash;
|
|
} HashEntry;
|
|
|
|
/* open Zip archive */
|
|
int mFd;
|
|
|
|
/* zip file name */
|
|
char* mFileName;
|
|
|
|
/* length of file */
|
|
size_t mFileLength;
|
|
|
|
/* mapped file */
|
|
FileMap* mDirectoryMap;
|
|
|
|
/* number of entries in the Zip archive */
|
|
int mNumEntries;
|
|
|
|
/* CD directory offset in the Zip archive */
|
|
off_t mDirectoryOffset;
|
|
|
|
/*
|
|
* We know how many entries are in the Zip archive, so we have a
|
|
* fixed-size hash table. We probe for an empty slot.
|
|
*/
|
|
int mHashTableSize;
|
|
HashEntry* mHashTable;
|
|
};
|
|
|
|
}; // namespace android
|
|
|
|
#endif /*__LIBS_ZIPFILERO_H*/
|