Reimplement ZipFileRO in terms of libziparchive.

This lets us share zip archive processing code with both
the runtime (Art, dalvik) and critical java code
(StrictJarFile).

This change also moves several utility methods to ZipUtils
and dedups code across several zip inflation methods.

One of the side effects of this change is that several
processing loops are now O(n) instead of O(n^2).

bug: 10193060

(cherry picked from commit afd31e08299008fdc5c2813f21b2573f29dc53df)

Change-Id: Iae67e62f1dc6dfc3f43e29bc38e3ffd1cb14d191
This commit is contained in:
Narayan Kamath 2013-12-03 13:16:03 +00:00
parent d837548947
commit aae73e748c
9 changed files with 322 additions and 1153 deletions

View File

@ -40,6 +40,8 @@
#include <unistd.h>
#include <time.h>
typedef void* ZipArchiveHandle;
namespace android {
/*
@ -51,18 +53,13 @@ typedef void* ZipEntryRO;
/*
* Open a Zip archive for reading.
*
* We want "open" and "find entry by name" to be fast operations, and we
* want to use as little memory as possible. We memory-map the file,
* and load a hash table with pointers to the filenames (which aren't
* null-terminated). The other fields are at a fixed offset from the
* filename, so we don't need to extract those (but we do need to byte-read
* and endian-swap them every time we want them).
* Implemented as a thin wrapper over system/core/libziparchive.
*
* To speed comparisons when doing a lookup by name, we could make the mapping
* "private" (copy-on-write) and null-terminate the filenames after verifying
* the record structure. However, this requires a private mapping of
* every page that the Central Directory touches. Easier to tuck a copy
* of the string length into the hash table entry.
* "open" and "find entry by name" are fast operations and use as little
* memory as possible.
*
* We also support fast iteration over all entries in the file (with a
* stable, but unspecified iteration order).
*
* NOTE: If this is used on file descriptors inherited from a fork() operation,
* you must be on a platform that implements pread() to guarantee correctness
@ -70,48 +67,44 @@ typedef void* ZipEntryRO;
*/
class ZipFileRO {
public:
ZipFileRO()
: mFd(-1), mFileName(NULL), mFileLength(-1),
mDirectoryMap(NULL),
mNumEntries(-1), mDirectoryOffset(-1),
mHashTableSize(-1), mHashTable(NULL)
{}
~ZipFileRO();
/* Zip compression methods we support */
enum {
kCompressStored = 0, // no compression
kCompressDeflated = 8, // standard deflate
};
/*
* Open an archive.
*/
status_t open(const char* zipFileName);
static ZipFileRO* open(const char* zipFileName);
/*
* Find an entry, by name. Returns the entry identifier, or NULL if
* not found.
*
* If two entries have the same name, one will be chosen at semi-random.
*/
ZipEntryRO findEntryByName(const char* fileName) const;
ZipEntryRO findEntryByName(const char* entryName) const;
/*
* Start iterating over the list of entries in the zip file. Requires
* a matching call to endIteration with the same cookie.
*/
bool startIteration(void** cookie);
/**
* Return the next entry in iteration order, or NULL if there are no more
* entries in this archive.
*/
ZipEntryRO nextEntry(void* cookie);
void endIteration(void* cookie);
void releaseEntry(ZipEntryRO entry) const;
/*
* Return the number of entries in the Zip archive.
*/
int getNumEntries(void) const {
return mNumEntries;
}
/*
* Return the Nth entry. Zip file entries are not stored in sorted
* order, and updated entries may appear at the end, so anyone walking
* the archive needs to avoid making ordering assumptions. We take
* that further by returning the Nth non-empty entry in the hash table
* rather than the Nth entry in the archive.
*
* Valid values are [0..numEntries).
*
* [This is currently O(n). If it needs to be fast we can allocate an
* additional data structure or provide an iterator interface.]
*/
ZipEntryRO findEntryByIndex(int idx) const;
int getNumEntries();
/*
* Copy the filename into the supplied buffer. Returns 0 on success,
@ -149,112 +142,27 @@ public:
*
* Returns "true" on success.
*/
bool uncompressEntry(ZipEntryRO entry, void* buffer) const;
bool uncompressEntry(ZipEntryRO entry, void* buffer, size_t size) const;
/*
* Uncompress the data to an open file descriptor.
*/
bool uncompressEntry(ZipEntryRO entry, int fd) const;
/* Zip compression methods we support */
enum {
kCompressStored = 0, // no compression
kCompressDeflated = 8, // standard deflate
};
/*
* Utility function: uncompress deflated data, buffer to buffer.
*/
static bool inflateBuffer(void* outBuf, const void* inBuf,
size_t uncompLen, size_t compLen);
/*
* Utility function: uncompress deflated data, buffer to fd.
*/
static bool inflateBuffer(int fd, const void* inBuf,
size_t uncompLen, size_t compLen);
/*
 * Utility function: unpack a packed ZIP timestamp into a struct tm.
 *
 * "when" holds the date in the bits above bit 15 and the time of day in
 * its low 16 bits. Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and
 * tm_sec are written; every other field of "timespec" is left untouched.
 *
 * NOTE(review): the month is stored exactly as decoded, with no
 * adjustment. ZIP months are presumably 1..12 while struct tm counts
 * months from 0 -- confirm what callers expect before handing the result
 * to mktime() and friends.
 */
static inline void zipTimeToTimespec(long when, struct tm* timespec) {
    const long datePart = when >> 16;

    /* time of day: 5-bit hour, 6-bit minute, 5-bit seconds field (doubled) */
    timespec->tm_sec = (when & 0x1F) << 1;
    timespec->tm_min = (when >> 5) & 0x3F;
    timespec->tm_hour = (when >> 11) & 0x1F;

    /* calendar date: 7-bit year, 4-bit month, 5-bit day */
    timespec->tm_mday = datePart & 0x1F;
    timespec->tm_mon = (datePart >> 5) & 0x0F;
    timespec->tm_year = ((datePart >> 9) & 0x7F) + 80; // Zip is years since 1980
}
/*
* Some basic functions for raw data manipulation. "LE" means
* Little Endian.
*/
/* Assemble a 16-bit little-endian value from the first two bytes of "buf". */
static inline unsigned short get2LE(const unsigned char* buf) {
    const unsigned int lowByte = buf[0];
    const unsigned int highByte = buf[1];
    return static_cast<unsigned short>((highByte << 8) | lowByte);
}
/*
 * Assemble a 32-bit little-endian value from the first four bytes of "buf".
 *
 * Each byte is widened to unsigned long before it is shifted. Shifting the
 * promoted (signed int) byte by 24 would yield a negative int whenever the
 * top bit of buf[3] is set, and that value would sign-extend into the upper
 * half of the result on platforms where unsigned long is 64 bits wide.
 */
static inline unsigned long get4LE(const unsigned char* buf) {
    return static_cast<unsigned long>(buf[0]) |
            (static_cast<unsigned long>(buf[1]) << 8) |
            (static_cast<unsigned long>(buf[2]) << 16) |
            (static_cast<unsigned long>(buf[3]) << 24);
}
~ZipFileRO();
private:
/* these are private and not defined */
/* these are private and not defined */
ZipFileRO(const ZipFileRO& src);
ZipFileRO& operator=(const ZipFileRO& src);
/* locate and parse the central directory */
bool mapCentralDirectory(void);
ZipFileRO(ZipArchiveHandle handle, char* fileName) : mHandle(handle),
mFileName(fileName)
{
}
/* parse the archive, prepping internal structures */
bool parseZipArchive(void);
/* add a new entry to the hash table */
void addToHash(const char* str, int strLen, unsigned int hash);
/* compute string hash code */
static unsigned int computeHash(const char* str, int len);
/* convert a ZipEntryRO back to a hash table index */
int entryToIndex(const ZipEntryRO entry) const;
/*
* One entry in the hash table.
*/
typedef struct HashEntry {
const char* name;
unsigned short nameLen;
//unsigned int hash;
} HashEntry;
/* open Zip archive */
int mFd;
/* Lock for handling the file descriptor (seeks, etc) */
mutable Mutex mFdLock;
/* zip file name */
char* mFileName;
/* length of file */
size_t mFileLength;
/* mapped file */
FileMap* mDirectoryMap;
/* number of entries in the Zip archive */
int mNumEntries;
/* CD directory offset in the Zip archive */
off64_t mDirectoryOffset;
/*
* We know how many entries are in the Zip archive, so we have a
* fixed-size hash table. We probe for an empty slot.
*/
int mHashTableSize;
HashEntry* mHashTable;
const ZipArchiveHandle mHandle;
char* mFileName;
};
}; // namespace android

View File

@ -21,6 +21,7 @@
#define __LIBS_ZIPUTILS_H
#include <stdio.h>
#include <time.h>
namespace android {
@ -33,9 +34,11 @@ public:
* General utility function for uncompressing "deflate" data from a file
* to a buffer.
*/
static bool inflateToBuffer(FILE* fp, void* buf, long uncompressedLen,
long compressedLen);
static bool inflateToBuffer(int fd, void* buf, long uncompressedLen,
long compressedLen);
static bool inflateToBuffer(FILE* fp, void* buf, long uncompressedLen,
static bool inflateToBuffer(void *in, void* buf, long uncompressedLen,
long compressedLen);
/*
@ -57,6 +60,19 @@ public:
static bool examineGzip(FILE* fp, int* pCompressionMethod,
long* pUncompressedLen, long* pCompressedLen, unsigned long* pCRC32);
/*
 * Utility function: unpack a packed ZIP timestamp into a struct tm.
 *
 * "when" holds the date in the bits above bit 15 and the time of day in
 * its low 16 bits. Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and
 * tm_sec are written; every other field of "timespec" is left untouched.
 *
 * NOTE(review): the month is stored exactly as decoded, with no
 * adjustment. ZIP months are presumably 1..12 while struct tm counts
 * months from 0 -- confirm what callers expect before handing the result
 * to mktime() and friends.
 */
static inline void zipTimeToTimespec(long when, struct tm* timespec) {
    const long datePart = when >> 16;

    /* time of day: 5-bit hour, 6-bit minute, 5-bit seconds field (doubled) */
    timespec->tm_sec = (when & 0x1F) << 1;
    timespec->tm_min = (when >> 5) & 0x3F;
    timespec->tm_hour = (when >> 11) & 0x1F;

    /* calendar date: 7-bit year, 4-bit month, 5-bit day */
    timespec->tm_mday = datePart & 0x1F;
    timespec->tm_mon = (datePart >> 5) & 0x0F;
    timespec->tm_year = ((datePart >> 9) & 0x7F) + 80; // Zip is years since 1980
}
private:
ZipUtils() {}
~ZipUtils() {}

View File

@ -54,6 +54,7 @@ LOCAL_C_INCLUDES := \
external/zlib
LOCAL_STATIC_LIBRARIES := liblog
LOCAL_WHOLE_STATIC_LIBRARIES := libziparchive-host
include $(BUILD_HOST_STATIC_LIBRARY)
@ -72,9 +73,12 @@ LOCAL_SHARED_LIBRARIES := \
libutils \
libz
LOCAL_STATIC_LIBRARIES := libziparchive
LOCAL_C_INCLUDES := \
external/icu4c/common \
external/zlib
external/zlib \
system/core/include
LOCAL_MODULE:= libandroidfw

View File

@ -843,7 +843,7 @@ void _CompressedAsset::close(void)
* The first time this is called, we expand the compressed data into a
* buffer.
*/
const void* _CompressedAsset::getBuffer(bool wordAligned)
const void* _CompressedAsset::getBuffer(bool)
{
unsigned char* buf = NULL;
@ -860,7 +860,7 @@ const void* _CompressedAsset::getBuffer(bool wordAligned)
}
if (mMap != NULL) {
if (!ZipFileRO::inflateBuffer(buf, mMap->getDataPtr(),
if (!ZipUtils::inflateToBuffer(mMap->getDataPtr(), buf,
mUncompressedLen, mCompressedLen))
goto bail;
} else {

View File

@ -305,10 +305,11 @@ bool AssetManager::getZipEntryCrcLocked(const String8& zipPath, const char* entr
if (entry == NULL) {
return false;
}
if (!zip->getEntryInfo(entry, NULL, NULL, NULL, NULL, NULL, (long*)pCrc)) {
return false;
}
return true;
const bool gotInfo = zip->getEntryInfo(entry, NULL, NULL, NULL, NULL, NULL, (long*)pCrc);
zip->releaseEntry(entry);
return gotInfo;
}
bool AssetManager::createIdmapFileLocked(const String8& originalPath, const String8& overlayPath,
@ -821,16 +822,14 @@ Asset* AssetManager::openNonAssetInPathLocked(const char* fileName, AccessMode m
String8 path(fileName);
/* check the appropriate Zip file */
ZipFileRO* pZip;
ZipEntryRO entry;
pZip = getZipFileLocked(ap);
ZipFileRO* pZip = getZipFileLocked(ap);
if (pZip != NULL) {
//printf("GOT zip, checking NA '%s'\n", (const char*) path);
entry = pZip->findEntryByName(path.string());
ZipEntryRO entry = pZip->findEntryByName(path.string());
if (entry != NULL) {
//printf("FOUND NA in Zip file for %s\n", appName ? appName : kAppCommon);
pAsset = openAssetFromZipLocked(pZip, entry, mode, path);
pZip->releaseEntry(entry);
}
}
@ -975,17 +974,15 @@ Asset* AssetManager::openInLocaleVendorLocked(const char* fileName, AccessMode m
path.appendPath(fileName);
/* check the appropriate Zip file */
ZipFileRO* pZip;
ZipEntryRO entry;
pZip = getZipFileLocked(ap);
ZipFileRO* pZip = getZipFileLocked(ap);
if (pZip != NULL) {
//printf("GOT zip, checking '%s'\n", (const char*) path);
entry = pZip->findEntryByName(path.string());
ZipEntryRO entry = pZip->findEntryByName(path.string());
if (entry != NULL) {
//printf("FOUND in Zip file for %s/%s-%s\n",
// appName, locale, vendor);
pAsset = openAssetFromZipLocked(pZip, entry, mode, path);
pZip->releaseEntry(entry);
}
}
@ -1487,11 +1484,16 @@ bool AssetManager::scanAndMergeZipLocked(SortedVector<AssetDir::FileInfo>* pMerg
* semantics.
*/
int dirNameLen = dirName.length();
for (int i = 0; i < pZip->getNumEntries(); i++) {
ZipEntryRO entry;
void *iterationCookie;
if (!pZip->startIteration(&iterationCookie)) {
ALOGW("ZipFileRO::startIteration returned false");
return false;
}
ZipEntryRO entry;
while ((entry = pZip->nextEntry(iterationCookie)) != NULL) {
char nameBuf[256];
entry = pZip->findEntryByIndex(i);
if (pZip->getEntryFileName(entry, nameBuf, sizeof(nameBuf)) != 0) {
// TODO: fix this if we expect to have long names
ALOGE("ARGH: name too long?\n");
@ -1541,6 +1543,8 @@ bool AssetManager::scanAndMergeZipLocked(SortedVector<AssetDir::FileInfo>* pMerg
}
}
pZip->endIteration(iterationCookie);
/*
* Add the set of unique directories.
*/
@ -1814,12 +1818,10 @@ AssetManager::SharedZip::SharedZip(const String8& path, time_t modWhen)
mResourceTableAsset(NULL), mResourceTable(NULL)
{
//ALOGI("Creating SharedZip %p %s\n", this, (const char*)mPath);
mZipFile = new ZipFileRO;
ALOGV("+++ opening zip '%s'\n", mPath.string());
if (mZipFile->open(mPath.string()) != NO_ERROR) {
mZipFile = ZipFileRO::open(mPath.string());
if (mZipFile == NULL) {
ALOGD("failed to open Zip archive '%s'\n", mPath.string());
delete mZipFile;
mZipFile = NULL;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -33,115 +33,13 @@
using namespace android;
/*
* Utility function that expands zip/gzip "deflate" compressed data
* into a buffer.
*
* "fd" is an open file positioned at the start of the "deflate" data
* "buf" must hold at least "uncompressedLen" bytes.
*/
/*static*/ bool ZipUtils::inflateToBuffer(int fd, void* buf,
long uncompressedLen, long compressedLen)
{
bool result = false;
const unsigned long kReadBufSize = 32768;
unsigned char* readBuf = NULL;
z_stream zstream;
int zerr;
unsigned long compRemaining;
assert(uncompressedLen >= 0);
assert(compressedLen >= 0);
readBuf = new unsigned char[kReadBufSize];
if (readBuf == NULL)
goto bail;
compRemaining = compressedLen;
/*
* Initialize the zlib stream.
*/
memset(&zstream, 0, sizeof(zstream));
zstream.zalloc = Z_NULL;
zstream.zfree = Z_NULL;
zstream.opaque = Z_NULL;
zstream.next_in = NULL;
zstream.avail_in = 0;
zstream.next_out = (Bytef*) buf;
zstream.avail_out = uncompressedLen;
zstream.data_type = Z_UNKNOWN;
/*
* Use the undocumented "negative window bits" feature to tell zlib
* that there's no zlib header waiting for it.
*/
zerr = inflateInit2(&zstream, -MAX_WBITS);
if (zerr != Z_OK) {
if (zerr == Z_VERSION_ERROR) {
ALOGE("Installed zlib is not compatible with linked version (%s)\n",
ZLIB_VERSION);
} else {
ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
}
goto bail;
}
/*
* Loop while we have data.
*/
do {
unsigned long getSize;
/* read as much as we can */
if (zstream.avail_in == 0) {
getSize = (compRemaining > kReadBufSize) ?
kReadBufSize : compRemaining;
ALOGV("+++ reading %ld bytes (%ld left)\n",
getSize, compRemaining);
int cc = TEMP_FAILURE_RETRY(read(fd, readBuf, getSize));
if (cc < 0) {
ALOGW("inflate read failed: %s", strerror(errno));
} else if (cc != (int) getSize) {
ALOGW("inflate read failed (%d vs %ld)", cc, getSize);
goto z_bail;
}
compRemaining -= getSize;
zstream.next_in = readBuf;
zstream.avail_in = getSize;
}
/* uncompress the data */
zerr = inflate(&zstream, Z_NO_FLUSH);
if (zerr != Z_OK && zerr != Z_STREAM_END) {
ALOGD("zlib inflate call failed (zerr=%d)\n", zerr);
goto z_bail;
}
/* output buffer holds all, so no need to write the output */
} while (zerr == Z_OK);
assert(zerr == Z_STREAM_END); /* other errors should've been caught */
if ((long) zstream.total_out != uncompressedLen) {
ALOGW("Size mismatch on inflated file (%ld vs %ld)\n",
zstream.total_out, uncompressedLen);
goto z_bail;
}
// success!
result = true;
z_bail:
inflateEnd(&zstream); /* free up any allocated structures */
bail:
delete[] readBuf;
return result;
/*
 * Assemble a 32-bit little-endian value from the first four bytes of "buf".
 *
 * Each byte is widened to unsigned long before it is shifted. Shifting the
 * promoted (signed int) byte by 24 would yield a negative int whenever the
 * top bit of buf[3] is set, and that value would sign-extend into the upper
 * half of the result on platforms where unsigned long is 64 bits wide --
 * corrupting values such as a CRC32 with its high bit set.
 */
static inline unsigned long get4LE(const unsigned char* buf) {
    return static_cast<unsigned long>(buf[0]) |
            (static_cast<unsigned long>(buf[1]) << 8) |
            (static_cast<unsigned long>(buf[2]) << 16) |
            (static_cast<unsigned long>(buf[3]) << 24);
}
static const unsigned long kReadBufSize = 32768;
/*
* Utility function that expands zip/gzip "deflate" compressed data
* into a buffer.
@ -153,12 +51,11 @@ bail:
* "fp" is an open file positioned at the start of the "deflate" data
* "buf" must hold at least "uncompressedLen" bytes.
*/
/*static*/ bool ZipUtils::inflateToBuffer(FILE* fp, void* buf,
/*static*/ template<typename T> bool inflateToBuffer(T& reader, void* buf,
long uncompressedLen, long compressedLen)
{
bool result = false;
const unsigned long kReadBufSize = 32768;
unsigned char* readBuf = NULL;
z_stream zstream;
int zerr;
unsigned long compRemaining;
@ -166,15 +63,12 @@ bail:
assert(uncompressedLen >= 0);
assert(compressedLen >= 0);
readBuf = new unsigned char[kReadBufSize];
if (readBuf == NULL)
goto bail;
compRemaining = compressedLen;
/*
* Initialize the zlib stream.
*/
memset(&zstream, 0, sizeof(zstream));
memset(&zstream, 0, sizeof(zstream));
zstream.zalloc = Z_NULL;
zstream.zfree = Z_NULL;
zstream.opaque = Z_NULL;
@ -184,10 +78,10 @@ bail:
zstream.avail_out = uncompressedLen;
zstream.data_type = Z_UNKNOWN;
/*
* Use the undocumented "negative window bits" feature to tell zlib
* that there's no zlib header waiting for it.
*/
/*
* Use the undocumented "negative window bits" feature to tell zlib
* that there's no zlib header waiting for it.
*/
zerr = inflateInit2(&zstream, -MAX_WBITS);
if (zerr != Z_OK) {
if (zerr == Z_VERSION_ERROR) {
@ -212,17 +106,18 @@ bail:
ALOGV("+++ reading %ld bytes (%ld left)\n",
getSize, compRemaining);
int cc = fread(readBuf, 1, getSize, fp);
if (cc != (int) getSize) {
ALOGD("inflate read failed (%d vs %ld)\n",
cc, getSize);
unsigned char* nextBuffer = NULL;
const unsigned long nextSize = reader.read(&nextBuffer, getSize);
if (nextSize < getSize || nextBuffer == NULL) {
ALOGD("inflate read failed (%ld vs %ld)\n", nextSize, getSize);
goto z_bail;
}
compRemaining -= getSize;
compRemaining -= nextSize;
zstream.next_in = readBuf;
zstream.avail_in = getSize;
zstream.next_in = nextBuffer;
zstream.avail_in = nextSize;
}
/* uncompress the data */
@ -250,10 +145,100 @@ z_bail:
inflateEnd(&zstream); /* free up any allocated structures */
bail:
delete[] readBuf;
return result;
}
/*
 * Input source for inflateToBuffer() that pulls compressed bytes from a
 * stdio stream into a heap-allocated staging buffer of kReadBufSize bytes.
 *
 * The object owns the staging buffer and frees it on destruction, so it
 * must not be copied; pass it by reference.
 */
class FileReader {
public:
    explicit FileReader(FILE* fp) :
        mFp(fp), mReadBuf(new unsigned char[kReadBufSize])
    {
    }

    ~FileReader() {
        delete[] mReadBuf;
    }

    /*
     * Read up to "readSize" bytes from the stream into the staging buffer.
     *
     * "*nextBuffer" is always pointed at the staging buffer. Returns the
     * number of bytes fread() delivered, which may be less than "readSize".
     * Nothing here checks "readSize" against the buffer size; the caller is
     * expected to keep it at or below kReadBufSize.
     */
    long read(unsigned char** nextBuffer, long readSize) const {
        *nextBuffer = mReadBuf;
        return fread(mReadBuf, 1, readSize, mFp);
    }

    FILE* mFp;
    unsigned char* mReadBuf;

private:
    /* these are private and not defined: a copy would free mReadBuf twice */
    FileReader(const FileReader& src);
    FileReader& operator=(const FileReader& src);
};
/*
 * Input source for inflateToBuffer() that pulls compressed bytes from a
 * raw file descriptor into a heap-allocated staging buffer of kReadBufSize
 * bytes.
 *
 * The object owns the staging buffer and frees it on destruction, so it
 * must not be copied; pass it by reference.
 */
class FdReader {
public:
    explicit FdReader(int fd) :
        mFd(fd), mReadBuf(new unsigned char[kReadBufSize])
    {
    }

    ~FdReader() {
        delete[] mReadBuf;
    }

    /*
     * Read up to "readSize" bytes from the descriptor into the staging
     * buffer, retrying when the call is interrupted.
     *
     * "*nextBuffer" is always pointed at the staging buffer. Returns the
     * number of bytes read, or 0 if the read failed. A failure is reported
     * as 0 rather than -1 because inflateToBuffer() stores the result in an
     * unsigned long: a raw -1 would look like an enormous successful read
     * instead of tripping its short-read check. errno is left as set by
     * read() for callers that want the cause.
     */
    long read(unsigned char** nextBuffer, long readSize) const {
        *nextBuffer = mReadBuf;
        const long bytesRead = TEMP_FAILURE_RETRY(::read(mFd, mReadBuf, readSize));
        return (bytesRead < 0) ? 0 : bytesRead;
    }

    int mFd;
    unsigned char* mReadBuf;

private:
    /* these are private and not defined: a copy would free mReadBuf twice */
    FdReader(const FdReader& src);
    FdReader& operator=(const FdReader& src);
};
/*
 * Input source for inflateToBuffer() backed by a caller-supplied block of
 * memory. The block is not copied and not owned by this object.
 */
class BufferReader {
public:
    BufferReader(void* input, size_t inputSize) :
        mInput(static_cast<unsigned char*>(input)),
        mInputSize(inputSize),
        mBufferReturned(false)
    {
    }

    /*
     * Hand out the whole input block on the first call, regardless of
     * "readSize"; "*nextBuffer" points at the block and the full input
     * size is returned. Every later call sets "*nextBuffer" to NULL and
     * returns 0.
     */
    long read(unsigned char** nextBuffer, long readSize) {
        if (mBufferReturned) {
            /* the single chunk has already been consumed */
            *nextBuffer = NULL;
            return 0;
        }

        mBufferReturned = true;
        *nextBuffer = mInput;
        return mInputSize;
    }

    unsigned char* mInput;
    const size_t mInputSize;
    bool mBufferReturned;
};
/*static*/ bool ZipUtils::inflateToBuffer(FILE* fp, void* buf,
long uncompressedLen, long compressedLen)
{
FileReader reader(fp);
return ::inflateToBuffer<FileReader>(reader, buf,
uncompressedLen, compressedLen);
}
/*static*/ bool ZipUtils::inflateToBuffer(int fd, void* buf,
long uncompressedLen, long compressedLen)
{
FdReader reader(fd);
return ::inflateToBuffer<FdReader>(reader, buf,
uncompressedLen, compressedLen);
}
/*static*/ bool ZipUtils::inflateToBuffer(void* in, void* buf,
long uncompressedLen, long compressedLen)
{
BufferReader reader(in, compressedLen);
return ::inflateToBuffer<BufferReader>(reader, buf,
uncompressedLen, compressedLen);
}
/*
* Look at the contents of a gzip archive. We want to know where the
* data starts, and how long it will be after it is uncompressed.
@ -338,8 +323,8 @@ bail:
fseek(fp, curPosn, SEEK_SET);
*pCompressionMethod = method;
*pCRC32 = ZipFileRO::get4LE(&buf[0]);
*pUncompressedLen = ZipFileRO::get4LE(&buf[4]);
*pCRC32 = get4LE(&buf[0]);
*pUncompressedLen = get4LE(&buf[4]);
return true;
}

View File

@ -6,7 +6,7 @@ include $(CLEAR_VARS)
test_src_files := \
BackupData_test.cpp \
ObbFile_test.cpp \
ZipFileRO_test.cpp
ZipUtils_test.cpp
shared_libraries := \
libandroidfw \

View File

@ -14,9 +14,9 @@
* limitations under the License.
*/
#define LOG_TAG "ZipFileRO_test"
#define LOG_TAG "ZipUtils_test"
#include <utils/Log.h>
#include <androidfw/ZipFileRO.h>
#include <androidfw/ZipUtils.h>
#include <gtest/gtest.h>
@ -25,7 +25,7 @@
namespace android {
class ZipFileROTest : public testing::Test {
class ZipUtilsTest : public testing::Test {
protected:
virtual void SetUp() {
}
@ -34,13 +34,13 @@ protected:
}
};
TEST_F(ZipFileROTest, ZipTimeConvertSuccess) {
TEST_F(ZipUtilsTest, ZipTimeConvertSuccess) {
struct tm t;
// 2011-06-29 14:40:40
long when = 0x3EDD7514;
ZipFileRO::zipTimeToTimespec(when, &t);
ZipUtils::zipTimeToTimespec(when, &t);
EXPECT_EQ(2011, t.tm_year + 1900)
<< "Year was improperly converted.";