mirror of
https://review.haiku-os.org/haiku
synced 2025-01-30 18:24:53 +01:00
1f633814fa
Instead of handling compression for individual file/attribute data, we now compress the whole heap where they are stored. This significantly improves compression ratios. We still divide the uncompressed data into 64 KiB chunks and use a chunk offset array for the compressed chunks to allow for quick random access without too much overhead. The tradeoff is a limited possible compression ratio -- i.e. we won't be as good as tar.gz (though, surprisingly, with my test archives we did better than zip). The other package file sections (package attributes and TOC) are no longer compressed individually. Their uncompressed data are simply pushed onto the heap, where the usual compression strategy applies. To simplify things, the repository format has been changed in the same manner, although it doesn't otherwise use the heap, since it only stores metadata. Because data compression had been exposed in public and private API, this change touches a lot of code that uses the package kit, including packagefs and the boot loader packagefs support. The latter two haven't been tested yet. Moreover, packagefs needs a new kind of cache, so that we avoid re-reading the same heap chunk for two different data items it contains.
144 lines
4.1 KiB
C++
144 lines
4.1 KiB
C++
/*
 * Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Distributed under the terms of the MIT License.
 */
|
|
#ifndef _PACKAGE__HPKG__PRIVATE__PACKAGE_FILE_HEAP_ACCESSOR_BASE_H_
|
|
#define _PACKAGE__HPKG__PRIVATE__PACKAGE_FILE_HEAP_ACCESSOR_BASE_H_
|
|
|
|
|
|
#include <SupportDefs.h>
|
|
|
|
#include <package/hpkg/DataReader.h>
|
|
|
|
|
|
namespace BPackageKit {
|
|
|
|
namespace BHPKG {
|
|
|
|
|
|
class BErrorOutput;
|
|
|
|
|
|
namespace BPrivate {
|
|
|
|
|
|
class PackageFileHeapAccessorBase : public BAbstractBufferedDataReader {
|
|
public:
|
|
class OffsetArray;
|
|
|
|
public:
|
|
PackageFileHeapAccessorBase(
|
|
BErrorOutput* errorOutput, int fd,
|
|
off_t heapOffset);
|
|
virtual ~PackageFileHeapAccessorBase();
|
|
|
|
off_t HeapOffset() const
|
|
{ return fHeapOffset; }
|
|
off_t CompressedHeapSize() const
|
|
{ return fCompressedHeapSize; }
|
|
uint64 UncompressedHeapSize() const
|
|
{ return fUncompressedHeapSize; }
|
|
size_t ChunkSize() const
|
|
{ return kChunkSize; }
|
|
|
|
// normally used after cloning a PackageFileHeapReader only
|
|
void SetErrorOutput(BErrorOutput* errorOutput)
|
|
{ fErrorOutput = errorOutput; }
|
|
void SetFD(int fd)
|
|
{ fFD = fd; }
|
|
|
|
// BAbstractBufferedDataReader
|
|
virtual status_t ReadDataToOutput(off_t offset, size_t size,
|
|
BDataOutput* output);
|
|
|
|
public:
|
|
static const size_t kChunkSize = 64 * 1024;
|
|
|
|
protected:
|
|
virtual status_t ReadAndDecompressChunk(size_t chunkIndex,
|
|
void* compressedDataBuffer,
|
|
void* uncompressedDataBuffer) = 0;
|
|
status_t ReadAndDecompressChunkData(uint64 offset,
|
|
size_t compressedSize,
|
|
size_t uncompressedSize,
|
|
void* compressedDataBuffer,
|
|
void* uncompressedDataBuffer);
|
|
status_t ReadFileData(uint64 offset, void* buffer,
|
|
size_t size);
|
|
|
|
protected:
|
|
BErrorOutput* fErrorOutput;
|
|
int fFD;
|
|
off_t fHeapOffset;
|
|
uint64 fCompressedHeapSize;
|
|
uint64 fUncompressedHeapSize;
|
|
};
|
|
|
|
|
|
/*! Stores the chunk offsets in a compact way, while still providing quick
|
|
access.
|
|
- The object doesn't store the number of chunks/offsets it contains. During
|
|
initialization the chunk count is provided. Later, when getting an offset,
|
|
the caller is responsible for ensuring a valid index.
|
|
- The first (index 0) chunk offset is omitted, since it is always 0.
|
|
- The chunk offsets that fit in a 32 bit number use only one 32 bit element
|
|
in the offsets array.
|
|
- The chunk offsets that don't fit in a 32 bit number use two elements in
|
|
the offsets array.
|
|
Memory use is one pointer, if the chunk count is <= 1 (uncompressed heap size
|
|
<= 64 KiB). Afterwards it's one pointer plus 32 bit per chunk as long as the
|
|
last offset still fits 32 bit (compressed heap size < 4GiB). For any further
|
|
chunks it is 64 bit per chunk. So, for the common case we use sizeof(void*)
|
|
plus 1 KiB per 16 MiB of uncompressed heap, or about 64 KiB per 1 GiB. Which
|
|
seems reasonable for packagefs to keep in memory.
|
|
*/
|
|
class PackageFileHeapAccessorBase::OffsetArray {
|
|
public:
|
|
OffsetArray();
|
|
~OffsetArray();
|
|
|
|
bool InitChunksOffsets(size_t totalChunkCount,
|
|
size_t baseIndex, const uint16* chunkSizes,
|
|
size_t chunkCount);
|
|
|
|
bool Init(size_t totalChunkCount,
|
|
const OffsetArray& other);
|
|
// "copy" init
|
|
|
|
uint64 operator[](size_t index) const;
|
|
|
|
private:
|
|
uint32* fOffsets;
|
|
// - NULL, if chunkCount <= 1
|
|
// - element 0 contains the number of 32 bit
|
|
// offsets that follow, or is 0, when all
|
|
// offsets are 32 bit only
|
|
// - the following offsets use two elements
|
|
// each (lower followed by upper 32 bit)
|
|
// to represent the 64 bit value
|
|
};
|
|
|
|
|
|
/*!	Returns the offset of the chunk with the given \a index.

	Index 0 is answered without touching the array, since that offset is always
	0 and not stored. For any other index the array must have been initialized
	and the caller must guarantee that \a index is valid; no bounds checking is
	performed.
*/
inline uint64
PackageFileHeapAccessorBase::OffsetArray::operator[](size_t index) const
{
	if (index == 0)
		return 0;

	// Element 0 is either 0 (all offsets are 32 bit) or the first index whose
	// offset is stored as a 64 bit value.
	const uint32 firstWideIndex = fOffsets[0];
	if (firstWideIndex == 0 || index < firstWideIndex)
		return fOffsets[index];

	// Every 64 bit offset occupies two elements, so each index at or beyond
	// firstWideIndex is shifted by the number of wide entries preceding it.
	const size_t slot = index + (index - firstWideIndex);
	const uint64 lower = fOffsets[slot];
	const uint64 upper = fOffsets[slot + 1];
	return lower | (upper << 32);
}
|
|
|
|
|
|
} // namespace BPrivate
|
|
|
|
} // namespace BHPKG
|
|
|
|
} // namespace BPackageKit
|
|
|
|
|
|
#endif // _PACKAGE__HPKG__PRIVATE__PACKAGE_FILE_HEAP_ACCESSOR_BASE_H_
|