block_cache: enable prefetching

* Allocate blocks and add them to the hash table so they are
  available for a future block_cache_get call.
* Make use of prefetching in FAT driver.
* A client filesystem may request to prefetch a block run that
  contains some blocks that are already cached.  The request will
  be truncated at the first such block in the run.
* Fixes #19186.

Change-Id: I8d2e3cff15e5b46569438e0dc085e2b391aa57a5
Reviewed-on: https://review.haiku-os.org/c/haiku/+/8525
Reviewed-by: waddlesplash <waddlesplash@gmail.com>
This commit is contained in:
Jim906 2024-10-25 10:00:36 -04:00 committed by waddlesplash
parent e6bf67457c
commit 9bb1816c14
7 changed files with 314 additions and 0 deletions

View File

@ -78,6 +78,7 @@ extern const void *block_cache_get(void *cache, off_t blockNumber);
extern status_t block_cache_set_dirty(void *cache, off_t blockNumber, extern status_t block_cache_set_dirty(void *cache, off_t blockNumber,
bool isDirty, int32 transaction); bool isDirty, int32 transaction);
extern void block_cache_put(void *cache, off_t blockNumber); extern void block_cache_put(void *cache, off_t blockNumber);
extern status_t block_cache_prefetch(void* cache, off_t blockNumber, size_t* _numBlocks);
/* file cache */ /* file cache */
extern void *file_cache_create(dev_t mountID, ino_t vnodeID, off_t size); extern void *file_cache_create(dev_t mountID, ino_t vnodeID, off_t size);

View File

@ -854,6 +854,7 @@
#define block_cache_get fssh_block_cache_get #define block_cache_get fssh_block_cache_get
#define block_cache_set_dirty fssh_block_cache_set_dirty #define block_cache_set_dirty fssh_block_cache_set_dirty
#define block_cache_put fssh_block_cache_put #define block_cache_put fssh_block_cache_put
#define block_cache_prefetch fssh_block_cache_prefetch
/* file cache */ /* file cache */
#define file_cache_create fssh_file_cache_create #define file_cache_create fssh_file_cache_create

View File

@ -92,6 +92,8 @@ extern fssh_status_t fssh_block_cache_set_dirty(void *_cache,
int32_t transaction); int32_t transaction);
extern void fssh_block_cache_put(void *_cache, extern void fssh_block_cache_put(void *_cache,
fssh_off_t blockNumber); fssh_off_t blockNumber);
extern fssh_status_t fssh_block_cache_prefetch(void* _cache, fssh_off_t blockNumber,
fssh_size_t* _numBlocks);
/* file cache */ /* file cache */
extern void * fssh_file_cache_create(fssh_mount_id mountID, extern void * fssh_file_cache_create(fssh_mount_id mountID,

View File

@ -407,6 +407,14 @@ getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag,
if (status != 0) if (status != 0)
return B_TO_POSIX_ERROR(status); return B_TO_POSIX_ERROR(status);
#ifdef _KERNEL_MODE
// for high block counts, try to get all blocks in one disk read
if (cBlockCount > 4) {
size_t prefetchBlocks = cBlockCount;
block_cache_prefetch(blockCache, dblkno, &prefetchBlocks);
}
#endif // _KERNEL_MODE
for (i = 0; i < cBlockCount && status == B_OK; i++) { for (i = 0; i < cBlockCount && status == B_OK; i++) {
if (readOnly == true) if (readOnly == true)
newBuf->b_bcpointers[i] = (void*)block_cache_get(blockCache, dblkno + i); newBuf->b_bcpointers[i] = (void*)block_cache_get(blockCache, dblkno + i);

View File

@ -2894,6 +2894,7 @@ dosfs_readdir(fs_volume* volume, fs_vnode* vnode, void* cookie, struct dirent* b
break; break;
dirBuf->d_ino = ino; dirBuf->d_ino = ino;
dirBuf->d_dev = volume->id; dirBuf->d_dev = volume->id;
// Is this direntry associated with a chain of previous winentries? // Is this direntry associated with a chain of previous winentries?
@ -3743,6 +3744,14 @@ fat_volume_init(vnode* devvp, mount* bsdVolume, const uint64_t fatFlags, const c
if (readOnly == true) if (readOnly == true)
bsdVolume->mnt_flag |= MNT_RDONLY; bsdVolume->mnt_flag |= MNT_RDONLY;
// attempt to read the FAT into memory in advance of fillinusemap, to prevent fillinusemap
// from doing a separate disk read for each block
if (fatVolume->pm_FATsecs > 4) {
size_t fatBlocks = fatVolume->pm_FATsecs;
block_cache_prefetch(bsdVolume->mnt_cache, static_cast<off_t>(fatVolume->pm_fatblk),
&fatBlocks);
}
// have the inuse map filled in // have the inuse map filled in
rw_lock_write_lock(&fatVolume->pm_fatlock.haikuRW); rw_lock_write_lock(&fatVolume->pm_fatlock.haikuRW);
status = B_FROM_POSIX_ERROR(fillinusemap(fatVolume)); status = B_FROM_POSIX_ERROR(fillinusemap(fatVolume));

View File

@ -26,6 +26,9 @@
#include <StackOrHeapArray.h> #include <StackOrHeapArray.h>
#include <vm/vm_page.h> #include <vm/vm_page.h>
#ifndef BUILDING_USERLAND_FS_SERVER
#include "IORequest.h"
#endif // !BUILDING_USERLAND_FS_SERVER
#include "kernel_debug_config.h" #include "kernel_debug_config.h"
@ -327,6 +330,39 @@ private:
}; };
#ifndef BUILDING_USERLAND_FS_SERVER

/*!	Reads a run of cache blocks from disk in a single asynchronous I/O
	operation, without handing out references to them.  Usage: construct,
	then call Allocate() with the cache lock held, then ReadAsync().  On a
	successful ReadAsync() the object deletes itself from
	IterativeIOFinishedHook once the I/O completes.
*/
class BlockPrefetcher {
public:
							BlockPrefetcher(block_cache* cache, off_t blockNumber,
								size_t numBlocks);
							~BlockPrefetcher();

			status_t		Allocate();
			status_t		ReadAsync();

	static	status_t		IterativeIOGetVecsHook(void* cookie, io_request* request,
								off_t offset, size_t size, struct file_io_vec* vecs,
								size_t* _count);
	static	status_t		IterativeIOFinishedHook(void* cookie, io_request* request,
								status_t status, bool partialTransfer,
								size_t bytesTransferred);

			size_t			NumAllocated() { return fNumAllocated; }

private:
			void			_RemoveAllocated(size_t unbusyCount, size_t removeCount);

private:
			block_cache*	fCache;
			off_t			fBlockNumber;	// first block of the run
			size_t			fNumRequested;
			size_t			fNumAllocated;	// set by Allocate()
			cached_block**	fBlocks;
			generic_io_vec*	fDestVecs;
};

#endif // !BUILDING_USERLAND_FS_SERVER
class TransactionLocking { class TransactionLocking {
public: public:
inline bool Lock(block_cache* cache) inline bool Lock(block_cache* cache)
@ -777,6 +813,10 @@ static DoublyLinkedListLink<block_cache> sMarkCache;
static object_cache* sBlockCache; static object_cache* sBlockCache;
static void mark_block_busy_reading(block_cache* cache, cached_block* block);
static void mark_block_unbusy_reading(block_cache* cache, cached_block* block);
// #pragma mark - notifications/listener // #pragma mark - notifications/listener
@ -1411,6 +1451,207 @@ BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
} }
#ifndef BUILDING_USERLAND_FS_SERVER
// #pragma mark - BlockPrefetcher
/*!	Prepares to prefetch \a numBlocks blocks starting at \a blockNumber into
	\a cache.  Allocate() and ReadAsync() must be called to start the read.
*/
BlockPrefetcher::BlockPrefetcher(block_cache* cache, off_t blockNumber, size_t numBlocks)
	:
	fCache(cache),
	fBlockNumber(blockNumber),
	fNumRequested(numBlocks),
	fNumAllocated(0)
{
	// NOTE(review): the results of these array allocations are not checked
	// before use — presumably the kernel's operator new panics on failure
	// rather than returning NULL; confirm the allocator policy.
	fBlocks = new cached_block*[numBlocks];
	fDestVecs = new generic_io_vec[numBlocks];
}
BlockPrefetcher::~BlockPrefetcher()
{
	// Only the bookkeeping arrays are owned here; the cached_block objects
	// themselves were inserted into the cache by Allocate() (or already
	// removed again by _RemoveAllocated()).
	delete[] fBlocks;
	delete[] fDestVecs;
}
/*!	Allocates cached_block objects in preparation for prefetching.
	@return If an error is returned, then no blocks have been allocated.
	@post Blocks have been constructed (including allocating the current_data member)
		but current_data is uninitialized.
*/
status_t
BlockPrefetcher::Allocate()
{
	// fix: the TRACE referenced "fNumBlocks", which is not a member
	// (the member is fNumRequested) — a compile error with TRACE enabled
	TRACE(("BlockPrefetcher::Allocate: looking up %" B_PRIuSIZE " blocks, starting with %"
		B_PRIdOFF "\n", fNumRequested, fBlockNumber));

	ASSERT_LOCKED_MUTEX(&fCache->lock);

	size_t finalNumBlocks = fNumRequested;

	// determine whether any requested blocks are already cached
	for (size_t i = 0; i < fNumRequested; ++i) {
		off_t blockNumIter = fBlockNumber + i;
		if (blockNumIter < 0 || blockNumIter >= fCache->max_blocks) {
			panic("BlockPrefetcher::Allocate: invalid block number %" B_PRIdOFF " (max %"
				B_PRIdOFF ")", blockNumIter, fCache->max_blocks - 1);
			return B_BAD_VALUE;
		}
		cached_block* block = fCache->hash->Lookup(blockNumIter);
		if (block != NULL) {
			// truncate the request at the first block that is already cached
			TRACE(("BlockPrefetcher::Allocate: found an existing block (%" B_PRIdOFF ")\n",
				blockNumIter));
			fBlocks[i] = NULL;
			finalNumBlocks = i;
			break;
		}
	}

	// allocate the blocks
	for (size_t i = 0; i < finalNumBlocks; ++i) {
		cached_block* block = fCache->NewBlock(fBlockNumber + i);
		if (block == NULL) {
			// roll back the blocks allocated so far; none are busy yet
			_RemoveAllocated(0, i);
			return B_NO_MEMORY;
		}
		fCache->hash->Insert(block);

		// no one holds a reference yet, so the blocks go on the unused list
		block->unused = true;
		fCache->unused_blocks.Add(block);
		fCache->unused_block_count++;

		fBlocks[i] = block;
	}

	fNumAllocated = finalNumBlocks;

	return B_OK;
}
/*!	Schedules reads from disk to cache.
	@return If an error is returned, then the previously allocated blocks have been cleaned up.
	@post The calling object will eventually be deleted by IterativeIOFinishedHook.
*/
status_t
BlockPrefetcher::ReadAsync()
{
	TRACE(("BlockPrefetcher::Read: reading %" B_PRIuSIZE " blocks\n", fNumAllocated));

	size_t blockSize = fCache->block_size;
	generic_io_vec* vecs = fDestVecs;
	for (size_t i = 0; i < fNumAllocated; ++i) {
		// point each I/O vector at the block's buffer; mark the block
		// busy-reading — presumably so concurrent block_cache_get calls wait
		// for the read to finish (confirm in mark_block_unbusy_reading)
		vecs[i].base = reinterpret_cast<generic_addr_t>(fBlocks[i]->current_data);
		vecs[i].length = blockSize;
		mark_block_busy_reading(fCache, fBlocks[i]);
	}

	// B_DELETE_IO_REQUEST: the request is deleted by the I/O framework on
	// completion, so it is only deleted here when Init itself fails
	IORequest* request = new IORequest;
	status_t status = request->Init(fBlockNumber * blockSize, vecs, fNumAllocated,
		fNumAllocated * blockSize, false, B_DELETE_IO_REQUEST);
	if (status != B_OK) {
		TB(Error(fCache, fBlockNumber, "IORequest::Init starting here failed", status));
		TRACE_ALWAYS("BlockPrefetcher::Read: failed to initialize IO request for %" B_PRIuSIZE
			" blocks starting with %" B_PRIdOFF ": %s\n",
			fNumAllocated, fBlockNumber, strerror(status));
		// every block was marked busy above, so unbusy and remove all of them
		_RemoveAllocated(fNumAllocated, fNumAllocated);
		delete request;
		return status;
	}

	return do_iterative_fd_io(fCache->fd, request, IterativeIOGetVecsHook, IterativeIOFinishedHook,
		this);
}
/*!	Supplies the file I/O vectors for the prefetch request.  A prefetch is
	always a single contiguous run of blocks, so at most one vector is filled.
*/
/*static*/ status_t
BlockPrefetcher::IterativeIOGetVecsHook(void* cookie, io_request* request, off_t offset,
	size_t size, struct file_io_vec* vecs, size_t* _count)
{
	TRACE(("BlockPrefetcher::IterativeIOGetVecsHook: setting offset %" B_PRIdOFF " and length %"
		B_PRIuSIZE "\n", offset, size));

	if (*_count > 0) {
		// the requested offset was volume-relative to begin with, so it can
		// be used directly; one vector covers the whole contiguous run
		vecs[0].offset = offset;
		vecs[0].length = size;
		*_count = 1;
	}

	return B_OK;
}
/*!	Completion callback for the prefetch I/O: on success it marks the blocks
	readable again, on a short or failed transfer it discards all of them.
	Deletes the BlockPrefetcher in either case.
*/
/*static*/ status_t
BlockPrefetcher::IterativeIOFinishedHook(void* cookie, io_request* request, status_t status,
	bool partialTransfer, size_t bytesTransferred)
{
	TRACE(("BlockPrefetcher::IterativeIOFinishedHook: status %s, partial %d\n", strerror(status),
		partialTransfer));

	BlockPrefetcher* blockPrefetcher = reinterpret_cast<BlockPrefetcher*>(cookie);
	block_cache* cache = blockPrefetcher->fCache;
	cached_block** blocks = blockPrefetcher->fBlocks;
	size_t blockSize = cache->block_size;
	off_t blockNumber = blockPrefetcher->fBlockNumber;
	size_t numBlocks = blockPrefetcher->fNumAllocated;

	// the I/O ran without the cache lock; reacquire it for the cleanup/publish
	MutexLocker locker(&cache->lock);

	if (bytesTransferred < numBlocks * blockSize) {
		// a short transfer leaves some buffers uninitialized — drop all
		// prefetched blocks rather than expose stale data via the cache
		blockPrefetcher->_RemoveAllocated(numBlocks, numBlocks);

		TB(Error(cache, blockNumber, "prefetch starting here failed", status));
		TRACE_ALWAYS("prefetch_iterative_io_finished_hook: transferred only %" B_PRIuSIZE
			" bytes in attempt to read %" B_PRIuSIZE " blocks (start block %" B_PRIdOFF "): %s\n",
			bytesTransferred, numBlocks, blockNumber, strerror(status));
	} else {
		for (size_t i = 0; i < numBlocks; ++i) {
			TB(Read(cache, blockNumber + i));
			mark_block_unbusy_reading(cache, blocks[i]);
			// last_accessed is kept in seconds
			blocks[i]->last_accessed = system_time() / 1000000L;
		}
	}

	delete blockPrefetcher;

	return status;
}
/*!	Cleans up blocks that were allocated for prefetching when an in-progress prefetch
	is cancelled.
	@param unbusyCount Number of leading blocks to mark not-busy-reading first
		(zero when ReadAsync was never reached).
	@param removeCount Number of leading blocks to remove from the cache entirely.
*/
void
BlockPrefetcher::_RemoveAllocated(size_t unbusyCount, size_t removeCount)
{
	// fix: trace fBlockNumber instead of (*fBlocks)->block_number — when the
	// very first NewBlock fails, Allocate calls us with removeCount == 0 and
	// fBlocks[0] still uninitialized, so dereferencing it could crash with
	// tracing enabled; the value is identical whenever fBlocks[0] is valid
	TRACE(("BlockPrefetcher::_RemoveAllocated: unbusy %" B_PRIuSIZE " and remove %" B_PRIuSIZE
		" starting with %" B_PRIdOFF "\n", unbusyCount, removeCount, fBlockNumber));

	ASSERT_LOCKED_MUTEX(&fCache->lock);

	for (size_t i = 0; i < unbusyCount; ++i)
		mark_block_unbusy_reading(fCache, fBlocks[i]);

	for (size_t i = 0; i < removeCount; ++i) {
		// prefetched blocks are never dirty and always sit on the unused list
		ASSERT(fBlocks[i]->is_dirty == false && fBlocks[i]->unused == true);

		fCache->unused_blocks.Remove(fBlocks[i]);
		fCache->unused_block_count--;

		fCache->RemoveBlock(fBlocks[i]);
		fBlocks[i] = NULL;
	}

	fNumAllocated = 0;
}
#endif // !BUILDING_USERLAND_FS_SERVER
// #pragma mark - block_cache // #pragma mark - block_cache
@ -3752,3 +3993,48 @@ block_cache_put(void* _cache, off_t blockNumber)
put_cached_block(cache, blockNumber); put_cached_block(cache, blockNumber);
} }
/*!	Allocates blocks and schedules them to be read from disk, but does not get references to the
	blocks.
	@param blockNumber The index of the first requested block.
	@param _numBlocks As input, the number of blocks requested. As output, the number of
		blocks actually scheduled. Prefetching will stop short if the requested range includes a
		block that is already cached.
*/
status_t
block_cache_prefetch(void* _cache, off_t blockNumber, size_t* _numBlocks)
{
#ifndef BUILDING_USERLAND_FS_SERVER
	TRACE(("block_cache_prefetch: fetching %" B_PRIuSIZE " blocks starting with %" B_PRIdOFF "\n",
		*_numBlocks, blockNumber));

	block_cache* cache = reinterpret_cast<block_cache*>(_cache);
	MutexLocker locker(&cache->lock);

	size_t numBlocks = *_numBlocks;
	// report zero scheduled blocks unless the read is successfully started
	*_numBlocks = 0;

	BlockPrefetcher* blockPrefetcher = new BlockPrefetcher(cache, blockNumber, numBlocks);
	status_t status = blockPrefetcher->Allocate();
	if (status != B_OK || blockPrefetcher->NumAllocated() == 0) {
		// either allocation failed (blocks already rolled back) or the very
		// first requested block was already cached, leaving nothing to read
		TRACE(("block_cache_prefetch returning early (%s): allocated %" B_PRIuSIZE "\n",
			strerror(status), blockPrefetcher->NumAllocated()));
		delete blockPrefetcher;
		return status;
	}

	// Allocate() may have truncated the run at an already-cached block
	numBlocks = blockPrefetcher->NumAllocated();

	// on success blockPrefetcher deletes itself from IterativeIOFinishedHook;
	// on failure ReadAsync has already cleaned up the allocated blocks
	status = blockPrefetcher->ReadAsync();
	if (status == B_OK)
		*_numBlocks = numBlocks;

	return status;
#else // BUILDING_USERLAND_FS_SERVER
	*_numBlocks = 0;
	return B_UNSUPPORTED;
#endif // !BUILDING_USERLAND_FS_SERVER
}

View File

@ -1779,3 +1779,10 @@ fssh_block_cache_put(void* _cache, fssh_off_t blockNumber)
put_cached_block(cache, blockNumber); put_cached_block(cache, blockNumber);
} }
/*!	Prefetching is not supported in the FS shell; reports zero blocks
	scheduled so callers fall back to reading blocks individually.
*/
fssh_status_t
fssh_block_cache_prefetch(void* _cache, fssh_off_t blockNumber, fssh_size_t* _numBlocks)
{
	*_numBlocks = 0;
	return FSSH_B_UNSUPPORTED;
}