From 9bb1816c14e5b8f360ee3213c831c535680fc755 Mon Sep 17 00:00:00 2001
From: Jim906 <jim_l@fastmail.com>
Date: Fri, 25 Oct 2024 10:00:36 -0400
Subject: [PATCH] block_cache: enable prefetching

* Allocate blocks and add them to the hash table so they are
  available for a future block_cache_get call.
* Make use of prefetching in FAT driver.
* A client filesystem may request to prefetch a block run that
  contains some blocks that are already cached.  The request will
  be truncated at the first such block in the run.
* Fixes #19186.

Change-Id: I8d2e3cff15e5b46569438e0dc085e2b391aa57a5
Reviewed-on: https://review.haiku-os.org/c/haiku/+/8525
Reviewed-by: waddlesplash <waddlesplash@gmail.com>
---
 headers/os/drivers/fs_cache.h                 |   1 +
 headers/private/fs_shell/fssh_api_wrapper.h   |   1 +
 headers/private/fs_shell/fssh_fs_cache.h      |   2 +
 .../file_systems/fat/bsd/kern/vfs_bio.c       |   8 +
 .../file_systems/fat/kernel_interface.cpp     |   9 +
 src/system/kernel/cache/block_cache.cpp       | 286 ++++++++++++++++++
 src/tools/fs_shell/block_cache.cpp            |   7 +
 7 files changed, 314 insertions(+)

diff --git a/headers/os/drivers/fs_cache.h b/headers/os/drivers/fs_cache.h
index 90d760c9fb..3675a206ec 100644
--- a/headers/os/drivers/fs_cache.h
+++ b/headers/os/drivers/fs_cache.h
@@ -78,6 +78,7 @@ extern const void *block_cache_get(void *cache, off_t blockNumber);
 extern status_t block_cache_set_dirty(void *cache, off_t blockNumber,
 					bool isDirty, int32 transaction);
 extern void block_cache_put(void *cache, off_t blockNumber);
+extern status_t block_cache_prefetch(void* cache, off_t blockNumber, size_t* _numBlocks);
 
 /* file cache */
 extern void *file_cache_create(dev_t mountID, ino_t vnodeID, off_t size);
diff --git a/headers/private/fs_shell/fssh_api_wrapper.h b/headers/private/fs_shell/fssh_api_wrapper.h
index 068d726106..88fa0743b8 100644
--- a/headers/private/fs_shell/fssh_api_wrapper.h
+++ b/headers/private/fs_shell/fssh_api_wrapper.h
@@ -854,6 +854,7 @@
 #define block_cache_get					fssh_block_cache_get
 #define block_cache_set_dirty			fssh_block_cache_set_dirty
 #define block_cache_put					fssh_block_cache_put
+#define block_cache_prefetch			fssh_block_cache_prefetch
 
 /* file cache */
 #define file_cache_create				fssh_file_cache_create
diff --git a/headers/private/fs_shell/fssh_fs_cache.h b/headers/private/fs_shell/fssh_fs_cache.h
index 05304c9ffb..3f6723f205 100644
--- a/headers/private/fs_shell/fssh_fs_cache.h
+++ b/headers/private/fs_shell/fssh_fs_cache.h
@@ -92,6 +92,8 @@ extern fssh_status_t	fssh_block_cache_set_dirty(void *_cache,
 							int32_t transaction);
 extern void				fssh_block_cache_put(void *_cache,
 							fssh_off_t blockNumber);
+extern fssh_status_t	fssh_block_cache_prefetch(void* _cache, fssh_off_t blockNumber,
+							fssh_size_t* _numBlocks);
 
 /* file cache */
 extern void *			fssh_file_cache_create(fssh_mount_id mountID,
diff --git a/src/add-ons/kernel/file_systems/fat/bsd/kern/vfs_bio.c b/src/add-ons/kernel/file_systems/fat/bsd/kern/vfs_bio.c
index dc22429733..45bbd0ade3 100644
--- a/src/add-ons/kernel/file_systems/fat/bsd/kern/vfs_bio.c
+++ b/src/add-ons/kernel/file_systems/fat/bsd/kern/vfs_bio.c
@@ -407,6 +407,14 @@ getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag,
 		if (status != 0)
 			return B_TO_POSIX_ERROR(status);
 
+#ifdef _KERNEL_MODE
+		// for high block counts, try to get all blocks in one disk read
+		if (cBlockCount > 4) {
+			size_t prefetchBlocks = cBlockCount;
+			block_cache_prefetch(blockCache, dblkno, &prefetchBlocks);
+		}
+#endif // _KERNEL_MODE
+
 		for (i = 0; i < cBlockCount && status == B_OK; i++) {
 			if (readOnly == true)
 				newBuf->b_bcpointers[i] = (void*)block_cache_get(blockCache, dblkno + i);
diff --git a/src/add-ons/kernel/file_systems/fat/kernel_interface.cpp b/src/add-ons/kernel/file_systems/fat/kernel_interface.cpp
index 6001dcae36..ab1d7de415 100644
--- a/src/add-ons/kernel/file_systems/fat/kernel_interface.cpp
+++ b/src/add-ons/kernel/file_systems/fat/kernel_interface.cpp
@@ -2894,6 +2894,7 @@ dosfs_readdir(fs_volume* volume, fs_vnode* vnode, void* cookie, struct dirent* b
 				break;
 			dirBuf->d_ino = ino;
 
+
 			dirBuf->d_dev = volume->id;
 
 			// Is this direntry associated with a chain of previous winentries?
@@ -3743,6 +3744,14 @@ fat_volume_init(vnode* devvp, mount* bsdVolume, const uint64_t fatFlags, const c
 	if (readOnly == true)
 		bsdVolume->mnt_flag |= MNT_RDONLY;
 
+	// attempt to read the FAT into memory in advance of fillinusemap, to prevent fillinusemap
+	// from doing a separate disk read for each block
+	if (fatVolume->pm_FATsecs > 4) {
+		size_t fatBlocks = fatVolume->pm_FATsecs;
+		block_cache_prefetch(bsdVolume->mnt_cache, static_cast<off_t>(fatVolume->pm_fatblk),
+			&fatBlocks);
+	}
+
 	// have the inuse map filled in
 	rw_lock_write_lock(&fatVolume->pm_fatlock.haikuRW);
 	status = B_FROM_POSIX_ERROR(fillinusemap(fatVolume));
diff --git a/src/system/kernel/cache/block_cache.cpp b/src/system/kernel/cache/block_cache.cpp
index 86a39fb3c7..2e5fb8585b 100644
--- a/src/system/kernel/cache/block_cache.cpp
+++ b/src/system/kernel/cache/block_cache.cpp
@@ -26,6 +26,9 @@
 #include <StackOrHeapArray.h>
 #include <vm/vm_page.h>
 
+#ifndef BUILDING_USERLAND_FS_SERVER
+#include "IORequest.h"
+#endif // !BUILDING_USERLAND_FS_SERVER
 #include "kernel_debug_config.h"
 
 
@@ -327,6 +330,39 @@ private:
 };
 
 
+#ifndef BUILDING_USERLAND_FS_SERVER
+class BlockPrefetcher {	// reads a run of not yet cached blocks into the block cache
+public:
+								BlockPrefetcher(block_cache* cache, off_t blockNumber,
+									size_t numBlocks);
+								~BlockPrefetcher();
+
+			status_t			Allocate();
+			status_t			ReadAsync();
+
+	static	status_t			IterativeIOGetVecsHook(void* cookie, io_request* request,
+									off_t offset, size_t size, struct file_io_vec* vecs,
+									size_t* _count);
+	static	status_t			IterativeIOFinishedHook(void* cookie, io_request* request,
+									status_t status, bool partialTransfer,
+									size_t bytesTransferred);
+
+			size_t				NumAllocated() const { return fNumAllocated; }
+
+private:
+			void				_RemoveAllocated(size_t unbusyCount, size_t removeCount);
+
+private:
+			block_cache*		fCache;			// cache the blocks are added to
+			off_t				fBlockNumber;	// first block of the run
+			size_t				fNumRequested;	// run length asked for
+			size_t				fNumAllocated;	// run length after Allocate()
+			cached_block**		fBlocks;		// fNumRequested slots
+			generic_io_vec*		fDestVecs;		// fNumRequested slots
+};
+#endif // !BUILDING_USERLAND_FS_SERVER
+
+
 class TransactionLocking {
 public:
 	inline bool Lock(block_cache* cache)
@@ -777,6 +813,10 @@ static DoublyLinkedListLink<block_cache> sMarkCache;
 static object_cache* sBlockCache;
 
 
+static void mark_block_busy_reading(block_cache* cache, cached_block* block);
+static void mark_block_unbusy_reading(block_cache* cache, cached_block* block);
+
+
 //	#pragma mark - notifications/listener
 
 
@@ -1411,6 +1451,207 @@ BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
 }
 
 
+#ifndef BUILDING_USERLAND_FS_SERVER
+//	#pragma mark - BlockPrefetcher
+
+
+BlockPrefetcher::BlockPrefetcher(block_cache* cache, off_t blockNumber, size_t numBlocks)
+	:
+	fCache(cache),
+	fBlockNumber(blockNumber),
+	fNumRequested(numBlocks),
+	fNumAllocated(0),
+	fBlocks(new cached_block*[numBlocks]),		// slots are filled in by Allocate()
+	fDestVecs(new generic_io_vec[numBlocks])	// entries are filled in by ReadAsync()
+{
+}
+
+
+BlockPrefetcher::~BlockPrefetcher()
+{
+	delete[] fDestVecs;
+	delete[] fBlocks;	// only the pointer array; the blocks themselves are not freed here
+}
+
+
+/*!	Allocates the leading blocks of the run that are not cached yet and adds them to the hash
+	and to the unused list; the run ends at the first cached block. Requires the cache lock.
+	@return B_BAD_VALUE or B_NO_MEMORY, in which case no blocks remain allocated.
+	@post current_data of each new block has been allocated but is still uninitialized.
+*/
+status_t
+BlockPrefetcher::Allocate()
+{
+	TRACE(("BlockPrefetcher::Allocate: looking up %" B_PRIuSIZE " blocks, starting with %"
+		B_PRIdOFF "\n", fNumRequested, fBlockNumber));
+
+	ASSERT_LOCKED_MUTEX(&fCache->lock);
+
+	size_t finalNumBlocks = fNumRequested;
+
+	// determine whether any requested blocks are already cached
+	for (size_t i = 0; i < fNumRequested; ++i) {
+		off_t blockNumIter = fBlockNumber + i;
+		if (blockNumIter < 0 || blockNumIter >= fCache->max_blocks) {
+			panic("BlockPrefetcher::Allocate: invalid block number %" B_PRIdOFF " (max %"
+				B_PRIdOFF ")", blockNumIter, fCache->max_blocks - 1);
+			return B_BAD_VALUE;
+		}
+		cached_block* block = fCache->hash->Lookup(blockNumIter);
+		if (block != NULL) {
+			// truncate the request
+			TRACE(("BlockPrefetcher::Allocate: found an existing block (%" B_PRIdOFF ")\n",
+				blockNumIter));
+			fBlocks[i] = NULL;
+			finalNumBlocks = i;
+			break;
+		}
+	}
+
+	// allocate the blocks
+	for (size_t i = 0; i < finalNumBlocks; ++i) {
+		cached_block* block = fCache->NewBlock(fBlockNumber + i);
+		if (block == NULL) {
+			_RemoveAllocated(0, i);
+			return B_NO_MEMORY;
+		}
+		fCache->hash->Insert(block);
+
+		block->unused = true;
+		fCache->unused_blocks.Add(block);
+		fCache->unused_block_count++;
+
+		fBlocks[i] = block;
+	}
+
+	fNumAllocated = finalNumBlocks;
+
+	return B_OK;
+}
+
+
+/*!	Marks the allocated blocks busy and schedules one read from disk that covers all of them.
+	@return If an error is returned, then the previously allocated blocks have been cleaned up.
+	@post The object is gone or will be deleted by IterativeIOFinishedHook; do not reuse it.
+*/
+status_t
+BlockPrefetcher::ReadAsync()
+{
+	TRACE(("BlockPrefetcher::Read: reading %" B_PRIuSIZE " blocks\n", fNumAllocated));
+
+	size_t blockSize = fCache->block_size;
+	for (size_t i = 0; i < fNumAllocated; ++i) {
+		fDestVecs[i].base = reinterpret_cast<generic_addr_t>(fBlocks[i]->current_data);
+		fDestVecs[i].length = blockSize;
+		mark_block_busy_reading(fCache, fBlocks[i]);
+	}
+
+	IORequest* request = new IORequest;
+	status_t status = request->Init(fBlockNumber * blockSize, fDestVecs, fNumAllocated,
+		fNumAllocated * blockSize, false, B_DELETE_IO_REQUEST);
+	if (status != B_OK) {
+		TB(Error(fCache, fBlockNumber, "IORequest::Init starting here failed", status));
+		TRACE_ALWAYS("BlockPrefetcher::Read: failed to initialize IO request for %" B_PRIuSIZE
+			" blocks starting with %" B_PRIdOFF ": %s\n",
+			fNumAllocated, fBlockNumber, strerror(status));
+		_RemoveAllocated(fNumAllocated, fNumAllocated);
+		delete request;
+		delete this;	// no I/O was scheduled, so the finished hook will never do this
+		return status;
+	}
+
+	return do_iterative_fd_io(fCache->fd, request, IterativeIOGetVecsHook, IterativeIOFinishedHook,
+		this);
+}
+
+
+/*static*/ status_t
+BlockPrefetcher::IterativeIOGetVecsHook(void* cookie, io_request* request, off_t offset,
+	size_t size, struct file_io_vec* vecs, size_t* _count)
+{
+	TRACE(("BlockPrefetcher::IterativeIOGetVecsHook: setting offset %" B_PRIdOFF " and length %"
+		B_PRIuSIZE "\n", offset, size));
+
+	// one vec always suffices; with room for none, there is nothing to fill in
+	if (*_count != 0) {
+		vecs[0].offset = offset;
+			// request offsets are already relative to the volume
+		vecs[0].length = size;
+			// prefetched blocks always form one contiguous run
+		*_count = 1;
+	}
+
+	return B_OK;
+}
+
+
+/*static*/ status_t
+BlockPrefetcher::IterativeIOFinishedHook(void* cookie, io_request* request, status_t status,
+	bool partialTransfer, size_t bytesTransferred)
+{
+	TRACE(("BlockPrefetcher::IterativeIOFinishedHook: status %s, partial %d\n", strerror(status),
+		partialTransfer));
+
+	BlockPrefetcher* blockPrefetcher = reinterpret_cast<BlockPrefetcher*>(cookie);
+	block_cache* cache = blockPrefetcher->fCache;
+	cached_block** blocks = blockPrefetcher->fBlocks;
+	size_t blockSize = cache->block_size;
+	off_t blockNumber = blockPrefetcher->fBlockNumber;
+	size_t numBlocks = blockPrefetcher->fNumAllocated;
+
+	MutexLocker locker(&cache->lock);	// _RemoveAllocated() asserts that the lock is held
+
+	if (bytesTransferred < numBlocks * blockSize) {	// short read: give up on the whole run
+		blockPrefetcher->_RemoveAllocated(numBlocks, numBlocks);
+		TB(Error(cache, blockNumber, "prefetch starting here failed", status));
+		TRACE_ALWAYS("prefetch_iterative_io_finished_hook: transferred only %" B_PRIuSIZE
+			" bytes in attempt to read %" B_PRIuSIZE " blocks (start block %" B_PRIdOFF "): %s\n",
+			bytesTransferred, numBlocks, blockNumber, strerror(status));
+	} else {
+		for (size_t i = 0; i < numBlocks; ++i) {
+			TB(Read(cache, blockNumber + i));
+			mark_block_unbusy_reading(cache, blocks[i]);
+			blocks[i]->last_accessed = system_time() / 1000000L;
+		}
+	}
+
+	delete blockPrefetcher;	// freed here, not by its creator; see @post of ReadAsync()
+
+	return status;
+}
+
+
+/*!	Cleans up blocks that were allocated for prefetching when an in-progress prefetch
+	is cancelled. The cache lock must be held.
+	@param unbusyCount Number of leading blocks passed to mark_block_unbusy_reading() first.
+	@param removeCount Number of leading blocks taken off the unused list and removed.
+*/
+void
+BlockPrefetcher::_RemoveAllocated(size_t unbusyCount, size_t removeCount)
+{
+	TRACE(("BlockPrefetcher::_RemoveAllocated:  unbusy %" B_PRIuSIZE " and remove %" B_PRIuSIZE
+		" starting with %" B_PRIdOFF "\n", unbusyCount, removeCount, fBlockNumber));
+
+	ASSERT_LOCKED_MUTEX(&fCache->lock);
+
+	for (size_t i = 0; i < unbusyCount; ++i)
+		mark_block_unbusy_reading(fCache, fBlocks[i]);
+
+	for (size_t i = 0; i < removeCount; ++i) {
+		ASSERT(fBlocks[i]->is_dirty == false && fBlocks[i]->unused == true);
+
+		fCache->unused_blocks.Remove(fBlocks[i]);
+		fCache->unused_block_count--;
+
+		fCache->RemoveBlock(fBlocks[i]);
+		fBlocks[i] = NULL;
+	}
+
+	fNumAllocated = 0;
+}
+#endif // !BUILDING_USERLAND_FS_SERVER
+
+
 //	#pragma mark - block_cache
 
 
@@ -3752,3 +3993,48 @@ block_cache_put(void* _cache, off_t blockNumber)
 	put_cached_block(cache, blockNumber);
 }
 
+
+/*! Allocates blocks and schedules an asynchronous read into them, without getting references.
+	@param blockNumber The index of the first requested block.
+	@param _numBlocks As input, the number of blocks requested. As output, the number of
+	blocks actually scheduled.  Prefetching will stop short if the requested range includes a
+	block that is already cached.
+	@return B_OK (possibly with *_numBlocks == 0), B_UNSUPPORTED in userlandfs, or another error.
+*/
+status_t
+block_cache_prefetch(void* _cache, off_t blockNumber, size_t* _numBlocks)
+{
+#ifndef BUILDING_USERLAND_FS_SERVER
+	TRACE(("block_cache_prefetch: fetching %" B_PRIuSIZE " blocks starting with %" B_PRIdOFF "\n",
+		*_numBlocks, blockNumber));
+
+	block_cache* cache = reinterpret_cast<block_cache*>(_cache);
+	MutexLocker locker(&cache->lock);
+
+	size_t numBlocks = *_numBlocks;
+	*_numBlocks = 0;
+
+	BlockPrefetcher* blockPrefetcher = new BlockPrefetcher(cache, blockNumber, numBlocks);
+
+	status_t status = blockPrefetcher->Allocate();
+	if (status != B_OK || blockPrefetcher->NumAllocated() == 0) {
+		TRACE(("block_cache_prefetch returning early (%s): allocated %" B_PRIuSIZE "\n",
+			strerror(status), blockPrefetcher->NumAllocated()));
+		delete blockPrefetcher;
+		return status;
+	}
+
+	numBlocks = blockPrefetcher->NumAllocated();	// read before ReadAsync() hands it off
+
+	status = blockPrefetcher->ReadAsync();
+
+	if (status == B_OK)
+		*_numBlocks = numBlocks;
+
+	return status;
+
+#else // BUILDING_USERLAND_FS_SERVER
+	*_numBlocks = 0;
+	return B_UNSUPPORTED;
+#endif // !BUILDING_USERLAND_FS_SERVER
+}
diff --git a/src/tools/fs_shell/block_cache.cpp b/src/tools/fs_shell/block_cache.cpp
index fd00c8e04a..34e88e5e75 100644
--- a/src/tools/fs_shell/block_cache.cpp
+++ b/src/tools/fs_shell/block_cache.cpp
@@ -1779,3 +1779,10 @@ fssh_block_cache_put(void* _cache, fssh_off_t blockNumber)
 	put_cached_block(cache, blockNumber);
 }
 
+
+fssh_status_t
+fssh_block_cache_prefetch(void* _cache, fssh_off_t blockNumber, fssh_size_t* _numBlocks)
+{
+	*_numBlocks = 0;	// stub: nothing is ever scheduled here
+	return FSSH_B_UNSUPPORTED;
+}