From 58ec046735a00f985d7722f49102828de1be7cfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Axel=20D=C3=B6rfler?= Date: Tue, 29 Oct 2002 03:54:07 +0000 Subject: [PATCH] Added support for select(), and poll(). Not yet tested. Moved the CHECK_USER_ADDRESS() macro from vfs.c to kernel.h. git-svn-id: file:///srv/svn/repos/haiku/trunk/current@1746 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- src/kernel/core/fd.c | 105 +++++++-- src/kernel/core/fs/vfs.c | 394 ++++++++++++++++++++++++++++++-- src/kernel/core/fs/vfs_select.h | 38 +++ 3 files changed, 503 insertions(+), 34 deletions(-) create mode 100644 src/kernel/core/fs/vfs_select.h diff --git a/src/kernel/core/fd.c b/src/kernel/core/fd.c index 6022396813..6c8f132646 100644 --- a/src/kernel/core/fd.c +++ b/src/kernel/core/fd.c @@ -167,13 +167,13 @@ remove_fd(struct io_context *context, int fd) static int -fd_dup(int fd, bool kernel) +dup_fd(int fd, bool kernel) { struct io_context *context = get_current_io_context(kernel); struct file_descriptor *descriptor; int status; - TRACE(("fd_dup: fd = %d\n", fd)); + TRACE(("dup_fd: fd = %d\n", fd)); // Try to get the fd structure descriptor = get_fd(context, fd); @@ -190,12 +190,12 @@ fd_dup(int fd, bool kernel) static int -fd_dup2(int oldfd, int newfd, bool kernel) +dup2_fd(int oldfd, int newfd, bool kernel) { struct file_descriptor *evicted = NULL; struct io_context *context; - TRACE(("fd_dup2: ofd = %d, nfd = %d\n", oldfd, newfd)); + TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd)); // quick check if (oldfd < 0 || newfd < 0) @@ -234,6 +234,70 @@ fd_dup2(int oldfd, int newfd, bool kernel) } +status_t +select_fd(int fd, uint8 event, uint32 ref, struct select_sync *sync, bool kernel) +{ + struct file_descriptor *descriptor; + status_t status; + + PRINT(("select_fd(fd = %d, event = %u, ref = %lu, selectsync = %p)\n", fd, event, ref, sync)); + + descriptor = get_fd(get_current_io_context(kernel), fd); + if (descriptor == NULL) + return B_FILE_ERROR; + + if (descriptor->ops->fd_select) 
{ + status = descriptor->ops->fd_select(descriptor, event, ref, sync); + } else { + // if the I/O subsystem doesn't support select(), we will + // immediately notify the select call + status = notify_select_event((void *)sync, ref); + } + + put_fd(descriptor); + return status; +} + + +status_t +deselect_fd(int fd, uint8 event, struct select_sync *sync, bool kernel) +{ + struct file_descriptor *descriptor; + status_t status; + + PRINT(("deselect_fd(fd = %d, event = %u, ref = %lu, selectsync = %p)\n", fd, event, ref, sync)); + + descriptor = get_fd(get_current_io_context(kernel), fd); + if (descriptor == NULL) + return B_FILE_ERROR; + + if (descriptor->ops->fd_deselect) + status = descriptor->ops->fd_deselect(descriptor, event, sync); + else + status = B_OK; + + put_fd(descriptor); + return status; +} + + +/** This function checks if the specified fd is valid in the current + * context. It can be used for a quick check; the fd is not locked + * so it could become invalid immediately after this check. 
+ */ + +bool +fd_is_valid(int fd, bool kernel) +{ + struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd); + if (descriptor == NULL) + return false; + + put_fd(descriptor); + return true; +} + + // #pragma mark - /*** USER routines ***/ @@ -245,7 +309,8 @@ user_read(int fd, off_t pos, void *buffer, size_t length) ssize_t retval; /* This is a user_function, so abort if we have a kernel address */ - CHECK_USER_ADDR(buffer) + if (!CHECK_USER_ADDRESS(buffer)) + return B_BAD_ADDRESS; descriptor = get_fd(get_current_io_context(false), fd); if (!descriptor) @@ -296,7 +361,7 @@ user_seek(int fd, off_t pos, int seekType) if (!descriptor) return B_FILE_ERROR; - TRACE(("user_seek(descriptor = %p)\n",descriptor)); + TRACE(("user_seek(descriptor = %p)\n", descriptor)); if (descriptor->ops->fd_seek) pos = descriptor->ops->fd_seek(descriptor, pos, seekType); @@ -333,14 +398,14 @@ user_ioctl(int fd, ulong op, void *buffer, size_t length) ssize_t -user_read_dir(int fd, struct dirent *buffer,size_t bufferSize,uint32 maxCount) +user_read_dir(int fd, struct dirent *buffer, size_t bufferSize, uint32 maxCount) { struct file_descriptor *descriptor; ssize_t retval; CHECK_USER_ADDR(buffer) - PRINT(("user_read_dir(fd = %d, buffer = 0x%p, bufferSize = %ld, count = %d)\n",fd,buffer,bufferSize,maxCount)); + PRINT(("user_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = %d)\n", fd, buffer, bufferSize, maxCount)); descriptor = get_fd(get_current_io_context(false), fd); if (descriptor == NULL) @@ -348,7 +413,7 @@ user_read_dir(int fd, struct dirent *buffer,size_t bufferSize,uint32 maxCount) if (descriptor->ops->fd_read_dir) { uint32 count = maxCount; - retval = descriptor->ops->fd_read_dir(descriptor,buffer,bufferSize,&count); + retval = descriptor->ops->fd_read_dir(descriptor, buffer, bufferSize, &count); if (retval >= 0) retval = count; } else @@ -365,7 +430,7 @@ user_rewind_dir(int fd) struct file_descriptor *descriptor; status_t status; - 
PRINT(("user_rewind_dir(fd = %d)\n",fd)); + PRINT(("user_rewind_dir(fd = %d)\n", fd)); descriptor = get_fd(get_current_io_context(false), fd); if (descriptor == NULL) @@ -394,7 +459,7 @@ user_read_stat(int fd, struct stat *userStat) if (descriptor == NULL) return B_FILE_ERROR; - TRACE(("user_read_stat(descriptor = %p)\n",descriptor)); + TRACE(("user_read_stat(descriptor = %p)\n", descriptor)); if (descriptor->ops->fd_read_stat) { // we're using the stat buffer on the stack to not have to @@ -450,7 +515,7 @@ user_close(int fd) if (descriptor == NULL) return B_FILE_ERROR; - TRACE(("user_close(descriptor = %p)\n",descriptor)); + TRACE(("user_close(descriptor = %p)\n", descriptor)); remove_fd(io, fd); @@ -462,14 +527,14 @@ user_close(int fd) int user_dup(int fd) { - return fd_dup(fd, false); + return dup_fd(fd, false); } int user_dup2(int ofd, int nfd) { - return fd_dup2(ofd, nfd, false); + return dup2_fd(ofd, nfd, false); } @@ -565,12 +630,12 @@ sys_ioctl(int fd, ulong op, void *buffer, size_t length) ssize_t -sys_read_dir(int fd, struct dirent *buffer,size_t bufferSize,uint32 maxCount) +sys_read_dir(int fd, struct dirent *buffer, size_t bufferSize, uint32 maxCount) { struct file_descriptor *descriptor; ssize_t retval; - PRINT(("sys_read_dir(fd = %d, buffer = 0x%p, bufferSize = %ld, count = %u)\n",fd,buffer,bufferSize,maxCount)); + PRINT(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = %u)\n",fd, buffer, bufferSize, maxCount)); descriptor = get_fd(get_current_io_context(true), fd); if (descriptor == NULL) @@ -578,7 +643,7 @@ sys_read_dir(int fd, struct dirent *buffer,size_t bufferSize,uint32 maxCount) if (descriptor->ops->fd_read_dir) { uint32 count = maxCount; - retval = descriptor->ops->fd_read_dir(descriptor,buffer,bufferSize,&count); + retval = descriptor->ops->fd_read_dir(descriptor, buffer, bufferSize, &count); if (retval >= 0) retval = count; } else @@ -621,7 +686,7 @@ sys_read_stat(int fd, struct stat *stat) if (descriptor == NULL) return 
B_FILE_ERROR; - TRACE(("sys_read_stat(descriptor = %p)\n",descriptor)); + TRACE(("sys_read_stat(descriptor = %p)\n", descriptor)); if (descriptor->ops->fd_read_stat) status = descriptor->ops->fd_read_stat(descriptor, stat); @@ -674,13 +739,13 @@ sys_close(int fd) int sys_dup(int fd) { - return fd_dup(fd, true); + return dup_fd(fd, true); } int sys_dup2(int ofd, int nfd) { - return fd_dup2(ofd, nfd, true); + return dup2_fd(ofd, nfd, true); } diff --git a/src/kernel/core/fs/vfs.c b/src/kernel/core/fs/vfs.c index 1cdccf766e..d2dec927a1 100755 --- a/src/kernel/core/fs/vfs.c +++ b/src/kernel/core/fs/vfs.c @@ -35,6 +35,10 @@ #include "rootfs.h" #include "bootfs.h" +#include "vfs_select.h" +#include +#include + #include #include #include @@ -58,10 +62,6 @@ #define MAX_SYM_LINKS SYMLINKS_MAX -// Passed in buffers from user-space shouldn't be in the kernel -#define CHECK_USER_ADDRESS(x) \ - ((addr)(x) < KERNEL_BASE || (addr)(x) > KERNEL_TOP) - struct vnode { struct vnode *next; struct vnode *mount_prev; @@ -177,8 +177,10 @@ struct fd_ops gFileOps = { file_write, file_seek, common_ioctl, - NULL, - NULL, + NULL, // select() + NULL, // deselect() + NULL, // read_dir() + NULL, // rewind_dir() common_read_stat, common_write_stat, file_close, @@ -186,10 +188,12 @@ struct fd_ops gFileOps = { }; struct fd_ops gDirectoryOps = { - NULL, - NULL, - NULL, + NULL, // read() + NULL, // write() + NULL, // seek() common_ioctl, + NULL, // select() + NULL, // deselect() dir_read, dir_rewind, common_read_stat, @@ -199,10 +203,12 @@ struct fd_ops gDirectoryOps = { }; struct fd_ops gAttributeDirectoryOps = { - NULL, - NULL, - NULL, + NULL, // read() + NULL, // write() + NULL, // seek() common_ioctl, + NULL, // select() + NULL, // deselect() attr_dir_read, attr_dir_rewind, common_read_stat, @@ -216,8 +222,10 @@ struct fd_ops gAttributeOps = { attr_write, attr_seek, common_ioctl, - NULL, - NULL, + NULL, // select() + NULL, // deselect() + NULL, // read_dir() + NULL, // rewind_dir() attr_read_stat, 
attr_write_stat, attr_close, @@ -229,6 +237,8 @@ struct fd_ops gIndexDirectoryOps = { NULL, // write() NULL, // seek() NULL, // ioctl() + NULL, // select() + NULL, // deselect() index_dir_read, index_dir_rewind, NULL, // read_stat() @@ -1498,6 +1508,22 @@ out: } +status_t +notify_select_event(selectsync *_sync, uint32 ref) +{ + select_sync *sync = (select_sync *)_sync; + + if (sync == NULL + || sync->sem < B_OK + || INDEX_FROM_REF(ref) > sync->count) + return B_BAD_VALUE; + + sync->set[INDEX_FROM_REF(ref)].events |= SELECT_FLAG_FROM_REF(ref); + + return release_sem(sync->sem); +} + + int vfs_getrlimit(int resource, struct rlimit * rlp) { @@ -2529,6 +2555,245 @@ common_path_write_stat(char *path, bool traverseLeafLink, const struct stat *sta } +static int +common_select(int numfds, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, + bigtime_t timeout, sigset_t *sigMask, bool kernel) +{ + struct select_sync sync; + status_t status = B_OK; + int count = 0; + int fd; + + // check if fds are valid before doing anything + + for (fd = 0; fd < numfds; fd++) { + if (((readSet && FD_ISSET(fd, readSet)) + || (writeSet && FD_ISSET(fd, writeSet)) + || (errorSet && FD_ISSET(fd, errorSet))) + && !fd_is_valid(fd, kernel)) + return B_FILE_ERROR; + } + + // allocate resources + + memset(&sync, 0, sizeof(select_sync)); + + sync.sem = create_sem(1, "select"); + if (sync.sem < B_OK) + return sync.sem; + + set_sem_owner(sync.sem, B_SYSTEM_TEAM); + + sync.set = kmalloc(sizeof(select_info) * numfds); + if (sync.set == NULL) { + delete_sem(sync.sem); + return B_NO_MEMORY; + } + memset(sync.set, 0, sizeof(select_info) * numfds); + sync.count = numfds; + + // start selecting file descriptors + + for (fd = 0; fd < numfds; fd++) { + if (readSet && FD_ISSET(fd, readSet) + && select_fd(fd, B_SELECT_READ, MAKE_SELECT_REF(fd, B_SELECT_READ), &sync, kernel) == B_OK) + count++; + + if (writeSet && FD_ISSET(fd, writeSet) + && select_fd(fd, B_SELECT_WRITE, MAKE_SELECT_REF(fd, B_SELECT_WRITE), 
&sync, kernel) == B_OK) + count++; + + if (errorSet && FD_ISSET(fd, errorSet) + && select_fd(fd, B_SELECT_ERROR, MAKE_SELECT_REF(fd, B_SELECT_ERROR), &sync, kernel) == B_OK) + count++; + } + + if (count < 1) { + count = B_BAD_VALUE; + goto err; + } + + status = acquire_sem_etc(sync.sem, 1, + B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout); + + // deselect file descriptors + + for (fd = 0; fd < numfds; fd++) { + if (readSet && FD_ISSET(fd, readSet)) + deselect_fd(fd, B_SELECT_READ, &sync, kernel); + + if (writeSet && FD_ISSET(fd, writeSet)) + deselect_fd(fd, B_SELECT_WRITE, &sync, kernel); + + if (errorSet && FD_ISSET(fd, errorSet)) + deselect_fd(fd, B_SELECT_ERROR, &sync, kernel); + } + + // collect the events that have happened in the meantime + + switch (status) { + case B_OK: + // clear sets to store the received events + if (readSet) + FD_ZERO(readSet); + if (writeSet) + FD_ZERO(writeSet); + if (errorSet) + FD_ZERO(errorSet); + + for (count = 0, fd = 0;fd < numfds; fd++) { + if (readSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_READ)) { + FD_SET(fd, readSet); + count++; + } + if (writeSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_WRITE)) { + FD_SET(fd, writeSet); + count++; + } + if (errorSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) { + FD_SET(fd, errorSet); + count++; + } + } + break; + case B_INTERRUPTED: + count = B_INTERRUPTED; + break; + default: + // B_TIMED_OUT, and B_WOULD_BLOCK + count = 0; + } + +err: + delete_sem(sync.sem); + + return count; +} + + +static int +common_poll(struct pollfd *fds, nfds_t numfds, bigtime_t timeout, bool kernel) +{ + status_t status = B_OK; + int count = 0; + int i; + + // allocate resources + + select_sync sync; + memset(&sync, 0, sizeof(select_sync)); + + sync.sem = create_sem(1, "poll"); + if (sync.sem < B_OK) + return sync.sem; + + set_sem_owner(sync.sem, B_SYSTEM_TEAM); + + sync.set = kmalloc(sizeof(select_info) * numfds); + if (sync.set == NULL) { + delete_sem(sync.sem); + 
return B_NO_MEMORY; + } + memset(sync.set, 0, sizeof(select_info) * numfds); + sync.count = numfds; + + // start polling file descriptors (by selecting them) + + for (i = 0; i < numfds; i++) { + int fd = fds[i].fd; + + // check if fds are valid + if (!fd_is_valid(fd, kernel)) { + fds[i].revents = POLLNVAL; + continue; + } + + if ((fds[i].events & POLLIN) + && select_fd(fd, B_SELECT_READ, MAKE_SELECT_REF(fd, B_SELECT_READ), &sync, kernel) == B_OK) + count++; + if ((fds[i].events & POLLOUT) + && select_fd(fd, B_SELECT_WRITE, MAKE_SELECT_REF(fd, B_SELECT_WRITE), &sync, kernel) == B_OK) + count++; + + if ((fds[i].events & POLLRDBAND) + && select_fd(fd, B_SELECT_PRI_READ, MAKE_SELECT_REF(fd, B_SELECT_PRI_READ), &sync, kernel) == B_OK) + count++; + if ((fds[i].events & POLLWRBAND) + && select_fd(fd, B_SELECT_PRI_WRITE, MAKE_SELECT_REF(fd, B_SELECT_PRI_WRITE), &sync, kernel) == B_OK) + count++; + + if ((fds[i].events & POLLPRI) + && select_fd(fd, B_SELECT_HIGH_PRI_READ, MAKE_SELECT_REF(fd, B_SELECT_HIGH_PRI_READ), &sync, kernel) == B_OK) + count++; + + // Always select POLLERR and POLLHUP - would be nice if we'd have another + // notify_select_event() call which could directly trigger certain events + // without a specific select. + + if (select_fd(fd, B_SELECT_ERROR, MAKE_SELECT_REF(fd, B_SELECT_ERROR), + &sync, kernel) == B_OK) + count++; + if (select_fd(fd, B_SELECT_DISCONNECTED, MAKE_SELECT_REF(fd, B_SELECT_DISCONNECTED), + &sync, kernel) == B_OK) + count++; + } + + if (count < 1) { + count = B_BAD_VALUE; + goto err; + } + + status = acquire_sem_etc(sync.sem, 1, + B_CAN_INTERRUPT | (timeout != -1 ? 
B_RELATIVE_TIMEOUT : 0), timeout); + + // deselect file descriptors + + for (i = 0; i < numfds; i++) { + int fd = fds[i].fd; + + if (fds[i].events & POLLIN) + deselect_fd(fd, B_SELECT_READ, &sync, kernel); + if (fds[i].events & POLLOUT) + deselect_fd(fd, B_SELECT_WRITE, &sync, kernel); + + if (fds[i].events & POLLRDBAND) + deselect_fd(fd, B_SELECT_PRI_READ, &sync, kernel); + if (fds[i].events & POLLWRBAND) + deselect_fd(fd, B_SELECT_PRI_WRITE, &sync, kernel); + + if (fds[i].events & POLLPRI) + deselect_fd(fd, B_SELECT_HIGH_PRI_READ, &sync, kernel); + + deselect_fd(fd, B_SELECT_ERROR, &sync, kernel); + deselect_fd(fd, B_SELECT_DISCONNECTED, &sync, kernel); + } + + // collect the events that have happened in the meantime + + switch (status) { + case B_OK: + for (count = 0, i = 0;i < numfds; i++) { + // POLLxxx flags and B_SELECT_xxx flags are compatible + fds[i].revents = sync.set[i].events; + if (fds[i].revents != 0) + count++; + } + break; + case B_INTERRUPTED: + count = B_INTERRUPTED; + break; + default: + // B_TIMED_OUT, and B_WOULD_BLOCK + count = 0; + } + +err: + delete_sem(sync.sem); + + return count; +} + + +static int attr_dir_open(int fd, char *path, bool kernel) { @@ -3664,6 +3929,21 @@ sys_write_path_stat(const char *path, bool traverseLeafLink, const struct stat * } +int +sys_select(int numfds, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, + bigtime_t timeout, sigset_t *sigMask) +{ + return common_select(numfds, readSet, writeSet, errorSet, timeout, sigMask, true); +} + + +int +sys_poll(struct pollfd *fds, int numfds, bigtime_t timeout) +{ + return common_poll(fds, numfds, timeout, true); +} + + int sys_open_attr_dir(int fd, const char *path) { @@ -4188,6 +4468,92 @@ user_write_path_stat(const char *userPath, bool traverseLeafLink, const struct s } +int +user_select(int numfds, fd_set *userReadSet, fd_set *userWriteSet, fd_set *userErrorSet, + bigtime_t timeout, sigset_t *userSigMask) +{ + fd_set *readSet = NULL, *writeSet = NULL, *errorSet = 
NULL; + uint32 bytes = _howmany(numfds, NFDBITS) * sizeof(fd_mask); + sigset_t sigMask; + int result; + + if (numfds < 0) + return B_BAD_VALUE; + + if ((userReadSet != NULL && !CHECK_USER_ADDRESS(userReadSet)) + || (userWriteSet != NULL && !CHECK_USER_ADDRESS(userWriteSet)) + || (userErrorSet != NULL && !CHECK_USER_ADDRESS(userErrorSet)) + || (userSigMask != NULL && !CHECK_USER_ADDRESS(userSigMask))) + return B_BAD_ADDRESS; + + // copy parameters + + if (userReadSet != NULL) { + readSet = kmalloc(bytes); + if (readSet == NULL) { + result = B_NO_MEMORY; + goto err; + } + if (user_memcpy(readSet, userReadSet, bytes) < B_OK) { + result = B_BAD_ADDRESS; + goto err; + } + } + + if (userWriteSet != NULL) { + writeSet = kmalloc(bytes); + if (writeSet == NULL) { + result = B_NO_MEMORY; + goto err; + } + if (user_memcpy(writeSet, userWriteSet, bytes) < B_OK) { + result = B_BAD_ADDRESS; + goto err; + } + } + + if (userErrorSet != NULL) { + errorSet = kmalloc(bytes); + if (errorSet == NULL) { + result = B_NO_MEMORY; + goto err; + } + if (user_memcpy(errorSet, userErrorSet, bytes) < B_OK) { + result = B_BAD_ADDRESS; + goto err; + } + } + + if (userSigMask != NULL) + sigMask = *userSigMask; + + result = common_select(numfds, readSet, writeSet, errorSet, timeout, userSigMask ? 
&sigMask : NULL, false); + + // copy back results + + if (result >= B_OK + && ((readSet != NULL && user_memcpy(userReadSet, readSet, bytes) < B_OK) + || (writeSet != NULL && user_memcpy(userWriteSet, writeSet, bytes) < B_OK) + || (errorSet != NULL && user_memcpy(userErrorSet, errorSet, bytes) < B_OK))) + result = B_BAD_ADDRESS; + +err: + kfree(readSet); + kfree(writeSet); + kfree(errorSet); + + return result; +} + + +int +user_poll(struct pollfd *userfds, int numfds, bigtime_t timeout) +{ + //return common_poll(userfds, numfds, timeout, false); + return B_ERROR; +} + + int user_open_attr_dir(int fd, const char *userPath) { diff --git a/src/kernel/core/fs/vfs_select.h b/src/kernel/core/fs/vfs_select.h new file mode 100644 index 0000000000..6a26c042f1 --- /dev/null +++ b/src/kernel/core/fs/vfs_select.h @@ -0,0 +1,38 @@ +/* +** Copyright 2002, Axel Dörfler, axeld@pinc-software.de. All rights reserved. +** Distributed under the terms of the OpenBeOS License. +*/ +#ifndef FS_SELECT_H +#define FS_SELECT_H + +typedef struct select_info { + uint8 events; +} select_info; + +typedef struct select_sync { + sem_id sem; + uint32 count; + select_info *set; +} select_sync; + +enum select_events { + B_SELECT_READ = 1, + B_SELECT_WRITE, + B_SELECT_ERROR, + + B_SELECT_PRI_READ, + B_SELECT_PRI_WRITE, + + B_SELECT_HIGH_PRI_READ, + B_SELECT_HIGH_PRI_WRITE, + + B_SELECT_DISCONNECTED +}; + +#define SELECT_FLAG(type) (1L << (type - 1)) +#define MAKE_SELECT_REF(index, type) ((SELECT_FLAG(type) << 16L) | ((index) & 0xffffUL)) + +#define INDEX_FROM_REF(ref) ((ref) & 0xffffUL) +#define SELECT_FLAG_FROM_REF(ref) ((ref) >> 16L) + +#endif /* FS_SELECT_H */