diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ece387..cf61f9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project("vuos") include(GNUInstallDirs) set(CMAKE_C_FLAGS - "-ggdb -Wall -Wextra -pedantic -std=gnu11 -Wno-unused-parameter") + "-ggdb -Wall -Wextra -pedantic -std=gnu11 -Wno-unused-parameter") add_definitions(-D_GNU_SOURCE) execute_process(COMMAND echo ${add_definitions} RESULT_VARIABLE rv) diff --git a/umvu/include/service.h b/umvu/include/service.h index 659ba8e..46f4684 100644 --- a/umvu/include/service.h +++ b/umvu/include/service.h @@ -23,7 +23,7 @@ struct vu_service_t { struct vuht_entry_t *service_ht; // private data of the module (modules can use this pointer as they please. void *private; - // table of vu_syscalls implementation. + // table of vu_syscalls implementation syscall_t module_syscall[]; }; diff --git a/umvu/src/umvu_tracer.c b/umvu/src/umvu_tracer.c index f2f5198..0ca542a 100644 --- a/umvu/src/umvu_tracer.c +++ b/umvu/src/umvu_tracer.c @@ -411,6 +411,12 @@ static int umvu_trace_seccomp(pid_t tracee_tid) P_CONT(sig_tid, 0L); } else P_CONT(sig_tid, 0L); + /* + * PTRACE_O_SYSGOOD option was set by the tracer, so when + * the syscall-exit-stop event occurs, WSTOPSIG(wstatus) will + * be equal to (SIGTRAP | 0x80) to be able to tell it apart + * from the syscall-enter-stop event or normal traps + * */ } else if (WSTOPSIG(wstatus) == (SIGTRAP | 0x80)) { if (syscall_desc.waiting_pid != 0) r_kill(syscall_desc.waiting_pid, SIGKILL); @@ -423,6 +429,9 @@ static int umvu_trace_seccomp(pid_t tracee_tid) umvu_poke_syscall(&regs, &syscall_desc, POKE_ARGS); P_SETREGS(sig_tid, &regs); } else { + /* If the SC's return value has been modified by the wrap + * function, update the corresponding register + * */ if (umvu_poke_syscall(&regs, &syscall_desc, POKE_RETVALUE)) P_SETREGS(sig_tid, &regs); syscall_desc.inout = NULL; diff --git a/umvu/src/vu_modutils.c b/umvu/src/vu_modutils.c index cef2563..7dcc3fb 100644 --- a/umvu/src/vu_modutils.c +++ 
b/umvu/src/vu_modutils.c @@ -146,6 +146,7 @@ struct vu_service_t *module_load(const char *modname) printkdebug(m, "%s syscall %s -> %s", module->name, vu_syscall_names[i], fname); } } + return service; } else { errno = EINVAL; diff --git a/umvu/src/vu_wrap_file.c b/umvu/src/vu_wrap_file.c index 9990a89..7e8b867 100644 --- a/umvu/src/vu_wrap_file.c +++ b/umvu/src/vu_wrap_file.c @@ -558,6 +558,50 @@ void wi_fcntl(struct vuht_entry_t *ht, struct syscall_descriptor_t *sd) { } } return; + case F_GETLK: + case F_SETLK: + case F_SETLKW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: ; + /* + * perform the SC on the VUFS virtualized file + * using the real SC + * let VUFS manage this since it can create virtual representations + * of the fs and apply locks on them + * */ + uintptr_t flockaddr = sd->syscall_args[2]; + void *lockinfo = malloc(sizeof(struct flock)); + + /* + * the third parameter is a pointer to a struct flock + * so process memory cannot be accessed directly + * */ + if (umvu_peek_data(flockaddr, lockinfo, sizeof(struct flock)) < 0) { + free(lockinfo); + break; + } + + ret_value = service_syscall(ht, __VU_fcntl)(sfd, cmd, lockinfo, sd->extra->path); + if (ret_value < 0) { + sd->ret_value = -errno; + } else { + /* + * F_GETLK and F_OFD_GETLK could set the pid field in the struct flock* + * parameter of the process that is blocking the specified file, if any + * */ + if (cmd == F_GETLK || cmd == F_OFD_GETLK) { + /* as before, the tracee memory cannot be accessed directly */ + if (umvu_poke_data(flockaddr, lockinfo, sizeof(struct flock) < 0)) { + } + } + + sd->ret_value = ret_value; + } + + free(lockinfo); + sd->action = SKIPIT; + break; } } else { switch (cmd) { /* common mgmt real fd*/ @@ -568,6 +612,17 @@ void wi_fcntl(struct vuht_entry_t *ht, struct syscall_descriptor_t *sd) { case F_SETFL: sd->action = DOIT_CB_AFTER; return; + case F_GETLK: + case F_SETLK: + case F_SETLKW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: + /* + * 
without VUFS we can't do much, so we let the + * SC run unmodified and fail if it has to + * */ + return; } } if (nested) { @@ -633,9 +688,31 @@ void wo_fcntl(struct vuht_entry_t *ht, struct syscall_descriptor_t *sd) { } break; } + sd->ret_value = ret_value; } +void wi_flock(struct vuht_entry_t *ht, struct syscall_descriptor_t *sd) { + int fd = sd->syscall_args[0]; + int op = sd->syscall_args[1]; + int ret_value; + + if (ht) { + ret_value = service_syscall(ht, __VU_flock)(fd, op, sd->extra->path); + if (ret_value < 0) { + sd->ret_value = -errno; + } else { + sd->ret_value = ret_value; + } + + sd->ret_value = ret_value; + sd->action = SKIPIT; + return; + } + + sd->action = DOIT; +} + /* umask */ /* umask always succeeds. just copy the value */ void wi_umask(struct vuht_entry_t *ht, struct syscall_descriptor_t *sd) { diff --git a/vu_syscalls.conf b/vu_syscalls.conf index 3f1f942..3b53595 100644 --- a/vu_syscalls.conf +++ b/vu_syscalls.conf @@ -71,6 +71,7 @@ capset/2: sc, capset, NULL, NULL clock_gettime/2, gettimeofday/2, time/1: sc, clock_gettime, NULL, NULL clock_settime/2, settimeofday/2: sc, clock_settime, NULL, NULL clock_getres/2: sc, clock_getres, NULL, NULL +flock/3: std, flock, NULL, NULL BUILTIN execve/13, execveat/315: path, execve, NULL, execve diff --git a/vufs/vufs.c b/vufs/vufs.c index 9362153..2aa4875 100644 --- a/vufs/vufs.c +++ b/vufs/vufs.c @@ -48,15 +48,15 @@ static int vufs_confirm(uint8_t type, void *arg, int arglen, struct vuht_entry_t } static int set_mount_options(const char *input, struct vufs_t *vufs) { - int tagc = stropt(input, NULL, NULL, 0); + int tagc = stropt(input, NULL, NULL, 0); int retval = 0; - if(tagc > 1) { - char buf[strlen(input)+1]; - char *tags[tagc]; - char *args[tagc]; + if(tagc > 1) { + char buf[strlen(input)+1]; + char *tags[tagc]; + char *args[tagc]; int excl_choice = 0; - stropt(input, tags, args, buf); - for (int i=0; tags[i] != NULL; i++) { + stropt(input, tags, args, buf); + for (int i=0; tags[i] != NULL; i++) { 
uint64_t strcasetag = strcase(tags[i]); if (vufs == NULL) { switch(strcasetag) { @@ -77,8 +77,8 @@ static int set_mount_options(const char *input, struct vufs_t *vufs) { } if (++excl_choice > 1) { printk(KERN_ERR "vufs: move, merge, cow and mincow are mutually exclusive\n", tags[i]); - return -1; - } + return -1; + } break; default: printk(KERN_ERR "vufs: %s unknown tag\n", tags[i]); @@ -161,11 +161,11 @@ int vu_vufs_mount(const char *source, const char *target, pthread_mutex_init(&(new_vufs->mutex), NULL); pthread_mutex_lock(&(new_vufs->mutex)); - vuht_pathadd(CHECKPATH, source, target, filesystemtype, mountflags, data, s, 0, vufs_confirm, new_vufs); + vuht_pathadd(CHECKPATH, source, target, filesystemtype, mountflags, data, s, 0, vufs_confirm, new_vufs); pthread_mutex_unlock(&(new_vufs->mutex)); - errno = 0; - return 0; + errno = 0; + return 0; rdirerr: close(new_vufs->vdirfd); vdirerr: @@ -175,21 +175,21 @@ int vu_vufs_mount(const char *source, const char *target, } int vu_vufs_umount2(const char *target, int flags) { - struct vuht_entry_t *ht = vu_mod_getht(); - int ret_value; - if ((ret_value = vuht_del(ht, flags)) < 0) { - errno = -ret_value; - return -1; - } - return 0; + struct vuht_entry_t *ht = vu_mod_getht(); + int ret_value; + if ((ret_value = vuht_del(ht, flags)) < 0) { + errno = -ret_value; + return -1; + } + return 0; } void vu_vufs_cleanup(uint8_t type, void *arg, int arglen,struct vuht_entry_t *ht) { - if (type == CHECKPATH) { - struct vufs_t *vufs = vuht_get_private_data(ht); - if (vufs == NULL) { - errno = EINVAL; - } else { + if (type == CHECKPATH) { + struct vufs_t *vufs = vuht_get_private_data(ht); + if (vufs == NULL) { + errno = EINVAL; + } else { if (vufs->ddirfd >= 0) close(vufs->ddirfd); if (vufs->rdirfd >= 0) @@ -200,38 +200,47 @@ void vu_vufs_cleanup(uint8_t type, void *arg, int arglen,struct vuht_entry_t *ht free(vufs->target); free(vufs); } - } + } } void *vu_vufs_init(void) { - struct vu_service_t *s = vu_mod_getservice(); + struct 
vu_service_t *s = vu_mod_getservice(); + /* the following assignments set the actual glibc function + * as the handler for every SC this module does not virtualize + * this tells the hypervisor to use the original implementation + * of the SC instead of the one provided by the module + * */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wincompatible-pointer-types" - vu_syscall_handler(s, read) = read; - vu_syscall_handler(s, write) = write; - vu_syscall_handler(s, lseek) = lseek; + vu_syscall_handler(s, read) = read; + vu_syscall_handler(s, write) = write; + vu_syscall_handler(s, lseek) = lseek; vu_syscall_handler(s, pread64) = pread; - vu_syscall_handler(s, pwrite64) = pwrite; - vu_syscall_handler(s, fcntl) = fcntl; + vu_syscall_handler(s, pwrite64) = pwrite; + /* removed fcntl line since it is virtualized, is it needed? */ #pragma GCC diagnostic pop - return NULL; + + for (int i = 0; i < MAX_OPEN_FILES; i++) + open_fds[i] = -1; + + return NULL; } int vu_vufs_fini(void *private) { - return 0; + return 0; } - struct vu_module_t vu_module = { - .name = "vufs", - .description = "vu filesystem patchworking" - }; +struct vu_module_t vu_module = { + .name = "vufs", + .description = "vu filesystem patchworking" +}; __attribute__((constructor)) - static void init(void) { - debug_set_name(V, "VUFS"); - } + static void init(void) { + debug_set_name(V, "VUFS"); + } __attribute__((destructor)) - static void fini(void) { - debug_set_name(V, ""); - } + static void fini(void) { + debug_set_name(V, ""); + } diff --git a/vufs/vufs.h b/vufs/vufs.h index b3718f4..88d48d7 100644 --- a/vufs/vufs.h +++ b/vufs/vufs.h @@ -5,6 +5,8 @@ VU_PROTOTYPES(vufs) +#define MAX_OPEN_FILES 1024 + #define O_UNLINK (O_PATH | O_EXCL) #define VUFS_TYPEMASK 0x7 @@ -32,4 +34,6 @@ struct vufs_fdprivate { char path[]; }; +int open_fds[MAX_OPEN_FILES]; + #endif diff --git a/vufs/vufsa.c b/vufs/vufsa.c index 7a57439..b01efdb 100644 --- a/vufs/vufsa.c +++ b/vufs/vufsa.c @@ -244,7 +244,9 @@ static 
vufsa_status vufsa_mincow(vufsa_status status, else return VUFSA_DOREAL; case VUFSA_DOREAL: - if (rv < 0 && errno == EACCES) + if (rv < 0 && (errno == EACCES || + errno == EAGAIN || + errno == EWOULDBLOCK )) return VUFSA_DOCOPY; else return VUFSA_FINAL; diff --git a/vufs/vufsops.c b/vufs/vufsops.c index 558d605..47c41c4 100644 --- a/vufs/vufsops.c +++ b/vufs/vufsops.c @@ -35,6 +35,8 @@ #include #include #include +#include +#include #define MAXSIZE ((1ULL<<((sizeof(size_t)*8)-1))-1) #define CHUNKSIZE 4096 @@ -670,25 +672,25 @@ int vu_vufs_chmod(const char *path, mode_t mode, int fd, void *fdprivate) { int vu_vufs_open(const char *pathname, int flags, mode_t mode, void **private) { struct vufs_t *vufs = vu_get_ht_private_data(); vufsa_status status = VUFSA_START; - int retval; + int retval; const char *filepath; mode_t oldmode = vu_mod_getmode(); - pathname += 1; + pathname += 1; if (flags == O_UNLINK) flags = O_PATH; //PATH+EXCL has the special meaning of UNLINK - vufsa_next vufsa_next = vufsa_select(vufs, flags); - while ((status = vufsa_next(status, vufs, pathname, retval)) != VUFSA_EXIT) { - switch (status) { - case VUFSA_DOREAL: + vufsa_next vufsa_next = vufsa_select(vufs, flags); + while ((status = vufsa_next(status, vufs, pathname, retval)) != VUFSA_EXIT) { + switch (status) { + case VUFSA_DOREAL: filepath = *pathname ? pathname : vufs->target; retval = openat(vufs->rdirfd, filepath, flags, mode); - break; - case VUFSA_DOVIRT: + break; + case VUFSA_DOVIRT: filepath = *pathname ? pathname : vufs->source; if (oldmode == 0) vufs_create_path(vufs->vdirfd, filepath, vufs_copyfile_create_path_cb, vufs); retval = openat(vufs->vdirfd, filepath, flags, mode); - break; - case VUFSA_DOCOPY: + break; + case VUFSA_DOCOPY: retval = vufs_copyfile(vufs, pathname, flags & O_TRUNC ? 
0 : MAXSIZE); break; case VUFSA_FINAL: @@ -722,6 +724,11 @@ int vu_vufs_close(int fd, void *fdprivate) { struct vufs_t *vufs = vu_get_ht_private_data(); int retval; pthread_mutex_lock(&(vufs->mutex)); + if (open_fds[fd] != -1) { + // TODO: check correctness + close(open_fds[fd]); + open_fds[fd] = -1; + } retval = close(fd); if (retval == 0 && fdprivate != NULL) { struct vufs_fdprivate *vufs_fdprivate = fdprivate; @@ -732,3 +739,115 @@ int vu_vufs_close(int fd, void *fdprivate) { pthread_mutex_unlock(&(vufs->mutex)); return retval; } + +// RECORD LOCKING SYSCALLS +int vu_vufs_fcntl(int fd, int cmd, ...) { + struct vufs_t *vufs = vu_get_ht_private_data(); + int retval; + + va_list ap; + va_start(ap, cmd); + + switch (cmd) { + case F_SETLK: + case F_SETLKW: + case F_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: + case F_OFD_GETLK: ; + /* retrieve variadic parameters */ + struct flock *lockinfo = va_arg(ap, struct flock*); + char *dest_path = va_arg(ap, char *); + vufsa_status status = VUFSA_START; + // if this is used, vu_fd_table.h must be included + // int flags = vu_fd_get_fdflags(fd, 0); + int flags = O_RDWR; + int vfd = fd; + dest_path += 1; + + vufsa_next vufsa_next = vufsa_select(vufs, flags); + while ((status = vufsa_next(status, vufs, dest_path, retval)) != VUFSA_EXIT) { + switch (status) { + case VUFSA_DOREAL: + retval = fcntl(fd, cmd, lockinfo); + break; + case VUFSA_DOVIRT: + if (open_fds[fd] != -1) { + vfd = open_fds[fd]; + } else { + // TODO: check that this fd is effectively closed + vfd = openat(vufs->vdirfd, dest_path, flags); + open_fds[fd] = vfd; + } + retval = fcntl(vfd, cmd, lockinfo); + break; + case VUFSA_DOCOPY: + vufs_copyfile(vufs, dest_path, MAXSIZE); + break; + case VUFSA_ERR: + retval = -1; + case VUFSA_FINAL: + break; + } + } + break; + + case F_GETOWN_EX: + case F_SETOWN_EX: + retval = fcntl(fd, cmd, va_arg(ap, struct f_owner_ex*)); + break; + + case F_GET_RW_HINT: + case F_SET_RW_HINT: + case F_GET_FILE_RW_HINT: + case 
F_SET_FILE_RW_HINT: + retval = fcntl(fd, cmd, va_arg(ap, uint64_t *)); + break; + + default: + retval = fcntl(fd, cmd, va_arg(ap, int)); + break; + } + + va_end(ap); + return retval; +} + +int vu_vufs_flock(int fd, int operation, char *dest_path) { + struct vufs_t *vufs = vu_get_ht_private_data(); + int retval; + vufsa_status status = VUFSA_START; + // if this is used, vu_fd_table.h must be included + // int flags = vu_fd_get_fdflags(fd, 0); + int flags = O_RDWR; + int vfd = fd; + dest_path += 1; + + vufsa_next vufsa_next = vufsa_select(vufs, flags); + while ((status = vufsa_next(status, vufs, dest_path, retval)) != VUFSA_EXIT) { + switch (status) { + case VUFSA_DOREAL: + retval = flock(fd, operation); + break; + case VUFSA_DOVIRT: + if (open_fds[fd] != -1) { + vfd = open_fds[fd]; + } else { + // TODO: check that this fd is effectively closed + vfd = openat(vufs->vdirfd, dest_path, flags); + open_fds[fd] = vfd; + } + retval = flock(vfd, operation); + break; + case VUFSA_DOCOPY: + retval = vufs_copyfile(vufs, dest_path, MAXSIZE); + break; + case VUFSA_ERR: + retval = -1; + case VUFSA_FINAL: + break; + } + } + + return retval; +} diff --git a/vunet/vunet_ioctl.c b/vunet/vunet_ioctl.c index 64fc12b..30f8cff 100644 --- a/vunet/vunet_ioctl.c +++ b/vunet/vunet_ioctl.c @@ -21,6 +21,7 @@ #include #include #include +#include long vunet_ioctl_parms(unsigned long request) { switch (request) {