Current File: /usr/src/file_protector-1.1-1507/transport/transport.c
/**
   @file
   @brief Message transport between kernel and userspace
   @details Copyright (c) 2017-2021 Acronis International GmbH
   @author Mikhail Krivtsov (mikhail.krivtsov@acronis.com)
   @since $Id: $
*/

#include "transport.h"

#include "compat.h"
#include "debug.h"
#include "device.h"
#include "memory.h"
#include "message.h"
#include "ring.h"
#include "set.h"
#include "syscall_common.h"
#include "task_info_map.h"
#include "tracepoints.h"

#include <asm/io.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/jiffies.h>	// msecs_to_jiffies()
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>	// copy_from_user(), copy_to_user()
#include <linux/wait.h>		// wait_event*(), wake_up*()

#define TRANSPORT_MSG_SIZE_MAX (1<<10)
#define TRANSPORT_QUEUE_CAPACITY (0x1000 / sizeof(msg_t *))
#define TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS (60*1000)
#define TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS (5*1000)

#define TRANSPORT_PRINTF(format, args...) DPRINTF(format, ##args)

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#define DATA_QUEUE_HEADER_SIZE sizeof(shared_data_queue_t)
#define DATA_QUEUE_ENTRY_HEADER_SIZE sizeof(data_queue_entry_t)

typedef struct {
	struct list_head transport_list_node;
	pid_t control_tgid;
	atomic64_t events_mask;
	// FIXME: Use 'msg_wait_queue.lock' instead of 'msg_spinlock'
	// #define msg_spinlock msg_wait_queue.lock
	spinlock_t msg_spinlock;
	wait_queue_head_t msg_wait_queue;
	bool shutdown;
	ring_t msg_ring;
	// sent messages waiting for 'reply'
	set_t sent_msgs_set;
	uint32_t queue_size;
	shared_data_queue_t *queue;
	atomic_t queue_event;
} transport_t;

typedef struct {
	struct mutex transport_mutex;
	unsigned transport_count;
	atomic64_t combined_events_mask;
	spinlock_t transport_spinlock;
	struct list_head transport_list;
	msg_id_t msg_id_sequence;
} transport_global_t;

static transport_global_t transport_global;

// Virtual memory allocators that are suitable for 'mmap'
static void *virt_mem_alloc(size_t size)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP | __GFP_NORETRY;
	return (void *) __get_free_pages(gfp_flags, get_order(size));
}

static void virt_mem_free(void *ptr, size_t size)
{
	struct page *page;
	if (!ptr)
		return;
	page = virt_to_head_page(ptr);
	__free_pages(page, get_order(size));
}

static void transport_global_init(void)
{
	mutex_init(&transport_global.transport_mutex);
	transport_global.transport_count = 0;
	spin_lock_init(&transport_global.transport_spinlock);
	INIT_LIST_HEAD(&transport_global.transport_list);
	transport_global.msg_id_sequence = 0;
}

static void transport_global_register(transport_t *transport)
{
	spin_lock(&transport_global.transport_spinlock);
	list_add_tail(&transport->transport_list_node, &transport_global.transport_list);
	spin_unlock(&transport_global.transport_spinlock);
}

static void transport_global_unregister(transport_t *transport)
{
	spin_lock(&transport_global.transport_spinlock);
	if (!list_empty(&transport->transport_list_node)) {
		list_del_init(&transport->transport_list_node);
	}
	spin_unlock(&transport_global.transport_spinlock);
}

static bool transport_is_control_tgid_impl(pid_t tgid)
{
	transport_t *transport;
	bool is_control_tgid = false;
	list_for_each_entry(transport, &transport_global.transport_list, transport_list_node) {
		if (tgid == transport->control_tgid) {
			is_control_tgid = true;
			break;
		}
	}
	return is_control_tgid;
}

static bool transport_is_control_tgid(pid_t tgid)
{
	bool is_control_tgid;
	spin_lock(&transport_global.transport_spinlock);
is_control_tgid = transport_is_control_tgid_impl(tgid); spin_unlock(&transport_global.transport_spinlock); return is_control_tgid; } static msg_id_t transport_global_sequence_next_impl(void) { msg_id_t msg_id = ++transport_global.msg_id_sequence; if (!msg_id) { msg_id = ++transport_global.msg_id_sequence; } DPRINTF("msg_id=%llX", msg_id); return msg_id; } static msg_id_t transport_global_sequence_next(void) { // TODO DK: use 'atomic64' instead of spinlock msg_id_t msg_id; spin_lock(&transport_global.transport_spinlock); msg_id = transport_global_sequence_next_impl(); spin_unlock(&transport_global.transport_spinlock); return msg_id; } static void transport_global_recalculate_combined_event_mask_impl(void) { transport_t *transport; uint64_t combined_mask = 0; list_for_each_entry(transport, &transport_global.transport_list, transport_list_node) { barrier(); combined_mask |= atomic64_read(&transport->events_mask); barrier(); } barrier(); atomic64_set(&transport_global.combined_events_mask, combined_mask); } static void transport_global_recalculate_combined_event_mask(void) { spin_lock(&transport_global.transport_spinlock); transport_global_recalculate_combined_event_mask_impl(); spin_unlock(&transport_global.transport_spinlock); } uint64_t transport_global_get_combined_mask(void) { return atomic64_read(&transport_global.combined_events_mask); } static void drop_msgs_impl(ring_t *ring) { while (!ring_is_empty(ring)) { msg_t *msg = *(msg_t **) ring_consumer_ptr(ring); msg_unref(msg); ring_consumer_index_move_one(ring); } } /* 'msg ref/unref' for messages stored in 'sent_msgs_set' are invoked in 'msg_reply_wait_count inc/dec'. There is no need for separate 'msg ref/unref' calls. */ static void drop_sent_msgs_impl(set_t *set) { void *item_ptr = set_begin_ptr(set); void *end_ptr = set_end_ptr(set); while (item_ptr < end_ptr) { msg_t *msg = *(msg_t **) item_ptr; msg_reply_wait_count_dec(msg); item_ptr = set_ptr_next(set, item_ptr); } set->count = 0; } static void transport_shutdown(transport_t *transport) { DPRINTF("transport=%p", transport); spin_lock(&transport->msg_spinlock); { atomic64_set(&transport->events_mask, 0); transport->shutdown = true; // Discard undelivered messages drop_msgs_impl(&transport->msg_ring); // Discard messages waiting for 'reply' drop_sent_msgs_impl(&transport->sent_msgs_set); } spin_unlock(&transport->msg_spinlock); // wakeup all userspace 'read' waiters wake_up_all(&transport->msg_wait_queue); } // identify and shutdown transport failed to reply static void transport_shutdown_msg(transport_t *transport, msg_t *unreplied_msg) { bool found = false; DPRINTF("transport=%p unreplied_msg=%p", transport, unreplied_msg); spin_lock(&transport->msg_spinlock); { void *item_ptr = set_begin_ptr(&transport->sent_msgs_set); void *end_ptr = set_end_ptr(&transport->sent_msgs_set); while (item_ptr < end_ptr) { if (unreplied_msg == *(msg_t **) item_ptr) { found = true; break; } item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr); } } spin_unlock(&transport->msg_spinlock); if (found) { WPRINTF("deativating transport on reply wait timeout"); transport_shutdown(transport); } } // identify and shutdown transport failed to reply static void transport_global_shutdown_msg(msg_t *unreplied_msg) { DPRINTF("unreplied_msg=%p", unreplied_msg); spin_lock(&transport_global.transport_spinlock); { transport_t *transport; transport_t *next_transport; list_for_each_entry_safe(transport, next_transport, &transport_global.transport_list, transport_list_node) { transport_shutdown_msg(transport, 
unreplied_msg); } } spin_unlock(&transport_global.transport_spinlock); } static void transport_free(transport_t *transport) { DPRINTF("transport=%p", transport); transport_global_unregister(transport); transport_shutdown(transport); IPRINTF("message queue items_count_max=%u capacity=%u", ring_items_count_max(&transport->msg_ring), ring_capacity(&transport->msg_ring)); IPRINTF("sent_msgs_set items_count_max=%u capacity=%u", set_items_count_max(&transport->sent_msgs_set), set_capacity(&transport->sent_msgs_set)); mem_free(ring_buffer(&transport->msg_ring)); mem_free(set_buffer(&transport->sent_msgs_set)); if (transport->queue) { virt_mem_free(transport->queue, transport->queue_size + DATA_QUEUE_HEADER_SIZE); } mem_free(transport); } static bool transport_ring_init(ring_t *ring) { size_t buffer_size = TRANSPORT_QUEUE_CAPACITY * sizeof(msg_t *); msg_t **msgs; bool success; if (!buffer_size) { msgs = NULL; success = true; } else { msgs = mem_alloc0(buffer_size); success = (bool) msgs; } ring_init(ring, msgs, buffer_size, sizeof(msg_t *)); return success; } static bool transport_set_init(set_t *set) { size_t buffer_size = TRANSPORT_QUEUE_CAPACITY * sizeof(msg_t *); msg_t **msgs; bool success; if (!buffer_size) { msgs = NULL; success = true; } else { msgs = mem_alloc0(buffer_size); success = (bool) msgs; } set_init(set, msgs, buffer_size, sizeof(msg_t *)); return success; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Shared with userspace Data Queue implementation // Some defines for compiler atomic hacks. We only care about x86_64 here, on other platforms those are NOT valid. // Other platforms must provider their implementations for 'smp_*' & 'READ/WRITE_ONCE' // Generally those convenience memory ordering functions are available in Linux 3.12 & Linux 3.18. // I assume it will not be necessary to support any other architectures other than x86_64 on kernels that old. 
#ifndef READ_ONCE
#define __READ_ONCE_SIZE \
({ \
	switch (size) { \
	case 1: *(__u8 *)res = *(volatile __u8 *)p; break; \
	case 2: *(__u16 *)res = *(volatile __u16 *)p; break; \
	case 4: *(__u32 *)res = *(volatile __u32 *)p; break; \
	case 8: *(__u64 *)res = *(volatile __u64 *)p; break; \
	default: \
		barrier(); \
		__builtin_memcpy((void *)res, (const void *)p, size); \
		barrier(); \
	} \
})

static void _do_read_once_size(const volatile void *p, void *res, int size)
{
	__READ_ONCE_SIZE;
}

#define READ_ONCE(x) \
({ \
	union { typeof(x) __val; char __c[1]; } __u; \
	_do_read_once_size(&(x), __u.__c, sizeof(x)); \
	smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
	__u.__val; \
})
#endif

#ifndef WRITE_ONCE
static void _do_write_once_size(volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
	case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
	case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
	case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
	default:
		barrier();
		__builtin_memcpy((void *)p, (const void *)res, size);
		barrier();
	}
}

#define WRITE_ONCE(x, val) \
({ \
	union { typeof(x) __val; char __c[1]; } __u = \
		{ .__val = (__force typeof(x)) (val) }; \
	_do_write_once_size(&(x), __u.__c, sizeof(x)); \
	__u.__val; \
})
#endif

#ifndef smp_store_release
#define smp_store_release(p, v) \
do { \
	barrier(); \
	WRITE_ONCE(*p, v); \
} while (0)
#endif

#ifndef smp_load_acquire
#define smp_load_acquire(p) \
({ \
	typeof(*p) ___p1 = READ_ONCE(*p); \
	barrier(); \
	___p1; \
})
#endif

#define DATA_QUEUE_ENTRY_AT(queue, v) (data_queue_entry_t*)((uint8_t *)queue->entries + v)

#ifdef ROUND_UP
#undef ROUND_UP
#endif
#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))

static inline void data_queue_write_new_entry(shared_data_queue_t *queue, uint32_t offset, void *data, uint32_t data_size)
{
	data_queue_entry_t *entry = DATA_QUEUE_ENTRY_AT(queue, offset);

	// Technically this 'WRITE_ONCE' is useless but let's follow the rules
	WRITE_ONCE(entry->size, data_size);
	// Not protecting the memcpy with 'barrier' call, we will use 'smp_*' later anyways
	memcpy(&entry->data, data, data_size);
}

// This function is called from data queue 'writer' under the spin_lock as it is NOT thread-safe
// As such, reads from 'queue->tail' can be done using 'READ_ONCE' (relaxed), writes must be done using 'smp_store_release'.
// 'reader' may decide to alter 'queue->head' so 'smp_load_acquire' must be used to read it, writes are NOT allowed.
static bool transport_shared_data_queue_enqueue_impl(transport_t *transport, void *data, uint32_t data_size)
{
	uint32_t head, tail, new_tail;
	// 'entry_size' is the size of the whole 'data_queue_entry' including the header
	// in the 'data_queue_entry' 'data_size' is written instead to avoid useless checks.
	uint32_t entry_size = data_size + DATA_QUEUE_ENTRY_HEADER_SIZE;
	shared_data_queue_t *queue = transport->queue;
	uint32_t queue_size = transport->queue_size;

	// Notice that we are not doing any memory shenanigans here to load tail & head.
	// The barriers will be done later if it will appear that they are necessary.
	// !!! 'head' might not be synchronized with 'reader', it is OK and will be handled in the end.
	tail = READ_ONCE(queue->tail);
	head = smp_load_acquire(&queue->head);

	// Check for unreasonable 'tail' or 'head', must never happen.
if (queue_size < tail || queue_size < head) { WPRINTF("Invalid tail/head detected: tail=%u, head=%u, size=%u" , (unsigned) tail, (unsigned) head, (unsigned) queue_size); return false; } // Start inserting the contents of 'data' in the shared data queue if (tail >= head) { // Tail is further than head, it is a regular scenario. Handle it // head tail // V V // -----|*************|----------------- // ^ ^ // data to be dequeued | // free space if ((tail + entry_size) <= queue_size) { // There is enough buffer in the 'tail' of the queue, write the entry and move the tail // head tail new_tail // V V V // -----|*************|+++++++|------- // ^ // new entry data_queue_write_new_entry(queue, tail /*off*/, data, data_size); new_tail = tail + entry_size; } else if (head > entry_size) { // As first condition did not satisfy, cannot put data after 'tail' // Have to loop back to the start and there is enough space before userspace 'head' // head tail // V V // |++++++|------------|*************|?? <- zapped entry w/ size>queue_size-tail, if fits // ^ ^ // off new_tail // Need to explain the userspace that current entry is too long to fit. // If there is not enough space to even place a entry header, do nothing. // Otherwise, deliberately zap the entry by putting 'data_size' that is too big. if ((queue_size - tail) >= DATA_QUEUE_ENTRY_HEADER_SIZE) { data_queue_entry_t *entry_to_zap = DATA_QUEUE_ENTRY_AT(queue, tail); entry_to_zap->size = data_size; // do not touch 'entry_to_zap->data', it is bogus. entry just says go to the start } // Write data at the beginning of the queue data_queue_write_new_entry(queue, 0 /*off*/, data, data_size); new_tail = /*off==0 + */ entry_size; } else { // There is neither enough space after 'tail' nor before 'head', bail WPRINTF("No more space is left"); return false; } } else { // Catching to the 'head' from the other size. // tail head // V V // ****|--------------|*************** // Insert can still be done if 'head' will not be overrun if ((head - tail) > entry_size) { // tail head // V V // ****|+++++|------|*************** // ^ // new_tail data_queue_write_new_entry(queue, tail, data, data_size); new_tail = tail + entry_size; } else { // There is not enough space not to overrun 'head', bail WPRINTF("No more space is left"); return false; } } // Expose all the content written in this thread as per 'release' semantics. // Reader must do 'smp_store_acquire' on the same variable ('tail') to see 'entries' written. // !!! This logic does NOT enforce 'tail' in 'reader' to be equal to 'tail' in 'writer' new_tail = ROUND_UP(new_tail, sizeof(uint32_t)); smp_store_release(&queue->tail, new_tail); // The new tail was published to the 'queue' but is it necessary to notify the 'reader'? // If in the beginning 'tail == head', it means that userspace has finished reading all the // content and is going to wait or is already waiting for the 'event'. // In such case it is clear that will must notify the 'reader' no matter what. // Moreover 'reader' cannot move the 'head' past the 'tail' so it is guaranteed that it is // indeed the latest published 'head' by the reader. if (tail != head) { // If 'tail != head', it is not as clear. If it so happened that userspace moved the 'head' // to be equal to 'tail' while 'writer' was adding the new entry, 'reader' will go 'wait'. // So we must refresh the 'head' to ensure we actually do not need to wakeup the 'reader'. // The other situation is also valid - 'writer' might delay writes to the head as 'atomic' ops. 
// We need to make sure userspace will continue consuming events as we wrote the 'tail'. // Whenever userspace will detect that its current 'tail==head', it will perform 'smb_mb' // to fetch the new 'tail' we just wrote to ensure it does not need to consume anymore. smp_mb(); head = READ_ONCE(queue->head); } if (tail == head) { // atomic_ops.rst: atomic_read() and atomic_set() DO NOT IMPLY BARRIERS! atomic_set(&transport->queue_event, 1); // The data queue was empty, wakeup the 'reader' which is waiting for us. // Use 'smp_wmb' to make sure 'tail' that we stored will be seen by the user. // It is also fine if we did 'smp_mb' before, we will pair with 'smb_rmb' just fine. // Also using 'smp_wmb' to ensure 'atomic_set' did set the 'queue_event'. smp_wmb(); wake_up_interruptible(&transport->msg_wait_queue); TRANSPORT_PRINTF("woken up ht=%u nt=%u", tail, new_tail); } return true; } static long transport_queue_events_available(transport_t *transport) { uint32_t tail, head; int ev; shared_data_queue_t *queue = READ_ONCE(transport->queue); smp_rmb(); ev = atomic_xchg(&transport->queue_event, 0); if (ev) { TRANSPORT_PRINTF("check ev active"); return 1; } // This should not be necessary but doing it just in case. tail = READ_ONCE(queue->tail); head = READ_ONCE(queue->head); TRANSPORT_PRINTF("check h=%u t=%u", head, tail); return transport->shutdown || (head != tail); } // This function is called whenever userspace 'reader' deemed that there are no more events to read. // It will be waiting for the data queue to gain new content using 'msg_wait_queue'. // 'wake_up_interruptible' does 'wakeup' when it detects that the queue is being empty. static long transport_data_queue_wait(transport_t *transport) { shared_data_queue_t *queue = READ_ONCE(transport->queue); long ret; task_info_map_delete_exited(); if (!queue) { return -EINVAL; } if (wait_event_interruptible_exclusive(transport->msg_wait_queue, transport_queue_events_available(transport))) { ret = -EINTR; } else { if (transport->shutdown) { ret = -EIO; } else { ret = 0; } } return ret; } static int transport_data_queue_mmap(transport_t *transport, struct vm_area_struct *vma) { loff_t offset = (loff_t) vma->vm_pgoff << PAGE_SHIFT; unsigned long sz = vma->vm_end - vma->vm_start; unsigned long pfn; struct page *page; void *ptr; if (0 != offset) { return -EINVAL; } ptr = READ_ONCE(transport->queue); if (!ptr) { return -EINVAL; } page = virt_to_head_page(ptr); if (sz > (PAGE_SIZE << compound_order(page))) { return -EINVAL; } pfn = virt_to_phys(ptr) >> PAGE_SHIFT; return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); } static long data_queue_create(const data_queue_params_t *params, shared_data_queue_t **pqueue) { shared_data_queue_t *queue; uint32_t size = params->size; if (size <= DATA_QUEUE_HEADER_SIZE) return -EINVAL; queue = (shared_data_queue_t*) virt_mem_alloc(size); if (!queue) return -ENOMEM; *pqueue = queue; return 0; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - static bool transport_send_msg_nowait(transport_t *transport, msg_t *msg) { bool need_wakeup = false; msg_type_t mt = MSG_TYPE(msg); if (mt >= MT_FIRST_ACTIVITY_EVENT && !(atomic64_read(&transport->events_mask) & MSG_TYPE_TO_EVENT_MASK(mt))) { return false; } spin_lock(&transport->msg_spinlock); { if (transport->shutdown) { spin_unlock(&transport->msg_spinlock); return false; } if (msg_img_is_reply_required(MSG_IMG(msg))) { unsigned item_index; if (set_is_full(&transport->sent_msgs_set)) { WPRINTF("'sent_msgs_set' overflow 
(capacity=%u)", set_capacity(&transport->sent_msgs_set)); spin_unlock(&transport->msg_spinlock); transport_shutdown(transport); return false; } item_index = set_items_count(&transport->sent_msgs_set); /* 'msg ref/unref' for messages stored in 'sent_msgs_set' are invoked in 'msg_reply_wait_count inc/dec'. There is no need for separate 'msg ref/unref' calls. */ *(msg_t **) set_item_ptr(&transport->sent_msgs_set, item_index) = msg_reply_wait_count_inc(msg); set_items_count_set(&transport->sent_msgs_set, item_index + 1); } if (transport->queue) { need_wakeup = false; if (!transport_shared_data_queue_enqueue_impl(transport, MSG_IMG(msg), MSG_SIZE(msg))) { WPRINTF("mmaped queue overflow"); spin_unlock(&transport->msg_spinlock); transport_shutdown(transport); return false; } } else { need_wakeup = true; if (ring_is_full(&transport->msg_ring)) { WPRINTF("message queue overflow (capacity=%u)", ring_capacity(&transport->msg_ring)); spin_unlock(&transport->msg_spinlock); transport_shutdown(transport); return false; } *(msg_t **) ring_producer_ptr(&transport->msg_ring) = msg_ref(msg); ring_producer_index_move_one(&transport->msg_ring); } } spin_unlock(&transport->msg_spinlock); if (need_wakeup) { // wakeup userspace reader wake_up_interruptible_sync(&transport->msg_wait_queue); } return true; } static bool transport_send_hello_nowait(transport_t *transport) { msg_t *msg = hello_msg_new(); bool success; if (!msg) { success = false; } else { success = transport_send_msg_nowait(transport, msg); msg_unref(msg); } return success; } static bool send_msg_nowait(msg_t *msg) { bool sent = false; spin_lock(&transport_global.transport_spinlock); if (!transport_is_control_tgid_impl(current->tgid)) { transport_t *transport; transport_t *next_transport; list_for_each_entry_safe(transport, next_transport, &transport_global.transport_list, transport_list_node) { sent |= transport_send_msg_nowait(transport, msg); } } spin_unlock(&transport_global.transport_spinlock); return sent; } static transport_t *transport_new(void) { transport_t *transport = mem_alloc0(sizeof(transport_t)); if (transport) { INIT_LIST_HEAD(&transport->transport_list_node); // remember client's process doing 'open' to auto-ignore it transport->control_tgid = current->tgid; spin_lock_init(&transport->msg_spinlock); init_waitqueue_head(&transport->msg_wait_queue); atomic64_set(&transport->events_mask, 0); transport->shutdown = false; transport->queue = NULL; atomic_set(&transport->queue_event, 0); if (transport_ring_init(&transport->msg_ring) && transport_set_init(&transport->sent_msgs_set) && 0 == task_info_status_set(current->tgid, TS_IGNORE) && transport_send_hello_nowait(transport)) { transport_global_register(transport); } else { transport_free(transport); transport = NULL; } } DPRINTF("transport=%p", transport); return transport; } int __init transport_mod_init(void) { int ret; transport_global_init(); ret = device_mod_init(); if (ret) { EPRINTF("'device_mod_init()' failure %i", ret); } return ret; } void transport_mod_down(void) { DPRINTF(""); device_mod_down(); DPRINTF(""); } static msg_t *transport_lookup_msg_ref(transport_t *transport, msg_id_t reply_id) { msg_t* msg = NULL; DPRINTF(""); // TODO DK: Is it possible to use radix tree here instead? 
spin_lock(&transport->msg_spinlock); { void *item_ptr = set_begin_ptr(&transport->sent_msgs_set); void *end_ptr = set_end_ptr(&transport->sent_msgs_set); while (item_ptr < end_ptr) { msg_t *query = *(msg_t **) item_ptr; if (MSG_ID(query) == reply_id) { msg = query; msg_ref(msg); goto unlock; } item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr); } } unlock: spin_unlock(&transport->msg_spinlock); DPRINTF("ret=%p", msg); return msg; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - static long transport_ioctl_pid_info(msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; if (query_msg->img_size < (sizeof(msg_img_t) + sizeof(get_pid_info_img_t))) { EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg))); ret = -EINVAL; } else { msg_img_t *msg_img = MSG_IMG(query_msg); get_pid_info_img_t *img = IMG_PAYLOAD(msg_img); pid_t pid = img->pid; ret = pid_info_return_msg_new(reply_msg, pid); } DPRINTF("ret=%li", ret); return ret; } static long transport_ioctl_fs_root(msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; if (query_msg->img_size < (sizeof(msg_img_t) + sizeof(get_fs_root_img_t))) { EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg))); ret = -EINVAL; } else { msg_img_t *msg_img = MSG_IMG(query_msg); get_fs_root_img_t *img = IMG_PAYLOAD(msg_img); pid_t pid = img->pid; ret = fs_root_return_msg_new(reply_msg, pid); } DPRINTF("ret=%li", ret); return ret; } static long install_file(struct file *file, int fd, int *pfd) { if (IS_ERR(file)) { put_unused_fd(fd); return PTR_ERR(file); } // file is consumed so no need to call 'fput' fd_install(fd, file); *pfd = fd; return 0; } static long open_file_with_flags(const char *full_path, int uflags, int mode, int *pfd) { struct file *file; int flags; int fd; DPRINTF("Opening file '%s', uflags '%d', mode '%d'", full_path, uflags, mode); fd = get_unused_fd_compat(); if (fd < 0) { return fd; } flags = uflags #ifdef O_LARGEFILE | O_LARGEFILE #endif #ifdef O_NOATIME | O_NOATIME #endif // 'FMODE_NONOTIFY' refers to 'fanotify' not scanning the file. #ifdef FMODE_NONOTIFY | FMODE_NONOTIFY #endif ; file = filp_open(full_path, flags, mode); return install_file(file, fd, pfd); } static inline long open_file(struct path *path, int *pfd) { struct file *file; int flags; int fd; if (!path->dentry && !path->mnt) { return -ENOENT; } fd = get_unused_fd_compat(); if (fd < 0) { return fd; } flags = O_RDONLY #ifdef O_LARGEFILE | O_LARGEFILE #endif #ifdef O_NOATIME | O_NOATIME #endif // 'FMODE_NONOTIFY' refers to 'fanotify' not scanning the file. #ifdef FMODE_NONOTIFY | FMODE_NONOTIFY #endif ; file = dentry_open_compat(path, flags); if (IS_ERR(file)) { // If open failed, let's try to open via the path. // Notice that this open will be inside 'client' service context // so this 'filp_open' has a good chance of failing as // 'mount namespaces' might be different in the process. // Perhaps a proper solution would be opening the file inside the original // process context, but that would result in having to create 'file' // early or to do extra context switches to the scanned process. // Either way seems inefficient so it is currently avoided. 
size_t size = PAGE_SIZE; char *buf = mem_alloc(size); const char *full_path; if (!buf) { return -ENOMEM; } full_path = d_path(path, buf, size); if (!IS_ERR(full_path)) { file = filp_open(full_path, flags, 0); } mem_free(buf); } return install_file(file, fd, pfd); } static long transport_ioctl_handle_open_file_from_msg(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; msg_t* msg; msg_img_t *msg_img = MSG_IMG(query_msg); open_file_from_msg_img_t *img = IMG_PAYLOAD(msg_img); if (MSG_SIZE(query_msg) < sizeof(msg_img_t) + sizeof(open_file_from_msg_img_t)) return -EINVAL; msg = transport_lookup_msg_ref(transport, MSG_ID(query_msg)); if (!msg) { ret = -EINVAL; } else { int fd = -1; struct path path; thread_safe_path_load(img->num == 0 ? &msg->path : &msg->path2, &path); msg_unref(msg); ret = open_file(&path, &fd); path_put(&path); if (0 == ret) { ret = open_file_return_msg_new(reply_msg, fd); } } return ret; } static long transport_ioctl_handle_open_file_by_path(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; char *path; size_t pathSize; int fd = -1; msg_img_t *msg_img = MSG_IMG(query_msg); open_file_by_path_img_t *img = IMG_PAYLOAD(msg_img); if (MSG_SIZE(query_msg) <= sizeof(msg_img_t) + sizeof(open_file_by_path_img_t)) return -EINVAL; path = img->path; pathSize = MSG_SIZE(query_msg) - (sizeof(msg_img_t) + sizeof(open_file_by_path_img_t)); path[pathSize - 1] = '\0'; ret = open_file_with_flags(path, img->flags, img->mode, &fd); if (0 == ret) { ret = open_file_return_msg_new(reply_msg, fd); } return ret; } static long transport_ioctl_handle_get_version(msg_varsized_t *reply_msg) { return version_info_return_msg_new(reply_msg); } static long transport_ioctl_handle_data_queue_init(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { msg_img_t *msg_img = MSG_IMG(query_msg); data_queue_params_t *params = IMG_PAYLOAD(msg_img); long err; shared_data_queue_t *queue; uint32_t queue_size; if (MSG_SIZE(query_msg) < sizeof(msg_img_t) + sizeof(data_queue_params_t)) return -EINVAL; err = data_queue_create(params, &queue); if (err) { return err; } queue_size = params->size - DATA_QUEUE_HEADER_SIZE; { spin_lock(&transport->msg_spinlock); if (transport->queue) { spin_unlock(&transport->msg_spinlock); virt_mem_free(queue, params->size); return -EEXIST; } transport->queue = queue; transport->queue_size = queue_size; spin_unlock(&transport->msg_spinlock); } return data_queue_offsets_return_msg_new(reply_msg, queue_size); } static long transport_ioctl_write_read_msg(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; action_type_t action_type; if (MSG_REPLY(query_msg)) { EPRINTF("'reply' ioctl is not supported"); ret = -EINVAL; goto out; } action_type = MSG_TYPE(query_msg); switch (action_type) { case AT_GET_PID_INFO: ret = transport_ioctl_pid_info(reply_msg, query_msg); break; case AT_GET_FS_ROOT: ret = transport_ioctl_fs_root(reply_msg, query_msg); break; case AT_OPEN_FILE_FROM_MSG: ret = transport_ioctl_handle_open_file_from_msg(transport, reply_msg, query_msg); break; case AT_OPEN_FILE_BY_PATH: ret = transport_ioctl_handle_open_file_by_path(transport, reply_msg, query_msg); break; case AT_GET_VERSION: ret = transport_ioctl_handle_get_version(reply_msg); break; case AT_INIT_SHARED_DATA_QUEUE: ret = transport_ioctl_handle_data_queue_init(transport, reply_msg, query_msg); break; default: EPRINTF("Unexpected '%s' message", action_type_to_string(action_type)); HEX_DUMP("query_msg: ", 
MSG_IMG(query_msg), MSG_SIZE(query_msg)); ret = -EINVAL; break; } out: DPRINTF("ret=%li", ret); return ret; } static long transport_ioctl_copy_from_user(ioctl_hdr_t *ioctl_hdr, msg_varsized_t *query_msg, void __user *user_data) { long ret; size_t msg_size; msg_sized_t *msg; msg_img_t *msg_img; void *payload; if (copy_from_user(ioctl_hdr, user_data, sizeof(ioctl_hdr_t))) { EPRINTF("'copy_from_user()' failure"); ret = -EFAULT; goto out; } msg_size = ioctl_hdr->size; if (msg_size < sizeof(msg_img_t)) { EPRINTF("message image is too small"); ret = -EINVAL; goto out; } if (msg_size > TRANSPORT_MSG_SIZE_MAX) { EPRINTF("size > TRANSPORT_MSG_SIZE_MAX"); ret = -E2BIG; goto out; } msg = msg_varsized_init(query_msg, msg_size); if (!msg) { ret = -ENOMEM; goto out; } msg_img = MSG_IMG(msg); payload = (uint8_t *)user_data + sizeof(ioctl_hdr_t); if (copy_from_user(msg_img, payload, msg_size)) { msg_varsized_uninit(query_msg); EPRINTF("'copy_from_user()' failure"); ret = -EFAULT; goto out; } ret = 0; out: DPRINTF("ret=%li", ret); return ret; } static long transport_ioctl_copy_to_user(ioctl_hdr_t *ioctl_hdr, msg_sized_t *reply_msg, void __user *user_data) { long ret; size_t msg_size = MSG_SIZE(reply_msg); size_t capacity; void *payload; msg_img_t *msg_img; ioctl_hdr->size = msg_size; if (copy_to_user(user_data, ioctl_hdr, sizeof(ioctl_hdr_t))) { EPRINTF("'copy_to_user()' failure"); ret = -EFAULT; goto out; } capacity = ioctl_hdr->capacity; if (capacity < msg_size) { WPRINTF("capacity=%zu < msg_size=%zu", capacity, msg_size); ret = -ENOSPC; goto out; } payload = (uint8_t *)user_data + sizeof(ioctl_hdr_t); msg_img = MSG_IMG(reply_msg); if (copy_to_user(payload, msg_img, msg_size)) { EPRINTF("'copy_to_user()' failure"); ret = -EFAULT; goto out; } ret = 0; out: DPRINTF("ret=%li", ret); return ret; } long transport_device_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { transport_t *transport = filp->private_data; long ret; if (transport->shutdown) { ret = -EIO; goto out; } switch (cmd) { case IOCTL_WRITE_AND_READ_MSG: case IOCTL_READ_VERSION: { ioctl_hdr_t ioctl_hdr; void *user_data = (void *)arg; msg_varsized_t query_msg; ret = transport_ioctl_copy_from_user(&ioctl_hdr, &query_msg, user_data); if (!ret) { msg_varsized_t reply_msg; ret = transport_ioctl_write_read_msg(transport, &reply_msg, MSG_VARSIZED_GET_SIZED(&query_msg)); if (!ret) { ret = transport_ioctl_copy_to_user(&ioctl_hdr, MSG_VARSIZED_GET_SIZED(&reply_msg), user_data); msg_varsized_uninit(&reply_msg); } msg_varsized_uninit(&query_msg); } break; } default: EPRINTF("Unexpected IOCTL cmd=%u", cmd); ret = -ENOIOCTLCMD; } out: if (-EINVAL == ret) { transport_shutdown(transport); } DPRINTF("ret=%li", ret); return ret; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ssize_t transport_device_read(struct file *filp, char __user *user_data, size_t size, loff_t *offset) { msg_t *msg; transport_t *transport = filp->private_data; size_t img_size; ssize_t ret; if (filp->f_flags & O_NONBLOCK) { EPRINTF("'non-blocking' mode is not supported yet"); ret = -EINVAL; transport_shutdown(transport); goto out; } if (!size) { EPRINTF("'empty read' is not supported"); ret = -EINVAL; transport_shutdown(transport); goto out; } task_info_map_delete_exited(); retry_wait: // We may start with 'wait*()' because it itself starts // with 'condition' check. 
if (wait_event_interruptible_exclusive(transport->msg_wait_queue, transport->shutdown || !ring_is_empty(&transport->msg_ring))) { ret = -EINTR; goto out; } // Lock the state and check if processing is actually possible. spin_lock(&transport->msg_spinlock); { if (transport->shutdown) { ret = -EIO; spin_unlock(&transport->msg_spinlock); goto out; } if (ring_is_empty(&transport->msg_ring)) { WPRINTF("wakeup without messages"); spin_unlock(&transport->msg_spinlock); goto retry_wait; } msg = *(msg_t **) ring_consumer_ptr(&transport->msg_ring); img_size = msg->img_size; DPRINTF("size=%zu img_size=%zu", size, img_size); if (size < img_size) { ret = -ENOSPC; spin_unlock(&transport->msg_spinlock); goto out; } ring_consumer_index_move_one(&transport->msg_ring); } spin_unlock(&transport->msg_spinlock); // 'copy_to_user' MAY sleep (for example in page fault handler) if (copy_to_user(user_data, &msg->img, img_size)) { WPRINTF("'copy_to_user()' failure"); ret = -EFAULT; transport_shutdown(transport); } else { ret = img_size; } msg_unref(msg); out: DPRINTF("ret=%zi", ret); return ret; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Forward declaration, to be available in the end of the code static long wait_msg_killable_timeout(msg_t* msg, unsigned long timeout_jiffies); static void msg_wait_reply(msg_t *msg) { long ret; // Do not block 'control' process if (atomic_read(&msg->reply_wait_count) && MSG_TYPE(msg) != MT_PONG && transport_is_control_tgid(current->tgid)) { WPRINTF_RATELIMITED("avoiding blocking wait in control process"); return; } // We may start with 'wait*()' because it itself starts // with 'condition' check. DPRINTF("waiting for userspace reply..."); ret = wait_msg_killable_timeout(msg, msecs_to_jiffies(TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS)); if (!ret) { // Timeout here means unexpected issue with userspace. FPRINTF("timeout waiting for userspace reply (msg_type=%i/%s)", MSG_TYPE(msg), msg_type_to_string(MSG_TYPE(msg))); HEX_DUMP("msg: ", MSG_IMG(msg), MSG_SIZE(msg)); dump_stack(); // identify and shutdown transport failed to reply transport_global_shutdown_msg(msg); } else if (ret < 0) { // Calling process has been interrupted as SIGKILL was received. // In practice this means 'block'. DPRINTF("message was interrupted..."); msg->interrupted = true; } else { // Userspace reply has been received (msg->reply_msg) or // waiting has been explicitly aborted (msg->aborted) for // example on userspace disconnect. 
DPRINTF("wait finished (msg->block=%i)", msg->block); } } static void msg_mark_async(msg_t *msg) { MSG_ID(msg) = 0; MSG_REPLY(msg) = false; } void send_msg_async(msg_t *msg) { DPRINTF("msg=%p", msg); msg_mark_async(msg); send_msg_nowait(msg); DPRINTF(""); } void send_msg_async_unref(msg_t *msg) { if (msg) { send_msg_async(msg); msg_unref(msg); } } static void msg_mark_sync(msg_t *msg) { MSG_ID(msg) = transport_global_sequence_next(); MSG_REPLY(msg) = false; } bool send_msg_sync_nowait(msg_t *msg) { bool sent; DPRINTF("msg=%p", msg); msg_mark_sync(msg); sent = send_msg_nowait(msg); DPRINTF("msg=%p sent=%i", msg, sent); return sent; } void send_msg_sync(msg_t *msg) { DPRINTF("msg=%p", msg); if (send_msg_sync_nowait(msg)) { msg_wait_reply(msg); } DPRINTF(""); } void send_msg_sync_unref(msg_t *msg) { if (msg) { send_msg_sync(msg); thread_safe_path_clear(&msg->path); msg_unref(msg); } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - static int transport_handle_ping_msg(transport_t *transport, msg_sized_t *ping) { int ret; msg_t *pong; bool sync; if (ping->img_size < (sizeof(msg_img_t) + sizeof(ping_img_t))) { DPRINTF("'ping' message is too short. ignoring it."); ret = -EINVAL; goto out; } pong = pong_msg_new(ping); if (!pong) { ret = -ENOMEM; goto out; } // reflect ping's 'reply' policy sync = !!MSG_ID(ping); if (sync) { msg_mark_sync(pong); } else { msg_mark_async(pong); } transport_send_msg_nowait(transport, pong); if (sync) { msg_wait_reply(pong); } msg_unref(pong); ret = 0; out: return ret; } static int transport_handle_set_pid_status_msg(msg_sized_t *msg) { msg_img_t *msg_img; pid_set_st_img_t *img; pid_t pid; task_status_t status; int ret; if (msg->img_size < (sizeof(msg_img_t) + sizeof(pid_set_st_img_t))) { DPRINTF("'pid' message is too short. ignoring it."); ret = -EINVAL; goto out; } msg_img = MSG_IMG(msg); img = IMG_PAYLOAD(msg_img); pid = img->pid; status = img->status; ret = task_info_status_set(pid, status); out: DPRINTF("ret=%i", ret); return ret; } static int transport_handle_del_pid_msg(msg_sized_t *msg) { msg_img_t *msg_img; pid_del_img_t *img; pid_t pid; int ret; if (msg->img_size < (sizeof(msg_img_t) + sizeof(pid_del_img_t))) { DPRINTF("'pid' message is too short. ignoring it."); ret = -EINVAL; goto out; } msg_img = MSG_IMG(msg); img = IMG_PAYLOAD(msg_img); pid = img->pid; ret = task_info_map_del(pid); out: DPRINTF("ret=%i", ret); return ret; } static int transport_handle_enable_events(transport_t *transport, msg_sized_t *msg) { msg_img_t *msg_img; events_mask_img_t *img; uint64_t mask; uint64_t old_mask; int ret; if (msg->img_size < (sizeof(msg_img_t) + sizeof(events_mask_img_t))) { DPRINTF("'events' message is too short. ignoring it."); ret = -EINVAL; goto out; } msg_img = MSG_IMG(msg); img = IMG_PAYLOAD(msg_img); mask = img->events_mask; spin_lock(&transport->msg_spinlock); { if (transport->shutdown) { // Do not allow changing the mask when shutdown. // Transport will not be able to receive any events. 
ret = -EFAULT; } else { old_mask = atomic64_read(&transport->events_mask); atomic64_set(&transport->events_mask, old_mask | mask); ret = 0; } } spin_unlock(&transport->msg_spinlock); transport_global_recalculate_combined_event_mask(); out: DPRINTF("ret=%i", ret); return ret; } static int transport_handle_disable_events(transport_t *transport, msg_sized_t *msg) { msg_img_t *msg_img; events_mask_img_t *img; uint64_t mask; uint64_t old_mask; int ret; if (msg->img_size < (sizeof(msg_img_t) + sizeof(events_mask_img_t))) { DPRINTF("'events' message is too short. ignoring it."); ret = -EINVAL; goto out; } msg_img = MSG_IMG(msg); img = IMG_PAYLOAD(msg_img); mask = img->events_mask; spin_lock(&transport->msg_spinlock); { if (transport->shutdown) { // Do not allow changing the mask when shutdown. // It is not particularly useful but keeping it. ret = -EFAULT; } else { old_mask = atomic64_read(&transport->events_mask); atomic64_set(&transport->events_mask, old_mask & (~mask)); ret = 0; } } spin_unlock(&transport->msg_spinlock); transport_global_recalculate_combined_event_mask(); out: DPRINTF("ret=%i", ret); return ret; } // FIXME: do something with 'reply'. For example merge several replies // into one; link replies into list; extract 'responces' and merge them. static void handle_reply(msg_t *query_msg, msg_sized_t *reply_msg) { // handle 'long' 'reply' size_t headers_size = sizeof(msg_img_t) + sizeof(reply_img_t); // Note: for compatibility with legacy short 'reply_img_t' default 'reply_type' is RT_ALLOW if (MSG_SIZE(reply_msg) >= headers_size) { msg_img_t *reply_msg_img = MSG_IMG(reply_msg); reply_img_t *reply_img = IMG_PAYLOAD(reply_msg_img); reply_type_t reply_type = reply_img->type; DPRINTF("MSG_SIZE(reply_msg)=%zu - headers_size=%zu = %zu reply_type=%u", MSG_SIZE(reply_msg), headers_size, MSG_SIZE(reply_msg) - headers_size, reply_type); if (RT_BLOCK == reply_type) { query_msg->block = true; } } } static int transport_handle_reply(transport_t *transport, msg_sized_t *reply) { int ret; msg_id_t reply_id = MSG_ID(reply); msg_type_t reply_type = MSG_TYPE(reply); DPRINTF(""); // find 'query' matching this 'reply' spin_lock(&transport->msg_spinlock); { void *item_ptr = set_begin_ptr(&transport->sent_msgs_set); void *end_ptr = set_end_ptr(&transport->sent_msgs_set); while (item_ptr < end_ptr) { msg_t *query = *(msg_t **) item_ptr; if (MSG_ID(query) == reply_id) { // remove 'query' from 'set' *(msg_t **) item_ptr = *(msg_t **) set_item_ptr( &transport->sent_msgs_set, set_items_count_dec(&transport->sent_msgs_set)); handle_reply(query, reply); msg_reply_wait_count_dec(query); ret = 0; goto unlock; } item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr); } WPRINTF("Unexpected 'reply' with type=%i id=%llX", reply_type, reply_id); ret = -EINVAL; } unlock: spin_unlock(&transport->msg_spinlock); DPRINTF("ret=%i", ret); return ret; } static int transport_handle_msg(transport_t *transport, msg_sized_t *msg) { int ret; if (msg->img_size < sizeof(msg_img_t)) { DPRINTF("message image is too small"); ret = -EINVAL; goto out; } if (MSG_REPLY(msg)) { ret = transport_handle_reply(transport, msg); } else { // !reply action_type_t type = MSG_TYPE(msg); switch (type) { case AT_PING: ret = transport_handle_ping_msg(transport, msg); break; case AT_PID_SET_ST: ret = transport_handle_set_pid_status_msg(msg); break; case AT_PID_DEL: ret = transport_handle_del_pid_msg(msg); break; case AT_ENABLE_EVENTS: ret = transport_handle_enable_events(transport, msg); break; case AT_DISABLE_EVENTS: ret = 
transport_handle_disable_events(transport, msg); break; case AT_WAIT_SHARED_DATA_QUEUE: ret = transport_data_queue_wait(transport); break; case AT_EX_EVENTS_SUPPORTED: ret = 0; break; default: WPRINTF("Unexpected message type=%i/%s", type, action_type_to_string(type)); ret = -EINVAL; } } out: DPRINTF("ret=%i", ret); return ret; } ssize_t transport_device_write(struct file *filp, const char __user *user_data, size_t size, loff_t *offset) { transport_t *transport = filp->private_data; msg_varsized_t msg; msg_sized_t* smsg; msg_img_t *msg_img; ssize_t ret; if (transport->shutdown) { ret = -EIO; goto out; } if (filp->f_flags & O_NONBLOCK) { EPRINTF("'non-blocking' mode is not supported yet"); ret = -EINVAL; transport_shutdown(transport); goto out; } if (!size) { WPRINTF("'zero write' is not supported"); ret = -EINVAL; transport_shutdown(transport); goto out; } if (size > TRANSPORT_MSG_SIZE_MAX) { WPRINTF("size > TRANSPORT_MSG_SIZE_MAX"); ret = -E2BIG; goto out; } smsg = msg_varsized_init(&msg, size); if (!smsg) { ret = -ENOMEM; goto out; } msg_img = MSG_IMG(smsg); if (copy_from_user(msg_img, user_data, size)) { EPRINTF("'copy_from_user()' failure"); ret = -EFAULT; transport_shutdown(transport); goto free_msg; } ret = transport_handle_msg(transport, smsg); if (ret) { // make sure error code is negative if (ret > 0) { EPRINTF("error code must be negative"); ret = -ret; } goto free_msg; } ret = size; free_msg: msg_varsized_uninit(&msg); out: DPRINTF("ret=%zi", ret); return ret; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - /* Warning: 'transport_open()' and 'transport_release()' may be simultaneously invoked by several threads or processes. Note: We can match different 'transport' instances using 'device' 'major'/'minor' from 'inode->i_rdev'. Pointer to selected 'trasport' can be stored in 'filp->private_data' for later use in '*_read()', '*_write()', etc. Note: We may create 'transport' on 'first open' and destroy it on 'last close'. */ /* There is possibility of 'deadlock' between our 'kernel' and 'userspace' code while processing events generated by our userspace process until registration of our userspace process in 'ignore' list. 
*/ int transport_device_open(struct inode *inode, struct file *filp) { transport_t *transport; int ret; DPRINTF("inode->i_rdev: major=%u minor=%u", imajor(inode), iminor(inode)); DPRINTF("filp->f_flags=%X", filp->f_flags); if (filp->f_flags & O_NONBLOCK) { EPRINTF("'non-blocking' mode is not supported yet"); ret = -EINVAL; goto out; } mutex_lock(&transport_global.transport_mutex); { DPRINTF("transport_count=%u", transport_global.transport_count); transport = transport_new(); if (!transport) { WPRINTF("'%s()' failure", "transport_new"); ret = -ENOMEM; goto unlock_open_close_mutex; } filp->private_data = transport; if (!transport_global.transport_count) { // FIXME: 'attach' may fail IPRINTF("attaching interceptors"); mod_rundown_protection_set_ready(); ret = syscall_hooks_attach(); if (ret) { EPRINTF("'%s()' failure %i", "syscall_hooks_attach", ret); goto unlock_open_close_mutex; } ret = tracepoints_attach(); if (ret) { EPRINTF("'%s()' failure %i", "tracepoints_attach", ret); syscall_hooks_detach(); goto unlock_open_close_mutex; } IPRINTF("interceptors attached"); } ++transport_global.transport_count; ret = 0; } unlock_open_close_mutex: mutex_unlock(&transport_global.transport_mutex); out: DPRINTF("ret=%i", ret); return ret; } // 'release()' means 'close()' int transport_device_release(struct inode *inode, struct file *filp) { bool ok; mutex_lock(&transport_global.transport_mutex); { transport_t *transport = filp->private_data; transport_shutdown(transport); transport_free(transport); DPRINTF("transport_count=%u", transport_global.transport_count); if (!--transport_global.transport_count) { IPRINTF("detaching interceptors"); tracepoints_detach(); // FIXME: 'syscall_hooks_detach()' may fail syscall_hooks_detach(); mod_rundown_protection_set_rundown_active(); ok = mod_rundown_protection_wait_for_rundown_timeout(msecs_to_jiffies(TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS)); if (!ok) { WPRINTF("Failed to wait for module rundown"); } task_info_map_clear(); IPRINTF("interceptors detached"); } } mutex_unlock(&transport_global.transport_mutex); return 0; } int transport_device_mmap(struct file *filp, struct vm_area_struct *vma) { int ret; transport_t *transport = filp->private_data; if (transport->shutdown) { ret = -EIO; goto out; } ret = transport_data_queue_mmap(transport, vma); out: return ret; } static long wait_msg_killable_timeout(msg_t* msg, unsigned long timeout_jiffies) { #ifndef HAVE_WAIT_EVENT_KILLABLE_TIMEOUT // 'wait_event_interruptible_timeout' has to be a define and so is // 'TASK_KILLABLE' and 'TASK_INTERRUPTIBLE'. // I need functionality of 'wait_event_interruptible_timeout' // but 'TASK_INTERRUPTIBLE' replaced with 'TASK_KILLABLE' which // is achieved using the 'define' tricks by redefining 'TASK_INTERRUPTIBLE'. // If the trick won't work, using the regular 'wait_event_timeout'. #if defined(TASK_KILLABLE) && defined(TASK_INTERRUPTIBLE) && defined(wait_event_interruptible_timeout) && !defined(signal_pending) #undef TASK_INTERRUPTIBLE #define TASK_INTERRUPTIBLE TASK_KILLABLE #define signal_pending fatal_signal_pending return wait_event_interruptible_timeout(msg->wait_queue, !atomic_read(&msg->reply_wait_count), timeout_jiffies); #undef TASK_INTERRUPTIBLE #undef signal_pending #else // Something weird is going on, rollback to 'TASK_UNINTERRUPTIBLE' variant. // It should not cause any issues though as far as APL // daemon is responding to events so it is not bad. 
	return wait_event_timeout(msg->wait_queue, !atomic_read(&msg->reply_wait_count), timeout_jiffies);
#endif
#else
	// Just use the well defined macros available.
	return wait_event_killable_timeout(msg->wait_queue, !atomic_read(&msg->reply_wait_count), timeout_jiffies);
#endif
}
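The enqueue path above publishes new entries with smp_store_release() on 'tail' and observes the reader's progress through smp_load_acquire() on 'head'; the userspace half of that contract is not part of this file. Below is a minimal sketch of what a matching userspace reader could look like. The layouts shared_data_queue_view_t / data_queue_entry_view_t, the helper data_queue_drain() and the use of GCC __atomic builtins are assumptions made here for illustration; the real shared_data_queue_t / data_queue_entry_t definitions come from the module's shared headers.

/*
 * Hypothetical userspace view of the mmap'ed queue.  Field order and the
 * entry header are assumptions for illustration only.
 */
#include <stdint.h>

typedef struct {
    uint32_t head;              /* consumer index into entries[]          */
    uint32_t tail;              /* producer index, written by the kernel  */
    uint8_t  entries[];         /* ring storage of 'queue_size' bytes     */
} shared_data_queue_view_t;

typedef struct {
    uint32_t size;              /* payload size, header excluded */
    uint8_t  data[];
} data_queue_entry_view_t;

#define ENTRY_HDR_SIZE  sizeof(data_queue_entry_view_t)
#define ROUND_UP_4(n)   (((n) + 3u) & ~3u)

/*
 * Drain everything the kernel writer has published so far.
 * The load-acquire on 'tail' pairs with the kernel's smp_store_release(),
 * and the store-release on 'head' pairs with the kernel's smp_load_acquire().
 */
static void data_queue_drain(shared_data_queue_view_t *q, uint32_t queue_size,
                             void (*consume)(const void *data, uint32_t size))
{
    uint32_t head = q->head;    /* only the reader ever writes 'head' */
    uint32_t tail = __atomic_load_n(&q->tail, __ATOMIC_ACQUIRE);

    while (head != tail) {
        data_queue_entry_view_t *entry =
            (data_queue_entry_view_t *)(q->entries + head);

        /*
         * A "zapped" entry (size too large to fit before the end of the
         * buffer), or no room left even for a header, means the writer
         * wrapped around: the next real entry starts at offset 0.
         */
        if (queue_size - head < ENTRY_HDR_SIZE ||
            ENTRY_HDR_SIZE + entry->size > queue_size - head) {
            head = 0;
            continue;
        }

        consume(entry->data, entry->size);
        /* The kernel rounds the published 'tail' up to 4 bytes; mirror it. */
        head = ROUND_UP_4(head + ENTRY_HDR_SIZE + entry->size);
    }

    /* Publish the freed space back to the kernel writer. */
    __atomic_store_n(&q->head, head, __ATOMIC_RELEASE);
}

Once the queue is drained, a real client would ask the driver to block until more entries are published (the AT_WAIT_SHARED_DATA_QUEUE message handled by transport_data_queue_wait() above) rather than spinning on an empty queue.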
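transport_ioctl_copy_from_user() and transport_ioctl_copy_to_user() above define the framing for IOCTL_WRITE_AND_READ_MSG: an ioctl_hdr_t carrying the query size and the reply capacity, immediately followed by the message image in the same user buffer, with the reply written back in place. The helper below is a hedged userspace sketch of that round trip; ioctl_hdr_view_t, MSG_SIZE_MAX and transport_roundtrip() are illustrative names, and the header field widths, the device node and the ioctl request code are assumptions to be taken from the module's shared headers.

/*
 * Hypothetical userspace helper for the IOCTL_WRITE_AND_READ_MSG framing:
 * a header (query size + reply capacity) immediately followed by the
 * message image.  Field widths and the request code are assumptions.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>

typedef struct {
    uint32_t size;              /* bytes of message image after the header        */
    uint32_t capacity;          /* bytes available after the header for the reply */
} ioctl_hdr_view_t;

#define MSG_SIZE_MAX 1024       /* mirrors TRANSPORT_MSG_SIZE_MAX */

/*
 * Send one query image and receive the reply image written back in place.
 * Returns the reply size, or -1 on failure.
 */
static ssize_t transport_roundtrip(int fd, unsigned long write_read_cmd,
                                   const void *query, uint32_t query_size,
                                   void *reply, uint32_t reply_capacity)
{
    struct {
        ioctl_hdr_view_t hdr;           /* header first ...              */
        uint8_t img[MSG_SIZE_MAX];      /* ... message image right after */
    } buf;

    if (query_size > MSG_SIZE_MAX)
        return -1;

    buf.hdr.size = query_size;          /* read by transport_ioctl_copy_from_user() */
    buf.hdr.capacity = MSG_SIZE_MAX;    /* checked by transport_ioctl_copy_to_user() */
    memcpy(buf.img, query, query_size);

    if (ioctl(fd, write_read_cmd, &buf) < 0)
        return -1;

    /* On success the driver rewrote hdr.size with the reply image size. */
    if (buf.hdr.size > reply_capacity)
        return -1;
    memcpy(reply, buf.img, buf.hdr.size);
    return (ssize_t)buf.hdr.size;
}

The file descriptor would come from opening the character device registered through device_mod_init() (implemented in device.c, not shown here), and the 1 KiB image bound mirrors TRANSPORT_MSG_SIZE_MAX.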