Red-black tree is used which simplifies the code. Instead of clear_wait(), thread_wakeup_prim() is used with the waiter's offset as the event, which removes the need for a seperate cross wake function. Value at the futex address is retrieved via a copyin() call and the compare before the block is atomic with GCC builtins. First version of futex_wait_timeout() is added, but untested.
This is not yet ready to be called from userspace since glibc build on the Hurd doesn't seem to compile the new RPCs. I don't know how to actually call them. I tested this with GDB in kernel, without vm_map_lookup() and not actually blocking the thread. --- kern/futex.c | 287 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ kern/futex.h | 64 +++++++++++++ kern/startup.c | 3 + 3 files changed, 354 insertions(+) create mode 100644 kern/futex.c create mode 100644 kern/futex.h diff --git a/kern/futex.c b/kern/futex.c new file mode 100644 index 0000000..e73165e --- /dev/null +++ b/kern/futex.c @@ -0,0 +1,287 @@ +/* + * Copyright (C) 2013, 2014 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Marin Ramesa + */ + +/* + * Fast userspace locking + * + */ + +#include <kern/futex.h> +#include <kern/sched_prim.h> +#include <kern/rbtree.h> +#include <vm/vm_map.h> +#include <kern/thread.h> +#include <machine/locore.h> +#include <kern/lock.h> +#include <kern/kalloc.h> + +/* + * When color of the node is red, parent of the node is the futex address, left child is + * the futex waiter offset and right child is the another futex address. When color of + * the node is black, parent of the node is the futex waiter's offset. + */ +static struct rbtree futex_tree; + +decl_simple_lock_data(static, futex_lock); + +void futex_setup(void) +{ + rbtree_init(&futex_tree); + simple_lock_init(futex_lock); +} + +/* Returns the difference in futex addresses; assertion in rbtree_insert() fails if it returns 0. */ +static inline int compare_nodes_insert_futex(struct rbtree_node *node1, struct rbtree_node *node2) +{ + return node1->parent - node2->parent; +} + +/* Add the differences between the node addresses and offsets. */ +static inline int compare_nodes_insert_waiter(struct rbtree_node *node1, struct rbtree_node *node2) +{ + return node1 - node2 + node1->parent - node2->parent; +} + +/* For the lookup to succeed the return value must be zero. Which means the key node's parent must not be the offset. */ +static inline int compare_nodes_lookup(struct rbtree_node *node1, struct rbtree_node *node2) +{ + if (node1 == futex_tree.root) + return node1->parent - node2->parent; + else + return node1->parent - node2->parent + ((node1->parent & RBTREE_COLOR_MASK) == RBTREE_COLOR_BLACK); +} + +/* Insert a new address as the node in the futex tree. */ +static int futex_init(int *address) +{ + struct rbtree_node *futex; + + simple_lock(&futex_lock); + + futex = (struct rbtree_node *)kalloc(sizeof(*futex)); + if (futex == NULL) + return FUTEX_RESOURCE_SHORTAGE; + + rbtree_insert(&futex_tree, futex, compare_nodes_insert_futex); + futex->parent = (unsigned long)address; + + simple_unlock(&futex_lock); + + return 0; +} + +/* Find the node slot of the futex address in the tree. */ +static unsigned long futex_lookup_address(int *address) +{ + unsigned long node_slot = 0; + struct rbtree_node node; + + simple_lock(&futex_lock); + + node.parent = (unsigned long)address; + rbtree_lookup_slot(&futex_tree, &node, compare_nodes_lookup, node_slot); + + simple_unlock(&futex_lock); + + return node_slot; +} + +/* Atomic compare using GCC builtins. + * First call fetches and substracts the values. + * Second call compares the return value with zero and returns with success + * if the swap was done. + */ +static inline _Bool atomic_compare(int value1, int value2) +{ + return __sync_bool_compare_and_swap((int[]){__sync_sub_and_fetch(&value1, value2)}, 0, 1); +} + +int futex_wait(int *address, int value) +{ + unsigned long node_slot = 0; + + lookup: + + node_slot = futex_lookup_address(address); + + if (node_slot != 0) { + + simple_lock(&futex_lock); + + int addr_value; + copyin(address, &addr_value, sizeof(int)); + + if (atomic_compare(addr_value, value)) { + + vm_offset_t offset; + struct rbtree_node node; + + /* Fetch the offset from the (task map, futex address) value pair. */ + kern_return_t kr; + kr = vm_map_lookup(¤t_map(), (vm_offset_t)address, VM_PROT_READ, NULL, NULL, &offset, NULL, NULL); + if (kr != KERN_SUCCESS) + return FUTEX_MEMORY_ERROR; + + /* Link with the futex. */ + rbtree_slot_parent(node_slot)->children[RBTREE_LEFT] = &node; + + /* Mark the node black and write the offset. */ + node.parent = offset | RBTREE_COLOR_BLACK; + + rbtree_insert(&futex_tree, &node, compare_nodes_insert_waiter); + + /* Block with the offset as the event. */ + thread_sleep((event_t)node.parent, (simple_lock_t)&futex_lock, FALSE); + + /* Thread was awakened in thread_wakeup_prim(). Remove the offset. */ + rbtree_remove(&futex_tree, &node); + + return FUTEX_RESUMED_BY_WAKE; + + } else { + simple_unlock(&futex_lock); + return FUTEX_NO_THREAD_SUSPENDED; + } + + } else { + + if (futex_init(address) != 0) + return FUTEX_RESOURCE_SHORTAGE; + + goto lookup; + } +} + +int futex_wake(int *address, _Bool wake_all) +{ + #define offset(x) x->children[RBTREE_LEFT]->parent + #define next_futex(x) x->children[RBTREE_RIGHT] + + unsigned node_slot = 0; + node_slot = futex_lookup_address(address); + + if (node_slot != 0) { + + simple_lock(&futex_lock); + + struct rbtree_node *futex; + + if (wake_all) { + + for (futex = rbtree_first(&futex_tree); futex != NULL; futex = next_futex(futex)) { + + /* Clean up the futex with zero waiters. */ + if (futex->children[RBTREE_LEFT] == NULL) { + kfree((vm_offset_t)futex, sizeof(*futex)); + rbtree_remove(&futex_tree, futex); + continue; + } + + /* If addresses match and the child node is black wake the threads. */ + if (((offset(futex) & RBTREE_COLOR_MASK) == RBTREE_COLOR_BLACK) && ((int *)futex->parent == address)) { + thread_wakeup((event_t)offset(futex)); + } + + /* Break if this was the last node. */ + if (futex == rbtree_last(&futex_tree)) { + break; + } + + } + + } else { + + for (futex = rbtree_first(&futex_tree); futex != NULL; futex = next_futex(futex)) { + + /* Clean up the futex with zero waiters. */ + if (futex->children[RBTREE_LEFT] == NULL) { + kfree((vm_offset_t)futex, sizeof(*futex)); + rbtree_remove(&futex_tree, futex); + continue; + } + + /* If addresses match and the child node is black wake one thread. */ + if (((offset(futex) & RBTREE_COLOR_MASK) == RBTREE_COLOR_BLACK) && ((int *)futex->parent == address)) { + thread_wakeup_one((event_t)offset(futex)); + break; + } + } + } + + simple_unlock(&futex_lock); + return FUTEX_SOME_NUMBER_RESUMED; + + } else { + return FUTEX_NO_THREAD; + } +#undef offset +#undef next_futex +} + +/* Timed wait for msec time. */ +static int futex_wait_timeout(int *address, int value, unsigned int msec) +{ + unsigned long node_slot = 0; + + lookup: + + node_slot = futex_lookup_address(address); + + if (node_slot != 0) { + + simple_lock(&futex_lock); + + int addr_value; + copyin(address, &addr_value, sizeof(int)); + + if (atomic_compare(addr_value, value)) { + + thread_timeout_setup(current_thread()); + + assert_wait(NULL, TRUE); + thread_will_wait_with_timeout(current_thread(), msec); + simple_unlock(&futex_lock); + + thread_block(NULL); + + return FUTEX_TIMED_OUT; + + } else { + simple_unlock(&futex_lock); + return FUTEX_NO_THREAD_SUSPENDED; + } + + } else { + + if (futex_init(address) != 0) + return FUTEX_RESOURCE_SHORTAGE; + + goto lookup; + } +} + +int r_futex_wait(task_t task, pointer_t address, int value) +{ + return futex_wait((int *)address, value); +} + +int r_futex_wake(task_t task, pointer_t address, boolean_t wake_all) +{ + return futex_wake((int *)address, (_Bool)wake_all); +} diff --git a/kern/futex.h b/kern/futex.h new file mode 100644 index 0000000..74f2dc2 --- /dev/null +++ b/kern/futex.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2013, 2014 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Marin Ramesa + */ + +#ifndef _KERN_FUTEX_H_ +#define _KERN_FUTEX_H_ + +#include <mach/boolean.h> +#include <mach/std_types.h> +#include <kern/task.h> + +/* Definitions for return values. */ +#define FUTEX_RESUMED_BY_WAKE 0 +#define FUTEX_NO_THREAD_SUSPENDED 1 +#define FUTEX_SOME_NUMBER_RESUMED 2 +#define FUTEX_NO_THREAD 3 +#define FUTEX_MEMORY_ERROR 4 +#define FUTEX_RESOURCE_SHORTAGE 6 +#define FUTEX_TIMED_OUT 7 + +/* + * This is a method for a thread to wait for a change of value at a given address. + * + * Function performs a lookup of the address in the futex tree. If address is + * found, value at the address is atomically compared with the argument value. + * If the values are same, offset from the task's map and futex address is + * calculated and thread blocks. + */ +int futex_wait(int *address, int value); + +/* + * This is a method for waking up one or all threads waiting for a change of + * value at a given address. + * + * Function performs a lookup of the address in the futex tree. If address is + * found and wake_all argument is TRUE, all threads with the same offsets are + * awakened. If wake_all is FALSE, only one thread from the futex is awakened. + */ +int futex_wake(int *address, _Bool wake_all); + +/* Initialization of the futex tree and the locks. */ +void futex_setup(void); + +/* RPCs to call from userspace. */ +int r_futex_wait(task_t task, pointer_t address, int value); +int r_futex_wake(task_t task, pointer_t address, boolean_t wake_all); + +#endif /* _KERN_FUTEX_H_ */ diff --git a/kern/startup.c b/kern/startup.c index 12f5123..447c528 100644 --- a/kern/startup.c +++ b/kern/startup.c @@ -50,6 +50,7 @@ #include <kern/bootstrap.h> #include <kern/time_stamp.h> #include <kern/startup.h> +#include <kern/futex.h> #include <vm/vm_kern.h> #include <vm/vm_map.h> #include <vm/vm_object.h> @@ -158,6 +159,8 @@ void setup_main(void) recompute_priorities(NULL); compute_mach_factor(); + futex_setup(); + /* * Create a kernel thread to start the other kernel * threads. Thread_resume (from kernel_thread) calls -- 1.7.10.4