This patch updates the lock and note implementations in libgo to match
the current versions in the master Go library.  The runtime now uses
futexes when running on GNU/Linux, while still using semaphores on
other systems.  The new implementation should be faster, and a zeroed
Lock or Note is valid, so no explicit initialization is required.
Bootstrapped and ran the Go testsuite on x86_64-unknown-linux-gnu,
testing both the futex and the semaphore versions.  Committed to
mainline.
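
For callers, the visible change is that a zeroed Lock is already
unlocked, so code like the following sketch (illustrative, not part of
the patch) needs no initialization call:

static Lock l;	/* the zero value is an unlocked lock */

void
f(void)
{
	runtime_lock(&l);
	/* critical section */
	runtime_unlock(&l);
}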

Ian

diff -r 8b1402d2f0d3 libgo/Makefile.am
--- a/libgo/Makefile.am	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/Makefile.am	Tue Nov 22 12:18:39 2011 -0800
@@ -394,6 +394,12 @@
 rtems_task_variable_add_file =
 endif
 
+if LIBGO_IS_LINUX
+runtime_lock_files = runtime/lock_futex.c runtime/thread-linux.c
+else
+runtime_lock_files = runtime/lock_sema.c runtime/thread-sema.c
+endif
+
 runtime_files = \
 	runtime/go-append.c \
 	runtime/go-assert.c \
@@ -432,7 +438,6 @@
 	runtime/go-new-channel.c \
 	runtime/go-new-map.c \
 	runtime/go-new.c \
-	runtime/go-note.c \
 	runtime/go-panic.c \
 	runtime/go-print.c \
 	runtime/go-rec-big.c \
@@ -474,6 +479,7 @@
 	runtime/go-unsafe-pointer.c \
 	runtime/go-unwind.c \
 	runtime/cpuprof.c \
+	$(runtime_lock_files) \
 	runtime/mcache.c \
 	runtime/mcentral.c \
 	$(runtime_mem_file) \
diff -r 8b1402d2f0d3 libgo/runtime/cpuprof.c
--- a/libgo/runtime/cpuprof.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/cpuprof.c	Tue Nov 22 12:18:39 2011 -0800
@@ -115,12 +115,6 @@
 static bool evict(Profile*, Entry*);
 static bool flushlog(Profile*);
 
-void
-runtime_cpuprofinit(void)
-{
-	runtime_initlock(&lk);
-}
-
 // LostProfileData is a no-op function used in profiles
 // to mark the number of profiling stack traces that were
 // discarded due to slow data writers.
diff -r 8b1402d2f0d3 libgo/runtime/go-main.c
--- a/libgo/runtime/go-main.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/go-main.c	Tue Nov 22 12:18:39 2011 -0800
@@ -47,11 +47,10 @@
   g = &runtime_g0;
   m->curg = g;
   g->m = m;
-  runtime_initpanic ();
   runtime_mallocinit ();
-  runtime_cpuprofinit ();
   __go_gc_goroutine_init (&argc);
 
+  runtime_osinit();
   runtime_goargs();
   runtime_goenvs();
 
diff -r 8b1402d2f0d3 libgo/runtime/go-nanotime.c
--- a/libgo/runtime/go-nanotime.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/go-nanotime.c	Tue Nov 22 12:18:39 2011 -0800
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Return time in nanoseconds.  This is only used for computing runtime.
+// Return current time in nanoseconds.
 
 #include <sys/time.h>
 
diff -r 8b1402d2f0d3 libgo/runtime/go-note.c
--- a/libgo/runtime/go-note.c	Tue Nov 22 12:16:53 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-/* go-note.c -- implement notesleep, notewakeup and noteclear.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-/* A note is a one-time notification.  noteclear clears the note.
-   notesleep waits for a call to notewakeup.  notewakeup wakes up
-   every thread waiting on the note.  */
-
-#include "go-assert.h"
-#include "runtime.h"
-
-/* We use a single global lock and condition variable.  It would be
-   better to use a futex on GNU/Linux.  */
-
-static pthread_mutex_t note_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t note_cond = PTHREAD_COND_INITIALIZER;
-
-/* noteclear is called before any calls to notesleep or
-   notewakeup.  */
-
-void
-runtime_noteclear (Note* n)
-{
-  int32 i;
-
-  i = pthread_mutex_lock (&note_lock);
-  __go_assert (i == 0);
-
-  n->woken = 0;
-
-  i = pthread_mutex_unlock (&note_lock);
-  __go_assert (i == 0);
-}
-
-/* Wait until notewakeup is called.  */
-
-void
-runtime_notesleep (Note* n)
-{
-  int32 i;
-
-  i = pthread_mutex_lock (&note_lock);
-  __go_assert (i == 0);
-
-  while (!n->woken)
-    {
-      i = pthread_cond_wait (&note_cond, &note_lock);
-      __go_assert (i == 0);
-    }
-
-  i = pthread_mutex_unlock (&note_lock);
-  __go_assert (i == 0);
-}
-
-/* Wake up every thread sleeping on the note.  */
-
-void
-runtime_notewakeup (Note *n)
-{
-  int32 i;
-
-  i = pthread_mutex_lock (&note_lock);
-  __go_assert (i == 0);
-
-  n->woken = 1;
-
-  i = pthread_cond_broadcast (&note_cond);
-  __go_assert (i == 0);
-
-  i = pthread_mutex_unlock (&note_lock);
-  __go_assert (i == 0);
-}
diff -r 8b1402d2f0d3 libgo/runtime/go-semacquire.c
--- a/libgo/runtime/go-semacquire.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/go-semacquire.c	Tue Nov 22 12:18:39 2011 -0800
@@ -117,35 +117,3 @@
       __go_assert (i == 0);
     }
 }
-
-
-#ifndef HAVE_SYNC_FETCH_AND_ADD_4
-
-/* For targets which don't have the required sync support.  Really
-   this should be provided by gcc itself.  FIXME.  */
-
-static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
-
-uint32
-__sync_fetch_and_add_4(uint32*, uint32)
-  __attribute__((visibility("hidden")));
-
-uint32
-__sync_fetch_and_add_4(uint32* ptr, uint32 add)
-{
-  int i;
-  uint32 ret;
-
-  i = pthread_mutex_lock(&sync_lock);
-  __go_assert(i == 0);
-
-  ret = *ptr;
-  *ptr += add;
-
-  i = pthread_mutex_unlock(&sync_lock);
-  __go_assert(i == 0);
-
-  return ret;
-}
-
-#endif
diff -r 8b1402d2f0d3 libgo/runtime/lock_futex.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/lock_futex.c	Tue Nov 22 12:18:39 2011 -0800
@@ -0,0 +1,146 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+// This implementation depends on OS-specific implementations of
+//
+//	runtime.futexsleep(uint32 *addr, uint32 val, int64 ns)
+//		Atomically,
+//			if(*addr == val) sleep
+//		Might be woken up spuriously; that's allowed.
+//		Don't sleep longer than ns; ns < 0 means forever.
+//
+//	runtime.futexwakeup(uint32 *addr, uint32 cnt)
+//		If any procs are sleeping on addr, wake up at most cnt.
+
+enum
+{
+	MUTEX_UNLOCKED = 0,
+	MUTEX_LOCKED = 1,
+	MUTEX_SLEEPING = 2,
+
+	ACTIVE_SPIN = 4,
+	ACTIVE_SPIN_CNT = 30,
+	PASSIVE_SPIN = 1,
+};
+
+// Possible lock states are MUTEX_UNLOCKED, MUTEX_LOCKED and MUTEX_SLEEPING.
+// MUTEX_SLEEPING means that there is presumably at least one sleeping thread.
+// Note that there can be spinning threads in any of these states; they
+// do not affect the mutex's state.
+void
+runtime_lock(Lock *l)
+{
+	uint32 i, v, wait, spin;
+
+	if(m->locks++ < 0)
+		runtime_throw("runtime_lock: lock count");
+
+	// Speculative grab for lock.
+	v = runtime_xchg(&l->key, MUTEX_LOCKED);
+	if(v == MUTEX_UNLOCKED)
+		return;
+
+	// wait is either MUTEX_LOCKED or MUTEX_SLEEPING
+	// depending on whether there is a thread sleeping
+	// on this mutex.  If we ever change l->key from
+	// MUTEX_SLEEPING to some other value, we must be
+	// careful to change it back to MUTEX_SLEEPING before
+	// returning, to ensure that the sleeping thread gets
+	// its wakeup call.
+	wait = v;
+
+	// On uniprocessors, no point spinning.
+	// On multiprocessors, spin for ACTIVE_SPIN attempts.
+	spin = 0;
+	if(runtime_ncpu > 1)
+		spin = ACTIVE_SPIN;
+
+	for(;;) {
+		// Try for lock, spinning.
+		for(i = 0; i < spin; i++) {
+			while(l->key == MUTEX_UNLOCKED)
+				if(runtime_cas(&l->key, MUTEX_UNLOCKED, wait))
+					return;
+			runtime_procyield(ACTIVE_SPIN_CNT);
+		}
+
+		// Try for lock, rescheduling.
+		for(i=0; i < PASSIVE_SPIN; i++) {
+			while(l->key == MUTEX_UNLOCKED)
+				if(runtime_cas(&l->key, MUTEX_UNLOCKED, wait))
+					return;
+			runtime_osyield();
+		}
+
+		// Sleep.
+		v = runtime_xchg(&l->key, MUTEX_SLEEPING);
+		if(v == MUTEX_UNLOCKED)
+			return;
+		wait = MUTEX_SLEEPING;
+		runtime_futexsleep(&l->key, MUTEX_SLEEPING, -1);
+	}
+}
+
+void
+runtime_unlock(Lock *l)
+{
+	uint32 v;
+
+	if(--m->locks < 0)
+		runtime_throw("runtime_unlock: lock count");
+
+	v = runtime_xchg(&l->key, MUTEX_UNLOCKED);
+	if(v == MUTEX_UNLOCKED)
+		runtime_throw("unlock of unlocked lock");
+	if(v == MUTEX_SLEEPING)
+		runtime_futexwakeup(&l->key, 1);
+}
+
+// One-time notifications.
+void
+runtime_noteclear(Note *n)
+{
+	n->key = 0;
+}
+
+void
+runtime_notewakeup(Note *n)
+{
+	runtime_xchg(&n->key, 1);
+	runtime_futexwakeup(&n->key, 1);
+}
+
+void
+runtime_notesleep(Note *n)
+{
+	while(runtime_atomicload(&n->key) == 0)
+		runtime_futexsleep(&n->key, 0, -1);
+}
+
+void
+runtime_notetsleep(Note *n, int64 ns)
+{
+	int64 deadline, now;
+
+	if(ns < 0) {
+		runtime_notesleep(n);
+		return;
+	}
+
+	if(runtime_atomicload(&n->key) != 0)
+		return;
+
+	deadline = runtime_nanotime() + ns;
+	for(;;) {
+		runtime_futexsleep(&n->key, 0, ns);
+		if(runtime_atomicload(&n->key) != 0)
+			return;
+		now = runtime_nanotime();
+		if(now >= deadline)
+			return;
+		ns = deadline - now;
+	}
+}
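
To make the state machine above concrete, here is a reduced model of
the same mutex (not part of the patch), written with the same GCC
__atomic builtins that the runtime.h macros wrap; the spinning phases
are elided, and futex_wait/futex_wake are hypothetical stand-ins for
runtime_futexsleep and runtime_futexwakeup:

enum { UNLOCKED = 0, HELD = 1, SLEEPING = 2 };

extern void futex_wait(unsigned *addr, unsigned val);	/* hypothetical */
extern void futex_wake(unsigned *addr, unsigned cnt);	/* hypothetical */

static void
sketch_lock(unsigned *key)
{
	if(__atomic_exchange_n(key, HELD, __ATOMIC_SEQ_CST) == UNLOCKED)
		return;	/* speculative grab succeeded */
	/* Contended: every transition from here on must leave SLEEPING
	   behind, so a sleeping thread never loses its wakeup.  */
	while(__atomic_exchange_n(key, SLEEPING, __ATOMIC_SEQ_CST) != UNLOCKED)
		futex_wait(key, SLEEPING);	/* sleep while *key == SLEEPING */
}

static void
sketch_unlock(unsigned *key)
{
	if(__atomic_exchange_n(key, UNLOCKED, __ATOMIC_SEQ_CST) == SLEEPING)
		futex_wake(key, 1);	/* someone may be sleeping; wake one */
}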
diff -r 8b1402d2f0d3 libgo/runtime/lock_sema.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/lock_sema.c	Tue Nov 22 12:18:39 2011 -0800
@@ -0,0 +1,217 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+// This implementation depends on OS-specific implementations of
+//
+//	uintptr runtime.semacreate(void)
+//		Create a semaphore, which will be assigned to m->waitsema.
+//		The zero value is treated as absence of any semaphore,
+//		so be sure to return a non-zero value.
+//
+//	int32 runtime.semasleep(int64 ns)
+//		If ns < 0, acquire m->waitsema and return 0.
+//		If ns >= 0, try to acquire m->waitsema for at most ns nanoseconds.
+//		Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
+//
+//	void runtime.semawakeup(M *mp)
+//		Wake up mp, which is or will soon be sleeping on mp->waitsema.
+//
+
+enum
+{
+	LOCKED = 1,
+
+	ACTIVE_SPIN = 4,
+	ACTIVE_SPIN_CNT = 30,
+	PASSIVE_SPIN = 1,
+};
+
+void
+runtime_lock(Lock *l)
+{
+	uintptr v;
+	uint32 i, spin;
+
+	if(m->locks++ < 0)
+		runtime_throw("runtime_lock: lock count");
+
+	// Speculative grab for lock.
+	if(runtime_casp(&l->waitm, nil, (void*)LOCKED))
+		return;
+
+	if(m->waitsema == 0)
+		m->waitsema = runtime_semacreate();
+
+	// On uniprocessors, no point spinning.
+	// On multiprocessors, spin for ACTIVE_SPIN attempts.
+	spin = 0;
+	if(runtime_ncpu > 1)
+		spin = ACTIVE_SPIN;
+
+	for(i=0;; i++) {
+		v = (uintptr)runtime_atomicloadp(&l->waitm);
+		if((v&LOCKED) == 0) {
+unlocked:
+			if(runtime_casp(&l->waitm, (void*)v, (void*)(v|LOCKED)))
+				return;
+			i = 0;
+		}
+		if(i<spin)
+			runtime_procyield(ACTIVE_SPIN_CNT);
+		else if(i<spin+PASSIVE_SPIN)
+			runtime_osyield();
+		else {
+			// Someone else has it.
+			// l->waitm points to a linked list of M's waiting
+			// for this lock, chained through m->nextwaitm.
+			// Queue this M.
+			for(;;) {
+				m->nextwaitm = (void*)(v&~LOCKED);
+				if(runtime_casp(&l->waitm, (void*)v, (void*)((uintptr)m|LOCKED)))
+					break;
+				v = (uintptr)runtime_atomicloadp(&l->waitm);
+				if((v&LOCKED) == 0)
+					goto unlocked;
+			}
+			if(v&LOCKED) {
+				// Queued.  Wait.
+				runtime_semasleep(-1);
+				i = 0;
+			}
+		}
+	}
+}
+
+void
+runtime_unlock(Lock *l)
+{
+	uintptr v;
+	M *mp;
+
+	if(--m->locks < 0)
+		runtime_throw("runtime_unlock: lock count");
+
+	for(;;) {
+		v = (uintptr)runtime_atomicloadp(&l->waitm);
+		if(v == LOCKED) {
+			if(runtime_casp(&l->waitm, (void*)LOCKED, nil))
+				break;
+		} else {
+			// Other M's are waiting for the lock.
+			// Dequeue an M.
+			mp = (void*)(v&~LOCKED);
+			if(runtime_casp(&l->waitm, (void*)v, mp->nextwaitm)) {
+				// Dequeued an M.  Wake it.
+				runtime_semawakeup(mp);
+				break;
+			}
+		}
+	}
+}
+
+// One-time notifications.
+void
+runtime_noteclear(Note *n)
+{
+	n->waitm = nil;
+}
+
+void
+runtime_notewakeup(Note *n)
+{
+	M *mp;
+
+	do
+		mp = runtime_atomicloadp(&n->waitm);
+	while(!runtime_casp(&n->waitm, mp, (void*)LOCKED));
+
+	// Successfully set waitm to LOCKED.
+	// What was it before?
+	if(mp == nil) {
+		// Nothing was waiting.  Done.
+	} else if(mp == (M*)LOCKED) {
+		// Two notewakeups!  Not allowed.
+		runtime_throw("notewakeup - double wakeup");
+	} else {
+		// Must be the waiting m.  Wake it up.
+		runtime_semawakeup(mp);
+	}
+}
+
+void
+runtime_notesleep(Note *n)
+{
+	if(m->waitsema == 0)
+		m->waitsema = runtime_semacreate();
+	if(!runtime_casp(&n->waitm, nil, m)) {  // must be LOCKED (got wakeup)
+		if(n->waitm != (void*)LOCKED)
+			runtime_throw("notesleep - waitm out of sync");
+		return;
+	}
+	// Queued.  Sleep.
+	runtime_semasleep(-1);
+}
+
+void
+runtime_notetsleep(Note *n, int64 ns)
+{
+	M *mp;
+	int64 deadline, now;
+
+	if(ns < 0) {
+		runtime_notesleep(n);
+		return;
+	}
+
+	if(m->waitsema == 0)
+		m->waitsema = runtime_semacreate();
+
+	// Register for wakeup on n->waitm.
+	if(!runtime_casp(&n->waitm, nil, m)) {  // must be LOCKED (got wakeup already)
+		if(n->waitm != (void*)LOCKED)
+			runtime_throw("notetsleep - waitm out of sync");
+		return;
+	}
+
+	deadline = runtime_nanotime() + ns;
+	for(;;) {
+		// Registered.  Sleep.
+		if(runtime_semasleep(ns) >= 0) {
+			// Acquired semaphore, semawakeup unregistered us.
+			// Done.
+			return;
+		}
+
+		// Interrupted or timed out.  Still registered.  Semaphore not acquired.
+		now = runtime_nanotime();
+		if(now >= deadline)
+			break;
+
+		// Deadline hasn't arrived.  Keep sleeping.
+		ns = deadline - now;
+	}
+
+	// Deadline arrived.  Still registered.  Semaphore not acquired.
+	// Want to give up and return, but have to unregister first,
+	// so that any notewakeup racing with the return does not
+	// try to grant us the semaphore when we don't expect it.
+	for(;;) {
+		mp = runtime_atomicloadp(&n->waitm);
+		if(mp == m) {
+			// No wakeup yet; unregister if possible.
+			if(runtime_casp(&n->waitm, mp, nil))
+				return;
+		} else if(mp == (M*)LOCKED) {
+			// Wakeup happened so semaphore is available.
+			// Grab it to avoid getting out of sync.
+			if(runtime_semasleep(-1) < 0)
+				runtime_throw("runtime: unable to acquire - semaphore out of sync");
+			return;
+		} else {
+			runtime_throw("runtime: unexpected waitm - semaphore out of sync");
+		}
+	}
+}
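
The sema-based Lock packs its entire state into one word: the low bit
is the LOCKED flag and the remaining bits hold the head of the list of
waiting M's, chained through m->nextwaitm.  A small illustration of
that encoding (not part of the patch):

#include <stdint.h>
#include <assert.h>

enum { LOCK_BIT = 1 };

typedef struct M M;	/* only pointers to M are needed here */

static uintptr_t
pack(M *head, int locked)
{
	/* M's are at least word-aligned, so the low bit is free.  */
	assert(((uintptr_t)head & LOCK_BIT) == 0);
	return (uintptr_t)head | (locked ? LOCK_BIT : 0);
}

static M*
waiters(uintptr_t v)
{
	return (M*)(v & ~(uintptr_t)LOCK_BIT);
}

static int
is_locked(uintptr_t v)
{
	return v & LOCK_BIT;
}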
diff -r 8b1402d2f0d3 libgo/runtime/malloc.goc
--- a/libgo/runtime/malloc.goc	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/malloc.goc	Tue Nov 22 12:18:39 2011 -0800
@@ -376,12 +376,6 @@
 	runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc);
 	m->mcache = runtime_allocmcache();
 
-	// Initialize malloc profiling.
-	runtime_Mprof_Init();
-
-	// Initialize finalizer.
-	runtime_initfintab();
-
 	// See if it works.
 	runtime_free(runtime_malloc(1));
 }
diff -r 8b1402d2f0d3 libgo/runtime/malloc.h
--- a/libgo/runtime/malloc.h	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/malloc.h	Tue Nov 22 12:18:39 2011 -0800
@@ -405,7 +405,6 @@
 	FlagNoGC = 1<<2,	// must not free or scan for pointers
 };
 
-void	runtime_Mprof_Init(void);
 void	runtime_MProf_Malloc(void*, uintptr);
 void	runtime_MProf_Free(void*, uintptr);
 void	runtime_MProf_Mark(void (*scan)(byte *, int64));
diff -r 8b1402d2f0d3 libgo/runtime/mcentral.c
--- a/libgo/runtime/mcentral.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mcentral.c	Tue Nov 22 12:18:39 2011 -0800
@@ -26,7 +26,6 @@
 void
 runtime_MCentral_Init(MCentral *c, int32 sizeclass)
 {
-	runtime_initlock(c);
 	c->sizeclass = sizeclass;
 	runtime_MSpanList_Init(&c->nonempty);
 	runtime_MSpanList_Init(&c->empty);
diff -r 8b1402d2f0d3 libgo/runtime/mfinal.c
--- a/libgo/runtime/mfinal.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mfinal.c	Tue Nov 22 12:18:39 2011 -0800
@@ -41,15 +41,6 @@
 	uint8 pad[0 /* CacheLineSize - sizeof(Fintab) */];	
 } fintab[TABSZ];
 
-void
-runtime_initfintab()
-{
-	int32 i;
-
-	for(i=0; i<TABSZ; i++)
-		runtime_initlock(&fintab[i]);
-}
-
 static void
 addfintab(Fintab *t, void *k, void (*fn)(void*), const struct __go_func_type *ft)
 {
diff -r 8b1402d2f0d3 libgo/runtime/mgc0.c
--- a/libgo/runtime/mgc0.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mgc0.c	Tue Nov 22 12:18:39 2011 -0800
@@ -860,12 +860,6 @@
 		p = runtime_getenv("GOGCTRACE");
 		if(p != nil)
 			gctrace = runtime_atoi(p);
-
-		runtime_initlock(&work.fmu);
-		runtime_initlock(&work.emu);
-		runtime_initlock(&work.markgate);
-		runtime_initlock(&work.sweepgate);
-		runtime_initlock(&work.Lock);
 	}
 	if(gcpercent < 0)
 		return;
diff -r 8b1402d2f0d3 libgo/runtime/mheap.c
--- a/libgo/runtime/mheap.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mheap.c	Tue Nov 22 12:18:39 2011 -0800
@@ -40,7 +40,6 @@
 {
 	uint32 i;
 
-	runtime_initlock(h);
 	runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), alloc, RecordSpan, h);
 	runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), alloc, nil, nil);
 	// h->mapcache needs no init
diff -r 8b1402d2f0d3 libgo/runtime/mprof.goc
--- a/libgo/runtime/mprof.goc	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mprof.goc	Tue Nov 22 12:18:39 2011 -0800
@@ -186,12 +186,6 @@
 	return nil;
 }
 
-void
-runtime_Mprof_Init()
-{
-	runtime_initlock(&proflock);
-}
-
 // Called by malloc to record a profiled block.
 void
 runtime_MProf_Malloc(void *p, uintptr size)
diff -r 8b1402d2f0d3 libgo/runtime/runtime.c
--- a/libgo/runtime/runtime.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/runtime.c	Tue Nov 22 12:18:39 2011 -0800
@@ -14,12 +14,6 @@
 static Lock paniclk;
 
 void
-runtime_initpanic(void)
-{
-	runtime_initlock(&paniclk);
-}
-
-void
 runtime_startpanic(void)
 {
 	if(m->dying) {
@@ -56,7 +50,6 @@
 		// Wait forever without chewing up cpu.
 		// It will exit when it's done.
 		static Lock deadlock;
-		runtime_initlock(&deadlock);
 		runtime_lock(&deadlock);
 		runtime_lock(&deadlock);
 	}
diff -r 8b1402d2f0d3 libgo/runtime/runtime.h
--- a/libgo/runtime/runtime.h	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/runtime.h	Tue Nov 22 12:18:39 2011 -0800
@@ -48,33 +48,17 @@
 typedef	uint8			bool;
 typedef	uint8			byte;
 typedef	struct	G		G;
+typedef	union	Lock		Lock;
 typedef	struct	M		M;
+typedef	union	Note		Note;
 typedef	struct	MCache		MCache;
 typedef struct	FixAlloc	FixAlloc;
-typedef	struct	Lock		Lock;
 
 typedef	struct	__go_defer_stack	Defer;
 typedef	struct	__go_panic_stack	Panic;
 typedef	struct	__go_open_array		Slice;
 typedef	struct	__go_string		String;
 
-/* We use mutexes for locks.  6g uses futexes directly, and perhaps
-   someday we will do that too.  */
-
-struct	Lock
-{
-	uint32 key;
-	sem_t sem;
-};
-
-/* A Note.  */
-
-typedef	struct	Note		Note;
-
-struct Note {
-	int32 woken;
-};
-
 /* Per CPU declarations.  */
 
 #ifdef __rtems__
@@ -99,8 +83,19 @@
 	false	= 0,
 };
 
-/* Structures.  */
-
+/*
+ * structures
+ */
+union	Lock
+{
+	uint32	key;	// futex-based impl
+	M*	waitm;	// linked list of waiting M's (sema-based impl)
+};
+union	Note
+{
+	uint32	key;	// futex-based impl
+	M*	waitm;	// waiting M (sema-based impl)
+};
 struct	G
 {
 	Defer*	defer;
@@ -141,6 +136,10 @@
 	int32	profilehz;
 	uint32	fastrand;
 	MCache	*mcache;
+	M*	nextwaitm;	// next M waiting for lock
+	uintptr	waitsema;	// semaphore for parking on locks
+	uint32	waitsemacount;
+	uint32	waitsemalock;
 
 	/* For the list of all threads.  */
 	struct __go_thread_id *list_entry;
@@ -173,6 +172,7 @@
  * external data
  */
 extern	uint32	runtime_panicking;
+int32	runtime_ncpu;
 
 /*
  * common functions and data
@@ -183,13 +183,13 @@
  * very low level c-called
  */
 void	runtime_args(int32, byte**);
+void	runtime_osinit(void);
 void	runtime_goargs(void);
 void	runtime_goenvs(void);
 void	runtime_throw(const char*);
 void*	runtime_mal(uintptr);
 String	runtime_gostringnocopy(byte*);
 void	runtime_mallocinit(void);
-void	runtime_initfintab(void);
 void	siginit(void);
 bool	__go_sigsend(int32 sig);
 int64	runtime_nanotime(void);
@@ -208,27 +208,45 @@
  * mutual exclusion locks.  in the uncontended case,
  * as fast as spin locks (just a few user-level instructions),
  * but on the contention path they sleep in the kernel.
+ * a zeroed Lock is unlocked (no need to initialize each lock).
  */
-void	runtime_initlock(Lock*);
 void	runtime_lock(Lock*);
 void	runtime_unlock(Lock*);
-void	runtime_destroylock(Lock*);
-
-void runtime_semacquire (uint32 *) asm ("libgo_runtime.runtime.Semacquire");
-void runtime_semrelease (uint32 *) asm ("libgo_runtime.runtime.Semrelease");
 
 /*
  * sleep and wakeup on one-time events.
  * before any calls to notesleep or notewakeup,
  * must call noteclear to initialize the Note.
- * then, any number of threads can call notesleep
+ * then, exactly one thread can call notesleep
  * and exactly one thread can call notewakeup (once).
- * once notewakeup has been called, all the notesleeps
- * will return.  future notesleeps will return immediately.
+ * once notewakeup has been called, the notesleep
+ * will return.  future notesleep calls will return immediately.
+ * subsequent noteclear must be called only after
+ * previous notesleep has returned, e.g. it's disallowed
+ * to call noteclear straight after notewakeup.
+ *
+ * notetsleep is like notesleep but wakes up after
+ * a given number of nanoseconds even if the event
+ * has not yet happened.  if a goroutine uses notetsleep to
+ * wake up early, it must wait to call noteclear until it
+ * can be sure that no other goroutine is calling
+ * notewakeup.
  */
 void	runtime_noteclear(Note*);
 void	runtime_notesleep(Note*);
 void	runtime_notewakeup(Note*);
+void	runtime_notetsleep(Note*, int64);
+
+/*
+ * low-level synchronization for implementing the above
+ */
+uintptr	runtime_semacreate(void);
+int32	runtime_semasleep(int64);
+void	runtime_semawakeup(M*);
+// or
+void	runtime_futexsleep(uint32*, uint32, int64);
+void	runtime_futexwakeup(uint32*, uint32);
+
 
 /* Functions.  */
 #define runtime_printf printf
@@ -248,17 +266,22 @@
 #define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 #define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 #define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
+#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
 
-void	runtime_initpanic(void);
 void	runtime_dopanic(int32) __attribute__ ((noreturn));
 void	runtime_startpanic(void);
 const byte*	runtime_getenv(const char*);
 int32	runtime_atoi(const byte*);
 void	runtime_sigprof(uint8 *pc, uint8 *sp, uint8 *lr);
-void	runtime_cpuprofinit(void);
 void	runtime_resetcpuprofiler(int32);
 void	runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32);
 uint32	runtime_fastrand1(void);
+void	runtime_semacquire (uint32 *) asm ("libgo_runtime.runtime.Semacquire");
+void	runtime_semrelease (uint32 *) asm ("libgo_runtime.runtime.Semrelease");
 void	runtime_procyield(uint32);
 void	runtime_osyield(void);
 void	runtime_usleep(uint32);
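
To make the Note contract documented above concrete (exactly one
sleeper, exactly one wakeup, and noteclear only after the sleep has
returned), here is an illustrative usage sketch; it is not part of the
patch, and start_thread is a hypothetical stand-in for however the
second thread gets started:

static Note done;

static void
worker(void)
{
	/* ... produce the result ... */
	runtime_notewakeup(&done);	/* the one and only wakeup */
}

static void
wait_for_result(void)
{
	runtime_noteclear(&done);	/* initialize before sleep/wakeup */
	start_thread(worker);		/* hypothetical thread launcher */
	runtime_notesleep(&done);	/* the one and only sleeper */
	/* Only now is it safe to call runtime_noteclear again.  */
}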
diff -r 8b1402d2f0d3 libgo/runtime/thread-linux.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/thread-linux.c	Tue Nov 22 12:18:39 2011 -0800
@@ -0,0 +1,100 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <linux/futex.h>
+
+typedef struct timespec Timespec;
+
+// Atomically,
+//	if(*addr == val) sleep
+// Might be woken up spuriously; that's allowed.
+// Don't sleep longer than ns; ns < 0 means forever.
+void
+runtime_futexsleep(uint32 *addr, uint32 val, int64 ns)
+{
+	Timespec ts, *tsp;
+
+	if(ns < 0)
+		tsp = nil;
+	else {
+		ts.tv_sec = ns/1000000000LL;
+		ts.tv_nsec = ns%1000000000LL;
+		// Avoid overflow
+		if(ts.tv_sec > 1<<30)
+			ts.tv_sec = 1<<30;
+		tsp = &ts;
+	}
+
+	// Some Linux kernels have a bug where a futex
+	// FUTEX_WAIT call returns an internal error code
+	// as an errno.  Libpthread ignores the return value
+	// here, and so can we: as it says a few lines up,
+	// spurious wakeups are allowed.
+	syscall(__NR_futex, addr, FUTEX_WAIT, val, tsp, nil, 0);
+}
+
+// If any procs are sleeping on addr, wake up at most cnt.
+void
+runtime_futexwakeup(uint32 *addr, uint32 cnt)
+{
+	int64 ret;
+
+	ret = syscall(__NR_futex, addr, FUTEX_WAKE, cnt, nil, nil, 0);
+
+	if(ret >= 0)
+		return;
+
+	// I don't know that futex wakeup can return
+	// EAGAIN or EINTR, but if it does, it would be
+	// safe to loop and call futex again.
+	runtime_printf("futexwakeup addr=%p returned %lld\n", addr, (long long)ret);
+	*(int32*)0x1006 = 0x1006;
+}
+
+static int32
+getproccount(void)
+{
+	int32 fd, rd, cnt, cpustrlen;
+	const byte *cpustr, *pos;
+	byte *bufpos;
+	byte buf[256];
+
+	fd = open("/proc/stat", O_RDONLY|O_CLOEXEC, 0);
+	if(fd == -1)
+		return 1;
+	cnt = 0;
+	bufpos = buf;
+	cpustr = (const byte*)"\ncpu";
+	cpustrlen = runtime_findnull((const byte*)cpustr);
+	for(;;) {
+		rd = read(fd, bufpos, sizeof(buf)-cpustrlen);
+		if(rd == -1)
+			break;
+		bufpos[rd] = 0;
+		for(pos=buf; (pos=(const byte*)strstr((const char*)pos, (const char*)cpustr)) != nil; cnt++, pos++) {
+		}
+		if(rd < cpustrlen)
+			break;
+		memmove(buf, bufpos+rd-cpustrlen+1, cpustrlen-1);
+		bufpos = buf+cpustrlen-1;
+	}
+	close(fd);
+	return cnt ? cnt : 1;
+}
+
+void
+runtime_osinit(void)
+{
+	runtime_ncpu = getproccount();
+}
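
For reference, a self-contained demonstration of the futex protocol
that runtime_futexsleep and runtime_futexwakeup wrap (not part of the
patch; error handling elided; compile with -pthread):

#include <linux/futex.h>
#include <syscall.h>
#include <unistd.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t key;	/* zero-initialized, like a fresh Note */

static void *
waiter(void *arg __attribute__ ((unused)))
{
	/* Loop because FUTEX_WAIT may wake spuriously.  */
	while(__atomic_load_n(&key, __ATOMIC_SEQ_CST) == 0)
		syscall(__NR_futex, &key, FUTEX_WAIT, 0, NULL, NULL, 0);
	return NULL;
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	sleep(1);
	__atomic_store_n(&key, 1, __ATOMIC_SEQ_CST);
	syscall(__NR_futex, &key, FUTEX_WAKE, 1, NULL, NULL, 0);
	pthread_join(t, NULL);
	puts("woken");
	return 0;
}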
diff -r 8b1402d2f0d3 libgo/runtime/thread-sema.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/thread-sema.c	Tue Nov 22 12:18:39 2011 -0800
@@ -0,0 +1,74 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <time.h>
+#include <semaphore.h>
+
+/* Create a semaphore.  */
+
+uintptr
+runtime_semacreate(void)
+{
+  sem_t *p;
+
+  /* Call malloc rather than runtime_malloc.  This will allocate space
+     on the C heap.  We can't call runtime_malloc here because it
+     could cause a deadlock.  */
+  p = malloc (sizeof (sem_t));
+  if (sem_init (p, 0, 0) != 0)
+    runtime_throw ("sem_init");
+  return (uintptr) p;
+}
+
+/* Acquire m->waitsema.  */
+
+int32
+runtime_semasleep (int64 ns)
+{
+  int r;
+
+  if (ns >= 0)
+    {
+      struct timespec ts;
+
+      ns += runtime_nanotime ();
+      ts.tv_sec = ns / 1000000000LL;
+      ts.tv_nsec = ns % 1000000000LL;
+      r = sem_timedwait ((sem_t *) m->waitsema, &ts);
+      if (r != 0)
+	{
+	  if (errno == ETIMEDOUT || errno == EINTR)
+	    return -1;
+	  runtime_throw ("sem_timedwait");
+	}
+      return 0;
+    }
+
+  while (sem_wait ((sem_t *) m->waitsema) != 0)
+    {
+      if (errno == EINTR)
+	continue;
+      runtime_throw ("sem_wait");
+    }
+
+  return 0;
+}
+
+/* Wake up mp->waitsema.  */
+
+void
+runtime_semawakeup (M *mp)
+{
+  if (sem_post ((sem_t *) mp->waitsema) != 0)
+    runtime_throw ("sem_post");
+}
+
+void
+runtime_osinit(void)
+{
+}
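
One subtlety in runtime_semasleep above: sem_timedwait takes an
absolute CLOCK_REALTIME deadline rather than a relative timeout, which
is why the code adds runtime_nanotime () to ns before splitting the sum
into a timespec.  This assumes runtime_nanotime shares the realtime
epoch, which holds as long as go-nanotime.c stays based on
gettimeofday.  Reduced to its essentials (illustrative only):

#include <stdint.h>
#include <time.h>

static struct timespec
abs_deadline (int64_t now_ns, int64_t rel_ns)
{
  struct timespec ts;
  int64_t ns = now_ns + rel_ns;

  ts.tv_sec = ns / 1000000000LL;
  ts.tv_nsec = ns % 1000000000LL;
  return ts;
}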
diff -r 8b1402d2f0d3 libgo/runtime/thread.c
--- a/libgo/runtime/thread.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/thread.c	Tue Nov 22 12:18:39 2011 -0800
@@ -6,100 +6,68 @@
 #include "runtime.h"
 #include "go-assert.h"
 
-void
-runtime_initlock(Lock *l)
-{
-	l->key = 0;
-	if(sem_init(&l->sem, 0, 0) != 0)
-		runtime_throw("sem_init failed");
-}
+/* For targets which don't have the required sync support.  Really
+   these should be provided by gcc itself.  FIXME.  */
 
-// noinline so that runtime_lock doesn't have to split the stack.
-static void runtime_lock_full(Lock *l) __attribute__ ((noinline));
+#if !defined (HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4) || !defined (HAVE_SYNC_FETCH_AND_ADD_4)
 
-static void
-runtime_lock_full(Lock *l)
-{
-	for(;;){
-		if(sem_wait(&l->sem) == 0)
-			return;
-		if(errno != EINTR)
-			runtime_throw("sem_wait failed");
-	}
-}
+static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
 
-void
-runtime_lock(Lock *l)
-{
-	if(m != nil) {
-		if(m->locks < 0)
-			runtime_throw("lock count");
-		m->locks++;
-	}
-
-	if(runtime_xadd(&l->key, 1) > 1)	// someone else has it; wait
-		runtime_lock_full(l);
-}
-
-static void runtime_unlock_full(Lock *l) __attribute__ ((noinline));
-
-static void
-runtime_unlock_full(Lock *l)
-{
-	if(sem_post(&l->sem) != 0)
-		runtime_throw("sem_post failed");
-}
-
-void
-runtime_unlock(Lock *l)
-{
-	if(m != nil) {
-		m->locks--;
-		if(m->locks < 0)
-			runtime_throw("lock count");
-	}
-
-	if(runtime_xadd(&l->key, -1) > 0)	// someone else is waiting
-		runtime_unlock_full(l);
-}
-
-void
-runtime_destroylock(Lock *l)
-{
-	sem_destroy(&l->sem);
-}
+#endif
 
 #ifndef HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4
 
-// For targets which don't have the required sync support.  Really
-// this should be provided by gcc itself.  FIXME.
-
-static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
+_Bool
+__sync_bool_compare_and_swap_4 (uint32*, uint32, uint32)
+  __attribute__ ((visibility ("hidden")));
 
 _Bool
-__sync_bool_compare_and_swap_4(uint32*, uint32, uint32)
-  __attribute__((visibility("hidden")));
-
-_Bool
-__sync_bool_compare_and_swap_4(uint32* ptr, uint32 old, uint32 new)
+__sync_bool_compare_and_swap_4 (uint32* ptr, uint32 old, uint32 new)
 {
   int i;
   _Bool ret;
 
-  i = pthread_mutex_lock(&sync_lock);
-  __go_assert(i == 0);
+  i = pthread_mutex_lock (&sync_lock);
+  __go_assert (i == 0);
 
-  if(*ptr != old) {
+  if (*ptr != old)
     ret = 0;
-  } else {
-    *ptr = new;
-    ret = 1;
-  }
+  else
+    {
+      *ptr = new;
+      ret = 1;
+    }
 
-  i = pthread_mutex_unlock(&sync_lock);
-  __go_assert(i == 0);
+  i = pthread_mutex_unlock (&sync_lock);
+  __go_assert (i == 0);
 
   return ret;
 }
 
 #endif
+
+#ifndef HAVE_SYNC_FETCH_AND_ADD_4
+
+uint32
+__sync_fetch_and_add_4 (uint32*, uint32)
+  __attribute__ ((visibility ("hidden")));
+
+uint32
+__sync_fetch_and_add_4 (uint32* ptr, uint32 add)
+{
+  int i;
+  uint32 ret;
+
+  i = pthread_mutex_lock (&sync_lock);
+  __go_assert (i == 0);
+
+  ret = *ptr;
+  *ptr += add;
+
+  i = pthread_mutex_unlock (&sync_lock);
+  __go_assert (i == 0);
+
+  return ret;
+}
+
+#endif
