On Sat, 26 Mar 2011 10:52:08 -0700 Freddie Cash wrote:
FC> hastd backtrace is here:
FC> http://www.sd73.bc.ca/downloads/crash/hast-backtrace.png
It is not a hastd crash, but a kernel crash triggered by the hastd process.
I am not sure I got the same crash as you, but apparently a race is possible
in g_gate on device creation.
I got the following crash when starting many hast providers simultaneously:
fault virtual address = 0x0
#8 0xc0c11adc in calltrap () at /usr/src/sys/i386/i386/exception.s:168
#9 0xc086ac6b in g_gate_ioctl (dev=0xc6a24300, cmd=3374345472,
addr=0xc9fec000 "\002", flags=3, td=0xc7ff0b80)
at /usr/src/sys/geom/gate/g_gate.c:410
#10 0xc0853c5b in devfs_ioctl_f (fp=0xc9b9e310, com=3374345472,
data=0xc9fec000, cred=0xc8c9c200, td=0xc7ff0b80)
at /usr/src/sys/fs/devfs/devfs_vnops.c:678
#11 0xc09210cd in kern_ioctl (td=0xc7ff0b80, fd=3, com=3374345472,
data=0xc9fec000 "\002") at file.h:262
#12 0xc0921254 in ioctl (td=0xc7ff0b80, uap=0xf5edbcec)
at /usr/src/sys/kern/sys_generic.c:679
#13 0xc0916616 in syscallenter (td=0xc7ff0b80, sa=0xf5edbce4)
at /usr/src/sys/kern/subr_trap.c:315
#14 0xc0c2b9ff in syscall (frame=0xf5edbd28)
at /usr/src/sys/i386/i386/trap.c:1086
#15 0xc0c11b71 in Xint0x80_syscall ()
at /usr/src/sys/i386/i386/exception.s:266
The crash can also be reproduced by just creating many ggate devices simultaneously:
for i in `jot 100`; do
./ggiocreate $i&
done
ggiocreate.c is attached.
In my case the kernel crashes in g_gate_create() when checking for name
collisions in strcmp():
/* Check for name collision. */
for (unit = 0; unit < g_gate_maxunits; unit++) {
if (g_gate_units[unit] == NULL)
continue;
if (strcmp(name, g_gate_units[unit]->sc_provider->name) != 0)
continue;
mtx_unlock(&g_gate_units_lock);
mtx_destroy(&sc->sc_queue_mtx);
free(sc, M_GATE);
return (EEXIST);
}
I think the issue is the following. When preparing sc we take
g_gate_units_lock, check for name collision, fill sc fields except
sc->sc_provider, and register sc in g_gate_units[unit]. sc_provider is filled
in later, after g_gate_units_lock is released. So the following scenario is possible:
1) Thread A registers sc in g_gate_units[unit] with
g_gate_units[unit]->sc_provider still null and releases g_gate_units_lock.
2) Thread B traverses g_gate_units[] when checking for name collision and
crashes accessing g_gate_units[unit]->sc_provider->name.
The attached patch fixes the issue in my case.
--
Mikolaj Golub
ggiocreate.c
Description: Binary data
Index: sys/geom/gate/g_gate.c
===================================================================
--- sys/geom/gate/g_gate.c (revision 220050)
+++ sys/geom/gate/g_gate.c (working copy)
@@ -407,13 +407,14 @@ g_gate_create(struct g_gate_ctl_create *ggio)
for (unit = 0; unit < g_gate_maxunits; unit++) {
if (g_gate_units[unit] == NULL)
continue;
- if (strcmp(name, g_gate_units[unit]->sc_provider->name) != 0)
+ if (strcmp(name, g_gate_units[unit]->sc_name) != 0)
continue;
mtx_unlock(&g_gate_units_lock);
mtx_destroy(&sc->sc_queue_mtx);
free(sc, M_GATE);
return (EEXIST);
}
+ sc->sc_name = name;
g_gate_units[sc->sc_unit] = sc;
g_gate_nunits++;
mtx_unlock(&g_gate_units_lock);
@@ -432,6 +433,9 @@ g_gate_create(struct g_gate_ctl_create *ggio)
sc->sc_provider = pp;
g_error_provider(pp, 0);
g_topology_unlock();
+ mtx_lock(&g_gate_units_lock);
+ sc->sc_name = sc->sc_provider->name;
+ mtx_unlock(&g_gate_units_lock);
if (sc->sc_timeout > 0) {
callout_reset(&sc->sc_callout, sc->sc_timeout * hz,
Index: sys/geom/gate/g_gate.h
===================================================================
--- sys/geom/gate/g_gate.h (revision 220050)
+++ sys/geom/gate/g_gate.h (working copy)
@@ -76,6 +76,7 @@
* 'P:' means 'Protected by'.
*/
struct g_gate_softc {
+ char *sc_name; /* P: (read-only) */
int sc_unit; /* P: (read-only) */
int sc_ref; /* P: g_gate_list_mtx */
struct g_provider *sc_provider; /* P: (read-only) */
@@ -96,7 +97,6 @@ struct g_gate_softc {
LIST_ENTRY(g_gate_softc) sc_next; /* P: g_gate_list_mtx */
char sc_info[G_GATE_INFOSIZE]; /* P: (read-only) */
};
-#define sc_name sc_provider->geom->name
#define G_GATE_DEBUG(lvl, ...) do { \
if (g_gate_debug >= (lvl)) { \
_______________________________________________ [email protected] mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-stable To unsubscribe, send any mail to "[email protected]"
