On 10/9/20 6:27 PM, Ryan Moeller wrote:

On 10/9/20 6:22 PM, Alan Somers wrote:
This sounds like it might be a regression introduced by the OpenZFS merge.
Have you compared vdev_geom.c in OpenZFS vs the old version?
-Alan


I don't think vdev_geom.c is involved, we're taking a wrong path in zvol_os.c because it seems the volume is created using the default volmode and later changed to volmode=dev.

Yes, you're on the right track. I tried this several times on a VM and it eventually hit the window:

# zfs create -s -V 20G -o primarycache=none -o volmode=dev head_root/testvol
zvol_create_minor_impl:1250[1]: Creating ZVOL head_root/testvol...
zvol_create_minor_impl:1371[1]: ZVOL head_root/testvol created.


Fatal trap 12: page fault while in kernel mode
cpuid = 7; apic id = 07
fault virtual address   = 0x110
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff82167fca
stack pointer           = 0x28:0xfffffe000edcdb30
frame pointer           = 0x28:0xfffffe000edcdb70
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 13 (g_down)
trap number             = 12

db> acttrace

Tracing command zfskern pid 21 tid 100478 td 0xfffffe00610c9800 (CPU 6)
cpustop_handler() at cpustop_handler+0x28/frame 0xfffffe0011880e00
ipi_nmi_handler() at ipi_nmi_handler+0x39/frame 0xfffffe0011880e10
trap() at trap+0x56/frame 0xfffffe0011880f20
nmi_calltrap() at nmi_calltrap+0x8/frame 0xfffffe0011880f20
--- trap 0x13, rip = 0xffffffff80c25fb2, rsp = 0xfffffe006168c820, rbp = 0xfffffe006168c830 ---
lock_delay() at lock_delay+0x42/frame 0xfffffe006168c830
_mtx_lock_spin_cookie() at _mtx_lock_spin_cookie+0xc1/frame 0xfffffe006168c8a0 __mtx_lock_spin_flags() at __mtx_lock_spin_flags+0xd5/frame 0xfffffe006168c8e0
cnputs() at cnputs+0x58/frame 0xfffffe006168c910
vprintf() at vprintf+0xcd/frame 0xfffffe006168c9e0
printf() at printf+0x43/frame 0xfffffe006168ca40
zvol_free() at zvol_free+0x53/frame 0xfffffe006168ca80
zvol_task_cb() at zvol_task_cb+0x271/frame 0xfffffe006168cae0
taskq_run() at taskq_run+0x1f/frame 0xfffffe006168cb00
taskqueue_run_locked() at taskqueue_run_locked+0xaa/frame 0xfffffe006168cb80
taskqueue_thread_loop() at taskqueue_thread_loop+0x94/frame 0xfffffe006168cbb0
fork_exit() at fork_exit+0x80/frame 0xfffffe006168cbf0
fork_trampoline() at fork_trampoline+0xe/frame 0xfffffe006168cbf0
--- trap 0, rip = 0, rsp = 0, rbp = 0 ---

Tracing command geom pid 13 tid 100049 td 0xfffffe0011862700 (CPU 7)
kdb_enter() at kdb_enter+0x37/frame 0xfffffe000edcd7e0
vpanic() at vpanic+0x19e/frame 0xfffffe000edcd830
panic() at panic+0x43/frame 0xfffffe000edcd890
trap_fatal() at trap_fatal+0x387/frame 0xfffffe000edcd8f0
trap_pfault() at trap_pfault+0x97/frame 0xfffffe000edcd950
trap() at trap+0x2ab/frame 0xfffffe000edcda60
calltrap() at calltrap+0x8/frame 0xfffffe000edcda60
--- trap 0xc, rip = 0xffffffff82167fca, rsp = 0xfffffe000edcdb30, rbp = 0xfffffe000edcdb70 ---
zvol_geom_bio_start() at zvol_geom_bio_start+0x2a/frame 0xfffffe000edcdb70
g_io_schedule_down() at g_io_schedule_down+0x134/frame 0xfffffe000edcdba0
g_down_procbody() at g_down_procbody+0x5c/frame 0xfffffe000edcdbb0
fork_exit() at fork_exit+0x80/frame 0xfffffe000edcdbf0
fork_trampoline() at fork_trampoline+0xe/frame 0xfffffe000edcdbf0
--- trap 0, rip = 0, rsp = 0, rbp = 0 ---

(The other CPUs were idle.)

On Fri, Oct 9, 2020 at 3:48 PM Eric van Gyzen <e...@vangyzen.net> wrote:

On 10/9/20 4:39 PM, Eric van Gyzen wrote:
Does this look familiar?  I'm creating a zvol with volmode=dev, but some
geom code paths were taken.  If this looks new, I'll provide more
details.

primarycache=none also seems to be a factor.  I can easily repro with:

zfs create -s -V 10G -o primarycache=none -o volmode=dev .../testvol


I don't think primarycache is a factor, I can easily repro with primarycache left at the default. The volmode property is being set asynchronously and losing the race with the initial creation of the minors. When volmode is changed the minor is supposed to be destroyed and then recreated in the correct mode, but that does not seem to be working correctly. Setting vfs.zfs.debug=1 you can see in dmesg the zvol is created once in volmode=geom and then a second attempt to create
the zvol fails, because zvol_free did not occur.

zvol_create_minor_impl:1250[1]: Creating ZVOL p0/testvol...
name=p0/testvol error=0 volmode=1
zvol_create_minor_impl:1372[1]: ZVOL p0/testvol created.
zvol_create_minor_impl:1250[1]: Creating ZVOL p0/testvol...

So something is preventing zv_free from being called by zvol_remove_minors_impl.

-Ryan



13.0-CURRENT r366500+84ccaf49083c-c272054 GENERIC

#8  <signal handler called>
#9  zvol_geom_bio_getattr (bp=0xfffff80376132900)
      at /usr/src/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:545
#10 zvol_geom_bio_start (bp=0xfffff80376132900)
      at /usr/src/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:519
#11 0xffffffff80b1c684 in g_io_schedule_down (tp=<optimized out>)
      at /usr/src/sys/geom/geom_io.c:848
#12 0xffffffff80b1cfcc in g_down_procbody (arg=<optimized out>)
      at /usr/src/sys/geom/geom_kern.c:111

(kgdb) f 9
#9  zvol_geom_bio_getattr (bp=0xfffff80376132900)
      at /usr/src/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:545
545        spa_t *spa = dmu_objset_spa(zv->zv_objset);

(kgdb) l
540        zvol_state_t *zv;
541
542        zv = bp->bio_to->private;
543        ASSERT(zv != NULL);
544
545        spa_t *spa = dmu_objset_spa(zv->zv_objset);
546        uint64_t refd, avail, usedobjs, availobjs;
547
548        if (g_handleattr_int(bp, "GEOM::candelete", 1))
549            return (0);

(kgdb) p zv
$1 = (zvol_state_t *) 0x0

(kgdb) p *bp
$3 = {
    bio_cmd = 4,
    bio_flags = 0,
    bio_cflags = 0,
    bio_pflags = 0,
    bio_dev = 0x0,
    bio_disk = 0x0,
    bio_offset = 0,
    bio_bcount = 0,
    bio_data = 0xfffff801fa687c00 "",
    bio_ma = 0x0,
    bio_ma_offset = 0,
    bio_ma_n = 0,
    bio_error = 0,
    bio_resid = 0,
    bio_done = 0x0,
    bio_driver1 = 0x0,
    bio_driver2 = 0x0,
    bio_caller1 = 0x0,
    bio_caller2 = 0x0,
    bio_queue = {
      tqe_next = 0xffffffffffffffff,
      tqe_prev = 0xffffffffffffffff
    },
    bio_attribute = 0xffffffff81223c03 "GEOM::physpath",
    bio_zone = {
      zone_cmd = 0 '\000',
      zone_params = {
        disk_params = {
          zone_mode = 0,
          flags = 0,
          optimal_seq_zones = 0,
          optimal_nonseq_zones = 0,
          max_seq_zones = 0
        },
        rwp = {
          id = 0,
          flags = 0 '\000'
        },
        report = {
          starting_id = 0,
          rep_options = 0 '\000',
          header = {
            same = 0 '\000',
            maximum_lba = 0,
            reserved = '\000' <repeats 63 times>
          },
          entries_allocated = 0,
          entries_filled = 0,
          entries_available = 0,
          entries = 0x0
        }
      }
    },
    bio_from = 0xfffff80006b92880,
    bio_to = 0xfffff80006972500,
    bio_length = 1024,
    bio_completed = 0,
    bio_children = 0,
    bio_inbed = 0,
    bio_parent = 0x0,
    bio_t0 = {
      sec = 50,
      frac = 10248368299661698441
    },
    bio_task = 0x0,
    bio_task_arg = 0x0,
    bio_spare1 = 0x0,
    bio_spare2 = 0x0,
    bio_track_bp = 0x0,
    bio_pblkno = 0
}

(kgdb) p *bp->bio_to
$4 = {
    name = 0xfffff80006972598 "zvol/disco_fast/vm/onefs1-1/disk7",
    provider = {
      le_next = 0x0,
      le_prev = 0xfffff80006972428
    },
    geom = 0xfffff80006972400,
    consumers = {
      lh_first = 0xfffff80006b92880
    },
    acr = 1,
    acw = 0,
    ace = 0,
    error = 0,
    orphan = {
      tqe_next = 0x0,
      tqe_prev = 0x0
    },
    mediasize = 5368709120,
    sectorsize = 512,
    stripesize = 8192,
    stripeoffset = 0,
    stat = 0xfffff80006d3d120,
    spare1 = 0,
    spare2 = 0,
    flags = 48,
    aliases = {
      lh_first = 0x0
    },
    private = 0x0,
    index = 0
}
_______________________________________________
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Reply via email to