Hi tech@, Currently, "bioctl -R" works only if the disk state is "Offline" (set by "bioctl -O"); it does not work for a "Failed" disk.
To make it work with a hot-swapped disk, report an unused ("unconfigured" in MegaRAID) disk to userland, and handle it properly when rebuilding. ---- Notes about implementation mfi_ioctl_disk(): to check whether a disk is missing, mar_pd_state in struct mfi_array shouldn't be used. When a disk is missing, it reports MFI_PD_UNCONFIG_GOOD (0x00), which can't be right. Instead, check whether mar_pd.mfp_id is 0xffff, the same way mfiutil(8) on FreeBSD does. If there is an unused (MFI_PD_UNCONFIG_GOOD or MFI_PD_UNCONFIG_BAD) disk (i.e. after hot swapping), report it to userland as an "Unused" disk instead of a "Failed" disk. mfi_ioctl_setstate(): to use an unused disk for rebuilding, the disk state must be "unconfigured good" and must not be "foreign". Then the disk must be set as a global hot spare. The new functions mfi_makegood() and mfi_makespare() handle this before rebuilding. If the disk state is "Offline", handle it the same way as before. Rename the following variables (cosmetic change): struct mfi_pd_details *info -> struct mfi_pd_details *pd; struct mfi_pd_list *pd -> struct mfi_pd_list *pl. Index: sys/dev/ic/mfi.c =================================================================== RCS file: /cvs/src/sys/dev/ic/mfi.c,v retrieving revision 1.167 diff -u -p -r1.167 mfi.c --- sys/dev/ic/mfi.c 2 Feb 2017 03:47:41 -0000 1.167 +++ sys/dev/ic/mfi.c 29 Jun 2017 08:05:23 -0000 @@ -1791,11 +1791,12 @@ mfi_ioctl_disk(struct mfi_softc *sc, str struct mfi_array *ar; struct mfi_ld_cfg *ld; struct mfi_pd_details *pd; + struct mfi_pd_list *pl; struct mfi_pd_progress *mfp; struct mfi_progress *mp; struct scsi_inquiry_data *inqbuf; char vend[8+16+4+1], *vendp; - int rv = EINVAL; + int i, rv = EINVAL; int arr, vol, disk, span; union mfi_mbox mbox; @@ -1811,6 +1812,7 @@ mfi_ioctl_disk(struct mfi_softc *sc, str cfg = sc->sc_cfg; pd = malloc(sizeof *pd, M_DEVBUF, M_WAITOK); + pl = malloc(sizeof *pl, M_DEVBUF, M_WAITOK); ar = cfg->mfc_array; vol = bd->bd_volid; @@ -1834,13 +1836,53 @@ mfi_ioctl_disk(struct mfi_softc *sc, str /* offset disk into pd 
list */ disk = bd->bd_diskid % ld[vol].mlc_parm.mpa_no_drv_per_span; - bd->bd_target = ar[arr].pd[disk].mar_enc_slot; + + if (ar[arr].pd[disk].mar_pd.mfp_id == 0xffffU) { + /* disk is missing but succeed command */ + bd->bd_status = BIOC_SDFAILED; + rv = 0; + + /* try to find an unused disk for the target to rebuild */ + if (mfi_mgmt(sc, MR_DCMD_PD_GET_LIST, MFI_DATA_IN, sizeof *pl, + pl, NULL)) + goto freeme; + + for (i = 0; i < pl->mpl_no_pd; i++) { + if (pl->mpl_address[i].mpa_scsi_type != 0) + continue; + + memset(&mbox, 0, sizeof(mbox)); + mbox.s[0] = pl->mpl_address[i].mpa_pd_id; + if (mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, + sizeof *pd, pd, &mbox)) + continue; + + if (pd->mpd_fw_state == MFI_PD_UNCONFIG_GOOD || + pd->mpd_fw_state == MFI_PD_UNCONFIG_BAD) + break; + } + + if (i == pl->mpl_no_pd) + goto freeme; + } else { + memset(&mbox, 0, sizeof(mbox)); + mbox.s[0] = ar[arr].pd[disk].mar_pd.mfp_id; + if ((rv = mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, + sizeof *pd, pd, &mbox))) { + bd->bd_status = BIOC_SDINVALID; + goto freeme; + } + } + + /* get the remaining fields */ + bd->bd_channel = pd->mpd_enc_idx; + bd->bd_target = pd->mpd_enc_slot; /* get status */ - switch (ar[arr].pd[disk].mar_pd_state){ + switch (pd->mpd_fw_state){ case MFI_PD_UNCONFIG_GOOD: - case MFI_PD_FAILED: - bd->bd_status = BIOC_SDFAILED; + case MFI_PD_UNCONFIG_BAD: + bd->bd_status = BIOC_SDUNUSED; break; case MFI_PD_HOTSPARE: /* XXX dedicated hotspare part of array? 
*/ @@ -1851,6 +1893,10 @@ mfi_ioctl_disk(struct mfi_softc *sc, str bd->bd_status = BIOC_SDOFFLINE; break; + case MFI_PD_FAILED: + bd->bd_status = BIOC_SDFAILED; + break; + case MFI_PD_REBUILD: bd->bd_status = BIOC_SDREBUILD; break; @@ -1859,27 +1905,15 @@ mfi_ioctl_disk(struct mfi_softc *sc, str bd->bd_status = BIOC_SDONLINE; break; - case MFI_PD_UNCONFIG_BAD: /* XXX define new state in bio */ + case MFI_PD_COPYBACK: + case MFI_PD_SYSTEM: default: bd->bd_status = BIOC_SDINVALID; break; } - /* get the remaining fields */ - memset(&mbox, 0, sizeof(mbox)); - mbox.s[0] = ar[arr].pd[disk].mar_pd.mfp_id; - if (mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, - sizeof *pd, pd, &mbox)) { - /* disk is missing but succeed command */ - rv = 0; - goto freeme; - } - bd->bd_size = pd->mpd_size * 512; /* bytes per block */ - /* if pd->mpd_enc_idx is 0 then it is not in an enclosure */ - bd->bd_channel = pd->mpd_enc_idx; - inqbuf = (struct scsi_inquiry_data *)&pd->mpd_inq_data; vendp = inqbuf->vendor; memcpy(vend, vendp, sizeof vend - 1); @@ -1899,6 +1933,7 @@ mfi_ioctl_disk(struct mfi_softc *sc, str rv = 0; freeme: free(pd, M_DEVBUF, sizeof *pd); + free(pl, M_DEVBUF, sizeof *pl); return (rv); } @@ -2009,27 +2044,136 @@ done: return (rv); } +/* We currently don't know the full details of the following struct */ +struct mfi_foreign_scan_cfg { + char data[24]; +}; + +struct mfi_foreign_scan_info { + uint32_t count; /* Number of foreign configs found */ + struct mfi_foreign_scan_cfg cfgs[8]; +}; + +static int +mfi_makegood(struct mfi_softc *sc, uint16_t pd_id) +{ + struct mfi_foreign_scan_info *fsi; + struct mfi_pd_details *pd; + union mfi_mbox mbox; + int rv; + + fsi = malloc(sizeof *fsi, M_DEVBUF, M_WAITOK); + pd = malloc(sizeof *pd, M_DEVBUF, M_WAITOK); + + memset(&mbox, 0, sizeof mbox); + mbox.s[0] = pd_id; + if ((rv = mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, sizeof *pd, pd, + &mbox))) + goto done; + + if (pd->mpd_fw_state == MFI_PD_UNCONFIG_BAD) { + mbox.s[0] = pd_id; + 
mbox.s[1] = pd->mpd_pd.mfp_seq; + mbox.b[4] = MFI_PD_UNCONFIG_GOOD; + if ((rv = mfi_mgmt(sc, MR_DCMD_PD_SET_STATE, MFI_DATA_NONE, 0, + NULL, &mbox))) + goto done; + } + + memset(&mbox, 0, sizeof mbox); + mbox.s[0] = pd_id; + if ((rv = mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, sizeof *pd, pd, + &mbox))) + goto done; + + if (pd->mpd_ddf_state & MFI_DDF_FOREIGN) { + if ((rv = mfi_mgmt(sc, MR_DCMD_CFG_FOREIGN_SCAN, MFI_DATA_IN, + sizeof *fsi, fsi, NULL))) + goto done; + + if (fsi->count > 0) { + if ((rv = mfi_mgmt(sc, MR_DCMD_CFG_FOREIGN_CLEAR, + MFI_DATA_NONE, 0, NULL, NULL))) + goto done; + } + } + + memset(&mbox, 0, sizeof mbox); + mbox.s[0] = pd_id; + if ((rv = mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, sizeof *pd, pd, + &mbox))) + goto done; + + if (pd->mpd_fw_state != MFI_PD_UNCONFIG_GOOD || + pd->mpd_ddf_state & MFI_DDF_FOREIGN) + rv = ENXIO; + +done: + free(fsi, M_DEVBUF, sizeof *fsi); + free(pd, M_DEVBUF, sizeof *pd); + + return (rv); +} + +static int +mfi_makespare(struct mfi_softc *sc, uint16_t pd_id) +{ + struct mfi_hotspare *hs; + struct mfi_pd_details *pd; + union mfi_mbox mbox; + size_t size; + int rv = EINVAL; + + /* we really could skip and expect that inq took care of it */ + if (mfi_bio_getitall(sc)) { + DNPRINTF(MFI_D_IOCTL, "%s: mfi_bio_getitall failed\n", + DEVNAME(sc)); + return (rv); + } + size = sizeof *hs + sizeof(uint16_t) * sc->sc_cfg->mfc_no_array; + + hs = malloc(size, M_DEVBUF, M_WAITOK); + pd = malloc(sizeof *pd, M_DEVBUF, M_WAITOK); + + memset(&mbox, 0, sizeof mbox); + mbox.s[0] = pd_id; + if ((rv = mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, sizeof *pd, pd, + &mbox))) + goto done; + + memset(hs, 0, size); + hs->mhs_pd.mfp_id = pd->mpd_pd.mfp_id; + hs->mhs_pd.mfp_seq = pd->mpd_pd.mfp_seq; + rv = mfi_mgmt(sc, MR_DCMD_CFG_MAKE_SPARE, MFI_DATA_OUT, size, hs, NULL); + +done: + free(hs, M_DEVBUF, size); + free(pd, M_DEVBUF, sizeof *pd); + + return (rv); +} + int mfi_ioctl_setstate(struct mfi_softc *sc, struct bioc_setstate *bs) { 
- struct mfi_pd_list *pd; - struct mfi_pd_details *info; + struct mfi_pd_details *pd; + struct mfi_pd_list *pl; int i, found, rv = EINVAL; union mfi_mbox mbox; DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl_setstate %x\n", DEVNAME(sc), bs->bs_status); - pd = malloc(sizeof(*pd), M_DEVBUF, M_WAITOK); - info = malloc(sizeof *info, M_DEVBUF, M_WAITOK); + pd = malloc(sizeof *pd, M_DEVBUF, M_WAITOK); + pl = malloc(sizeof *pl, M_DEVBUF, M_WAITOK); if (mfi_mgmt(sc, MR_DCMD_PD_GET_LIST, MFI_DATA_IN, - sizeof(*pd), pd, NULL)) + sizeof *pl, pl, NULL)) goto done; - for (i = 0, found = 0; i < pd->mpl_no_pd; i++) - if (bs->bs_channel == pd->mpl_address[i].mpa_enc_index && - bs->bs_target == pd->mpl_address[i].mpa_enc_slot) { + for (i = 0, found = 0; i < pl->mpl_no_pd; i++) + if (bs->bs_channel == pl->mpl_address[i].mpa_enc_index && + bs->bs_target == pl->mpl_address[i].mpa_enc_slot) { found = 1; break; } @@ -2038,14 +2182,14 @@ mfi_ioctl_setstate(struct mfi_softc *sc, goto done; memset(&mbox, 0, sizeof(mbox)); - mbox.s[0] = pd->mpl_address[i].mpa_pd_id; + mbox.s[0] = pl->mpl_address[i].mpa_pd_id; if (mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, - sizeof *info, info, &mbox)) + sizeof *pd, pd, &mbox)) goto done; - mbox.s[0] = pd->mpl_address[i].mpa_pd_id; - mbox.s[1] = info->mpd_pd.mfp_seq; + mbox.s[0] = pl->mpl_address[i].mpa_pd_id; + mbox.s[1] = pd->mpd_pd.mfp_seq; switch (bs->bs_status) { case BIOC_SSONLINE: @@ -2061,6 +2205,30 @@ mfi_ioctl_setstate(struct mfi_softc *sc, break; case BIOC_SSREBUILD: + if (pd->mpd_fw_state != MFI_PD_OFFLINE) { + if ((rv = mfi_makegood(sc, + pl->mpl_address[i].mpa_pd_id))) + goto done; + + if ((rv = mfi_makespare(sc, + pl->mpl_address[i].mpa_pd_id))) + goto done; + + memset(&mbox, 0, sizeof(mbox)); + mbox.s[0] = pl->mpl_address[i].mpa_pd_id; + if ((rv = mfi_mgmt(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, + sizeof *pd, pd, &mbox))) + goto done; + + /* rebuilding might be started by mfi_makespare() */ + if (pd->mpd_fw_state == MFI_PD_REBUILD) { + rv = 0; + goto 
done; + } + + mbox.s[0] = pl->mpl_address[i].mpa_pd_id; + mbox.s[1] = pd->mpd_pd.mfp_seq; + } mbox.b[4] = MFI_PD_REBUILD; break; @@ -2078,7 +2246,7 @@ mfi_ioctl_setstate(struct mfi_softc *sc, rv = 0; done: free(pd, M_DEVBUF, sizeof *pd); - free(info, M_DEVBUF, sizeof *info); + free(pl, M_DEVBUF, sizeof *pl); return (rv); } Index: sys/dev/ic/mfireg.h =================================================================== RCS file: /cvs/src/sys/dev/ic/mfireg.h,v retrieving revision 1.48 diff -u -p -r1.48 mfireg.h --- sys/dev/ic/mfireg.h 7 Feb 2017 07:07:29 -0000 1.48 +++ sys/dev/ic/mfireg.h 29 Jun 2017 08:05:23 -0000 @@ -139,6 +139,9 @@ #define MR_DCMD_CONF_GET 0x04010000 #define MR_DCMD_CFG_ADD 0x04020000 #define MR_DCMD_CFG_CLEAR 0x04030000 +#define MR_DCMD_CFG_MAKE_SPARE 0x04040000 +#define MR_DCMD_CFG_FOREIGN_SCAN 0x04060100 +#define MR_DCMD_CFG_FOREIGN_CLEAR 0x04060500 #define MR_DCMD_BBU_GET_STATUS 0x05010000 #define MR_DCMD_BBU_GET_CAPACITY_INFO 0x05020000 #define MR_DCMD_BBU_GET_DESIGN_INFO 0x05030000