On Fri, May 18, 2018 at 07:09:10PM +0000, Sunil Muthuswamy wrote:
> In the VM mode on Hyper-V, currently, when the kernel panics, an error
> code and few register values are populated in an MSR and the Hypervisor
> notified. This information is collected on the host. The amount of
> information currently collected is found to be limited and not very
> actionable. To gather more actionable data, such as stack trace, the
> proposal is to write one page worth of kmsg data on an allocated page
> and the Hypervisor notified of the page address through the MSR.
>
> - Added a sysctl option to control the behavior, with ON by default.
>
> CC: [email protected]
> CC: [email protected]
> Signed-off-by: Sunil Muthuswamy <[email protected]>
> ---
> arch/x86/hyperv/hv_init.c | 35 ++++++++++
> arch/x86/include/asm/hyperv-tlfs.h | 5 +-
> arch/x86/include/asm/mshyperv.h | 1 +
> drivers/hv/vmbus_drv.c | 128
> +++++++++++++++++++++++++++++++++++++
> 4 files changed, 167 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index cfecc22..fc1e3cb 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -395,6 +395,41 @@ void hyperv_report_panic(struct pt_regs *regs, long err)
> }
> EXPORT_SYMBOL_GPL(hyperv_report_panic);
>
> +/**
> + * hyperv_report_panic_msg - report panic message to Hyper-V
> + * @pa: physical address of the panic page containing the message
> + * @size: size of the message in the page
> + *
> + */
> +
The blank lines are not needed, please drop.
> +void hyperv_report_panic_msg(phys_addr_t pa, size_t size)
> +{
> + static bool panic_msg_reported;
> +
> + if (panic_msg_reported)
> + return;
> + panic_msg_reported = true;
> +
> + /*
> + * P3 to contain the physical address of the panic page & P4 to
> + * contain the size of the panic data in that page. Rest of the
> + * registers are no-op when the NOTIFY_MSG flag is set.
> + */
> + wrmsrl(HV_X64_MSR_CRASH_P0, 0);
> + wrmsrl(HV_X64_MSR_CRASH_P1, 0);
> + wrmsrl(HV_X64_MSR_CRASH_P2, 0);
> + wrmsrl(HV_X64_MSR_CRASH_P3, pa);
> + wrmsrl(HV_X64_MSR_CRASH_P4, size);
> +
> + /*
> + * Let Hyper-V know there is crash data available along with
> + * the panic message.
> + */
> + wrmsrl(HV_X64_MSR_CRASH_CTL,
> + (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG));
> +}
> +EXPORT_SYMBOL_GPL(hyperv_report_panic_msg);
> +
> bool hv_is_hyperv_initialized(void)
> {
> union hv_x64_msr_hypercall_contents hypercall_msr;
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h
> b/arch/x86/include/asm/hyperv-tlfs.h
> index 416cb0e..fc2932c 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -171,9 +171,10 @@
> #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14)
>
> /*
> - * Crash notification flag.
> + * Crash notification flags.
> */
> -#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
> +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
> +#define HV_CRASH_CTL_CRASH_NOTIFY_MSG (1ULL << 62)
BIT_ULL()?
And again, why are they not in numerical order?
> /* MSR used to identify the guest OS. */
> #define HV_X64_MSR_GUEST_OS_ID 0x40000000
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index b90e796..ac83f2d 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -262,6 +262,7 @@ void hyperv_init(void);
> void hyperv_setup_mmu_ops(void);
> void hyper_alloc_mmu(void);
> void hyperv_report_panic(struct pt_regs *regs, long err);
> +void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
> bool hv_is_hyperv_initialized(void);
> void hyperv_cleanup(void);
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index b10fe26..7b04f7f 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -56,6 +56,8 @@ static struct completion probe_event;
>
> static int hyperv_cpuhp_online;
>
> +static void *hv_panic_page;
> +
> static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
> void *args)
> {
> @@ -1018,6 +1020,86 @@ static void vmbus_isr(void)
> add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
> }
>
> +/*
> + * Boolean to control whether to report panic messages over Hyper-V.
> + *
> + * It can be set via /proc/sys/kernel/hyperv/record_panic_msg
> + */
> +int sysctl_record_panic_msg = 1;
Why was a sysctl chosen? I'm not objecting, but you have to document
new user/kernel apis when you create them.
> +/*
> + * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
> + * buffer and call into Hyper-V to transfer the data.
> + */
> +static void hv_kmsg_dump(struct kmsg_dumper *dumper,
> + enum kmsg_dump_reason reason)
> +{
> + size_t bytes_written;
> + phys_addr_t panic_pa;
> +
> + /* We are only interested in panics. */
> + if (reason != KMSG_DUMP_PANIC)
> + return;
> +
> + if (!sysctl_record_panic_msg)
> + return;
> +
> + if (!hv_panic_page)
> + return;
> +
> + panic_pa = virt_to_phys(hv_panic_page);
> +
> + /*
> + * Write dump contents to the page. No need to synchronize; panic should
> + * be single-threaded.
> + */
> + if (!kmsg_dump_get_buffer(dumper, true, hv_panic_page,
> + PAGE_SIZE, &bytes_written)) {
> + pr_err("Hyper-V - Unable to get kmsg data for panic\n");
> + return;
> + }
> +
> + hyperv_report_panic_msg(panic_pa, bytes_written);
> +}
> +
> +static struct kmsg_dumper hv_kmsg_dumper = {
> + .dump = hv_kmsg_dump,
> +};
> +
> +static struct ctl_table_header *hv_ctl_table_hdr;
> +static int zero;
> +static int one = 1;
> +
> +static struct ctl_table hv_ctl_table[] = {
> + {
> + .procname = "record_panic_msg",
> + .data = &sysctl_record_panic_msg,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = &zero,
> + .extra2 = &one
> + },
> + {}
> +};
> +
> +static struct ctl_table hv_ctl_dir[] = {
> + {
> + .procname = "hyperv",
> + .mode = 0555,
> + .child = hv_ctl_table
> + },
> + {}
> +};
> +
> +static struct ctl_table hv_root_table[] = {
> + {
> + .procname = "kernel",
> + .mode = 0555,
> + .child = hv_ctl_dir
> + },
> + {}
> +};
>
> /*
> * vmbus_bus_init -Main vmbus driver initialization routine.
> @@ -1065,6 +1147,31 @@ static int vmbus_bus_init(void)
> * Only register if the crash MSRs are available
> */
> if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
> + if (!hv_ctl_table_hdr) {
> +
> + /*
No blank line needed.
> + * Sysctl registration is not fatal, since default is
> + * for operation.
> + */
> + hv_ctl_table_hdr =
> + register_sysctl_table(hv_root_table);
> + if (!hv_ctl_table_hdr)
> + pr_err("Hyper-v: sysctl table register error");
> + }
> +
> + hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL);
> + if (!hv_panic_page) {
> + ret = -ENOMEM;
> + goto err_connect;
> + }
> +
> + ret = kmsg_dump_register(&hv_kmsg_dumper);
> + if (ret) {
> + pr_err("Hyper-V - kmsg dump register failure 0x%x\n",
> + ret);
> + goto err_connect;
> + }
> +
> register_die_notifier(&hyperv_die_block);
> atomic_notifier_chain_register(&panic_notifier_list,
> &hyperv_panic_block);
> @@ -1081,6 +1188,15 @@ static int vmbus_bus_init(void)
> hv_remove_vmbus_irq();
>
> bus_unregister(&hv_bus);
> + if (hv_panic_page) {
Again, no need to check this.
> + free_page((unsigned long)hv_panic_page);
> + hv_panic_page = NULL;
> + }
> +
> + if (!hv_ctl_table_hdr) {
> + unregister_sysctl_table(hv_ctl_table_hdr);
> + hv_ctl_table_hdr = NULL;
> + }
>
> return ret;
> }
> @@ -1785,10 +1901,22 @@ static void __exit vmbus_exit(void)
> vmbus_free_channels();
>
> if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
> + kmsg_dump_unregister(&hv_kmsg_dumper);
> unregister_die_notifier(&hyperv_die_block);
> atomic_notifier_chain_unregister(&panic_notifier_list,
> &hyperv_panic_block);
> }
> +
> + if (hv_panic_page) {
Again, no need to check. I thought I asked about this last code review?
thanks,
greg k-h
_______________________________________________
devel mailing list
[email protected]
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel