------- Comment From [email protected] 2018-03-26 06:13 EDT-------
Hi,

Verified the issue with the latest Ubuntu 18.04 daily build kernel; the
problem is no longer seen when triggering a crash with the levels below.

root@whip:~#  dpkg -l | grep kexec-tools
ii  kexec-tools                         1:2.0.16-1ubuntu1                       
                       ppc64el      tools to support fast kexec reboots
root@whip:~# dpkg -l | grep makedumpfile
ii  makedumpfile                        1:1.6.3-1                               
                       ppc64el      VMcore extraction tool
root@whip:~# uname -a
Linux whip 4.15.0-12-generic #13 SMP Thu Mar 22 07:28:54 CDT 2018 ppc64le 
ppc64le ppc64le GNU/Linux

Triggered crash:
*****************
root@whip:/etc/default/grub.d# echo c > /proc/sysrq-trigger
[  183.215596] sysrq: SysRq : This sysrq operation is disabled.
root@whip:/etc/default/grub.d# echo 1 > /proc/sys/kernel/sysrq
root@whip:/etc/default/grub.d# echo c > /proc/sysrq-trigger
[  210.082354] sysrq: SysRq : Trigger a crash
[  210.082396] Unable to handle kernel paging request for data at address 
0x00000000
[  210.082518] Faulting instruction address: 0xc0000000007ec4e8
[  210.082581] Oops: Kernel access of bad area, sig: 11 [#1]
[  210.082646] LE SMP NR_CPUS=2048 NUMA PowerNV
[  210.082713] Modules linked in: rpcsec_gss_krb5 nfsv4 nfs fscache 
rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) 
ib_uverbs(OE) ib_umad(OE) esp6_offload esp6 esp4_offload esp4 xfrm_algo 
mlx5_fpga_tools(OE) mlx4_en(OE) mlx4_ib(OE) mlx4_core(OE) ofpart cmdlinepart 
vmx_crypto powernv_flash mtd idt_89hpesx crct10dif_vpmsum ipmi_powernv 
ipmi_devintf ipmi_msghandler at24 uio_pdrv_genirq uio opal_prd ibmpowernv 
binfmt_misc nfsd auth_rpcgss nfs_acl lockd grace sunrpc sch_fq_codel knem(OE) 
ip_tables x_tables autofs4 btrfs xor zstd_compress raid6_pq mlx5_ib(OE) 
ib_core(OE) mlx5_core(OE) nouveau mlxfw(OE) devlink mlx_compat(OE) lpfc ast 
i2c_algo_bit ttm drm_kms_helper nvmet_fc syscopyarea nvmet cxl sysfillrect 
sysimgblt nvme_fc fb_sys_fops ahci nvme_fabrics crc32c_vpmsum drm tg3 pnv_php
[  210.083672]  libahci scsi_transport_fc
[  210.083722] CPU: 10 PID: 5235 Comm: bash Tainted: G           OE    
4.15.0-12-generic #13
[  210.083792] NIP:  c0000000007ec4e8 LR: c0000000007ed428 CTR: c0000000007ec4c0
[  210.083895] REGS: c000007fb73279f0 TRAP: 0300   Tainted: G           OE     
(4.15.0-12-generic)
[  210.084027] MSR:  9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 28222222  
XER: 20040000
[  210.084154] CFAR: c0000000007ed424 DAR: 0000000000000000 DSISR: 42000000 
SOFTE: 1
[  210.084154] GPR00: c0000000007ed428 c000007fb7327c70 c0000000016eaf00 
0000000000000063
[  210.084154] GPR04: c000007fdeb7ce18 c000007fdeb94368 9000000000009033 
000000000000000a
[  210.084154] GPR08: 0000000000000007 0000000000000001 0000000000000000 
9000000000001003
[  210.084154] GPR12: c0000000007ec4c0 c000000003266e00 00000f1697af6b08 
0000000000000000
[  210.084154] GPR16: 00000f167ebce9f0 00000f167ec61998 00000f167ec619d0 
00000f167ec98204
[  210.084154] GPR20: 0000000000000000 0000000000000001 0000000000000000 
00007fffc5069ac4
[  210.084154] GPR24: 00007fffc5069ac0 00000f167ec9afc4 c0000000015e9968 
0000000000000002
[  210.084154] GPR28: 0000000000000063 0000000000000007 c000000001572a9c 
c0000000015e9d08
[  210.085152] NIP [c0000000007ec4e8] sysrq_handle_crash+0x28/0x30
[  210.085269] LR [c0000000007ed428] __handle_sysrq+0xf8/0x2c0
[  210.085328] Call Trace:
[  210.085378] [c000007fb7327c70] [c0000000007ed408] __handle_sysrq+0xd8/0x2c0 
(unreliable)
[  210.085482] [c000007fb7327d10] [c0000000007edc34] 
write_sysrq_trigger+0x64/0x90
[  210.085584] [c000007fb7327d40] [c00000000047de88] proc_reg_write+0x88/0xd0
[  210.085673] [c000007fb7327d70] [c0000000003d11bc] __vfs_write+0x3c/0x70
[  210.085751] [c000007fb7327d90] [c0000000003d1418] vfs_write+0xd8/0x220
[  210.085824] [c000007fb7327de0] [c0000000003d1738] SyS_write+0x68/0x110
[  210.085941] [c000007fb7327e30] [c00000000000b184] system_call+0x58/0x6c
[  210.086030] Instruction dump:
[  210.086067] 4bfff9f1 4bfffe50 3c4c00f0 3842ea40 7c0802a6 60000000 39200001 
3d42001c
[  210.086185] 394a6db0 912a0000 7c0004ac 39400000 <992a0000> 4e800020 3c4c00f0 
3842ea10
[  210.086293] ---[ end trace 2141bc6e05b3cc02 ]---
[  211.090273]
211.090393] Sending IPI to other CP[  373.057331960,5] OPAL: Switch to 
big-endian OS
Us
[  211.12[  377.207676398,5] OPAL: Switch to little-endian OS
0361] IPI complete
[  213.393057] kexec: Starting switchover sequence.

[    1.295245] integrity: Unable to open file: /etc/keys/x509_ima.der (-2)

[    1.295249] integrity: Unable to open file: /etc/keys/x509_evm.der (-2)
[    1.353447] vio vio: uevent: failed to send synthetic uevent
[    2.089461] nouveau 0004:04:00.0: unknown chipset (140000a1)
[    2.131257] nouveau 0004:05:00.0: unknown chipset (140000a1)
[    2.131538] nouveau 0035:03:00.0: unknown chipset (140000a1)
[    2.131664] nouveau 0035:04:00.0: unknown chipset (140000a1)
/dev/sda2: recovering journal
/dev/sda2: clean, 335484/122101760 files, 13969682/488376576 blocks
[    6.208502] vio vio: uevent: failed to send synthetic uevent
[  OK  ] Started Show Plymouth Boot Screen.
plymouth-start.service
[  OK  ] Started Forward Password Requests to Plymouth Directory Watch.
[  OK  ] Reached target Local Encrypted Volumes.
systemd-networkd.service
[  OK  ] Started Network Service.
Starting Wait for Network to be Configured...
[  OK  ] Started Network Time Synchronization.
systemd-timesyncd.service
[  OK  ] Reached target System Time Synchronized.
[    8.506483] lpfc 0000:01:00.0: 0:6101 Disabling NVME support: Not supported 
by firmware: 1 1
[    8.506624] lpfc 0000:01:00.0: 0:2574 IO channels: irqs 4 fcp 4 nvme 0 MRQ: 0
[  OK  ] Listening on Load/Save RF Kill Switch Status /dev/rfkill Watch.
[    9.358738] lpfc 0000:01:00.0: 0:3176 Port Name 0 Physical Link is functional
[    9.574467] lpfc 0000:01:00.1: 1:6101 Disabling NVME support: Not supported 
by firmware: 1 1
[    9.574550] lpfc 0000:01:00.1: 1:2574 IO channels: irqs 4 fcp 4 nvme 0 MRQ: 0
[  OK  ] Started AppArmor initialization.
apparmor.service
[  OK  ] Reached target System Initialization.
[   10.386796] lpfc 0000:01:00.1: 1:3176 Port Name 1 Physical Link is functional
[   10.646666] lpfc 0000:01:00.0: 0:1303 Link Up Event x1 received Data: x1 x0 
x20 x0 x0 x0 0
[  OK  ] Created slice system-mlnx_interface_mgr.slice.
[email protected]
[  OK  ] Started mlnx_interface_mgr - configure enP48p1s0f0.
[  OK  ] Started mlnx_interface_mgr - configure enP48p1s0f1.
[email protected]
openibd.service
[  OK  ] Started openibd - configure Mellanox devices.
[  OK  ] Reached target Network.
[  OK  ] Started Wait for Network to be Configured.
systemd-networkd-wait-online.service
[  OK  ] Reached target Network is Online.
Starting Kernel crash dump capture service...
[   16.055959] kdump-tools[2400]: Starting kdump-tools:  * running makedumpfile 
-c -d 31 /proc/vmcore /var/crash/201803221639/dump-incomplete
Copying data                                      : [100.0 %] /           eta: 
0s
[   40.957946] kdump-tools[2400]: The kernel version is not supported.
[   40.958026] kdump-tools[2400]: The makedumpfile operation may be incomplete.
[   40.958099] kdump-tools[2400]: The dumpfile is saved to 
/var/crash/201803221639/dump-incomplete.
[   40.958167] kdump-tools[2400]: makedumpfile Completed.
[   40.974357] kdump-tools[2400]:  * kdump-tools: saved vmcore in 
/var/crash/201803221639
[   41.840111] kdump-tools[2400]:  * running makedumpfile --dump-dmesg 
/proc/vmcore /var/crash/201803221639/dmesg.201803221639
[   41.878179] kdump-tools[2400]: The kernel version is not supported.
[   41.878303] kdump-tools[2400]: The makedumpfile operation may be incomplete.
[   41.878371] kdump-tools[2400]: The dmesg log is saved to 
/var/crash/201803221639/dmesg.201803221639.
[   41.878454] kdump-tools[2400]: makedumpfile Completed.
[   41.878536] kdump-tools[2400]:  * kdump-tools: saved dmesg content in 
/var/crash/201803221639
[   41.966570] kdump-tools[2400]: Thu, 22 Mar 2018 16:39:38 -0500
[   42.071819] kdump-tools[2400]: Rebooting.
[   42.084325] mlx5_core 0030:01:00.1: mlx5_enter_error_state:141:(pid 2441): 
start
[   42.084399] mlx5_core 0030:01:00.1: mlx5_enter_error_state:159:(pid 2441): 
end
[   42.094529] mlx5_core 0030:01:00.0: mlx5_enter_error_state:141:(pid 2441): 
start
[   42.094611] mlx5_core 0030:01:00.0: mlx5_enter_error_state:159:(pid 2441): 
end
[   45.826681] reboot: Restarting system
[  460.446012693,5] OPAL: Reboot request...
..

--== Welcome to Hostboot hostboot-ca203c9/hbicore.bin ==--

4.04291|secure|SecureROM valid - enabling functionality
4.04295|secure|Booting in non-secure mode.
5.94075|ISTEP  6. 5 - host_init_fsi
6.10500|ISTEP  6. 6 - host_set_ipl_parms
6.12308|ISTEP  6. 7 - host_discover_targets
6.66043|HWAS|PRESENT> DIMM[03]=AAAA000000000000
6.66044|HWAS|PRESENT> Proc[05]=8800000000000000
6.66045|HWAS|PRESENT> Core[07]=EFEFFFFDFDFF0000
6.68759|ISTEP  6. 8 - host_update_master_tpm

CRASH LOGS:
************

root@whip:~# ls -lrt /var/crash
total 40
drwxr-xr-x 2 root root  4096 Mar 22 16:39 201803221639
-rw-r--r-- 1 root root   223 Mar 22 16:43 kexec_cmd
-rw-r----- 1 root root 29741 Mar 22 16:43 
linux-image-4.15.0-12-generic-201803221639.crash
root@whip:~# date
Thu Mar 22 16:43:31 CDT 2018
root@whip:~# cd /var/crash
root@whip:/var/crash# cd 201803221639
root@whip:/var/crash/201803221639# ls
dmesg.201803221639  dump.201803221639
root@whip:/var/crash/201803221639# ls -l
total 439040
-rw------- 1 root root    111536 Mar 22 16:39 dmesg.201803221639
-rw------- 1 root root 449549329 Mar 22 16:39 dump.201803221639
root@whip:/var/crash/201803221639#

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1743529

Title:
  Merge kexec-tools 2.0.16-1 from Debian: System hung with Kernel panic
  -not syncing: Out of memory message when crash is triggered.

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu-power-systems/+bug/1743529/+subscriptions

-- 
ubuntu-bugs mailing list
[email protected]
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

Reply via email to