LINUX.ORG.RU

Помогите с Call traces, пожалуйста. Это драйвер GPU падает?

 


0

1

День добрый, подскажите, пожалуйста, что это так падает? (выдержки из dmesg ниже). Спасибо заранее!

Ubuntu 22.04.1 LTS, Lenovo e595, Ryzen 5 3500U

[    3.674901] Hardware name: LENOVO 20NFS0EV00/20NFS0EV00, BIOS R11ET37W (1.17 ) 06/19/2020
[    3.674901] Call Trace:
[    3.674901]  <IRQ>
[    3.674901]  dump_stack_lvl+0x76/0xa0
[    3.674901]  dump_stack+0x10/0x20
[    3.674901]  __report_bad_irq+0x30/0xd0
[    3.674901]  note_interrupt+0x2b7/0x300
[    3.674901]  handle_irq_event+0x79/0x80
[    3.674901]  handle_fasteoi_irq+0x7d/0x200
[    3.674901]  __common_interrupt+0x51/0xf0
[    3.674901]  common_interrupt+0x44/0xb0
[    3.674901]  asm_common_interrupt+0x27/0x40
[    3.674901] RIP: 0010:rcu_report_qs_rnp+0x121/0x140
[    3.674901] Code: 0f b7 05 e2 3a 40 02 48 c7 c7 80 96 b8 95 83 c8 02 66 89 05 d1 3a 40 02 e8 ec 22 07 01 e8 67 fa ff ff 5b 41 5c 41 5d 41 5e 5d <31> c0 31 d2 31 c9 31 f6 31 ff e9 60 3e 07 01 0f 0b eb c3 66 66 2e
[    3.674901] RSP: 0018:ffffbbdc80304ed8 EFLAGS: 00000246
[    3.674901] RAX: 0000000000000000 RBX: ffff9acbf87b5a00 RCX: 0000000000000000
[    3.674901] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[    3.674901] RBP: ffffbbdc80304f08 R08: 0000000000000000 R09: 0000000000000000
[    3.674901] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff95b89680
[    3.674901] R13: 0000000000000246 R14: 0000000000000008 R15: ffffffff95a06108
[    3.674901]  ? rcu_report_qs_rdp+0xa7/0x180
[    3.674901]  rcu_core+0xcb/0x390
[    3.674901]  rcu_core_si+0xe/0x20
[    3.674901]  handle_softirqs+0xdb/0x340
[    3.674901]  __irq_exit_rcu+0xd9/0x100
[    3.674901]  irq_exit_rcu+0xe/0x20
[    3.674901]  sysvec_apic_timer_interrupt+0x92/0xd0
[    3.674901]  </IRQ>
[    3.674901]  <TASK>
[    3.674901]  asm_sysvec_apic_timer_interrupt+0x1b/0x20
[    3.674901] RIP: 0010:tick_nohz_idle_enter+0x4b/0x70
[    3.674901] Code: bb 90 00 00 00 00 75 36 80 0b 01 83 43 74 01 e8 bb 9c fe ff 80 0b 04 48 89 43 78 83 43 74 01 e8 5b 17 f7 ff fb 0f 1f 44 00 00 <48> 8b 5d f8 c9 31 c0 31 d2 31 c9 31 f6 31 ff e9 81 c0 02 01 0f 0b
[    3.674901] RSP: 0018:ffffbbdc80197ed8 EFLAGS: 00000246
[    3.674901] RAX: 0000000000000000 RBX: ffff9acbf87a5000 RCX: 0000000000000000
[    3.674901] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[    3.674901] RBP: ffffbbdc80197ee0 R08: 0000000000000000 R09: 0000000000000000
[    3.674901] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9acb40a20000
[    3.674901] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[    3.674901]  ? tick_nohz_idle_enter+0x45/0x70
[    3.674901]  do_idle+0x2f/0xf0
[    3.674901]  cpu_startup_entry+0x2a/0x30
[    3.674901]  start_secondary+0x129/0x160
[    3.674901]  secondary_startup_64_no_verify+0x184/0x18b
[    3.674901]  </TASK>
[    3.674901] handlers:
[    3.674901] [<00000000f23d377f>] amd_gpio_irq_handler
[    3.674901] Disabling IRQ #7
[    3.674901] fbcon: Taking over console
[    3.710434] Console: switching to colour frame buffer device 240x67
[    3.761452] RAS: Correctable Errors collector initialized.



[   56.846268] CPU: 4 PID: 655 Comm: (udev-worker) Not tainted 6.8.0-51-generic #52-Ubuntu
[   56.846268] Hardware name: LENOVO 20NFS0EV00/20NFS0EV00, BIOS R11ET37W (1.17 ) 06/19/2020
[   56.846268] RIP: 0010:find_kallsyms_symbol+0x1d3/0x220
[   56.846268] Code: 83 c4 20 5b 41 5c 41 5d 41 5e 41 5f 5d 31 d2 31 c9 31 f6 31 ff 45 31 c0 45 31 c9 45 31 d2 45 31 db e9 31 07 05 01 40 80 fe 24 <0f> 84 49 ff ff ff e9 34 ff ff ff 49 8d 91 60 02 00 00 e9 eb fe ff
[   56.846268] RSP: 0018:ffffbbdc8060b688 EFLAGS: 00000202
[   56.846268] RAX: ffffffffc234b518 RBX: ffffffffc1a4d4e0 RCX: ffffffffc240c27d
[   56.846268] RDX: 000000000000a836 RSI: 000000000000005f RDI: ffffffffc18b0bb0
[   56.846268] RBP: ffffbbdc8060b6d0 R08: ffffbbdc8060b778 R09: 000000000000ba83
[   56.846268] R10: ffffffffc18b0a10 R11: 000000000000743b R12: ffffffffc1d19fb8
[   56.846268] R13: ffffffffc241fa70 R14: ffffffffc18b0a10 R15: 0000000000000000
[   56.846268] FS:  00007b98a90518c0(0000) GS:ffff9acbf8800000(0000) knlGS:0000000000000000
[   56.846268] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   56.846268] CR2: 00007688444cecf0 CR3: 0000000104ade000 CR4: 00000000003506f0
[   56.846268] Call Trace:
[   56.846268]  <IRQ>
[   56.846268]  ? show_regs+0x6d/0x80
[   56.846268]  ? watchdog_timer_fn+0x206/0x290
[   56.846268]  ? __pfx_watchdog_timer_fn+0x10/0x10
[   56.846268]  ? __hrtimer_run_queues+0x112/0x2a0
[   56.846268]  ? hrtimer_interrupt+0xf6/0x250
[   56.846268]  ? __sysvec_apic_timer_interrupt+0x51/0x150
[   56.846268]  ? sysvec_apic_timer_interrupt+0x8d/0xd0
[   56.846268]  </IRQ>
[   56.846268]  <TASK>
[   56.846268]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
[   56.846268]  ? __pfx_set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override+0x10/0x10 [amdgpu]
[   56.850515]  ? dmub_dcn32_setup_outbox0+0x60/0x60 [amdgpu]
[   56.853015]  ? __pfx_set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override+0x10/0x10 [amdgpu]
[   56.857012]  ? set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override+0x1a0/0x1a0 [amdgpu]
[   56.861006]  ? find_kallsyms_symbol+0x1d3/0x220
[   56.861006]  ? srso_return_thunk+0x5/0x5f
[   56.861006]  ? __module_address+0x58/0x190
[   56.861006]  ? srso_return_thunk+0x5/0x5f
[   56.861006]  ? sysvec_apic_timer_interrupt+0xa6/0xd0
[   56.861006]  ? __pfx_set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override+0x10/0x10 [amdgpu]
[   56.865013]  module_address_lookup+0x64/0xb0
[   56.865013]  ? __pfx_set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override+0x10/0x10 [amdgpu]
[   56.869005]  kallsyms_lookup_buildid+0xe2/0x140
[   56.869005]  ? __pfx_set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override+0x10/0x10 [amdgpu]
[   56.873012]  kallsyms_lookup+0x18/0x40
[   56.873012]  test_for_valid_rec+0x6d/0xc0
[   56.873012]  ftrace_module_enable+0x239/0x3d0
[   56.873012]  ? __mutex_unlock_slowpath.isra.0+0x90/0x150
[   56.873012]  load_module+0x9d7/0xcf0
[   56.873012]  ? vfree.part.0+0xf0/0x280
[   56.873012]  init_module_from_file+0x96/0x100
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? init_module_from_file+0x96/0x100
[   56.873012]  idempotent_init_module+0x11c/0x310
[   56.873012]  __x64_sys_finit_module+0x64/0xd0
[   56.873012]  x64_sys_call+0x2019/0x25a0
[   56.873012]  do_syscall_64+0x7f/0x180
[   56.873012]  ? rseq_ip_fixup+0x90/0x1f0
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? restore_fpregs_from_fpstate+0x3d/0xd0
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? switch_fpu_return+0x55/0xf0
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? syscall_exit_to_user_mode+0x86/0x260
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? do_syscall_64+0x8c/0x180
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? switch_fpu_return+0x55/0xf0
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? syscall_exit_to_user_mode+0x86/0x260
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? native_flush_tlb_global+0x97/0xb0
[   56.873012]  ? ext4_llseek+0xc3/0x130
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? __flush_tlb_all+0x13/0x60
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? do_flush_tlb_all+0xe/0x20
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? __flush_smp_call_function_queue+0xa2/0x440
[   56.873012]  ? syscall_exit_to_user_mode+0x86/0x260
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? irqentry_exit_to_user_mode+0x7b/0x260
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  ? irqentry_exit+0x43/0x50
[   56.873012]  ? srso_return_thunk+0x5/0x5f
[   56.873012]  entry_SYSCALL_64_after_hwframe+0x78/0x80
[   56.873012] RIP: 0033:0x7b98a8f2725d
[   56.873012] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b bb 0d 00 f7 d8 64 89 01 48
[   56.873012] RSP: 002b:00007ffdbe4ca328 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[   56.873012] RAX: ffffffffffffffda RBX: 000061171c40e6f0 RCX: 00007b98a8f2725d
[   56.873012] RDX: 0000000000000004 RSI: 00007b98a919407d RDI: 0000000000000033
[   56.873012] RBP: 00007ffdbe4ca3e0 R08: 0000000000000040 R09: 00007ffdbe4ca370
[   56.873012] R10: 00007b98a9003b20 R11: 0000000000000246 R12: 00007b98a919407d
[   56.873012] R13: 0000000000020000 R14: 000061171c40fbb0 R15: 000061171c411e20
[   56.873012]  </TASK>
[   62.302729] [drm] amdgpu kernel modesetting enabled.
[   56.846268]  <IRQ>
[   56.846268]  ? show_regs+0x6d/0x80
[   56.846268]  ? watchdog_timer_fn+0x206/0x290
[   56.846268]  ? __pfx_watchdog_timer_fn+0x10/0x10
[   56.846268]  ? __hrtimer_run_queues+0x112/0x2a0
[   56.846268]  ? hrtimer_interrupt+0xf6/0x250
[   56.846268]  ? __sysvec_apic_timer_interrupt+0x51/0x150
[   56.846268]  ? sysvec_apic_timer_interrupt+0x8d/0xd0
[   56.846268]  </IRQ>

IMHO, сработал какой-то watchdog в недрах GPU-шного драйвера, где-то около set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override().

Судя по названию, это что-то связанное с DisplayPort.

vel ★★★★★
()