Crash Report - Fatal trap 12: page fault while in kernel mode (lsof)
-
I have been experiencing random crashes on at least two separate systems; both have the same hardware setup. I have done a little looking around, but I will be honest in saying I am not a FreeBSD expert... I have been putting this off for a bit but really need to get to the bottom of it. Any help is super appreciated.
TopTon
Intel(R) N100
Current: 2015 MHz, Max: 806 MHz
4 CPUs: 1 package(s) x 4 core(s)
AES-NI CPU Crypto: Yes (active)
QAT Crypto: No
2.7.2-RELEASE (amd64)
4x Intel i226-V
Avg Temp ~100F
RAM - ~50% of 8GB
HDD - 1% used of 180G (zfs)
Device 01:
Fatal trap 12: page fault while in kernel mode
cpuid = 2; apic id = 04
fault virtual address = 0x40
fault code = supervisor read data, page not present
instruction pointer = 0x20:0xffffffff80ca9140
stack pointer = 0x28:0xfffffe00b16fc730
frame pointer = 0x28:0xfffffe00b16fc730
code segment = base 0x0, limit 0xfffff, type 0x1b
= DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags = interrupt enabled, resume, IOPL = 0
current process = 49398 (lsof)
rdi: fffff801f3150e00 rsi: fffff801b00beb00 rdx: 0000000000000000
rcx: ffffffff82d62a40 r8: 0000000000000002 r9: ffffffffffffffff
rax: 0000000000000000 rbx: fffffe00b16fcbe0 rbp: fffffe00b16fc730
r10: 0000000000000000 r11: fffffe00b413e1a0 r12: fffffe00b16fc7d8
r13: fffffe00b413dc80 r14: fffff801f3150e00 r15: fffff801f77a1540
trap number = 12
panic: page fault
cpuid = 2
time = 1742174470
KDB: enter: panicdb:0:kdb.enter.default> show pcpu cpuid = 2 dynamic pcpu = 0xfffffe008ef67f80 curthread = 0xfffffe00b413dc80: pid 49398 tid 100440 critnest 1 "lsof" curpcb = 0xfffffe00b413e1a0 fpcurthread = 0xfffffe00b413dc80: pid 49398 "lsof" idlethread = 0xfffffe0011ee5560: tid 100005 "idle: cpu2" self = 0xffffffff84012000 curpmap = 0xfffff800065afd38 tssp = 0xffffffff84012384 rsp0 = 0xfffffe00b16fd000 kcr3 = 0xffffffffffffffff ucr3 = 0xffffffffffffffff scr3 = 0x0 gs32p = 0xffffffff84012404 ldt = 0xffffffff84012444 tss = 0xffffffff84012434 curvnet = 0xfffff800012004c0 db:0:kdb.enter.default> bt Tracing pid 49398 tid 100440 td 0xfffffe00b413dc80 kdb_enter() at kdb_enter+0x32/frame 0xfffffe00b16fc410 vpanic() at vpanic+0x163/frame 0xfffffe00b16fc540 panic() at panic+0x43/frame 0xfffffe00b16fc5a0 trap_fatal() at trap_fatal+0x40c/frame 0xfffffe00b16fc600 trap_pfault() at trap_pfault+0x4f/frame 0xfffffe00b16fc660 calltrap() at calltrap+0x8/frame 0xfffffe00b16fc660 --- trap 0xc, rip = 0xffffffff80ca9140, rsp = 0xfffffe00b16fc730, rbp = 0xfffffe00b16fc730 --- prison_check() at prison_check+0x20/frame 0xfffffe00b16fc730 cr_canseeinpcb() at cr_canseeinpcb+0x19/frame 0xfffffe00b16fc760 tcp_pcblist() at tcp_pcblist+0x1f6/frame 0xfffffe00b16fcaf0 sysctl_root_handler_locked() at sysctl_root_handler_locked+0x90/frame 0xfffffe00b16fcb40 sysctl_root() at sysctl_root+0x216/frame 0xfffffe00b16fcbc0 userland_sysctl() at userland_sysctl+0x176/frame 0xfffffe00b16fcc70 kern___sysctlbyname() at kern___sysctlbyname+0x21d/frame 0xfffffe00b16fcdc0 sys___sysctlbyname() at sys___sysctlbyname+0x2d/frame 0xfffffe00b16fce00 amd64_syscall() at amd64_syscall+0x109/frame 0xfffffe00b16fcf30 fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe00b16fcf30 --- syscall (570, FreeBSD ELF64, __sysctlbyname), rip = 0x8222c326a, rsp = 0x82092cb88, rbp = 0x82092cbc0 --- db:0:kdb.enter.default> ps pid ppid pgrp uid state wmesg wchan cmd 54279 1 39114 0 R ping 53801 39669 39114 0 R CPU 1 telegraf 
53600 39669 39114 0 S select 0xfffff80120681540 ping 53581 39669 39114 0 R telegraf 53308 39669 39114 0 D sysctl 0xffffffff82c02d00 ps 52602 39669 39114 0 D sysctl 0xffffffff82c02d00 lsof 52268 39669 39114 0 R telegraf 52103 39669 39114 0 R CPU 3 pgrep 51807 39669 39114 0 D sysctl 0xffffffff82c02d00 lsof 49398 39669 39114 0 R CPU 2 lsof 39669 39114 39114 0 R (threaded) telegraf
Device 02:
Fatal trap 12: page fault while in kernel mode
cpuid = 2; apic id = 04
fault virtual address = 0x40
fault code = supervisor read data, page not present
instruction pointer = 0x20:0xffffffff80ca9140
stack pointer = 0x28:0xfffffe01057eb730
frame pointer = 0x28:0xfffffe01057eb730
code segment = base 0x0, limit 0xfffff, type 0x1b
= DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags = interrupt enabled, resume, IOPL = 0
current process = 43259 (lsof)
rdi: fffff8021dd69700 rsi: fffff80193948400 rdx: 0000000000000000
rcx: ffffffff82d62a40 r8: 0000000000000002 r9: ffffffffffffffff
rax: 0000000000000000 rbx: fffffe01057ebbe0 rbp: fffffe01057eb730
r10: 00000a1944f62158 r11: fffffe011c81ec60 r12: fffffe01057eb7d8
r13: fffffe011c81e740 r14: fffff8021dd69700 r15: fffff802b52f3000
trap number = 12
panic: page fault
cpuid = 2
time = 1741776490
KDB: enter: panicdb:0:kdb.enter.default> show pcpu cpuid = 2 dynamic pcpu = 0xfffffe009d500f80 curthread = 0xfffffe011c81e740: pid 43259 tid 102977 critnest 1 "lsof" curpcb = 0xfffffe011c81ec60 fpcurthread = 0xfffffe011c81e740: pid 43259 "lsof" idlethread = 0xfffffe0020490560: tid 100005 "idle: cpu2" self = 0xffffffff84012000 curpmap = 0xfffff80447150398 tssp = 0xffffffff84012384 rsp0 = 0xfffffe01057ec000 kcr3 = 0xffffffffffffffff ucr3 = 0xffffffffffffffff scr3 = 0x0 gs32p = 0xffffffff84012404 ldt = 0xffffffff84012444 tss = 0xffffffff84012434 curvnet = 0xfffff80001240480 db:0:kdb.enter.default> bt Tracing pid 43259 tid 102977 td 0xfffffe011c81e740 kdb_enter() at kdb_enter+0x32/frame 0xfffffe01057eb410 vpanic() at vpanic+0x163/frame 0xfffffe01057eb540 panic() at panic+0x43/frame 0xfffffe01057eb5a0 trap_fatal() at trap_fatal+0x40c/frame 0xfffffe01057eb600 trap_pfault() at trap_pfault+0x4f/frame 0xfffffe01057eb660 calltrap() at calltrap+0x8/frame 0xfffffe01057eb660 --- trap 0xc, rip = 0xffffffff80ca9140, rsp = 0xfffffe01057eb730, rbp = 0xfffffe01057eb730 --- prison_check() at prison_check+0x20/frame 0xfffffe01057eb730 cr_canseeinpcb() at cr_canseeinpcb+0x19/frame 0xfffffe01057eb760 tcp_pcblist() at tcp_pcblist+0x1f6/frame 0xfffffe01057ebaf0 sysctl_root_handler_locked() at sysctl_root_handler_locked+0x90/frame 0xfffffe01057ebb40 sysctl_root() at sysctl_root+0x216/frame 0xfffffe01057ebbc0 userland_sysctl() at userland_sysctl+0x176/frame 0xfffffe01057ebc70 kern___sysctlbyname() at kern___sysctlbyname+0x21d/frame 0xfffffe01057ebdc0 sys___sysctlbyname() at sys___sysctlbyname+0x2d/frame 0xfffffe01057ebe00 amd64_syscall() at amd64_syscall+0x109/frame 0xfffffe01057ebf30 fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe01057ebf30 --- syscall (570, FreeBSD ELF64, __sysctlbyname), rip = 0x8239fd26a, rsp = 0x8208318e8, rbp = 0x820831920 --- db:0:kdb.enter.default> ps pid ppid pgrp uid state wmesg wchan cmd 43867 86475 86226 0 R telegraf 43692 86475 86226 0 R CPU -1 
ping 43520 86475 86226 0 R telegraf 43405 86475 86226 0 D sysctl 0xffffffff82c02d00 lsof 43261 86475 86226 0 R lsof 43259 86475 86226 0 R CPU 2 lsof 41994 31844 399 0 S nanslp 0xffffffff83063d63 sleep 31844 98056 399 0 S wait 0xfffffe002048e5c0 sh 1431 64500 399 0 S nanslp 0xffffffff83063d61 sleep 86475 86226 86226 0 R (threaded) telegraf
-
Hmm, not a crash I've seen before. Are you running lsof manually to trigger it? What packages do you have installed? lsof is not included in 2.7.2 by default.
-
@stephenw10
I am not running it manually, so honestly I am not really sure what is using it. I do have a couple of manual scripts running to populate some Grafana dashboards that collect a fair amount of stats, but I do not remember installing lsof for its use. Reviewing the bash scripts, I do not see it listed. Running ps -ax does not show an lsof process as of now.
Packages:
Acme
apcupsd
iperf
nmap
ntopng
pfblockerng
service_watchdog
suricata
telegraf
wireguard
-
Hmm, it appears lsof is a dependency of Telegraf: https://github.com/pfsense/FreeBSD-ports/blob/devel/net-mgmt/pfSense-pkg-Telegraf/Makefile#L17
How do you have it configured?
-
@stephenw10
Sure, here is what I have. I will note that at the bottom there is a listed GitHub project, and files that should have been called; however, I did not replace them when I migrated to the Topton mini-PC from my old 1U Atom that I had been using for the last 8+ years. But from what you have shown it should be localized to the Telegraf package, so that really helps.
[[inputs.net]] interfaces = ["igc0", "igc1", "igc2", "igc3","tun_wg0", "tun_wg1", "tun_wg2", "tun_wg3"] [[inputs.conntrack]] [[inputs.filestat]] [[inputs.internal]] [[inputs.interrupts]] [[inputs.linux_sysctl_fs]] [[inputs.net]] [[inputs.net_response]] protocol = "tcp" address = "localhost:443" [[inputs.netstat]] [[inputs.nstat]] [[inputs.procstat]] pattern = "." prefix = "pgrep_serviceprocess" [[inputs.dns_query]] # ## servers to query # servers = ["8.8.8.8"] servers = ["208.67.222.222"] [[inputs.netstat]] # # no configuration # Read metrics about swap memory usage [[inputs.swap]] # no configuration [[inputs.ping]] # ## Hosts to send ping packets to. urls = ["208.67.222.222"] # # ## Method used for sending pings, can be either "exec" or "native". When set # ## to "exec" the systems ping command will be executed. When set to "native" # ## the plugin will send pings directly. # ## # ## While the default is "exec" for backwards compatibility, new deployments # ## are encouraged to use the "native" method for improved compatibility and # ## performance. # # method = "exec" # # ## Number of ping packets to send per interval. Corresponds to the "-c" # ## option of the ping command. # # count = 1 # # ## Time to wait between sending ping packets in seconds. Operates like the # ## "-i" option of the ping command. # # ping_interval = 1.0 # # ## If set, the time to wait for a ping response in seconds. Operates like # ## the "-W" option of the ping command. # # timeout = 1.0 # # ## If set, the total ping deadline, in seconds. Operates like the -w option # ## of the ping command. # # deadline = 10 # # ## Interface or source address to send ping from. Operates like the -I or -S # ## option of the ping command. # # interface = "" # # ## Specify the ping executable binary. # # binary = "ping" # # ## Arguments for ping command. When arguments is not empty, the command from # ## the binary option will be used and other options (ping_interval, timeout, # ## etc) will be ignored. 
# # arguments = ["-c", "3"] # # ## Use only IPv6 addresses when resolving a hostname. # # ipv6 = false #################### ## GIT: https://github.com/VictorRobellini/pfSense-Dashboard [[inputs.exec]] commands = [ "/usr/local/bin/telegraf_pfinterface.php", "/usr/local/bin/telegraf_gateways.py", "/usr/local/bin/telegraf_pfifgw.php", "sh /usr/local/bin/telegraf_temperature.sh", "sh /usr/local/bin/telegraf_pinger_loss.sh" ] data_format = "influx" [[inputs.logparser]] files = ["/var/log/pfblockerng/dnsbl.log"] from_beginning=true [inputs.logparser.grok] measurement = "dnsbl_log" patterns = ["^%{WORD:BlockType}-%{WORD:BlockSubType},%{SYSLOGTIMESTAMP:timestamp:ts-syslog},%{IPORHOST:destination:tag},%{IPORHOST:source:tag},%{GREEDYDATA:call},%{WORD:BlockMethod},%{WORD:BlockList},%{IPORHOST:tld:tag},%{WORD:DefinedList:tag},%{GREEDYDATA:hitormiss}"] timezone = "Local" [inputs.logparser.tags] value = "1" [[inputs.logparser]] files = ["/var/log/pfblockerng/ip_block.log"] from_beginning=true [inputs.logparser.grok] measurement = "ip_block_log" patterns = ["^%{SYSLOGTIMESTAMP:timestamp:ts-syslog},%{NUMBER:TrackerID},%{GREEDYDATA:Interface},%{WORD:InterfaceName},%{WORD:action},%{NUMBER:IPVersion},%{NUMBER:ProtocolID},%{GREEDYDATA:Protocol},%{IPORHOST:SrcIP:tag},%{IPORHOST:DstIP:tag},%{NUMBER:SrcPort},%{NUMBER:DstPort},%{WORD:Dir},%{WORD:GeoIP:tag},%{GREEDYDATA:AliasName},%{GREEDYDATA:IPEvaluated},%{GREEDYDATA:FeedName:tag},%{HOSTNAME:ResolvedHostname},%{HOSTNAME:ClientHostname},%{GREEDYDATA:ASN},%{GREEDYDATA:DuplicateEventStatus}"] timezone = "Local" [[inputs.unbound]] server = "127.0.0.1:953" binary = "/usr/local/bin/telegraf_unbound.sh"
-
Which of those are custom scripts you've imported?
Can you see how lsof is being called? Or disable that as a test?