check_reload_status using all CPU time
-
I have an issue with a multi-WAN setup where the check_reload_status process slowly builds up and then uses all the CPU resources on the box. This makes traffic slow way down, and although the box does not crash, it effectively stops passing traffic. Here is the output from ps auwww:
USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND
root 244 77.9 0.1 3348 1192 ?? RNs 11:08AM 33:08.12 /usr/local/sbin/check_reload_status
root 5189 47.8 2.3 67228 22528 ?? SN 1:45PM 0:00.18 /usr/local/bin/php -f /etc/rc.linkup start em0
_dhcp 9186 46.3 0.1 3256 1408 ?? SN 1:45PM 0:00.01 dhclient: em0 (dhclient)
root 12 9.4 0.0 0 152 ?? WL 11:08AM 11:36.79 [intr]
root 0 0.0 0.0 0 64 ?? DLs 11:08AM 0:26.43 [kernel]
root 1 0.0 0.1 1888 504 ?? SLs 11:08AM 0:00.05 /sbin/init --
root 2 0.0 0.0 0 8 ?? DL 11:08AM 0:00.27 [g_event]
root 3 0.0 0.0 0 8 ?? DL 11:08AM 0:00.62 [g_up]
root 4 0.0 0.0 0 8 ?? DL 11:08AM 0:00.81 [g_down]
root 5 0.0 0.0 0 8 ?? DL 11:08AM 0:00.00 [crypto]
root 6 0.0 0.0 0 8 ?? DL 11:08AM 0:00.00 [crypto returns]
root 7 0.0 0.0 0 8 ?? DL 11:08AM 0:00.03 [fdc0]
root 8 0.0 0.0 0 8 ?? DL 11:08AM 0:00.00 [sctp_iterator]
root 9 0.0 0.0 0 8 ?? DL 11:08AM 0:02.84 [pfpurge]
root 10 0.0 0.0 0 8 ?? DL 11:08AM 0:00.00 [audit]
root 11 0.0 0.0 0 8 ?? RL 11:08AM 96:35.83 [idle]
root 13 0.0 0.0 0 8 ?? DL 11:08AM 0:04.24 [ng_queue]
root 14 0.0 0.0 0 8 ?? DL 11:08AM 0:22.85 [yarrow]
root 15 0.0 0.0 0 160 ?? DL 11:08AM 0:00.15 [usb]
root 16 0.0 0.0 0 8 ?? DL 11:08AM 0:00.00 [xpt_thrd]
root 17 0.0 0.0 0 8 ?? DL 11:08AM 0:00.01 [pagedaemon]
root 18 0.0 0.0 0 8 ?? DL 11:08AM 0:00.00 [vmdaemon]
root 19 0.0 0.0 0 8 ?? DL 11:08AM 0:00.00 [pagezero]
root 20 0.0 0.0 0 8 ?? DL 11:08AM 0:00.01 [idlepoll]
root 21 0.0 0.0 0 8 ?? DL 11:08AM 0:00.03 [bufdaemon]
root 22 0.0 0.0 0 8 ?? DL 11:08AM 0:00.31 [syncer]
root 23 0.0 0.0 0 8 ?? DL 11:08AM 0:00.03 [vnlru]
root 24 0.0 0.0 0 8 ?? DL 11:08AM 0:00.03 [softdepflush]
root 40 0.0 0.0 0 8 ?? DL 11:08AM 0:00.37 [md0]
root 246 0.0 0.1 3348 1124 ?? IN 11:08AM 0:00.00 check_reload_status: Monitoring daemon of check_reload_status
root 256 0.0 0.1 1888 536 ?? Ss 11:08AM 0:00.29 /sbin/devd
nobody 941 0.0 0.3 5504 2940 ?? S 11:09AM 0:39.11 /usr/local/sbin/dnsmasq --local-ttl 1 --all-servers --rebind-localhost-ok --stop-dns-rebind --dns-forward-max=5000 --cache-size=10000
root 3321 0.0 0.2 3596 1564 ?? SN 1:45PM 0:00.00 /bin/sh /sbin/dhclient-script
root 3449 0.0 1.9 61084 17956 ?? I 12:09PM 0:06.84 /usr/local/bin/php
root 7434 0.0 0.3 5224 3004 ?? Is 11:08AM 0:00.00 /usr/sbin/sshd
root 9684 0.0 0.1 3388 1200 ?? SN 1:45PM 0:00.00 /sbin/ping -q -c 1 -t 1 173.19.200.1
root 11286 0.0 1.8 61084 17376 ?? S 1:45PM 0:00.05 /usr/local/bin/php
root 11577 0.0 0.1 3256 1260 ?? SNs 1:45PM 0:00.00 dhclient: em0 [priv] (dhclient)
root 17659 0.0 2.3 67228 22524 ?? IN 1:44PM 0:00.18 /usr/local/bin/php -f /etc/rc.linkup start em0
root 17785 0.0 0.4 7944 3596 ?? Ss 11:12AM 0:00.97 sshd: root@pts/0 (sshd)
root 17974 0.0 0.1 1504 592 ?? IN 1:44PM 0:00.00 sleep 60
_dhcp 18764 0.0 0.1 3256 1416 ?? SN 1:44PM 0:00.01 dhclient: em0 (dhclient)
root 19931 0.0 0.1 3472 1164 ?? Is 11:09AM 0:00.01 /usr/local/sbin/sshlockout_pf 15
root 20763 0.0 0.1 3256 1312 ?? SNs 1:44PM 0:00.00 dhclient: em0 [priv] (dhclient)
root 24971 0.0 0.3 4908 2488 ?? Ss 11:08AM 0:05.47 /usr/sbin/syslogd -s -c -c -l /var/dhcpd/var/run/log -f /var/etc/syslog.conf
root 25627 0.0 0.1 3376 1332 ?? Is 11:08AM 0:00.02 /usr/sbin/inetd -wW -R 0 -a 127.0.0.1 /var/etc/inetd.conf
root 29763 0.0 0.1 3256 1320 ?? Ss 1:14PM 0:00.43 /usr/local/sbin/apinger -c /var/etc/apinger.conf
root 30101 0.0 0.2 4536 2016 ?? I 1:14PM 0:00.01 rrdtool -
root 37642 0.0 0.5 7804 5276 ?? S 11:08AM 0:21.97 /usr/local/sbin/lighttpd -f /var/etc/lighty-webConfigurator.conf
root 37756 0.0 1.3 60060 12972 ?? Is 11:08AM 0:00.06 /usr/local/bin/php
root 39492 0.0 1.3 60060 12972 ?? Is 11:08AM 0:00.07 /usr/local/bin/php
root 40396 0.0 1.3 60060 12972 ?? Ss 11:08AM 0:00.09 /usr/local/bin/php
root 43469 0.0 0.6 6020 6044 ?? SNs 11:09AM 0:00.61 /usr/local/bin/ntpd -g -c /var/etc/ntpd.conf
root 52026 0.0 0.1 3348 1328 ?? Is 11:09AM 0:00.01 /usr/sbin/cron -s
root 58336 0.0 0.1 3256 984 ?? Is 11:09AM 0:00.00 /usr/local/bin/minicron 240 /var/run/ping_hosts.pid /usr/local/bin/ping_hosts.sh
root 58884 0.0 0.1 3256 1032 ?? I 11:09AM 0:00.01 minicron: helper /usr/local/bin/ping_hosts.sh (minicron)
root 59087 0.0 0.1 3256 984 ?? Is 11:09AM 0:00.00 /usr/local/bin/minicron 3600 /var/run/expire_accounts.pid /etc/rc.expireaccounts
root 59393 0.0 0.1 3256 1032 ?? I 11:09AM 0:00.00 minicron: helper /etc/rc.expireaccounts (minicron)
root 59637 0.0 0.1 3256 984 ?? Is 11:09AM 0:00.00 /usr/local/bin/minicron 86400 /var/run/update_alias_url_data.pid /etc/rc.update_alias_url_data
root 60102 0.0 0.1 3256 1032 ?? I 11:09AM 0:00.00 minicron: helper /etc/rc.update_alias_url_data (minicron)
root 60416 0.0 1.9 61084 17944 ?? S 1:36PM 0:03.88 /usr/local/bin/php
root 63328 0.0 0.2 3596 1544 ?? IN 12:25PM 0:00.71 /bin/sh /var/db/rrd/updaterrd.sh
root 17268 0.0 0.2 3628 1556 v0 Is 11:09AM 0:00.01 login [pam] (login)
root 19984 0.0 0.2 3596 1464 v0 I 11:09AM 0:00.00 -sh (sh)
root 21526 0.0 0.2 3596 1464 v0 I+ 11:09AM 0:00.00 /bin/sh /etc/rc.initial
root 24365 0.0 0.3 5860 2684 v0- S 11:08AM 0:01.80 /usr/sbin/tcpdump -s 256 -v -S -l -n -e -ttt -i pflog0
root 24572 0.0 0.1 3256 904 v0- S 11:08AM 0:02.53 logger -t pf -p local0.info
root 12717 0.0 0.1 3396 1224 0 R+ 1:45PM 0:00.00 ps uxawww
root 30658 0.0 0.2 3596 1464 0 Is 11:12AM 0:00.00 -sh (sh)
root 31850 0.0 0.2 3596 1468 0 I 11:12AM 0:00.00 /bin/sh /etc/rc.initial
root 34363 0.0 0.2 4636 2336 0 S 11:12AM 0:00.02 /bin/tcsh -
This has been covered many times in many threads here on the forum.
That's a symptom of some other issue, not a cause. The check_reload_status program is only following commands signaled to it from other places. If it's overloaded, it's because something else is calling it very often.
In particular, I'd be curious about why em0 seems to be having link events going up/down, making it run /etc/rc.linkup.
-
I unplugged that interface last night, and as long as I leave it unplugged I do not see check_reload_status go crazy, but as soon as I plug it back in the CPU usage starts rising again. Any ideas on how to fix it? This is a load balancer and it has been working flawlessly for the whole school year. I am not sure what to check; this one has me stumped. Could it be the router from Mediacom?
-
Impossible to say without more info. The system log when the interface is plugged in might help. Also if you're on 2.0.x, you might give a 2.1 snapshot a try.
It could also be a problem with the NIC, the cable, or the modem on that line.