Overheat-Shutdown-Script
[root@therm01 ~]# less /etc/crontab
# System crontab. Every job line has the form:
#   minute hour day-of-month month day-of-week user command
# Jobs are run with bash (the "&>" redirection used below is a bash feature).
SHELL=/bin/bash
PATH=/sbin:/bin:/usr/sbin:/usr/bin
# Output produced by jobs is mailed to root unless a job redirects it.
MAILTO=root
HOME=/
# run-parts: periodic job directories (hourly, daily, weekly, monthly)
01 * * * * root run-parts /etc/cron.hourly
02 4 * * * root run-parts /etc/cron.daily
22 4 * * 0 root run-parts /etc/cron.weekly
42 4 1 * * root run-parts /etc/cron.monthly
#
# Site-specific jobs.
# Every 5 minutes:
*/05 * * * * root /usr/local/sbin/sdt-ganglia.sh
# Every 5 minutes: overheat check. The two arguments are the maximum CPU
# temperature and the maximum ambient temperature (both 70 here); stdout and
# stderr are discarded, so cron sends no mail for this job's output.
*/05 * * * * root /srv/cluster-autoinstall/config-space/scripts/temp-check.sh 70 70 &> /dev/null
# Every minute:
*/01 * * * * root /usr/local/bin/keep-alive.sh
# Every 15 minutes:
*/15 * * * * root /usr/local/bin/mce-check.sh
[root@therm01 ~]#
[root@therm01 ~]# less /srv/cluster-autoinstall/config-space/scripts/temp-check.sh
#!/bin/bash
#
# temp-check.sh - overheat protection for the cluster.
#
# Usage: temp-check.sh MAX_CPU_TEMP MAX_AMBIENT_TEMP
#
# For every host found in the third column of an /etc/hosts line containing
# "an0", the "CPU1 Temp" and "Sys Temp" sensors are read via IPMI and the node
# is powered off when a reading is above the given limit. All shutdowns are
# collected and mailed once the loop has finished. Afterwards the head node's
# own "Sys Temp" is checked; for the head node only a mail is sent.
#
# Exit status: 1 when a limit is missing or is not an integer.

# Project configuration; this script reads CONFIG_ADMIN_EMAIL and
# CONFIG_HEADNODE_PREFIX from it. A missing file must not stop the thermal
# protection itself, so it only produces a warning.
if [ -r /srv/cluster-autoinstall/config-space/etc/projekt.conf ]; then
  . /srv/cluster-autoinstall/config-space/etc/projekt.conf
else
  echo "Warning: projekt.conf not readable, mails may not be delivered" >&2
fi

# Report of the current run; a copy is kept here until the next run archives it.
REPORT_FILE=/tmp/tempmail.out
MAIL_SUBJECT="Node shutdown! Temperature to high"
# Set to 1 as soon as one node exceeded a limit; triggers the mail at the end.
ERRORS=0
# Text of the mail, one line per exceeded limit.
REPORT=""

# Succeeds when $1 is a (possibly negative) decimal integer.
is_integer() {
  case "${1#-}" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Runs ipmitool against the BMC of node $1; remaining arguments are the
# ipmitool sub-command.
# NOTE(review): the credentials are hard-coded and visible in the process
# list; consider ipmitool's -E (IPMI_PASSWORD) or -f (password file) options.
node_ipmi() {
  local host=$1
  shift
  ipmitool -I lanplus -H "$host" -U ADMIN -P ADMIN "$@"
}

# Prints the fourth whitespace-separated field of the first line of the SDR
# listing $1 that contains the sensor name $2. Per the checks below this is
# the reading, or a non-numeric word such as "no" when there is none.
sensor_value() {
  awk -v name="$2" 'index($0, name) { print $4; exit }' <<<"$1"
}

# Compares one reading with its limit and prints the result.
# Arguments: $1 node, $2 reading, $3 limit,
#            $4 label used in error messages, $5 label used in the Ok message
# Globals:   REPORT, ERRORS (written when the limit is exceeded)
# Returns:   0 when the reading is above the limit, 1 otherwise (including
#            the case that no numeric reading was retrieved).
check_reading() {
  local node=$1 value=$2 limit=$3 err_label=$4 ok_label=$5

  # Anything non-numeric ("no", empty, ...) means no data, e.g. node offline.
  if ! is_integer "$value"; then
    echo "No $err_label data retrieved...!"
    return 1
  fi

  # "[" is used on purpose: it compares in decimal even with leading zeros.
  if [ "$value" -gt "$limit" ]; then
    echo "Error $err_label temperature greater than $limit -- current $value C"
    REPORT="${REPORT}${node} shutdown, due to high temperature: Error $err_label temperature greater than $limit -- current $value C"$'\n'
    ERRORS=1
    return 0
  fi

  echo "$ok_label Ok -- $value C"
  return 1
}

# Checks CPU and ambient temperature of node $1 against the limits $2 and $3
# and powers the node off when one of them is exceeded.
check_node() {
  local node=$1 cpu_limit=$2 ambient_limit=$3
  local sdr powered_off=0

  echo "Temperature for $node"
  # One SDR query serves both sensors.
  sdr=$(node_ipmi "$node" sdr)

  if check_reading "$node" "$(sensor_value "$sdr" "CPU1 Temp")" \
    "$cpu_limit" CPU CPU; then
    node_ipmi "$node" power off && powered_off=1
  fi
  echo ""

  # The second power off is only a retry in case the first one did not succeed.
  if check_reading "$node" "$(sensor_value "$sdr" "Sys Temp")" \
    "$ambient_limit" ambient Ambient && [ "$powered_off" -eq 0 ]; then
    node_ipmi "$node" power off
  fi
  echo ""
}

# Check if first parameter for max CPU temp was given
if [ -z "$1" ]; then
  echo "No cpu temperature given...!"
  exit 1
fi
# Check if second parameter for max AMBIENT temp was given
if [ -z "$2" ]; then
  echo "No ambient temperature given...!"
  exit 1
fi
# A non-numeric limit would make every comparison fail and thereby disable
# the protection silently, so refuse to run.
if ! is_integer "$1" || ! is_integer "$2"; then
  echo "Temperature limits must be integers...!"
  exit 1
fi

# A report left over from an earlier run is archived under /root. A symbolic
# link at that place is never followed, only removed.
if [ -L "$REPORT_FILE" ]; then
  rm -f -- "$REPORT_FILE"
elif [ -e "$REPORT_FILE" ]; then
  mv -- "$REPORT_FILE" "/root/tempmail.$(date +%Y%m%d_%H%M%S).out"
  rm -f -- "$REPORT_FILE" && echo "/tmp/tempmail.out deleted...!"
fi

# Collect the hosts first so that nothing inside the loop can consume the
# host list from stdin. Hosts are the third column of all /etc/hosts lines
# containing "an0".
nodes=()
while read -r host; do
  nodes+=("$host")
done < <(awk '/an0/ && $3 != "" { print $3 }' /etc/hosts)

for node in "${nodes[@]}"; do
  check_node "$node" "$1" "$2"
done

if [ "$ERRORS" -eq 1 ]; then
  # Keep a copy for the next run to archive. noclobber makes the redirection
  # fail instead of writing through a file or link somebody else placed there.
  if ! (
    set -o noclobber
    printf '%s' "$REPORT" > "$REPORT_FILE"
  ) 2> /dev/null; then
    echo "Warning: could not write $REPORT_FILE" >&2
  fi

  # CONFIG_ADMIN_EMAIL is deliberately unquoted: it may hold several addresses.
  # shellcheck disable=SC2086
  printf '%s' "$REPORT" | mail -s "$MAIL_SUBJECT" $CONFIG_ADMIN_EMAIL
  printf '%s' "$REPORT" | mail -s "$MAIL_SUBJECT" ssiczek@sysgen.de
  printf '%s' "$REPORT" | mail -s "$MAIL_SUBJECT" dnikisch@sysgen.de
fi

# Same procedure for the head node, read through the local IPMI interface.
head_ambient=$(sensor_value "$(ipmitool -I open sdr)" "Sys Temp")
echo "Temperature for $CONFIG_HEADNODE_PREFIX $head_ambient C"
if is_integer "$head_ambient" && [ "$head_ambient" -gt "$2" ]; then
  echo "Error ambient temperature greater than $2 -- current $head_ambient C"
  # shellcheck disable=SC2086
  echo "$CONFIG_HEADNODE_PREFIX temperature to high, shutting down... its greater than $2 -- current $head_ambient C" |
    mail -s "$CONFIG_HEADNODE_PREFIX temperature to high, shutting down" $CONFIG_ADMIN_EMAIL
  # The actual shutdown of the head node is disabled; only the mail is sent.
  #shutdown -h now
fi
# Script done
| Article Details: | Views: |
|---|---|
| Last updated: 2010/07/15 | |
| Author: | |
