Laden...

Overheat-Shutdown-Script

[root@therm01 ~]# less /etc/crontab

# Shell used to run every job in this table; the "&>" redirection on the
# temp-check.sh entry further down relies on it being bash.
SHELL=/bin/bash

# Command search path for the jobs.
PATH=/sbin:/bin:/usr/sbin:/usr/bin

# Any output a job produces is mailed to this user.
MAILTO=root

# HOME value placed in the environment of the jobs.
HOME=/

 

# run-parts
# Periodic directories, all run as root: hourly at minute 01, daily at 04:02,
# weekly on Sunday (day-of-week 0) at 04:22, monthly on the 1st at 04:42.

01 * * * * root run-parts /etc/cron.hourly

02 4 * * * root run-parts /etc/cron.daily

22 4 * * 0 root run-parts /etc/cron.weekly

42 4 1 * * root run-parts /etc/cron.monthly

#
# Site-specific jobs, all run as root.
# "*/05" = every 5 minutes, "*/01" = every minute, "*/15" = every 15 minutes.

*/05 * * * * root /usr/local/sbin/sdt-ganglia.sh

# Overheat check: first argument is the CPU limit, second the ambient limit
# (both 70 here). stdout and stderr are discarded, so nothing from this job
# reaches MAILTO; notification relies on the mail calls inside the script.
*/05 * * * * root /srv/cluster-autoinstall/config-space/scripts/temp-check.sh 70 70 &> /dev/null

*/01 * * * * root /usr/local/bin/keep-alive.sh

*/15 * * * * root /usr/local/bin/mce-check.sh

[root@therm01 ~]#

 

[root@therm01 ~]# less /srv/cluster-autoinstall/config-space/scripts/temp-check.sh

#!/bin/bash

#Load the project configuration. The CONFIG_* variables used further down
#(CONFIG_ADMIN_EMAIL, CONFIG_HEADNODE_PREFIX) are not assigned anywhere in
#this script, so they are presumably defined in this file.
source /srv/cluster-autoinstall/config-space/etc/projekt.conf

#Error flag: 0 = nothing to report. It is switched to 1 as soon as a node
#had to be powered off, which triggers the summary mail after the node loop.
ERRORS=0

#Debug aid: print the configured admin address and stop
#echo $CONFIG_ADMIN_EMAIL
#exit 0

#Validate the two mandatory limits before any node is queried:
#  $1 - maximum CPU temperature
#  $2 - maximum ambient temperature
#Both have to be plain integers because they are later compared with -gt; a
#non-numeric limit would make every comparison fail with an error, and no
#node would ever be shut down. On a missing or invalid limit a message is
#written to stderr and the script exits with status 1.
if [[ -z "$1" ]]; then
  echo "No cpu temperature given...!" >&2
  exit 1
fi

if [[ -z "$2" ]]; then
  echo "No ambient temperature given...!" >&2
  exit 1
fi

for limit in "$1" "$2"; do
  if [[ ! $limit =~ ^-?[0-9]+$ ]]; then
    echo "Temperature limit '$limit' is not an integer...!" >&2
    exit 1
  fi
done

 

#Collector file for the messages that are mailed at the end of the run.
#A file left over from an earlier run is not simply thrown away: it is moved
#to /root under a timestamped name, and whatever is then still present at the
#old path is removed, so this run starts without an old collector file.
mail_body=/tmp/tempmail.out

if test -e "$mail_body"; then
  archive_stamp=$(date +%Y%m%d_%H%M%S)
  mv "$mail_body" "/root/tempmail.${archive_stamp}.out"

  if rm -f "$mail_body"; then
    echo "/tmp/tempmail.out deleted...!"
  fi
fi

 

#Check every compute node over IPMI and power off the ones that run too hot.
#
#Hosts:   each /etc/hosts line containing "an0" contributes its third column
#         as the address handed to ipmitool (the pattern is hard-coded here,
#         it is not taken from $CONFIG_IPMI_PREFIX).
#Limits:  $1 = maximum CPU temperature, $2 = maximum ambient temperature.
#Effects: a node above either limit is powered off via IPMI, one line per
#         violation is appended to /tmp/tempmail.out and ERRORS is set to 1.
#
#NOTE(review): the BMC credentials are hard-coded and passed on the command
#line, where they are visible in the process list -- consider ipmitool's
#-f (password file) or -E (environment variable) options.

#Classify a sensor reading against a limit.
#  $1 - reading as extracted from the sdr table
#  $2 - limit
#Returns 0 if the reading is an integer above the limit, 1 if it is an
#integer at or below the limit, and 2 if it is not a single integer at all
#(empty because the BMC did not answer, "no" from "no reading", "disabled",
#several values, ...). Non-integers must never reach the -gt test: it would
#only print an error and the caller would treat the node as healthy.
temp_state() {
  [[ $1 =~ ^-?[0-9]+$ ]] || return 2
  #[ -gt ] compares in decimal; (( )) would read a leading 0 as octal
  [ "$1" -gt "$2" ]
}

#Print the 4th whitespace-separated column of the first line on stdin that
#contains the sensor name $1. Only the first match is used so the result is
#always a single word.
sdr_reading() {
  awk -v sensor="$1" 'index($0, sensor) { print $4; exit }'
}

#Apply one limit to one reading of one host.
#  $1 - host, $2 - sensor label used in error texts ("CPU" / "ambient"),
#  $3 - sensor label used in the Ok text ("CPU" / "Ambient"),
#  $4 - reading, $5 - limit
#Above the limit: powers the host off, queues a mail line, sets ERRORS=1.
enforce_limit() {
  local host=$1 what=$2 what_ok=$3 reading=$4 limit=$5

  temp_state "$reading" "$limit"
  case $? in
    0)
      echo "Error $what temperature greater than $limit -- current $reading C"
      #Current temperature is over the given maximum: power the node off
      ipmitool -I lanplus -H "$host" -U ADMIN -P ADMIN power off
      #Queue the message; the mail itself is sent once after the loop
      echo "$host shutdown, due to high temperature: Error $what temperature greater than $limit -- current $reading C" >> /tmp/tempmail.out
      ERRORS=1
      ;;
    1)
      #Temperature is ok, nothing to do
      echo "$what_ok Ok -- $reading C"
      ;;
    *)
      #No usable reading (e.g. node offline), so no check is enforced
      echo "No $what data retrieved...!"
      ;;
  esac
}

#The host list is read on fd 3 so that commands inside the loop cannot
#swallow it through stdin. Lines with fewer than three columns are skipped.
while read -r host <&3; do
  echo "Temperature for $host"

  #Query the BMC once and take both readings from the same sdr table
  sdr_table=$(ipmitool -I lanplus -H "$host" -U ADMIN -P ADMIN sdr)
  OUTPUT=$(sdr_reading "CPU1 Temp" <<<"$sdr_table")
  OUTPUT2=$(sdr_reading "Sys Temp" <<<"$sdr_table")

  enforce_limit "$host" CPU CPU "$OUTPUT" "$1"
  echo ""

  enforce_limit "$host" ambient Ambient "$OUTPUT2" "$2"
  echo ""
done 3< <(awk 'NF >= 3 && /an0/ { print $3 }' /etc/hosts)

 

#Summary mail: when at least one node was powered off (ERRORS is 1), the
#collected messages are mailed, one mail per entry, to the configured admin
#address and to the two fixed recipients.
if [ $ERRORS == 1 ]; then
  for rcpt in "$CONFIG_ADMIN_EMAIL" ssiczek@sysgen.de dnikisch@sysgen.de; do
    #$rcpt is expanded unquoted on purpose, the same way the address was
    #handed to mail before
    cat /tmp/tempmail.out | mail -s "Node shutdown! Temperature to high" $rcpt
  done

  #cat /tmp/tempmail.out
fi

 

#Same check for the head node itself, read through the local IPMI interface
#("-I open"). Only the ambient sensor ("Sys Temp") is compared, against $2.
#
#The reading is validated before it is compared: an empty value (ipmitool
#failed) or a non-numeric one ("no" from "no reading", "disabled", ...) is
#skipped instead of being fed to the integer test, which previously ran
#first and failed with a "[: ... expected" error on such values.
#
#NOTE(review): the mail announces a shutdown, but the shutdown command below
#is commented out, so the head node keeps running -- confirm this is intended.

#First matching sensor line only, so OUTPUT2 is always a single word
OUTPUT2=$(ipmitool -I open sdr | awk '/Sys Temp/ { print $4; exit }')

echo "Temperature for $CONFIG_HEADNODE_PREFIX $OUTPUT2 C"

#[ -gt ] compares in decimal; the regex guard guarantees an integer reading
if [[ $OUTPUT2 =~ ^-?[0-9]+$ ]] && [ "$OUTPUT2" -gt "$2" ]; then
  echo "Error ambient temperature greater than $2 -- current $OUTPUT2 C"

  #$CONFIG_ADMIN_EMAIL stays unquoted, as before, so its expansion is
  #word-split exactly the way it was
  echo "$CONFIG_HEADNODE_PREFIX temperature to high, shutting down... its greater than $2 -- current $OUTPUT2 C" | mail -s "$CONFIG_HEADNODE_PREFIX temperature to high, shutting down" $CONFIG_ADMIN_EMAIL

  #shutdown -h now
fi

 

#Script done

Article Details: Views:
Last updated:
2010/07/15
Article
article viewed 130 times
Autor:
Autor
Dieter Nikisch