#!/bin/bash
# Builds and installs NRPE 2.12 and nagios-plugins 2.1.1 from the tarballs in
# the current directory, installs the two custom checks (check_cpu.sh and
# check_mem.sh, also expected in the current directory), appends the NRPE
# command definitions, and restricts the xinetd-managed NRPE service to the
# allowed hosts.
# NOTE(review): writes to /etc and /usr/local/nagios, so it presumably has to
# be run as root.

# Replace with the IP address of the monitoring server. It is kept out of the
# heredoc below so that no comment text ends up inside the only_from value.
monitor_ip="xxx.xxx.xxx.xxx"

groupadd nagcmd
useradd nagios -g nagcmd
chmod 755 check_cpu.sh check_mem.sh
tar -zxf nrpe-2.12.tar.gz
tar -zxf nagios-plugins-2.1.1.tar.gz

# Abort if the source tree is missing; otherwise the make targets below would
# run in whatever directory we happen to be in.
cd nrpe-2.12 || { echo "cannot cd to nrpe-2.12" >&2; exit 1; }
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
make install-xinetd

# Register the NRPE port once; skip if an NRPE entry is already present.
if ! grep -q NRPE /etc/services; then
  echo "nrpe 5666/tcp # NRPE" >> /etc/services
fi

cd ../nagios-plugins-2.1.1 || { echo "cannot cd to nagios-plugins-2.1.1" >&2; exit 1; }
# NOTE(review): the nagios account above is created with primary group
# "nagcmd", while configure is told the group is "nagios", which this script
# never creates - confirm that group exists on the target host.
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make && make install
/bin/cp ../check_mem.sh /usr/local/nagios/libexec/
/bin/cp ../check_cpu.sh /usr/local/nagios/libexec/

# Keep a pristine copy of the shipped config before appending our commands.
/bin/cp /usr/local/nagios/etc/nrpe.cfg /usr/local/nagios/etc/nrpe.cfg.default
cat >> /usr/local/nagios/etc/nrpe.cfg <<'EOF'
command[check_Users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_Load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_disk_root]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /
command[check_disk_home]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /home
command[check_disk_boot]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /boot
command[check_Cpu]=/usr/local/nagios/libexec/check_cpu.sh -w 20 -c 10
command[check_Memory]=/usr/local/nagios/libexec/check_mem.sh -w 20 -c 10
command[check_Swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
EOF

# Comment out the shipped only_from line and drop the closing brace, then
# append our own only_from line followed by a new closing brace.
sed -i -e 's/only_from/#only_from/g' -e '/}/d' /etc/xinetd.d/nrpe
cat >> /etc/xinetd.d/nrpe <<EOF
only_from = 127.0.0.1 ${monitor_ip}
}
EOF
/etc/init.d/xinetd restart
# check_cpu.sh and check_mem.sh were found online; both are included below.
check_cpu.sh:
#!/bin/bash
# Check CPU Usage via /proc/stats
########################
# DECLARATIONS
########################
# Script name as invoked; used in usage and version output.
PROGNAME=$(basename "$0")
# Keep only digits and dots from the revision keyword string, giving "1.0".
# The single quotes are intentional: "$Revision" must not be expanded.
# shellcheck disable=SC2016
REVISION=$(echo '$Revision: 1.0 $' | sed -e 's/[^0-9.]//g')
DEBUG=0
exitstatus=0
result=""
perfdata=""
# Decimal places passed to bc as "scale" when computing percentages.
scale=2
show_all=0
# 999 acts as the "no threshold given" sentinel for both limits.
warning=999
critical=999
# State file holding the counters from the previous run.
TMPFILE="/tmp/check_cpu.tmp"
# Output prefixes indexed by exit status.
status[0]="OK: "
status[1]="WARNING: "
status[2]="CRITICAL: "
status[3]="UNKNOWN: "
########################
# FUNCTIONS
########################
print_usage() {
  # Print the synopsis, an example invocation and the option list to stdout.
  # Reads PROGNAME for the script name shown in the first two lines.
  printf '%s\n' \
    "Usage: $PROGNAME [options]" \
    " e.g. $PROGNAME -w 75 -c 90 -s 2 --all" \
    "" \
    "Options:"
  # Every option line is prefixed with a tab and a single space.
  printf '\t %s\n' \
    "--help | -h print help" \
    "--version | -V print version" \
    "--verbose | -v be verbose (debug mode)" \
    "--scale | -s [int] decimal precision of results" \
    "default=2" \
    "--all | -a return values for all cpus individually" \
    "default= summary data only" \
    "-w [int] set warning value" \
    "-c [int] set critical value"
  # Two trailing blank lines.
  printf '\n\n'
}
print_help() {
  # Print the revision banner, a one-line description and the usage text,
  # then terminate the script with exit status 3.
  printf '%s\n' \
    "${PROGNAME} Revision: ${REVISION}" \
    "" \
    "This plugin checks local cpu usage using /proc/stat" \
    ""
  print_usage
  printf '\n'
  exit 3
}
parse_options() {
  # Parse the command-line arguments and set the globals DEBUG, scale,
  # show_all, critical and warning accordingly.
  #   --help|-h, --version|-V   print information and exit 3
  #   --verbose|-v              DEBUG=1
  #   --all|-a                  show_all=1
  #   --scale|-s N, -c N, -w N  store N in scale / critical / warning
  # Any other argument, or a value option without its value, prints the
  # usage text and exits 3.
  (( DEBUG )) && echo "Parsing options $1 $2 $3 $4 $5 $6 $7 $8"
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --help|-h)
        print_help
        exit 3
        ;;
      --version|-V)
        echo "${PROGNAME} Revision: ${REVISION}"
        exit 3
        ;;
      --verbose|-v)
        DEBUG=1
        shift 1
        ;;
      --all|-a)
        show_all=1
        shift 1
        ;;
      --scale|-s|-c|-w)
        # "shift 2" fails without shifting when only one argument is left,
        # which would spin forever; reject the missing value up front.
        if [ "$#" -lt 2 ]; then
          echo "Option $1 requires a value!"
          print_usage
          exit 3
        fi
        case "$1" in
          --scale|-s) scale="$2" ;;
          -c) critical="$2" ;;
          -w) warning="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown option!"
        print_usage
        exit 3
        ;;
    esac
  done
}
write_tmpfile() {
  # Overwrite TMPFILE with the current timestamp and, for every index from 0
  # to cpucount inclusive, the current counters as old_<name>[<index>]=<value>
  # assignments, so a later run can source them back.
  local idx
  {
    printf 'old_date=%s\n' "$(date +%s)"
    for idx in $(seq 0 1 ${cpucount} ); do
      # printf reuses the format for each (name, index, value) triple.
      printf 'old_%s[%s]=%s\n' \
        system "${idx}" "${system[${idx}]}" \
        user "${idx}" "${user[${idx}]}" \
        nice "${idx}" "${nice[${idx}]}" \
        iowait "${idx}" "${iowait[${idx}]}" \
        irq "${idx}" "${irq[${idx}]}" \
        softirq "${idx}" "${softirq[${idx}]}" \
        idle "${idx}" "${idle[${idx}]}" \
        used "${idx}" "${used[${idx}]}" \
        total "${idx}" "${total[${idx}]}"
    done
  } > "${TMPFILE}"
}
read_tmpfile() {
  # Load the old_* variables saved by a previous run from TMPFILE.
  # Does nothing when the file is missing (first run) or when its content is
  # not trusted. Always returns 0.

  # Nothing to load yet.
  [ -f "${TMPFILE}" ] || return 0

  # The file is executed via "source" and sits at a fixed path, so anything
  # written there would run as this user. Only accept files in which every
  # line is a plain old_<name>=<digits> or old_<name>[<index>]=<digits>
  # assignment; grep -v succeeds if at least one line does not match.
  if LC_ALL=C grep -Eqv '^old_[a-z]+(\[[0-9]+\])?=[0-9]*$' "${TMPFILE}"; then
    (( DEBUG )) && echo "Ignoring ${TMPFILE}: unexpected content" >&2
    return 0
  fi

  source "${TMPFILE}" # include the vars from the tmp file
  (( DEBUG )) && cat "${TMPFILE}"
  return 0
}
########################
# MAIN
########################
# Main flow: parse the command line, load the counters saved by the previous
# run, sample /proc/stat, turn the counter deltas into percentages, save the
# new counters, then print a Nagios status line with perfdata and exit with
# the matching status code.

# calc_pct DELTA TOTAL_DELTA
# Prints DELTA as a percentage of TOTAL_DELTA using bc with ${scale} decimals.
# Prints 0 when TOTAL_DELTA is not positive (no elapsed ticks, or counters
# lower than the saved ones) instead of dividing by zero.
calc_pct() {
  if [ "$2" -le 0 ]; then
    echo 0
    return 0
  fi
  bc <<< "scale=${scale};$1*100/$2"
}

parse_options "$@"

if ! command -v bc >/dev/null 2>&1; then
  echo "${status[3]}bc is required but was not found"
  exit 3
fi

read_tmpfile
procstat=$(cat /proc/stat 2>&1)
(( DEBUG )) && echo "$procstat"

# Without the aggregate "cpu" line there is nothing to compute.
if ! grep -q '^cpu ' <<< "${procstat}"; then
  echo "${status[3]}could not read cpu counters from /proc/stat"
  exit 3
fi

# Highest cpuN index plus one. Index ${cpucount} itself is used below for the
# aggregate "cpu" line.
cpucount=$(( $(grep -E '^cpu[0-9]+ ' <<< "${procstat}" | tail -n 1 | cut -d' ' -f 1 | grep -Eo '[0-9]+') + 1 ))
(( DEBUG )) && echo "cpucount=${cpucount}"

for a in $(seq 0 1 "${cpucount}"); do
  if [ "$a" -eq "${cpucount}" ]; then
    cpu[$a]=$(grep -m 1 '^cpu ' <<< "${procstat}")
  else
    # Anchor and terminate the name so cpu1 does not also match cpu10..cpu19.
    cpu[$a]=$(grep -m 1 "^cpu${a} " <<< "${procstat}")
  fi
  # A missing line (gap in the cpuN numbering) counts as all zeros.
  [ -n "${cpu[$a]}" ] || cpu[$a]="cpu${a} 0 0 0 0 0 0 0"

  # Fields after the name: user nice system idle iowait irq softirq.
  # read splits on any run of whitespace, so repeated spaces are harmless.
  read -r _ "user[$a]" "nice[$a]" "system[$a]" "idle[$a]" "iowait[$a]" "irq[$a]" "softirq[$a]" _ <<< "${cpu[$a]}"

  # Array references without "$" inside $(( )) evaluate unset or empty
  # elements as 0, so missing fields and a missing state file need no
  # explicit defaults.
  used[$a]=$(( user[a] + nice[a] + system[a] + iowait[a] + irq[a] + softirq[a] ))
  total[$a]=$(( used[a] + idle[a] ))

  diff_user[$a]=$(( user[a] - old_user[a] ))
  diff_nice[$a]=$(( nice[a] - old_nice[a] ))
  diff_system[$a]=$(( system[a] - old_system[a] ))
  diff_idle[$a]=$(( idle[a] - old_idle[a] ))
  diff_iowait[$a]=$(( iowait[a] - old_iowait[a] ))
  diff_irq[$a]=$(( irq[a] - old_irq[a] ))
  diff_softirq[$a]=$(( softirq[a] - old_softirq[a] ))
  diff_used[$a]=$(( used[a] - old_used[a] ))
  diff_total[$a]=$(( total[a] - old_total[a] ))

  pct_user[$a]=$(calc_pct "${diff_user[$a]}" "${diff_total[$a]}")
  pct_nice[$a]=$(calc_pct "${diff_nice[$a]}" "${diff_total[$a]}")
  pct_system[$a]=$(calc_pct "${diff_system[$a]}" "${diff_total[$a]}")
  pct_idle[$a]=$(calc_pct "${diff_idle[$a]}" "${diff_total[$a]}")
  pct_iowait[$a]=$(calc_pct "${diff_iowait[$a]}" "${diff_total[$a]}")
  pct_irq[$a]=$(calc_pct "${diff_irq[$a]}" "${diff_total[$a]}")
  pct_softirq[$a]=$(calc_pct "${diff_softirq[$a]}" "${diff_total[$a]}")
  pct_used[$a]=$(calc_pct "${diff_used[$a]}" "${diff_total[$a]}")
done

write_tmpfile

# Compare the integer part of the aggregate usage with the thresholds.
# bc prints values below 1 without a leading zero (".50"), which leaves an
# empty integer part; treat that as 0.
used_int=${pct_used[${cpucount}]%%.*}
[[ "${used_int}" =~ ^-?[0-9]+$ ]] || used_int=0
[ "${used_int}" -ge "${warning}" ] && exitstatus=1
[ "${used_int}" -ge "${critical}" ] && exitstatus=2

result="CPU=${pct_used[${cpucount}]}"
if [ "$show_all" -gt 0 ]; then
  for (( a = 0; a < cpucount; a++ )); do
    result="${result}, CPU${a}=${pct_used[${a}]}"
  done
fi

# 999 means "no threshold given"; leave those perfdata fields empty.
if [ "${warning}" = "999" ]; then
  warning=""
fi
if [ "${critical}" = "999" ]; then
  critical=""
fi

perfdata="used=${pct_used[${cpucount}]};${warning};${critical};; system=${pct_system[${cpucount}]};;;; user=${pct_user[${cpucount}]};;;; nice=${pct_nice[${cpucount}]};;;; iowait=${pct_iowait[${cpucount}]};;;; irq=${pct_irq[${cpucount}]};;;; softirq=${pct_softirq[${cpucount}]};;;;"
if [ "$show_all" -gt 0 ]; then
  for (( a = 0; a < cpucount; a++ )); do
    perfdata="${perfdata} used${a}=${pct_used[${a}]};;;; system${a}=${pct_system[${a}]};;;; user${a}=${pct_user[${a}]};;;; nice${a}=${pct_nice[${a}]};;;; iowait${a}=${pct_iowait[${a}]};;;; irq${a}=${pct_irq[${a}]};;;; softirq${a}=${pct_softirq[${a}]};;;;"
  done
fi

echo "${status[$exitstatus]}${result} | ${perfdata}"
exit $exitstatus
check_mem.sh :
#!/bin/bash
# check_mem for Nagios
# http://cto.luxiaok.com
# 2013-03-25
#
# Reports free memory as a percentage of total memory, based on "free -m".
# Usage: -w|--warning <percent free> -c|--critical <percent free>
# The warning threshold must be greater than the critical threshold.
# Exit status: 0 OK, 1 warning, 2 critical, 3 unknown (bad arguments or
# unreadable memory figures).
USAGE="$(basename "$0") [-w|--warning]<percent free:0-99> [-c|--critical]<percent free:0-99>"
THRESHOLD_USAGE="WARNING threshold must be greater than CRITICAL: $(basename "$0") $*"
critical=""
warning=""
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# print usage
if [[ $# -lt 4 ]]; then
  echo ""
  echo "Wrong Syntax: $(basename "$0") $*"
  echo ""
  echo "Usage: $USAGE"
  echo ""
  # A usage error must not be reported as OK to the monitoring server.
  exit $STATE_UNKNOWN
fi

# read input
while [[ $# -gt 0 ]]; do
  case "$1" in
    -w|--warning)
      shift
      warning=$1
      ;;
    -c|--critical)
      shift
      critical=$1
      ;;
  esac
  shift
done

# verify input: both thresholds must be plain integers, and warning must be
# strictly greater than critical (the values are percentages of FREE memory)
if ! [[ "$warning" =~ ^[0-9]+$ && "$critical" =~ ^[0-9]+$ ]] ||
  (( warning <= critical )); then
  echo ""
  echo "$THRESHOLD_USAGE"
  echo ""
  echo "Usage: $USAGE"
  echo ""
  exit $STATE_UNKNOWN
fi

# Read total, used and usable memory (MB) from the second line of "free -m".
# If the header has an "available" column, use it ($7); otherwise use
# free + buffers + cached ($4+$6+$7) as before. LC_ALL=C keeps the header in
# English so the column test works.
read -r total used free < <(
  LC_ALL=C free -m | awk '
    NR == 1 { has_available = ($0 ~ /available/) }
    NR == 2 {
      usable = has_available ? $7 : $4 + $6 + $7
      print $2, $3, usable
    }'
)

# Normal values
#echo "$total"MB total
#echo "$used"MB used
#echo "$free"MB free

# Refuse to divide by an empty or zero total.
if ! [[ "$total" =~ ^[0-9]+$ && "$free" =~ ^[0-9]+$ ]] || (( total == 0 )); then
  echo "Unknow Status"
  exit $STATE_UNKNOWN
fi

# make it into % percent free = ((free mem / total mem) * 100)
percent=$(( free * 100 / total ))

if (( percent <= critical )); then
  echo "Critical - $free MB ($percent%) Free Memory"
  exit $STATE_CRITICAL
elif (( percent <= warning )); then
  echo "Warning - $free MB ($percent%) Free Memory"
  exit $STATE_WARNING
else
  echo "OK - $free MB ($percent%) Free Memory"
  exit $STATE_OK
fi
原文地址:http://4878151.blog.51cto.com/4868151/1826497