标签:linux nagios
nagios 服务端 yum install -y httpd nagios nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe #设置登录nagios后台的用户和密码: htpasswd -c /etc/nagios/passwd nagiosadmin vim /etc/nagios/nagios.cfg #检测配置文件 nagios -v /etc/nagios/nagios.cfg #启动服务: service httpd start; service nagios start #浏览器访问: 客户端 yum install -y nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe vim /etc/nagios/nrpe.cfg 修改“allowed_hosts=127.0.0.1”为“allowed_hosts=127.0.0.1,192.168.177.145” 后面的ip为服务端ip; 修改“dont_blame_nrpe=0”为“dont_blame_nrpe=1” #启动客户端 /etc/init.d/nrpe start #服务器端添加需要监控的客户端配置(如客户端192.168.177.140) vim /etc/nagios/conf.d/192168.177140.cfg 添加配置如下 define host{ use linux-server host_name 192.168.177.140 alias 177.140 address 192.168.177.140 } #check_ping define service{ use generic-service host_name 192.168.177.140 service_description check_ping check_command check_ping!100.0,20%!200.0,50% max_check_attempts 5 normal_check_interval 1 } #check_ssh define service{ use generic-service host_name 192.168.177.140 service_description check_ssh check_command check_ssh max_check_attempts 5 normal_check_interval 1 } #check_http define service{ use generic-service host_name 192.168.177.140 service_description check_http check_command check_http max_check_attempts 5 normal_check_interval 1 } #check_load define service{ use generic-service host_name 192.168.177.140 service_description check_load check_command check_load max_check_attempts 5 normal_check_interval 1 } #check_hda1 使用check_nrpe需要编辑服务器端command.cfg文件 define service{ use generic-service host_name 192.168.177.140 service_description check_hda1 check_command check_nrpe!check_hda1 max_check_attempts 5 normal_check_interval 1 } #check_hda3 define service{ use generic-service host_name 192.168.177.140 service_description check_hda3 check_command check_nrpe!check_hda3 max_check_attempts 5 normal_check_interval 1 } #check_mysql define service{ use generic-service host_name 192.168.177.140 service_description check_mysql check_command check_nrpe!check_mysql max_check_attempts 5 normal_check_interval 1 } #当nagios检测到问题时,一共尝试检测5次都有问题才会告警,如果该数值为1,那么检测到问题立即告警 max_check_attempts 5 #重新检测的时间间隔,单位是分钟,默认是3分钟 normal_check_interval 1 #在服务出现异常后,故障一直没有解决,nagios再次对使用者发出通知的时间。单位是分钟。如果你认为,所有的事件只需要一次通知就够了,可以把这里的选项设为0 notification_interval 60 #服务器端/etc/nagios/objects/command.cfg vim /etc/nagios/objects/command.cfg 添加 define command{ command_name check_nrpe command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ } #check_mysql脚本 #!/bin/bash USAGE="`basename $0` [-w|--warning]<percent free> [-c|--critical]<percent free>" THRESHOLD_USAGE="WARNING threshold must be greater than CRITICAL: `basename $0` $*" calc=/tmp/memcalc percent_free=/tmp/mempercent critical="" warning="" STATE_OK=0 STATE_WARNING=1 STATE_CRITICAL=2 STATE_UNKNOWN=3 # print usage if [[ $# -lt 4 ]] then echo "" echo "Wrong Syntax: `basename $0` $*" echo "" echo "Usage: $USAGE" echo "" exit 0 fi # read input while [[ $# -gt 0 ]] do case "$1" in -w|--warning) shift warning=$1 ;; -c|--critical) shift critical=$1 ;; esac shift done # verify input if [[ $warning -eq $critical || $warning -lt $critical ]] then echo "" echo "$THRESHOLD_USAGE" echo "" echo "Usage: $USAGE" echo "" exit 0 fi # Total memory available total=`free -m | head -2 |tail -1 |gawk ‘{print $2}‘` # Total memory used used=`free -m | head -2 |tail -1 |gawk ‘{print $3}‘` # Calc total minus used free=`free -m | head -2 |tail -1 |gawk ‘{print $2-$3}‘` # normal values #echo "$total"MB total #echo "$used"MB used #echo "$free"MB free # make it into % percent free = ((free mem / total mem) * 100) echo "5" > $calc # decimal accuracy echo "k" >> $calc # commit echo "100" >> $calc # multiply echo "$free" >> $calc # division integer echo "$total" >> $calc # division integer echo "/" >> $calc # division sign echo "*" >> $calc # multiplication sign echo "p" >> $calc # print percent=`/usr/bin/dc $calc|/bin/sed ‘s/^\./0./‘|/usr/bin/tr "." " "|/usr/bin/gawk {‘print $1‘}` #percent1=`/usr/bin/dc $calc` #echo "$percent1" if [[ "$percent" -le $critical ]] then echo "CRITICAL - $free MB ($percent%) Free Memory" exit 2 fi if [[ "$percent" -le $warning ]] then echo "WARNING - $free MB ($percent%) Free Memory" exit 1 fi if [[ "$percent" -gt $warning ]] then echo "OK - $free MB ($percent%) Free Memory" exit 0 fi
本文出自 “ubuntu” 博客,谢绝转载!
标签:linux nagios
原文地址:http://thankinglove.blog.51cto.com/2311485/1721519