ganglia
ganglia 是分布式的监控系统,由两个Daemon(客户端Ganglia Monitoring Daemon (gmond)和服务端Ganglia Meta Daemon (gmetad))以及Ganglia PHP Web Frontend(基于web的动态访问方式)组成。它是一个Linux下图形化监控系统运行性能的软件,界面美观、丰富,功能强大。
http://ganglia.sourceforge.net/ 软件下载
环境:RHEL6 x86-64 disable selinux and iptables
主机:192.168.0.27 server27.example.com
192.168.0.168 server68.example.com
#下载ganglia-3.6.0.tar.gz ganglia-web-3.5.2.tar.gz libconfuse-devel-2.6-3.el6.x86_64.rpm libconfuse-2.7-4.el6.x86_64.rpm
#yum install -y rpm-build libart_lgpl-devel gcc-c++ python-devel libconfuse-devel pcre-devel expat-devel apr-devel ##安装软件包依赖性
#rpm -ivh libconfuse-*
#rpmbuild -tb ganglia-3.6.0.tar.gz
#下载rrdtool-devel-1.3.8-6.el6.x86_64.rpm
#rpm -q rrdtool ##查看是否安装
#rpm -ivh rrdtool-devel-1.3.8-6.el6.x86_64.rpm
#rpmbuild -tb ganglia-3.6.0.tar.gz
#cd /root/rpmbuild/RPMS/x86_64
#rpm -ivh *
#rpmbuild -tb ganglia-web-3.5.2.tar.gz
#cd /root/rpmbuild/RPMS/noarch/
#rpm -ivh ganglia-web-3.5.2-1.noarch.rpm
#cd /etc/ganglia/
#vim gmetad.conf
name= "my cluster"
data_source "my cluster" 192.168.0.27(本机ip):86xx
#vim gmond.conf
name="my cluster"
修改udp的端口号(3个)
8649-->86xx
#cd /root/rpmbuild/RPMS/x86_64/
#scp ganglia-gmond-3.6.0-1.x86_64.rpm ganglia-gmond-modules-python-3.6.0-1.x86_64.rpm libganglia-3.6.0-1.x86_64.rpm 192.168.0.168: (yum install openssh-clients )
在168这台主机上
#下载libconfuse-devel-2.6-3.el6.x86_64.rpm libconfuse-2.7-4.el6.x86_64.rpm
#yum localinstall *
#cd /etc/ganglia/
#vim gmond.conf
name="my cluster"
修改udp的端口号(3个)
8649-->86xx (!注意:该端口必须与主机的保持一致)
#service gmond restart
#iptables -F
#service gmond restart
#service gmetad restart
#cd /var/lib/ganglia/rrds/ 在该目录下会生成相应的数据文件,可以看到每个主机的度量指标,生成了易于查看的图形.
这时通过web访问192.168.0.27/ganglia就可看见刚建立的集群
ganglia与nagios的整合
注:ganglia与nagios可以部署在不同的主机,因为端口号设置相同,脚本会自动提取内容。
注:check_ganglia.py 命令仅在阈值过高时发出警告。如果希望在阈值过低时发出警告(在disk_free 中是这样),则需要修改代码。我更改了文件的最后部分,如下所示:
#cd /root/ganglia-3.6.0/contrib/
#cp check_ganglia.py /usr/local/nagios/libexec/
#cd /usr/local/nagios/libexec/(查看是否复制成功)
#chown nagios.nagios check_ganglia.py
#/usr/local/nagios/libexec/check_ganglia.py -h server27.example.com -m disk_free_percent_rootfs -w 30 -c 10 -p 86xx
#vim check_ganglia.py (修改脚本,将文件的最后部分改为如下内容)
except Exception, err:
print "CHECKGANGLIA UNKNOWN: Error while getting value \"%s\"" % (err)
sys.exit(3)
if critical > warning:
if value >= critical:
print "CHECKGANGLIA CRITICAL: %s is %.2f" % (metric, value)
sys.exit(2)
elif value >= warning:
print "CHECKGANGLIA WARNING: %s is %.2f" % (metric, value)
sys.exit(1)
else:
print "CHECKGANGLIA OK: %s is %.2f" % (metric, value)
sys.exit(0)
else:
if critical >= value:
print "CHECKGANGLIA CRITICAL: %s is %.2f" % (metric, value)
sys.exit(2)
elif warning >= value:
print "CHECKGANGLIA WARNING: %s is %.2f" % (metric, value)
sys.exit(1)
else:
print "CHECKGANGLIA OK: %s is %.2f" % (metric, value)
sys.exit(0)
#/usr/local/nagios/libexec/check_ganglia.py -h server27.example.com -m disk_free_percent_rootfs -w 30 -c 10 看运行结果是否报错
#vim check_ganglia.py
ganglia_port=86xx
#/usr/local/nagios/libexec/check_ganglia.py -h server27.example.com -m disk_free_percent_rootfs -w 30 -c 10 看运行结果是否报错
#cd /usr/local/nagios/etc/objects/
#vim commands.cfg
# 'check_ganglia' command definition
define command{
command_name check_ganglia
command_line $USER1$/check_ganglia.py -h $HOSTADDRESS$ -m $ARG1$ -w $ARG2$ -c $ARG3$
}
#vim /usr/local/nagios/etc/objects/templates.cfg
最后加入
define service {
use generic-service
name ganglia-service
hostgroup_name ganglia-servers
service_groups ganglia-metrics
register 0 ##不进行注册
}
# vim /usr/local/nagios/etc/objects/hosts.cfg ##定义主机
最后加入
define hostgroup {
hostgroup_name ganglia-servers
alias ganglia-servers
members server68.example.com
}
#vim /usr/local/nagios/etc/objects/services.cfg ##定义服务
###############check_ganglia################
define servicegroup {
servicegroup_name ganglia-metrics
alias Ganglia Metrics
}
define service{
use ganglia-service
service_description 根分区
check_command check_ganglia!disk_free_percent_rootfs!20!10
}
define service{
use ganglia-service
service_description 系统负载
check_command check_ganglia!load_one!4!5
}
define service{
use ganglia-service
service_description 内存空闲
check_command check_ganglia!mem_free!50000!30000
}
#service nagios reload
#web 访问192.168.0.27/nagios
如果一切正常,您应该看到 Ganglia 数据现在已经在 Nagios 的监视之下。结合使用 Ganglia 和 Nagios,您可以监视任何内容。
——leeypp@gmail.com
原文地址:http://my.oschina.net/leeypp1/blog/293340