标签:
#!/bin/bash
# Fetch a single nginx status counter from the /ngstatus page, for the
# Tribily monitoring system.
# Author: krish@toonheart.com
# License: GPLv2
#
# Usage: <script> METRIC [host]
#   METRIC  one of: active reading writing waiting accepts handled requests
#   host    optional; defaults to the IPv4 address configured on eth2
#
# The published copy of this script had all of its newlines removed (so
# everything after the shebang was one comment) and its ASCII quotes replaced
# by typographic quotes. Both are repaired here.

PORT="80"

# Print the IPv4 address of eth2, parsed from the "inet addr:" line that
# net-tools ifconfig prints.
detect_host() {
  /sbin/ifconfig eth2 | sed -n '/inet /{s/.*addr://;s/ .*//;p}'
}

# Download the status page from $HOST:$PORT. curl's stderr is discarded so
# that only the page body reaches the parser.
fetch_status() {
  /usr/bin/curl "http://${HOST}:${PORT}/ngstatus" 2>/dev/null
}

# Read a status page on stdin and print the counter named by $1.
# Returns 1 (printing nothing) when $1 is not a known counter.
parse_metric() {
  case "$1" in
    active)   awk '/Active/  {print $NF}' ;;
    reading)  awk '/Reading/ {print $2}' ;;
    writing)  awk '/Writing/ {print $4}' ;;
    waiting)  awk '/Waiting/ {print $6}' ;;
    # The three cumulative counters are read from the third line of the page.
    accepts)  awk 'NR==3 {print $1}' ;;
    handled)  awk 'NR==3 {print $2}' ;;
    requests) awk 'NR==3 {print $3}' ;;
    *) return 1 ;;
  esac
}

# One function per counter, kept under the original names.
active()   { fetch_status | parse_metric active; }
reading()  { fetch_status | parse_metric reading; }
writing()  { fetch_status | parse_metric writing; }
waiting()  { fetch_status | parse_metric waiting; }
accepts()  { fetch_status | parse_metric accepts; }
handled()  { fetch_status | parse_metric handled; }
requests() { fetch_status | parse_metric requests; }

# Dispatch on the requested counter; print usage for anything else.
main() {
  case "$1" in
    active|reading|writing|waiting|accepts|handled|requests)
      # The optional second argument overrides interface auto-detection,
      # as the usage text has always advertised.
      HOST=${2:-$(detect_host)}
      "$1"
      ;;
    *)
      echo "Usage: $0 {active [host]|reading [host]|writing [host]|waiting [host]|accepts [host]|handled [host]|requests [host]}"
      ;;
  esac
}

main "$@"
#!/usr/bin/env python
#coding:utf-8
"""Metric module that reports Scribe counters read over fb303.

The module exposes metric_init / metric_handler / metric_cleanup and a list
of descriptor dicts (the layout used by Ganglia gmond Python modules).
Counters are fetched from the Scribe instance on 127.0.0.1:1463.
"""
import sys
import os
import traceback

import thrift
from thrift import protocol, transport
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from fb303 import *
from fb303.ttypes import *
from scribeadmin.sina_fb303_mgmt import fb303_wrapper

descriptors = []
trans_factory = TTransport.TFramedTransportFactory()
prot_factory = TBinaryProtocol.TBinaryProtocolFactory()

# Second "_"-separated token of a descriptor name -> fb303 counter key.
_COUNTER_BY_TOKEN = {
    "received": "received good",
    "sent": "sent",
    "retries": "retries",
    "reconnections": "number of reconnections",
}


def metric_handler(name):
    """Return the current value of the counter behind descriptor `name`.

    `name` is a descriptor name such as 'scribe_received_good'; its second
    "_"-separated token selects the fb303 counter. Returns 0 when the
    counters cannot be fetched (IOError) or the counter is absent (KeyError).
    """
    host = '127.0.0.1'
    port = 1463
    tmp = name.split("_")
    # Unknown tokens map to '' exactly as before; the lookup below then
    # falls into the KeyError branch unless a counter named '' exists.
    metric = _COUNTER_BY_TOKEN.get(tmp[1], '')
    try:
        retdict = fb303_wrapper('counters', host, port, trans_factory,
                                prot_factory)
        return retdict[metric]
    # The original `except IOError,KeyError:` is the Python 2 spelling of
    # `except IOError as KeyError`: it never caught KeyError, so a missing
    # counter raised out of the callback instead of reporting 0.
    except (IOError, KeyError):
        return 0


def _descriptor(name, slope, description):
    """Build one descriptor dict; all metrics share type, format and group."""
    return {'name': name,
            'call_back': metric_handler,
            'time_max': 90,
            'value_type': 'uint',
            'units': '',
            'slope': slope,
            'format': '%u',
            'description': description,
            'groups': 'scribe'}


def metric_init(params):
    """Populate and return the module-level descriptor list.

    `params` is accepted for interface compatibility and is not read.
    """
    global descriptors
    descriptors = [
        _descriptor('scribe_received_good', 'positive',
                    'Received good log entrys'),
        _descriptor('scribe_sent', 'positive', 'HDFS bytes written'),
        _descriptor('scribe_retries', 'both', 'retries for queue size'),
        _descriptor('scribe_reconnections', 'both',
                    'number of reconnections'),
    ]
    return descriptors


def metric_cleanup():
    '''Clean up the metric module.'''
    pass


# This code is for debugging and unit testing
if __name__ == '__main__':
    metric_init({})
    for d in descriptors:
        v = d['call_back'](d['name'])
        print('value for %s is %u' % (d['name'], v))
#!/usr/bin/env python
#coding:utf-8
"""Metric module that reports Scribe counters read over fb303.

This listing covers the 'received good', 'sent' and 'denied for queue size'
counters. The module exposes metric_init / metric_handler / metric_cleanup
and a list of descriptor dicts (the layout used by Ganglia gmond Python
modules). The published copy carried editor line numbers inside the code;
they are removed here.
"""
import sys
import os
import traceback

import thrift
from thrift import protocol, transport
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from fb303 import *
from fb303.ttypes import *
from scribeadmin.sina_fb303_mgmt import fb303_wrapper

descriptors = []
trans_factory = TTransport.TFramedTransportFactory()
prot_factory = TBinaryProtocol.TBinaryProtocolFactory()

# Second "_"-separated token of a descriptor name -> fb303 counter key.
_COUNTER_BY_TOKEN = {
    "received": "received good",
    "sent": "sent",
    "denied": "denied for queue size",
}


def metric_handler(name):
    """Return the current value of the counter behind descriptor `name`.

    `name` is a descriptor name such as 'scribe_denied_for_queue_size'; its
    second "_"-separated token selects the fb303 counter. Returns 0 when the
    counters cannot be fetched (IOError) or the counter is absent (KeyError).
    """
    host = '127.0.0.1'
    port = 1463
    tmp = name.split("_")
    metric = _COUNTER_BY_TOKEN.get(tmp[1], '')
    try:
        retdict = fb303_wrapper('counters', host, port, trans_factory,
                                prot_factory)
        return retdict[metric]
    # Only IOError was handled originally, so a counter missing from the
    # returned dict raised KeyError out of the callback. Report 0 instead.
    except (IOError, KeyError):
        return 0


def _descriptor(name, slope, description):
    """Build one descriptor dict; all metrics share type, format and group."""
    return {'name': name,
            'call_back': metric_handler,
            'time_max': 90,
            'value_type': 'uint',
            'units': '',
            'slope': slope,
            'format': '%u',
            'description': description,
            'groups': 'scribe'}


def metric_init(params):
    """Populate and return the module-level descriptor list.

    `params` is accepted for interface compatibility and is not read.
    """
    global descriptors
    descriptors = [
        _descriptor('scribe_received_good', 'positive',
                    'Received good log entrys'),
        _descriptor('scribe_sent', 'positive', 'HDFS bytes written'),
        _descriptor('scribe_denied_for_queue_size', 'both',
                    'Denied for queue size'),
    ]
    return descriptors


def metric_cleanup():
    '''Clean up the metric module.'''
    pass


# This code is for debugging and unit testing
if __name__ == '__main__':
    metric_init({})
    for d in descriptors:
        v = d['call_back'](d['name'])
        print('value for %s is %u' % (d['name'], v))
日常记录
#!/usr/bin/python2.6 #-*- encoding:utf-8 -*- from __future__ import division import datetime,MySQLdb import smtplib from email.MIMEText import MIMEText sender = ‘datamonitor@staff.sina.com.cn‘ receiver = [‘huming@staff.sina.com.cn‘,‘yantao1@staff.sina.com.cn‘,‘yongsan@staff.sina.com.cn‘, ‘zhaobing@staff.sina.com.cn‘, ‘data_qa@staff.sina.com.cn‘] subject = ‘Rsync log size 2.0‘ smtpserver = ‘mail.staff.sina.com.cn‘ username = ‘datamonitor@staff.sina.com.cn‘ password = ‘1234.com‘ def getValue(size): if size > 1073741824: return ‘%2.2fG‘ %(size/1024/1024/1024) if 1048576 < size and size < 1073741824: return ‘%2.2fM‘ %(size/1024/1024) if 1024 < size and size < 1048576: return ‘%2.2fK‘ %(size/1024) if 0 < size and size < 1024: return ‘%2.0fB‘ %(size) else: return ‘目录为空‘ def getPercent(value1,value2): if value2 == 0: return 0 else: return "%0.2f" % ((value1-value2)/value2*100) def main(): try: conn = MySQLdb.connect(host=‘10.39.2.120‘,user=‘monitor‘,passwd=‘123qwe‘,db=‘monitor_v2‘,port=3306) except: print "Could not connect to MySQL server." exit(0) try: cur = conn.cursor() cur.execute(‘SET autocommit=1‘) cur.execute("SET NAMES ‘utf8‘") date = datetime.date.today() + datetime.timedelta(-1) dt = date.strftime(‘%Y-%m-%d‘) sql = ‘‘‘SELECT l.log_name,lc.check_time_type,lc.filepath,SUM(lc.filesize) AS sum FROM log_size_check_record AS lc JOIN workorder_info AS w ON lc.log_id = w.l og_id JOIN log_info_list AS l ON lc.log_id = l.log_id WHERE w.`status` = ‘0‘ AND DATE(lc.dt) = %s AND l.log_type = ‘local‘ GROUP BY log_name ORDER BY sum DESC ‘‘‘ sql1 = ‘‘‘SELECT l.log_name,lc.check_time_type,lc.filepath,SUM(lc.filesize) AS sum FROM log_size_check_record AS lc JOIN workorder_info AS w ON lc.log_id = w. 
log_id JOIN log_info_list AS l ON lc.log_id = l.log_id WHERE w.`status` = ‘0‘ AND DATE(lc.dt) = %s AND l.log_type = ‘local‘ GROUP BY log_name ORDER BY sum DESC LIMIT 1 0‘‘‘ sql2 = ‘‘‘SELECT l.log_id,l.log_name,lc.check_time_type,l.receive_module,SUM(lc.filesize) AS sum FROM log_size_check_record AS lc JOIN workorder_info AS w ON lc.log_id = w.log_id JOIN log_info_list AS l ON lc.log_id = l.log_id WHERE w.`status` = ‘0‘ AND DATE(lc.dt) = %s AND l.log_type = ‘local‘ GROUP BY log_name ORDER BY su m DESC LIMIT 10‘‘‘ sql3 = ‘‘‘SELECT l.log_name,l.receive_module,u.user_name,check_time_type,SUM(lc.filesize) AS sum FROM log_info_list AS l JOIN log_size_check_record AS lc ON l. log_id = lc.log_id JOIN log_user_relation lu ON l.log_id = lu.log_id AND lu.type = ‘fz‘ AND lu.level = 1 JOIN all_user_info u ON lu.user_id = u.user_id WHERE l.log_id = %s and DATE(lc.dt) IN (%s,%s,%s) GROUP BY DATE(lc.dt) DESC‘‘‘ context = "" context += "<B>一天概览</B><BR><BR>" context += "<table border=1><tr><td>日期</td><td>日志大小</td><td>日环比</td><td>波动(%)</td><td>周同比</td><td>波动(%)</td></tr>" dtl = [] size = [] for i in [1,2,7]: sum = 0 date1 = datetime.date.today() + datetime.timedelta(-i) dt1 = date1.strftime(‘%Y-%m-%d‘) dtl.append(dt1) cur.execute(sql,dt1) for p in cur.fetchall(): sum += int(p[-1]) size.append(sum) context += "<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" % (dtl[0],getValue(size[0]),getValue(size[1]),getPercent(size[0],size[ 1]),getValue(size[2]),getPercent(size[0],size[2])) context += "</table>" context += "<BR><B>前一天日志大小前十位</B><BR><BR>" context += "<table border=1><tr><td>日志名称</td><td>模块名称</td><td>日志联系人</td><td>入库类型</td><td>日志大小</td><td>环比</td><td>波动(%)</td><td>周同比< /td><td>波动(%)</td>" log_id = [] cur.execute(sql2,dt) for p in cur.fetchall(): log_id.append(p[0]) #log_id = [‘96888fef1c2640a64ddfd3c746511937‘] for id in log_id: context += "<tr>" cur.execute(sql3,(id,dtl[0],dtl[1],dtl[2])) t = cur.fetchall() if len(t) == 1: b = list(t[0]) b += [0,0,0,0] 
if len(t) == 2: b = list(t[0]) b.append(t[1][-1]) b.append(getPercent(t[0][-1],t[1][-1])) b += [0,0] if len(t) == 3: b = list(t[0]) b.append(t[1][-1]) b.append(getPercent(t[0][-1],t[1][-1])) b.append(t[2][-1]) b.append(getPercent(t[0][-1],t[2][-1])) l = len(b) for j in range(l): if j == 4 or j == 5 or j == 7: b[j] = getValue(int(b[j])) for j in range(l): context += "<td>%s</td>" % b[j] context += "</tr>" context += "</table>" cur.execute(sql,dt) context += "<BR><B>日志详情</B><BR><BR>" context += "<table border=1><tr><td>日志名称</td><td>入库类型</td><td>日志路径</td><td>日志大小</td></tr>" for p in cur.fetchall(): context += "<tr>" l = len(p) for j in range(l): if j == 3: context += "<td>%s</td>" % getValue(int(p[j])) else: context += "<td>%s</td>" % p[j] context += "</tr>" context += "</table>" msg = MIMEText(context,‘html‘,‘utf-8‘) msg[‘Subject‘] = subject msg[‘To‘] = ",".join(receiver) msg[‘From‘] = sender try: smtp = smtplib.SMTP() smtp.connect(smtpserver) smtp.login(username, password) smtp.sendmail(sender, receiver, msg.as_string()) smtp.quit() except Exception,e: print str(e) except MySQLdb.Error,e: print "Mysql Error %d: %s" %(e.args[0],e.args[1]) if __name__ == ‘__main__‘: main()
#!/usr/bin/env python # -*- coding:utf-8 -*- # Author:zifeng from __future__ import division import os import sys import smtplib from email.MIMEText import MIMEText sender = ‘datamonitor@staff.sina.com.cn‘ #receiver = [‘zifeng@staff.sina.com.cn‘] receiver = [‘huming@staff.sina.com.cn‘, ‘yongsan@staff.sina.com.cn‘, ‘yantao1@staff.sina.com.cn‘, ‘zhaobing@staff.sina.com.cn‘] cc = [‘data_qa@staff.sina.com.cn‘] subject = ‘MFS空间使用情况‘ smtpserver = ‘mail.staff.sina.com.cn‘ username = ‘datamonitor@staff.sina.com.cn‘ password = ‘1234.com‘ def getDir(dir, dep): cmd = ‘‘‘find %s -maxdepth %s -mindepth %s -type d‘‘‘ % (dir, dep, dep) return [x.strip() for x in os.popen(cmd).readlines()] def getValue(size): if size >1099511627776: return ‘%2.2fT‘ %(size/1024/1024/1024/1024) if 1073741824 < size and size < 1099511627776: return ‘%2.2fG‘ %(size/1024/1024/1024) if 1048576 < size and size < 1073741824: return ‘%2.2fM‘ %(size/1024/1024) if 1024 < size and size < 1048576: return ‘%2.2fK‘ %(size/1024) if 0 < size and size < 1024: return ‘%2.0fB‘ %(size) else: return ‘目录为空‘ def dirInfo(dir1): cmd = ‘/usr/local/mfs/bin/mfsdirinfo‘ if not os.path.exists(cmd): print ‘Please check mfs conmand was install‘ else: cmd1 = cmd + ‘ ‘ + dir1 p = os.popen(cmd1).readlines() a = [] for i in p: if i.startswith(‘/‘): continue else: a.append(i.strip().split()[1]) a.append(dir1) return a def main(): if len(sys.argv) != 3: print ‘Please use %s + Dirname + depth‘ % sys.argv[0] sys.exit(1) if not os.path.isdir(sys.argv[1]): print ‘Error,The input path is not exists‘ sys.exit(2) else: context = ‘‘ context += "<B>MFS目录详细信息</B><BR><BR>" context += "<tr>将统计%s的%s级目录数据</tr><BR>" % (sys.argv[1], sys.argv[2]) for i in range(1, int(sys.argv[2]) + 1): print ‘In depth %s‘ % i context += "<BR><B>" + ‘In depth ‘ + str(i) + "</B><BR><BR>" context += "<table border=1><tr><td>inode</td><td>directories</td><td>files</td><td>chunks</td><td>length</td><td>size</td><td>realsize</td><td>路径</td></ tr>" l = getDir(sys.argv[1], i) 
p = [] for x in l: p.append(dirInfo(x)) p = sorted(p, cmp=lambda x, y: cmp(int(x[6]), int(y[6])), reverse=True) for d in p: l = len(d) context += "<tr>" for j in range(l): if j == 5 or j == 6: context += "<td>%s</td>" % getValue(int(d[j])) else: context += "<td>%s</td>" % d[j] context += "</tr>" context += "</table>" msg = MIMEText(context,‘html‘,‘utf-8‘) msg[‘Subject‘] = subject msg[‘To‘] = ",".join(receiver) msg[‘Cc‘] = ",".join(cc) msg[‘From‘] = sender try: smtp = smtplib.SMTP() smtp.connect(smtpserver) smtp.login(username, password) smtp.sendmail(sender, receiver, msg.as_string()) smtp.quit() except Exception,e: print str(e) if __name__ == "__main__": main()
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""Pre-create the dated destination directories of rsync-pushed logs.

Every active (workorder status '0') log with push_type 'rsync' has a
`fullpath` template containing bracketed date placeholders; the directory
for the date four days from today is created if it does not exist yet.
"""
import os
import re
import sys
import datetime,MySQLdb

# Non-greedy match of one "[...]" placeholder group.
_PLACEHOLDER_RE = re.compile(r'\[.*?\]')


def replaceDate(str, dt):
    """Expand the bracketed date placeholders of a path template.

    `str` is the template (the parameter name shadows the builtin but is
    kept for interface compatibility); `dt` is a timestamp string laid out
    as YYYYMMDD[HH[MI]]. Inside each "[...]" group the text is upper-cased
    and the tokens MI, HH, YYYY, YY, MM and DD are substituted, in that
    order, with the matching slices of `dt`. Slices beyond the end of `dt`
    are empty, so an 8-character date blanks out HH and MI. All '[' and ']'
    characters are removed from the result.
    """
    substitutions = (
        ('MI', dt[10:12]),
        ('HH', dt[8:10]),
        ('YYYY', dt[0:4]),   # must precede YY
        ('YY', dt[2:4]),
        ('MM', dt[4:6]),
        ('DD', dt[6:8]),
    )
    for group in _PLACEHOLDER_RE.findall(str):
        expanded = group.upper()
        for token, value in substitutions:
            expanded = expanded.replace(token, value)
        str = str.replace(group, expanded)
    return str.replace('[', '').replace(']', '')


def main():
    """Query the rsync log paths and create each missing directory."""
    try:
        conn = MySQLdb.connect(host='10.39.2.120', user='monitor',
                               passwd='123qwe', db='monitor_v2', port=3306)
    except MySQLdb.Error:
        print("Could not connect to MySQL server.")
        # Exit non-zero so schedulers can see the failure (was exit(0)).
        sys.exit(1)
    try:
        cur = conn.cursor()
        # Directories are prepared four days ahead of today.
        date = datetime.date.today() + datetime.timedelta(4)
        dt = date.strftime('%Y%m%d')
        sql = """SELECT l.fullpath FROM log_info_list l JOIN workorder_info w ON l.log_id = w.log_id WHERE w.`status` = '0' AND l.push_type = 'rsync'"""
        cur.execute(sql)
        for p in cur.fetchall():
            destDir = replaceDate(p[0], dt)
            if os.path.exists(destDir):
                print('%s is exists' % destDir)
                continue
            try:
                os.makedirs(destDir)
            except OSError as e:
                # Report and carry on, so that one bad path no longer aborts
                # the remaining directories.
                print('mkdir %s failed: %s' % (destDir, e))
    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # The original only closed the connection on the success path.
        conn.close()


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# coding: utf-8
"""Build the list of local log paths older than seven days.

For every (org_path, hdfs_path) pair of an active work order, the local
files or dated directories older than seven days are appended to
/tmp/list.conf, but only when yesterday's HDFS directory exists. A
de-duplicated copy is appended to /tmp/list.conf_<date 7 days ago>, and
del_mail.sh is run at the end.

NOTE(review): the published copy of this script had lost its indentation.
The nesting below (in particular, running the `sort` step once after the
loop) is reconstructed from the logic and should be confirmed.
"""
import os
import time
import sys
import MySQLdb
import re
import datetime

sys.path.append("/usr/local/jobclient/bin/python/lib")
import tools

try:
    # Connect to MySQL and read the path table.
    conn = MySQLdb.connect(host='10.39.2.120', user='monitor',
                           passwd='123qwe', db='monitor_v2', port=3306)
    cur = conn.cursor()
    #sql = "select org_path,hdfs_path from log_put_conf where status = 0;"
    sql = "select org_path,hdfs_path from log_put_conf as l JOIN workorder_info w on l.log_id = w.log_id WHERE w.`status` = '0'"
    cur.execute(sql)
    result = cur.fetchall()

    # Date strings and helper object.
    tool = tools.tools()
    today = datetime.date.today()
    # dts[0] is yesterday ... dts[6] is seven days ago, as YYYYMMDD.
    dts = [(today - datetime.timedelta(days=n)).strftime('%Y%m%d')
           for n in range(1, 8)]
    dt_1 = dts[0]
    dt_7 = dts[6]
    mon = "date +%Y%m -d '+1 month ago'"
    mon_new = tool.execu(mon)  # not used below; kept from the original
    ye = "date +%Y"
    ye_new = tool.execu(ye)

    # Drop the list written seven days ago and the current working list.
    if os.path.exists('/tmp/list.conf_%s' % dt_7):
        os.remove('/tmp/list.conf_%s' % dt_7)
    else:
        print('list_conf_%s file no exits' % dt_7)
    if os.path.exists('/tmp/list.conf'):
        os.remove('/tmp/list.conf')
    else:
        print('list_conf file no exits')

    # Walk the path pairs read from the table.
    # NOTE(review): the paths are interpolated unquoted into shell commands;
    # this presumably relies on the table holding trusted, space-free paths.
    for line in result:
        a0 = line[0]
        a1 = line[1]
        b1 = tool.replaceDate(a1, dt_1)
        # Size of the HDFS directory.
        h_size = "su - data_qa -s /bin/bash -c \"hadoop fs -dus %s\" | awk '{print $2}'" % b1
        h_size = tool.execu(h_size)
        # Exit status 0 means the HDFS directory exists.
        h_dir = "su - data_qa -s /bin/bash -c \"hadoop fs -test -d %s\"" % b1
        h_dir = os.system(h_dir)
        # NOTE(review): if execu() returns a string, `h_size > 0` is always
        # true on Python 2 (str/int ordering) and never checks the size;
        # confirm the return type before relying on it.
        if h_dir == 0 and h_size > 0:
            if "YY" in a0:
                # Everything from the first '[' on is the dated part.
                ma = re.findall(r'\[.+\].*', '%s' % (a0))
                new_b0 = a0.replace(ma[0], ' ')
                # Directories of the last seven days (and any path ending
                # in the current year) are excluded from the list.
                keep = [tool.replaceDate(ma[0], d) for d in dts]
                os.system("find %s -mtime +7 -type d | grep '[0-9]$' | egrep -v '%s|%s|%s|%s|%s|%s|%s|%s$' >> /tmp/list.conf"
                          % ((new_b0,) + tuple(keep) + (ye_new,)))
            else:
                os.system('find %s -type f -mtime +7 >> /tmp/list.conf' % (a0))
    # Remove duplicate lines.
    os.system('sort -u /tmp/list.conf | sort -rn >> /tmp/list.conf_%s' % (dt_7))
    cur.close()
    conn.close()
except MySQLdb.Error as e:
    # The original printed only the prefix and dropped the error itself.
    print("MySQL Error Msg: %s" % e)

# Delete the listed directories.
os.system('/bin/sh /usr/home/liguo/test/del_mail.sh')
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
"""Alert when kafkaproxy hosts are missing from ZooKeeper.

The hosts that should be registered are read from the monitor_v2 database;
the hosts that are registered are read from the /kafkaProxy8 znode. When
fewer hosts are registered than expected, an SMS and a mail naming the
missing hosts are sent through the internal monitor HTTP interface.

NOTE(review): the published copy of this script had lost its indentation.
The original wrapped the check in `while True` with a counter, but the
sleep was commented out and the loop appears to end in an unconditional
`break`, so it is written here as a single pass. Confirm against the
deployed copy.
"""
import os
import subprocess
import sys
import time
import socket
import MySQLdb
#os.environ["TEMP"]

service = '监控系统'
level = 'CRITICAL'
current_time = (time.strftime('%Y-%m-%d %H:%M:%S'))


# Resolve the kafkaproxy hosts currently registered in ZooKeeper to IPs.
def getAliveHosts():
    """Return the IPs of the hosts listed under /kafkaProxy8.

    Returns None (after printing 'get error') when the listing or a name
    lookup fails.
    """
    cmd = r'''/bin/sh /usr/local/zookeeper-3.4.3/bin/zkCli.sh -server 10.39.1.66:22181 ls /kafkaProxy8 2> /dev/null | grep -P "^\["'''
    try:
        # The matched line looks like "[host:port, host:port]".
        names = [x.split(":")[0]
                 for x in os.popen(cmd).readline().strip().strip('[]').split()]
        return [socket.gethostbyname(name) for name in names]
    except Exception:
        print('get error')
        return None


def Get_Dict(sql):
    """Run `sql` against monitor_v2 and return all rows."""
    conn = MySQLdb.connect(host='10.39.2.120', user='monitor',
                           passwd='123qwe', db='monitor_v2', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('SET autocommit=1')
        cur.execute("SET NAMES 'utf8'")
        cur.execute(sql)
        data = cur.fetchall()
        cur.close()
        return data
    finally:
        conn.close()


def getLocalIp():
    """Return the IPv4 address of eth1 as printed by ifconfig."""
    cmd = """/sbin/ifconfig eth1 | awk -F[" ":]+ '/inet addr/{print $4}'"""
    try:
        ip = os.popen(cmd).readline().strip()
        return ip
    except Exception:
        print('get ip error')


def getReceiver():
    """Return the comma-separated alert receivers, plus the duty user."""
    receiver = 'yongsan,jiangyu2,hongtao4,zhichao1,yantao1,zhaobing'
    cmd = "/usr/bin/curl http://monitor.pso.sina.com.cn/monitor/index.php/interface/internal/getDutyUser"
    try:
        # Strip the line ending: left in place, it ended up inside the
        # receiver list of every alert request.
        duty_user = os.popen(cmd).readline().strip()
        if duty_user:
            receiver = receiver + ',' + duty_user
    except Exception:
        print("get user error")
    return receiver


def _post(url, fields):
    """POST `fields` (a list of 'key=value' strings) to `url` with curl.

    curl is started with an argv list instead of a shell string, so the
    field values cannot be re-split or expanded by a shell.
    """
    argv = ['/usr/bin/curl']
    for field in fields:
        argv += ['-d', field]
    argv.append(url)
    subprocess.call(argv)


def sendsms(recs, service, level, subject):
    """Send an SMS alert through the monitor interface."""
    sms = 'http://monitor.pso.sina.com.cn/monitor/index.php/interface/sendSMS'
    _post(sms, ['receivers=%s' % recs, 'service=%s' % service,
                'level=%s' % level, 'subject=%s' % subject])


def sendmail(recs, service, level, subject, con):
    """Send a mail alert through the monitor interface."""
    mail = 'http://monitor.pso.sina.com.cn/monitor/index.php/interface/sendMail'
    _post(mail, ['receivers=%s' % recs, 'service=%s' % service,
                 'level=%s' % level, 'subject=%s' % subject,
                 'content=%s' % con])


# Hosts that are expected to be registered (monitoring enabled).
# `m.enabled` is compared with '1'; the published copy had ' 1' where the
# line wrapped.
sql = '''SELECT s.server_ip FROM scribe_server AS s,scribe_monitor As m where s.server_id=m.server_id and s.descriptor='新kafka集群' AND m.enabled='1' '''
simple_data = Get_Dict(sql)
# server_ip may carry a ":port" suffix; keep the address part only.
AliveHost = [row[0].split(':')[0] for row in simple_data]
ac = len(AliveHost)
message = ''


def main():
    """Compare expected and registered hosts once and alert on a shortfall."""
    test = getReceiver()
    line = "http://general.wiki.erp.sina.com.cn/Monitor_FAQ/zk_counters"
    try:
        hostList = getAliveHosts()
        if hostList is None:
            # The original fell into len(None) and died with a TypeError.
            print('could not read the kafkaproxy host list from zookeeper')
            sys.exit(1)
        if len(hostList) < ac:
            ip = ''
            for i in AliveHost:
                if i not in hostList:
                    print(i)
                    ip += i + ' '
            message = '<tr><td>[%s] 主机:%s(kafkaproxy)未连接到zk,请注意查看!\n<BR> 处理方法<a href=%s>见这里</a>\n<td/><tr/><br>' % (current_time, ip, line)
            sms_mesg = '[%s] kafkaproxy Host:%s 未连接到zk,请注意查看\n' % (current_time, ip)
            print("sms+email")
            sendsms(test, service, level, sms_mesg)
            sendmail(test, service, level, "kafkaproxy连接zookeeper报警", message)
        else:
            print("All server is ok")
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    main()
#!/bin/bash
# Monitor the Scribe back-end servers registered in ZooKeeper.
#
# The servers that should be registered are read from MySQL; the servers
# that are registered are read from the /ScribeCompactNg znode. When the two
# counts differ, a mail is sent on every run and an SMS on the first failing
# run only. $dir/pid gets one line appended per run and is removed on
# success, so its line count is the number of consecutive failing runs.
source ~/.bash_profile

sms='http://monitor.pso.sina.com.cn/monitor/index.php/interface/sendSMS'
mail='http://monitor.pso.sina.com.cn/monitor/index.php/interface/sendMail'
receiver='yongsan,jiangyu2,hongtao4,zhichao1,yantao1,zhaobing'
#receiver='yongsan'
duty_user=$(curl http://monitor.pso.sina.com.cn/monitor/index.php/interface/internal/getDutyUser)
# Quoted test: the original unquoted `[ $duty_user != "" ]` is a syntax
# error whenever the duty-user lookup returns nothing.
if [ -n "$duty_user" ]; then
  receiver="$receiver,$duty_user"
fi

dir="/usr/home/data_qa/shell"
pid="$dir/pid"
/bin/echo $$ >> "$pid"
service='监控系统'
level='CRITICAL'
message=''
message1=''

# Send the alert mail ($message) to all receivers.
send_mail() {
  curl -d "receivers=$receiver" -d "service=$service" -d "level=$level" \
    -d "subject=zookeeper连接报警" -d "content=$message" "$mail"
}

# Send the alert SMS ($message1) to all receivers.
send_sms() {
  curl -d "receivers=$receiver" -d "service=$service" -d "level=$level" \
    -d "subject=$message1" "$sms"
}

# Expected servers. -N drops the column-name header that the original had to
# subtract from the count; any ":port" suffix is cut off.
# NOTE(review): the MySQL password is passed on the command line and is
# visible in the process list; consider a protected option file.
alive_ip=()
while IFS= read -r row; do
  [ -n "$row" ] && alive_ip+=("${row%%:*}")
done < <(/usr/bin/mysql -N -B -umonitor -h 10.39.2.120 -p123qwe --database monitor_v2 \
  -e "SELECT s.server_ip FROM scribe_server AS s,scribe_monitor As m where s.server_id=m.server_id and s.descriptor='hadoop2.0接收机' AND m.enabled='1';")
alive_num=${#alive_ip[@]}
echo "alive_num:" "$alive_num"

# Registered servers: one "host:port" entry per output line. The command is
# fed through a here-string instead of a scratch file named "script" in the
# current directory.
Ng_line=$(/usr/local/zookeeper-3.4.3/bin/zkCli.sh -server 10.39.1.66 \
  <<< "ls /ScribeCompactNg" 2>/dev/null \
  | grep yz \
  | grep -v CONNECTED \
  | sed 's/\[/ /g;s/\]//g' \
  | awk -F, '{for(i=1;i<=NF;i++) print $i}')

Sc_Ng=()
Ng_num=0
# Word splitting is intended here: it also trims the padding around entries.
# shellcheck disable=SC2086
for s in $Ng_line; do
  Ng_ip=${s%%:*}
  # The "PING host (a.b.c.d)" banner gives the address the name resolves to.
  Sc_Ng+=("$(/bin/ping -c 1 "$Ng_ip" | grep PING | awk '{print $3}' | sed 's/[()]//g')")
  Ng_num=$((Ng_num + 1))
done
echo "Ng_num:" "$Ng_num"

if [ "$alive_num" -ne "$Ng_num" ]; then
  ip2=''
  for ap in "${alive_ip[@]}"; do
    # Exact, fixed-string, whole-line match: the original `grep $ap` treated
    # the IP as a regex and also matched prefixes such as 10.39.5.22 inside
    # 10.39.5.220.
    if ! printf '%s\n' "${Sc_Ng[@]}" | grep -qxF -- "$ap"; then
      ip2+=",$ap"
    fi
  done
  if [ -n "$ip2" ]; then
    message="[$(date +'%F %T')] 报警内容:正常连接到zk个数为: ${Ng_num}个,未连接到zk的ip为$ip2 <BR> 处理方法<a href=http://general.wiki.erp.sina.com.cn/Monitor_FAQ/zk_counters>见这里</a>"
    message1="[$(date '+%F %T')]scribe2.0接收集群ip:$ip2 未连接到zk,请注意查看!"
  fi
else
  # Healthy: reset the consecutive-failure counter.
  test -f "$pid" && /bin/rm -f "$pid" > /dev/null
  exit 0
fi

if [ -n "$message" ]; then
  if [ -e "$pid" ]; then
    failures=$(( $(wc -l < "$pid") ))
    # Mail on every failing run; SMS on the first only; from the third run
    # on, reset the counter so the cycle (and the SMS) starts again.
    send_mail
    case "$failures" in
      1) send_sms ;;
      2) ;;
      *) /bin/rm -f "$pid" > /dev/null ;;
    esac
  else
    echo "pid is not exist,exit!"
  fi
fi
标签:
原文地址:http://www.cnblogs.com/liyongsan/p/5571848.html