简单画了个图:
首先,后端程序及客户端都是分成三个版本:内部测试版,线上测试版,线上稳定版。线上测试版是小范围更新,经过一天测试没问题,然后再推到线上稳定版,更新其他服,一般游戏也都是按这个流程来更新的。
运维管理后台,记录了区服信息,提供各种简单API接口给各脚本使用。
然后是批量维护脚本:create_list.py通过运维管理后台提供的API,根据输入的参数(平台,区服范围)生成一份供cqbyupdate.py使用的iplist文件,cqbyupdate.py再根据这份iplist文件对其中的区服执行相应的操作。
saltstack,用于全服批量修改配置,例如批量修改zabbix的配置,批量修改nginx的配置等等。
rsync,用于数据同步,例如给游戏服拉取最新版本。
游戏服最关键的只有一个control.py脚本,该脚本集成了管理单个游戏区服的所有操作,根据传进去的版本参数及动作参数执行对应的操作。
整套架构的优点是:全服维护可用cqbyupdate.py脚本操作;如果临时想在某台游戏服上做些更新,可用单服脚本control.py操作,比较灵活。缺点是对中心机依赖比较高,万一中心机宕机了,就麻烦大了,所以搞了一台备份中心机。这套架构已经上线,累计开服3000+。
control.py单服维护脚本:
#!/usr/bin/python #coding=utf-8 import subprocess import shutil import os import sys reload(sys) sys.setdefaultencoding(‘utf-8‘) import optparse import ConfigParser import time import jinja2 import urllib2 import json import socket try: import fcntl except: pass import struct import MySQLdb def get_ip_address(ifname): s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) return socket.inet_ntoa(fcntl.ioctl( s.fileno(), 0x8915, # SIOCGIFADDR struct.pack(‘256s‘, ifname[:15]) )[20:24]) class Cqby: def __init__(self, version, platform, platformid, id): self.version = version self.platform = platform self.platformid = platformid self.id = id #工作目录: self.workdir = ‘/data/init‘ #定义游戏程序目录: self.gamedir = ‘/data/game/game%s‘ % self.id try: os.makedirs(‘/data/game‘) except: print "目录已存在" #当前游戏聊天监控目录: self.chatdir = ‘/data/game/chat%s‘ % self.id #定义游戏端口: if int(self.id) > 50000: self.gameport = str(self.id) else: self.gameport = 20000 + int(self.id) self.gameport = str(self.gameport) try: self.localip=get_ip_address(‘eth0‘) except: self.localip=get_ip_address(‘em1‘) #定义数据库名称: self.dbname = ‘game%s‘ % self.id #定义管理员使用的数据库帐号密码: self.admindbuser = ‘root‘ self.admindbpass = ‘123456‘ #定义备份目录: self.backup = ‘/data/backup‘ try: os.makedirs(self.backup) except: print "目录已经存在" #建立日志目录: self.gamelogdir = ‘/data/gamelogs/chuanqi/%s/S%s‘ % (self.platform, self.id) if not os.path.isdir(self.gamelogdir): os.makedirs(self.gamelogdir) subprocess.call(‘chown www:www -R /data/gamelogs‘,shell=True) #程序配置文件模板: self.binConfigDir = ‘%s/bin‘ % self.gamedir self.binConfigFiles = [‘socket.jinja2‘] self.confConfigDir = ‘%s/conf‘ % self.gamedir self.confConfigFiles = [‘jade.cfg.jinja2‘] self.independentConfigDir = ‘%s/conf/independent‘ % self.gamedir self.independentConfigFiles = [ ‘auth.properties.jinja2‘, ‘debug.properties.jinja2‘, ‘fcm.properties.jinja2‘, ‘gm.properties.jinja2‘, ‘net.properties.jinja2‘, ‘server.properties.jinja2‘, ‘whiteList.properties.jinja2‘, ‘onlineLimit.properties.jinja2‘, ] 
self.miscConfigDir = ‘%s/conf/config/common‘ % self.gamedir self.miscConfigFiles = [ ‘misc.properties.jinja2‘, ] #数据库权限: baselist = [‘127.0.0.1‘,] payIPListAll = { ‘37wan‘: [], ‘liebao‘: [], ‘2345‘: [], ‘yilewan‘: [], ‘renrenwang‘: [], ‘6711‘: [], ‘1360‘: [], ‘duowan‘: [], ‘baidu‘: [], ‘lianyun‘: [], ‘tencent‘: [] } try: self.platformPayList = payIPListAll[self.platform] except: self.platformPayList = payIPListAll[‘lianyun‘] self.payList = baselist + self.platformPayList self.mergelist = self.__getMerge() def __getMerge(self): ‘‘‘获取合服列表‘‘‘ i = 0 while True: try: if i >= 3: print "请求超时!!!!!!" sys.exit(2) url = ‘http://yw.admin.xxx.com/yunwei/api/getmergetarget/%s/%s/‘ % (self.platform, self.id) request = urllib2.urlopen(url) response = request.read().split(‘,‘) except Exception, e: print "请求合服信息失败:" + str(e) print "正在重试。。。" i = i + 1 else: break return response def createDatabase(self): ‘‘‘创建数据库‘‘‘ try: print "正在创建数据库:%s" % self.dbname cmd = ‘‘‘ /usr/local/mysql/bin/mysql -u‘%s‘ -p‘%s‘ -e "create database %s DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci" ‘‘‘ % (self.admindbuser, self.admindbpass, self.dbname) ret = subprocess.call(cmd,shell=True) print "执行状态:%s" % ret if ret: print "创建数据库失败,请确认!" 
sys.exit(2) except Exception,e: print "捕捉到异常:",e sys.exit(2) def updateDB(self, filename): ‘‘‘ 导入数据库文件 ‘‘‘ try: print "正在导入SQL文件:%s" % filename cmd = ‘‘‘ /usr/local/mysql/bin/mysql -u‘%s‘ -p‘%s‘ %s < %s ‘‘‘ % (self.admindbuser, self.admindbpass, self.dbname, filename) ret = subprocess.call(cmd, shell=True) print "执行状态:%s" % ret except Exception,e: print "捕捉到异常:",e sys.exit(2) def dumpDatabase(self): ‘‘‘ 备份数据库 ‘‘‘ try: print "正在备份数据库:%s" % self.dbname curTime = time.strftime(‘%Y%m%d%H%M%S‘, time.localtime(time.time())) cmd = ‘‘‘ /usr/local/mysql/bin/mysqldump -u‘%s‘ -p‘%s‘ %s > %s ‘‘‘ % (self.admindbuser, self.admindbpass, self.dbname, ‘%s/%s-%s.sql‘ % (self.backup,curTime,self.dbname)) ret = subprocess.call(cmd, shell=True) print "执行状态:%s" % ret except Exception,e: print "捕捉到异常:",e def dropDatabase(self): ‘‘‘ 删除数据库 ‘‘‘ try: print "正在删除数据库:%s" % self.dbname cmd = ‘‘‘ /usr/local/mysql/bin/mysql -u‘%s‘ -p‘%s‘ -e "drop database %s" ‘‘‘ % (self.admindbuser, self.admindbpass, self.dbname) ret = subprocess.call(cmd, shell=True) print "执行状态:%s" % ret except Exception,e: print "捕捉到异常:",e def createGameDir(self): ‘‘‘ 创建游戏目录 ‘‘‘ try: print "正在检测目录是否存在:%s" % self.gamedir if os.path.isdir(self.gamedir): print "目录已存在,请检查参数!" 
sys.exit(2) else: print "正在复制程序文件至:%s" % self.gamedir shutil.copytree(‘%s/%s/server‘ % (self.workdir, self.version), self.gamedir) except Exception,e: print "捕捉到异常:",e sys.exit(2) def dropGameDir(self): ‘‘‘ 清理游戏目录 ‘‘‘ try: print "正在删除游戏目录:%s" % self.gamedir if os.path.isdir(self.gamedir): shutil.rmtree(self.gamedir) except Exception,e: print "遇到错误:",e def dropGameLogDir(self): ‘‘‘ 清理游戏日志目录 ‘‘‘ try: print "正在删除日志目录:%s" % self.gamelogdir if os.path.isdir(self.gamelogdir): shutil.rmtree(self.gamelogdir) except Exception,e: print "遇到错误:",e def createConfig(self, configdir, configlist): ‘‘‘创建程序配置‘‘‘ try: print "正在生成配置文件:%s" % configdir url = ‘http://yw.admin.xxx.com/yunwei/api/getmem/%s/%s‘ % (self.platform, self.id) response = urllib2.urlopen(url) mem = response.read() env = jinja2.Environment(loader=jinja2.FileSystemLoader(configdir)) for gateconfig in configlist: print gateconfig template = env.get_template(gateconfig) f = open(‘%s/%s‘ % (configdir,gateconfig.rstrip(‘.jinja2‘)), ‘w‘) f.write( template.render( version=self.version, platformid=self.platformid, platform=self.platform, gameid=self.id, gameport=self.gameport, gamedir=self.gamedir, dbuser=‘game‘, dbpass=‘game123456‘, dbname=self.dbname, paylist=self.platformPayList, mem=mem, mergelist=self.mergelist, ) ) f.close() except Exception,e: print "生成配置文件遇到错误:",e sys.exit(2) def updateconfig(self): self.createConfig(self.binConfigDir, self.binConfigFiles) os.chmod(‘%s/bin/socket‘ % self.gamedir,0755) self.createConfig(self.confConfigDir, self.confConfigFiles) self.createConfig(self.independentConfigDir, self.independentConfigFiles) #self.createConfig(self.miscConfigDir, self.miscConfigFiles) def updategame(self): print "正在更新游戏程序。。。" cmd = ‘‘‘ rsync -avzP --exclude="socket" --exclude="log" --exclude="onlineLimit.properties" --exclude="jade.cfg" --exclude="auth.properties" --exclude="debug.properties" --exclude="fcm.properties" --exclude="gm.properties" --exclude="net.properties" --exclude="server.properties" 
--exclude="whiteList.properties" %s/%s/server/ %s/ ‘‘‘ % (self.workdir,self.version,self.gamedir) print cmd result = subprocess.call(cmd, shell=True) return result def start(self): print "给JSVC添加执行权限:" os.chmod(‘%s/bin/jsvc‘ % self.gamedir,0755) print "正在启动服务:" cmd = ‘‘‘cd %s/bin ; ./socket start ‘‘‘ % self.gamedir result = subprocess.call(cmd, shell=True) return result def stop(self): print "正在关闭服务:" cmd = ‘‘‘cd %s/bin ; ./socket stop ‘‘‘ % self.gamedir result = subprocess.call(cmd, shell=True) return result def clearnow(self): self.dumpDatabase() self.updateDB(‘%s/%s/server/sql/database.sql‘ % (self.workdir,self.version)) self.dropGameLogDir() def clear(self): try: conn = MySQLdb.connect(user=self.admindbuser, passwd=self.admindbpass, host=‘localhost‘, db=self.dbname, unix_socket=‘/tmp/mysql.sock‘) cursor = conn.cursor(cursorclass = MySQLdb.cursors.DictCursor) sql = ‘‘‘ select * from Player ‘‘‘ sum = cursor.execute(sql) cursor.close() conn.close() print "数据库Player表有:%s" % sum if int(sum) > 30: print "Player表记录总数大于30!请确认后再执行清档操作!!!" sys.exit(2) else: print "Player表记录总数小于30,可以执行清档操作!" 
self.stop() self.clearnow() self.start() except Exception,e: print "连接数据库错误:%s" % e sys.exit(2) def create(self): ‘‘‘一键搭服‘‘‘ self.createDatabase() self.updateDB(‘%s/%s/server/sql/database.sql‘ % (self.workdir,self.version)) self.mysqlgrant() self.createGameDir() self.updateconfig() self.createchat() self.nginxlogs() def drop(self): self.dumpDatabase() self.dropDatabase() self.dropGameDir() self.dropGameLogDir() self.dropchat() def onekey(self): ‘‘‘一键更新‘‘‘ self.stop() time.sleep(10) self.updategame() self.start() def mysqlgrant(self): ‘‘‘添加数据库授权‘‘‘ print "正在添加数据库授权:" for ip in self.payList: print "正在添加%s权限" % ip cmd = ‘‘‘ /usr/local/mysql/bin/mysql -u‘%s‘ -p‘%s‘ -e "grant all privileges on *.* to game@‘%s‘ Identified by ‘cqbygame‘" ‘‘‘ % (self.admindbuser, self.admindbpass, ip) subprocess.call(cmd, shell=True) cmd = ‘‘‘ /usr/local/mysql/bin/mysql -u‘%s‘ -p‘%s‘ -e "grant select on *.* to db@‘119.131.244.178‘ identified by ‘lizhenjie‘;" ‘‘‘ % (self.admindbuser, self.admindbpass) subprocess.call(cmd, shell=True) if __name__ == "__main__": active_list = [‘create‘, ‘drop‘, ‘updateconfig‘, ‘start‘, ‘stop‘, ‘clear‘, ‘updategame‘, ‘updateDB‘,‘onekey‘,‘mysqlgrant‘,‘clearnow‘] gamever_list = [‘test‘,‘37dev‘,‘37stable‘] usage = ‘‘‘ usage: %prog -p platform %prog -v version -i id -a action %prog -v version -i id -a updateDB -s sqlfile ‘‘‘ parser = optparse.OptionParser( usage = usage, version = "%prog 2.0" ) setplat_opts = optparse.OptionGroup( parser, ‘设置服务器平台标识‘, ‘一台硬件服务器设置一次即可。‘ ) setplat_opts.add_option( ‘-p‘,‘--platform‘, dest="platform", help="平台名称" ) parser.add_option_group(setplat_opts) tools_opts = optparse.OptionGroup( parser, ‘服务器日常功能‘, ) tools_opts.add_option( ‘-v‘,‘--ver‘, dest="ver", help="版本目录", type="choice" , choices=gamever_list, default=gamever_list[1] ) tools_opts.add_option( ‘-i‘,‘--id‘, dest=‘id‘, help="服务器ID" ) tools_opts.add_option( ‘-a‘,‘--action‘, dest=‘action‘, help="执行动作", type="choice" , choices=active_list ) tools_opts.add_option( ‘-s‘,‘--sql‘, 
dest=‘sql‘, help="SQL文件(可选,配合updateDB使用)" ) parser.add_option_group(tools_opts) options, args = parser.parse_args() err_msg = ‘参数不对,请输--help查看详细说明!‘ ini = ‘platform.ini‘ if options.platform: apiurl = ‘http://yw.admin.xxx.com/yunwei/api/getplatforminfo/‘ ini = ‘platform.ini‘ result = urllib2.urlopen(apiurl) response = json.loads(result.read()) for code, id in response.items(): if options.platform == code: platformid = id print "正在设置服务器标识为:%s-%s" % (platformid, options.platform) cfd = open(ini, ‘w‘) conf = ConfigParser.ConfigParser() conf.add_section(‘platforminfo‘) conf.set(‘platforminfo‘,‘name‘,options.platform) conf.set(‘platforminfo‘,‘id‘,platformid) conf.write(cfd) cfd.close() break sys.exit(0) if options.id and options.ver and options.action: cf = ConfigParser.ConfigParser() cf.read(ini) platform = cf.get(‘platforminfo‘,‘name‘) platformid = cf.get(‘platforminfo‘,‘id‘) cqby = Cqby(options.ver, platform, platformid, options.id) run_function = getattr(cqby,options.action) if options.action in [‘updateDB‘,]: run_function(‘%s/server/sql/%s‘ % (options.ver,options.sql)) else: run_function() else: parser.error(err_msg)
cqbyupdate.py批量维护脚本:
#!/usr/bin/python #coding:utf-8 import threading import Queue import subprocess import optparse import logging import logging.config import datetime import os import sys reload(sys) sys.setdefaultencoding(‘utf-8‘) #test: import time #logging.basicConfig(level = logging.DEBUG,format=‘(%(threadName)-10s) %(message)s‘,) logging.config.fileConfig("logger.conf") logger = logging.getLogger("root") logger2 = logging.getLogger("file") queue = Queue.Queue() Failed_List = [] class Ahdts(threading.Thread): def __init__(self, queue): super(Ahdts,self).__init__() self.queue = queue self.workdir = ‘/data/init‘ #建立日志目录: log_path = ‘updatelog‘ today = datetime.date.today() self.log_path_today = ‘%s/%s‘ % (log_path,today) if not os.path.isdir(self.log_path_today): try: os.makedirs(self.log_path_today) except Exception,e: print e sys.exit(2) def run(self): while True: global action global sqlfile item = self.queue.get() value = item.strip().split(‘,‘) platform = value[0] id = value[1] ip = value[2] port = value[3] opentime = value[4] logging.debug("%10s %6s %15s %15s %10s ThreadingStart!" % (platform,id,ip,action,ver)) if action == ‘rsync‘: cmd = ‘‘‘ cd %s ; ./rsync ‘‘‘ % self.workdir elif action == ‘ntp‘: cmd = ‘‘‘ cd %s ; ./TimeClient.py ‘‘‘ % self.workdir elif action in [‘updateDB‘,]: cmd = ‘‘‘ cd %s ; ./control.py -i %s -a %s -v %s -s %s ‘‘‘ % (self.workdir, id, action, ver, sqlfile) elif action == ‘platform‘: cmd = ‘‘‘ cd %s ; ./control.py -p %s ‘‘‘ % (self.workdir, platform) else: cmd = ‘‘‘ cd %s ; ./control.py -i %s -a %s -v %s ‘‘‘ % (self.workdir, id, action, ver) sshcmd = ‘‘‘ ssh root@%s -n "%s" ‘‘‘ % (ip, cmd) with open(‘%s/%s-%s-%s-%s.log‘ % (self.log_path_today, platform, id, ver, action), ‘a‘) as logfile: exitcode = subprocess.call(sshcmd,shell=True,stdout=logfile, stderr=subprocess.STDOUT) if exitcode == 0: logger2.debug(‘%10s %6s %15s %15s %10s %s‘ % (platform, id, ip, action, ver, cmd)) rettxt = ‘%10s %6s %15s %15s %10s ThreadingEnd! 
ExitCode:%s‘ % (platform,id,ip,action,ver,exitcode) if exitcode: Failed_List.append(rettxt) logging.debug(rettxt) self.queue.task_done() if __name__ == "__main__": action_list = [‘rsync‘,‘create‘,‘drop‘,‘start‘,‘stop‘,‘clear‘,‘updateconfig‘,‘updategame‘,‘updateDB‘,‘onekey‘] gamever_list = [‘test‘,‘37dev‘,‘37stable‘] usage = ‘‘‘ usage: %prog --file <file.ini> --action <action> Forexample: %prog -f game-test.ini -a create %prog -f game-test.ini -a onekey %prog -f game-test.ini -a updateDB -s test.sql ‘‘‘ parser = optparse.OptionParser( usage = usage, version = "%prog 1.4" ) parser.add_option(‘-f‘,‘--file‘,dest="file",help="IP文件列表") parser.add_option(‘-a‘,‘--action‘,dest="action",help="执行动作",type="choice",choices=action_list) parser.add_option(‘-v‘,‘--ver‘, dest=‘ver‘,help="版本目录标识",type="choice",choices=gamever_list) parser.add_option(‘-s‘,‘--sql‘, dest=‘sql‘,help="待更新的SQL文件") options, args = parser.parse_args() err_msg = ‘参数不对,请输--help查看详细说明!‘ if options.action and options.ver and options.file: with open(options.file) as file: content = file.readlines() action = options.action ver = options.ver sqlfile = options.sql maxThreadNum = 200 if len(content) < 100: maxThreadNum = len(content) for i in range(maxThreadNum): t = Ahdts(queue) t.setDaemon(True) t.start() logging.debug("%10s %6s %15s %15s %10s" % (‘PlatForm‘,‘ID‘,‘IP‘,‘Action‘,‘Version‘)) iplist = [] for i in content: ii = i.strip().split(‘,‘) ip = ii[2] if action in [‘rsync‘,‘platform‘] and ip in iplist: continue queue.put(i) iplist.append(ip) queue.join() #打印执行失败列表: print ‘=‘ * 20 + ‘执行失败列表‘ + ‘=‘ * 20 if Failed_List: for i in Failed_List: print i else: print "None" print ‘=‘ * 52 logging.debug("Done") else: print err_msg
批量维护脚本其实就是ssh远程过去游戏服执行control.py脚本,后面看能不能改成用socket的方式去连接,把socket的东西练练手,整套东西感觉还是比较简单。
本文出自 “运维笔记” 博客,请务必保留此出处http://lihuipeng.blog.51cto.com/3064864/1617958
原文地址:http://lihuipeng.blog.51cto.com/3064864/1617958