码迷,mamicode.com
首页 > 编程语言 > 详细

Python正则处理多行日志一例

时间:2016-07-11 20:50:27      阅读:250      评论:0      收藏:0      [点我收藏+]

标签:

 

    假设现在有这样的SQL日志:    

SELECT * FROM open_app WHERE 1 and `client_id` = 'a08f5e32909cc9418f' and `is_valid` = '1' order by id desc limit 32700,100;
# Time: 160616 10:05:10
# User@Host: shuqin[qqqq] @  [1.1.1.1]  Id: 46765069
# Schema: db_xxx  Last_errno: 0  Killed: 0
# Query_time: 0.561383  Lock_time: 0.000048  Rows_sent: 100  Rows_examined: 191166  Rows_affected: 0
# Bytes_sent: 14653
SET timestamp=1466042710;
SELECT * FROM open_app WHERE 1 and `client_id` = 'a08f5e32909cc9418f' and `is_valid` = '1' order by id desc limit 36700,100;
# User@Host: shuqin[ssss] @  [2.2.2.2]  Id: 46765069
# Schema: db_yyy  Last_errno: 0  Killed: 0
# Query_time: 0.501094  Lock_time: 0.000042  Rows_sent: 100  Rows_examined: 192966  Rows_affected: 0
# Bytes_sent: 14966
SET timestamp=1466042727;

   

     要求从中解析出相应的信息, Python 程序:

     

import re

# One slow-log record: leading SQL text (optionally followed by a "# Time:"
# line), then the "# User@Host", "# Schema", "# Query_time" and "# Bytes_sent"
# header lines, terminated by "SET timestamp=<epoch>;".
# Meant to be used with re.DOTALL | re.MULTILINE so that ".*?" can span lines
# and "^"/"$" anchor at line boundaries.
globalRegex = r'^\s*(.*?)# (User@Host:.*?)# (Schema:.*?)# (Query_time:.*?)# Bytes_sent:(.*?)SET timestamp=(\d+);\s*$'
# Fields of the "Query_time: ..." header line (applied to a single line).
costRegex = r'Query_time:\s*(.*)\s*Lock_time:\s*(.*)\s*Rows_sent:\s*(\d+)\s*Rows_examined:\s*(\d+)\s*Rows_affected:\s*(\d+)\s*'
# Fields of the "Schema: ..." header line (applied to a single line).
schemaRegex = r'Schema:\s*(.*)\s*Last_errno:(.*)\s*Killed:\s*(.*)\s*'

def readSlowSqlFile(slowSqlFilename):
    """Return the entire contents of the slow-query log file as one string.

    Args:
        slowSqlFilename: Path of the log file to read.

    Returns:
        The file's text; an empty string for an empty file.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if reading fails.
    with open(slowSqlFilename) as f:
        return f.read()

def findInText(regex, text):
    """Return every non-overlapping match of ``regex`` in ``text``.

    The pattern is applied in dot-matches-newline and multi-line mode.
    The result has the shape produced by ``findall``: a list of strings,
    or a list of group tuples when the pattern has several groups.
    """
    pattern = re.compile(regex, re.DOTALL | re.MULTILINE)
    return pattern.findall(text)

def parseSql(sqlobj, sqlText):
    """Split the leading chunk of a log record into SQL text and time line.

    The chunk is the SQL statement optionally followed by a ``# Time: ...``
    line. The text before the first ``#`` is stored under ``'sql'`` and the
    text between the first and second ``#`` under ``'time'`` (an empty
    string when the chunk has no ``#``).

    Args:
        sqlobj: Dict that receives the ``'sql'`` and ``'time'`` entries.
        sqlText: The leading chunk of one matched record.
    """
    # Split once; a second element exists exactly when a '#' is present.
    parts = sqlText.split('#')
    sqlobj['sql'] = parts[0].strip()
    sqlobj['time'] = parts[1].strip() if len(parts) > 1 else ''

def parseCost(sqlobj, costText):
    """Extract timing and row counters from a ``Query_time: ...`` line.

    The raw line is always stored under ``'Cost'``. When the line matches
    ``costRegex``, the individual fields are stored as well:
    ``'QueryTime'`` and ``'LockTime'`` as stripped strings, and
    ``'RowsSent'``, ``'RowsExamined'`` and ``'RowsAffected'`` as ints.

    Args:
        sqlobj: Dict that receives the parsed entries.
        costText: The ``Query_time`` header line of one record.
    """
    matched = re.match(costRegex, costText)
    sqlobj['Cost'] = costText
    if matched:
        sqlobj['QueryTime'] = matched.group(1).strip()
        sqlobj['LockTime'] = matched.group(2).strip()
        sqlobj['RowsSent'] = int(matched.group(3))
        sqlobj['RowsExamined'] = int(matched.group(4))
        sqlobj['RowsAffected'] = int(matched.group(5))

def parseSchema(sqlobj, schemaText):
    """Extract schema name, last errno and killed flag from a ``Schema:`` line.

    ``'Schema'`` first receives the raw line as a fallback; when the line
    matches ``schemaRegex`` it is replaced by the stripped schema name and
    ``'LastErrno'`` and ``'Killed'`` are stored as ints.

    Args:
        sqlobj: Dict that receives the parsed entries.
        schemaText: The ``Schema`` header line of one record.

    Raises:
        ValueError: If the errno or killed field is not an integer.
    """
    matched = re.match(schemaRegex, schemaText)
    sqlobj['Schema'] = schemaText
    if matched:
        sqlobj['Schema'] = matched.group(1).strip()
        sqlobj['LastErrno'] = int(matched.group(2))
        sqlobj['Killed'] = int(matched.group(3))

def parseSQLObj(matched):
    """Build a dict describing one slow-log record from its regex groups.

    Args:
        matched: Sequence of the six captured groups of one record, in
            order: SQL chunk, User@Host line, Schema line, Query_time line,
            Bytes_sent value, timestamp value.

    Returns:
        A dict with the parsed fields. It is empty when ``matched`` is
        empty or None, and only partially filled when a field cannot be
        parsed (parsing is best-effort and stops at the first bad field).
    """
    sqlobj = {}
    if not matched:
        # Always hand back a dict so callers can test it for emptiness.
        return sqlobj
    try:
        parseSql(sqlobj, matched[0].strip())
        sqlobj['UserHost'] = matched[1].strip()
        sqlobj['ByteSent'] = int(matched[4])
        sqlobj['timestamp'] = int(matched[5])
        parseCost(sqlobj, matched[3].strip())
        parseSchema(sqlobj, matched[2].strip())
    except (IndexError, ValueError):
        # Too few groups or a non-numeric field: keep what was parsed so far.
        pass
    return sqlobj


if __name__ == '__main__':
    # Script entry point: parse the slow-query logs, group the records by
    # table name, and write a summary plus a detailed report to /tmp/res.txt.

    files = ['slow_sqls.txt']
    tablenames = ['open_app']

    alltext = ''.join(readSlowSqlFile(name) for name in files)
    allmatched = findInText(globalRegex, alltext)

    if not allmatched:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print('No matched. exit.')
        raise SystemExit(1)

    # table name -> list of parsed records whose SQL mentions that table.
    sqlobjMap = {}
    for matched in allmatched:
        sqlobj = parseSQLObj(matched)
        if not sqlobj:
            continue
        for tablename in tablenames:
            if sqlobj['sql'].find(tablename) != -1:
                sqlobjMap.setdefault(tablename, []).append(sqlobj)
                # A record is attributed to the first matching table only.
                break

    # table name -> {sql text: number of occurrences}.
    resultMap = {}
    for tablename, sqlobjlist in sqlobjMap.items():
        sqlstat = {}
        for sqlobj in sqlobjlist:
            sqlstat[sqlobj['sql']] = sqlstat.get(sqlobj['sql'], 0) + 1
        resultMap[tablename] = sqlstat

    # The context manager closes the report even if a write fails.
    with open('/tmp/res.txt', 'w') as f_res:
        f_res.write('-------------------------------------: \n')
        f_res.write('Bref results: \n')
        for tablename, sqlstat in resultMap.items():
            f_res.write('tablename: ' + tablename + '\n')
            # Most frequent statements first.
            sortedsqlstat = sorted(sqlstat.items(), key=lambda d: d[1], reverse=True)
            for sortedsql in sortedsqlstat:
                f_res.write('sql = %s\ncounts: %d\n\n' % (sortedsql[0], sortedsql[1]))
        f_res.write('-------------------------------------: \n\n')

        f_res.write('-------------------------------------: \n')
        f_res.write('Detail results: \n')
        for tablename, sqlobjlist in sqlobjMap.items():
            f_res.write('tablename: ' + tablename + '\n')
            f_res.write('sqlinfo: \n')
            for sqlobj in sqlobjlist:
                f_res.write('sql: ' + sqlobj['sql'] + ' QueryTime: ' + str(sqlobj.get('QueryTime')) + ' LockTime: ' + str(sqlobj.get('LockTime')) + '\n')
                f_res.write(str(sqlobj) + '\n\n')
        f_res.write('-------------------------------------: \n')

 

Python正则处理多行日志一例

标签:

原文地址:http://www.cnblogs.com/lovesqcc/p/5661313.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!