码迷,mamicode.com
首页 > 数据库 > 详细

python利用inotify实现把nginx日志实时写入数据库

时间:2015-08-26 15:55:33      阅读:361      评论:0      收藏:0      [点我收藏+]

标签:

 

利用了pyinotify库,我用的是这里的这个,https://github.com/seb-m/pyinotify
其实网上yum上也有pyinotify库可以安装。
写入数据库用的是pymysql,这里做一下记录。
先用pyinotify实现一个tail -f 的功能:

#!/opt/python3/bin/python3
#
import pyinotify
import time
import os
import sys

class ProcessTransientFile(pyinotify.ProcessEvent):
    """pyinotify event handler that echoes text appended to the watched file.

    The handler reads from the module-level ``file`` object, which the
    ``__main__`` section opens before the watch is installed.
    """

    def process_IN_MODIFY(self, event):
        """Print everything that became readable since the previous event.

        Args:
            event: the pyinotify event object (not inspected here).
        """
        # A single IN_MODIFY notification may cover several appended lines,
        # so keep reading until readline() returns '' (nothing more to read)
        # instead of consuming only one line per event.
        for line in iter(file.readline, ''):
            print(line, end='')


if __name__ == '__main__':
    # Usage: <script> LOGFILE
    if len(sys.argv) < 2:
        sys.exit("usage: %s LOGFILE" % sys.argv[0])
    filename = sys.argv[1]

    # ``file`` has to remain a module-level name: ProcessTransientFile reads
    # from it. The ``with`` block closes it when the notifier loop ends.
    with open(filename, 'r') as file:
        # Skip the existing content so that only newly appended lines are
        # shown. Seeking relative to the end is valid for text-mode files
        # and avoids a separate os.stat() call.
        file.seek(0, os.SEEK_END)

        wm = pyinotify.WatchManager()
        notifier = pyinotify.Notifier(wm)
        wm.watch_transient_file(filename, pyinotify.IN_MODIFY, ProcessTransientFile)
        notifier.loop()

然后通过pytaif /usr/local/nginx/logs/www.tbbpay.com.access.log就可以进行日志的实时查看。

技术分享

这个是实时查看,和tail -f 功能一样。只打印一行,

现在定义一个nginxLogAnalyzer函数进行日志分析,针对的是默认格式的nginx日志。这里没有用正则,而是用了土办法查找特定字符。

def nginxLogAnalyzer(line):
    """Split one nginx access-log line into a list of field strings.

    The line is expected to look like nginx's ``combined`` format::

        addr - user [time_local] "request" status bytes "referer" "agent"

    optionally followed by a fourth quoted field, which is returned as
    ``http_x_forwarded_for``. Fields are located by searching for the
    bracket and double-quote delimiters rather than with a regex.

    Args:
        line: one raw log line (a trailing newline is fine).

    Returns:
        A list of 11 strings, in this order: remote_addr, remote_user,
        time (``"<time_local> <zone>"``), time_local, time_zone, request
        (percent-decoded), status, body_bytes_sent, http_referer,
        http_user_agent, http_x_forwarded_for (``""`` when absent).

    Raises:
        ValueError: if the bracketed timestamp, the three quoted fields,
            the address or the status/size pair cannot be found.
    """
    # Imported locally: a plain ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import unquote

    # The timestamp is the bracketed field: [26/Aug/2015:15:55:33 +0800]
    open_bracket = line.find('[')
    close_bracket = line.find(']', open_bracket + 1)
    if open_bracket == -1 or close_bracket == -1:
        raise ValueError("no [time] field in log line: %r" % line)

    # Collect the position of every double quote after the timestamp.
    quotes = []
    pos = line.find('"', close_bracket + 1)
    while pos != -1:
        quotes.append(pos)
        pos = line.find('"', pos + 1)
    if len(quotes) < 6:
        raise ValueError("expected at least 3 quoted fields in log line: %r" % line)

    # Everything before '[' is "addr - user"; the user is the last token,
    # not the literal '-' separator that follows the address.
    head = line[:open_bracket].split()
    if not head:
        raise ValueError("no remote address in log line: %r" % line)
    remote_addr = head[0]
    remote_user = head[-1] if len(head) > 1 else ""

    timestamp = line[open_bracket + 1:close_bracket]
    stamp_parts = timestamp.split()
    time_local = stamp_parts[0] if stamp_parts else ""
    time_zone = stamp_parts[1] if len(stamp_parts) > 1 else ""

    request = unquote(line[quotes[0] + 1:quotes[1]])

    # Status and size sit between the request and the referer. Reading them
    # from there keeps them correct when the request is not exactly three
    # tokens (for example the "-" request logged with a 400 status).
    counters = line[quotes[1] + 1:quotes[2]].split()
    if len(counters) < 2:
        raise ValueError("no status/body_bytes_sent in log line: %r" % line)
    status, body_bytes_sent = counters[0], counters[1]

    http_referer = line[quotes[2] + 1:quotes[3]]
    http_user_agent = line[quotes[4] + 1:quotes[5]]
    # Only present when the log format appends a fourth quoted field.
    http_x_forwarded_for = ""
    if len(quotes) >= 8:
        http_x_forwarded_for = line[quotes[6] + 1:quotes[7]]

    # Debug output of the parsed fields.
    print("time:%s" % timestamp)
    print("time_local:%s" % time_local)
    print("time_zone:%s" % time_zone)
    print("request:%s" % request)
    print("http_referer:%s" % http_referer)
    print("http_user_agent:%s" % http_user_agent)
    print("status:%s" % status)
    print("body_bytes_sent:%s" % body_bytes_sent)

    fields = [
        remote_addr,
        remote_user,
        timestamp,
        time_local,
        time_zone,
        request,
        status,
        body_bytes_sent,
        http_referer,
        http_user_agent,
        http_x_forwarded_for,
    ]
    print(fields)
    return fields


对传入的一行日志数据进行分析,得到一个列表备用,然后再写一个intodb函数把它插入数据库。

这里先写数据库

-- Database that receives the parsed nginx access-log rows.
CREATE DATABASE `nginxlog` CHARSET=utf8;
use nginxlog;

-- One row per access-log line. Every log field is stored as text.
-- NOTE: the address column is spelled `remote_add` (not `remote_addr`);
-- the INSERT statement in the Python script uses the same spelling.
CREATE TABLE `nginxlog` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `remote_add` varchar(50) DEFAULT NULL,
  `remote_user` varchar(50) DEFAULT NULL,
  `time` varchar(50) DEFAULT NULL,
  `time_local` varchar(50) DEFAULT NULL,
  `time_zone` varchar(10) DEFAULT NULL,
  `request` varchar(1024) DEFAULT NULL,
  `status` varchar(10) DEFAULT NULL,
  `body_bytes_sent` varchar(10) DEFAULT NULL,
  `http_referer` varchar(1024) DEFAULT NULL,
  `http_user_agent` varchar(1024) DEFAULT NULL,
  `http_x_forwarded_for` varchar(1024) DEFAULT NULL,
  -- No trailing comma after the last definition: MySQL rejects it.
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1001 DEFAULT CHARSET=utf8;

-- Account the Python script connects with, allowed from host 192.168.1.112.
GRANT ALL PRIVILEGES ON nginxlog.* TO 'nginxlog'@'192.168.1.112' IDENTIFIED BY 'nginxlog';


这样就创建了nginxlog库和nginxlog表,并创建了连接用的用户。

下面是插入数据库所定义的函数

def intodb(line):
    """Parse one access-log line and insert it as a row into ``nginxlog``.

    Uses the module-level ``cur`` (cursor) and ``conn`` (connection)
    objects and commits after every row.

    Args:
        line: one raw nginx access-log line.
    """
    try:
        fields = nginxLogAnalyzer(line)
    except (IndexError, ValueError) as err:
        # One malformed line should not stop the log follower.
        print("skipping unparsable log line (%s): %r" % (err, line), file=sys.stderr)
        return

    # Log fields such as the request, referer and user agent are controlled
    # by the client, so they are passed as query parameters and never
    # formatted into the SQL text. `id` is left out so AUTO_INCREMENT
    # assigns it.
    sql = (
        "INSERT INTO `nginxlog`.`nginxlog` "
        "(`remote_add`, `remote_user`, `time`, `time_local`, `time_zone`, "
        "`request`, `status`, `body_bytes_sent`, `http_referer`, "
        "`http_user_agent`, `http_x_forwarded_for`) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    cur.execute(sql, fields)
    conn.commit()



下面是所有源代码nginxLogtoMysql.py
[root@localhost ~]# cat nginxLogtoMysql.py 
#!/opt/python3/bin/python3
#
import pyinotify
import time
import os
import sys
import urllib
import urllib3
import pymysql


class ProcessTransientFile(pyinotify.ProcessEvent):
    """pyinotify event handler that stores appended log lines in the database.

    The handler reads from the module-level ``file`` object, which the
    ``__main__`` section opens before the watch is installed, and passes
    each line to ``intodb``.
    """

    def process_IN_MODIFY(self, event):
        """Hand every line that became readable since the last event to intodb.

        Args:
            event: the pyinotify event object (not inspected here).
        """
        # A single IN_MODIFY notification may cover several appended lines,
        # so keep reading until readline() returns '' (nothing more to read)
        # instead of consuming only one line per event.
        for line in iter(file.readline, ''):
            intodb(line)





def nginxLogAnalyzer(line):
    """Split one nginx access-log line into a list of field strings.

    The line is expected to look like nginx's ``combined`` format::

        addr - user [time_local] "request" status bytes "referer" "agent"

    optionally followed by a fourth quoted field, which is returned as
    ``http_x_forwarded_for``. Fields are located by searching for the
    bracket and double-quote delimiters rather than with a regex. The raw
    line is echoed to stdout before parsing.

    Args:
        line: one raw log line (a trailing newline is fine).

    Returns:
        A list of 11 strings, in this order: remote_addr, remote_user,
        time (``"<time_local> <zone>"``), time_local, time_zone, request
        (percent-decoded), status, body_bytes_sent, http_referer,
        http_user_agent, http_x_forwarded_for (``""`` when absent).

    Raises:
        ValueError: if the bracketed timestamp, the three quoted fields,
            the address or the status/size pair cannot be found.
    """
    # Imported locally: a plain ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import unquote

    print(line, end='')

    # The timestamp is the bracketed field: [26/Aug/2015:15:55:33 +0800]
    open_bracket = line.find('[')
    close_bracket = line.find(']', open_bracket + 1)
    if open_bracket == -1 or close_bracket == -1:
        raise ValueError("no [time] field in log line: %r" % line)

    # Collect the position of every double quote after the timestamp.
    quotes = []
    pos = line.find('"', close_bracket + 1)
    while pos != -1:
        quotes.append(pos)
        pos = line.find('"', pos + 1)
    if len(quotes) < 6:
        raise ValueError("expected at least 3 quoted fields in log line: %r" % line)

    # Everything before '[' is "addr - user"; the user is the last token,
    # not the literal '-' separator that follows the address.
    head = line[:open_bracket].split()
    if not head:
        raise ValueError("no remote address in log line: %r" % line)
    remote_addr = head[0]
    remote_user = head[-1] if len(head) > 1 else ""

    timestamp = line[open_bracket + 1:close_bracket]
    stamp_parts = timestamp.split()
    time_local = stamp_parts[0] if stamp_parts else ""
    time_zone = stamp_parts[1] if len(stamp_parts) > 1 else ""

    request = unquote(line[quotes[0] + 1:quotes[1]])

    # Status and size sit between the request and the referer. Reading them
    # from there keeps them correct when the request is not exactly three
    # tokens (for example the "-" request logged with a 400 status).
    counters = line[quotes[1] + 1:quotes[2]].split()
    if len(counters) < 2:
        raise ValueError("no status/body_bytes_sent in log line: %r" % line)
    status, body_bytes_sent = counters[0], counters[1]

    http_referer = line[quotes[2] + 1:quotes[3]]
    http_user_agent = line[quotes[4] + 1:quotes[5]]
    # Only present when the log format appends a fourth quoted field.
    http_x_forwarded_for = ""
    if len(quotes) >= 8:
        http_x_forwarded_for = line[quotes[6] + 1:quotes[7]]

    return [
        remote_addr,
        remote_user,
        timestamp,
        time_local,
        time_zone,
        request,
        status,
        body_bytes_sent,
        http_referer,
        http_user_agent,
        http_x_forwarded_for,
    ]


def intodb(line):
    """Parse one access-log line and insert it as a row into ``nginxlog``.

    Uses the module-level ``cur`` (cursor) and ``conn`` (connection)
    objects created in the ``__main__`` section and commits after every row.

    Args:
        line: one raw nginx access-log line.
    """
    try:
        fields = nginxLogAnalyzer(line)
    except (IndexError, ValueError) as err:
        # One malformed line should not stop the log follower.
        print("skipping unparsable log line (%s): %r" % (err, line), file=sys.stderr)
        return

    # Log fields such as the request, referer and user agent are controlled
    # by the client, so they are passed as query parameters and never
    # formatted into the SQL text. `id` is left out so AUTO_INCREMENT
    # assigns it.
    sql = (
        "INSERT INTO `nginxlog`.`nginxlog` "
        "(`remote_add`, `remote_user`, `time`, `time_local`, `time_zone`, "
        "`request`, `status`, `body_bytes_sent`, `http_referer`, "
        "`http_user_agent`, `http_x_forwarded_for`) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    cur.execute(sql, fields)
    conn.commit()



if __name__ == '__main__':
    # Usage: <script> LOGFILE
    if len(sys.argv) < 2:
        sys.exit("usage: %s LOGFILE" % sys.argv[0])
    filename = sys.argv[1]

    # ``conn`` and ``cur`` have to remain module-level names: intodb() uses
    # them. NOTE(review): host and credentials are hard-coded here.
    conn = pymysql.connect(host='192.168.1.112', port=3306, user='nginxlog', passwd='nginxlog', db='nginxlog', charset="utf8")
    try:
        cur = conn.cursor()
        cur.execute("SET NAMES utf8")

        # ``file`` is also module-level: ProcessTransientFile reads from it.
        with open(filename, 'r') as file:
            # Skip the existing content so that only newly appended lines
            # are stored. Seeking relative to the end is valid for
            # text-mode files and avoids a separate os.stat() call.
            file.seek(0, os.SEEK_END)

            wm = pyinotify.WatchManager()
            notifier = pyinotify.Notifier(wm)
            wm.watch_transient_file(filename, pyinotify.IN_MODIFY, ProcessTransientFile)
            notifier.loop()
    finally:
        # Release the database connection however the loop ends.
        conn.close()

[root@localhost ~]#

运行方式如下:

./nginxLogtoMysql.py /usr/local/nginx/logs/www.tbbpay.com.access.log

效果如下
技术分享

技术分享

 

python利用inotify实现把nginx日志实时写入数据库

标签:

原文地址:http://my.oschina.net/lenglingx/blog/497321

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!