码迷,mamicode.com
首页 > Web开发 > 详细

用go实现web日志分析及网页挂马关键字检测

时间:2014-10-22 13:01:28      阅读:458      评论:0      收藏:0      [点我收藏+]

标签:style   http   color   io   os   ar   使用   for   sp   

本程序主要实现网页挂马关键字检测。原理是这样的:定时分析用户访问产生的 IIS web 日志,对日志里的 URL 逐个进行访问,然后检索页面内容是否包含相关的关键字,有就入库。这里只是用 go 实现了自动检索及入库,可以再加个前端,实现添加关键字及报警功能。

package main

 
import (
    "bufio"
    "code.google.com/p/mahonia"
    "fmt"
    "io"
    "io/ioutil"
    "labix.org/v2/mgo"
    "labix.org/v2/mgo/bson"
    "log"
    "net"
    "net/http"
    "os"
    "path/filepath"
    "strconv"
    "strings"
    "time"
    //qqwry为IP库
    //ftp "github.com/jlaffaye/ftp"
    //"github.com/yinheli/qqwry"
)
 
// Version is the version label of this tool.
// NOTE(review): it is not referenced anywhere in the visible code.
const Version = "CDN1.0"
 
//var ServerList []ServerMember
// PageHasKeyWord records a suspicious URL, the keywords found on that page,
// and the server and log file the URL was taken from. It is the document that
// is finally inserted into the database.
//
// NOTE: interdatadb builds this struct with a positional (unkeyed) composite
// literal, so the order of the fields below must not be changed.
type PageHasKeyWord struct {
    // Url is the full URL of the page that was fetched.
    Url string
    // KeyWords lists every suspicious keyword found in the page.
    KeyWords []string
    //    UserIp string
    // LogFile is the path of the log file the URL came from.
    LogFile string
    // ServerIp is the IP address of the host that analysed the log.
    ServerIp string
    // Audit is set to false by findWord when the record is created.
    Audit bool
    Auditor string // who audited the record
    AuditTime string // when the record was audited
    Level int // suspicion level; findWord sets it to the number of matched keywords
}
type DoubtfulKeyWord struct {
    Id bson.ObjectId "_id"
    KeyWord string
    //Level string
}
 
// UrlsToAnalysit is the result of analysing one log file: the URL paths to
// fetch plus where they came from. It is filled in by logFileAnalysit.
type UrlsToAnalysit struct {
    // Urls holds the distinct URL paths taken from the log; the first entry is "/".
    Urls []string
    // LogFile is the path of the analysed log file.
    LogFile string
    // ServerIp is the local IP address as reported by getLocalIpAddr.
    ServerIp string
    // Domain is the site name derived from the log file's path; main prefixes
    // it with "http://" to build the full URL.
    Domain string
}
 
// MonthTOstr is meant to map English month names (Go represents months by
// their English names) to two-digit numeric strings. The original note adds
// that the entries have to be assigned inside a function body.
// NOTE(review): nothing in the visible code reads or writes this
// package-level map — confirm whether it is still needed.
var MonthTOstr = make(map[string]string)
 
func main() {
 
    //fmt.Printf("%s", getLogFileName())
 
    if len(os.Args) != 4 {
        log.Fatal("Usage:" + filepath.Base(os.Args[0]) + " log_dir " + "db_server_ip_address test")
        os.Exit(1)
    }
 
    logFileName, _, _ := getLogFileName()
 
    fmt.Println(time.Now())
 
    logDbServer := os.Args[2]
    dir := os.Args[1]
    dbname := "webmonitordb"
    //doubtfulKW为string数组,从数据库中获取可疑检索的URL,注意的是go是区分大小写的,因此要注意mongodb里的库名及collection名大小写
    //doubtfulKW := []string{}
    keyWords := getdoubtfulkeyword(logDbServer, dbname, "DangeroursWords")
    //wordlist := []string{"百家乐", "博彩网", "网上赌场", "娱乐城", "赌博网站", "皇冠投注", "皇冠开户", "真龙娱乐城", "三亚娱乐城"}
    /*for n, item := range keyWords {
        fmt.Println(n, item.KeyWord)
    }*/
    if os.Args[3] == "test" {
        fmt.Println("wait!")
        time.Sleep(time.Duration(90) * time.Second)
    }
 
    fmt.Println("start!")
    filepath.Walk(dir, func(path string, f os.FileInfo, err error) error {
        if f == nil {
            return err
        }
        if f.IsDir() {
            return nil
        }
        if f.Name() == logFileName {
 
            //fmt.Println(path)
 
            //fmt.Println(time.Now())
            urls := logFileAnalysit(path)
            //fmt.Println(urls)
            for _, v := range urls.Urls {
                //fmt.Println(n, ":", v)
                url := "http://" + urls.Domain + v
                //fmt.Println(n, url)
                pagestr := getPage(url)
                findWord(pagestr, url, urls.LogFile, urls.ServerIp, keyWords)
 
                //fmt.Println(n)
            }
 
        }
 
        return nil
    })
 
}
func logFileAnalysit(logFile string) UrlsToAnalysit {
    readLogFile, err := os.Open(logFile)
    if err != nil {
        log.Fatal(err)
    }
    defer readLogFile.Close()
    //pathFields的作用是将日志path解析成一个数据,从而可以得到日志的域名
    pathFields := strings.Split(logFile, "\\")
    var domainName string
    if len(pathFields) > 3 {
        domainName = pathFields[len(pathFields)-3]
    }
    var Urls UrlsToAnalysit
    Urls.Domain = domainName
    Urls.LogFile = logFile
    Urls.ServerIp = getLocalIpAddr()
 
    Urls.Urls = append(Urls.Urls, "/") //监控站点首页
    //    analysitTime := time.Now()
    bfLogFile := bufio.NewReader(readLogFile)
    //定义一个gbk转utf8解码器
    //enc := mahonia.NewDecoder("gbk")
    for {
        logRecordStr, err := bfLogFile.ReadString(‘\n‘)
 
        if err == io.EOF {
            //注意,这里要使用break,而不是return,return会使整个个程序退出了,break只是中断当前的for循环
            break
        }
        //以"#"开头的要跳过,iiS 日志前几行是做了注释的
        if strings.HasPrefix(logRecordStr, "#") {
            continue
        }
        //recordItems 是个string数组,用来临时保存每行记录里的字段
        //#Fields: date time s-sitename s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status time-taken
        fields := strings.Split(logRecordStr, " ")
        //scstatus为服务器状态码的index
        if len(fields) < 13 { //不满16个字段的跳过
            continue
        }
        url := strings.ToLower(fields[5])
        //fields[9] != Urls.ServerIp 过滤掉自己访问的IP记录
        if (strings.HasSuffix(url, "htm") || strings.HasSuffix(url, "html")) && fields[12] == "200" && fields[9] != Urls.ServerIp {
            flag := true
            for _, v := range Urls.Urls {
                if v == url {
                    flag = false
                    break
                }
            }
            if flag {
                Urls.Urls = append(Urls.Urls, url)
 
            }
 
        }
    }
    return Urls
}
 
// getLogFileName derives the name of the log file to analyse from the current
// local time. Each IIS site writes one log file per hour, named
// exYYMMDDHH.log.
//
// It returns, in order:
//   - the file name, "ex" + YYMMDDHH + ".log"
//   - the day stamp, YYMMDD
//   - the month stamp, YYMM
func getLogFileName() (string, string, string) {
	now := time.Now()
	year, month, day := now.Date()

	// The two-digit year is computed arithmetically. strings.TrimLeft(s, "20")
	// treats "20" as a set of characters, so it turned "2023" into "3" and
	// "2020" into an empty string.
	logMonth := zeroPad2(year%100) + zeroPad2(int(month))
	logDay := logMonth + zeroPad2(day)
	fileName := "ex" + logDay + zeroPad2(now.Hour()) + ".log"

	return fileName, logDay, logMonth
}

// zeroPad2 formats a non-negative number with at least two digits.
func zeroPad2(n int) string {
	s := strconv.Itoa(n)
	if n < 10 {
		return "0" + s
	}
	return s
}
func getPage(page string) string {
    resp, err := http.Get(page)
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()
    enc := mahonia.NewDecoder("gb2312")
    pageData, _ := ioutil.ReadAll(resp.Body)
    //return string(pageData)
    PageStr := enc.ConvertString(string(pageData))
    return PageStr
 
}
func findWord(str, url, logfile, serverip string, wordlist []DoubtfulKeyWord) {
    var phkw PageHasKeyWord
    flag := false
    for _, item := range wordlist {
 
        if strings.Contains(str, item.KeyWord) {
            //fmt.Println("the page contains the word:", item.KeyWord, url)
 
            phkw.KeyWords = append(phkw.KeyWords, item.KeyWord)
            flag = true
 
        }
    }
    if flag {
        phkw.Url = url
        phkw.LogFile = logfile
        phkw.ServerIp = serverip
        phkw.Audit = false
        phkw.Level = len(phkw.KeyWords)
        interdatadb("192.168.111.28", "webmonitordb", "dangerPage", phkw)
 
    }
}
 
func interdatadb(dbserver, dbname, celection string, items PageHasKeyWord) {
    session, err := mgo.Dial(dbserver)
    if err != nil {
        panic(err)
    }
    defer session.Close()
 
    // Optional. Switch the session to a monotonic behavior.
    session.SetMode(mgo.Monotonic, true)
 
    c := session.DB(dbname).C(celection)
    //fmt.Println(items)
    //time.Sleep(time.Duration(90) * time.Second)
    err = c.Insert(&PageHasKeyWord{items.Url, items.KeyWords, items.LogFile, items.ServerIp, items.Audit, items.Auditor, items.AuditTime, items.Level})
    //err = c.Insert(&LogItems(logItem))
    if err != nil {
        panic(err)
    }
 
}
 
func getdoubtfulkeyword(dbserver, dbname, collection string) []DoubtfulKeyWord {
    // 连接数据库
    session, err := mgo.Dial(dbserver)
    if err != nil {
        panic(err)
    }
    defer session.Close()
 
    // 获取数据库,获取集合
    c := session.DB(dbname).C(collection)
 
    kws := []DoubtfulKeyWord{}
    //kw := []string{}
    //var ta task
    err = c.Find(bson.M{}).All(&kws)
 
    if err != nil {
        panic(err)
    }
    /*for n, item := range kws {
        fmt.Println(n, item.Id, item.KeyWord)
    }*/
    return kws
}
// getLocalIpAddr returns the local address chosen for a UDP "connection" to a
// fixed peer, without the port. It falls back to "127.0.0.1" when the dial
// fails.
//
// Original author's note: any valid IP will do here and the port does not
// matter, even if it is closed — presumably because UDP is used; with TCP a
// closed peer would be a problem.
func getLocalIpAddr() string {
	conn, err := net.Dial("udp", "192.168.18.51:80")
	if err != nil {
		return "127.0.0.1"
	}
	defer conn.Close()

	// Keep everything before the first colon of "host:port".
	local := conn.LocalAddr().String()
	if i := strings.Index(local, ":"); i >= 0 {
		return local[:i]
	}
	return local
}
 

用go实现web日志分析及网页挂马关键字检测

标签:style   http   color   io   os   ar   使用   for   sp   

原文地址:http://my.oschina.net/u/1590519/blog/336127

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!