标签:
LogRecord 类:
/**
 * One access-log line, split into its nine fields.
 *
 * Every field is kept as the raw `String` taken from the log line; nothing is
 * parsed or validated here (the status code and byte count are not converted
 * to numbers, and the timestamp is not converted to a date).
 *
 * NOTE(review): the parser code in this file constructs `AccessLogRecord`,
 * not `LogRecord` -- confirm whether the two are meant to be the same type.
 *
 * @param clientIpAddress       IP address of the client
 * @param rfc1413ClientIdentity RFC 1413 identity of the client
 * @param remoteUser            remote user name
 * @param dateTime              timestamp, in the form `[day/month/year:hour:minute:second zone]`
 * @param request               request line
 * @param httpStatusCode        HTTP status code
 * @param bytesSent             number of bytes sent
 * @param referer               referer
 * @param userAgent             user agent
 */
case class LogRecord(
  clientIpAddress: String,
  rfc1413ClientIdentity: String,
  remoteUser: String,
  dateTime: String, // [day/month/year:hour:minute:second zone]
  request: String,
  httpStatusCode: String,
  bytesSent: String,
  referer: String,
  userAgent: String
)
LogParser 解析类:
import java.util.regex.Pattern
import java.text.SimpleDateFormat
import java.util.Locale
import scala.util.control.Exception._
import java.util.regex.Matcher
import scala.util.{Try, Success, Failure}
/**
 * Parses access-log lines of the form
 *
 * {{{
 * ip client user [date] "request" status bytes "referer" "agent"
 * }}}
 *
 * into `AccessLogRecord` values. The class is `Serializable` and holds only
 * the compiled pattern and the strings it was built from.
 */
@SerialVersionUID(100L)
class LogParser extends Serializable {

  // Building blocks of the line pattern; each contributes one capture group.
  private val ddd = "\\d{1,3}"
  // The whole IP group is optional, so group 1 may not take part in a match.
  private val ip = s"($ddd\\.$ddd\\.$ddd\\.$ddd)?"
  private val client = "(\\S+)"
  private val user = "(\\S+)"
  private val dateTime = "(\\[.+?\\])"
  private val request = "\"(.*?)\""
  private val status = "(\\d{3})"
  private val bytes = "(\\S+)"
  private val referer = "\"(.*?)\""
  private val agent = "\"(.*?)\""
  private val regex = s"$ip $client $user $dateTime $request $status $bytes $referer $agent"
  private val p = Pattern.compile(regex)

  /**
   * Tries to parse one log line.
   *
   * The pattern is located with `find`, so it may match anywhere inside
   * `record`; the line does not have to match from its first to its last
   * character.
   *
   * @param record one raw log line
   * @return `Some(record)` when the pattern is found, `None` otherwise
   */
  def parseRecord(record: String): Option[AccessLogRecord] = {
    val matcher = p.matcher(record)
    if (matcher.find) Some(buildAccessLogRecord(matcher)) else None
  }

  /**
   * Same as [[parseRecord]], but returns
   * `AccessLogParser.nullObjectAccessLogRecord` instead of `None` when the
   * line cannot be parsed.
   *
   * @param record one raw log line
   * @return the parsed record, or the null-object record on failure
   */
  def parseRecordReturningNullObjectOnFailure(record: String): AccessLogRecord =
    parseRecord(record).getOrElse(AccessLogParser.nullObjectAccessLogRecord)

  /**
   * Builds a record from the nine capture groups of a successful match.
   *
   * `Matcher.group` returns `null` for a group that did not participate in
   * the match (possible here for the optional IP group); such groups are
   * mapped to `""` so that no `null` ends up in the record.
   */
  private def buildAccessLogRecord(matcher: Matcher): AccessLogRecord = {
    def group(index: Int): String = Option(matcher.group(index)).getOrElse("")

    AccessLogRecord(
      group(1),
      group(2),
      group(3),
      group(4),
      group(5),
      group(6),
      group(7),
      group(8),
      group(9))
  }
}
/**
 * Helpers for working with individual fields of a parsed access-log line.
 *
 * Example line:
 * 94.102.63.11 - - [21/Jul/2009:02:48:13 -0700] "GET / HTTP/1.1" 200 18209 "http://acme.com/foo.php" "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)"
 */
object AccessLogParser {

  /** Record with every field set to the empty string, used in place of a failed parse. */
  val nullObjectAccessLogRecord = AccessLogRecord("", "", "", "", "", "", "", "", "")

  // Captures the part of a bracketed timestamp that precedes the zone offset.
  // Compiled once; java.util.regex.Pattern instances are immutable and thread-safe.
  private val datePattern = Pattern.compile("\\[(.*?) .+]")

  /**
   * Splits a request field on single spaces.
   *
   * @param request the request field, e.g. `GET / HTTP/1.1`
   * @return `Some` of the three parts when the split yields exactly three
   *         elements, `None` otherwise
   */
  def parseRequestField(request: String): Option[Tuple3[String, String, String]] =
    request.split(" ") match {
      case Array(first, second, third) => Some((first, second, third))
      case _                           => None
    }

  /**
   * Parses a bracketed timestamp field such as `[21/Jul/2009:02:48:13 -0700]`.
   *
   * Only the part before the zone offset is handed to the date parser, so the
   * result is interpreted in the JVM's default time zone, not in the zone
   * written in the log line.
   *
   * @param field the timestamp field, including the surrounding brackets
   * @return the parsed date, or `None` when the field does not have the
   *         expected shape or the date text cannot be parsed
   */
  def parseDateField(field: String): Option[java.util.Date] = {
    val dateMatcher = datePattern.matcher(field)
    if (dateMatcher.find) {
      val dateString = dateMatcher.group(1)
      // HH is 0-23; kk is 1-24.
      // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
      val dateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH)
      Try(dateFormat.parse(dateString)).toOption
    } else {
      None
    }
  }
}
日志分析是一项十分常见的工作,大数据场景下的日志分析同样是一项常用技术。
标签:
原文地址:http://blog.csdn.net/jianghuxiaojin/article/details/51418302