XSS URL detection with scikit-learn: extract five simple lexical features from each request URL, then compare four classifiers (decision tree, linear SVM, logistic regression, Gaussian naive Bayes) with 5-fold cross-validation.

import re

import numpy as np
from sklearn import svm
from sklearn import tree
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score

x = []
y = []

def get_len(url):
    # Feature 1: total length of the URL
    return len(url)

def get_url_count(url):
    # Feature 2: whether the URL embeds another http(s):// URL
    if re.search('(http://)|(https://)', url, re.IGNORECASE):
        return 1
    else:
        return 0

def get_evil_char(url):
    # Feature 3: number of characters commonly seen in XSS payloads
    return len(re.findall("[<>,\'\"/]", url, re.IGNORECASE))

def get_evil_word(url):
    # Feature 4: number of keywords commonly seen in XSS payloads
    return len(re.findall("(alert)|(script=)|(%3c)|(%3e)|(%20)|(onerror)|(onload)|(eval)|(src=)|(prompt)",
                          url, re.IGNORECASE))

def get_last_char(url):
    # Feature 5: whether the URL ends with a slash
    if re.search('/$', url, re.IGNORECASE):
        return 1
    else:
        return 0

def get_feature(url):
    return [get_len(url), get_url_count(url), get_evil_char(url),
            get_evil_word(url), get_last_char(url)]

def etl(filename, data, isxss):
    with open(filename) as f:
        for line in f:
            line = line.strip()              # drop the trailing newline
            data.append(get_feature(line))   # 5-dimensional feature vector
            if isxss:
                y.append(1)
            else:
                y.append(0)
    return data

etl('../data/xss-200000.txt', x, 1)
etl('../data/good-xss-200000.txt', x, 0)

clf1 = tree.DecisionTreeClassifier()
clf2 = svm.SVC(kernel='linear', C=1)        # linear SVM; training is slow on 200k samples
clf3 = linear_model.LogisticRegression(C=1e5)
clf4 = GaussianNB()

for name, model in {"decision tree": clf1,
                    "svm train long time": clf2,
                    "LR": clf3,
                    "bayes": clf4}.items():
# Alternative: skip the slow SVM
# for name, model in {"decision tree": clf1, "LR": clf3, "bayes": clf4}.items():
    print("model:", name)
    scores = cross_val_score(model, x, y, n_jobs=-1, cv=5)
    print(scores)
    print(np.mean(scores))
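Beyond the cross-validation scores, it can help to see the extracted features and a prediction for a concrete URL. The sketch below assumes the script above has already run (so get_feature, x and y exist); the two sample URLs are made up purely for illustration.

# Minimal usage sketch: fit one model on the full feature matrix and
# classify two hand-made example URLs (both URLs are hypothetical).
demo_clf = tree.DecisionTreeClassifier()
demo_clf.fit(x, y)

samples = [
    "/search?q=<script>alert(1)</script>",  # XSS-looking request
    "/index.php?page=home",                 # benign-looking request
]
for url in samples:
    features = get_feature(url)             # [length, has_url, evil_chars, evil_words, ends_with_slash]
    print(url, "->", demo_clf.predict([features])[0])   # 1 = XSS, 0 = normal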
Original source: http://www.cnblogs.com/bonelee/p/7818813.html