标签:ssi tcl ini average android forest bsp info cti
# Author: Baozi
# -*- coding: utf-8 -*-
#
# Two-stage random-forest experiment over pickled flow records.
#
# Every record in both pickles is unpacked as (package, time, flow): `flow`
# is passed to scikit-learn as the feature vector and `package` is used as
# the class label.
#
# For each threshold 0.0, 0.1, ..., 0.9 the script repeats 5 runs of:
#   1. shuffle the training records and split them in half;
#   2. fit `classifier` on the first half;
#   3. predict the second half with `classifier`; records it gets wrong are
#      relabelled 'ambiguous', and `classifier2` is fitted on that relabelled
#      second half;
#   4. predict the test records with `classifier2`, ignore the ones predicted
#      'ambiguous', and keep the ones whose highest class probability reaches
#      the threshold;
#   5. record macro precision/recall/F1, accuracy and the percentage of
#      non-ambiguous flows that were kept.
# The means over the 5 runs are printed per threshold.
import _pickle as pickle
from sklearn import ensemble
import random
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
import numpy as np

# Label given to second-half training records that the first classifier
# misclassifies.
AMBIGUOUS = 'ambiguous'

# Alternative dataset: 'motog-old-65-withnoise-statistical.p'  # 1a
TRAINING_PICKLE = 'trunc-dataset1a-noisefree-statistical.p'  # 1a
# Alternative dataset: 'motog-new-65-withnoise-statistical.p'  # 2
TESTING_PICKLE = 'trunc-dataset2-noisefree-statistical.p'  # 2


def _mean_or_nan(values):
    """Return the mean of *values*, or NaN when no run produced a value."""
    return np.mean(values) if values else float('nan')


print('Loading pickles...')
# NOTE(security): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
with open(TRAINING_PICKLE, 'rb') as training_file:
    trainingflowlist = pickle.load(training_file, encoding='iso-8859-1')
with open(TESTING_PICKLE, 'rb') as testing_file:
    testingflowlist = pickle.load(testing_file, encoding='iso-8859-1')
print('Done...')
print('')

print('Training with ' + TRAINING_PICKLE + ': ' + str(len(trainingflowlist)))
print('Testing with ' + TESTING_PICKLE + ': ' + str(len(testingflowlist)))
print('')

# The test features/labels never change between runs, so extract them once.
test_flows = [flow for _, _, flow in testingflowlist]
test_labels = [package for package, _, _ in testingflowlist]

for THR in range(10):
    threshold = float(THR) / 10

    # Per-run results for this threshold: precision, recall, F1, accuracy and
    # percentage of considered flows.
    p = []
    r = []
    f = []
    a = []
    c = []

    for i in range(5):
        print(i)

        ########## PREPARE STUFF
        classifier = ensemble.RandomForestClassifier()
        classifier2 = ensemble.RandomForestClassifier()

        ########## GET FLOWS
        trainingexamples = [(flow, package) for package, _, flow in trainingflowlist]

        ########## SHUFFLE DATA to ensure classes are "evenly" distributed
        random.shuffle(trainingexamples)
        half = len(trainingexamples) // 2
        first_half = trainingexamples[:half]
        second_half = trainingexamples[half:]

        ########## TRAINING PART 1
        X1_train = [flow for flow, _ in first_half]
        y1_train = [package for _, package in first_half]
        classifier.fit(X1_train, y1_train)

        ########## TRAINING PART 2 (REINFORCEMENT)
        X2_train = [flow for flow, _ in second_half]
        predictions = classifier.predict(X2_train)
        # Keep the true label where the first classifier was right; mark the
        # record as ambiguous where it was wrong.
        y2_train = [
            package if prediction == package else AMBIGUOUS
            for (_, package), prediction in zip(second_half, predictions)
        ]
        print("Step Finished!!!!!!!!!!!")
        classifier2.fit(X2_train, y2_train)

        ########## TESTING
        # At this point the classifier can also predict the 'ambiguous' label.
        predictionss = classifier2.predict(test_flows)
        prediction_proba = classifier2.predict_proba(test_flows)
        print(prediction_proba[0])

        totalflows = 0
        consideredflows = 0
        y_test = []
        y_pred = []
        for package, prediction, probabilities in zip(test_labels, predictionss, prediction_proba):
            if prediction == AMBIGUOUS:
                continue
            totalflows += 1
            # Use this flow's own probability row. The original indexed row 0
            # for every flow, so the threshold was effectively all-or-nothing.
            if max(probabilities) >= threshold:
                consideredflows += 1
                y_test.append(package)
                # Reuse the prediction already computed for this flow instead
                # of predicting the selected flows a second time.
                y_pred.append(prediction)

        # Skip the metrics when nothing was selected: scoring empty label
        # lists is meaningless, and the percentage would divide by zero when
        # every test flow was predicted ambiguous.
        if y_test:
            p.append(precision_score(y_test, y_pred, average="macro") * 100)
            r.append(recall_score(y_test, y_pred, average="macro") * 100)
            f.append(f1_score(y_test, y_pred, average="macro") * 100)
            a.append(accuracy_score(y_test, y_pred) * 100)
        if totalflows:
            c.append(float(consideredflows) * 100 / totalflows)

    print('Threshold: ' + str(threshold))
    print(_mean_or_nan(p))
    print(_mean_or_nan(r))
    print(_mean_or_nan(f))
    print(_mean_or_nan(a))
    print(_mean_or_nan(c))
    print('')
标签:ssi tcl ini average android forest bsp info cti
原文地址:https://www.cnblogs.com/xiaoshayu520ly/p/10469420.html