码迷,mamicode.com
首页 > 移动开发 > 详细

Appscanner实验还原code3

时间:2019-03-04 11:25:54      阅读:158      评论:0      收藏:0      [点我收藏+]

标签:ssi   tcl   ini   average   android   forest   bsp   info   cti   

# Author: Baozi
# -*- coding: utf-8 -*-
import _pickle as pickle
from sklearn import ensemble
import random
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report,     confusion_matrix
import numpy as np

##########
##########

# AppScanner experiment: two-stage random-forest classification of network
# flows with "ambiguity detection" and a prediction-probability threshold.
#
# Stage 1: a classifier is trained on one half of the (shuffled) training set.
# Stage 2: that classifier labels the other half; flows it gets wrong are
#          relabelled 'ambiguous' and a second classifier is trained on them.
# Testing: the second classifier labels the test set; flows predicted as
#          'ambiguous' are dropped, and of the rest only those whose
#          prediction probability reaches the threshold are scored.
#
# Each pickle is expected to hold an iterable of (package, time, flow)
# triples, where `flow` is the feature vector and `package` the class label.

# Alternative (with-noise) datasets:
# TRAINING_PICKLE = 'motog-old-65-withnoise-statistical.p'      # 1a
# TESTING_PICKLE  = 'motog-new-65-withnoise-statistical.p'      # 2
TRAINING_PICKLE = 'trunc-dataset1a-noisefree-statistical.p'  # 1a
TESTING_PICKLE = 'trunc-dataset2-noisefree-statistical.p'  # 2

AMBIGUOUS = 'ambiguous'  # label given to flows the first classifier misjudges
REPETITIONS = 5          # independent train/test runs averaged per threshold


def load_flows(path):
    """Return the flow list stored in the pickle file at *path*.

    The file is opened in a ``with`` block so the handle is always closed.
    ``encoding='iso-8859-1'`` is passed to the unpickler, as in the original
    script (presumably the pickles were produced by Python 2 -- confirm).
    """
    # SECURITY: unpickling can execute arbitrary code. Only load pickle
    # files that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle, encoding='iso-8859-1')


def mean_or_nan(values):
    """Return the mean of *values*, or NaN when no run produced a value."""
    return np.mean(values) if values else float('nan')


print('Loading pickles...')
trainingflowlist = load_flows(TRAINING_PICKLE)
testingflowlist = load_flows(TESTING_PICKLE)
print('Done...')
print('')

print('Training with ' + TRAINING_PICKLE + ': ' + str(len(trainingflowlist)))
print('Testing with ' + TESTING_PICKLE + ': ' + str(len(testingflowlist)))
print('')

# The test set is identical for every run, so split it into features and
# labels once instead of rebuilding the lists inside the loops.
X_all_test = [flow for _package, _time, flow in testingflowlist]
y_all_test = [package for package, _time, _flow in testingflowlist]

for THR in range(10):
    threshold = float(THR) / 10

    # Per-threshold score accumulators (one entry per repetition).
    p = []  # macro precision (%)
    r = []  # macro recall (%)
    f = []  # macro F1 (%)
    a = []  # accuracy (%)
    c = []  # share of non-ambiguous flows that met the threshold (%)

    for i in range(REPETITIONS):
        print(i)

        ########## PREPARE STUFF
        classifier = ensemble.RandomForestClassifier()
        classifier2 = ensemble.RandomForestClassifier()

        ########## GET FLOWS
        trainingexamples = [(flow, package)
                            for package, _time, flow in trainingflowlist]

        ########## SHUFFLE DATA to ensure classes are "evenly" distributed
        random.shuffle(trainingexamples)
        half = len(trainingexamples) // 2
        first_half = trainingexamples[:half]
        second_half = trainingexamples[half:]

        ########## TRAINING PART 1
        X1_train = [flow for flow, _package in first_half]
        y1_train = [package for _flow, package in first_half]
        classifier.fit(X1_train, y1_train)

        ########## TRAINING PART 2 (REINFORCEMENT)
        # Flows the first classifier mislabels are taught to the second
        # classifier under the 'ambiguous' label.
        X2_train = [flow for flow, _package in second_half]
        predictions = classifier.predict(X2_train)
        y2_train = [
            package if predicted == package else AMBIGUOUS
            for (_flow, package), predicted in zip(second_half, predictions)
        ]
        classifier2.fit(X2_train, y2_train)
        print("Step Finished!!!!!!!!!!!")

        ########## TESTING
        # The second classifier can now also emit the 'ambiguous' label.
        predictionss = classifier2.predict(X_all_test)
        prediction_proba = classifier2.predict_proba(X_all_test)
        print(prediction_proba[0])

        # Confidence of each prediction = highest class probability in the
        # row belonging to *that* flow (not always row 0).
        confidences = prediction_proba.max(axis=1)

        totalflows = 0  # flows not predicted as ambiguous
        y_test = []     # true labels of flows that passed the threshold
        y_pred = []     # predicted labels of those same flows
        for package, prediction, confidence in zip(
                y_all_test, predictionss, confidences):
            if prediction == AMBIGUOUS:
                continue
            totalflows += 1
            # Only flows that reach the threshold are scored; otherwise the
            # threshold would have no effect on the metrics.
            if confidence >= threshold:
                y_test.append(package)
                y_pred.append(prediction)
        consideredflows = len(y_test)

        if totalflows:
            c.append(float(consideredflows) * 100 / totalflows)
        if not y_test:
            # Nothing to score in this run (all flows ambiguous or below
            # the threshold); the metric functions cannot take empty input.
            continue

        p.append(precision_score(y_test, y_pred, average="macro") * 100)
        r.append(recall_score(y_test, y_pred, average="macro") * 100)
        f.append(f1_score(y_test, y_pred, average="macro") * 100)
        a.append(accuracy_score(y_test, y_pred) * 100)

    # Report the averages over all repetitions for this threshold.
    print('Threshold: ' + str(threshold))
    print(mean_or_nan(p))
    print(mean_or_nan(r))
    print(mean_or_nan(f))
    print(mean_or_nan(a))
    print(mean_or_nan(c))
    print('')

 

Appscanner实验还原code3

标签:ssi   tcl   ini   average   android   forest   bsp   info   cti   

原文地址:https://www.cnblogs.com/xiaoshayu520ly/p/10469420.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!