标签:cio 算法 div res config format 数据处理 dump exist
实际项目我是这样做的:
def mining_ue_procedures_behavior(seq, lengths, imsi_list):
    """Train (or load) an HMM over UE procedure sequences and flag low-scoring UEs.

    Parameters
    ----------
    seq : sequence of observation symbols — the concatenation of every UE's
        procedure sub-sequence.
    lengths : list of int — length of each UE's sub-sequence inside ``seq``;
        ``sum(lengths)`` is assumed to equal ``len(seq)``.
    imsi_list : list of UE identifiers, parallel to ``lengths``.

    Returns
    -------
    list of ``[score, imsi, sub_sequence]`` entries whose HMM log-likelihood
    falls below ``Config.HMMBaseScore`` (lowest score first).
    """
    import sys

    print("seq 3:", seq[:3], "lengths 3:", lengths[:3])

    # Map raw observation symbols onto contiguous integer codes, as required
    # by hmmlearn's MultinomialHMM.
    fitter = LabelEncoder().fit(seq)

    # Hyper-parameters are selected by command-line index.
    n_components = [5, 10, 20, 30][int(sys.argv[1])]
    n_iter = [10, 30, 50, 100][int(sys.argv[2])]

    model_file = 'hmm_model_{}_{}.pkl'.format(n_components, n_iter)
    if os.path.exists(model_file):
        # Reuse the cached model for this hyper-parameter pair.
        # NOTE(review): pickle.load on an untrusted file is unsafe — the model
        # file is assumed to be locally produced by this very function.
        with open(model_file, 'rb') as model_in:
            model = pickle.load(model_in)
    else:
        model = hmm.MultinomialHMM(n_components=n_components, n_iter=n_iter)
        encoded = fitter.transform(seq)
        # hmmlearn expects a (n_samples, 1) column vector plus per-sequence lengths.
        model.fit(np.array([encoded]).T, lengths)
        with open(model_file, 'wb') as model_out:
            pickle.dump(model, model_out)

    print("model.startprob_:", model.startprob_)
    print("model.transmat_:", model.transmat_)
    print("model.emissionprob_:", model.emissionprob_)
    ## [[ 1.11111111e-01 2.22222222e-01 6.66666667e-01]
    ##  [ 5.55555556e-01 4.44444444e-01 6.27814351e-28]]

    # Score each UE's own sub-sequence under the trained model.
    ans = []
    start = 0
    for i, l in enumerate(lengths):
        sub = seq[start:start + l]
        score = model.score(np.array([[d] for d in fitter.transform(sub)]))
        ans.append([score, imsi_list[i], sub])
        start += l

    # Lowest likelihood first: the least "normal" UEs sort to the front.
    ans.sort(key=lambda x: x[0])

    score_index = 0
    malicious_ue = []
    for item in ans:
        if item[score_index] < Config.HMMBaseScore:
            malicious_ue.append(item)
        print(item)
    return malicious_ue
输入数据参考了下面的优雅做法:
# predict a sequence of hidden states based on visible states
# predict a sequence of hidden states based on visible states
# Build a synthetic observation stream: 100 sub-sequences of random length
# 5-10, each symbol drawn with probabilities {0: 20%, 1: 40%, 2: 40%}.
seq = []
lengths = []
for _ in range(100):
    length = random.randint(5, 10)
    lengths.append(length)
    for _ in range(length):
        r = random.random()
        # Same draw order and thresholds as before, collapsed to one expression.
        seq.append(0 if r < .2 else (1 if r < .6 else 2))
# hmmlearn wants a single (n_samples, 1) column vector plus the lengths list.
seq = np.array([seq]).T
model = model.fit(seq, lengths)
此外,HMM模型的持续增量训练:
# 解决问题3,学习问题,仅给出X,估计模型参数,鲍姆-韦尔奇算法,其实就是基于EM算法的求解 # 解决这个问题需要X的有一定的数据量,然后通过model.fit(X, lengths=None)来进行训练然后自己生成一个模型 # 并不需要设置model.startprob_,model.transmat_,model.emissionprob_ # 例如: import numpy as np from hmmlearn import hmm states = ["Rainy", "Sunny"]##隐藏状态 n_states = len(states)##隐藏状态长度 observations = ["walk", "shop", "clean"]##可观察的状态 n_observations = len(observations)##可观察序列的长度 model = hmm.MultinomialHMM(n_components=n_states, n_iter=1000, tol=0.01) X = np.array([[2, 0, 1, 1, 2, 0],[0, 0, 1, 1, 2, 0],[2, 1, 2, 1, 2, 0]]) model.fit(X) print model.startprob_ print model.transmat_ print model.emissionprob_ # [[ 1.11111111e-01 2.22222222e-01 6.66666667e-01] # [ 5.55555556e-01 4.44444444e-01 6.27814351e-28]] print model.score(X) model.fit(X) print model.startprob_ print model.transmat_ print model.emissionprob_ 和第一次fit(X)得到的行顺序不一样 # [[ 5.55555556e-01 4.44444444e-01 9.29759770e-28] # [ 1.11111111e-01 2.22222222e-01 6.66666667e-01]] print model.score(X) model.fit(X) print model.startprob_ print model.transmat_ print model.emissionprob_ print model.score(X) # 可以进行多次fit,然后拿评分最高的模型,就可以预测了 print model.predict(bob_Actions, lengths=None) # 预测最可能的隐藏状态 # 例如: # [0 1 0 0 0 1] print model.predict_proba(bob_Actions, lengths=None)# 预测各个隐藏状态的概率 # 例如: # [[ 0.82770645 0.17229355] # [ 0.27361913 0.72638087] # [ 0.58700959 0.41299041] # [ 0.69861348 0.30138652] # [ 0.81799813 0.18200187] # [ 0.24723966 0.75276034]] # 在生成的模型中,可以随机生成随机生成一个模型的Z和X X,Z = model.sample(n_samples=5, random_state=None) print "Bob Actions:", ", ".join(map(lambda x: observations[x], X)) print "weathers:", ", ".join(map(lambda x: states[x], Z)) # 保存模型 import pickle output = open(‘D:\\xxx\\data1111.pkl‘, ‘wb‘) s = pickle.dump(model, output) output.close() # 调用模型 input = open(‘D:\\xxx\\data.pkl‘, ‘rb‘) model = pickle.load(model) input.close() model.predict(X)
标签:cio 算法 div res config format 数据处理 dump exist
原文地址:https://www.cnblogs.com/bonelee/p/10860978.html