# Tags: isa enc tran for .text form pst replace cat
import csv
import numpy as np

# Tab-separated corpus: label in column 0, message text in column 1.
file_path = r'EmailData.txt'

# Shortest word (in characters) that is kept after cleaning.
MIN_WORD_LENGTH = 4


def load_emails(path=file_path):
    """Read the tab-separated corpus at *path*.

    Args:
        path: Location of a UTF-8 text file whose rows hold the label in the
            first field and the message text in the second.

    Returns:
        A ``(texts, labels)`` pair of equally long lists of strings.

    Rows with fewer than two fields (for example blank lines) are skipped
    instead of raising ``IndexError``.
    """
    texts = []
    labels = []
    # newline="" is what the csv module asks for when handed a file object.
    with open(path, 'r', encoding='utf-8', newline='') as handle:
        for row in csv.reader(handle, delimiter='\t'):
            if len(row) < 2:
                continue
            labels.append(row[0])
            texts.append(row[1])
    return texts, labels


def clean_text(text, min_word_length=MIN_WORD_LENGTH):
    """Return *text* reduced to its purely alphabetic words.

    Every non-letter character is treated as a word separator, and words
    shorter than *min_word_length* are dropped.  The surviving words are
    joined with single spaces; letter case is left untouched.

    Args:
        text: The raw message.
        min_word_length: Minimum number of letters a word needs to be kept.

    Returns:
        The cleaned message, or ``""`` when no word survives.
    """
    # Replace *every* non-letter in one pass.  Calling str.replace() on the
    # untouched input once per character would keep only the last
    # replacement and would leave nothing to split for all-letter input.
    spaced = ''.join(ch if ch.isalpha() else ' ' for ch in text)
    return ' '.join(
        word for word in spaced.split() if len(word) >= min_word_length
    )


def print_nonzero_features(matrix):
    """Print ``row column value`` for each non-zero entry of a 2-D array.

    Entries are visited in row-major order.
    """
    for i, j in zip(*np.nonzero(matrix)):
        print(i, j, matrix[i][j])


def main():
    """Train a Gaussian naive Bayes classifier on the corpus and report it.

    Loads and cleans the messages, makes a stratified 70/30 train/test split
    with a fixed seed, builds TF-IDF features, prints the non-zero training
    features, fits the model and prints the classification report.
    """
    # Imported here so load_emails() and clean_text() remain importable on a
    # machine without scikit-learn.
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics import classification_report
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    texts, labels = load_emails()
    cleaned = [clean_text(text) for text in texts]

    x_train, x_test, y_train, y_test = train_test_split(
        cleaned,
        labels,
        test_size=0.3,
        random_state=0,
        stratify=labels,
    )

    tfidf = TfidfVectorizer()
    # The vectorizer returns sparse matrices; they are converted to dense
    # arrays before being handed to GaussianNB.
    X_train = tfidf.fit_transform(x_train).toarray()
    X_test = tfidf.transform(x_test).toarray()

    print_nonzero_features(X_train)

    model = GaussianNB().fit(X_train, y_train)
    y_predict = model.predict(X_test)

    # classification_report takes the true labels first; swapping the two
    # arguments would exchange precision and recall in the printed table.
    print(classification_report(y_test, y_predict))


if __name__ == '__main__':
    main()

# Tags: isa enc tran for .text form pst replace cat
# Original article: https://www.cnblogs.com/z233/p/10079664.html