码迷,mamicode.com
首页 > 其他好文 > 详细

深度学习情感分析(随机梯度下降代码实现)

时间:2018-02-04 19:27:17      阅读:250      评论:0      收藏:0      [点我收藏+]

标签:turn   progress   sam   sel   权重   0.00   nump   osi   深度   

1. 代码没有引入任何深度学习框架,仅使用 NumPy 实现了一个简单的随机梯度下降(SGD)算法。

2.理论较简单。

技术分享图片
# coding:utf8
# Author:Chaz
import sys,time
import numpy as np

# Load one review per line. rstrip("\n") removes only the line terminator, so
# a final line that lacks a trailing newline keeps its last character
# (slicing with [:-1] would have cut a real character off in that case).
# The `with` blocks close the files even if reading raises.
with open("reviews.txt", "r") as g:
    reviews = [line.rstrip("\n") for line in g]

# Labels are upper-cased so they compare equal to the "POSITIVE"/"NEGATIVE"
# literals used by the network below.
with open("labels.txt", "r") as f:
    labels = [line.rstrip("\n").upper() for line in f]

class SentimentNetwork:
    """Bag-of-words sentiment classifier with one linear hidden layer.

    A review is encoded as a binary word-presence vector, passed through a
    hidden layer with no activation and a single sigmoid output unit. The
    weights are updated after every review (stochastic gradient descent)
    using only NumPy.
    """

    def __init__(self, reviews, labels, hidden_nodes=10, learning_rate=0.1):
        """Build the vocabulary from the data and allocate the weights.

        Args:
            reviews: Iterable of review strings whose words are separated by
                single spaces.
            labels: Iterable of label strings, one per review.
            hidden_nodes: Width of the hidden layer.
            learning_rate: Step size applied to every weight update.
        """
        # Fixed seed so the random hidden-to-output weights are repeatable.
        np.random.seed(1)
        self.pre_process_data(reviews, labels)
        self.init_network(len(self.review_vocab), hidden_nodes, 1, learning_rate)

    def pre_process_data(self, reviews, labels):
        """Collect the word and label vocabularies and their index maps.

        Sets ``review_vocab``, ``label_vocab``, their sizes, ``word2index``
        and ``label2index``.
        """
        # Sorted so that index assignment is the same on every run; plain set
        # iteration order for strings can differ between interpreter
        # processes because string hashing is randomised by default.
        self.review_vocab = sorted(
            {word for review in reviews for word in review.split(" ")}
        )
        self.label_vocab = sorted(set(labels))

        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)

        # Built from the stored lists, so index i always refers to
        # review_vocab[i] / label_vocab[i].
        self.word2index = {word: i for i, word in enumerate(self.review_vocab)}
        self.label2index = {label: i for i, label in enumerate(self.label_vocab)}

    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Allocate weights and reusable layer buffers.

        Args:
            input_nodes: Size of the input layer (vocabulary size).
            hidden_nodes: Size of the hidden layer.
            output_nodes: Size of the output layer.
            learning_rate: Step size applied to every weight update.
        """
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        self.learning_rate = learning_rate

        # Input-to-hidden weights start at zero; hidden-to-output weights are
        # drawn from a normal distribution with std output_nodes ** -0.5.
        self.weight_0_1 = np.zeros((self.input_nodes, self.hidden_nodes))
        self.weight_1_2 = np.random.normal(
            0.0, self.output_nodes ** -0.5, (self.hidden_nodes, self.output_nodes)
        )

        # Row-vector buffers reused across reviews.
        self.layer_0 = np.zeros((1, self.input_nodes))
        self.layer_1 = np.zeros((1, hidden_nodes))

    def update_input_layer(self, review):
        """Overwrite ``layer_0`` with the word-presence encoding of *review*.

        Words that are not in the vocabulary are ignored.
        """
        self.layer_0 *= 0
        for word in review.split(" "):
            index = self.word2index.get(word)
            if index is not None:
                self.layer_0[0][index] = 1

    def get_target_for_label(self, label):
        """Return 1 for the label "POSITIVE" and 0 for anything else."""
        return 1 if label == "POSITIVE" else 0

    def sigmoid(self, x):
        """Return the logistic function of *x*."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_output_2_derivative(self, output):
        """Return the sigmoid derivative given the sigmoid's *output* value."""
        return output * (1 - output)

    def _review_to_indices(self, review):
        """Return the sorted unique vocabulary indices of the words in *review*."""
        found = {
            self.word2index[word]
            for word in review.split(" ")
            if word in self.word2index
        }
        # Explicit integer dtype so an empty review still yields a valid
        # (empty) index array.
        return np.array(sorted(found), dtype=np.intp)

    @staticmethod
    def _write_progress(i, total, correct, start, count_label, accuracy_label):
        """Write a one-line, carriage-return-prefixed progress report to stdout."""
        elapsed_time = float(time.time() - start)
        reviews_per_second = i / elapsed_time if elapsed_time > 0 else 0
        sys.stdout.write(
            "\rProgress:" + str(100 * i / float(total))[:4]
            + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5]
            + " #Correct:" + str(correct) + count_label + str(i + 1)
            + accuracy_label + str(correct * 100 / float(i + 1))[:4] + "%"
        )

    def train(self, train_reviews_raw, train_labels):
        """Run one pass of per-review gradient descent over the data.

        Only the hidden-layer rows of the words present in a review are read
        and updated, so the dense ``layer_0`` buffer is not touched here.

        Args:
            train_reviews_raw: Review strings.
            train_labels: Label strings, one per review.
        """
        train_reviews = [
            self._review_to_indices(review) for review in train_reviews_raw
        ]
        assert (len(train_reviews) == len(train_labels))
        total = len(train_reviews)
        correct_so_far = 0
        start = time.time()

        for i, indices in enumerate(train_reviews):
            label = train_labels[i]

            # Forward pass: the hidden layer is the sum of the weight rows of
            # the words present (equivalent to layer_0.dot(weight_0_1)).
            self.layer_1[0] = self.weight_0_1[indices].sum(axis=0)
            layer_2 = self.sigmoid(self.layer_1.dot(self.weight_1_2))

            # Backward pass. Output error is prediction minus target.
            layer_2_error = layer_2 - self.get_target_for_label(label)
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

            # The hidden layer has no non-linearity, so its delta is just the
            # error propagated back through the (not yet updated) weights.
            layer_1_delta = layer_2_delta.dot(self.weight_1_2.T)

            self.weight_1_2 -= self.layer_1.T.dot(layer_2_delta) * self.learning_rate
            # indices are unique, so the fancy-indexed in-place update touches
            # each row exactly once.
            self.weight_0_1[indices] -= layer_1_delta[0] * self.learning_rate

            prediction = layer_2[0, 0]
            if prediction > 0.5 and label == "POSITIVE":
                correct_so_far += 1
            elif prediction < 0.5 and label == "NEGATIVE":
                correct_so_far += 1

            self._write_progress(
                i, total, correct_so_far, start, " #Trained:", " Training Accuracy:"
            )
            if i % 2500 == 0:
                print("")

    def test(self, test_reviews, test_labels):
        """Predict every review and report running accuracy on stdout.

        Args:
            test_reviews: Review strings.
            test_labels: Expected label strings, one per review.
        """
        correct = 0
        start = time.time()
        total = len(test_reviews)

        for i, review in enumerate(test_reviews):
            if self.run(review) == test_labels[i]:
                correct += 1
            self._write_progress(
                i, total, correct, start, " #Tested:", " Test Accuracy:"
            )

    def run(self, review):
        """Return "POSITIVE" or "NEGATIVE" for a single review string."""
        # NOTE(review): the text is lower-cased here but not when the
        # vocabulary is built or during training; the two only agree if the
        # training text is already lower-case - confirm against the data.
        self.update_input_layer(review.lower())
        layer_1 = self.layer_0.dot(self.weight_0_1)
        layer_2 = self.sigmoid(layer_1.dot(self.weight_1_2))

        return "POSITIVE" if layer_2[0, 0] > 0.5 else "NEGATIVE"

# Fit on everything except the final 1000 reviews, then evaluate on those
# held-out 1000.
mlp = SentimentNetwork(
    reviews=reviews[:-1000],
    labels=labels[:-1000],
    learning_rate=0.001,
)
mlp.train(train_reviews_raw=reviews[:-1000], train_labels=labels[:-1000])
mlp.test(test_reviews=reviews[-1000:], test_labels=labels[-1000:])
View Code

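某一层权重 w 的更新量 = 学习速率 × 输入.T · 这一层 delta,其中:这一层 delta = error × 激活函数导数,error = 后一层 delta · 后一层权重.T。用公式表示为 $\delta_l = (\delta_{l+1} W_{l+1}^{\top}) \odot f'(z_l)$,$W_l \leftarrow W_l - \eta \, a_{l-1}^{\top} \delta_l$,其中 $a_{l-1}$ 为该层的输入,$W_{l+1}$ 为连接该层与后一层的权重,$\eta$ 为学习速率。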

 

深度学习情感分析(随机梯度下降代码实现)

标签:turn   progress   sam   sel   权重   0.00   nump   osi   深度   

原文地址:https://www.cnblogs.com/jackzone/p/8413745.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!