文本挖掘

时间：2015-07-16 18:13:42 阅读：126 评论：0 收藏：0 [点我收藏+]

标签：

#
library("rJava")
library("Rwordseg")
library("NLP")
library("tm")
library(igraph)
# Load the review data.
# NOTE(review): the working directory is a machine-specific absolute path;
# adjust it before running this script on another machine.
setwd("E:\\毕业设计\\")
# Read the review table; the first row of the CSV supplies the column names.
txt <- read.csv(file = "总评论文本.csv", header = TRUE)
# Draw a random sample of at most 2000 rows. Capping the size at nrow(txt)
# avoids the error sample() raises when asked for more rows than exist.
sample_size <- min(2000, nrow(txt))
txt <- txt[sample.int(nrow(txt), sample_size), ]
# edit() opens a data editor for eyeballing the sample; the result is not
# assigned, so it is only run when a user is present to close the window.
if (interactive()) {
  edit(txt)
}
# Coerce the columns used below to plain character vectors.
txt$id <- as.character(txt$id)
txt$logo <- as.character(txt$logo)
txt$comment <- as.character(txt$comment)
# Register domain-specific phrases (sentiment expressions, "性价比", the
# brand name) in the Rwordseg user dictionary before segmentation.
# insertWords() accepts a character vector, so one call replaces the
# per-word calls; the entry "很快" was listed twice and is kept only once.
custom_words <- c(
  "非常满意", "非常好", "很好", "很不好", "不好", "很不错", "不错",
  "很快", "很有用", "有用", "很好用", "好用", "挺好用", "挺好",
  "性价比", "苏宁"
)
insertWords(custom_words)
# Segment every comment into words. With nature = TRUE each returned word
# carries its part-of-speech tag as the element name.
comment.Rwordseg <- segmentCN(txt$comment, nature = TRUE)
# Keep only the words tagged "n" in each comment. The previous loop computed
# this subset but never stored it, so no filtering actually took place.
comment.Rwordseg.n <- lapply(
  comment.Rwordseg,
  function(words) words[names(words) == "n"]
)
# Flatten the per-comment word vectors into one character vector. unlist()
# keeps the tag names, as repeated c() did, without the quadratic copying.
cordata <- unlist(comment.Rwordseg.n)
# Build a corpus in which every element of cordata is its own document.
ovid <- Corpus(VectorSource(cordata))
# Remove the brand/product names and common function words listed below.
ovid <- tm_map(
  ovid,
  removeWords,
  c("苏宁", "热水器", "没有", "有", "很", "给", "说", "没", "我", "后",
    "的", "了", "不", "买", "就", "是", "也", "还", "都")
)
# wordLengths = c(1, Inf) keeps one- and two-character terms, which tm's
# default minimum term length would otherwise drop.
dtm <- DocumentTermMatrix(ovid, control = list(wordLengths = c(1, Inf)))
comment.matrix <- as.matrix(dtm)
# Total occurrences of each term across all documents.
comment.freq <- colSums(comment.matrix)
# Ten most frequent terms, highest first. head() returns fewer entries when
# fewer than ten terms exist, where `[1:10]` padded the result with NA.
comment.freq.stop <- head(rev(sort(comment.freq)), 10)
# Plot the counts by rank and label each point with its term.
plot(comment.freq.stop)
text(seq_along(comment.freq.stop), comment.freq.stop, names(comment.freq.stop))

文本挖掘

标签：

原文地址：http://www.cnblogs.com/liuhuan2368935760/p/4651489.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行