#!/usr/bin/env python
# Author: Mini
import jieba
import jieba.posseg
# Keyword extraction lives in the jieba.analyse submodule.
import jieba.analyse

# Text that the segmentation demos below operate on (left empty here).
sentence = ""

# Register the project-specific vocabulary before any segmentation runs.
jieba.load_userdict("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/galaxy_macau_dict.txt")

# Part-of-speech tagging: print every token as "word,flag".
word6 = jieba.posseg.cut(sentence)
for item in word6:
    print(",".join((item.word, item.flag)))

# Keyword extraction: request the two top-ranked tags and print the list.
tag = jieba.analyse.extract_tags(sentence, topK=2)
print(tag)

# Tokenization in the default mode; each yielded item is printed as-is.
word8 = jieba.tokenize(sentence)
for item in word8:
    print(item)

# Same tokenization, this time in search mode.
word9 = jieba.tokenize(sentence, mode="search")
for item in word9:
    print(item)

# Blank line closing the demo output.
print()
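# NOTE(review): commented-out code kept for reference. The plaintext database
# password below should be rotated and kept out of source control.
#conn= pymysql.connect(host="127.0.0.1", user="root", passwd="wangmianny111", db="galaxy_macau_ad",charset='utf8')
#data=open("C:/Users/Administrator/Desktop/txt1.txt","r",encoding="utf8").read()
# Encoding workaround: read the text over HTTP and decode it as UTF-8,
# ignoring undecodable bytes, instead of opening the file directly.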
import urllib.request
# Keyword pipeline: download the corpus, drop function words by part of speech,
# extract the top keywords and append them to the topic file.

# POS flags whose words are discarded before keyword extraction. In jieba's
# ICTCLAS-style tag set these are: c conjunction, d adverb, x non-word /
# punctuation, w punctuation, p preposition, r pronoun, nt organization name,
# m numeral. Every other flag (including "t", time words) is kept.
_DROPPED_FLAGS = frozenset({"c", "d", "x", "w", "p", "r", "nt", "m"})

# Fetch the raw text. The `with` block closes the HTTP response as soon as the
# body has been read; bytes that are not valid UTF-8 are silently dropped.
with urllib.request.urlopen("http://127.0.0.1/txt1.txt") as response:
    data = response.read().decode("utf-8", "ignore")

# Segment with POS tags and keep only the words whose flag is not filtered.
# The kept words are concatenated without separators, exactly as before.
words = jieba.posseg.cut(data)
datas = "".join(item.word for item in words if item.flag not in _DROPPED_FLAGS)

# Rank keywords over the filtered text and keep up to 200 of them.
word10 = jieba.analyse.extract_tags(datas, topK=200)

# Each keyword is followed by a single space (including the last one), so
# successive appended runs stay separated in the output file.
topic = "".join(keyword + " " for keyword in word10)

# Append to the topic file; the context manager guarantees the data is flushed
# and the handle released even if the write raises.
with open("C:/Users/Administrator/Desktop/topic_test.txt", "a", encoding="utf_8") as fh:
    fh.write(topic)