python TextMining


01.Crawling

url_request

 

# -*- coding: utf-8 -*-
"""
Created on Sun Feb 17 11:08:44 2019

@author: 502-03
1. Open the Anaconda Prompt
2. python -m pip install --upgrade pip
3. pip install beautifulsoup4
"""

import urllib.request          # URL request
from bs4 import BeautifulSoup  # HTML parsing

url = "http://www.naver.com/index.html"

# 1. Request the page at the URL
rst = urllib.request.urlopen(url)
print(rst)  # <http.client.HTTPResponse object at 0x000000000E21F940>

data_src = rst.read()  # read the page source (bytes)
print(data_src)
"""
b'<!doctype html>\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n<html lang="ko">\n<head>
....
"""

# 2. Parse the HTML: source -> HTML object
html = data_src.decode("utf-8")  # bytes -> str
soup = BeautifulSoup(html, "html.parser")
print(soup)
"""
</script>
<title>NAVER</title>
</meta></meta></meta></meta></meta></head>
<style>
"""

# 3. Find a tag
link = soup.find("a")  # first <a> element
print(link)
"""
<a href="#news_cast" onclick="document.getElementById('news_cast2').tabIndex = -1;
document.getElementById('news_cast2').focus();return false;">
<span>???? ????</span></a>
"""
print("a tag text:", link.string)  # text content of the first <a> tag

links = soup.find_all("a")  # every <a> element
print("a tag size:", len(links))  # a tag size 335
links_data = []  # link texts
for a in links:
    print(a.string)
    links_data.append(a.string)

print("links_data", len(links_data))  # links_data 341
print(links_data)
"""
['???? ????', '?????? ????', '????? ????', '????? ????', '??? ????',
....
'??? ??', '????', 'NAVER Corp.']
"""

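Naver and many other sites may reject or throttle the default urllib client. Below is a minimal, more defensive variant of step 1; the User-Agent string and the 10-second timeout are illustrative assumptions, not values from the original listing.

import urllib.request
from urllib.error import HTTPError, URLError

url = "http://www.naver.com/index.html"
# a browser-like User-Agent; the exact string is only an example
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
try:
    rst = urllib.request.urlopen(req, timeout=10)  # fail fast instead of hanging
    html = rst.read().decode("utf-8")
except (HTTPError, URLError) as e:
    print("request failed:", e)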
 

selector

 

# -*- coding: utf-8 -*-
"""
- Selecting elements with CSS selectors
  -> web style-sheet (css) syntax
  -> id(#), class(.)
  -> select_one('selector') : select a single element
  -> select('selector')     : select all matching elements
"""

from bs4 import BeautifulSoup  # HTML parsing

# 1. Read the HTML file
file = open("../data/selector.html", mode="r", encoding="utf-8")
data_src = file.read()
print(data_src)
"""
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>id/class selectors, tag selectors</title>
<style type="text/css">
"""

# 2. Parse the HTML
html = BeautifulSoup(data_src, "html.parser")
print(html)
"""
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">

<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>id/class selectors, tag selectors</title>
<style type="text/css">
"""

# 3. Using selectors

# 1) id selector: select_one("tag#id")
table = html.select_one("table#tab")
print(table)
"""
<table border="1" id="tab">
<tr> <!-- row 1 -->
<!-- header cell: th -->
<th id="id"> ?? </th>
...
<td> you@naver.com </td>
</tr>
</table>
"""

# 2) id selector > child tag
th = html.select_one("table#tab > tr > th")
print(th)  # <th id="id"> ?? </th>

ths = html.select("table#tab > tr > th")
print(ths)
"""
[<th id="id"> ?? </th>, <th id="name"> ?? </th>, <th id="major"> ?? </th>, <th id="email"> ??? </th>]
"""

print(len(ths))  # 4

for th in ths:  # e.g. <th id="id"> ?? </th>
    print(th.string)

# 3) class selector: select('tag.class')

# (1) by class name
trs = html.select("table#tab > tr.odd")
print(trs)
"""
[<tr class="odd"> <!-- row 3 -->
<td> 201602 </td>
<td> ??? </td>
<td> ???? </td>
<td> lee@naver.com </td>
</tr>, <tr class="odd"> <!-- row 5 -->
<td> 201604 </td>
<td> ??? </td>
<td> ???? </td>
<td> you@naver.com </td>
</tr>]
"""
tds_data = []
for tr in trs:  # 2 rows
    tds = tr.find_all("td")
    for td in tds:
        print(td.string)
        tds_data.append(td.string)
print(len(tds_data))  # 8

# (2) by attribute: tag[attr=value]
trs = html.select("tr[class=odd]")
print(trs)  # same two rows as the class-name form above

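tr.odd and tr[class=odd] match the same rows here. As a small follow-up sketch (assuming the same selector.html structure as above), the <th> header texts can be zipped with each odd row's <td> values to build one dict per student:

# pair header texts with each odd row's cell values (assumes selector.html above)
headers = [th.string.strip() for th in html.select("table#tab > tr > th")]
for tr in html.select("table#tab > tr.odd"):
    values = [td.string.strip() for td in tr.find_all("td")]
    print(dict(zip(headers, values)))  # one record per row, keyed by header text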
 

newsCrawling

 

# -*- coding: utf-8 -*-
"""
news crawling
    url = 'http://media.daum.net'
"""

import requests                # URL request
from bs4 import BeautifulSoup  # HTML parsing

url = "http://media.daum.net"

# 1. Request the URL
src = requests.get(url)
print(src)  # <Response [200]>

data_src = src.text  # page source text

# 2. Parse the HTML
html = BeautifulSoup(data_src, "html.parser")
print(html)

# 3. select("tag[attr=value]")
#    e.g. <strong class="tit_g"><a href="http://v.media.daum.net/v/20190217083008573" class="link_txt">...</a><span class="txt_view">...</span></strong>
links = html.select("a[class=link_txt]")
print(len(links))  # 102

crawling_data = []  # collected headlines
cnt = 0
for link in links:
    cnt += 1
    cont = str(link.string)  # headline text
    print(cnt, "-", cont.strip())  # strip leading/trailing whitespace and line breaks
    crawling_data.append(cont.split())

"""
1 - ...
2 - ...
3 - ...
"""

print(len(crawling_data))  # 102

# save the result -> text file
file = open("../data/crawling_data.txt",
            mode="w", encoding="utf-8")

# list -> str conversion
file.write(str(crawling_data))
file.close()
print("file save commit")

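file.write(str(crawling_data)) stores the list as one Python-repr string, which is awkward to parse back. If the headlines need to be reloaded as data, a JSON round-trip is a common alternative; a sketch, where ../data/crawling_data.json is a hypothetical file name:

import json

# dump the nested list as JSON; ensure_ascii=False keeps Korean text readable
with open("../data/crawling_data.json", mode="w", encoding="utf-8") as f:
    json.dump(crawling_data, f, ensure_ascii=False)

# load it back as a real list, not a string
with open("../data/crawling_data.json", encoding="utf-8") as f:
    restored = json.load(f)
print(len(restored))  # 102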
 

02.NLP

jpype_test

 

# -*- coding: utf-8 -*-
"""
Check that the Java virtual machine can be located
(konlpy runs its analyzers on a JVM via jpype)
"""
import jpype

path = jpype.getDefaultJVMPath()
print(path)  # C:\Program Files\Java\jdk1.8.0_181\jre\bin\server\jvm.dll

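konlpy boots the JVM on first use, but the path above can also be exercised directly with jpype's own start/stop calls; a minimal sketch:

import jpype

path = jpype.getDefaultJVMPath()
if not jpype.isJVMStarted():
    jpype.startJVM(path)     # start the JVM found above
print(jpype.isJVMStarted())  # True
jpype.shutdownJVM()          # normally left to konlpy itself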
 

konlpy

 

# -*- coding: utf-8 -*-
"""
konlpy morphological analysis
pip install konlpy
"""

from konlpy.tag import Kkma

# Kkma object
kkma = Kkma()

# paragraph -> sentence extraction
para = "??? ??? ?????.?? ??? ?? age? 28? ???."

ex_sent = kkma.sentences(para)
print(ex_sent)
"""
['??? ??? ?????.', '?? ??? ?? age? 28? ???.']
"""

# paragraph -> noun extraction
ex_nouns = kkma.nouns(para)
print(ex_nouns)
"""
['???', '??', '??', '?', '???', '28', '28?', '?']
"""

# paragraph -> part-of-speech tagging
ex_pos = kkma.pos(para)
print(ex_pos)  # [(text, text class)]
"""
[('???', 'NNG'), ('??', 'NNG'), ('?', 'JKO'), ('??', 'NNG'), ('?', 'VCP'),
('???', 'EFN'), ('.', 'SF'), ('?', 'NP'), ('?', 'JX'), ('???', 'NNG'),
('?', 'VCP'), ('?', 'ECE'), ('age', 'OL'), ('?', 'JX'), ('28', 'NR'),
('?', 'NNM'), ('?', 'VCP'), ('???', 'EFN'), ('.', 'SF')]
"""
"""
Kkma POS tagset (selected tags):
NNG general noun, NNP proper noun, NNB bound noun, NR numeral, NP pronoun,
VV verb, VA adjective, VX auxiliary predicate, VCP positive copula, VCN negative copula,
MM determiner, MAG general adverb, MAJ conjunctive adverb, IC interjection,
JKS subject particle, JKC complement particle, JKG adnominal particle,
JKO object particle, JKB adverbial particle, JKV vocative particle,
JKQ quotative particle, JC conjunctive particle, JX auxiliary particle,
EP pre-final ending, EF final ending, EC connective ending,
ETN nominalizing ending, ETM adnominal ending,
XPN noun prefix, XSN noun-derivational suffix, XSV verb-derivational suffix,
XSA adjective-derivational suffix, XR root,
SF period/question/exclamation mark, SE ellipsis, SS quotes/brackets/dash,
SP comma/middle dot/colon/slash, SO attachment marks (hyphen, swung dash),
SW other symbols (logic, currency, ...), SH Chinese characters, SL foreign words,
SN numbers, NF presumed noun, NV presumed verb, NA unanalyzable
"""

# NNG: general noun  NNP: proper noun  NP: pronoun
ex_pos2 = []  # nouns only
for (text, text_class) in ex_pos:  # (text, text class)
    if text_class == "NNG" or text_class == "NNP" or text_class == "NP":
        ex_pos2.append(text)

print(ex_pos2)  # ['???', '??', '??', '?', '???']

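Kkma also exposes morphs() for a flat morpheme list, and the NNG/NNP/NP filter above can be written as a single comprehension; a short sketch using the objects already defined:

# flat list of morphemes, without POS tags
print(kkma.morphs(para))

# the same noun filter as a list comprehension over (text, tag) pairs
noun_tags = {"NNG", "NNP", "NP"}
ex_pos2 = [text for text, text_class in ex_pos if text_class in noun_tags]
print(ex_pos2)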
 

03.WordCloud

ex_nouns

 

# -*- coding: utf-8 -*-
"""
1. read a test file
2. extract nouns: Kkma
3. preprocessing: drop numbers and single-character words
4. word cloud visualization
"""
from konlpy.tag import Kkma

# object
kkma = Kkma()

# 1. read the text file
file = open("../data/text_data.txt", mode="r", encoding="utf-8")
docs = file.read()  # the whole document
file.close()
print(docs)
"""
??? ??? ?????. ?? ??? ??? ?????.
??? ??? ?? ??? ???. Text mining ??? 2000? ??? ??? ????.
"""

# 1) doc -> sentences
ex_sent = kkma.sentences(docs)
print(len(ex_sent))  # 4
print(ex_sent)
"""
['??? ??? ?????.',
'?? ??? ??? ?????.',
'??? ??? ?? ??? ???.',
'Text mining ??? 2000? ??? ??? ????.']
"""

for sent in ex_sent:
    print(sent)

# 2) docs -> noun extraction
ex_nouns = kkma.nouns(docs)  # returns unique nouns only
print(len(ex_nouns))  # 13
print(ex_nouns)
"""
['???', '??', '?', '???', '??', '???',
'??', '2000', '2000?', '?', '??', '??', '??']
"""

from re import match

# 2~3. nouns per sentence -> preprocessing (drop numbers and single-character words)
nouns_words = []   # list
nouns_count = {}   # dict
for sent in ex_sent:                 # per sentence
    for nouns in kkma.nouns(sent):   # per noun
        # keep words longer than one character that do not start with a digit
        if len(str(nouns)) > 1 and not match('^[0-9]', nouns):
            nouns_words.append(nouns)
            # key=word, value=count
            nouns_count[nouns] = nouns_count.get(nouns, 0) + 1

print(len(nouns_words))  # 15 -> 12
print(nouns_count)
"""
{'???': 1, '??': 3, '???': 2, '??': 1,
'???': 1, '??': 1, '??': 1, '??': 1, '??': 1}
"""

# 4. word cloud visualization
from collections import Counter

# 1) dict -> Counter
word_count = Counter(nouns_count)

# 2) top words
top5 = word_count.most_common(5)
print(top5)
"""
[('??', 3), ('???', 2), ('???', 1), ('??', 1), ('???', 1)]
"""

# 3) word cloud visualization: package install
import pytagcloud
"""
In the Anaconda Prompt:
  pip install pygame
  pip install pytagcloud
  pip install simplejson
"""

# assign a color, size and tag to each word
word_count_list = pytagcloud.make_tags(top5, maxsize=80)
# maxsize: maximum font size
print(word_count_list)
"""
[{'color': (91, 34, 34), 'size': 109, 'tag': '??'}, {'color': (95, 159, 59), 'size': 80, 'tag': '???'}, {'color': (194, 214, 193), 'size': 47, 'tag': '???'}]
"""

pytagcloud.create_tag_image(word_count_list,
                            "wordcloud.jpg",
                            size=(900, 600),
                            fontname="korean", rectangular=False)
"""
To avoid an error when rendering Korean fonts,
in C:\Anaconda3\Lib\site-packages\pytagcloud\fonts:
  1. add an entry to fonts.json:
  [
    {
        "name": "korean",
        "ttf": "malgun.ttf",
  2. copy the 'Malgun Gothic' font file from C:\Windows\Fonts into the fonts folder
  3. call create_tag_image(fontname='korean')
"""

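pytagcloud is no longer maintained and the Korean-font patch above is fragile; the wordcloud package (pip install wordcloud) is a common alternative. A sketch, assuming malgun.ttf is available at the usual Windows path and reusing the nouns_count dict built above:

from wordcloud import WordCloud

# nouns_count maps word -> frequency, which generate_from_frequencies accepts directly
wc = WordCloud(font_path=r"C:\Windows\Fonts\malgun.ttf",  # a Korean-capable font
               width=900, height=600, background_color="white")
wc.generate_from_frequencies(nouns_count)
wc.to_file("wordcloud_alt.jpg")  # hypothetical output file name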
 

news_wordCloud

 

# -*- coding: utf-8 -*-
"""
news crawling data file
    - word cloud visualization
"""

from konlpy.tag import Kkma
from re import match  # needed for the digit filter below

# object
kkma = Kkma()

# 1. text file load
file = open("../data/crawling_data.txt",
            encoding="utf-8")
crawling_data = file.read()
file.close()
print(crawling_data)
"""
[['???', '"????', '???..中', '?????', '???', '?"'],
['"??', '????..?????', '???', '?', '??"'],
...
['???', '????', '??', '???', '???', '??', '??', '??', '?????']]
"""

# 2. docs -> sentences
ex_sent = kkma.sentences(crawling_data)
print(len(ex_sent))  # 9
print(ex_sent)

# 3. sentences -> noun extraction
# 4. text preprocessing: drop numbers and single-character words
# 5. word count: dict
ex_nouns = []    # list
word_count = {}  # dict
for sent in ex_sent:                 # per sentence
    for nouns in kkma.nouns(sent):   # per noun
        if len(str(nouns)) > 1 and not match('^[0-9]', nouns):
            ex_nouns.append(nouns)
            word_count[nouns] = word_count.get(nouns, 0) + 1
print(len(ex_nouns))  # 439
print(ex_nouns)
print(word_count)

# 5. Counter: extract the top 10 words
from collections import Counter
word_count = Counter(word_count)
top10 = word_count.most_common(10)
print(top10)
"""
[('?', 4), ('???', 3), ('?', 3), ('?', 3), ('?', 3), ('?', 3), ('?', 3), ('???', 3), ('??', 3), ('?', 3)]
"""

# 6. word cloud visualization
import pytagcloud
"""
In the Anaconda Prompt:
  pip install pygame
  pip install pytagcloud
  pip install simplejson
"""
# assign a color, size and tag to each word
word_count_list = pytagcloud.make_tags(top10, maxsize=80)
# maxsize: maximum font size
print(word_count_list)
"""
[{'color': (91, 34, 34), 'size': 109, 'tag': '??'}, {'color': (95, 159, 59), 'size': 80, 'tag': '???'}, {'color': (194, 214, 193), 'size': 47, 'tag': '???'}]
"""

pytagcloud.create_tag_image(word_count_list,
                            "news_wordcloud.jpg",
                            size=(900, 600),
                            fontname="korean", rectangular=False)
"""
To avoid an error when rendering Korean fonts,
in C:\Anaconda3\Lib\site-packages\pytagcloud\fonts:
  1. add an entry to fonts.json:
  [
    {
        "name": "korean",
        "ttf": "malgun.ttf",
  2. copy the 'Malgun Gothic' font file from C:\Windows\Fonts into the fonts folder
  3. call create_tag_image(fontname='korean')
"""

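The top10 list tends to be crowded by frequent but uninformative words; a small stopword filter applied before ranking can help. A sketch: which words belong in the set depends on the corpus, so the empty set below is only a placeholder to fill in after inspecting word_count.

from collections import Counter

# corpus-specific stopwords; fill in after inspecting word_count
stopwords = set()
filtered = {w: c for w, c in word_count.items() if w not in stopwords}
top10 = Counter(filtered).most_common(10)
print(top10)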
 

04.SparseMatrix

TfidfVectorizer

 

# -*- coding: utf-8 -*-
"""
TfidfVectorizer: builds a sparse matrix of Tfidf (term-frequency weighted) scores
 1. tokenizing   : sentences -> words
 2. word dict    : {word : index}
 3. sparse matrix: per-document word weights (TF, TFiDF)
    1) TF   : term frequency -> importance of a word within one document
    2) TFiDF: term frequency scaled by inverse document frequency
              (down-weights words common to many documents)
       -> TFiDF = tf(d, t) x log(n / df(t))
       (sklearn's default smooth_idf variant uses ln((1+n)/(1+df(t))) + 1
        and then l2-normalizes each document row)
"""

from sklearn.feature_extraction.text import TfidfVectorizer

# corpus
sentences = [
    "Mr. Green killed Colonel Mustard in the study with the candlestick. Mr. Green is not a very nice fellow.",
    "Professor Plum has a green plant in his study.",
    "Miss Scarlett watered Professor Plum's green plant while he was away from his office last week."
]

# 1. tokenizing: sentences -> words
tfidf_fit = TfidfVectorizer().fit(sentences)
print(tfidf_fit)  # object info
"""
TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
        stop_words=None, strip_accents=None, sublinear_tf=False,
        token_pattern='(?u)\\b\\w\\w+\\b', tokenizer=None, use_idf=True,
        vocabulary=None)
"""

# 2. word dict: {word: index}
voca = tfidf_fit.vocabulary_
print("word size=", len(voca))  # word size= 31
print(voca)  # 'mr': 14 -> 'mr' has column index 14
"""
{'mr': 14, 'green': 5, 'killed': 11, 'colonel': 2, 'mustard': 15, 'in': 9,
 'the': 24, 'study': 23, 'with': 30, 'candlestick': 1, 'is': 10, 'not': 17,
 'very': 25, 'nice': 16, 'fellow': 3, 'professor': 21, 'plum': 20, 'has': 6,
 'plant': 19, 'his': 8, 'miss': 13, 'scarlett': 22, 'watered': 27, 'while': 29,
 'he': 7, 'was': 26, 'away': 0, 'from': 4, 'office': 18, 'last': 12, 'week': 28}
"""

# related concept: word embedding

# 3. sparse matrix: DTM (rows: documents, columns: terms)
tfidf = TfidfVectorizer()  # object
sparse_tfidf = tfidf.fit_transform(sentences)
print(type(sparse_tfidf))  # <class 'scipy.sparse.csr.csr_matrix'>
print(sparse_tfidf.shape)  # DTM = (3 docs, 31 terms)
print("1.scipy.sparse.matrix")
print(sparse_tfidf)
"""
  (row:doc, col:term)  weight = Tfidf
  (0, 14)       0.4411657657527482  : 'mr'
  (0, 5)        0.26055960805891015 : 'green'
  (1, 5)        0.2690399207469689  : 'green'
  (1, 8)        0.34643788271971976
  (2, 5)        0.15978698032384395
  (2, 21)       0.2057548299742193
  (2, 20)       0.2057548299742193
  ...
"""

print("2.numpy sparse.matrix")
# scipy -> numpy conversion
tfidf_arr = sparse_tfidf.toarray()
print(tfidf_arr.shape)  # (3, 31)
print(type(tfidf_arr))  # <class 'numpy.ndarray'>
print(tfidf_arr)
"""
[[0.         0.22058288 0.22058288 0.22058288 0.         0.26055961
  0.         0.         0.         0.16775897 0.22058288 0.22058288
  ...
  0.        ]]
"""

"""
1. scipy sparse matrix
   -> tensorflow models
2. numpy sparse matrix
   -> sklearn models
"""

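A common next step with the DTM is measuring document similarity; a sketch using sklearn's cosine_similarity on the sparse matrix built above (it accepts the scipy matrix directly):

from sklearn.metrics.pairwise import cosine_similarity

# 3x3 matrix of pairwise cosine similarities between the documents
sim = cosine_similarity(sparse_tfidf)
print(sim.shape)  # (3, 3)
print(sim)        # docs 1 and 2 (both about Professor Plum's plant) should score highest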
 

Source: https://www.cnblogs.com/kingboy100/p/10410281.html