标签:文件 log blog text switch erro comment data url
# Crawl NetEase Cloud Music (music.163.com) playlists with Selenium/Chrome and
# record every playlist/song whose counter exceeds MIN_COUNT to text files.
import threading
from collections import deque

from selenium import webdriver
from selenium.common.exceptions import WebDriverException

# Raw string: the original non-raw literal relied on invalid escapes (\P, \G, ...).
CHROMEDRIVER_PATH = r'C:\Program Files\Google\Chrome\Application\chromedriver.exe'

# A playlist/song is recorded only when its on-page counter exceeds this value.
MIN_COUNT = 10000

# URLs already handled, shared by all crawler threads.
songList = set()
playList = set()

# Guards the check-then-add on the two sets above so that two threads cannot
# both claim the same URL.
_seen_lock = threading.Lock()


def _mark_seen(seen, url):
    """Atomically add *url* to *seen*; return False if it was already there."""
    with _seen_lock:
        if url in seen:
            return False
        seen.add(url)
        return True


def _collect_links(browser, selector, fmt):
    """Return a deque with the href of every anchor matching *selector*.

    Each link found is printed with *fmt* (a ``%``-format taking the anchor
    text and its href). Anchors that cannot be read are skipped.
    """
    links = deque()
    try:
        anchors = browser.find_elements_by_css_selector(selector)
    except WebDriverException:
        print('someerror')
        return links
    for anchor in anchors:
        try:
            title = anchor.text
            href = anchor.get_property('href')
        except WebDriverException:
            # Element could not be read (e.g. it went stale); skip it.
            continue
        try:
            print(fmt % (title, href))
        except UnicodeEncodeError:
            # The console cannot render the title; the link is still usable.
            pass
        links.append(href)
    return links


def _scrape_playlist_page(url, browser):
    """Load one playlist page, process its songs, return its playlist links.

    The text of the element with id ``play-count`` is parsed as an integer;
    when it exceeds MIN_COUNT a record is appended to ``D:\\songList.txt``.
    Every song link on the page is then handed to song_queue(). The browser
    is left open on the page.

    Raises:
        WebDriverException: the page or the ``play-count`` element could not
            be loaded.
        ValueError: the ``play-count`` text is not an integer.
    """
    browser.get(url)
    play_count = browser.find_element_by_id('play-count').text
    if int(play_count) > MIN_COUNT:
        # NOTE(review): the value comes from the 'play-count' element but is
        # labelled "评论数" (comment count); label kept as-is so existing
        # output files stay consistent -- confirm the intended wording.
        data = ('\n' + browser.find_element_by_class_name('f-ff2').text
                + ' 评论数:' + str(play_count) + ' 地址:' + url)
        save_file(data, 'D:\\songList.txt')

    songs = _collect_links(browser, r'a[href^=\/song]', "歌曲名字: %s 地址 %s")
    # Songs are opened in their own browser instances, so the elements of the
    # current page stay valid for the playlist-link scan that follows.
    song_queue(songs, browser)

    return _collect_links(browser, r'a[href^=\/playlist]', "歌单: %s 地址 %s")


def chrome_browser_songList(url, browser):
    """Crawl the playlist at *url* and every playlist reachable from it.

    The same *browser* is used for the whole crawl and is always quit before
    this function returns, whether the crawl finished or failed.

    Args:
        url: address of the starting playlist page.
        browser: a Selenium WebDriver; ownership passes to this function.
    """
    try:
        pending = _scrape_playlist_page(url, browser)
        play_list_queue(pending, browser)
    finally:
        # quit() (not close()) also terminates the chromedriver process.
        browser.quit()


def chrome_browser_song(url):
    """Open the song page at *url* and record it if it has enough comments.

    A dedicated Chrome instance is started for the page. The comment counter
    is read inside the ``g_iframe`` frame; when it exceeds MIN_COUNT the song
    name, artist, counter and URL are appended to ``D:\\song.txt``.

    Raises:
        WebDriverException: the page, frame or an element could not be found.
        ValueError: the comment counter text is not an integer.
    """
    browser = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        browser.get(url)
        browser.switch_to_frame('g_iframe')
        comment_count = browser.find_element_by_id('cnt_comment_count').text
        if int(comment_count) > MIN_COUNT:
            data = ('\n歌曲名字:' + browser.find_element_by_class_name('f-ff2').text
                    + ' 歌手:' + browser.find_element_by_css_selector(r'a[href^=\/artist]').text
                    + ' 评论数:' + comment_count + ' 歌曲地址:' + url)
            save_file(data, 'D:\\song.txt')
    finally:
        # Always release the browser, even when scraping fails part-way.
        browser.quit()


def save_file(data, file):
    """Append the string *data* to the file at path *file* (UTF-8)."""
    # Explicit encoding: titles may contain characters that the platform
    # default codec cannot encode.
    with open(file, 'a', encoding='utf-8') as f_obj:
        f_obj.write(data)


def song_queue(songQueue, browser):
    """Process every not-yet-seen song URL in *songQueue* (consumed in place).

    *browser* is accepted for interface compatibility only; each song is
    opened in its own browser by chrome_browser_song(). A failing song is
    reported and skipped.
    """
    while songQueue:
        current_url = songQueue.popleft()
        if not _mark_seen(songList, current_url):
            continue
        try:
            chrome_browser_song(current_url)
        except Exception as exc:  # best-effort crawl: report and move on
            print('skipping song %s: %r' % (current_url, exc))


def play_list_queue(listQueue, browser):
    """Crawl every not-yet-seen playlist URL in *listQueue* using *browser*.

    Playlist links discovered on each page are appended to *listQueue*, so
    the crawl proceeds iteratively instead of by recursion. *browser* is
    left open; the caller is responsible for quitting it. A failing playlist
    is reported and skipped.
    """
    while listQueue:
        current_url = listQueue.popleft()
        if not _mark_seen(playList, current_url):
            continue
        try:
            listQueue.extend(_scrape_playlist_page(current_url, browser))
        except Exception as exc:  # best-effort crawl: report and move on
            print('skipping playlist %s: %r' % (current_url, exc))


url_list = [
    'http://music.163.com/playlist?id=598057191',
    'http://music.163.com/#/playlist?id=144236857',
]


def _crawl_seed(url):
    """Start a Chrome instance and crawl from the seed playlist *url*."""
    browser = webdriver.Chrome(CHROMEDRIVER_PATH)
    chrome_browser_songList(url, browser)


def thread_1():
    """Thread target: crawl from the first seed playlist."""
    _crawl_seed(url_list[0])


def thread_2():
    """Thread target: crawl from the second seed playlist."""
    _crawl_seed(url_list[1])


def thread_song():
    """Return the (unstarted) crawler threads, one per seed playlist."""
    return [
        threading.Thread(target=thread_1),
        threading.Thread(target=thread_2),
    ]


if __name__ == '__main__':
    threads = thread_song()
    for t in threads:
        # Daemon threads let the process exit if the main thread is interrupted.
        t.daemon = True
        t.start()
    # Start all threads first, then wait for all of them: joining only one
    # would let the process exit and kill the other daemon thread.
    for t in threads:
        t.join()
因为没有解决登录问题,采用了一种比较笨的方法~~
标签:文件 log blog text switch erro comment data url
原文地址:http://www.cnblogs.com/red-j/p/6433227.html