soupR.py 代码如下
# -*- coding: cp936 -*-
"""Download the ``.zip`` files linked from an index page.

The script fetches the page at ``URL``, walks every ``<a>`` element and, for
each link whose text starts with ``map`` and whose ``href`` ends in ``.zip``,
saves the target into ``localDir``.  Files already present locally are
skipped, and a failed download is reported without stopping the run.

Written for Python 2 (``urllib2``) and the ``BeautifulSoup`` package.
"""
import urllib
import urllib2
import os
import re

from BeautifulSoup import BeautifulSoup

localDir = 'D:/test/'
URL = "http://cran.dataguru.cn/bin/windows/contrib/3.1/"

page = urllib2.urlopen(URL)
try:
    soup = BeautifulSoup(page)
finally:
    # Release the connection even when parsing the index page fails.
    page.close()

for a in soup.findAll('a'):
    text = a.text.strip()
    if not text.startswith('map'):
        continue

    # Anchors without an href attribute yield None; skip them instead of
    # crashing on None.endswith().
    href = a.get('href')
    if not href or not href.endswith('.zip'):
        continue

    # The link text doubles as the local file name.
    localFile = localDir + text
    if os.path.exists(localFile):
        print('%s is exists' % localFile)
        continue

    try:
        # Download the file at the given URL and save it to the local
        # directory.
        urllib.urlretrieve(URL + href, localFile)
    except Exception as e:
        # Best effort: report the failure and carry on with the next link.
        # %r keeps the message ASCII-safe whatever encoding the error uses.
        print('%s download failed: %r' % (href, e))
        # A truncated file would be mistaken for a finished download by the
        # os.path.exists() check on the next run, so remove it.
        if os.path.exists(localFile):
            try:
                os.remove(localFile)
            except OSError as cleanup_error:
                print('%s could not be removed: %r'
                      % (localFile, cleanup_error))
    else:
        print('%s is download' % href)
原文地址:http://blog.csdn.net/belldeep/article/details/40006283