标签:std http split 删除 join url python nbsp query
# encoding=utf-8
"""Small URL and file-system exercises.

1. save_url_content(url, folder_path): download a URL into a folder.
2. merge(folder_path): concatenate every file under a folder into one file.
3. get_url_count(url): count the links in a page.
4. qs(url): return the query-string parameters of a URL as a dict.
"""
from urllib.request import urlopen
import urllib.parse as urlparse  # Python 3 home of the old ``urlparse`` module
import random
import os


def _is_http_url(url):
    """Return True when *url* starts with ``http://`` or ``https://``."""
    return url.startswith(('http://', 'https://'))


def save_url_content(url, folder_path=None):
    """Download *url* and store the body in a randomly named file.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.
        folder_path: Existing directory that receives the file.

    Returns:
        The path of the written file, or an error message string when the
        URL scheme is wrong or *folder_path* is not a directory.
    """
    if not _is_http_url(url):
        return u'url地址不符合规格'
    # ``os.path.isdir(None)`` raises TypeError, so the default value has to be
    # rejected explicitly before the directory check.
    if folder_path is None or not os.path.isdir(folder_path):
        return u'folder_path非文件夹'

    with urlopen(url) as response:
        content = response.read()

    # NOTE: only 1000 distinct names exist, so an earlier download that drew
    # the same number is overwritten.
    rand_filename = 'test_%s' % random.randint(1, 1000)
    file_path = os.path.join(folder_path, rand_filename)
    with open(file_path, 'wb') as out:
        out.write(content)
    return file_path


def get_url_count(url):
    """Count the occurrences of ``<a href=`` in the content fetched from *url*.

    Returns:
        The number of occurrences, or an error message string when the URL
        does not start with ``http://`` or ``https://``.
    """
    if not _is_http_url(url):
        return u'url地址不符合规格'
    with urlopen(url) as response:
        content = response.read()
    # ``read()`` returns bytes, so the marker must be a bytes literal too.
    return content.count(b'<a href=')


def merge(folder_path, output_path='merge_test'):
    """Append the content of every file under *folder_path* to *output_path*.

    Sub-directories are merged recursively. Entries are visited in sorted
    name order so the result is deterministic. The output is opened in
    append mode, so content already in it is kept.

    Args:
        folder_path: Directory whose files are merged.
        output_path: File that receives the merged bytes.

    Returns:
        ``'not exists'`` when *folder_path* does not exist, otherwise None.
    """
    if not os.path.exists(folder_path):
        return 'not exists'

    output_abs = os.path.abspath(output_path)
    for name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, name)
        if os.path.isdir(file_path):
            merge(file_path, output_path)
        elif os.path.abspath(file_path) == output_abs:
            # Never feed the output file back into itself.
            continue
        else:
            with open(file_path, 'rb') as src, open(output_path, 'ab') as dst:
                dst.write(src.read())


def qs(url):
    """Return the query parameters of *url* as a ``{name: first_value}`` dict.

    ``parse_qs`` yields a list of values per name; only the first is kept.
    """
    query = urlparse.urlparse(url).query
    return {key: values[0] for key, values in urlparse.parse_qs(query).items()}


if __name__ == '__main__':
    print(save_url_content('http://www.baidu.com', 'tmp'))
    print(get_url_count('http://hi.baidu.com/jxq61/item/149d29cc8d52513d4594168f'))
    merge('tmp')
    print(qs('http://126.com'))
    print(qs('http://api/api?f=5&g=6&y=5'))
    print(qs('http://api/api?11=53'))
# Exercise 5: define a func(folder) that deletes every file under that folder.
# Solved with recursion.
def delete(folder_path):
    """Remove every file below *folder_path*, leaving the directories in place.

    Real sub-directories are emptied recursively. A symbolic link is removed
    as a link and never followed, so nothing outside *folder_path* is touched.

    Args:
        folder_path: Directory whose files are deleted.

    Returns:
        ``'not exists'`` when *folder_path* does not exist, otherwise None.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.remove``, e.g. when
            *folder_path* exists but is not a directory.
    """
    if not os.path.exists(folder_path):
        return 'not exists'

    for name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, name)
        # ``isdir`` follows symlinks; recursing through a linked directory
        # would delete files that live outside the requested folder.
        if os.path.isdir(file_path) and not os.path.islink(file_path):
            delete(file_path)
        else:
            os.remove(file_path)


if __name__ == '__main__':
    delete('/tmp/5')
标签:std http split 删除 join url python nbsp query
原文地址:http://www.cnblogs.com/think-and-do/p/6363244.html