# Tags:
# coding: utf-8
import re


def strip_tags(string, allowed_tags=''):
    """Remove ``<...>`` tags from *string*, optionally keeping some of them.

    Args:
        string: Text that may contain HTML/XML-style tags.
        allowed_tags: Comma-separated tag names to keep, e.g. ``'a,b'``.
            Whitespace around each name is ignored and names are compared
            case-insensitively. An empty value removes every tag.

    Returns:
        The text with every non-allowed tag removed. Text located between
        tags is left untouched; only the tag markup itself is dropped.
    """
    tag_pattern = r'<[^>]*>'

    # Normalise the allow-list: tolerate None, surrounding blanks and
    # empty entries such as the one produced by a trailing comma.
    names = [name.strip() for name in (allowed_tags or '').split(',')]
    names = [name for name in names if name]
    if not names:
        # Nothing is allowed, so strip every tag.
        return re.sub(tag_pattern, '', string)

    # The negative lookahead makes the name match as a whole word, so that
    # allowing "a" does not also allow "<abbr>" and "b" does not allow "<br>".
    # Names are escaped because they are user data, not regex fragments.
    allowed_re = re.compile(
        r'</?(?:%s)(?![\w:-])[^>]*>' % '|'.join(re.escape(n) for n in names),
        re.I
    )

    def _keep_if_allowed(match):
        tag = match.group(0)
        return tag if allowed_re.match(tag) else ''

    # Single pass over the text instead of one substitution per rejected tag.
    return re.sub(tag_pattern, _keep_if_allowed, string)
# Tags:
# Original article: http://www.cnblogs.com/bushe/p/4482114.html