标签:
using System.Text.RegularExpressions;

/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <param name="Htmlstring">
/// Source text that may contain HTML markup. A null or empty value yields an empty string.
/// </param>
/// <returns>
/// The text with script blocks, comments and tags removed, a small set of character
/// entities decoded, then HTML-encoded again and trimmed.
/// </returns>
public static string DeleteHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Singleline lets '.' match line breaks, so scripts and comments that span
    // several lines are removed as a whole instead of leaking their contents.
    const RegexOptions multiLine = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    // Remove script blocks together with their source text.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", multiLine);

    // Remove complete comments before the generic tag pass; otherwise a '>' inside
    // a comment would end the tag match early and leak the rest of the comment.
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*?-->", "", multiLine);

    // Remove the remaining tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Drop a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"[\r\n]\s+", "");

    // Clean up fragments of unbalanced comments: a stray terminator, or an opener
    // that is never closed (removed up to the end of its line).
    Htmlstring = Htmlstring.Replace("-->", "");
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "");

    // Decode entities in ONE pass so that already-decoded output is never decoded
    // again (sequential passes would turn "&amp;lt;" into "<").
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new MatchEvaluator(DecodeHtmlEntity),
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    // NOTE: the original code called Htmlstring.Replace("<", ""), Replace(">", "") and
    // Replace("\r\n", "") here but discarded the results, so they never had any effect.
    // They are dropped rather than "activated": the encoding below already neutralises
    // any leftover angle brackets without deleting characters from the text.

    // HttpUtility.HtmlEncode does not need an active HttpContext, so this method also
    // works outside of a web request (background threads, unit tests).
    return System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();
}

/// <summary>
/// Maps one matched character entity to its replacement text.
/// </summary>
/// <param name="entity">
/// A match whose first group is the entity name or "#" followed by decimal digits.
/// </param>
/// <returns>
/// The decoded character for the supported entities; an empty string for any other
/// numeric character reference.
/// </returns>
private static string DecodeHtmlEntity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            // Only numeric references reach this branch; unsupported ones are removed.
            return string.Empty;
    }
}
标签:
原文地址:http://www.cnblogs.com/soulmate/p/5604053.html