敏感词过滤,国内混的同学看到这个都会会心一笑。其实敏感词过滤,在几乎所有国家都是存在的,只是表现的形式并不完全相同而已。
既然这个功能叫做关键词过滤,那么把它放在过滤器(Filter)中实现,应该是一个好主意。
1、利用过滤器(Filter)实现敏感信息过滤(Java)
过滤器的JAVA代码:
package com.filter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import javax.servlet.http.HttpServletResponse;
/**
*一个简单的敏感词过滤器,这里针对从GET的方式做了转码,如果用POST方式,将ISO8859-1字符转换为UTF-8下面一段要注释掉
*@author 范芳铭
*/
public class EasyDirtyFilter implementsFilter{
private FilterConfig config = null;
@Override
public void init(FilterConfig filterConfig) throws ServletException {
System.out.println("----过滤器初始化----");
this.config = filterConfig;
}
//过滤器功能在这里实现
@Override
public void doFilter(ServletRequest req, ServletResponse resp,
FilterChain chain) throwsIOException, ServletException {
HttpServletRequest request = (HttpServletRequest) req;
HttpServletResponse response = (HttpServletResponse) resp;
String charset = "UTF-8";
request.setCharacterEncoding(charset);
response.setCharacterEncoding(charset);
response.setContentType("text/html;charset="+charset);
DirtyRequest dirtyreq = new DirtyRequest(request);
chain.doFilter(dirtyreq, response);
}
@Override
public void destroy() {
System.out.println("----过滤器销毁----");
}
private List<String> getDirtyWords(){
List<String> dirtyWords = new ArrayList<String>();
String dirtyWordPath = config.getInitParameter("dirtyword");
InputStream inputStream =config.getServletContext().getResourceAsStream(dirtyWordPath);
InputStreamReader is = null;
try {
is = newInputStreamReader(inputStream,"UTF-8");
} catch (UnsupportedEncodingException e2) {
e2.printStackTrace();
}
BufferedReader reader = new BufferedReader(is);
String line;
try {
while ((line =reader.readLine())!= null) {//如果 line为空说明读完了
dirtyWords.add(line);
}
} catch (IOException e) {
e.printStackTrace();
}
return dirtyWords;
}
//使用Decorator模式包装request对象,实现敏感字符过滤功能
class DirtyRequest extends HttpServletRequestWrapper{
private List<String> dirtyWords = getDirtyWords();
private HttpServletRequest request;
public DirtyRequest(HttpServletRequest request) {
super(request);
this.request = request;
}
//重写getParameter方法,实现对敏感字符的过滤
@Override
public String getParameter(String name) {
String value =this.request.getParameter(name);
//如果get的方式提交表单,通过request.setCharacterEncoding("UTF-8");这种方式是解决不了中文乱码问题
//参考:http://blog.csdn.net/ffm83/article/details/43229819
if(value==null){
return null;
}
//将ISO8859-1字符转换为UTF-8
try {
value=new String(value.getBytes("ISO8859-1"),"UTF-8") ;
}catch (UnsupportedEncodingException e) {
//TODO Auto-generated catch block
e.printStackTrace();
}
for(String dirtyWord : dirtyWords){
if(value.contains(dirtyWord)){
System.out.println("内容中包含敏感词:"+dirtyWord+",将会被替换成****");
//替换敏感字符
value =value.replace(dirtyWord, "****");
}
}
return value;
}
}
}
2、将过滤器添加到web.xml
<!-- Register the sensitive-word filter; the class name must match com.filter.EasyDirtyFilter -->
<filter>
    <filter-name>easyFilter</filter-name>
    <filter-class>com.filter.EasyDirtyFilter</filter-class>
    <!-- Path of the sensitive-word file, read by the filter through the servlet context -->
    <init-param>
        <param-name>dirtyword</param-name>
        <param-value>/WEB-INF/dirtyword.txt</param-value>
    </init-param>
</filter>
<!-- Map the filter -->
<filter-mapping>
    <filter-name>easyFilter</filter-name>
    <!-- "/*" intercepts every request -->
    <url-pattern>/*</url-pattern>
</filter-mapping>
<servlet>
    <servlet-name>dirty</servlet-name>
    <servlet-class>com.servlet.RequestDirty</servlet-class>
</servlet>
<servlet-mapping>
    <servlet-name>dirty</servlet-name>
    <url-pattern>/dirty</url-pattern>
</servlet-mapping>
3、测试用的servlet源代码
package com.servlet;
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
/**
*从外部获取信息,如果有敏感词,那么需要过滤
*@author 范芳铭
*/
public class RequestDirty extendsHttpServlet {
publicvoid doGet(HttpServletRequest request, HttpServletResponse response)
throwsServletException, IOException {
Stringinfo = request.getParameter("info");
//在过滤器里进行了转码,这里就不要再进行转码
//info=new String(info.getBytes("ISO8859-1"),"UTF-8") ;
System.out.println(info);
PrintWriterout = response.getWriter();
out.write("获得信息如下:" + info);
}
publicvoid doPost(HttpServletRequest request, HttpServletResponse response)
throwsServletException, IOException {
doGet(request,response);
}
}
4、其他
一个关键词文件,dirtyword.txt 放在WEB-INF下。
关键词文件如下:(仅供示例,无任何含义)
粗话
黑人
黑鬼
5、测试
在浏览器输入:
http://127.0.0.1:8080/webStudy/dirty?info=黑人是美国无产阶级的成员
页面输出结果:获得信息如下:****是美国无产阶级的成员
后台输出情况:
内容中包含敏感词:黑人,将会被替换成****
****是美国无产阶级的成员
原文地址:http://blog.csdn.net/ffm83/article/details/43530337