码迷,mamicode.com
首页 > 其他好文 > 详细

统计文本中重复的内容

时间:2018-03-27 21:58:30      阅读:232      评论:0      收藏:0      [点我收藏+]

标签:null   cti   util   txt   record   adl   cep   AC   ring   

 

1. 统计一个文本文件中重复出现的行(按整行比较,输出出现次数大于 1 的行及其出现次数)

package count;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * Counts how often each line occurs in a text file and reports the lines that
 * occur more than once.
 */
public class countWord {

    public static void main(String[] args) {
        count("F:\\A\\B.xml");
    }

    /**
     * Reads the file at {@code filepath} as UTF-8, tallies every distinct line and
     * prints each line that occurs more than once together with its count,
     * followed by the number of such lines.
     *
     * <p>Prints {@code file not exist} and returns when the path does not exist.
     * Any exception raised while opening or reading the file is printed as a
     * stack trace and not rethrown.
     *
     * @param filepath path of the text file to scan
     */
    public static void count(String filepath) {
        try {
            File file = new File(filepath);
            if (!file.exists()) {
                System.out.println("file not exist");
                return;
            }

            // Line text -> number of occurrences seen so far.
            Map<String, Integer> map = new HashMap<String, Integer>();

            // try-with-resources closes the reader and the underlying stream even
            // when reading fails; the stream is declared separately so it is
            // released even if wrapping it in a reader throws.
            try (FileInputStream in = new FileInputStream(file);
                    BufferedReader bufReader =
                            new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
                String line;
                while ((line = bufReader.readLine()) != null) {
                    Integer seen = map.get(line);
                    map.put(line, seen == null ? 1 : seen + 1);
                }
            }

            // Print the collected counts.
            showMap(map);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Prints every entry whose count is greater than one as
     * {@code <line>......<count>}, then a summary line carrying the number of
     * entries printed. Does nothing when {@code map} is {@code null}.
     *
     * @param map line text mapped to its occurrence count
     */
    private static void showMap(Map<String, Integer> map) {
        if (map == null) {
            return;
        }
        int count = 0;
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            // Only lines seen more than once are reported.
            if (entry.getValue() > 1) {
                System.out.println(entry.getKey() + "......" + entry.getValue());
                count++;
            }
        }
        // The label is emitted unchanged; the number is how many distinct lines
        // occurred more than once.
        System.out.println("重复两次的数据:" + count);
    }
}

 

2. 统计两个文本文件中共有的行(按整行比较,输出第二个文件中同时出现在第一个文件里的行)

package count;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * Reports the lines of a second text file that also occur in a first text file.
 */
public class countWordTowFile {

    public static void main(String[] args) {
        count("F:\\A\\B.xml", "C:\\D\\E.txt");
    }

    /**
     * Reads both files as UTF-8 and prints every distinct line of
     * {@code filepath2} that also occurs in {@code filepath}, followed by the
     * number of such lines.
     *
     * <p>Prints {@code file not exist} and returns when either path does not
     * exist. Any exception raised while opening or reading the files is printed
     * as a stack trace and not rethrown.
     *
     * @param filepath  path of the first file, whose lines form the lookup set
     * @param filepath2 path of the second file, whose lines are checked against it
     */
    public static void count(String filepath, String filepath2) {
        try {
            File file = new File(filepath);
            File file2 = new File(filepath2);
            if (!file.exists() || !file2.exists()) {
                System.out.println("file not exist");
                return;
            }

            // Lines of the first file; every line is stored with the value 1.
            Map<String, Integer> map = new HashMap<String, Integer>();
            // Lines of the second file that were also found in the first file.
            Map<String, Integer> map2 = new HashMap<String, Integer>();

            // try-with-resources closes both readers and their streams even when
            // opening the second file or reading either file fails.
            try (FileInputStream in = new FileInputStream(file);
                    FileInputStream in2 = new FileInputStream(file2);
                    BufferedReader bufReader =
                            new BufferedReader(new InputStreamReader(in, "UTF-8"));
                    BufferedReader bufReader2 =
                            new BufferedReader(new InputStreamReader(in2, "UTF-8"))) {
                String line;
                // Read the contents of the first file.
                while ((line = bufReader.readLine()) != null) {
                    map.put(line, 1);
                }
                // Read the contents of the second file.
                while ((line = bufReader2.readLine()) != null) {
                    Integer inFirst = map.get(line);
                    if (inFirst != null) {
                        // The stored value is always 1, so this records 2.
                        map2.put(line, inFirst + 1);
                    }
                }
            }

            // Print the lines common to both files.
            showMap(map2);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Prints every entry as {@code <line>......<value>}, then a summary line
     * carrying the number of entries printed. Does nothing when {@code map} is
     * {@code null}.
     *
     * @param map line text mapped to the value to print next to it
     */
    private static void showMap(Map<String, Integer> map) {
        if (map == null) {
            return;
        }
        int count = 0;
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.println(entry.getKey() + "......" + entry.getValue());
            count++;
        }
        // The label is emitted unchanged; the number is how many distinct lines
        // were printed.
        System.out.println("重复两次的数据:" + count);
    }
}

 

统计文本中重复的内容

标签:null   cti   util   txt   record   adl   cep   AC   ring   

原文地址:https://www.cnblogs.com/taiguyiba/p/8660207.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!