工作中经常会遇到处理大数据量的问题,下面两个工具类就是在处理大数据时编写的。推荐使用map的方式来比较两个list的数据:当list相当大时,这个方法的优势就体现出来了——当时处理两个十万级的list,就是用该方法的变种搞定的。
1. txt文件、list集合比较工具
<span style="font-family:KaiTi_GB2312;font-size:18px;">package com.hudong.util.other; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringUtils; /** * txt文件对比工具 * list集合比较工具 * @Title: TxtCompareUtil.java * @Copyright: Copyright (c) 2005 * @author 杨凯 */ public class TxtCompareUtil { /** * 使用两个list包含来对比 * * @param input1 * @param input2 * @param output1 * @param output2 */ public static void compareTxt(String input1, String input2, String output1, String output2) { List<String> listInput1 = TxtReadWriteUtil.readTxt(input1); List<String> listInput2 = TxtReadWriteUtil.readTxt(input2); for (String str : listInput1) { if (listInput2.contains(StringUtils.upperCase(str)) || listInput2.contains(StringUtils.lowerCase(str)) || listInput2.contains((str))) { TxtReadWriteUtil.writerTXT(str, output1, true); } else { TxtReadWriteUtil.writerTXT(str, output2, true); } } } /** * 求两个list的交集 通过求交集的方法找出两个txt的共同部分 * * @param input1 * @param input2 * @param output1 * @param output2 */ public static void interseTxt(String input1, String input2, String output1, String output2) { List<String> listInput1 = TxtReadWriteUtil.readTxt(input1); List<String> listInput2 = TxtReadWriteUtil.readTxt(input2); if (listInput2.retainAll(listInput1)) { System.out.println(listInput2); // TxtReadWriteUtil.writerTXT(listInput2, output1, true); } else { // TxtReadWriteUtil.writerTXT(listInput2, output2, true); } } /** * 求两个list的并集 通过求两个list的并集来合并两个txt * * @param input1 * @param input2 * @param output1 * @param output2 */ public static void unionTxt(String input1, String input2, String output1, String output2) { List<String> listInput1 = TxtReadWriteUtil.readTxt(input1); List<String> listInput2 = TxtReadWriteUtil.readTxt(input2); listInput2.removeAll(listInput1); // 移除相同的 listInput2.addAll(listInput1); // 放入不同的 System.out.println(listInput2); } /** * 获取两个集合的不同元素 * * @param 
collmax * @param collmin * @return */ @SuppressWarnings( { "unchecked" }) public static Collection getDiffent(Collection collmax, Collection collmin) { // 使用LinkeList防止差异过大时,元素拷贝 Collection csReturn = new LinkedList(); Collection max = collmax; Collection min = collmin; // 先比较大小,这样会减少后续map的if判断次数 if (collmax.size() < collmin.size()) { max = collmin; min = collmax; } // 直接指定大小,防止再散列 Map<Object, Integer> map = new HashMap<Object, Integer>(max.size()); for (Object object : max) { map.put(object, 1); } for (Object object : min) { if (map.get(object) == null) { csReturn.add(object); } else { map.put(object, 2); } } for (Map.Entry<Object, Integer> entry : map.entrySet()) { if (entry.getValue() == 1) { csReturn.add(entry.getKey()); } } return csReturn; } /** * 获取两个集合的不同元素,去除重复 * * @param collmax * @param collmin * @return */ @SuppressWarnings( {"unchecked"}) public static Collection getDiffentNoDuplicate(Collection collmax, Collection collmin) { return new HashSet(getDiffent(collmax, collmin)); } public static void main(String[] args) { unionTxt("e:/test/input1.txt", "e:/test/input2.txt", "e:/test/output1.txt", "e:/test/output1.txt"); } } </span>
<span style="font-family:KaiTi_GB2312;font-size:18px;">package com.hudong.util.other; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.util.ArrayList; import java.util.List; import org.dom4j.io.OutputFormat; import org.dom4j.io.XMLWriter; /** * txt文件的读写操作工具 * * @Title: TxtReadWriteUtil.java * @Copyright: Copyright (c) 2005 * @author 杨凯 */ public class TxtReadWriteUtil { /** * 写txt 方式一 * * @param conent * @param txtPath * @param isAppend */ public synchronized static void writerTXT(String conent, String txtPath, boolean isAppend) { try { File file = new File(txtPath); if (!file.getParentFile().exists()) { file.getParentFile().mkdirs(); } if (!file.exists()) { file.createNewFile(); } FileWriter fileWriter = new FileWriter(txtPath, isAppend); BufferedWriter bw = new BufferedWriter(fileWriter); bw.write(conent); bw.newLine(); fileWriter.flush(); bw.close(); fileWriter.close(); } catch (Exception e) { e.printStackTrace(); } } /** * 写txt 方式二 * * @param file * @param sb */ public static void createTxt(String file, StringBuffer sb) { try { OutputFormat format = OutputFormat.createPrettyPrint(); format.setEncoding("gbk"); format.setExpandEmptyElements(true); format.setTrimText(false); FileOutputStream fos = new FileOutputStream(file); XMLWriter xmlWriter = new XMLWriter(fos, format); xmlWriter.write(sb.toString()); xmlWriter.close(); } catch (Exception e) { e.printStackTrace(); } } /** * 读txt * * @param filePath * @return */ public static List<String> readTxt(String filePath) { List<String> list = new ArrayList<String>(); try { BufferedReader br = new BufferedReader(new FileReader(filePath)); String line = null; while ((line = br.readLine()) != null) { list.add(line); } } catch (Exception e) { e.printStackTrace(); } return list; } }</span>
转载请指明:http://blog.csdn.net/yangkai_hudong
list集合、txt文件对比的工具类和文件读写工具类,布布扣,bubuko.com
原文地址:http://blog.csdn.net/yangkai_hudong/article/details/28910059