标签:gre item ons logs arraylist 传统 null value val
需求:读入一个文本文件,确定所有单词的使用频率并从高到低排序,打印出所有单词及其频率的排序列表
先用传统方法解:
package cn._1.wordfrequency;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Word-frequency counter written in the traditional, imperative (pre-Java 8) style.
 *
 * <p>Adapted from "Functional Thinking" by Neal Ford (O'Reilly).
 */
public class Word {

    /**
     * Common words that are left out of the count.
     *
     * <p>Shared and unmodifiable: the list never changes, so there is no reason to rebuild it
     * for every instance.
     */
    private static final Set<String> NON_WORDS = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "the", "and", "of", "to", "a",
                    "i", "it", "in", "or", "is",
                    "as", "so", "but", "be")));

    /** A word is a maximal run of regex word characters; compiled once and reused. */
    private static final Pattern WORD = Pattern.compile("\\w+");

    /**
     * Counts how often each word occurs in the given text.
     *
     * <p>Words are lower-cased before counting and the entries of {@code NON_WORDS} are skipped.
     *
     * @param words the text to analyse; must not be {@code null}
     * @return a mutable map from lower-cased word to its number of occurrences, iterated in
     *         ascending word order
     * @throws NullPointerException if {@code words} is {@code null}
     */
    public Map<String, Integer> wordFreq(String words) {
        Objects.requireNonNull(words, "words");
        // TreeMap keeps the keys sorted, so callers get the words in natural order.
        Map<String, Integer> wordMap = new TreeMap<>();
        Matcher m = WORD.matcher(words);
        while (m.find()) {
            // Locale.ROOT keeps the folding independent of the default locale; with a Turkish
            // default, "I" would otherwise become a dotless i and slip past the stop-word filter.
            String word = m.group().toLowerCase(Locale.ROOT);
            if (NON_WORDS.contains(word)) {
                continue;
            }
            Integer count = wordMap.get(word);
            wordMap.put(word, count == null ? 1 : count + 1);
        }
        return wordMap;
    }
}
再使用Java8的新特性解:
package cn._1.wordfrequency;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Word-frequency counter written with the Java 8 Stream API.
 *
 * <p>Adapted from "Functional Thinking" by Neal Ford (O'Reilly).
 */
public class Word2 {

    /**
     * Common words that are left out of the count.
     *
     * <p>Shared and unmodifiable: the list never changes, so there is no reason to rebuild it
     * for every instance.
     */
    private static final Set<String> NON_WORDS = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "the", "and", "of", "to", "a",
                    "i", "it", "in", "or", "is",
                    "as", "so", "but", "be")));

    /** A word is a maximal run of regex word characters; compiled once and reused. */
    private static final Pattern WORD = Pattern.compile("\\w+");

    /**
     * Collects every match of {@code pattern} in {@code words}, in order of appearance.
     */
    private static List<String> regexToList(String words, Pattern pattern) {
        List<String> wordList = new ArrayList<>();
        Matcher m = pattern.matcher(words);
        while (m.find()) {
            wordList.add(m.group());
        }
        return wordList;
    }

    /**
     * Counts how often each word occurs in the given text.
     *
     * <p>Words are lower-cased before counting and the entries of {@code NON_WORDS} are skipped.
     *
     * @param words the text to analyse; must not be {@code null}
     * @return a mutable map from lower-cased word to its number of occurrences, iterated in
     *         ascending word order
     * @throws NullPointerException if {@code words} is {@code null}
     */
    public Map<String, Integer> wordFreq(String words) {
        Objects.requireNonNull(words, "words");
        /*
         * Stream  : a sequence of elements supporting sequential and parallel aggregate operations.
         * map     : returns a stream of the results of applying a function to each element.
         * filter  : returns a stream of the elements that match a predicate.
         * collect : folds the elements into a result container. It is used here instead of
         *           forEach so the pipeline builds its own result rather than mutating a map
         *           declared outside the stream.
         */
        return regexToList(words, WORD).stream()               // turn the collection into a stream
                // Locale.ROOT keeps the folding independent of the default locale; with a
                // Turkish default, "I" would otherwise become a dotless i and slip past the filter.
                .map(w -> w.toLowerCase(Locale.ROOT))           // lower-case every word
                .filter(w -> !NON_WORDS.contains(w))            // drop the stop words
                // Each word contributes 1; duplicates are summed. TreeMap keeps the keys sorted.
                .collect(Collectors.toMap(w -> w, w -> 1, Integer::sum, TreeMap::new));
    }
}
测试类:
package cn._1.wordfrequency;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Demo driver: reads a text file, counts its words and prints one {@code word,count} line per word.
 */
public class Mmain {

    /** File analysed when no path is given on the command line. */
    private static final String DEFAULT_PATH =
            "/home/yanshaochen/workspace/Functional_Thinking_Examples/mflie/sucai.txt";

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the path of the text file to analyse. Without
     *             arguments the built-in default path is used.
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        String str = readText(path);

        // Traditional implementation:
        // Map<String, Integer> map = new Word().wordFreq(str);
        // Java 8 implementation:
        Map<String, Integer> map = new Word2().wordFreq(str);

        // Print in the map's own iteration order (the natural ordering of the words).
        for (Entry<String, Integer> item : map.entrySet()) {
            System.out.println(item.getKey() + "," + item.getValue());
        }

        // Alternative: print by descending frequency. Uncomment to use it instead of the loop
        // above. Integer.compare avoids the overflow risk of comparing by subtraction.
        // List<Map.Entry<String, Integer>> infoIds = new ArrayList<>(map.entrySet());
        // Collections.sort(infoIds, new Comparator<Map.Entry<String, Integer>>() {
        //     @Override
        //     public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
        //         return Integer.compare(o2.getValue(), o1.getValue());
        //     }
        // });
        // for (Entry<String, Integer> item : infoIds) {
        //     System.out.println(item.getKey() + "," + item.getValue());
        // }
    }

    /**
     * Reads the whole file at {@code path} into a string using stream I/O.
     *
     * <p>All bytes are gathered first and decoded once, so a multi-byte character that straddles
     * a read-buffer boundary is never split. The stream is closed even when reading fails.
     *
     * @param path location of the file to read
     * @return the file content decoded as UTF-8
     * @throws IOException if the file cannot be opened or read
     */
    private static String readText(String path) throws IOException {
        try (FileInputStream fis = new FileInputStream(path);
             ByteArrayOutputStream collected = new ByteArrayOutputStream()) {
            byte[] chunk = new byte[8192];
            int read;
            while ((read = fis.read(chunk)) != -1) {
                collected.write(chunk, 0, read);
            }
            return new String(collected.toByteArray(), StandardCharsets.UTF_8);
        }
    }
}
标签:gre item ons logs arraylist 传统 null value val
原文地址:http://www.cnblogs.com/tomasman/p/7067876.html