Trie 树的 Java 实现(Trie 树又称单词查找树、字典树,按前缀查询的效率比哈希树高)
import org.apache.commons.lang3.Validate;

import java.util.HashMap;
import java.util.Map;

/**
 * A character-keyed trie (prefix tree) mapping non-empty strings to values.
 *
 * <p>A {@code null} value is treated as "no entry": {@link #find(String, int)}
 * never reports it and {@link #getSize()} does not count it.
 *
 * <p>The class performs no synchronization of its own.
 *
 * @param <T> type of the values stored in the trie
 */
public class TrieTree<T> {

    /** Longest key accepted by {@link #put(String, Object)}. */
    private static final int MAX_KEY_LENGTH = 64;

    private final TreeNode<T> root = new TreeNode<>();
    private int size = 0;

    /**
     * Returns the number of keys currently mapped to a non-null value.
     *
     * @return the number of entries in the trie
     */
    public int getSize() {
        return size;
    }

    /**
     * Associates {@code value} with {@code key}, creating any missing nodes on
     * the path from the root.
     *
     * @param key   the key; must be non-empty and at most 64 characters long,
     *              otherwise the {@code Validate} checks reject it
     * @param value the value to store; storing {@code null} clears the entry
     * @return the value previously stored under {@code key}, or {@code null}
     *         if there was none
     */
    public T put(String key, T value) {
        Validate.notEmpty(key);
        Validate.isTrue(key.length() <= MAX_KEY_LENGTH);

        TreeNode<T> current = this.root;
        for (int offset = 0; offset < key.length(); offset++) {
            char head = key.charAt(offset);
            TreeNode<T> child = current.children.get(head);
            if (child == null) {
                child = new TreeNode<>();
                current.children.put(head, child);
            }
            current = child;
        }

        T oldValue = current.value;
        current.value = value;
        // Count only real transitions between "absent" and "present" so that
        // repeated put(key, null) calls cannot inflate the size.
        if (oldValue == null && value != null) {
            this.size++;
        } else if (oldValue != null && value == null) {
            this.size--;
        }
        return oldValue;
    }

    /**
     * Finds the longest key that matches {@code text} starting at
     * {@code offset} and returns its value.
     *
     * <p>The walk follows the characters of {@code text} as far as the trie
     * allows and remembers the deepest value seen on the way. A shorter key is
     * therefore still reported when a longer key sharing its prefix only
     * partially matches, or when the text ends in the middle of a longer key.
     *
     * @param text   the text to scan
     * @param offset index in {@code text} at which matching starts; an offset
     *               at or beyond the end of the text yields {@code null}
     * @return the value of the longest matching key, or {@code null} if no key
     *         matches at {@code offset}
     */
    public T find(String text, int offset) {
        TreeNode<T> current = this.root;
        // The root never holds a value because put() rejects the empty key.
        T longestMatch = null;
        for (; offset < text.length(); offset++) {
            TreeNode<T> child = current.children.get(text.charAt(offset));
            if (child == null) {
                break;
            }
            current = child;
            if (current.value != null) {
                longestMatch = current.value;
            }
        }
        return longestMatch;
    }

    /** A trie node: outgoing edges keyed by character plus an optional value. */
    private static class TreeNode<T> {
        Map<Character, TreeNode<T>> children = new HashMap<>();
        T value;
    }
}
使用
import com.google.common.base.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import port.mllib.SparseVectorJ;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * A dictionary of "hot words" backed by a {@link TrieTree}, used to turn a
 * text into a sparse indicator vector of the words found in it.
 *
 * <p>Each line of the dictionary stream is one word; its id is the zero-based
 * position of the line in the stream.
 */
public class HotWords {
    private static final Logger LOG = LoggerFactory.getLogger(HotWords.class);

    private final TrieTree<Integer> tree;

    /**
     * Loads the dictionary from {@code inStream}, one word per line, decoded
     * as UTF-8. The stream is always closed before the constructor returns,
     * whether loading succeeds or fails.
     *
     * <p>A line that {@code TrieTree.put} rejects (an empty line, or one
     * longer than its key limit) aborts loading.
     *
     * <p>NOTE(review): a repeated line overwrites the earlier id and still
     * consumes a new one without growing {@code tree.getSize()}, so ids can
     * end up greater than or equal to the size passed to
     * {@code SparseVectorJ}. Confirm the dictionary is duplicate-free.
     *
     * @param inStream the dictionary source; ownership passes to this
     *                 constructor, which closes it
     * @throws RuntimeException wrapping any failure while reading or indexing
     */
    public HotWords(InputStream inStream) {
        TrieTree<Integer> loaded = new TrieTree<>();
        // try-with-resources closes the reader and the stream on every path,
        // including when readLine() or put() throws part-way through.
        try (InputStream in = inStream;
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
            int i = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                loaded.put(line, i++);
            }
        } catch (Exception e) {
            LOG.error("fail load hot words", e);
            throw new RuntimeException(e);
        }
        this.tree = loaded;
    }

    /**
     * Scans {@code text} and collects the id reported by
     * {@code tree.find(text, i)} for every start offset {@code i} at which it
     * is non-null. Each collected id becomes an index with value {@code 1}.
     *
     * <p>NOTE(review): indices are emitted in text order and a word occurring
     * several times contributes its id several times. Whether
     * {@code SparseVectorJ} accepts unsorted or repeated indices is not
     * visible here; verify against its contract.
     *
     * @param text the text to scan; {@code null} or empty yields a vector
     *             with no entries
     * @return a sparse vector of size {@code tree.getSize()} built from the
     *         collected ids
     */
    public SparseVectorJ findAll(String text) {
        if (Strings.isNullOrEmpty(text)) {
            return new SparseVectorJ(tree.getSize(), new int[0], new double[0]);
        }

        List<Integer> ret = new ArrayList<>();
        for (int i = 0; i < text.length(); ++i) {
            Integer id = tree.find(text, i);
            if (id != null) {
                ret.add(id);
            }
        }

        int count = ret.size();
        int[] ii = new int[count];
        double[] vv = new double[count];
        // Primitive counter: the original boxed Integer index was boxed and
        // unboxed on every iteration.
        for (int k = 0; k < count; k++) {
            ii[k] = ret.get(k);
            vv[k] = 1;
        }
        return new SparseVectorJ(tree.getSize(), ii, vv);
    }
}