码迷,mamicode.com
首页 > 其他好文 > 详细

TRIE树

时间:2018-03-24 19:32:43      阅读:181      评论:0      收藏:0      [点我收藏+]

标签:row   set   comm   tree   time   else   rac   使用   return   

TRIE树(又称单词查找树)的 Java 实现,查询效率比哈希树高

import org.apache.commons.lang3.Validate;

import java.util.HashMap;
import java.util.Map;

/**
 * A trie (prefix tree) that maps non-empty string keys to values.
 *
 * <p>Every node keeps its children keyed by the next character, plus the value of the key that
 * ends at that node ({@code null} when no key ends there). The class performs no synchronization
 * of its own.
 *
 * @param <T> type of the values stored in the trie
 */
public class TrieTree<T> {
    /** Longest key, in chars, accepted by {@code put}. */
    private static final int MAX_KEY_LENGTH = 64;

    private final TreeNode<T> root = new TreeNode<>();
    private int size = 0;

    /**
     * Returns the number of keys that currently hold a non-null value.
     *
     * @return the count of stored keys
     */
    public int getSize() {
        return size;
    }

    /**
     * Associates {@code value} with {@code key}, replacing any previous value.
     *
     * <p>Storing {@code null} clears the key: it stops being reported by {@code find} and no
     * longer counts towards {@link #getSize()}. The nodes created for the key are not pruned.
     *
     * @param key the key; must be non-null, non-empty and at most 64 chars long
     * @param value the value to store, or {@code null} to clear the key
     * @return the value previously stored under {@code key}, or {@code null} if there was none
     * @throws NullPointerException if {@code key} is null
     * @throws IllegalArgumentException if {@code key} is empty or longer than 64 chars
     */
    public T put(String key, T value) {
        if (key == null) {
            throw new NullPointerException("key must not be null");
        }
        if (key.isEmpty()) {
            throw new IllegalArgumentException("key must not be empty");
        }
        if (key.length() > MAX_KEY_LENGTH) {
            throw new IllegalArgumentException(
                    "key length " + key.length() + " exceeds the maximum of " + MAX_KEY_LENGTH);
        }

        // Walk down the trie one char at a time, creating missing nodes on the way.
        TreeNode<T> current = this.root;
        for (int offset = 0; offset < key.length(); offset++) {
            char head = key.charAt(offset);
            TreeNode<T> child = current.children.get(head);
            if (child == null) {
                child = new TreeNode<>();
                current.children.put(head, child);
            }
            current = child;
        }

        T oldValue = current.value;
        current.value = value;
        // Keep size equal to the number of keys with a non-null value: count only
        // absent -> present transitions and undo the count on present -> absent.
        if (oldValue == null && value != null) {
            this.size++;
        } else if (oldValue != null && value == null) {
            this.size--;
        }
        return oldValue;
    }

    /**
     * Finds the longest stored key that {@code text} starts with at position {@code offset}.
     *
     * @param text the text to scan
     * @param offset index in {@code text} at which the match must start
     * @return the value of the longest key that is a prefix of {@code text.substring(offset)},
     *     or {@code null} if no stored key matches there (including when {@code offset} is at or
     *     past the end of {@code text})
     */
    public T find(String text, int offset) {
        TreeNode<T> current = this.root;
        // Value of the deepest key-ending node seen so far. Remembering it means a shorter key
        // is still reported when the walk continues into, and then falls off, a longer key's path.
        T longestMatch = null;
        for (int index = offset; index < text.length(); index++) {
            current = current.children.get(text.charAt(index));
            if (current == null) {
                break;
            }
            if (current.value != null) {
                longestMatch = current.value;
            }
        }
        return longestMatch;
    }

    /** A single trie node: outgoing edges by character and the value of the key ending here. */
    private static class TreeNode<T> {
        final Map<Character, TreeNode<T>> children = new HashMap<>();
        T value;
    }
}

 

 

使用

import com.google.common.base.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import port.mllib.SparseVectorJ;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Loads a list of hot words into a {@link TrieTree}, giving each distinct word an integer id, and
 * converts a text into a {@link SparseVectorJ} marking the hot words found in it.
 */
public class HotWords {
    private static final Logger LOG = LoggerFactory.getLogger(HotWords.class);

    private final TrieTree<Integer> tree = new TrieTree<>();

    /**
     * Reads hot words from {@code inStream}, one word per line, decoded as UTF-8.
     *
     * <p>Distinct words receive consecutive ids starting at 0 in order of first appearance, so
     * every id is smaller than {@code tree.getSize()}. Empty lines are skipped and a repeated
     * word keeps the id of its first occurrence. The stream is closed before the constructor
     * returns, whether loading succeeds or fails.
     *
     * @param inStream source of the word list
     * @throws RuntimeException wrapping any failure while reading or inserting a word (for
     *     example a word rejected by {@code TrieTree.put}); the failure is also logged
     */
    public HotWords(InputStream inStream) {
        // try-with-resources closes the reader (and with it the wrapped stream) on every path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inStream, "UTF-8"))) {
            int nextId = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty()) {
                    // A blank line is not a word; skipping it avoids aborting the whole load.
                    continue;
                }
                Integer previousId = this.tree.put(line, nextId);
                if (previousId == null) {
                    nextId++;
                } else {
                    // Duplicate word: restore its first id and do not consume a new one, so
                    // ids stay dense and below the vector dimension used by findAll.
                    this.tree.put(line, previousId);
                }
            }
        } catch (Exception e) {
            LOG.error("fail load hot words", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Scans {@code text} and reports which hot words occur in it.
     *
     * <p>For every start position the trie is asked for a match beginning there; each match
     * contributes its id as an index with value 1. The vector is created with dimension
     * {@code tree.getSize()}.
     *
     * @param text the text to scan; may be null or empty
     * @return a sparse vector of the matched ids, with no entries when {@code text} is null or
     *     empty
     */
    public SparseVectorJ findAll(String text) {
        if (Strings.isNullOrEmpty(text)) {
            return new SparseVectorJ(tree.getSize(), new int[0], new double[0]);
        }

        List<Integer> matchedIds = new ArrayList<>();
        for (int start = 0; start < text.length(); ++start) {
            Integer id = tree.find(text, start);
            if (id != null) {
                matchedIds.add(id);
            }
        }

        // NOTE(review): indices are emitted in text order and repeat when a word occurs more
        // than once. Confirm SparseVectorJ accepts unsorted / duplicate indices.
        int[] indices = new int[matchedIds.size()];
        double[] values = new double[matchedIds.size()];
        for (int k = 0; k < indices.length; k++) {
            indices[k] = matchedIds.get(k);
            values[k] = 1;
        }
        return new SparseVectorJ(tree.getSize(), indices, values);
    }
}

 

TRIE树

标签:row   set   comm   tree   time   else   rac   使用   return   

原文地址:https://www.cnblogs.com/tengpan-cn/p/8640668.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!