十三、散列表（哈希表）

时间：2016-04-17 23:04:35 阅读：390 评论：0 收藏：0 [点我收藏+]

标签：

散列表

散列表插入分两步：
1. 根据散列函数找到索引
2. 处理索引冲突情况：拉链法和线性探测法

散列表是时间上和空间上作出权衡的一个例子。散列表采用函数映射找索引，查找很快，但是键的顺序信息不会保存（HashSet HashMap的本质）

散列函数

对于每种类型的键我们都学要一个与之对应的散列函数
正整数散列：
常用取余散列:k%M
浮点数散列：
例如0-1之间可以乘以一个M得到0-M-1之前的索引值，但是高位影响比低位大（0.12的1比2的影响更大，不符合均匀性），所以可以将键表示为二进制数后再取余。
字符串：
可以使用转换

//R是系数(相当于进制，比如10进制R就是10)
hash=(R*hash + s.charAt(i))%M;//针对每个字符i

组合键
例如日期里面的年月日组合

hash=(((day*R+month)%M)*R+year)%M;

将hashcode的返回值转化为数据索引：
java默认hashcode返回内存地址，我们可以：

(x.hashCode()&0x7fffffff)%M

转化为索引（与操作是让最高位变成0，防止出现负数）
自定义hashcode：
例如：
这里31只是一个系数

int hash = 17;
hash = 31*hash + who.hashCode();
hash = 31*hash + when.hashCode();
hash = 31*hash + ((Double) amount).hashCode();
return hash;

软缓存：
每次计算hashCode很复杂怎么办，用一个变量记录下来，Stirng就是这么干的

一个优秀的散列表需要满足三个条件：

一致性
高效性
均匀性

在有性能要求时请谨慎使用散列。
实现散列的最常见的错误是忽略了键的高位

技术分享

基于拉夫链法的散列表

大小为M的数组中存的是一个链表，链表中的每个结点存储键值对
这样查找分两步：先找到链表；然后顺序遍历链表找到相应地键

内部使用了一个无序符号表数组（之前讨论过的,参见SequentialSearchST）

public class SeparateChainingHashST<Key, Value> {
    private static final int INIT_CAPACITY = 4;
    private int N;                                // number of key-value pairs
    private int M;                                // hash table size
    private SequentialSearchST<Key, Value>[] st;  // array of linked-list symbol tables
    public SeparateChainingHashST() {
        this(INIT_CAPACITY);
    } 
    public SeparateChainingHashST(int M) {
        this.M = M;
        st = (SequentialSearchST<Key, Value>[]) new SequentialSearchST[M];
        for (int i = 0; i < M; i++)
            st[i] = new SequentialSearchST<Key, Value>();
    } 

    // resize the hash table to have the given number of chains b rehashing all of the keys
    private void resize(int chains) {
        SeparateChainingHashST<Key, Value> temp = new SeparateChainingHashST<Key, Value>(chains);
        for (int i = 0; i < M; i++) {
            for (Key key : st[i].keys()) {
                temp.put(key, st[i].get(key));
            }
        }
        this.M  = temp.M;
        this.N  = temp.N;
        this.st = temp.st;
    }

    // hash value between 0 and M-1
    private int hash(Key key) {
        return (key.hashCode() & 0x7fffffff) % M;
    } 
    public int size() {
        return N;
    } 
    public boolean isEmpty() {
        return size() == 0;
    }
    public boolean contains(Key key) {
        if (key == null) throw new NullPointerException("argument to contains() is null");
        return get(key) != null;
    } 
    public Value get(Key key) {
        if (key == null) throw new NullPointerException("argument to get() is null");
        int i = hash(key);
        return st[i].get(key);
    } 
    public void put(Key key, Value val) {
        if (key == null) throw new NullPointerException("first argument to put() is null");
        if (val == null) {
            delete(key);
            return;
        }

        // double table size if average length of list >= 10
        if (N >= 10*M) resize(2*M);

        int i = hash(key);
        if (!st[i].contains(key)) N++;
        st[i].put(key, val);
    } 
    public void delete(Key key) {
        if (key == null) throw new NullPointerException("argument to delete() is null");

        int i = hash(key);
        if (st[i].contains(key)) N--;
        st[i].delete(key);

        // halve table size if average length of list <= 2
        if (M > INIT_CAPACITY && N <= 2*M) resize(M/2);
    } 

    // return keys in symbol table as an Iterable
    public Iterable<Key> keys() {
        Queue<Key> queue = new Queue<Key>();
        for (int i = 0; i < M; i++) {
            for (Key key : st[i].keys())
                queue.enqueue(key);
        }
        return queue;
    } 
    public static void main(String[] args) { 
        SeparateChainingHashST<String, Integer> st = new SeparateChainingHashST<String, Integer>();
        for (int i = 0; !StdIn.isEmpty(); i++) {
            String key = StdIn.readString();
            st.put(key, i);
        }

        // print keys
        for (String s : st.keys()) 
            StdOut.println(s + " " + st.get(s)); 

    }

}

技术分享

基于线性探测法的散列表

实现散列表的另一种方式是用大小为M的数组保存N个键值对，其中，M>N.
需要依靠空位来解决碰撞问题。发生碰撞时，索引值+1，会出现三种结果：

命中：找到了该键；
未命中：空的
继续查找：找到的键和被查找的键不相同

该方法中α=N/M称为使用率（不允许被占满）
键簇
一组连续的条目，称为键簇。显然短的键簇能保证效率。但是长键簇更长的可能性比短键簇更大

性能分析
技术分享

代码：
删除后要重新插入一遍

public class LinearProbingHashST<Key, Value> {
    private static final int INIT_CAPACITY = 4;
    private int N;           // number of key-value pairs in the symbol table
    private int M;           // size of linear probing table
    private Key[] keys;      // the keys
    private Value[] vals;    // the values
    public LinearProbingHashST() {
        this(INIT_CAPACITY);
    }
    public LinearProbingHashST(int capacity) {
        M = capacity;
        keys = (Key[])   new Object[M];
        vals = (Value[]) new Object[M];
    }
    public int size() {
        return N;
    }

    public boolean isEmpty() {
        return size() == 0;
    }
    public boolean contains(Key key) {
        if (key == null) throw new NullPointerException("argument to contains() is null");
        return get(key) != null;
    }

    // hash function for keys - returns value between 0 and M-1
    private int hash(Key key) {
        return (key.hashCode() & 0x7fffffff) % M;
    }

    // resizes the hash table to the given capacity by re-hashing all of the keys
    private void resize(int capacity) {
        LinearProbingHashST<Key, Value> temp = new LinearProbingHashST<Key, Value>(capacity);
        for (int i = 0; i < M; i++) {
            if (keys[i] != null) {
                temp.put(keys[i], vals[i]);
            }
        }
        keys = temp.keys;
        vals = temp.vals;
        M    = temp.M;
    }

    public void put(Key key, Value val) {
        if (key == null) throw new NullPointerException("first argument to put() is null");

        if (val == null) {
            delete(key);
            return;
        }

        // double table size if 50% full
        if (N >= M/2) resize(2*M);

        int i;
        for (i = hash(key); keys[i] != null; i = (i + 1) % M) {
            if (keys[i].equals(key)) {
                vals[i] = val;
                return;
            }
        }
        keys[i] = key;
        vals[i] = val;
        N++;
    }
    public Value get(Key key) {
        if (key == null) throw new NullPointerException("argument to get() is null");
        for (int i = hash(key); keys[i] != null; i = (i + 1) % M) 
            if (keys[i].equals(key))
                return vals[i];
        return null;
    }
    public void delete(Key key) {
        if (key == null) throw new NullPointerException("argument to delete() is null");
        if (!contains(key)) return;

        // find position i of key
        int i = hash(key);
        while (!key.equals(keys[i])) {
            i = (i + 1) % M;
        }

        // delete key and associated value
        keys[i] = null;
        vals[i] = null;

        // rehash all keys in same cluster
        i = (i + 1) % M;
        while (keys[i] != null) {
            // delete keys[i] an vals[i] and reinsert
            Key   keyToRehash = keys[i];
            Value valToRehash = vals[i];
            keys[i] = null;
            vals[i] = null;
            N--;  
            put(keyToRehash, valToRehash);
            i = (i + 1) % M;
        }

        N--;        

        // halves size of array if it‘s 12.5% full or less
        if (N > 0 && N <= M/8) resize(M/2);

        assert check();
    }

    public Iterable<Key> keys() {
        Queue<Key> queue = new Queue<Key>();
        for (int i = 0; i < M; i++)
            if (keys[i] != null) queue.enqueue(keys[i]);
        return queue;
    }

    // integrity check - don‘t check after each put() because
    // integrity not maintained during a delete()
    private boolean check() {

        // check that hash table is at most 50% full
        if (M < 2*N) {
            System.err.println("Hash table size M = " + M + "; array size N = " + N);
            return false;
        }

        // check that each key in table can be found by get()
        for (int i = 0; i < M; i++) {
            if (keys[i] == null) continue;
            else if (get(keys[i]) != vals[i]) {
                System.err.println("get[" + keys[i] + "] = " + get(keys[i]) + "; vals[i] = " + vals[i]);
                return false;
            }
        }
        return true;
    }
    public static void main(String[] args) { 
        LinearProbingHashST<String, Integer> st = new LinearProbingHashST<String, Integer>();
        for (int i = 0; !StdIn.isEmpty(); i++) {
            String key = StdIn.readString();
            st.put(key, i);
        }

        // print keys
        for (String s : st.keys()) 
            StdOut.println(s + " " + st.get(s)); 
    }
}

十三、散列表（哈希表）

标签：

原文地址：http://blog.csdn.net/guanhang89/article/details/51170794

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行