标签:
散列表插入分两步:
1. 根据散列函数找到索引
2. 处理索引冲突情况:拉链法和线性探测法
散列表是时间上和空间上作出权衡的一个例子。散列表采用函数映射找索引,查找很快,但是键的顺序信息不会保存(HashSet HashMap的本质)
对于每种类型的键,我们都需要一个与之对应的散列函数
正整数散列:
常用取余散列:k%M
浮点数散列:
例如0-1之间的键可以乘以一个M得到0到M-1之间的索引值,但是这样高位的影响比低位大(0.12中1比2的影响更大,不符合均匀性),所以可以将键表示为二进制数后再取余。
字符串:
可以使用转换
//R是系数(相当于进制,比如10进制R就是10)
hash=(R*hash + s.charAt(i))%M;//针对每个字符i
组合键
例如日期里面的年月日组合
hash=(((day*R+month)%M)*R+year)%M;
将hashcode的返回值转化为数据索引:
java默认hashcode返回内存地址,我们可以:
(x.hashCode()&0x7fffffff)%M
转化为索引(与操作是让最高位变成0,防止出现负数)
自定义hashcode:
例如:
这里31只是一个系数
int hash = 17;
hash = 31*hash + who.hashCode();
hash = 31*hash + when.hashCode();
hash = 31*hash + ((Double) amount).hashCode();
return hash;
软缓存:
每次计算hashCode很复杂怎么办?用一个变量把结果记录下来,String就是这么干的
一个优秀的散列方法(散列函数)需要满足三个条件:
一致性
高效性
均匀性
在有性能要求时请谨慎使用散列。
实现散列的最常见的错误是忽略了键的高位
大小为M的数组中存的是一个链表,链表中的每个结点存储键值对
这样查找分两步:先找到链表;然后顺序遍历链表找到相应的键
内部使用了一个无序符号表数组(之前讨论过的,参见SequentialSearchST)
/**
 * Symbol table backed by a hash table that resolves collisions with
 * separate chaining: an array of M chains, each chain being a
 * {@code SequentialSearchST} that holds the key-value pairs hashing to
 * that index.
 *
 * <p>The table doubles its number of chains when N >= 10*M on insertion and
 * halves it when N <= 2*M after a deletion (never shrinking once M is at or
 * below the initial capacity).
 *
 * @param <Key>   key type; hashed via {@code hashCode()}
 * @param <Value> value type; a {@code null} value passed to put() means "delete"
 */
public class SeparateChainingHashST<Key, Value> {
    private static final int INIT_CAPACITY = 4;

    private int N;                                // number of key-value pairs
    private int M;                                // number of chains (hash table size)
    private SequentialSearchST<Key, Value>[] st;  // one unordered symbol table per chain

    /** Creates an empty table with the default number of chains. */
    public SeparateChainingHashST() {
        this(INIT_CAPACITY);
    }

    /**
     * Creates an empty table with {@code M} chains.
     *
     * @param M the number of chains to allocate
     */
    public SeparateChainingHashST(int M) {
        this.M = M;
        st = (SequentialSearchST<Key, Value>[]) new SequentialSearchST[M];
        int chain = 0;
        while (chain < M) {
            st[chain] = new SequentialSearchST<Key, Value>();
            chain++;
        }
    }

    /**
     * Rebuilds the table with the given number of chains by re-inserting
     * every key-value pair into a fresh table and adopting its state.
     */
    private void resize(int chains) {
        SeparateChainingHashST<Key, Value> rebuilt =
                new SeparateChainingHashST<Key, Value>(chains);
        for (SequentialSearchST<Key, Value> chain : st) {
            for (Key key : chain.keys()) {
                rebuilt.put(key, chain.get(key));
            }
        }
        this.st = rebuilt.st;
        this.N = rebuilt.N;
        this.M = rebuilt.M;
    }

    /** Maps a key to a chain index between 0 and M-1. */
    private int hash(Key key) {
        // Clearing the sign bit keeps the remainder non-negative.
        int nonNegative = key.hashCode() & 0x7fffffff;
        return nonNegative % M;
    }

    /** @return the number of key-value pairs in the table */
    public int size() {
        return N;
    }

    /** @return {@code true} if the table holds no key-value pairs */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Reports whether get() yields a non-null value for the key.
     *
     * @throws NullPointerException if {@code key} is null
     */
    public boolean contains(Key key) {
        if (key == null) {
            throw new NullPointerException("argument to contains() is null");
        }
        Value found = get(key);
        return found != null;
    }

    /**
     * Looks the key up in the chain it hashes to.
     *
     * @return whatever that chain's get() returns for the key
     *         (presumably null when the key is absent - see SequentialSearchST)
     * @throws NullPointerException if {@code key} is null
     */
    public Value get(Key key) {
        if (key == null) {
            throw new NullPointerException("argument to get() is null");
        }
        return st[hash(key)].get(key);
    }

    /**
     * Inserts or updates a key-value pair. A null value deletes the key.
     *
     * @throws NullPointerException if {@code key} is null
     */
    public void put(Key key, Value val) {
        if (key == null) {
            throw new NullPointerException("first argument to put() is null");
        }
        if (val == null) {
            delete(key);
            return;
        }

        // Grow first so the chain index below is computed against the final M.
        if (N >= 10*M) {
            resize(2*M);
        }

        SequentialSearchST<Key, Value> chain = st[hash(key)];
        if (!chain.contains(key)) {
            N++;
        }
        chain.put(key, val);
    }

    /**
     * Removes the key (and its value) if present.
     *
     * @throws NullPointerException if {@code key} is null
     */
    public void delete(Key key) {
        if (key == null) {
            throw new NullPointerException("argument to delete() is null");
        }

        SequentialSearchST<Key, Value> chain = st[hash(key)];
        if (chain.contains(key)) {
            N--;
        }
        chain.delete(key);

        // Shrink when the average chain length is <= 2, but never below the initial size.
        boolean aboveInitialSize = M > INIT_CAPACITY;
        if (aboveInitialSize && N <= 2*M) {
            resize(M/2);
        }
    }

    /** @return all keys in the table, collected chain by chain */
    public Iterable<Key> keys() {
        Queue<Key> collected = new Queue<Key>();
        for (SequentialSearchST<Key, Value> chain : st) {
            for (Key key : chain.keys()) {
                collected.enqueue(key);
            }
        }
        return collected;
    }

    /**
     * Reads strings from standard input, maps each to its position in the
     * input, then prints every key with its associated value.
     */
    public static void main(String[] args) {
        SeparateChainingHashST<String, Integer> st =
                new SeparateChainingHashST<String, Integer>();
        int position = 0;
        while (!StdIn.isEmpty()) {
            String key = StdIn.readString();
            st.put(key, position);
            position++;
        }

        for (String s : st.keys()) {
            StdOut.println(s + " " + st.get(s));
        }
    }
}
实现散列表的另一种方式是用大小为M的数组保存N个键值对,其中,M>N.
需要依靠空位来解决碰撞问题。发生碰撞时,索引值+1,会出现三种结果:
命中:找到了该键;
未命中:空的
继续查找:找到的键和被查找的键不相同
该方法中α=N/M称为使用率(不允许被占满)
键簇
一组连续的条目,称为键簇。显然短的键簇能保证效率。但是长键簇变得更长的可能性比短键簇更大
性能分析
代码:
删除键后,要将同一键簇中位于它之后的键全部重新插入一遍
/**
 * Symbol table backed by an open-addressing hash table with linear probing.
 * Keys and values live in two parallel arrays of length M; a null entry in
 * {@code keys} marks an empty slot and terminates a probe sequence.
 *
 * <p>The table doubles when N >= M/2 on insertion and halves when
 * 0 < N <= M/8 after a deletion.
 *
 * @param <Key>   key type; hashed via {@code hashCode()} and compared via {@code equals()}
 * @param <Value> value type; a {@code null} value passed to put() means "delete"
 */
public class LinearProbingHashST<Key, Value> {
    private static final int INIT_CAPACITY = 4;

    private int N;         // number of key-value pairs in the symbol table
    private int M;         // size of the linear probing table
    private Key[] keys;    // the keys
    private Value[] vals;  // the values, parallel to keys

    /** Creates an empty table with the default capacity. */
    public LinearProbingHashST() {
        this(INIT_CAPACITY);
    }

    /**
     * Creates an empty table with the given number of slots.
     *
     * @param capacity the number of slots to allocate
     */
    public LinearProbingHashST(int capacity) {
        M = capacity;
        keys = (Key[]) new Object[M];
        vals = (Value[]) new Object[M];
    }

    /** @return the number of key-value pairs in the table */
    public int size() {
        return N;
    }

    /** @return {@code true} if the table holds no key-value pairs */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Reports whether get() yields a non-null value for the key.
     *
     * @throws NullPointerException if {@code key} is null
     */
    public boolean contains(Key key) {
        if (key == null) {
            throw new NullPointerException("argument to contains() is null");
        }
        Value found = get(key);
        return found != null;
    }

    /** Maps a key to a slot index between 0 and M-1. */
    private int hash(Key key) {
        // Clearing the sign bit keeps the remainder non-negative.
        int nonNegative = key.hashCode() & 0x7fffffff;
        return nonNegative % M;
    }

    /**
     * Rebuilds the table at the given capacity by re-inserting every
     * occupied slot (in ascending slot order) into a fresh table and
     * adopting its arrays. N is unchanged because the same pairs remain.
     */
    private void resize(int capacity) {
        LinearProbingHashST<Key, Value> rebuilt =
                new LinearProbingHashST<Key, Value>(capacity);
        for (int slot = 0; slot < M; slot++) {
            if (keys[slot] == null) {
                continue;
            }
            rebuilt.put(keys[slot], vals[slot]);
        }
        keys = rebuilt.keys;
        vals = rebuilt.vals;
        M = rebuilt.M;
    }

    /**
     * Inserts or updates a key-value pair. A null value deletes the key.
     *
     * @throws NullPointerException if {@code key} is null
     */
    public void put(Key key, Value val) {
        if (key == null) {
            throw new NullPointerException("first argument to put() is null");
        }
        if (val == null) {
            delete(key);
            return;
        }

        // Double the table once it is 50% full, before computing the start slot.
        if (N >= M/2) {
            resize(2*M);
        }

        // Probe forward (wrapping around) until the key or an empty slot is found.
        int slot = hash(key);
        while (keys[slot] != null) {
            if (keys[slot].equals(key)) {
                vals[slot] = val;
                return;
            }
            slot = (slot + 1) % M;
        }
        keys[slot] = key;
        vals[slot] = val;
        N++;
    }

    /**
     * Probes from the key's home slot until the key or an empty slot is found.
     *
     * @return the value stored for the key, or null if the probe hits an empty slot
     * @throws NullPointerException if {@code key} is null
     */
    public Value get(Key key) {
        if (key == null) {
            throw new NullPointerException("argument to get() is null");
        }
        int slot = hash(key);
        while (keys[slot] != null) {
            if (keys[slot].equals(key)) {
                return vals[slot];
            }
            slot = (slot + 1) % M;
        }
        return null;
    }

    /**
     * Removes the key (and its value) if present, then re-inserts every
     * entry that follows it in the same cluster so that no probe sequence
     * is broken by the newly emptied slot.
     *
     * @throws NullPointerException if {@code key} is null
     */
    public void delete(Key key) {
        if (key == null) {
            throw new NullPointerException("argument to delete() is null");
        }
        if (!contains(key)) {
            return;
        }

        // Locate the slot holding the key; it exists, so the scan terminates.
        int slot = hash(key);
        while (!key.equals(keys[slot])) {
            slot = (slot + 1) % M;
        }

        // Empty that slot.
        keys[slot] = null;
        vals[slot] = null;

        // Pull out and re-insert each remaining entry of the cluster.
        for (slot = (slot + 1) % M; keys[slot] != null; slot = (slot + 1) % M) {
            Key displacedKey = keys[slot];
            Value displacedVal = vals[slot];
            keys[slot] = null;
            vals[slot] = null;
            N--;  // put() below increments N again
            put(displacedKey, displacedVal);
        }

        N--;

        // Halve the array if it's 12.5% full or less.
        if (N > 0 && N <= M/8) {
            resize(M/2);
        }

        assert check();
    }

    /** @return all keys in the table, in ascending slot order */
    public Iterable<Key> keys() {
        Queue<Key> collected = new Queue<Key>();
        for (Key candidate : keys) {
            if (candidate != null) {
                collected.enqueue(candidate);
            }
        }
        return collected;
    }

    /**
     * Integrity check - don't check after each put() because
     * integrity is not maintained during a delete().
     *
     * @return true if the table is at most 50% full and every stored key
     *         is found by get() with the very same value object
     */
    private boolean check() {
        // The table must be at most 50% full.
        if (M < 2*N) {
            System.err.println("Hash table size M = " + M + "; array size N = " + N);
            return false;
        }

        // Every key in the table must be reachable through get().
        for (int i = 0; i < M; i++) {
            if (keys[i] != null && get(keys[i]) != vals[i]) {
                System.err.println("get[" + keys[i] + "] = " + get(keys[i]) + "; vals[i] = " + vals[i]);
                return false;
            }
        }
        return true;
    }

    /**
     * Reads strings from standard input, maps each to its position in the
     * input, then prints every key with its associated value.
     */
    public static void main(String[] args) {
        LinearProbingHashST<String, Integer> st =
                new LinearProbingHashST<String, Integer>();
        int position = 0;
        while (!StdIn.isEmpty()) {
            String key = StdIn.readString();
            st.put(key, position);
            position++;
        }

        for (String s : st.keys()) {
            StdOut.println(s + " " + st.get(s));
        }
    }
}
标签:
原文地址:http://blog.csdn.net/guanhang89/article/details/51170794