标签:
如数据库支持dirty read, 则 所有打开的dbhandle都配置 DB_READ_UNCOMMITTED;
在线程拿到 write锁并做完处理后(比如 split 一个 page), 降级为 was_write (WWRITE) 锁. WWRITE锁和dirty reader不会冲突;
对dirty read锁的请求会被优先处理;
dirty read锁在 读取完成后马上释放; why? 因为txn保留读锁是为了repeatable read, 而dirty read本身不保证repeatable read, 所以没有必要把锁保留到txn结束.
若cursor 为read_uncommitted, 处理对读锁的请求时, lock_mode会改为 DB_LOCK_READ_UNCOMMITTED (db_meta.c, db_lget);
txn abort的时候 把 was_write锁 重新 升级为 write锁. 因为abort->undo时需要写page. (txn.c, txn_abort())
db.h
/*
 * db_lockmode_t --
 *	Lock modes known to the lock manager.
 *
 * The numeric values are spelled out explicitly. Their order (N, R, W, WT,
 * IW, IR, RIW, DR, WW) is the same as the row/column order used by the
 * db_riw_conflicts table, so the values must not be renumbered.
 */
typedef enum {
	DB_LOCK_NG=0,			/* Not granted. */
	DB_LOCK_READ=1,			/* Shared/read. */
	DB_LOCK_WRITE=2,		/* Exclusive/write. */
	DB_LOCK_WAIT=3,			/* Wait for event (used by the queue
					 * access method). */
	/* The intent modes below are used for hierarchical locking. */
	DB_LOCK_IWRITE=4,		/* Intent exclusive/write. */
	DB_LOCK_IREAD=5,		/* Intent to share/read. */
	DB_LOCK_IWR=6,			/* Intent to read and write. */
	DB_LOCK_READ_UNCOMMITTED=7,	/* Degree 1 isolation. */
	DB_LOCK_WWRITE=8		/* Was Written. */
} db_lockmode_t;
lock/lock_region.c:
/*
 * db_riw_conflicts --
 *	9x9 lock conflict matrix covering the read/intent/write modes plus
 *	dirty read (DR) and was-written (WW). A 1 means the two modes conflict.
 *
 * Rows and columns follow the db_lockmode_t order: N, R, W, WT, IW, IR,
 * RIW, DR, WW.
 * NOTE(review): presumably the row is the held mode and the column the
 * requested mode, as suggested by CONFLICTS(lt, region, lp->mode, lock_mode);
 * confirm against the CONFLICTS macro definition.
 */
#define	DB_LOCK_RIW_N	9
static const u_int8_t db_riw_conflicts[] = {
/*         N   R   W   WT  IW  IR  RIW DR  WW */
/*   N */  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*   R */  0,  0,  1,  0,  1,  0,  1,  0,  1,
/*   W */  0,  1,  1,  1,  1,  1,  1,  1,  1,
/*  WT */  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*  IW */  0,  1,  1,  0,  0,  0,  0,  1,  1,
/*  IR */  0,  0,  1,  0,  0,  0,  0,  0,  1,
/* RIW */  0,  1,  1,  0,  0,  0,  0,  1,  1,
	/* Dirty read and was-written do not conflict (DR/WW entry is 0). */
/*  DR */  0,  0,  1,  0,  1,  0,  1,  0,  0,
/*  WW */  0,  1,  1,  0,  1,  1,  1,  0,  1
};
lock.c, __lock_get_internal(): dirty read锁优先处理
lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); sh_off = R_OFFSET(<->reginfo, sh_locker);
// 遍历holder列表 for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { if (sh_off == lp->holder) { // 已经hold 锁了 if (lp->mode == lock_mode && lp->status == DB_LSTAT_HELD) { lp->refcount++; lock->off = R_OFFSET(<->reginfo, lp); lock->gen = lp->gen; lock->mode = lp->mode; goto done; } else { ihold = 1; } } else if (__lock_same_family(lt, R_ADDR(<->reginfo, lp->holder), sh_locker)) ihold = 1; else if (CONFLICTS(lt, region, lp->mode, lock_mode)) break; else if (lp->mode == DB_LOCK_READ || lp->mode == DB_LOCK_WWRITE) { grant_dirty = 1; // holder列表只有读锁; 或者有一个ww锁 holder = lp->holder; } } if (lp != NULL) { // 有冲突的holder if (ihold || LF_ISSET(DB_LOCK_UPGRADE) || lock_mode == DB_LOCK_READ_UNCOMMITTED) action = HEAD; // dirty read请求 优先, 放入waiter队列头 else action = TAIL; } else { if (LF_ISSET(DB_LOCK_UPGRADE)) action = UPGRADE; else if (ihold) action = GRANT; else {
// 无冲突的holder; 遍历waiter列表 SH_TAILQ_FOREACH(lp, &sh_obj->waiters, links, __db_lock) if (lp->holder != sh_off && CONFLICTS(lt, region, lp->mode, lock_mode)) break; if (lp == NULL) // 无冲突的waiter action = GRANT; else if (grant_dirty && lock_mode == DB_LOCK_READ_UNCOMMITTED) { lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock); if (lp->mode == DB_LOCK_WRITE && lp->holder == holder) action = SECOND; // waiter表头 为一个upgrade请求. 当前DR放入第二个 else action = GRANT; // 批准 DR锁请求. } else if (lock_mode == DB_LOCK_READ_UNCOMMITTED)
// 这里是DR请求, 但是!grant_dirty (没有read/ww holder). 这里有无冲突的holder;
// 有冲突的waiter. ?????? action = SECOND; else action = TAIL; } }
db_meta.c - write锁 降级(downgrade)为 wwrite锁:
__db_lget()
/*
 * Excerpt from __db_lget(): acquire a lock for page pgno according to
 * `action`. The default path issues one request; LCK_COUPLE and
 * LCK_DOWNGRADE batch several requests into one __lock_vec() call.
 */
dbc->lock.pgno = pgno;
...
switch (action) {
default:
/* A request with a timeout is routed through the vector path below. */
if (has_timeout)
goto do_couple;
ret = __lock_get(env, dbc->locker, lkflags, &dbc->lock_dbt, mode, lockp); // Ordinary case: one plain lock request.
break;
case LCK_DOWNGRADE:
/*
 * First request: get a WWRITE lock based on the lock currently in *lockp
 * (obj is NULL and the existing lock is passed in instead).
 */
couple[0].op = DB_LOCK_GET;
couple[0].obj = NULL;
couple[0].lock = *lockp;
couple[0].mode = DB_LOCK_WWRITE; // Request a new WWRITE lock.
UMRW_SET(couple[0].timeout);
i++;
/* FALLTHROUGH */
case LCK_COUPLE:
do_couple: couple[i].op = has_timeout? DB_LOCK_GET_TIMEOUT : DB_LOCK_GET;
couple[i].obj = &dbc->lock_dbt;
couple[i].mode = mode; // Request a new lock in the caller's mode (presumably lock_dbt describes the pgno stored above -- confirm).
UMRW_SET(couple[i].timeout);
i++;
if (has_timeout)
couple[0].timeout = F_ISSET(dbc, DBC_RECOVER) ? 0 : txn->lock_timeout;
if (action == LCK_COUPLE || action == LCK_DOWNGRADE) {
couple[i].op = DB_LOCK_PUT; // Release the original lock; for a downgrade this is the old WRITE lock, which completes the downgrade.
couple[i].lock = *lockp;
i++;
}
ret = __lock_vec(env, dbc->locker, lkflags, couple, i, &reqp);
/*
 * Record the new lock on success, or when reqp points at the last request
 * (presumably the request that failed -- confirm against __lock_vec).
 * i == 1 means only the timed GET was queued (default path with a timeout).
 * Otherwise the last entry is the PUT, so couple[i - 2] is the GET issued
 * just before it: the newly acquired lock in `mode`, not the WWRITE entry
 * at couple[0] in the downgrade case.
 */
if (ret == 0 || reqp == &couple[i - 1])
*lockp = i == 1 ? couple[0].lock : couple[i - 2].lock;
break;
}
__db_lput()
/*
 * Excerpt from __db_lput(): decide whether the lock in *lockp is released,
 * downgraded from WRITE to WWRITE, or left alone, then carry that out.
 */
/* Handle supports dirty readers, no cursor error, WRITE lock: downgrade. */
if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED) && !F_ISSET(dbc, DBC_ERROR) && lockp->mode == DB_LOCK_WRITE)
action = LCK_DOWNGRADE;
else if (dbc->txn == NULL)
action = LCK_COUPLE; // Non-transactional case; LCK_COUPLE here just means "release the lock" (see the switch below).
else if (F_ISSET(dbc, DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED) && lockp->mode == DB_LOCK_READ)
action = LCK_COUPLE; // read_committed cursor holding a READ lock: release it.
else if (lockp->mode == DB_LOCK_READ_UNCOMMITTED)
action = LCK_COUPLE; // Dirty-read (read_uncommitted) lock: release it right away.
else
action = 0;
env = dbc->env;
switch (action) {
case LCK_COUPLE:
ret = __lock_put(env, lockp);
break;
case LCK_DOWNGRADE:
couple[0].op = DB_LOCK_GET;
couple[0].obj = NULL;
couple[0].mode = DB_LOCK_WWRITE; // Get a WWRITE lock.
couple[0].lock = *lockp;
UMRW_SET(couple[0].timeout);
couple[1].op = DB_LOCK_PUT;
couple[1].lock = *lockp; // Release the lock originally held (the WRITE lock).
ret = __lock_vec(env, dbc->locker, 0, couple, 2, &reqp);
/*
 * Keep the WWRITE lock on success, or when reqp points at the PUT request
 * (presumably the request that failed -- confirm against __lock_vec).
 */
if (ret == 0 || reqp == &couple[1])
*lockp = couple[0].lock;
break;
default:
ret = 0; // Nothing is released: reached when none of the conditions above matched, e.g. read locks kept under the default isolation level, or write locks that are not downgraded.
break;
}
return (ret);
}
标签:
原文地址:http://www.cnblogs.com/brayden/p/5226005.html