While reproducing the tty deadlock problem, my colleague Wenyang used the following approach:
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/ioctl.h>          /* for ioctl() */

#define TIOCVHANGUP 0x5437

int main(int argc, char *argv[])
{
    int fd;

    if (argc < 2) {
        printf("error, you should input a tty as a parameter\r\n");
        return 1;
    }

    fd = open(argv[1], O_WRONLY | O_NOCTTY);
    if (fd < 0) {
        return 1;
    }

    write(fd, "test tty\n", sizeof("test tty\n") - 1);
    ioctl(fd, TIOCVHANGUP, 0);
    /* sleep(1); */
    close(fd);
    return 0;
}
Compile it with gcc -g -o main.o main.c, then drive it from a script:
#!/bin/bash
while [ 1 ]
do
    ./main.o /dev/tty4
done
The loop lives in the script rather than in a while loop inside the C program because some tty processing happens when a process exits, and we want the test to cover that path as much as possible; for the same reason we did not even add a sleep to slow things down.
This reproduced the soft-lock problem shown below; the stack is as follows:
[517571.855382] INFO: task systemd:1 blocked for more than 120 seconds.
[517571.856127] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[517571.856846] systemd         D ffff881fffc347c0     0     1      0 0x00000000
[517571.856852]  ffff881fd35c7b50 0000000000000086 ffff881fd35c7fd8 ffff881fd35c7fd8
[517571.856859]  ffff881fd35c7fd8 00000000000147c0 ffff881fd313c500 ffff883f5ee2ac80
[517571.856863]  ffff883f5ee2ac84 ffff883fd1630000 00000000ffffffff ffff883f5ee2ac88
[517571.856867] Call Trace:
[517571.856880]  [<ffffffff8163f959>] schedule_preempt_disabled+0x29/0x70
[517571.856883]  [<ffffffff8163d415>] __mutex_lock_slowpath+0xc5/0x1c0
[517571.856888]  [<ffffffff8163c87f>] mutex_lock+0x1f/0x2f
[517571.856890]  [<ffffffff81640df8>] tty_lock_nested.isra.0+0x38/0x90
[517571.856892]  [<ffffffff81640e5e>] tty_lock+0xe/0x10
[517571.856899]  [<ffffffff813b204c>] tty_open+0xcc/0x620
[517571.856906]  [<ffffffff811e5721>] chrdev_open+0xa1/0x1e0
[517571.856912]  [<ffffffff811de657>] do_dentry_open+0x1a7/0x2e0
[517571.856916]  [<ffffffff811e5680>] ? cdev_put+0x30/0x30
[517571.856918]  [<ffffffff811de889>] vfs_open+0x39/0x70
[517571.856922]  [<ffffffff811ede7d>] do_last+0x1ed/0x1270
[517571.856925]  [<ffffffff811f0be2>] path_openat+0xc2/0x490
[517571.856930]  [<ffffffff810afb68>] ? __wake_up_common+0x58/0x90
[517571.856935]  [<ffffffff811f23ab>] do_filp_open+0x4b/0xb0
[517571.856941]  [<ffffffff811fef47>] ? __alloc_fd+0xa7/0x130
[517571.856945]  [<ffffffff811dfd53>] do_sys_open+0xf3/0x1f0
[517571.856949]  [<ffffffff811dfe6e>] SyS_open+0x1e/0x20
[517571.856955]  [<ffffffff81649909>] system_call_fastpath+0x16/0x1b
From the stack, this is clearly another case of timing out while waiting for a lock. The key step is to disassemble and identify which lock it is.
void __lockfunc tty_lock(struct tty_struct *tty)
{
	return tty_lock_nested(tty, TTY_MUTEX_NORMAL);
}

static void __lockfunc tty_lock_nested(struct tty_struct *tty,
				       unsigned int subclass)
{
	if (tty->magic != TTY_MAGIC) {
		pr_err("L Bad %p\n", tty);
		WARN_ON(1);
		return;
	}
	tty_kref_get(tty);
	mutex_lock_nested(&tty->legacy_mutex, subclass);	/* <-- the lock pointer that gets passed in */
}
Since CONFIG_DEBUG_LOCK_ALLOC is not set, mutex_lock_nested is simply mutex_lock, which matches the stack trace:
# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
crash> dis -l tty_lock_nested
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 18
0xffffffff81640dc0 <tty_lock_nested>:           nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffff81640dc5 <tty_lock_nested+5>:         push   %rbp
0xffffffff81640dc6 <tty_lock_nested+6>:         mov    %rsp,%rbp
0xffffffff81640dc9 <tty_lock_nested+9>:         push   %rbx
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 21
0xffffffff81640dca <tty_lock_nested+10>:        cmpl   $0x5401,(%rdi)
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 18
0xffffffff81640dd0 <tty_lock_nested+16>:        mov    %rdi,%rbx
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 21
0xffffffff81640dd3 <tty_lock_nested+19>:        jne    0xffffffff81640dfb <tty_lock_nested+59>
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/include/linux/tty.h: 388
0xffffffff81640dd5 <tty_lock_nested+21>:        test   %rdi,%rdi
0xffffffff81640dd8 <tty_lock_nested+24>:        je     0xffffffff81640dec <tty_lock_nested+44>
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/arch/x86/include/asm/atomic.h: 176
0xffffffff81640dda <tty_lock_nested+26>:        mov    $0x1,%eax
0xffffffff81640ddf <tty_lock_nested+31>:        lock xadd %eax,0x4(%rdi)
0xffffffff81640de4 <tty_lock_nested+36>:        add    $0x1,%eax
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/include/linux/kref.h: 47
0xffffffff81640de7 <tty_lock_nested+39>:        cmp    $0x1,%eax
0xffffffff81640dea <tty_lock_nested+42>:        jle    0xffffffff81640e1f <tty_lock_nested+95>
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 27
0xffffffff81640dec <tty_lock_nested+44>:        lea    0x80(%rbx),%rdi        <-- the argument is the address of the lock, i.e. &tty->legacy_mutex, so %rbx holds the tty pointer
0xffffffff81640df3 <tty_lock_nested+51>:        callq  0xffffffff8163c860 <mutex_lock>        <-- calls mutex_lock
crash> dis -l mutex_lock
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/kernel/mutex.c: 103
0xffffffff8163c860 <mutex_lock>:        nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffff8163c865 <mutex_lock+5>:      push   %rbp
0xffffffff8163c866 <mutex_lock+6>:      mov    %rsp,%rbp
0xffffffff8163c869 <mutex_lock+9>:      push   %rbx        <-- %rbx is pushed right after %rbp, so the saved %rbx sits next to the saved %rbp on the stack
So we can recover the tty pointer from the stack: the saved %rbx is stored right after the saved %rbp.
    ffff881fd35c7bc0: ffff881fd35c7bd8 ffffffff8163c87f
 #3 [ffff881fd35c7bc8] mutex_lock at ffffffff8163c87f
    ffff881fd35c7bd0: ffff883f5ee2ac00 ffff881fd35c7bf0        <-- ffff883f5ee2ac00 is the saved %rbx, i.e. the tty pointer
    ffff881fd35c7be0: ffffffff81640df8
 #4 [ffff881fd35c7be0] tty_lock_nested at ffffffff81640df8
    ffff881fd35c7be8: ffff88211f6a3200 ffff881fd35c7c00
    ffff881fd35c7bf8: ffffffff81640e5e
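For reference, frame dumps like the one above come from crash's bt -f, and the 0x80 offset in the lea instruction can be cross-checked against the structure layout. A minimal sketch of the commands (output omitted; the offset is specific to this kernel build):

crash> bt -f 1                                    # raw frame data of the blocked systemd task (PID 1)
crash> struct -o tty_struct | grep legacy_mutex   # confirm legacy_mutex sits at offset 0x80 on this build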
Now we need to find out who owns this lock.
crash> struct tty_struct.legacy_mutex ffff883f5ee2ac00
  legacy_mutex = {
    count = {
      counter = -1
    },
    wait_lock = {
      {
        rlock = {
          raw_lock = {
            {
              head_tail = 524296,
              tickets = {
                head = 8,
                tail = 8
              }
            }
          }
        }
      }
    },
    wait_list = {
      next = 0xffff881fd35c7b70,
      prev = 0xffff881fd35c7b70
    },
    owner = 0xffff880190f5c500,        <-- the task holding the lock
Look at the corresponding task:
crash> task 0xffff880190f5c500
PID: 5628   TASK: ffff880190f5c500  CPU: 47  COMMAND: "main.o"        <-- exactly the test program we compiled
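To see where the owner itself is stuck, its backtrace can be dumped as well (not included in the session above); one would expect it somewhere in the hangup path analysed further below:

crash> bt 5628        # backtrace of the lock owner, our main.o process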
Next, confirm that this tty really is our tty4.
crash> struct tty_struct.name ffff883f5ee2ac00
  name = "tty4\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
Once that is confirmed, look at the list of files the process has open:
crash> files 5628
PID: 5628   TASK: ffff880190f5c500  CPU: 47  COMMAND: "main.o"
ROOT: /    CWD: /home/caq
 FD       FILE            DENTRY           INODE       TYPE PATH
  0 ffff881f0e31a600 ffff880dd37f8000 ffff8801713fcea0 CHR  /dev/pts/45
  1 ffff881f0e31a600 ffff880dd37f8000 ffff8801713fcea0 CHR  /dev/pts/45
  2 ffff881f0e31a600 ffff880dd37f8000 ffff8801713fcea0 CHR  /dev/pts/45
  3 ffff881a00324400 ffff883fd1010fc0 ffff883fd0b73820 CHR  /dev/tty4
Then look at the corresponding tty's attributes:
crash> struct file.private_data ffff881a00324400
  private_data = 0xffff883f6101e840
crash> struct tty_file_private.tty 0xffff883f6101e840
  tty = 0xffff883f5ee2ac00
crash> struct tty_struct.disc_data 0xffff883f5ee2ac00        <-- this 0xffff883f5ee2ac00 is exactly the tty pointer we found from the disassembly above
  disc_data = 0xffff883f9a1d8c00
crash> struct n_tty_data.icanon 0xffff883f9a1d8c00
  icanon = 1 '\001'
Of course, this can also be inspected directly via the tty.
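As an aside, purely an illustrative sketch and not part of the original reproducer: the icanon flag of the N_TTY line discipline corresponds to the ICANON bit in the termios c_lflag, so from user space a quick check of whether a tty is in canonical mode looks roughly like this:

#include <fcntl.h>
#include <stdio.h>
#include <termios.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
    struct termios tio;
    int fd;

    if (argc < 2) {
        fprintf(stderr, "usage: %s /dev/ttyN\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDONLY | O_NOCTTY);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (tcgetattr(fd, &tio) < 0) {     /* read the current termios settings */
        perror("tcgetattr");
        close(fd);
        return 1;
    }
    /* ICANON in c_lflag is what shows up as icanon = 1 in n_tty_data */
    printf("ICANON is %s\n", (tio.c_lflag & ICANON) ? "set" : "clear");
    close(fd);
    return 0;
}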
In the end, all roads lead to the same place: it is the same problem as before, caused by this attribute.
static void __tty_hangup(struct tty_struct *tty, int exit_session)
{
	struct file *cons_filp = NULL;
	struct file *filp, *f = NULL;
	struct tty_file_private *priv;
	int closecount = 0, n;
	int refs;

	if (!tty)
		return;

	spin_lock(&redirect_lock);
	if (redirect && file_tty(redirect) == tty) {
		f = redirect;
		redirect = NULL;
	}
	spin_unlock(&redirect_lock);

	tty_lock(tty);	/* <-- takes the lock */

	/* some functions below drop BTM, so we need this bit */
	set_bit(TTY_HUPPING, &tty->flags);

	/* inuse_filps is protected by the single tty lock,
	   this really needs to change if we want to flush the
	   workqueue with the lock held */
	check_tty_count(tty, "tty_hangup");

	spin_lock(&tty_files_lock);
	/* This breaks for file handles being sent over AF_UNIX sockets ? */
	list_for_each_entry(priv, &tty->tty_files, list) {
		filp = priv->file;
		if (filp->f_op->write == redirected_tty_write)
			cons_filp = filp;
		if (filp->f_op->write != tty_write)
			continue;
		closecount++;
		__tty_fasync(-1, filp, 0);	/* can't block */
		filp->f_op = &hung_up_tty_fops;
	}
	spin_unlock(&tty_files_lock);

	refs = tty_signal_session_leader(tty, exit_session);
	/* Account for the p->signal references we killed */
	while (refs--)
		tty_kref_put(tty);

	/*
	 * it drops BTM and thus races with reopen
	 * we protect the race by TTY_HUPPING
	 */
	tty_ldisc_hangup(tty);	/* <-- blocks here; the reason for the blocking was described above */

	spin_lock_irq(&tty->ctrl_lock);
	clear_bit(TTY_THROTTLED, &tty->flags);
	clear_bit(TTY_PUSH, &tty->flags);
	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
	put_pid(tty->session);
	put_pid(tty->pgrp);
	tty->session = NULL;
	tty->pgrp = NULL;
	tty->ctrl_status = 0;
	spin_unlock_irq(&tty->ctrl_lock);

	/*
	 * If one of the devices matches a console pointer, we
	 * cannot just call hangup() because that will cause
	 * tty->count and state->count to go out of sync.
	 * So we just call close() the right number of times.
	 */
	if (cons_filp) {
		if (tty->ops->close)
			for (n = 0; n < closecount; n++)
				tty->ops->close(tty, cons_filp);
	} else if (tty->ops->hangup)
		(tty->ops->hangup)(tty);
	/*
	 * We don't want to have driver/ldisc interactions beyond
	 * the ones we did here. The driver layer expects no
	 * calls after ->hangup() from the ldisc side. However we
	 * can't yet guarantee all that.
	 */
	set_bit(TTY_HUPPED, &tty->flags);
	clear_bit(TTY_HUPPING, &tty->flags);

	tty_unlock(tty);	/* <-- never reached, so the lock is never released */

	if (f)
		fput(f);
}
Clearly, then, this test script can also be used to verify whether the fix described in 《记录linux tty的一次软锁排查》 is really OK.
The modified script is as follows:
#!/bin/bash
while [ 1 ]
do
    for i in {1..64}
    do
        ./main.o /dev/tty$i
    done
done
Original post: http://www.cnblogs.com/10087622blog/p/7771356.html