标签:infoq getname error 技术 spin ddl device down purpose
内核模块和用户空间模块获取网络设备状态信息的主要方式有两种:
netdev_chain 通知链,以及 netlink 的 RTMGRP_LINK 组播。
通知链的事件:
网络设备的启用:
ifconfig up 命令(CLI)最终通过 ioctl 的 SIOCSIFFLAGS 来激活设备。SIOCSIFFLAGS 经由 dev_change_flags 调用 dev_open 激活设备;同时会向网络设备状态变化的通知链发送一个 NETDEV_UP 消息。
当网卡链路状态变化时(如断开或连上),网卡会通知驱动程序或者由驱动程序去查询
网卡的相关寄存器位(例如在 timeout 时去查询这些位),然后由 netif_carrier_on/off 去通知
内核这个变化。
/*
 * netif_carrier_on - record that the device has carrier.
 * @dev: network device to update
 *
 * Clears __LINK_STATE_NOCARRIER in dev->state and fires a link-watch event
 * when that bit was previously set. Independently of that, the watchdog is
 * brought up whenever the device is running.
 */
void netif_carrier_on(struct net_device *dev)
{
	/* test_and_clear_bit() clears the bit and returns its previous value. */
	const int carrier_was_off =
		test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state);

	if (carrier_was_off) {
		linkwatch_fire_event(dev);
	}

	if (netif_running(dev)) {
		__netdev_watchdog_up(dev);
	}
}
static inline netif_carrier_off(struct net_device *dev)
{
//test_and_set_bit - Set a bit and return its old value
if (!test_and_set_bit(_ _LINK_STATE_NOCARRIER, &dev->state))
linkwatch_fire_event(dev);
}
这样, netif_carrier_on 会 提交事件给linkwatch_fire_event,它会创建一个 lw_event 结构:
/* One link-state-change event, linked into the link-watch event list. */
struct lw_event {
	struct list_head list;	/* list linkage */
	struct net_device *dev;	/* network device this event is associated with */
};
并将这个结构初始化后(event->dev = dev;)加入到事件队列中:
/*
 * Append the event to the tail of lweventlist. The list is modified only
 * while lweventlist_lock is held; the lock is taken with
 * spin_lock_irqsave() and released with the matching
 * spin_unlock_irqrestore(), using the same flags variable.
 */
spin_lock_irqsave(&lweventlist_lock, flags);
list_add_tail(&event->list, &lweventlist);
spin_unlock_irqrestore(&lweventlist_lock, flags);
然后它调用 schedule_work(&linkwatch_work),由内核线程去处理这些事件。它最终由
linkwatch_run_queue(void) 去完成这些处理工作:
/* Excerpt from linkwatch_run_queue(): handle each event linked on head. */
list_for_each_safe(n, next, &head) {
	struct lw_event *event = list_entry(n, struct lw_event, list);
	struct net_device *dev = event->dev;

	/* ... (code omitted in this excerpt) ... */

	/* Only devices with IFF_UP set are acted upon. */
	if (dev->flags & IFF_UP) {
		/* Carrier present: activate the device; otherwise deactivate it. */
		if (netif_carrier_ok(dev)) {
			dev_activate(dev);
		} else {
			dev_deactivate(dev);
		}

		/* Tell interested parties that the device state changed. */
		netdev_state_change(dev);
	}
}
可以看到,它的最主要工作之一就是 netdev_state_change(dev):
/*
 * netdev_state_change - announce a state change of a network device.
 * @dev: device whose state changed
 *
 * Does nothing unless IFF_UP is set in dev->flags. Otherwise it calls the
 * netdev_chain notifier chain with NETDEV_CHANGE and then emits an
 * RTM_NEWLINK message through rtmsg_ifinfo().
 */
void netdev_state_change(struct net_device *dev)
{
	/* A device that is not up produces no notification at all. */
	if (!(dev->flags & IFF_UP))
		return;

	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
	rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
这个函数通知注册到 netdev_chain 链表的所有子系统,这个网卡的链路状态有了变化。就是
说,如果某个子系统对网卡的链路状态变化感兴趣,它就可以注册到这个链表,在变化产
生时,内核便会通知这些子系统。
注意: a. 它只会在网卡状态为 UP 时,才会发出通知,因为,如果状态为 DOWN,网卡链
路的状态改变也没什么意义。
b. 每个网卡的这些状态变化事件 lw_event 不会重复排队,即每个网卡只有一个
事件的实例在队列中。另外由上面看到的 lw_event 结构可知,它只包含发生状态变化的网卡
设备,而没有包含链路是连上还是断开的状态参数。
应用层对套接口有关操作通过ioctl处理:
socket 为一个字符设备驱动,应用层的ioctl操作会调用驱动的file_operations 对应的ioctl接口;
对于socket来说就是socket_file_ops 接口集合的sock_ioctl
static const struct file_operations socket_file_ops = { .owner = THIS_MODULE, .llseek = no_llseek, .aio_read = sock_aio_read, .aio_write = sock_aio_write, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_sock_ioctl, #endif .mmap = sock_mmap, .open = sock_no_open, /* special open code to disallow open via /proc */ .release = sock_close, //Ó¦ÓóÌÐòclose .fasync = sock_fasync, .sendpage = sock_sendpage, .splice_write = generic_splice_sendpage, .splice_read = sock_splice_read, };
//主要是和网络物理设备接口相关,例如设置eth0地址,创建eth2 删除 设置路由 ARP等等
//主要是和网络物理设备接口相关,例如设置eth0地址,创建eth2 删除 设置路由 ARP等等 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; struct sock *sk; void __user *argp = (void __user *)arg; int pid, err; struct net *net; sock = file->private_data; sk = sock->sk; net = sock_net(sk); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { //DEV设备iotcl命令字范围 err = dev_ioctl(net, cmd, argp); } else #ifdef CONFIG_WEXT_CORE if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { err = dev_ioctl(net, cmd, argp); } else #endif switch (cmd) { /* 设置file或者sock的进程ID或者进程组ID */ case FIOSETOWN: case SIOCSPGRP: err = -EFAULT; if (get_user(pid, (int __user *)argp)) break; err = f_setown(sock->file, pid, 1); break; /* 获取file或者sock的进程ID或者进程组ID */ case FIOGETOWN: case SIOCGPGRP: err = put_user(f_getown(sock->file), (int __user *)argp); break; /* 设置 修改 创建 删除网桥设备 */ case SIOCGIFBR: case SIOCSIFBR: case SIOCBRADDBR: case SIOCBRDELBR: err = -ENOPKG; if (!br_ioctl_hook) request_module("bridge"); mutex_lock(&br_ioctl_mutex); if (br_ioctl_hook) err = br_ioctl_hook(net, cmd, argp); mutex_unlock(&br_ioctl_mutex); break; /* 设置 修改 创建 删除VLAN设备 */ case SIOCGIFVLAN: case SIOCSIFVLAN: err = -ENOPKG; if (!vlan_ioctl_hook) request_module("8021q"); mutex_lock(&vlan_ioctl_mutex); if (vlan_ioctl_hook) err = vlan_ioctl_hook(net, argp); mutex_unlock(&vlan_ioctl_mutex); break; case SIOCADDDLCI: case SIOCDELDLCI: err = -ENOPKG; if (!dlci_ioctl_hook) request_module("dlci"); mutex_lock(&dlci_ioctl_mutex); if (dlci_ioctl_hook) err = dlci_ioctl_hook(cmd, argp); mutex_unlock(&dlci_ioctl_mutex); break; /* 其他ioctl命令字调用各自的sock ioctl 调用到inet_ioctl()。*/ default: err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; }
如果是tcp或者udp协议相关的
在创建socket 时,会调用inet_create();
/* * Create an inet socket. */ //pf_inet的net_families[]为inet_family_ops,对应的套接口层ops参考inetsw_array中的inet_stream_ops inet_dgram_ops inet_sockraw_ops,传输层操作集分别为tcp_prot udp_prot raw_prot //netlink的net_families[]netlink_family_ops,对应的套接口层ops为netlink_op
/* * Create an inet socket. */ static int inet_create(struct net *net, struct socket *sock, int protocol, int kern) {. ........................................... sock->ops = answer->ops; 对应的为 answer_prot = answer->prot; answer_no_check = answer->no_check; answer_flags = answer->flags; ........................... }
/*
 * proto_ops table named inet_dccp_ops, registered for the PF_INET family.
 * NOTE(review): this is an excerpt - the initializer is cut off after the
 * .ioctl member in this article; the remaining members and the closing
 * brace are not shown.
 */
static const struct proto_ops inet_dccp_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
.poll = dccp_poll,
/* ioctl requests on this socket type are handled by inet_ioctl */
.ioctl = inet_ioctl,
inet_ioctl的实现
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; int err = 0; struct net *net = sock_net(sk); switch (cmd) { case SIOCGSTAMP: err = sock_get_timestamp(sk, (struct timeval __user *)arg); break; case SIOCGSTAMPNS: err = sock_get_timestampns(sk, (struct timespec __user *)arg); break; /* 添加 删除 路由操作 */ case SIOCADDRT: case SIOCDELRT: case SIOCRTMSG: err = ip_rt_ioctl(net, cmd, (void __user *)arg); break; //ARP添加 删除 设置 case SIOCDARP: case SIOCGARP: case SIOCSARP: err = arp_ioctl(net, cmd, (void __user *)arg); break; /* DEV设备接口操作 */ case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFPFLAGS: case SIOCGIFPFLAGS: case SIOCSIFFLAGS: err = devinet_ioctl(net, cmd, (void __user *)arg); break; default://对具体的某个协议的套接口ioctl操作 if (sk->sk_prot->ioctl) err = sk->sk_prot->ioctl(sk, cmd, arg); else err = -ENOIOCTLCMD; break; } return err; }
dev_ioctl的实现
/** * dev_load - load a network module * @net: the applicable net namespace * @name: name of interface * * If a network interface is not present and the process has suitable * privileges this function loads the module. If module loading is not * available in this kernel then it becomes a nop. */ void dev_load(struct net *net, const char *name) { struct net_device *dev; int no_module; rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); rcu_read_unlock(); no_module = !dev; if (no_module && capable(CAP_NET_ADMIN)) no_module = request_module("netdev-%s", name); if (no_module && capable(CAP_SYS_MODULE)) request_module("%s", name); } EXPORT_SYMBOL(dev_load); /* * This function handles all "interface"-type I/O control requests. The actual * ‘doing‘ part of this is dev_ifsioc above. */ /** * dev_ioctl - network device ioctl * @net: the applicable net namespace * @cmd: command to issue * @arg: pointer to a struct ifreq in user space * * Issue ioctl functions to devices. This is normally called by the * user space syscall interfaces but can sometimes be useful for * other purposes. The return value is the return from the syscall if * positive or a negative errno code on error. */ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; int ret; char *colon; /* One special case: SIOCGIFCONF takes ifconf argument and requires shared lock, because it sleeps writing to user space. */ if (cmd == SIOCGIFCONF) { rtnl_lock(); ret = dev_ifconf(net, (char __user *) arg); rtnl_unlock(); return ret; } if (cmd == SIOCGIFNAME) return dev_ifname(net, (struct ifreq __user *)arg); if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; ifr.ifr_name[IFNAMSIZ-1] = 0; colon = strchr(ifr.ifr_name, ‘:‘); if (colon) *colon = 0; /* * See which interface the caller is talking about. */ switch (cmd) { /* * These ioctl calls: * - can be done by all. * - atomic and do not require locking. 
* - return a value */ case SIOCGIFFLAGS: case SIOCGIFMETRIC: case SIOCGIFMTU: case SIOCGIFHWADDR: case SIOCGIFSLAVE: case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: dev_load(net, ifr.ifr_name); rcu_read_lock(); ret = dev_ifsioc_locked(net, &ifr, cmd); rcu_read_unlock(); if (!ret) { if (colon) *colon = ‘:‘; if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) ret = -EFAULT; } return ret; case SIOCETHTOOL: dev_load(net, ifr.ifr_name); rtnl_lock(); ret = dev_ethtool(net, &ifr); rtnl_unlock(); if (!ret) { if (colon) *colon = ‘:‘; if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) ret = -EFAULT; } return ret; /* * These ioctl calls: * - require superuser power. * - require strict serialization. * - return a value */ case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSIFNAME: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; dev_load(net, ifr.ifr_name); rtnl_lock(); ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret) { if (colon) *colon = ‘:‘; if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) ret = -EFAULT; } return ret; /* * These ioctl calls: * - require superuser power. * - require strict serialization. * - do not return a value */ case SIOCSIFMAP: case SIOCSIFTXQLEN: if (!capable(CAP_NET_ADMIN)) return -EPERM; /* fall through */ /* * These ioctl calls: * - require local superuser power. * - require strict serialization. 
* - do not return a value */ case SIOCSIFFLAGS: case SIOCSIFMETRIC: case SIOCSIFMTU: case SIOCSIFHWADDR: case SIOCSIFSLAVE: case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFHWBROADCAST: case SIOCSMIIREG: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: case SIOCSHWTSTAMP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: dev_load(net, ifr.ifr_name); rtnl_lock(); ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); return ret; case SIOCGIFMEM: /* Get the per device memory space. We can add this but * currently do not support it */ case SIOCSIFMEM: /* Set the per device memory buffer space. * Not applicable in our case */ case SIOCSIFLINK: return -ENOTTY; /* * Unknown or private ioctl. */ default: if (cmd == SIOCWANDEV || cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { dev_load(net, ifr.ifr_name); rtnl_lock(); ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) ret = -EFAULT; return ret; } /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) return wext_handle_ioctl(net, &ifr, cmd, arg); return -ENOTTY; } }
其中内核有的netlink 消息如下
/****
 *		Routing/neighbour discovery messages.
 ****/

/*
 * Types of messages.
 *
 * Each enumerator is also defined as a macro expanding to itself, so its
 * presence can be tested with #ifdef.
 */
enum {
	RTM_BASE	= 16,
#define RTM_BASE	RTM_BASE

	RTM_NEWLINK	= 16,
#define RTM_NEWLINK	RTM_NEWLINK
	RTM_DELLINK,
#define RTM_DELLINK	RTM_DELLINK
	RTM_GETLINK,
#define RTM_GETLINK	RTM_GETLINK
	RTM_SETLINK,
#define RTM_SETLINK	RTM_SETLINK

	RTM_NEWADDR	= 20,
#define RTM_NEWADDR	RTM_NEWADDR
	RTM_DELADDR,
#define RTM_DELADDR	RTM_DELADDR
	RTM_GETADDR,
#define RTM_GETADDR	RTM_GETADDR

	RTM_NEWROUTE	= 24,
#define RTM_NEWROUTE	RTM_NEWROUTE
	RTM_DELROUTE,
#define RTM_DELROUTE	RTM_DELROUTE
	RTM_GETROUTE,
#define RTM_GETROUTE	RTM_GETROUTE

	RTM_NEWNEIGH	= 28,
#define RTM_NEWNEIGH	RTM_NEWNEIGH
	RTM_DELNEIGH,
#define RTM_DELNEIGH	RTM_DELNEIGH
	RTM_GETNEIGH,
#define RTM_GETNEIGH	RTM_GETNEIGH

	RTM_NEWRULE	= 32,
#define RTM_NEWRULE	RTM_NEWRULE
	RTM_DELRULE,
#define RTM_DELRULE	RTM_DELRULE
	RTM_GETRULE,
#define RTM_GETRULE	RTM_GETRULE

	RTM_NEWQDISC	= 36,
#define RTM_NEWQDISC	RTM_NEWQDISC
	RTM_DELQDISC,
#define RTM_DELQDISC	RTM_DELQDISC
	RTM_GETQDISC,
#define RTM_GETQDISC	RTM_GETQDISC

	RTM_NEWTCLASS	= 40,
#define RTM_NEWTCLASS	RTM_NEWTCLASS
	RTM_DELTCLASS,
#define RTM_DELTCLASS	RTM_DELTCLASS
	RTM_GETTCLASS,
#define RTM_GETTCLASS	RTM_GETTCLASS

	RTM_NEWTFILTER	= 44,
#define RTM_NEWTFILTER	RTM_NEWTFILTER
	RTM_DELTFILTER,
#define RTM_DELTFILTER	RTM_DELTFILTER
	RTM_GETTFILTER,
#define RTM_GETTFILTER	RTM_GETTFILTER

	RTM_NEWACTION	= 48,
#define RTM_NEWACTION	RTM_NEWACTION
	RTM_DELACTION,
#define RTM_DELACTION	RTM_DELACTION
	RTM_GETACTION,
#define RTM_GETACTION	RTM_GETACTION

	RTM_NEWPREFIX	= 52,
#define RTM_NEWPREFIX	RTM_NEWPREFIX

	RTM_GETMULTICAST = 58,
#define RTM_GETMULTICAST RTM_GETMULTICAST

	RTM_GETANYCAST	= 62,
#define RTM_GETANYCAST	RTM_GETANYCAST

	RTM_NEWNEIGHTBL	= 64,
#define RTM_NEWNEIGHTBL	RTM_NEWNEIGHTBL
	RTM_GETNEIGHTBL	= 66,
#define RTM_GETNEIGHTBL	RTM_GETNEIGHTBL
	RTM_SETNEIGHTBL,
#define RTM_SETNEIGHTBL	RTM_SETNEIGHTBL

	RTM_NEWNDUSEROPT = 68,
#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT

	RTM_NEWADDRLABEL = 72,
#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL
	RTM_DELADDRLABEL,
#define RTM_DELADDRLABEL RTM_DELADDRLABEL
	RTM_GETADDRLABEL,
#define RTM_GETADDRLABEL RTM_GETADDRLABEL

	RTM_GETDCB = 78,
#define RTM_GETDCB RTM_GETDCB
	RTM_SETDCB,
#define RTM_SETDCB RTM_SETDCB

	RTM_NEWNETCONF = 80,
#define RTM_NEWNETCONF RTM_NEWNETCONF
	RTM_GETNETCONF = 82,
#define RTM_GETNETCONF RTM_GETNETCONF

	RTM_NEWMDB = 84,
#define RTM_NEWMDB RTM_NEWMDB
	RTM_DELMDB = 85,
#define RTM_DELMDB RTM_DELMDB
	RTM_GETMDB = 86,
#define RTM_GETMDB RTM_GETMDB

	RTM_NEWNSID = 88,
#define RTM_NEWNSID RTM_NEWNSID
	RTM_DELNSID = 89,
#define RTM_DELNSID RTM_DELNSID
	RTM_GETNSID = 90,
#define RTM_GETNSID RTM_GETNSID

	RTM_NEWSTATS = 92,
#define RTM_NEWSTATS RTM_NEWSTATS
	RTM_GETSTATS = 94,
#define RTM_GETSTATS RTM_GETSTATS

	__RTM_MAX,
/* Round __RTM_MAX up to a multiple of 4, then step back to the last slot. */
#define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
};

/* Number of message-type slots, counted from RTM_BASE. */
#define RTM_NR_MSGTYPES	(RTM_MAX + 1 - RTM_BASE)
/* Message types come in groups of four; this is the number of groups. */
#define RTM_NR_FAMILIES	(RTM_NR_MSGTYPES >> 2)
/* Index of the group of four that message type cmd belongs to. */
#define RTM_FAM(cmd)	(((cmd) - RTM_BASE) >> 2)
其中 RTM_NEWLINK-->.newlink = vlan_newlink,--->创建接口vlanif
其netlink详细见af_netlink
netlink_proto_init
标签:infoq getname error 技术 spin ddl device down purpose
原文地址:https://www.cnblogs.com/codestack/p/9163756.html