标签:linux stateless nat stateful nat iptables netfilter
关于Linux上如何配置NAT的资料已经不少,可谓铺天盖地!本文与此无关。本文提供一种iptables之外的方式。/* * * 用法: * 对目标地址为1.2.1.2的数据包做目标地址转换,目标转为192.168.1.8 * echo +1.2.1.2 192.168.1.8 dst >/proc/net/static_nat * 上述命令会同时添加一条反向的SNAT映射 * * 请解释: * echo +192.168.184.250 192.168.184.154 src >/proc/net/static_nat * */ #include <linux/module.h> #include <linux/skbuff.h> #include <net/ip.h> #include <net/netfilter/nf_conntrack.h> #define DIRMASK 0x11 #define BUCKETS 1024 #define NAT_OPT_DEL 0x01 #define NAT_OPT_FIND 0x04 #define NAT_OPT_ACCT_BIT 0x02 enum nat_dir { DIR_SNAT, DIR_DNAT, DIR_NUM }; /* * 记录统计信息 */ struct nat_account { u32 nat_packets; u32 nat_bytes; }; struct static_nat_entry { __be32 addr[DIR_NUM]; enum nat_dir type; struct nat_account acct[DIR_NUM]; struct hlist_node node[DIR_NUM]; }; static DEFINE_SPINLOCK(nat_lock); /* 保存SNAT映射 */ struct hlist_head *src_list; /* 保存DNAT映射 */ struct hlist_head *dst_list; /* * 用一个IP地址(对于PREROUTING是daddr,对于POSTROUTING是saddr)作为key来获取value。 */ static __be32 get_address_from_map(struct sk_buff *skb, unsigned int dir, __be32 addr_key, unsigned int opt) { __be32 ret = 0, cmp_key, ret_value; u32 hash; struct hlist_head *list; struct hlist_node *iter, *tmp; struct static_nat_entry *ent; hash = jhash_1word(addr_key, 1); hash = hash%BUCKETS; spin_lock(&nat_lock); if (dir == DIR_DNAT) { list = &dst_list[hash]; } else if (dir == DIR_SNAT) { list = &src_list[hash]; } else { spin_unlock(&nat_lock); goto out; } hlist_for_each_safe(iter, tmp, list) { ent = hlist_entry(iter, struct static_nat_entry, node[dir]); /* 注意反转 */ cmp_key = (ent->type == dir) ? ent->addr[0]:ent->addr[1]; ret_value = (ent->type == dir) ? 
ent->addr[1]:ent->addr[0]; if (addr_key == cmp_key) { ret = ret_value; if (opt == NAT_OPT_DEL) { if (dir == ent->type) { hlist_del(&ent->node[0]); hlist_del(&ent->node[1]); kfree(ent); } else { ret = 0; } } if (opt & NAT_OPT_ACCT_BIT) { ent->acct[dir].nat_packets ++; ent->acct[dir].nat_bytes += skb == NULL?1:skb->len; } break; } } spin_unlock(&nat_lock); out: return ret; } /* * 更新第四层的校验码信息 */ static void nat4_update_l4(struct sk_buff *skb, __be32 oldip, __be32 newip) { struct iphdr *iph = ip_hdr(skb); void *transport_hdr = (void *)iph + ip_hdrlen(skb); struct tcphdr *tcph; struct udphdr *udph; bool cond; switch (iph->protocol) { case IPPROTO_TCP: tcph = transport_hdr; inet_proto_csum_replace4(&tcph->check, skb, oldip, newip, true); break; case IPPROTO_UDP: case IPPROTO_UDPLITE: udph = transport_hdr; cond = udph->check != 0; cond |= skb->ip_summed == CHECKSUM_PARTIAL; if (cond) { inet_proto_csum_replace4(&udph->check, skb, oldip, newip, true); if (udph->check == 0) { udph->check = CSUM_MANGLED_0; } } break; } } /* * 在POSTROUTING上执行源地址转换: * 1.正向源地址转换; * 2.目标地址转换的逆向源地址转换 */ static unsigned int ipv4_nat_out(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { unsigned int ret = NF_ACCEPT; __be32 to_trans = 0; struct iphdr *hdr = ip_hdr(skb); to_trans = get_address_from_map(skb, DIR_SNAT, hdr->saddr, NAT_OPT_FIND|NAT_OPT_ACCT_BIT); if (!to_trans) { goto out; } if (hdr->saddr == to_trans) { goto out; } /* 执行SNAT */ csum_replace4(&hdr->check, hdr->saddr, to_trans); nat4_update_l4(skb, hdr->saddr, to_trans); hdr->saddr = to_trans; out: return ret; } /* * 在PREROUTING上执行目标地址转换: * 1.正向目标地址转换; * 2.源地址转换的逆向目标地址转换 */ static unsigned int ipv4_nat_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { unsigned int ret = NF_ACCEPT; __be32 to_trans = 0; struct iphdr *hdr = ip_hdr(skb); if (skb->nfct && skb->nfct != 
&nf_conntrack_untracked.ct_general) { goto out; } to_trans = get_address_from_map(skb, DIR_DNAT, hdr->daddr, NAT_OPT_FIND|NAT_OPT_ACCT_BIT); if (!to_trans) { goto out; } if (hdr->daddr == to_trans) { goto out; } /* 执行DNAT */ csum_replace4(&hdr->check, hdr->daddr, to_trans); nat4_update_l4(skb, hdr->daddr, to_trans); hdr->daddr = to_trans; /* * 设置一个notrack 防止其被track以及nat. * 这是绝对合适的,因为既然是static的stateless NAT * 我们就不希望它被状态左右 **/ /* * 其实,并不是主要避开基于conntrack的NAT就可以了,因为 * conntrack本身就不容你对两个方向的tuple进行随意修改 */ if (!skb->nfct) { skb->nfct = &nf_conntrack_untracked.ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); } out: return ret; } static struct nf_hook_ops ipv4_nat_ops[] __read_mostly = { { .hook = ipv4_nat_in, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK-1, }, { .hook = ipv4_nat_out, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK+1, }, }; static char *parse_addr(const char *input, __be32 *from, __be32 *to) { char *p1, *p2; size_t length = strlen(input); if (!(p1 = memchr(input, ‘ ‘, length))) { return NULL; } if (!(p2 = memchr(p1 + 1, ‘ ‘, length - (p1 + 1 - input)))) { return NULL; } if (!(in4_pton(input, p1 - input, (u8 *)from, ‘ ‘, NULL)) || !(in4_pton(p1 + 1, p2 - p1 - 1, (u8 *)to, ‘ ‘, NULL))) { return NULL; } return ++p2; } static ssize_t static_nat_config_write(struct file *file, const char *buffer, size_t count, loff_t *unused) { int ret = 0; size_t length = count; __be32 from, to; u32 normal, reverse; char *buf = NULL; char *p; struct static_nat_entry *ent; if (length) { char *pp = (char *)(buffer + (length - 1)); for (; (*pp < (char)32) || (*pp > (char)126); pp--) { if (length <= 0) { ret = -EINVAL; goto out; } length--; } } else { goto out; } buf = kzalloc((length + 1), GFP_ATOMIC); if (!buf) { ret = -ENOMEM; goto out; } memcpy(buf, buffer, length); if (!(p = parse_addr(buf + 1, &from, &to))) { ret = -EINVAL; goto out; } if 
(‘+‘ == *buf) { ent = (struct static_nat_entry *)kzalloc(sizeof(struct static_nat_entry), GFP_KERNEL); if (!ent) { ret = -EFAULT; goto out; } /* 计算原始项的hash桶位置 */ normal = jhash_1word(from, 1); normal = normal%BUCKETS; /* 计算反转位置的hash桶位置 */ reverse = jhash_1word(to, 1); reverse = reverse%BUCKETS; /* * 设置key/value对 * 注意,反转类型的hnode其key/value也要反转 */ ent->addr[0] = from; ent->addr[1] = to; /* 初始化链表节点 */ INIT_HLIST_NODE(&ent->node[DIR_SNAT]); INIT_HLIST_NODE(&ent->node[DIR_DNAT]); if (strstr(p, "src")) { /* 添加SNAT项,自动生成DNAT项 */ /* 首先判断是否已经存在了 */ if (get_address_from_map(NULL, DIR_SNAT, from, NAT_OPT_FIND) || get_address_from_map(NULL, DIR_SNAT, to, NAT_OPT_FIND)) { ret = -EEXIST; kfree(ent); goto out; } /* 这是这个entry的type,用来区分生成的两条配置项 */ ent->type = DIR_SNAT; /* 落实到链表 */ spin_lock(&nat_lock); hlist_add_head(&ent->node[DIR_SNAT], &src_list[normal]); hlist_add_head(&ent->node[DIR_DNAT], &dst_list[reverse]); spin_unlock(&nat_lock); } else if(strstr(p, "dst")) { /* 添加DNAT项,自动生成SNAT项 */ /* 首先判断是否已经存在了 */ if (get_address_from_map(NULL, DIR_DNAT, from, NAT_OPT_FIND) || get_address_from_map(NULL, DIR_DNAT, to, NAT_OPT_FIND)){ ret = -EEXIST; kfree(ent); goto out; } /* 这是这个entry的type,用来区分生成的两条配置项 */ ent->type = DIR_DNAT; /* 落实到链表 */ spin_lock(&nat_lock); hlist_add_head(&ent->node[DIR_DNAT], &dst_list[normal]); hlist_add_head(&ent->node[DIR_SNAT], &src_list[reverse]); spin_unlock(&nat_lock); } else { ret = -EFAULT; kfree(ent); goto out; } } else if (‘-‘ ==*buf) { u32 r1; if (strstr(p, "src")) { r1 = get_address_from_map(NULL, DIR_SNAT, from, NAT_OPT_DEL); if (r1 == 0) { ret = -ENOENT; goto out; } } else if(strstr(p, "dst")) { r1 = get_address_from_map(NULL, DIR_DNAT, from, NAT_OPT_DEL); if (r1 == 0) { ret = -ENOENT; goto out; } } else { } } else { ret = -EINVAL; goto out; } ret = count; out: kfree(buf); return ret; } static ssize_t static_nat_config_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int len = 0; static int done = 0; int i; char from[15], to[15]; 
char *kbuf_to_avoid_user_space_memory_page_fault = NULL; /* 每一行的最大长度 */ #define MAX_LINE_CHARS 128 if (done) { done = 0; goto out; } /* * 分配一块内核内存,为了避免直接操作用户内存而引发页面调度, * 页面调度会导致睡眠切换,而我们操作的内容处在自旋锁的保护 * 下,所以不能切换! */ /* * 问题: * 我这里仅仅分配count大小的内存,是因为这个版本不支持多次读, * 只能一次读完。也许我应该学学seq read的方法。 */ kbuf_to_avoid_user_space_memory_page_fault = kzalloc(count, GFP_KERNEL); if (!kbuf_to_avoid_user_space_memory_page_fault) { len = -ENOMEM; done = 1; goto out; } spin_lock(&nat_lock); len += sprintf(kbuf_to_avoid_user_space_memory_page_fault + len, "Source trans table:\n"); if (len + MAX_LINE_CHARS > count) { goto copy_now; } for (i = 0; i < BUCKETS; i++) { struct hlist_node *iter, *tmp; struct static_nat_entry *ent; hlist_for_each_safe(iter, tmp, &src_list[i]) { ent = hlist_entry(iter, struct static_nat_entry, node[DIR_SNAT]); sprintf(from, "%pI4", (ent->type == DIR_SNAT)? &ent->addr[0]:&ent->addr[1]); sprintf(to, "%pI4", (ent->type == DIR_SNAT)? &ent->addr[1]:&ent->addr[0]); len += sprintf(kbuf_to_avoid_user_space_memory_page_fault + len, "From:%-15s To:%-15s [%s] [Bytes:%u Packet:%u]\n", from, to, (ent->type == DIR_SNAT)?"STATIC":"AUTO", ent->acct[DIR_SNAT].nat_bytes, ent->acct[DIR_SNAT].nat_packets); if (len + MAX_LINE_CHARS > count) { goto copy_now; } } } len += sprintf(kbuf_to_avoid_user_space_memory_page_fault + len, "\nDestination trans table:\n"); if (len + MAX_LINE_CHARS > count) { goto copy_now; } for (i = 0; i < BUCKETS; i++) { struct hlist_node *iter, *tmp; struct static_nat_entry *ent; hlist_for_each_safe(iter, tmp, &dst_list[i]) { ent = hlist_entry(iter, struct static_nat_entry, node[DIR_DNAT]); sprintf(from, "%pI4", (ent->type == DIR_DNAT)? &ent->addr[0]:&ent->addr[1]); sprintf(to, "%pI4", (ent->type == DIR_DNAT)? 
&ent->addr[1]:&ent->addr[0]); len += sprintf(kbuf_to_avoid_user_space_memory_page_fault + len, "From:%-15s To:%-15s [%s] [Bytes:%u Packet:%u]\n", from, to, (ent->type == DIR_DNAT)?"STATIC":"AUTO", ent->acct[DIR_DNAT].nat_bytes, ent->acct[DIR_DNAT].nat_packets); if (len + MAX_LINE_CHARS > count) { goto copy_now; } } } copy_now: spin_unlock(&nat_lock); done = 1; /* 这里已经解除自旋锁 */ if (copy_to_user(buf, kbuf_to_avoid_user_space_memory_page_fault, len)) { len = EFAULT; goto out; } out: if (kbuf_to_avoid_user_space_memory_page_fault) { kfree(kbuf_to_avoid_user_space_memory_page_fault); } return len; } static const struct file_operations static_nat_file_ops = { .owner = THIS_MODULE, .read = static_nat_config_read, .write = static_nat_config_write, }; static int __init nf_static_nat_init(void) { int ret = 0; int i; src_list = kzalloc(sizeof(struct hlist_head) * BUCKETS, GFP_KERNEL); if (!src_list) { ret = -ENOMEM; goto out; } dst_list = kzalloc(sizeof(struct hlist_head) * BUCKETS, GFP_KERNEL); if (!dst_list) { ret = -ENOMEM; goto out; } ret = nf_register_hooks(ipv4_nat_ops, ARRAY_SIZE(ipv4_nat_ops)); if (ret < 0) { printk("nf_nat_ipv4: can‘t register hooks.\n"); goto out; } if (!proc_create("static_nat", 0644, init_net.proc_net, &static_nat_file_ops)) { ret = -ENOMEM; goto out; } for (i = 0; i < BUCKETS; i++) { INIT_HLIST_HEAD(&src_list[i]); INIT_HLIST_HEAD(&dst_list[i]); } return ret; out: if (src_list) { kfree(src_list); } if (dst_list) { kfree(dst_list); } return ret; } static void __exit nf_static_nat_fini(void) { int i; remove_proc_entry("static_nat", init_net.proc_net); nf_unregister_hooks(ipv4_nat_ops, ARRAY_SIZE(ipv4_nat_ops)); spin_lock(&nat_lock); for (i = 0; i < BUCKETS; i++) { struct hlist_node *iter, *tmp; struct static_nat_entry *ent; hlist_for_each_safe(iter, tmp, &src_list[i]) { ent = hlist_entry(iter, struct static_nat_entry, node[0]); hlist_del(&ent->node[DIR_SNAT]); hlist_del(&ent->node[DIR_DNAT]); kfree(ent); } } spin_unlock(&nat_lock); if (src_list) { 
kfree(src_list); } if (dst_list) { kfree(dst_list); } } module_init(nf_static_nat_init); module_exit(nf_static_nat_fini); MODULE_DESCRIPTION("STATIC two-way NAT"); MODULE_AUTHOR("marywangran@126.com"); MODULE_LICENSE("GPL");
Makefile:
# Out-of-tree build of the static NAT module against the running kernel.
obj-m += nf_rawnat.o

.PHONY: all clean

# M= is the documented way to point kbuild at an external module directory;
# $(MAKE) passes jobserver and command-line flags on to the sub-make.
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(CURDIR) modules

clean:
	rm -rf *.ko *.o .tmp_versions .*.mod.o .*.o.cmd *.mod.c .*.ko.cmd Module.symvers modules.order
在Linux上实现一个可用的stateless双向静态NAT模块
标签:linux stateless nat stateful nat iptables netfilter
原文地址:http://blog.csdn.net/dog250/article/details/42046867