标签:
在比较早的那些年,我曾经写了一个负载均衡调度算法模块,是基于应用层协议包任意偏移量开始的一段固定长度的数据计算一个值,然后将这个值hash到不同的服务器。那时觉得没啥用,就没有再继续,直到前一段时间的一段思考以及前几天的一次预研。我决定作文以记之,以后说不定能用得着。代码位于 net/netfilter/ipvs/ip_vs_offh.c:
/* * IPVS: Layer7 payload Hashing scheduling module * * Authors: ZHAOYA * 基于ip_vs_sh/dh修改而来,详细注释请参见: * net/netfilter/ipvs/ip_vs_sh.c * net/netfilter/ipvs/ip_vs_dh.c */ #include <linux/ip.h> #include <linux/tcp.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/ctype.h> #include <net/ip.h> #include <net/ip_vs.h> struct ip_vs_offh_bucket { struct ip_vs_dest *dest; }; struct ip_vs_offh_data { struct ip_vs_offh_bucket *tbl; u32 offset; u32 offlen; }; #define IP_VS_OFFH_TAB_BITS 8 #define IP_VS_OFFH_TAB_SIZE (1 << IP_VS_OFFH_TAB_BITS) #define IP_VS_OFFH_TAB_MASK (IP_VS_OFFH_TAB_SIZE - 1) /* * 全局变量 * offset:Layer7计算hash值的payload偏移量(相对于Layer7头) * offlen:Layer7计算hash值的payload长度 */ static u32 offset, offlen; static int skip_atoi(char **s) { int i=0; while (isdigit(**s)) i = i*10 + *((*s)++) - ‘0‘; return i; } static inline struct ip_vs_dest * ip_vs_offh_get(struct ip_vs_offh_bucket *tbl, const char *payload, u32 length) { __be32 v_fold = 0; /* 算法有待优化 */ v_fold = (payload[0]^payload[length>>2]^payload[length])*2654435761UL; return (tbl[v_fold & IP_VS_OFFH_TAB_MASK]).dest; } static int ip_vs_offh_assign(struct ip_vs_offh_bucket *tbl, struct ip_vs_service *svc) { int i; struct ip_vs_offh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; b = tbl; p = &svc->destinations; for (i=0; i<IP_VS_OFFH_TAB_SIZE; i++) { if (list_empty(p)) { b->dest = NULL; } else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); atomic_inc(&dest->refcnt); b->dest = dest; p = p->next; } b++; } return 0; } static void ip_vs_offh_flush(struct ip_vs_offh_bucket *tbl) { int i; struct ip_vs_offh_bucket *b; b = tbl; for (i=0; i<IP_VS_OFFH_TAB_SIZE; i++) { if (b->dest) { atomic_dec(&b->dest->refcnt); b->dest = NULL; } b++; } } static int ip_vs_offh_init_svc(struct ip_vs_service *svc) { struct ip_vs_offh_data *pdata; struct ip_vs_offh_bucket *tbl; pdata = kmalloc(sizeof(struct ip_vs_offh_data), GFP_ATOMIC); if (pdata == 
NULL) { pr_err("%s(): no memory\n", __func__); return -ENOMEM; } tbl = kmalloc(sizeof(struct ip_vs_offh_bucket)*IP_VS_OFFH_TAB_SIZE, GFP_ATOMIC); if (tbl == NULL) { kfree(pdata); pr_err("%s(): no memory\n", __func__); return -ENOMEM; } pdata->tbl = tbl; pdata->offset = 0; pdata->offlen = 0; svc->sched_data = pdata; ip_vs_offh_assign(tbl, svc); return 0; } static int ip_vs_offh_done_svc(struct ip_vs_service *svc) { struct ip_vs_offh_data *pdata = svc->sched_data; struct ip_vs_offh_bucket *tbl = pdata->tbl; ip_vs_offh_flush(tbl); kfree(tbl); kfree(pdata); return 0; } static int ip_vs_offh_update_svc(struct ip_vs_service *svc) { struct ip_vs_offh_bucket *tbl = svc->sched_data; ip_vs_offh_flush(tbl); ip_vs_offh_assign(tbl, svc); return 0; } static inline int is_overloaded(struct ip_vs_dest *dest) { return dest->flags & IP_VS_DEST_F_OVERLOAD; } static struct ip_vs_dest * ip_vs_offh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; struct ip_vs_offh_data *pdata; struct ip_vs_offh_bucket *tbl; struct iphdr *iph; void *transport_hdr; char *payload; u32 hdrlen = 0; u32 _offset = 0; u32 _offlen = 0; iph = ip_hdr(skb); hdrlen = iph->ihl*4; if (hdrlen > skb->len) { return NULL; } transport_hdr = (void *)iph + hdrlen; switch (iph->protocol) { case IPPROTO_TCP: hdrlen += (((struct tcphdr*)transport_hdr)->doff)*4; break; case IPPROTO_UDP: hdrlen += sizeof(struct udphdr); break; default: return NULL; } #if 0 { int i = 0; _offset = offset; _offlen = offlen; payload = (char *)iph + hdrlen + _offset; printk("begin:iplen:%d \n", hdrlen); for (i = 0; i < _offlen; i++) { printk("%02X ", payload[i]); } printk("\nend\n"); return NULL; } #endif pdata = (struct ip_vs_offh_datai *)svc->sched_data; tbl = pdata->tbl; _offset = offset;//pdata->offset; _offlen = offlen;//pdata->offlen; if (_offlen + _offset > skb->len - hdrlen) { IP_VS_ERR_RL("OFFH: exceed\n"); return NULL; } payload = (char *)iph + hdrlen + _offset; dest = ip_vs_offh_get(tbl, payload, 
_offlen); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { IP_VS_ERR_RL("OFFH: no destination available\n"); return NULL; } return dest; } static struct ip_vs_scheduler ip_vs_offh_scheduler = { .name = "offh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_offh_scheduler.n_list), .init_service = ip_vs_offh_init_svc, .done_service = ip_vs_offh_done_svc, .update_service = ip_vs_offh_update_svc, .schedule = ip_vs_offh_schedule, }; static ssize_t ipvs_sch_offset_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int ret = 0; ret = sprintf(buf, "offset:%u;offlen:%u\n", offset, offlen); return ret; } /* * 设置offset/offset length * echo offset:$value1 offlen:$value2 >/proc/net/ipvs_sch_offset */ static int ipvs_sch_offset_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { int ret = count; char *p = buf, *pstart; if ((p = strstr(p, "offset:")) == NULL) { ret = -EINVAL; goto out; } p += strlen("offset:"); pstart = p; if ((p = strstr(p, " ")) == NULL) { ret = -EINVAL; goto out; } p[0] = 0; offset = skip_atoi(&pstart); if (offset == 0 && strcmp(pstart, "0")) { ret = -EINVAL; goto out; } p += strlen(";"); if ((p = strstr(p, "offlen:")) == NULL) { ret = -EINVAL; goto out; } p += strlen("offlen:"); pstart = p; offlen = skip_atoi(&pstart); if (offlen == 0 && strcmp(pstart, "0")) { ret = -EINVAL; goto out; } out: return ret; } /* * 由于不想修改用户态的配置接口,还是觉得procfs这种方式比较靠普 **/ static const struct file_operations ipvs_sch_offset_file_ops = { .owner = THIS_MODULE, .read = ipvs_sch_offset_read, .write = ipvs_sch_offset_write, }; struct net *net = &init_net; static int __init ip_vs_offh_init(void) { int ret = -1; if (!proc_create("ipvs_sch_offset", 0644, net->proc_net, &ipvs_sch_offset_file_ops)) { printk("OFFH: create proc entry failed\n"); goto out; } return register_ip_vs_scheduler(&ip_vs_offh_scheduler); out: return ret; } static void __exit 
ip_vs_offh_cleanup(void) { remove_proc_entry("ipvs_sch_offset", net->proc_net); unregister_ip_vs_scheduler(&ip_vs_offh_scheduler); } module_init(ip_vs_offh_init); module_exit(ip_vs_offh_cleanup); MODULE_LICENSE("GPL");
标签:
原文地址:http://blog.csdn.net/dog250/article/details/43638573