The implementation of the code is simple; the thinking behind it is what is complex.

1. Defining the pacing tasklet:

/* include/net/tcp.h */
struct pacing_tasklet {
        struct tasklet_struct   tasklet;
        struct list_head        head;   /* queue of tcp sockets */
};
extern struct pacing_tasklet pacing_tasklet;

/* net/ipv4/tcp_output.c */
/* Per-cpu pacing tasklet instances */
DEFINE_PER_CPU(struct pacing_tasklet, pacing_tasklet);

/* Standalone handler, split out only so the tasklet action does not get too long */
static void tcp_pacing_handler(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (!sysctl_tcp_pacing || !tp->pacing.pacing)
                return;

        if (sock_owned_by_user(sk)) {
                /* The user path holds the socket lock: defer the push to tcp_release_cb() */
                if (!test_and_set_bit(TCP_PACING_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        if (sk->sk_state == TCP_CLOSE)
                goto out;
        if (!sk->sk_send_head)
                goto out;

        tcp_push_pending_frames(sk);

out:
        if (tcp_memory_pressure)
                sk_mem_reclaim(sk);
}

/* Action function of the pacing tasklet */
static void tcp_pacing_func(unsigned long data)
{
        struct pacing_tasklet *pacing = (struct pacing_tasklet *)data;
        LIST_HEAD(list);
        unsigned long flags;
        struct list_head *q, *n;
        struct tcp_sock *tp;
        struct sock *sk;

        local_irq_save(flags);
        list_splice_init(&pacing->head, &list);
        local_irq_restore(flags);

        list_for_each_safe(q, n, &list) {
                tp = list_entry(q, struct tcp_sock, pacing_node);
                list_del(&tp->pacing_node);

                sk = (struct sock *)tp;

                bh_lock_sock(sk);
                tcp_pacing_handler(sk);
                bh_unlock_sock(sk);

                clear_bit(PACING_QUEUED, &tp->tsq_flags);
        }
}

/* Initialize the pacing tasklets (modelled entirely on TSQ's tcp_tasklet_init) */
void __init tcp_tasklet_init(void)
{
        int i;

        /* the existing TSQ tasklet initialization stays as it is (omitted here) */
        for_each_possible_cpu(i) {
                struct pacing_tasklet *pacing = &per_cpu(pacing_tasklet, i);

                INIT_LIST_HEAD(&pacing->head);
                tasklet_init(&pacing->tasklet,
                             tcp_pacing_func,
                             (unsigned long)pacing);
        }
}
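The snippet above leans on fields and flag bits that a stock kernel does not have: tp->pacing.pacing, tp->pacing.next_to_send, tp->pacing_node, plus the PACING_QUEUED and TCP_PACING_TIMER_DEFERRED bits in tsq_flags. The post never shows their declarations, so the fragment below is only my guess at how they could be declared, inferred from the way the code uses them:

/* Hypothetical declarations, NOT from the original post; inferred from usage */
struct tcp_pacing_info {
        u8      pacing;                 /* per-socket switch: is pacing active? */
        u64     next_to_send;           /* absolute ns timestamp of the next allowed send */
};

struct tcp_sock {
        /* ... existing fields, including tsq_flags ... */
        struct tcp_pacing_info  pacing;
        struct list_head        pacing_node;    /* linkage on the per-cpu pacing_tasklet queue */
        /* ... */
};

enum {
        /* ... existing TSQ bits (TSQ_THROTTLED, TSQ_QUEUED, TCP_TSQ_DEFERRED, ...) ... */
        PACING_QUEUED,                  /* socket already sits on a pacing tasklet queue */
        TCP_PACING_TIMER_DEFERRED,      /* lock owned by user; push from tcp_release_cb() */
};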
2. The hrtimer and its expiry callback:

/* net/ipv4/tcp_timer.c */
/* (Re)arm the pacing hrtimer */
void tcp_pacing_reset_timer(struct sock *sk, u64 expires)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (!sysctl_tcp_pacing || !tp->pacing.pacing)
                return;

        hrtimer_start(&sk->timer, ns_to_ktime(expires),
                      HRTIMER_MODE_ABS_PINNED);
}

/* Expiry callback of the hrtimer */
static enum hrtimer_restart tcp_pacing_timer(struct hrtimer *timer)
{
        struct sock *sk = container_of(timer, struct sock, timer);
        struct tcp_sock *tp = tcp_sk(sk);

        if (!test_and_set_bit(PACING_QUEUED, &tp->tsq_flags)) {
                unsigned long flags;
                struct pacing_tasklet *pacing;

                /* Only schedule the tasklet here; do not run the action in hardirq context! */
                local_irq_save(flags);
                pacing = this_cpu_ptr(&pacing_tasklet);
                list_add(&tp->pacing_node, &pacing->head);
                tasklet_schedule(&pacing->tasklet);
                local_irq_restore(flags);
        }
        return HRTIMER_NORESTART;
}

/* Initialization */
void tcp_init_xmit_timers(struct sock *sk)
{
        inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
                                  &tcp_keepalive_timer);
        hrtimer_init(&sk->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
        sk->timer.function = &tcp_pacing_timer;
}
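The post does not show teardown. Since sk->timer is now a live hrtimer, it presumably has to be cancelled when the socket's timers are cleared, otherwise it could fire against a socket that is being destroyed. A minimal sketch, assuming the stock tcp_clear_xmit_timers() helper in include/net/tcp.h is extended:

/* Sketch only; not part of the original patch */
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
        inet_csk_clear_xmit_timers(sk);
        hrtimer_cancel(&sk->timer);     /* make sure the pacing timer can no longer fire */
}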
3. The check in tcp_write_xmit:

/* net/ipv4/tcp_output.c */
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                           int push_one, gfp_t gfp)
{
        ...
        while ((skb = tcp_send_head(sk))) {
                unsigned int limit;
                u64 now = ktime_get_ns();
                ...
                cwnd_quota = tcp_cwnd_test(tp, skb);
                if (!cwnd_quota) {
                        if (push_one == 2)
                                /* Force out a loss probe pkt. */
                                cwnd_quota = 1;
                        else if (tp->pacing.pacing == 0)
                                /* A bold move: since the pacing rate is computed from cwnd,
                                 * once the pacing rate has been checked there is no need to
                                 * check cwnd again. Be careful with BBR, though: BBR's
                                 * pacing rate is NOT derived from cwnd; quite the opposite,
                                 * its cwnd is derived from the pacing rate!
                                 */
                                break;
                }

                /* The advertised window has nothing to do with network congestion,
                 * so it still has to be checked.
                 */
                if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
                        break;

                /* The logic here is the same as in the ordinary-timer version! */
                if (sysctl_tcp_pacing && tp->pacing.pacing == 1) {
                        u32 plen;
                        u64 rate, len;

                        if (now < tp->pacing.next_to_send) {
                                tcp_pacing_reset_timer(sk, tp->pacing.next_to_send);
                                break;
                        }

                        rate = sysctl_tcp_rate ? sysctl_tcp_rate : sk->sk_pacing_rate;
                        plen = skb->len + MAX_HEADER;
                        len = (u64)plen * NSEC_PER_SEC;
                        if (rate)
                                do_div(len, rate);
                        tp->pacing.next_to_send = now + len;

                        if (cwnd_quota == 0)
                                cwnd_quota = 1;
                }

                if (tso_segs == 1) {
                        ...
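To see why nanosecond resolution matters in the len computation above, plug in a typical rate (my own back-of-the-envelope illustration, not figures from the post): at a pacing rate of 12,500,000 bytes/s (roughly 100 Mbit/s) and a packet of about 1500 bytes, the required inter-packet gap comes out near 120 microseconds, well below the 1-10 ms granularity of a jiffies-based timer.

/* Stand-alone illustration of the gap computation used above */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t rate = 12500000ULL;    /* pacing rate in bytes per second (~100 Mbit/s) */
        uint64_t plen = 1500ULL;        /* rough packet length, stands in for skb->len + MAX_HEADER */
        uint64_t gap_ns = plen * 1000000000ULL / rate;  /* same formula: plen * NSEC_PER_SEC / rate */

        printf("inter-packet gap: %llu ns (~%llu us)\n",
               (unsigned long long)gap_ns,
               (unsigned long long)(gap_ns / 1000));
        return 0;
}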
4. The deferred push in tcp_release_cb:

/* net/ipv4/tcp_output.c */
void tcp_release_cb(struct sock *sk)
{
        ...
        if (flags & (1UL << TCP_PACING_TIMER_DEFERRED)) {
                if (sk->sk_send_head)
                        tcp_push_pending_frames(sk);
                __sock_put(sk);
        }
        ...
}
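For the new bit to be harvested at all, it presumably also has to be added to the mask that the elided part of tcp_release_cb() atomically collects and clears from tsq_flags (TCP_DEFERRED_ALL in net/ipv4/tcp_output.c). A sketch of that addition, which is my assumption rather than something shown in the post:

/* Sketch: extend the deferred-work mask so the clearing loop in
 * tcp_release_cb() also picks up the new pacing bit.
 */
#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |           \
                          (1UL << TCP_WRITE_TIMER_DEFERRED) |   \
                          (1UL << TCP_DELACK_TIMER_DEFERRED) |  \
                          (1UL << TCP_MTU_REDUCED_DEFERRED) |   \
                          (1UL << TCP_PACING_TIMER_DEFERRED))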
First, look at the curve of the standard CUBIC algorithm:
Goddammit, what garbage! All of it is garbage!
Its throughput curve is shown in the figure below:
Now look at my pacing curve:
Then look at the throughput graph! I never went to university; in fact, I never thought much of it. Everyone in my circle went straight from a master's into a PhD and hardly ever comes back to China, while I do not even know what an undergraduate degree is?! Anyway, look at the results:
-------------------------------------
Finally, take a look at my original vision.

Fully implementing Linux TCP's pacing transmit logic: the high-resolution hrtimer version
Original article: http://blog.csdn.net/dog250/article/details/54424751