This is an updated version of the RFC3465 ABC patch originally for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting bytes ack'd rather than packets when updating congestion control.
The orignal ABC described in the RFC applied to a Reno style algorithm. For advanced congestion control there is little change after leaving slow start. Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]> --- net-2.6.orig/include/linux/sysctl.h +++ net-2.6/include/linux/sysctl.h @@ -353,6 +353,7 @@ enum NET_TCP_BIC_BETA=108, NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, NET_TCP_CONG_CONTROL=110, + NET_TCP_ABC=111, }; enum { --- net-2.6.orig/include/linux/tcp.h +++ net-2.6/include/linux/tcp.h @@ -326,6 +326,7 @@ struct tcp_sock { __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ + __u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */ unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ --- net-2.6.orig/include/net/tcp.h +++ net-2.6/include/net/tcp.h @@ -217,6 +217,7 @@ extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; +extern int sysctl_tcp_abc; extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; @@ -765,6 +766,23 @@ static inline __u32 tcp_current_ssthresh */ static inline void tcp_slow_start(struct tcp_sock *tp) { + if (sysctl_tcp_abc) { + /* RFC3465: Slow Start + * TCP sender SHOULD increase cwnd by the number of + * previously unacknowledged bytes ACKed by each incoming + * acknowledgment, provided the increase is not more than L + */ + if (tp->bytes_acked < tp->mss_cache) + return; + + /* We MAY increase by 2 if discovered delayed ack */ + if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } + } + tp->bytes_acked = 0; + if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; } @@ -799,6 +817,7 @@ static inline void tcp_enter_cwr(struct struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; + tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { __tcp_enter_cwr(sk); tcp_set_ca_state(sk, TCP_CA_CWR); --- net-2.6.orig/net/ipv4/sysctl_net_ipv4.c +++ net-2.6/net/ipv4/sysctl_net_ipv4.c @@ -645,6 +645,14 @@ ctl_table ipv4_table[] = { .proc_handler = &proc_tcp_congestion_control, .strategy = &sysctl_tcp_congestion_control, }, + { + .ctl_name = NET_TCP_ABC, + .procname = "tcp_abc", + .data = &sysctl_tcp_abc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; --- net-2.6.orig/net/ipv4/tcp.c +++ net-2.6/net/ipv4/tcp.c @@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; + tp->bytes_acked = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); --- net-2.6.orig/Documentation/networking/ip-sysctl.txt +++ net-2.6/Documentation/networking/ip-sysctl.txt @@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER TCP variables: +tcp_abc - INTEGER + Controls Appropriate Byte Count defined in RFC3465. If set to + 0 then does congestion avoid once per ack. 1 is conservative + value, and 2 is more agressive. + tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 255. Default value --- net-2.6.orig/net/ipv4/tcp_input.c +++ net-2.6/net/ipv4/tcp_input.c @@ -89,6 +89,7 @@ int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; int sysctl_tcp_moderate_rcvbuf = 1; +int sysctl_tcp_abc = 1; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; + tp->bytes_acked = 0; tcp_clear_retrans(tp); /* Push undo marker, if it was plain RTO and nothing @@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u TCP_ECN_queue_cwr(tp); } + tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); } @@ -2327,6 +2330,9 @@ static int tcp_ack(struct sock *sk, stru if (before(ack, prior_snd_una)) goto old_ack; + if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) + tp->bytes_acked += ack - prior_snd_una; + if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -4390,6 +4396,7 @@ discard: EXPORT_SYMBOL(sysctl_tcp_ecn); EXPORT_SYMBOL(sysctl_tcp_reordering); +EXPORT_SYMBOL(sysctl_tcp_abc); EXPORT_SYMBOL(tcp_parse_options); EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); --- net-2.6.orig/net/ipv4/tcp_minisocks.c +++ net-2.6/net/ipv4/tcp_minisocks.c @@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(st */ newtp->snd_cwnd = 2; newtp->snd_cwnd_cnt = 0; + newtp->bytes_acked = 0; newtp->frto_counter = 0; newtp->frto_highmark = 0; --- net-2.6.orig/net/ipv4/tcp_cong.c +++ net-2.6/net/ipv4/tcp_cong.c @@ -189,15 +189,25 @@ void tcp_reno_cong_avoid(struct sock *sk if (tp->snd_cwnd <= tp->snd_ssthresh) return tcp_slow_start(tp); - /* In dangerous area, increase slowly. - * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd - */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else - tp->snd_cwnd_cnt++; + /* In dangerous area, increase slowly. */ + if (sysctl_tcp_abc) { + /* RFC3465: Apppriate Byte Count + * increase once for each full cwnd acked + */ + if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { + tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } + } else { + /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt++; + } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); -- Stephen Hemminger <[EMAIL PROTECTED]> OSDL http://developer.osdl.org/~shemminger - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html