[PATCH 5/7] tcp: RFC3465 Appropriate Byte Count

Stephen Hemminger Wed, 09 Nov 2005 15:14:17 -0800

This is an updated version of the RFC3465 ABC patch originally
for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting
bytes ack'd rather than packets when updating congestion control.


The original ABC described in the RFC applied to a Reno style
algorithm. For advanced congestion control there is little
change after leaving slow start.


Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]>

--- net-2.6.orig/include/linux/sysctl.h
+++ net-2.6/include/linux/sysctl.h
@@ -353,6 +353,7 @@ enum
        NET_TCP_BIC_BETA=108,
        NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
        NET_TCP_CONG_CONTROL=110,
+       NET_TCP_ABC=111,
 };
 
 enum {
--- net-2.6.orig/include/linux/tcp.h
+++ net-2.6/include/linux/tcp.h
@@ -326,6 +326,7 @@ struct tcp_sock {
        __u32   snd_up;         /* Urgent pointer               */
 
        __u32   total_retrans;  /* Total retransmits for entire connection */
+       __u32   bytes_acked;    /* Appropriate Byte Counting - RFC3465 */
 
        unsigned int            keepalive_time;   /* time before keep alive takes place */
        unsigned int            keepalive_intvl;  /* time interval between keep alive probes */
--- net-2.6.orig/include/net/tcp.h
+++ net-2.6/include/net/tcp.h
@@ -217,6 +217,7 @@ extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
+extern int sysctl_tcp_abc;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -765,6 +766,23 @@ static inline __u32 tcp_current_ssthresh
  */
 static inline void tcp_slow_start(struct tcp_sock *tp)
 {
+       if (sysctl_tcp_abc) {
+               /* RFC3465: Slow Start
+                * TCP sender SHOULD increase cwnd by the number of
+                * previously unacknowledged bytes ACKed by each incoming
+                * acknowledgment, provided the increase is not more than L
+                */
+               if (tp->bytes_acked < tp->mss_cache)
+                       return;
+
+               /* We MAY increase by 2 if discovered delayed ack */
+               if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+                       if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+                               tp->snd_cwnd++;
+               }
+       }
+       tp->bytes_acked = 0;
+
        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                tp->snd_cwnd++;
 }
@@ -799,6 +817,7 @@ static inline void tcp_enter_cwr(struct 
        struct tcp_sock *tp = tcp_sk(sk);
 
        tp->prior_ssthresh = 0;
+       tp->bytes_acked = 0;
        if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
                __tcp_enter_cwr(sk);
                tcp_set_ca_state(sk, TCP_CA_CWR);
--- net-2.6.orig/net/ipv4/sysctl_net_ipv4.c
+++ net-2.6/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
                .proc_handler   = &proc_tcp_congestion_control,
                .strategy       = &sysctl_tcp_congestion_control,
        },
+       {
+               .ctl_name       = NET_TCP_ABC,
+               .procname       = "tcp_abc",
+               .data           = &sysctl_tcp_abc,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
 
        { .ctl_name = 0 }
 };
--- net-2.6.orig/net/ipv4/tcp.c
+++ net-2.6/net/ipv4/tcp.c
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int 
        tp->packets_out = 0;
        tp->snd_ssthresh = 0x7fffffff;
        tp->snd_cwnd_cnt = 0;
+       tp->bytes_acked = 0;
        tcp_set_ca_state(sk, TCP_CA_Open);
        tcp_clear_retrans(tp);
        inet_csk_delack_init(sk);
--- net-2.6.orig/Documentation/networking/ip-sysctl.txt
+++ net-2.6/Documentation/networking/ip-sysctl.txt
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
 
 TCP variables: 
 
+tcp_abc - INTEGER
+       Controls Appropriate Byte Count defined in RFC3465. If set to
+       0 then does congestion avoid once per ack. 1 is conservative
+       value, and 2 is more aggressive.
+
 tcp_syn_retries - INTEGER
        Number of times initial SYNs for an active TCP connection attempt
        will be retransmitted. Should not be higher than 255. Default value
--- net-2.6.orig/net/ipv4/tcp_input.c
+++ net-2.6/net/ipv4/tcp_input.c
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
 int sysctl_tcp_nometrics_save;
 
 int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
 
 #define FLAG_DATA              0x01 /* Incoming frame contained data.          */
 #define FLAG_WIN_UPDATE                0x02 /* Incoming ACK was a window update.       */
@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int
        tp->snd_cwnd_cnt   = 0;
        tp->snd_cwnd_stamp = tcp_time_stamp;
 
+       tp->bytes_acked = 0;
        tcp_clear_retrans(tp);
 
        /* Push undo marker, if it was plain RTO and nothing
@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u
                        TCP_ECN_queue_cwr(tp);
                }
 
+               tp->bytes_acked = 0;
                tp->snd_cwnd_cnt = 0;
                tcp_set_ca_state(sk, TCP_CA_Recovery);
        }
@@ -2327,6 +2330,9 @@ static int tcp_ack(struct sock *sk, stru
        if (before(ack, prior_snd_una))
                goto old_ack;
 
+       if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+               tp->bytes_acked += ack - prior_snd_una;
+
        if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
                /* Window is constant, pure forward advance.
                 * No more checks are required.
@@ -4390,6 +4396,7 @@ discard:
 
 EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
--- net-2.6.orig/net/ipv4/tcp_minisocks.c
+++ net-2.6/net/ipv4/tcp_minisocks.c
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(st
                 */
                newtp->snd_cwnd = 2;
                newtp->snd_cwnd_cnt = 0;
+               newtp->bytes_acked = 0;
 
                newtp->frto_counter = 0;
                newtp->frto_highmark = 0;
--- net-2.6.orig/net/ipv4/tcp_cong.c
+++ net-2.6/net/ipv4/tcp_cong.c
@@ -189,15 +189,25 @@ void tcp_reno_cong_avoid(struct sock *sk
         if (tp->snd_cwnd <= tp->snd_ssthresh)
                return tcp_slow_start(tp);
 
-       /* In dangerous area, increase slowly.
-        * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
-        */
-       if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-               if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-                       tp->snd_cwnd++;
-               tp->snd_cwnd_cnt = 0;
-       } else
-               tp->snd_cwnd_cnt++;
+       /* In dangerous area, increase slowly. */
+       if (sysctl_tcp_abc) {
+               /* RFC3465: Appropriate Byte Count
+                * increase once for each full cwnd acked
+                */
+               if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+                       tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+                       if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+                               tp->snd_cwnd++;
+               }
+       } else {
+               /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+               if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+                       if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+                               tp->snd_cwnd++;
+                       tp->snd_cwnd_cnt = 0;
+               } else
+                       tp->snd_cwnd_cnt++;
+       }
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 

--
Stephen Hemminger <[EMAIL PROTECTED]>
OSDL http://developer.osdl.org/~shemminger

-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 5/7] tcp: RFC3465 Appropriate Byte Count

Reply via email to