This is an updated version of the RFC3465 ABC patch originally
for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting
bytes ack'd rather than packets when updating congestion control.
The orignal ABC described in the RFC applied to a Reno style
algorithm. For advanced congestion control there is little
change after leaving slow start.
Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]>
--- net-2.6.orig/include/linux/sysctl.h
+++ net-2.6/include/linux/sysctl.h
@@ -353,6 +353,7 @@ enum
NET_TCP_BIC_BETA=108,
NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
NET_TCP_CONG_CONTROL=110,
+ NET_TCP_ABC=111,
};
enum {
--- net-2.6.orig/include/linux/tcp.h
+++ net-2.6/include/linux/tcp.h
@@ -326,6 +326,7 @@ struct tcp_sock {
__u32 snd_up; /* Urgent pointer */
__u32 total_retrans; /* Total retransmits for entire connection */
+ __u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */
unsigned int keepalive_time; /* time before keep alive
takes place */
unsigned int keepalive_intvl; /* time interval between keep
alive probes */
--- net-2.6.orig/include/net/tcp.h
+++ net-2.6/include/net/tcp.h
@@ -217,6 +217,7 @@ extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
+extern int sysctl_tcp_abc;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
@@ -765,6 +766,23 @@ static inline __u32 tcp_current_ssthresh
*/
static inline void tcp_slow_start(struct tcp_sock *tp)
{
+ if (sysctl_tcp_abc) {
+ /* RFC3465: Slow Start
+ * TCP sender SHOULD increase cwnd by the number of
+ * previously unacknowledged bytes ACKed by each incoming
+ * acknowledgment, provided the increase is not more than L
+ */
+ if (tp->bytes_acked < tp->mss_cache)
+ return;
+
+ /* We MAY increase by 2 if discovered delayed ack */
+ if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
+ }
+ tp->bytes_acked = 0;
+
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
}
@@ -799,6 +817,7 @@ static inline void tcp_enter_cwr(struct
struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0;
+ tp->bytes_acked = 0;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(sk);
tcp_set_ca_state(sk, TCP_CA_CWR);
--- net-2.6.orig/net/ipv4/sysctl_net_ipv4.c
+++ net-2.6/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_tcp_congestion_control,
.strategy = &sysctl_tcp_congestion_control,
},
+ {
+ .ctl_name = NET_TCP_ABC,
+ .procname = "tcp_abc",
+ .data = &sysctl_tcp_abc,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
--- net-2.6.orig/net/ipv4/tcp.c
+++ net-2.6/net/ipv4/tcp.c
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int
tp->packets_out = 0;
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0;
+ tp->bytes_acked = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
--- net-2.6.orig/Documentation/networking/ip-sysctl.txt
+++ net-2.6/Documentation/networking/ip-sysctl.txt
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
TCP variables:
+tcp_abc - INTEGER
+ Controls Appropriate Byte Count defined in RFC3465. If set to
+ 0 then does congestion avoid once per ack. 1 is conservative
+ value, and 2 is more agressive.
+
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value
--- net-2.6.orig/net/ipv4/tcp_input.c
+++ net-2.6/net/ipv4/tcp_input.c
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
int sysctl_tcp_nometrics_save;
int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
#define FLAG_DATA 0x01 /* Incoming frame contained data.
*/
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window
update. */
@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->bytes_acked = 0;
tcp_clear_retrans(tp);
/* Push undo marker, if it was plain RTO and nothing
@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u
TCP_ECN_queue_cwr(tp);
}
+ tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
@@ -2327,6 +2330,9 @@ static int tcp_ack(struct sock *sk, stru
if (before(ack, prior_snd_una))
goto old_ack;
+ if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+ tp->bytes_acked += ack - prior_snd_una;
+
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
* No more checks are required.
@@ -4390,6 +4396,7 @@ discard:
EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);
--- net-2.6.orig/net/ipv4/tcp_minisocks.c
+++ net-2.6/net/ipv4/tcp_minisocks.c
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(st
*/
newtp->snd_cwnd = 2;
newtp->snd_cwnd_cnt = 0;
+ newtp->bytes_acked = 0;
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
--- net-2.6.orig/net/ipv4/tcp_cong.c
+++ net-2.6/net/ipv4/tcp_cong.c
@@ -189,15 +189,25 @@ void tcp_reno_cong_avoid(struct sock *sk
if (tp->snd_cwnd <= tp->snd_ssthresh)
return tcp_slow_start(tp);
- /* In dangerous area, increase slowly.
- * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
- */
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- tp->snd_cwnd_cnt = 0;
- } else
- tp->snd_cwnd_cnt++;
+ /* In dangerous area, increase slowly. */
+ if (sysctl_tcp_abc) {
+ /* RFC3465: Apppriate Byte Count
+ * increase once for each full cwnd acked
+ */
+ if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+ tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
+ } else {
+ /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt = 0;
+ } else
+ tp->snd_cwnd_cnt++;
+ }
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
--
Stephen Hemminger <[EMAIL PROTECTED]>
OSDL http://developer.osdl.org/~shemminger
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html