Patch 2: fixes to fack_counts and enhancement of SACK fast path -Tom
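
To make the bookkeeping easier to follow, here is a small standalone sketch (plain userspace C, not kernel code; struct seg, pcount and reset_fack_counts are made-up names for illustration) of the invariant the tcp.h changes below maintain: a segment's fack_count is the running sum of the packet counts of everything ahead of it in the write queue. That is also why the fixup walk can stop early: once a segment already holds the expected value, every later count was derived from the same running sum and must be consistent too.

/* Toy model of the write queue: an array instead of an sk_buff list. */
#include <stdio.h>

struct seg {
	unsigned int pcount;		/* packets in this segment (tcp_skb_pcount) */
	unsigned int fack_count;	/* packets queued ahead of this segment */
};

/* Mirrors the reworked tcp_reset_fack_counts(): walk forward from the
 * insertion point, rewriting counts, but stop as soon as a segment already
 * carries the expected value or has no packet count yet. */
static void reset_fack_counts(struct seg *q, int n, int start)
{
	unsigned int fc = 0;
	int i;

	if (start > 0)
		fc = q[start - 1].fack_count + q[start - 1].pcount;

	for (i = start; i < n; i++) {
		if (q[i].fack_count == fc || q[i].pcount == 0)
			break;		/* rest of the queue is already consistent */
		q[i].fack_count = fc;
		fc += q[i].pcount;
	}
}

int main(void)
{
	/* A 2-packet segment was just inserted at index 1; the counts behind
	 * it still reflect the old queue {3, 1, 4} and are stale. */
	struct seg q[] = {
		{ 3, 0 },	/* unchanged head */
		{ 2, 0 },	/* newly inserted, count not yet set */
		{ 1, 3 },	/* stale, should become 5 */
		{ 4, 4 },	/* stale, should become 6 */
	};
	int i;

	reset_fack_counts(q, 4, 1);
	for (i = 0; i < 4; i++)
		printf("seg %d: pcount=%u fack_count=%u\n",
		       i, q[i].pcount, q[i].fack_count);
	return 0;
}

Running it prints counts 0, 3, 5, 6; calling reset_fack_counts() a second time with the same arguments stops at the first segment it inspects, since its count already matches the running sum.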
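
The tcp_input.c hunks below can be pictured the same way. A hedged sketch of the fast-path idea, under the assumption that only the first SACK block differs from the cached state (sack_block and choose_walk_start are illustrative names; sequence wraparound, dup-SACKs and the real skb walk are omitted): when just the end of the first block has advanced, everything up to the cached end was already tagged on an earlier ACK, so the retransmit-queue walk can start at that cached end (prev_end_seq in the patch) instead of at start_seq.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct sack_block {
	uint32_t start_seq;
	uint32_t end_seq;
};

/* Decide where the retransmit-queue walk for the first SACK block should
 * begin, given the block we cached on the previous ACK. */
static uint32_t choose_walk_start(const struct sack_block *cached,
				  const struct sack_block *fresh)
{
	/* Fast path: same start, only the end grew, so skbs below the old
	 * end are already tagged. */
	if (cached->start_seq == fresh->start_seq &&
	    cached->end_seq != fresh->end_seq)
		return cached->end_seq;		/* prev_end_seq */

	/* Slow path: walk the whole block. */
	return fresh->start_seq;
}

int main(void)
{
	struct sack_block cached = { 1000, 3000 };
	struct sack_block fresh  = { 1000, 4500 };	/* block grew by 1500 */

	printf("walk starts at %" PRIu32 " (instead of %" PRIu32 ")\n",
	       choose_walk_start(&cached, &fresh), fresh.start_seq);
	return 0;
}

With these example numbers the walk begins at 3000 rather than 1000, i.e. only the newly SACKed range is scanned.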
---
diff -ur linux-2.6.22.6-rbtree-davem-fixed/include/net/tcp.h linux-2.6.22.6-rbtree-tomq/include/net/tcp.h
--- linux-2.6.22.6-rbtree-davem-fixed/include/net/tcp.h	2007-09-19 17:36:07.000000000 -0700
+++ linux-2.6.22.6-rbtree-tomq/include/net/tcp.h	2007-09-19 12:22:06.000000000 -0700
@@ -1213,6 +1213,11 @@
 	sk->sk_send_head = tcp_write_queue_next(sk, skb);
 	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
 		sk->sk_send_head = NULL;
+	else
+		/* update fack_count of send_head. Since we've sent skb already,
+		 * its packet count must be set by now. */
+		TCP_SKB_CB(sk->sk_send_head)->fack_count =
+			TCP_SKB_CB(skb)->fack_count + tcp_skb_pcount(skb);
 	/* Don't override Nagle indefinately with F-RTO */
 	if (tp->frto_counter == 2)
 		tp->frto_counter = 3;
@@ -1310,19 +1315,22 @@
 /* An insert into the middle of the write queue causes the fack
  * counts in subsequent packets to become invalid, fix them up.
  */
-static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *first)
+static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *skb)
 {
-	struct sk_buff *prev = first->prev;
+	struct sk_buff *prev = skb->prev;
 	unsigned int fc = 0;
 
 	if (prev != (struct sk_buff *) &sk->sk_write_queue)
 		fc = TCP_SKB_CB(prev)->fack_count + tcp_skb_pcount(prev);
 
-	while (first != (struct sk_buff *)&sk->sk_write_queue) {
-		TCP_SKB_CB(first)->fack_count = fc;
+	while (skb != (struct sk_buff *)&sk->sk_write_queue) {
+		if (TCP_SKB_CB(skb)->fack_count == fc || !tcp_skb_pcount(skb))
+			break;
 
-		fc += tcp_skb_pcount(first);
-		first = first->next;
+		TCP_SKB_CB(skb)->fack_count = fc;
+
+		fc += tcp_skb_pcount(skb);
+		skb = skb->next;
 	}
 }
 
diff -ur linux-2.6.22.6-rbtree-davem-fixed/net/ipv4/tcp_input.c linux-2.6.22.6-rbtree-tomq/net/ipv4/tcp_input.c
--- linux-2.6.22.6-rbtree-davem-fixed/net/ipv4/tcp_input.c	2007-09-13 18:23:16.000000000 -0700
+++ linux-2.6.22.6-rbtree-tomq/net/ipv4/tcp_input.c	2007-09-19 12:27:42.000000000 -0700
@@ -956,6 +956,7 @@
 	int fack_count_base;
 	int i;
 	int first_sack_index;
+	u32 prev_end_seq = 0;
 
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
@@ -1000,6 +1001,7 @@
 		if (i == 0) {
 			if (tp->recv_sack_cache[i].start_seq != start_seq)
 				flag = 0;
+			prev_end_seq = ntohl(tp->recv_sack_cache[i].end_seq);
 		} else {
 			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
 			    (tp->recv_sack_cache[i].end_seq != end_seq))
@@ -1016,9 +1018,16 @@
 
 	first_sack_index = 0;
 	if (flag)
+		/* all that has changed is the end of the first SACK block. So all we
+		 * need to do is tag those skbs that weren't tagged last time. */
 		num_sacks = 1;
 	else {
 		int j;
+
+		/* more than just the end of the first SACK block has changed; invalidate
+		   prev_end_seq */
+
+		prev_end_seq = 0;
 
 		/* order SACK blocks to allow in order walk of the retrans queue */
 		for (i = num_sacks-1; i > 0; i--) {
@@ -1051,6 +1060,8 @@
 		int fack_count;
 		int dup_sack = (found_dup_sack && (i == first_sack_index));
 
+		if (prev_end_seq) start_seq = prev_end_seq;
+
 		skb = tcp_write_queue_find(sk, start_seq);
 		if (!skb)
 			continue;