While trying to track down a PMTUD problem with IPsec, I
misread the current MTU estimation code and initially thought
it would overestimate the MTU. I then noticed that this was
wrong, but by that time I had already replaced it with an exact
calculation. It fixes the common underestimation of the MTU
by two bytes with ESP and should be a bit faster, so it still
looks useful.

[XFRM]: Improve MTU estimation

Replace the probing-based MTU estimation, which usually takes 2-3
iterations to find a fitting value and may underestimate the MTU,
with an exact calculation.

Signed-off-by: Patrick McHardy <[EMAIL PROTECTED]>

---
commit d5722ea7c8c7d3526788cd4fc3ab3e1237273fa8
tree 56ddec256902370864e93bb7bd095281162aea3b
parent a205729e2cd8e51257cd0ea738524c64da99b9e0
author Patrick McHardy <[EMAIL PROTECTED]> Fri, 04 Aug 2006 10:37:09 +0200
committer Patrick McHardy <[EMAIL PROTECTED]> Fri, 04 Aug 2006 10:37:09 +0200

 include/net/xfrm.h    |    3 +--
 net/ipv4/esp4.c       |   28 +++++++++++++++-------------
 net/ipv6/esp6.c       |   25 +++++++++++--------------
 net/xfrm/xfrm_state.c |   36 ++++++++----------------------------
 4 files changed, 35 insertions(+), 57 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9c5ee9f..ea1b028 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -262,8 +262,7 @@ struct xfrm_type
        void                    (*destructor)(struct xfrm_state *);
        int                     (*input)(struct xfrm_state *, struct sk_buff *skb);
        int                     (*output)(struct xfrm_state *, struct sk_buff *pskb);
-       /* Estimate maximal size of result of transformation of a dgram */
-       u32                     (*get_max_size)(struct xfrm_state *, int size);
+       u32                     (*get_mtu)(struct xfrm_state *, int size);
 };
 
 extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index fc2f8ce..5393dc2 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -251,21 +251,19 @@ out:
        return -EINVAL;
 }
 
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
 {
        struct esp_data *esp = x->data;
-       u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+       u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
 
-       if (x->props.mode) {
-               mtu = ALIGN(mtu + 2, blksize);
-       } else {
-               /* The worst case. */
-               mtu = ALIGN(mtu + 2, 4) + blksize - 4;
-       }
-       if (esp->conf.padlen)
-               mtu = ALIGN(mtu, esp->conf.padlen);
+       if (esp->conf.padlen > align)
+               align = esp->conf.padlen;
 
-       return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+       mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+       mtu &= ~(align - 1);
+       mtu -= 2;
+
+       return mtu;
 }
 
 static void esp4_err(struct sk_buff *skb, u32 info)
@@ -307,6 +305,7 @@ static void esp_destroy(struct xfrm_stat
 static int esp_init_state(struct xfrm_state *x)
 {
        struct esp_data *esp = NULL;
+       u32 align;
 
        /* null auth and encryption can have zero length keys */
        if (x->aalg) {
@@ -385,7 +384,10 @@ static int esp_init_state(struct xfrm_st
                }
        }
        x->data = esp;
-       x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+       align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+       if (esp->conf.padlen)
+               align = ALIGN(align, esp->conf.padlen);
+       x->props.trailer_len = align - 1 + esp->auth.icv_trunc_len;
        return 0;
 
 error:
@@ -402,7 +404,7 @@ static struct xfrm_type esp_type =
        .proto          = IPPROTO_ESP,
        .init_state     = esp_init_state,
        .destructor     = esp_destroy,
-       .get_max_size   = esp4_get_max_size,
+       .get_mtu        = esp4_get_mtu,
        .input          = esp_input,
        .output         = esp_output
 };
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a278d5e..b8d0a05 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -222,22 +222,19 @@ out:
        return ret;
 }
 
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 {
        struct esp_data *esp = x->data;
-       u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
-
-       if (x->props.mode) {
-               mtu = ALIGN(mtu + 2, blksize);
-       } else {
-               /* The worst case. */
-               u32 padsize = ((blksize - 1) & 7) + 1;
-               mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
-       }
-       if (esp->conf.padlen)
-               mtu = ALIGN(mtu, esp->conf.padlen);
+       u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+
+       if (esp->conf.padlen > align)
+               align = esp->conf.padlen;
+       
+       mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+       mtu &= ~(align - 1);
+       mtu -= 2;
 
-       return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+       return mtu;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -363,7 +360,7 @@ static struct xfrm_type esp6_type =
        .proto          = IPPROTO_ESP,
        .init_state     = esp6_init_state,
        .destructor     = esp6_destroy,
-       .get_max_size   = esp6_get_max_size,
+       .get_mtu        = esp6_get_mtu,
        .input          = esp6_input,
        .output         = esp6_output
 };
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0021aad..39d9169 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1129,37 +1129,17 @@ void xfrm_state_delete_tunnel(struct xfr
 }
 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
 
-/*
- * This function is NOT optimal.  For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal.  However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 {
-       int res = mtu;
-
-       res -= x->props.header_len;
-
-       for (;;) {
-               int m = res;
-
-               if (m < 68)
-                       return 68;
-
-               spin_lock_bh(&x->lock);
-               if (x->km.state == XFRM_STATE_VALID &&
-                   x->type && x->type->get_max_size)
-                       m = x->type->get_max_size(x, m);
-               else
-                       m += x->props.header_len;
-               spin_unlock_bh(&x->lock);
-
-               if (m <= mtu)
-                       break;
-               res -= (m - mtu);
-       }
+       int res;
 
+       spin_lock_bh(&x->lock);
+       if (x->km.state == XFRM_STATE_VALID &&
+           x->type && x->type->get_mtu)
+               res = x->type->get_mtu(x, mtu);
+       else
+               res = mtu;
+       spin_unlock_bh(&x->lock);
        return res;
 }
 

Reply via email to