[SCTP]: Implement SCTP_FRAGMENT_INTERLEAVE socket option.

This option was introduced in draft-ietf-tsvwg-sctpsocket-13.  It
prevents head-of-line blocking in the case of a one-to-many endpoint.
Applications enabling this option really must enable the SCTP_SNDRCV
event so that they know which association the data belongs to.  Based
on an earlier patch by Ivan Skytte Jørgensen.

Signed-off-by: Vlad Yasevich <[EMAIL PROTECTED]>
Signed-off-by: Sridhar Samudrala <[EMAIL PROTECTED]>

---
 include/net/sctp/structs.h |    1 +
 include/net/sctp/user.h    |    4 ++
 net/sctp/socket.c          |   75 +++++++++++++++++++++++++++++++++++++++++++-
 net/sctp/ulpqueue.c        |   30 ++++++++++++------
 4 files changed, 97 insertions(+), 13 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index c9075d0..7497cec 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -306,6 +306,7 @@ struct sctp_sock {
        __u8 disable_fragments;
        __u8 pd_mode;
        __u8 v4mapped;
+       __u8 frag_interleave;
        __u32 adaptation_ind;
 
        /* Receive to here while partial delivery is in effect. */
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 4116b0d..8c1d68c 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -97,6 +97,8 @@ #define SCTP_GET_PEER_ADDR_INFO SCTP_GET
 #define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK_TIME
        SCTP_CONTEXT,   /* Receive Context */
 #define SCTP_CONTEXT SCTP_CONTEXT
+       SCTP_FRAGMENT_INTERLEAVE,
+#define SCTP_FRAGMENT_INTERLEAVE SCTP_FRAGMENT_INTERLEAVE
 
        /* Internal Socket Options. Some of the sctp library functions are 
         * implemented using these socket options.
@@ -530,7 +532,7 @@ struct sctp_paddrparams {
        __u32                   spp_flags;
 } __attribute__((packed, aligned(4)));
 
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
  *
  *   This options will get or set the delayed ack timer.  The time is set
  *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ec70201..91bef1e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2249,7 +2249,7 @@ static int sctp_setsockopt_peer_addr_par
        return 0;
 }
 
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
  *
  *   This options will get or set the delayed ack timer.  The time is set
  *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
@@ -2786,6 +2786,46 @@ static int sctp_setsockopt_context(struc
        return 0;
 }
 
+/*
+ * 7.1.24.  Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ *
+ * This option will at a minimum specify if the implementation is doing
+ * fragmented interleave.  Fragmented interleave, for a one to many
+ * socket, is when subsequent calls to receive a message may return
+ * parts of messages from different associations.  Some implementations
+ * may allow you to turn this value on or off.  If so, when turned off,
+ * no fragment interleave will occur (which will cause a head of line
+ * blocking amongst multiple associations sharing the same one to many
+ * socket).  When this option is turned on, then each receive call may
+ * come from a different association (thus the user must receive data
+ * with the extended calls (e.g. sctp_recvmsg) to keep track of which
+ * association each receive belongs to).
+ *
+ * This option takes a boolean value.  A non-zero value indicates that
+ * fragmented interleave is on.  A value of zero indicates that
+ * fragmented interleave is off.
+ *
+ * Note that it is important that an implementation that allows this
+ * option to be turned on, have it off by default.  Otherwise an unaware
+ * application using the one to many model may become confused and act
+ * incorrectly.
+ */
+static int sctp_setsockopt_fragment_interleave(struct sock *sk,
+                                              char __user *optval,
+                                              int optlen)
+{
+       int val;
+
+       if (optlen != sizeof(int))
+               return -EINVAL;
+       if (get_user(val, (int __user *)optval))
+               return -EFAULT;
+
+       sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+
+       return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2900,7 +2940,9 @@ SCTP_STATIC int sctp_setsockopt(struct s
        case SCTP_CONTEXT:
                retval = sctp_setsockopt_context(sk, optval, optlen);
                break;
-
+       case SCTP_FRAGMENT_INTERLEAVE:
+               retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
+               break;
        default:
                retval = -ENOPROTOOPT;
                break;
@@ -3130,6 +3172,7 @@ SCTP_STATIC int sctp_init_sock(struct so
        /* Control variables for partial data delivery. */
        sp->pd_mode           = 0;
        skb_queue_head_init(&sp->pd_lobby);
+       sp->frag_interleave = 0;
 
        /* Create a per socket endpoint structure.  Even if we
         * change the data structure relationships, this may still
@@ -4530,6 +4573,30 @@ static int sctp_getsockopt_maxseg(struct
        return 0;
 }
 
+/*
+ * 7.1.24.  Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave())
+ */
+static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
+                                              char __user *optval,
+                                              int __user *optlen)
+{
+       int val;
+
+       if (len < sizeof(int))
+               return -EINVAL;
+
+       len = sizeof(int);
+
+       val = sctp_sk(sk)->frag_interleave;
+       if (put_user(len, optlen))
+               return -EFAULT;
+       if (copy_to_user(optval, &val, len))
+               return -EFAULT;
+
+       return 0;
+}
+
 SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
                                char __user *optval, int __user *optlen)
 {
@@ -4642,6 +4709,10 @@ SCTP_STATIC int sctp_getsockopt(struct s
        case SCTP_CONTEXT:
                retval = sctp_getsockopt_context(sk, len, optval, optlen);
                break;
+       case SCTP_FRAGMENT_INTERLEAVE:
+               retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
+                                                            optlen);
+               break;
        default:
                retval = -ENOPROTOOPT;
                break;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index e1d1442..d1e4ebd 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -184,20 +184,30 @@ int sctp_ulpq_tail_event(struct sctp_ulp
 
        /* If we are in partial delivery mode, post to the lobby until
         * partial delivery is cleared, unless, of course _this_ is
-        * the association the cause of the partial delivery.
+        * the association that caused partial delivery.
         */
-
        if (!sctp_sk(sk)->pd_mode) {
                queue = &sk->sk_receive_queue;
-       } else if (ulpq->pd_mode) {
-               if (event->msg_flags & MSG_NOTIFICATION)
-                       queue = &sctp_sk(sk)->pd_lobby;
-               else {
-                       clear_pd = event->msg_flags & MSG_EOR;
-                       queue = &sk->sk_receive_queue;
+       } else {
+               if (ulpq->pd_mode) {
+                       if (event->msg_flags & MSG_NOTIFICATION)
+                               queue = &sctp_sk(sk)->pd_lobby;
+                       else {
+                               clear_pd = event->msg_flags & MSG_EOR;
+                               queue = &sk->sk_receive_queue;
+                       }
+               } else {
+                       /*
+                        * If fragment interleave is enabled, we
+                        * can queue this to the receive queue instead
+                        * of the lobby.
+                        */
+                       if (sctp_sk(sk)->frag_interleave)
+                               queue = &sk->sk_receive_queue;
+                       else
+                               queue = &sctp_sk(sk)->pd_lobby;
                }
-       } else
-               queue = &sctp_sk(sk)->pd_lobby;
+       }
 

        /* If we are harvesting multiple skbs they will be


-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to