From: Willem de Bruijn <will...@google.com>

Bound the number of pages that a userspace process may pin.

Account pinned pages to the locked page count (RLIMIT_MEMLOCK, `ulimit -l`)
of the caller and fail requests that would exceed the administrator
controlled threshold, similar to infiniband.
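
As an illustration only (userspace sketch, not part of this patch): a
process without CAP_IPC_LOCK that needs more headroom raises its soft
memlock limit up to the administrator-set hard limit before pinning:

#include <sys/resource.h>

/* Raise the soft memlock limit to the hard limit; raising the hard
 * limit itself remains up to the administrator (or CAP_SYS_RESOURCE).
 */
static int raise_memlock_limit(void)
{
        struct rlimit rl;

        if (getrlimit(RLIMIT_MEMLOCK, &rl))
                return -1;
        rl.rlim_cur = rl.rlim_max;
        return setrlimit(RLIMIT_MEMLOCK, &rl);
}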

Use an atomic variable to avoid having to take mmap_sem. Taking the
lock is expensive, and since it may sleep while ubuf_info are often
destroyed in atomic context, it would also require scheduling a worker
on destruction.
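
The accounting then reduces to a compare-and-swap loop against the
limit. A simplified restatement of the helper added below (mm
refcounting and the per-ubuf_info bookkeeping omitted):

/* Charge num_pg pages to mm->pinned_vm_ without taking mmap_sem;
 * retry if another thread raced and updated the counter first.
 */
static int charge_pinned_pages(struct mm_struct *mm, int num_pg, int max_pg)
{
        int old_pg, new_pg;

        do {
                old_pg = atomic_read(&mm->pinned_vm_);
                new_pg = old_pg + num_pg;
                if (new_pg > max_pg)
                        return -ENOMEM;
        } while (atomic_cmpxchg(&mm->pinned_vm_, old_pg, new_pg) != old_pg);

        return 0;
}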

The mm_struct.pinned_vm_ field added here is a hack. A non-RFC patchset
would instead convert the existing unsigned long pinned_vm and all its
callers (infiniband) to atomic_long_t.
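
As a sketch of that follow-up (an assumption about the eventual
conversion, not part of this patch), the existing field would become
atomic_long_t and the loop above would use the long-sized atomics:

/* mm_types.h after the proposed conversion (sketch): */
        atomic_long_t pinned_vm;        /* Refcount permanently increased */

/* and the accounting loop (sketch): */
        long old_pg, new_pg;

        do {
                old_pg = atomic_long_read(&mm->pinned_vm);
                new_pg = old_pg + num_pg;
                if (new_pg > max_pg)
                        return -ENOMEM;
        } while (atomic_long_cmpxchg(&mm->pinned_vm, old_pg, new_pg) != old_pg);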

Signed-off-by: Willem de Bruijn <will...@google.com>
---
 include/linux/mm_types.h |  1 +
 include/linux/skbuff.h   |  5 +++++
 net/core/skbuff.c        | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0038ac7..dc6e12a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -402,6 +402,7 @@ struct mm_struct {
        unsigned long total_vm;         /* Total pages mapped */
        unsigned long locked_vm;        /* Pages that have PG_mlocked set */
        unsigned long pinned_vm;        /* Refcount permanently increased */
+       atomic_t pinned_vm_;
        unsigned long shared_vm;        /* Shared pages (files) */
        unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE */
        unsigned long stack_vm;         /* VM_GROWSUP/DOWN */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c1ea855..95a9f75 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -331,6 +331,11 @@ struct ubuf_info {
                };
        };
        atomic_t refcnt;
+
+       struct mmpin {
+               struct mm_struct *mm;
+               int num_pg;
+       } mmp;
 };
 
 #define skb_uarg(SKB)  ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4ae60ee..3742968 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -840,6 +840,42 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
+static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+{
+       unsigned long max_pg, num_pg, new_pg, old_pg;
+       struct mm_struct *mm;
+
+       if (capable(CAP_IPC_LOCK) || !size)
+               return 0;
+
+       num_pg = (size >> PAGE_SHIFT) + 2;      /* worst case */
+       max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+       mm = mmp->mm ? : current->mm;
+
+       do {
+               old_pg = atomic_read(&mm->pinned_vm_);
+               new_pg = old_pg + num_pg;
+               if (new_pg > max_pg)
+                       return -ENOMEM;
+       } while (atomic_cmpxchg(&mm->pinned_vm_, old_pg, new_pg) != old_pg);
+
+       if (!mmp->mm) {
+               mmp->mm = mm;
+               atomic_inc(&mm->mm_count);
+       }
+
+       mmp->num_pg += num_pg;
+       return 0;
+}
+
+static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+{
+       if (mmp->mm) {
+               atomic_sub(mmp->num_pg, &mmp->mm->pinned_vm_);
+               mmdrop(mmp->mm);
+       }
+}
+
 /* must only be called from process context */
 struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 {
@@ -852,6 +888,12 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 
        BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
        uarg = (void *)skb->cb;
+       uarg->mmp.mm = NULL;
+
+       if (mm_account_pinned_pages(&uarg->mmp, size)) {
+               kfree_skb(skb);
+               return NULL;
+       }
 
        uarg->callback = sock_zerocopy_callback;
        uarg->id = ((u16)atomic_inc_return(&sk->sk_zckey)) - 1;
@@ -880,6 +922,8 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
 
                next = atomic_read(&sk->sk_zckey);
                if ((u16)(uarg->id + uarg->len) == next) {
+                       if (mm_account_pinned_pages(&uarg->mmp, size))
+                               return NULL;
                        uarg->len++;
                        atomic_set(&sk->sk_zckey, ++next);
                        return uarg;
@@ -946,6 +990,8 @@ EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
 void sock_zerocopy_put(struct ubuf_info *uarg)
 {
        if (uarg && atomic_dec_and_test(&uarg->refcnt)) {
+               mm_unaccount_pinned_pages(&uarg->mmp);
+
                /* if !len, there was only 1 call, and it was aborted */
                if (uarg->callback && uarg->len)
                        uarg->callback(uarg, true);
-- 
2.5.0.276.gf5e568e
