On Tue, Dec 16, 2014 at 11:43 AM, 严正 <[email protected]> wrote:
>
>> 在 2014年12月15日,21:00,Ilya Dryomov <[email protected]> 写道:
>>
>> On Thu, Nov 13, 2014 at 11:23 PM, Ilya Dryomov <[email protected]>
>> wrote:
>>> This patch wasn't posted on ceph-devel, posting..
>>
>> Hi Zheng,
>>
>> Ping - I had some comments (below), mostly trivial but a potential
>> cached_context leak is what worries me.
>>
>> Thanks,
>>
>> Ilya
>
>
> here is the updated patch, thanks.
>
> —
> From 6de279d244d56b78da70efe7968b815db0f3bb5f Mon Sep 17 00:00:00 2001
> From: "Yan, Zheng" <[email protected]>
> Date: Thu, 6 Nov 2014 15:09:41 +0800
> Subject: [PATCH] ceph: introduce global empty snap context
>
> Current snapshot code does not properly handle moving an inode from one
> empty snap realm to another empty snap realm. After changing the inode's
> snap realm, some dirty pages' snap context may not be equal to the inode's
> i_head_snap. This can trigger BUG() in ceph_put_wrbuffer_cap_refs()
>
> The fix is to introduce a global empty snap context for all empty snap
> realms. This avoids triggering the BUG() for filesystems with no snapshots.
>
> Fixes: http://tracker.ceph.com/issues/9928
> Signed-off-by: Yan, Zheng <[email protected]>
> ---
> fs/ceph/snap.c | 28 ++++++++++++++++++++++++++--
> fs/ceph/super.c | 10 ++++++++--
> fs/ceph/super.h | 2 ++
> 3 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> index c1cc993..3035b8f 100644
> --- a/fs/ceph/snap.c
> +++ b/fs/ceph/snap.c
> @@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b)
> return 0;
> }
>
> +
> +static struct ceph_snap_context *empty_snapc;
> +
> /*
> * build the snap context for a given realm.
> */
> @@ -328,6 +331,12 @@ static int build_snap_context(struct ceph_snap_realm
> *realm)
> return 0;
> }
>
> + if (num == 0 && realm->seq == empty_snapc->seq) {
> + ceph_get_snap_context(empty_snapc);
> + snapc = empty_snapc;
> + goto done;
> + }
> +
> /* alloc new snap context */
> err = -ENOMEM;
> if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
> @@ -364,7 +373,7 @@ static int build_snap_context(struct ceph_snap_realm
> *realm)
> dout("build_snap_context %llx %p: %p seq %lld (%u snaps)\n",
> realm->ino, realm, snapc, snapc->seq,
> (unsigned int) snapc->num_snaps);
> -
> +done:
> ceph_put_snap_context(realm->cached_context);
> realm->cached_context = snapc;
> return 0;
> @@ -465,6 +474,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
> cap_snap. lucky us. */
> dout("queue_cap_snap %p already pending\n", inode);
> kfree(capsnap);
> + } else if (ci->i_snap_realm->cached_context == empty_snapc) {
> + dout("queue_cap_snap %p empty snapc\n", inode);
> + kfree(capsnap);
> } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
> CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
> struct ceph_snap_context *snapc = ci->i_head_snapc;
> @@ -925,5 +937,17 @@ out:
> return;
> }
>
> +int __init ceph_snap_init(void)
> +{
> + empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
> + if (!empty_snapc)
> + return -ENOMEM;
> + empty_snapc->seq = 1;
> + empty_snapc->num_snaps = 0;
> + return 0;
> +}
>
> -
> +void ceph_snap_exit(void)
> +{
> + ceph_put_snap_context(empty_snapc);
> +}
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index f6e1237..3b5c1e3 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -1028,15 +1028,20 @@ static int __init init_ceph(void)
>
> ceph_flock_init();
> ceph_xattr_init();
> + ret = ceph_snap_init();
> + if (ret)
> + goto out_xattr;
> ret = register_filesystem(&ceph_fs_type);
> if (ret)
> - goto out_icache;
> + goto out_snap;
>
> pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
>
> return 0;
>
> -out_icache:
> +out_snap:
> + ceph_snap_exit();
> +out_xattr:
> ceph_xattr_exit();
> destroy_caches();
> out:
> @@ -1047,6 +1052,7 @@ static void __exit exit_ceph(void)
> {
> dout("exit_ceph\n");
> unregister_filesystem(&ceph_fs_type);
> + ceph_snap_exit();
> ceph_xattr_exit();
> destroy_caches();
> }
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index aca2287..9f63f18 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -699,6 +699,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info
> *ci);
> extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
> struct ceph_cap_snap *capsnap);
> extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
> +extern int __init ceph_snap_init(void);
> +extern void ceph_snap_exit(void);
>
> /*
> * a cap_snap is "pending" if it is still awaiting an in-progress
> --
> 1.9.3
>
>
>
>>
>>
>>>
>>> From d57f921fd3c7cbe768f13bc58e547d83274f47a8 Mon Sep 17 00:00:00 2001
>>> From: "Yan, Zheng" <[email protected]>
>>> Date: Thu, 6 Nov 2014 15:09:41 +0800
>>> Subject: [PATCH] ceph: introduce global empty snap context
>>>
>>> Current snapshot code does not properly handle moving an inode from one
>>> empty snap realm to another empty snap realm. After changing the inode's
>>> snap realm, some dirty pages' snap context may not be equal to the inode's
>>> i_head_snap. This can trigger BUG() in ceph_put_wrbuffer_cap_refs()
>>>
>>> The fix is to introduce a global empty snap context for all empty snap
>>> realms. This avoids triggering the BUG() for filesystems with no snapshots.
>>>
>>> Fixes: http://tracker.ceph.com/issues/9928
>>> Signed-off-by: Yan, Zheng <[email protected]>
>>> ---
>>> fs/ceph/snap.c | 26 +++++++++++++++++++++++++-
>>> fs/ceph/super.c | 6 ++++++
>>> fs/ceph/super.h | 2 ++
>>> 3 files changed, 33 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
>>> index c1cc993225e3..24b454a0e64c 100644
>>> --- a/fs/ceph/snap.c
>>> +++ b/fs/ceph/snap.c
>>> @@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b)
>>> return 0;
>>> }
>>>
>>> +
>>> +static struct ceph_snap_context *empty_snapc;
>>> +
>>> /*
>>> * build the snap context for a given realm.
>>> */
>>> @@ -328,6 +331,12 @@ static int build_snap_context(struct
>>> ceph_snap_realm *realm)
>>> return 0;
>>> }
>>>
>>> + if (num == 0 && realm->seq == empty_snapc->seq) {
>>> + ceph_get_snap_context(empty_snapc);
>>> + realm->cached_context = empty_snapc;
>>> + return 0;
>>>
>>> Can there be a cached_context here that we would need to let go of?
>>> IOW, shouldn't this be
>>>
>>> + if (num == 0 && realm->seq == empty_snapc->seq) {
>>> + ceph_get_snap_context(empty_snapc);
>>> + goto out;
>>> + }
>>>
>>> ...
>>>
>>> realm->ino, realm, snapc, snapc->seq,
>>> (unsigned int) snapc->num_snaps);
>>>
>>> +out:
>>> ceph_put_snap_context(realm->cached_context);
>>> realm->cached_context = snapc;
>>> return 0;
>>>
>>> ?
>>>
>>> + }
>>> +
>>> /* alloc new snap context */
>>> err = -ENOMEM;
>>> if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
>>> @@ -465,6 +474,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>>> cap_snap. lucky us. */
>>> dout("queue_cap_snap %p already pending\n", inode);
>>> kfree(capsnap);
>>> + } else if (ci->i_snap_realm->cached_context == empty_snapc) {
>>> + dout("queue_cap_snap %p empty snapc\n", inode);
>>> + kfree(capsnap);
>>> } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
>>> CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
>>> struct ceph_snap_context *snapc = ci->i_head_snapc;
>>> @@ -925,5 +937,17 @@ out:
>>> return;
>>> }
>>>
>>> +int ceph_snap_init(void)
>>>
>>> I see you put __init on the declaration. While that seems to work
>>> I think it's preferred to have it on the definition:
>>>
>>> int __init ceph_snap_init(void)
>>>
>>> +{
>>> + empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
>>> + if (!empty_snapc)
>>> + return -ENOMEM;
>>> + empty_snapc->seq = 1;
>>> + empty_snapc->num_snaps = 0;
>>>
>>> Setting num_snaps seems unnecessary, ceph_create_snap_context() does
>>> that for you.
>>>
>>> + return 0;
>>> +}
>>>
>>> -
>>> +void ceph_snap_exit(void)
>>> +{
>>> + ceph_put_snap_context(empty_snapc);
>>> +}
>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>> index f6e12377335c..2b5481f1b9c7 100644
>>> --- a/fs/ceph/super.c
>>> +++ b/fs/ceph/super.c
>>> @@ -1028,6 +1028,9 @@ static int __init init_ceph(void)
>>>
>>> ceph_flock_init();
>>> ceph_xattr_init();
>>> + ret = ceph_snap_init();
>>> + if (ret)
>>> + goto out_xattr;
>>> ret = register_filesystem(&ceph_fs_type);
>>> if (ret)
>>> goto out_icache;
>>> @@ -1037,6 +1040,8 @@ static int __init init_ceph(void)
>>> return 0;
>>>
>>> out_icache:
>>>
>>> You could rename the nonsensical out_icache to out_snap while you are
>>> at it.
>>>
>>> + ceph_snap_exit();
>>> +out_xattr:
>>> ceph_xattr_exit();
>>> destroy_caches();
>>> out:
>>> @@ -1047,6 +1052,7 @@ static void __exit exit_ceph(void)
>>> {
>>> dout("exit_ceph\n");
>>> unregister_filesystem(&ceph_fs_type);
>>> + ceph_snap_exit();
>>> ceph_xattr_exit();
>>> destroy_caches();
>>> }
>>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>>> index aca22879b41f..9f63f18bb783 100644
>>> --- a/fs/ceph/super.h
>>> +++ b/fs/ceph/super.h
>>> @@ -699,6 +699,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info
>>> *ci);
>>> extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
>>> struct ceph_cap_snap *capsnap);
>>> extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
>>> +extern int __init ceph_snap_init(void);
>>>
>>> extern int ceph_snap_init(void); will do.
>>>
>>> +extern void ceph_snap_exit(void);
>>>
>>> /*
>>> * a cap_snap is "pending" if it is still awaiting an in-progress
>>> --
>>> 2.1.1
I fixed it up in testing manually. Here is the diff:
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 28571cfa7f40..ce35fbd4ba5d 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -333,8 +333,8 @@ static int build_snap_context(struct ceph_snap_realm *realm)
if (num == 0 && realm->seq == empty_snapc->seq) {
ceph_get_snap_context(empty_snapc);
- realm->cached_context = empty_snapc;
- return 0;
+ snapc = empty_snapc;
+ goto done;
}
/* alloc new snap context */
@@ -374,6 +374,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
realm->ino, realm, snapc, snapc->seq,
(unsigned int) snapc->num_snaps);
+done:
ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
return 0;
@@ -939,13 +940,12 @@ out:
return;
}
-int ceph_snap_init(void)
+int __init ceph_snap_init(void)
{
empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
if (!empty_snapc)
return -ENOMEM;
empty_snapc->seq = 1;
- empty_snapc->num_snaps = 0;
return 0;
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8eef47c8909a..50f06cddc94b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1031,13 +1031,13 @@ static int __init init_ceph(void)
goto out_xattr;
ret = register_filesystem(&ceph_fs_type);
if (ret)
- goto out_icache;
+ goto out_snap;
pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
return 0;
-out_icache:
+out_snap:
ceph_snap_exit();
out_xattr:
ceph_xattr_exit();
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 33885ad03203..e1aa32d0759d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -701,7 +701,7 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap);
extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
-extern int __init ceph_snap_init(void);
+extern int ceph_snap_init(void);
extern void ceph_snap_exit(void);
/*
Please make sure I got it right. With the above
Reviewed-by: Ilya Dryomov <[email protected]>
Thanks,
Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html