[PATCH Coroutines] Add error messages for missing methods of awaitable class
Hi This patch adds lookup_awaitable_member, it outputs error messages when any of the await_ready/suspend/resume functions are missing in awaitable class. This patch also add some error check on return value of build_co_await since we may failed to build co_await_expr. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (lookup_awaitable_member): Lookup an awaitable member. (build_co_await): Use lookup_awaitable_member instead of lookup_member. (finish_co_yield_expr, finish_co_await_expr): Add error check on return value of build_co_await. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/coro1-missing-await-method.C: New test. From 3979b29dcdb1000bbc819f69c1dc3ce7616f87cd Mon Sep 17 00:00:00 2001 From: "liangbin.mj" Date: Thu, 21 Nov 2019 08:51:22 +0800 Subject: [PATCH] to #23584419 Add some error messages when can not find method of awaitable class. --- gcc/cp/coroutines.cc | 49 --- .../coroutines/coro1-missing-await-method.C | 21 .../coroutines/coro1-ret-int-yield-int.h | 9 3 files changed, 61 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/coro1-missing-await-method.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index d3aacd7ad3b..49e509f4384 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -505,6 +505,23 @@ lookup_promise_method (tree fndecl, tree member_id, location_t loc, return pm_memb; } +/* Lookup an Awaitable member, which should be await_ready, await_suspend + or await_resume */ + +static tree +lookup_awaitable_member (tree await_type, tree member_id, location_t loc) +{ + tree aw_memb += lookup_member (await_type, member_id, +/*protect*/ 1, /*want_type*/ 0, tf_warning_or_error); + if (aw_memb == NULL_TREE || aw_memb == error_mark_node) +{ + error_at (loc, "no member named %qE in %qT", member_id, await_type); + return error_mark_node; +} + return aw_memb; +} + /* Here we check the constraints that are common to all keywords (since the presence of a coroutine keyword makes the function into a coroutine). */ @@ -643,25 +660,18 @@ build_co_await (location_t loc, tree a, suspend_point_kind suspend_kind) /* Check for required awaitable members and their types. */ tree awrd_meth -= lookup_member (o_type, coro_await_ready_identifier, -/* protect */ 1, /*want_type=*/0, tf_warning_or_error); - += lookup_awaitable_member (o_type, coro_await_ready_identifier, loc); if (!awrd_meth || awrd_meth == error_mark_node) return error_mark_node; - tree awsp_meth -= lookup_member (o_type, coro_await_suspend_identifier, -/* protect */ 1, /*want_type=*/0, tf_warning_or_error); - += lookup_awaitable_member (o_type, coro_await_suspend_identifier, loc); if (!awsp_meth || awsp_meth == error_mark_node) return error_mark_node; /* The type of the co_await is the return type of the awaitable's - co_resume(), so we need to look that up. */ + await_resume(), so we need to look that up. */ tree awrs_meth -= lookup_member (o_type, coro_await_resume_identifier, -/* protect */ 1, /*want_type=*/0, tf_warning_or_error); - += lookup_awaitable_member (o_type, coro_await_resume_identifier, loc); if (!awrs_meth || awrs_meth == error_mark_node) return error_mark_node; @@ -800,9 +810,11 @@ finish_co_await_expr (location_t kw, tree expr) /* Now we want to build co_await a. */ tree op = build_co_await (kw, a, CO_AWAIT_SUSPEND_POINT); - TREE_SIDE_EFFECTS (op) = 1; - SET_EXPR_LOCATION (op, kw); - + if (op != error_mark_node) +{ + TREE_SIDE_EFFECTS (op) = 1; + SET_EXPR_LOCATION (op, kw); +} return op; } @@ -864,10 +876,11 @@ finish_co_yield_expr (location_t kw, tree expr) promise transform_await(). */ tree op = build_co_await (kw, yield_call, CO_YIELD_SUSPEND_POINT); - - op = build2_loc (kw, CO_YIELD_EXPR, TREE_TYPE (op), expr, op); - TREE_SIDE_EFFECTS (op) = 1; - + if (op != error_mark_node) +{ + op = build2_loc (kw, CO_YIELD_EXPR, TREE_TYPE (op), expr, op); + TREE_SIDE_EFFECTS (op) = 1; +} return op; } diff --git a/gcc/testsuite/g++.dg/coroutines/coro1-missing-await-method.C b/gcc/testsuite/g++.dg/coroutines/coro1-missing-await-method.C new file mode 100644 index 000..c1869e0654c --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/coro1-missing-await-method.C @@ -0,0 +1,21 @@ +// { dg-additional-options "-fsyntax-only -w" } +#include "coro.h" + +#define MISSING_AWAIT_READY +#define MISSING_AWAIT_SUSPEND +#define MISSING_AWAIT_RESUME +#include "coro1-ret-int-yield-int.h" + +coro1 +bar0 () // { dg-error {no member named 'await_suspend' in 'coro1::suspend_a
[PATCH Coroutines] Fix issue with unused corutine function parameters
Hi Accroding to N4835: When a coroutine is invoked, a copy is created for each coroutine parameter. While in current implementation, we only collect used parameters. This may lost behavior inside constructor and destructor of unused parameters. This patch change the implementation to collect all parameter. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (build_actor_fn): Skip rewrite when there is no param references. (register_param_uses): Only collect param references here. (morph_fn_to_coro): Create coroutine frame field for each function params. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/torture/func-params-07.C: New test. --- gcc/cp/coroutines.cc | 63 +++-- .../coroutines/torture/func-params-07.C | 68 +++ 2 files changed, 93 insertions(+), 38 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/func-params-07.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 49e509f4384..74e0f6d1785 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -1877,6 +1877,8 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, actor_frame, fld_ref, NULL_TREE); int i; tree *puse; + if (parm.body_uses == NULL) + continue; FOR_EACH_VEC_ELT (*parm.body_uses, i, puse) { *puse = fld_idx; @@ -2717,7 +2719,6 @@ struct param_frame_data tree *field_list; hash_map *param_uses; location_t loc; - bool param_seen; }; static tree @@ -2731,30 +2732,10 @@ register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d) bool existed; param_info &parm = data->param_uses->get_or_insert (*stmt, &existed); gcc_checking_assert (existed); - - if (parm.field_id == NULL_TREE) -{ - tree actual_type = TREE_TYPE (*stmt); - - if (!COMPLETE_TYPE_P (actual_type)) - actual_type = complete_type_or_else (actual_type, *stmt); - - if (TREE_CODE (actual_type) == REFERENCE_TYPE) - actual_type = build_pointer_type (TREE_TYPE (actual_type)); - - parm.frame_type = actual_type; - tree pname = DECL_NAME (*stmt); - size_t namsize = sizeof ("__parm.") + IDENTIFIER_LENGTH (pname) + 1; - char *buf = (char *) alloca (namsize); - snprintf (buf, namsize, "__parm.%s", IDENTIFIER_POINTER (pname)); - parm.field_id - = coro_make_frame_entry (data->field_list, buf, actual_type, data->loc); - vec_alloc (parm.body_uses, 4); - parm.body_uses->quick_push (stmt); - data->param_seen = true; -} - else -parm.body_uses->safe_push (stmt); + gcc_checking_assert (parm.field_id != NULL_TREE); + if (parm.body_uses == NULL) +vec_alloc (parm.body_uses, 4); + parm.body_uses->safe_push (stmt); return NULL_TREE; } @@ -3060,6 +3041,8 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) The second two entries start out empty - and only get populated when we see uses. */ param_uses = new hash_map; + struct param_frame_data param_data + = {&field_list, param_uses, fn_start}; for (tree arg = DECL_ARGUMENTS (orig); arg != NULL; arg = DECL_CHAIN (arg)) @@ -3067,24 +3050,28 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) bool existed; param_info &parm = param_uses->get_or_insert (arg, &existed); gcc_checking_assert (!existed); - parm.field_id = NULL_TREE; parm.body_uses = NULL; + tree actual_type = TREE_TYPE (arg); + + if (!COMPLETE_TYPE_P (actual_type)) + actual_type = complete_type_or_else (actual_type, arg); + + if (TREE_CODE (actual_type) == REFERENCE_TYPE) + actual_type = build_pointer_type (TREE_TYPE (actual_type)); + + parm.frame_type = actual_type; + tree pname = DECL_NAME (arg); + size_t namsize = sizeof ("__parm.") + IDENTIFIER_LENGTH (pname) + 1; + char *buf = (char *) alloca (namsize); + snprintf (buf, namsize, "__parm.%s", IDENTIFIER_POINTER (pname)); + parm.field_id + = coro_make_frame_entry (param_data.field_list, buf, +actual_type, param_data.loc); } - param_frame_data param_data - = {&field_list, param_uses, fn_start, false}; /* We want to record every instance of param's use, so don't include a 'visited' hash_set. */ cp_walk_tree (&fnbody, register_param_uses, ¶m_data, NULL); - - /* If no uses for any param were seen, act as if there were no -params (it could be that they are only used to construct the -promise). */ - if (!param_data.p
Re: [PATCH Coroutines] Add error messages for missing methods of awaitable class
在 2020/1/20 下午4:55, Iain Sandoe 写道: Hi JunMa, JunMa wrote: gcc/cp 2020-01-20 Jun Ma * coroutines.cc (lookup_awaitable_member): Lookup an awaitable member. (build_co_await): Use lookup_awaitable_member instead of lookup_member. (finish_co_yield_expr, finish_co_await_expr): Add error check on return value of build_co_await. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/coro1-missing-await-method.C: New test. <0001-Add-some-error-messages-when-missing.patch> this LGTM, but you will have to wait for a C++ maintainer to approve. thanks Iain Thanks Iain, +Jason and Nathan could you have a look? Regards JunMa
Re: [PATCH Coroutines] Fix issue with unused corutine function parameters
在 2020/1/20 下午6:07, Iain Sandoe 写道: Hi JunMa, JunMa wrote: Hi Accroding to N4835: When a coroutine is invoked, a copy is created for each coroutine parameter. While in current implementation, we only collect used parameters. This may lost behavior inside constructor and destructor of unused parameters. This patch change the implementation to collect all parameter. thanks for the patch. I am not convinced this is the right way to fix this, we do not want to increase the size of the coroutine frame unnecessarily. I would like to check the lifetime requirements; such copies might only need to be made local to the ramp function. Iain For type with trivial destructor, There is no need to copy parameter if it is never referenced after a reachable suspend point(return-to-caller/resume) in the coroutine. Since we are in front end, that should be inital_suspend point. so we can create field for type with non-trivial destructor first, and skip unused parameters in register_param_uses. I'll update the patch Regards JunMa Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (build_actor_fn): Skip rewrite when there is no param references. (register_param_uses): Only collect param references here. (morph_fn_to_coro): Create coroutine frame field for each function params. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/torture/func-params-07.C: New test. <0001-Fix-issue-when-copy-function-parameters-into-corouti.patch>
Re: [PATCH Coroutines] Fix issue with unused corutine function parameters
在 2020/1/20 下午7:55, Iain Sandoe 写道: Hi JunMa, JunMa wrote: 在 2020/1/20 下午6:07, Iain Sandoe 写道: Hi JunMa, JunMa wrote: Hi Accroding to N4835: When a coroutine is invoked, a copy is created for each coroutine parameter. While in current implementation, we only collect used parameters. This may lost behavior inside constructor and destructor of unused parameters. This patch change the implementation to collect all parameter. thanks for the patch. I am not convinced this is the right way to fix this, we do not want to increase the size of the coroutine frame unnecessarily. I would like to check the lifetime requirements; such copies might only need to be made local to the ramp function. Iain For type with trivial destructor, There is no need to copy parameter if it is never referenced after a reachable suspend point(return-to-caller/resume) in the coroutine. Since we are in front end, that should be inital_suspend point. so we can create field for type with non-trivial destructor first, and skip unused parameters in register_param_uses. I'll update the patch I think that, even if the DTOR is non-trivial, the copy only needs to be in the stack frame of the ramp, not in the coroutine frame. I do think this. It's just need more work on front end. This is also what clang does for -O>0 (although it makes a frame copy at O0). Will clarify this (perhaps the wording could be slightly more specfic). No, clang build frame in middle end. In FE, it just generate local variable to do the copy, and let the middle end to do the optimization which can see more opportunity. Regards JunMa thanks Iain Regards JunMa Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (build_actor_fn): Skip rewrite when there is no param references. (register_param_uses): Only collect param references here. (morph_fn_to_coro): Create coroutine frame field for each function params. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/torture/func-params-07.C: New test. <0001-Fix-issue-when-copy-function-parameters-into-corouti.patch>
Re: [PATCH Coroutines] Fix issue with unused corutine function parameters
在 2020/1/20 下午8:21, Iain Sandoe 写道: JunMa wrote: 在 2020/1/20 下午7:55, Iain Sandoe 写道: Hi JunMa, JunMa wrote: 在 2020/1/20 下午6:07, Iain Sandoe 写道: Hi JunMa, JunMa wrote: Hi Accroding to N4835: When a coroutine is invoked, a copy is created for each coroutine parameter. While in current implementation, we only collect used parameters. This may lost behavior inside constructor and destructor of unused parameters. This patch change the implementation to collect all parameter. thanks for the patch. I am not convinced this is the right way to fix this, we do not want to increase the size of the coroutine frame unnecessarily. I would like to check the lifetime requirements; such copies might only need to be made local to the ramp function. Iain For type with trivial destructor, There is no need to copy parameter if it is never referenced after a reachable suspend point(return-to-caller/resume) in the coroutine. Since we are in front end, that should be inital_suspend point. so we can create field for type with non-trivial destructor first, and skip unused parameters in register_param_uses. I'll update the patch I think that, even if the DTOR is non-trivial, the copy only needs to be in the stack frame of the ramp, not in the coroutine frame. I do think this. It's just need more work on front end. I think we already did the work, and know the answer (about param uses in the body), right? Yes, we may need extra work on copy parameters, I'll do this. This is also what clang does for -O>0 (although it makes a frame copy at O0). Will clarify this (perhaps the wording could be slightly more specfic). No, clang build frame in middle end. In FE, it just generate local variable to do the copy, and let the middle end to do the optimization which can see more opportunity. Yes, i understand that clang does this in the ME not in the FE but, so long as the principle is correct, it is equivalent for GCC to do this in the FE. Yes, just less situation we can handle, that's why we still need coroutine frame reduction pass. Regards JunMa thanks Iain Regards JunMa thanks Iain Regards JunMa Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (build_actor_fn): Skip rewrite when there is no param references. (register_param_uses): Only collect param references here. (morph_fn_to_coro): Create coroutine frame field for each function params. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/torture/func-params-07.C: New test. <0001-Fix-issue-when-copy-function-parameters-into-corouti.patch>
Re: [PATCH Coroutines] Add error messages for missing methods of awaitable class
在 2020/1/20 下午11:49, Nathan Sidwell 写道: On 1/20/20 12:18 AM, JunMa wrote: Hi This patch adds lookup_awaitable_member, it outputs error messages when any of the await_ready/suspend/resume functions are missing in awaitable class. This patch also add some error check on return value of build_co_await since we may failed to build co_await_expr. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (lookup_awaitable_member): Lookup an awaitable member. (build_co_await): Use lookup_awaitable_member instead of lookup_member. (finish_co_yield_expr, finish_co_await_expr): Add error check on return value of build_co_await. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/coro1-missing-await-method.C: New test. 1) you're mixing two distinct changes, the one about the error message and the one about changing error_mark_node. tree op = build_co_await (kw, a, CO_AWAIT_SUSPEND_POINT); - TREE_SIDE_EFFECTS (op) = 1; - SET_EXPR_LOCATION (op, kw); - + if (op != error_mark_node) + { + TREE_SIDE_EFFECTS (op) = 1; + SET_EXPR_LOCATION (op, kw); + } return op; } these two fragments are ok, as a separate patch. ok, I'll split it. +/* Lookup an Awaitable member, which should be await_ready, await_suspend + or await_resume */ + +static tree +lookup_awaitable_member (tree await_type, tree member_id, location_t loc) +{ + tree aw_memb + = lookup_member (await_type, member_id, + /*protect*/ 1, /*want_type*/ 0, tf_warning_or_error); fwiw '/*foo=*/expr' is the canonical form -- i know Iain wasn't consistent, and it looks like I may have missed a few places in review. ok. + if (aw_memb == NULL_TREE || aw_memb == error_mark_node) + { + error_at (loc, "no member named %qE in %qT", member_id, await_type); + return error_mark_node; + } + return aw_memb; +} This looks wrong. lookup_member is being passed tf_warning_or_error, so it should already be emitting a diagnostic, for the cases where something is found, but is ambiguous or whatever. So, I think you only want a diagnostic here when you get NULL_TREE back. you are right, I follow with same code of lookup_promise_member, I'll update both. Regards JunMa nathan
Re: [PATCH Coroutines] Fix issue with unused corutine function parameters
在 2020/1/21 上午12:39, Iain Sandoe 写道: JunMa wrote: 在 2020/1/20 下午8:21, Iain Sandoe 写道: JunMa wrote: 在 2020/1/20 下午7:55, Iain Sandoe 写道: Hi JunMa, JunMa wrote: 在 2020/1/20 下午6:07, Iain Sandoe 写道: Hi JunMa, JunMa wrote: Hi Accroding to N4835: When a coroutine is invoked, a copy is created for each coroutine parameter. While in current implementation, we only collect used parameters. This may lost behavior inside constructor and destructor of unused parameters. This patch change the implementation to collect all parameter. thanks for the patch. I am not convinced this is the right way to fix this, we do not want to increase the size of the coroutine frame unnecessarily. I would like to check the lifetime requirements; such copies might only need to be made local to the ramp function. Iain For type with trivial destructor, There is no need to copy parameter if it is never referenced after a reachable suspend point(return-to-caller/resume) in the coroutine. Since we are in front end, that should be inital_suspend point. so we can create field for type with non-trivial destructor first, and skip unused parameters in register_param_uses. I'll update the patch I think that, even if the DTOR is non-trivial, the copy only needs to be in the stack frame of the ramp, not in the coroutine frame. I do think this. It's just need more work on front end. I think we already did the work, and know the answer (about param uses in the body), right? Yes, we may need extra work on copy parameters, I'll do this. Having discussed this with Nathan (and I’ve also mailed Gor for clarification). I think it might be a good idea to wait for those responses before revising (it could be that your original reading of the wording is correct, and the clang impl. needs to be updated). ok, thanks. Regards JunMa thanks Iain This is also what clang does for -O>0 (although it makes a frame copy at O0). Will clarify this (perhaps the wording could be slightly more specfic). No, clang build frame in middle end. In FE, it just generate local variable to do the copy, and let the middle end to do the optimization which can see more opportunity. Yes, i understand that clang does this in the ME not in the FE but, so long as the principle is correct, it is equivalent for GCC to do this in the FE. Yes, just less situation we can handle, that's why we still need coroutine frame reduction pass. Regards JunMa thanks Iain Regards JunMa thanks Iain Regards JunMa Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (build_actor_fn): Skip rewrite when there is no param references. (register_param_uses): Only collect param references here. (morph_fn_to_coro): Create coroutine frame field for each function params. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/torture/func-params-07.C: New test. <0001-Fix-issue-when-copy-function-parameters-into-corouti.patch>
Re: [PATCH Coroutines] Fix issue with unused corutine function parameters
在 2020/1/21 上午9:34, JunMa 写道: 在 2020/1/21 上午12:39, Iain Sandoe 写道: JunMa wrote: 在 2020/1/20 下午8:21, Iain Sandoe 写道: JunMa wrote: 在 2020/1/20 下午7:55, Iain Sandoe 写道: Hi JunMa, JunMa wrote: 在 2020/1/20 下午6:07, Iain Sandoe 写道: Hi JunMa, JunMa wrote: Hi Accroding to N4835: When a coroutine is invoked, a copy is created for each coroutine parameter. While in current implementation, we only collect used parameters. This may lost behavior inside constructor and destructor of unused parameters. This patch change the implementation to collect all parameter. thanks for the patch. I am not convinced this is the right way to fix this, we do not want to increase the size of the coroutine frame unnecessarily. I would like to check the lifetime requirements; such copies might only need to be made local to the ramp function. Iain For type with trivial destructor, There is no need to copy parameter if it is never referenced after a reachable suspend point(return-to-caller/resume) in the coroutine. Since we are in front end, that should be inital_suspend point. so we can create field for type with non-trivial destructor first, and skip unused parameters in register_param_uses. I'll update the patch I think that, even if the DTOR is non-trivial, the copy only needs to be in the stack frame of the ramp, not in the coroutine frame. I do think this. It's just need more work on front end. I think we already did the work, and know the answer (about param uses in the body), right? Yes, we may need extra work on copy parameters, I'll do this. Having discussed this with Nathan (and I’ve also mailed Gor for clarification). I think it might be a good idea to wait for those responses before revising (it could be that your original reading of the wording is correct, and the clang impl. needs to be updated). ok, thanks. The reason why i consider about non-trivial destructors is that if the destructors are called in ramp function, actor function may has different status on something. the testcase I attachted is such example: it changes global variable in destructor. Regards JunMa Regards JunMa thanks Iain This is also what clang does for -O>0 (although it makes a frame copy at O0). Will clarify this (perhaps the wording could be slightly more specfic). No, clang build frame in middle end. In FE, it just generate local variable to do the copy, and let the middle end to do the optimization which can see more opportunity. Yes, i understand that clang does this in the ME not in the FE but, so long as the principle is correct, it is equivalent for GCC to do this in the FE. Yes, just less situation we can handle, that's why we still need coroutine frame reduction pass. Regards JunMa thanks Iain Regards JunMa thanks Iain Regards JunMa Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (build_actor_fn): Skip rewrite when there is no param references. (register_param_uses): Only collect param references here. (morph_fn_to_coro): Create coroutine frame field for each function params. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/torture/func-params-07.C: New test. <0001-Fix-issue-when-copy-function-parameters-into-corouti.patch>
Re: [PATCH Coroutines 1/2] Add error messages for missing methods of awaitable class
在 2020/1/21 上午9:31, JunMa 写道: 在 2020/1/20 下午11:49, Nathan Sidwell 写道: On 1/20/20 12:18 AM, JunMa wrote: Hi This patch adds lookup_awaitable_member, it outputs error messages when any of the await_ready/suspend/resume functions are missing in awaitable class. This patch also add some error check on return value of build_co_await since we may failed to build co_await_expr. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (lookup_awaitable_member): Lookup an awaitable member. (build_co_await): Use lookup_awaitable_member instead of lookup_member. (finish_co_yield_expr, finish_co_await_expr): Add error check on return value of build_co_await. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/coro1-missing-await-method.C: New test. 1) you're mixing two distinct changes, the one about the error message and the one about changing error_mark_node. tree op = build_co_await (kw, a, CO_AWAIT_SUSPEND_POINT); - TREE_SIDE_EFFECTS (op) = 1; - SET_EXPR_LOCATION (op, kw); - + if (op != error_mark_node) + { + TREE_SIDE_EFFECTS (op) = 1; + SET_EXPR_LOCATION (op, kw); + } return op; } these two fragments are ok, as a separate patch. ok, I'll split it. +/* Lookup an Awaitable member, which should be await_ready, await_suspend + or await_resume */ + +static tree +lookup_awaitable_member (tree await_type, tree member_id, location_t loc) +{ + tree aw_memb + = lookup_member (await_type, member_id, + /*protect*/ 1, /*want_type*/ 0, tf_warning_or_error); fwiw '/*foo=*/expr' is the canonical form -- i know Iain wasn't consistent, and it looks like I may have missed a few places in review. ok. + if (aw_memb == NULL_TREE || aw_memb == error_mark_node) + { + error_at (loc, "no member named %qE in %qT", member_id, await_type); + return error_mark_node; + } + return aw_memb; +} This looks wrong. lookup_member is being passed tf_warning_or_error, so it should already be emitting a diagnostic, for the cases where something is found, but is ambiguous or whatever. So, I think you only want a diagnostic here when you get NULL_TREE back. you are right, I follow with same code of lookup_promise_member, I'll update both. Regards JunMa nathan Hi nathan, attachment is the updated patch which add error messages for lookup awaitable member. Regards JunMa gcc/cp 2020-01-21 Jun Ma * coroutines.cc (lookup_awaitable_member): Lookup an awaitable member. (lookup_promise_method): Emit diagnostic when get NULL_TREE back only. (build_co_await): Use lookup_awaitable_member instead of lookup_member. gcc/testsuite 2020-01-21 Jun Ma * g++.dg/coroutines/coro1-missing-await-method.C: New test. --- gcc/cp/coroutines.cc | 36 --- .../coroutines/coro1-missing-await-method.C | 21 +++ .../coroutines/coro1-ret-int-yield-int.h | 9 + 3 files changed, 53 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/coro1-missing-await-method.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index d3aacd7ad3b..43f03f7c49a 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -496,8 +496,8 @@ lookup_promise_method (tree fndecl, tree member_id, location_t loc, tree promise = get_coroutine_promise_type (fndecl); tree pm_memb = lookup_member (promise, member_id, -/*protect*/ 1, /*want_type*/ 0, tf_warning_or_error); - if (musthave && (pm_memb == NULL_TREE || pm_memb == error_mark_node)) +/*protect=*/ 1, /*want_type=*/ 0, tf_warning_or_error); + if (musthave && pm_memb == NULL_TREE) { error_at (loc, "no member named %qE in %qT", member_id, promise); return error_mark_node; @@ -505,6 +505,23 @@ lookup_promise_method (tree fndecl, tree member_id, location_t loc, return pm_memb; } +/* Lookup an Awaitable member, which should be await_ready, await_suspend + or await_resume. */ + +static tree +lookup_awaitable_member (tree await_type, tree member_id, location_t loc) +{ + tree aw_memb += lookup_member (await_type, member_id, +/*protect=*/ 1, /*want_type=*/ 0, tf_warning_or_error); + if (aw_memb == NULL_TREE) +{ + error_at (loc, "no member named %qE in %qT", member_id, await_type); + return error_mark_node; +} + return aw_memb; +} + /* Here we check the constraints that are common to all keywords (since the presence of a coroutine keyword makes the function into a coroutine). */ @@ -643,25 +660,18 @@ build_co_await (location_t loc, tree a, suspend_point_kind suspend_kind) /* Check for required awaitable members and their types. */ tree awrd_meth -= lookup_member (o_type, coro_a
Re: [PATCH Coroutines 2/2] Add error check on return value of build_co_await
在 2020/1/21 上午9:31, JunMa 写道: 在 2020/1/20 下午11:49, Nathan Sidwell 写道: On 1/20/20 12:18 AM, JunMa wrote: Hi This patch adds lookup_awaitable_member, it outputs error messages when any of the await_ready/suspend/resume functions are missing in awaitable class. This patch also add some error check on return value of build_co_await since we may failed to build co_await_expr. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (lookup_awaitable_member): Lookup an awaitable member. (build_co_await): Use lookup_awaitable_member instead of lookup_member. (finish_co_yield_expr, finish_co_await_expr): Add error check on return value of build_co_await. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/coro1-missing-await-method.C: New test. 1) you're mixing two distinct changes, the one about the error message and the one about changing error_mark_node. tree op = build_co_await (kw, a, CO_AWAIT_SUSPEND_POINT); - TREE_SIDE_EFFECTS (op) = 1; - SET_EXPR_LOCATION (op, kw); - + if (op != error_mark_node) + { + TREE_SIDE_EFFECTS (op) = 1; + SET_EXPR_LOCATION (op, kw); + } return op; } these two fragments are ok, as a separate patch. ok, I'll split it. +/* Lookup an Awaitable member, which should be await_ready, await_suspend + or await_resume */ + +static tree +lookup_awaitable_member (tree await_type, tree member_id, location_t loc) +{ + tree aw_memb + = lookup_member (await_type, member_id, + /*protect*/ 1, /*want_type*/ 0, tf_warning_or_error); fwiw '/*foo=*/expr' is the canonical form -- i know Iain wasn't consistent, and it looks like I may have missed a few places in review. ok. + if (aw_memb == NULL_TREE || aw_memb == error_mark_node) + { + error_at (loc, "no member named %qE in %qT", member_id, await_type); + return error_mark_node; + } + return aw_memb; +} This looks wrong. lookup_member is being passed tf_warning_or_error, so it should already be emitting a diagnostic, for the cases where something is found, but is ambiguous or whatever. So, I think you only want a diagnostic here when you get NULL_TREE back. you are right, I follow with same code of lookup_promise_member, I'll update both. Regards JunMa nathan Hi nathan, attachment is the updated patch which check error_mark_node of build_co_coawait. Regards JunMa gcc/cp 2020-01-21 Jun Ma * coroutines.cc (finish_co_await_expr): Add error check on return value of build_co_await. (finish_co_yield_expr,): Ditto. --- gcc/cp/coroutines.cc | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 43f03f7c49a..bd3656562ec 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -810,8 +810,11 @@ finish_co_await_expr (location_t kw, tree expr) /* Now we want to build co_await a. */ tree op = build_co_await (kw, a, CO_AWAIT_SUSPEND_POINT); - TREE_SIDE_EFFECTS (op) = 1; - SET_EXPR_LOCATION (op, kw); + if (op != error_mark_node) +{ + TREE_SIDE_EFFECTS (op) = 1; + SET_EXPR_LOCATION (op, kw); +} return op; } @@ -874,9 +877,11 @@ finish_co_yield_expr (location_t kw, tree expr) promise transform_await(). */ tree op = build_co_await (kw, yield_call, CO_YIELD_SUSPEND_POINT); - - op = build2_loc (kw, CO_YIELD_EXPR, TREE_TYPE (op), expr, op); - TREE_SIDE_EFFECTS (op) = 1; + if (op != error_mark_node) +{ + op = build2_loc (kw, CO_YIELD_EXPR, TREE_TYPE (op), expr, op); + TREE_SIDE_EFFECTS (op) = 1; +} return op; } -- 2.19.1.3.ge56e4f7
[PATCH Coroutines] Change context of label_decl in original function
Hi This patch does minor fix on changing context of label_decl from original function to actor function which avoid assertion in gimplify pass. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-21 Jun Ma * coroutines.cc (transform_await_wrapper): Set actor funcion as new context of label_decl. (build_actor_fn): Fill new field of await_xform_data. gcc/testsuite 2020-01-21 Jun Ma * g++.dg/coroutines/co-await-04-control-flow.C: Add label. --- gcc/cp/coroutines.cc | 11 --- .../coroutines/torture/co-await-04-control-flow.C | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index bd3656562ec..2b6a154d28f 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -1562,6 +1562,7 @@ static hash_map *suspend_points; struct await_xform_data { + tree actor_fn; /* Decl for context. */ tree actor_frame; tree promise_proxy; tree real_promise; @@ -1643,12 +1644,16 @@ transform_await_expr (tree await_expr, await_xform_data *xform) static tree transform_await_wrapper (tree *stmt, int *do_subtree, void *d) { + /* Set actor function as new DECL_CONTEXT of label_decl. */ + struct await_xform_data *xform = (struct await_xform_data *) d; + if (TREE_CODE (*stmt) == LABEL_DECL + && DECL_CONTEXT (*stmt) != xform->actor_fn) +DECL_CONTEXT (*stmt) = xform->actor_fn; + if (TREE_CODE (*stmt) != CO_AWAIT_EXPR && TREE_CODE (*stmt) != CO_YIELD_EXPR) return NULL_TREE; tree await_expr = *stmt; - await_xform_data *xform = (await_xform_data *) d; - *stmt = transform_await_expr (await_expr, xform); if (*stmt == error_mark_node) *do_subtree = 0; @@ -2005,7 +2010,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, decide where to put things. */ await_xform_data xform -= {actor_frame, promise_proxy, ap, self_h_proxy, ash}; += {actor, actor_frame, promise_proxy, ap, self_h_proxy, ash}; /* Get a reference to the initial suspend var in the frame. */ transform_await_expr (initial_await, &xform); diff --git a/gcc/testsuite/g++.dg/coroutines/torture/co-await-04-control-flow.C b/gcc/testsuite/g++.dg/coroutines/torture/co-await-04-control-flow.C index 9bc99e875d0..e8da2d2e2ad 100644 --- a/gcc/testsuite/g++.dg/coroutines/torture/co-await-04-control-flow.C +++ b/gcc/testsuite/g++.dg/coroutines/torture/co-await-04-control-flow.C @@ -16,9 +16,11 @@ coro1 f () { if (gX < 12) { +L1: gX += y; gX += co_await 11; } else +L2: gX += co_await 12; co_return gX; -- 2.19.1.3.ge56e4f7
Re: [PATCH Coroutines] Fix issue with unused corutine function parameters
在 2020/1/21 下午4:07, Iain Sandoe 写道: JunMa wrote: 在 2020/1/21 上午9:34, JunMa 写道: 在 2020/1/21 上午12:39, Iain Sandoe 写道: JunMa wrote: 在 2020/1/20 下午8:21, Iain Sandoe 写道: JunMa wrote: 在 2020/1/20 下午7:55, Iain Sandoe 写道: Hi JunMa, JunMa wrote: 在 2020/1/20 下午6:07, Iain Sandoe 写道: Hi JunMa, JunMa wrote: Hi Accroding to N4835: When a coroutine is invoked, a copy is created for each coroutine parameter. While in current implementation, we only collect used parameters. This may lost behavior inside constructor and destructor of unused parameters. This patch change the implementation to collect all parameter. thanks for the patch. I am not convinced this is the right way to fix this, we do not want to increase the size of the coroutine frame unnecessarily. I would like to check the lifetime requirements; such copies might only need to be made local to the ramp function. Iain For type with trivial destructor, There is no need to copy parameter if it is never referenced after a reachable suspend point(return-to-caller/resume) in the coroutine. Since we are in front end, that should be inital_suspend point. so we can create field for type with non-trivial destructor first, and skip unused parameters in register_param_uses. I'll update the patch I think that, even if the DTOR is non-trivial, the copy only needs to be in the stack frame of the ramp, not in the coroutine frame. I do think this. It's just need more work on front end. I think we already did the work, and know the answer (about param uses in the body), right? Yes, we may need extra work on copy parameters, I'll do this. Having discussed this with Nathan (and I’ve also mailed Gor for clarification). I think it might be a good idea to wait for those responses before revising (it could be that your original reading of the wording is correct, and the clang impl. needs to be updated). ok, thanks. The reason why i consider about non-trivial destructors is that if the destructors are called in ramp function, actor function may has different status on something. the testcase I attachted is such example: it changes global variable in destructor. Yes we all agree on this :-) The issue is: 1. should the copy exist for the duration of the ramp only? (i.e. copied to the ramp() stack frame) 2. should the copy exist for the duration of the resume() function? (i.e. copied to the coro frame) At the present, clang appears to do 1. when optimisation is on and 2. without optimisation. Nathan pointed out that the lifetime of an object should not depend on the optimisation level. Although I prefer 2, it seems that maybe clang is hard to control this(not sure:), maybe new attribute). So - I think we need some clarification on the intent (from the designer) and maybe some revision of the standard wording to make the lifetime clear. Yeah, let's make this more clear. Thanks. Regards JunMa thanks Iain
[PATCH Coroutines] Handle type deduction of auto and decltype(auto) with indirect_ref expression
Hi When test gcc with cppcoro, I find case like: int& awaitable::await_resume() auto x1 = co_await awaitable; decltype(auto) x2 = co_await awaitable; Based on standard, typeof(x1) should be int, typeof(x2) is int&. However, we get both x1 and x2 as int, this because we donot consider indirect_ref which wrap await_resume call expression (convert_from_reference), and it is invoked into type deduction of auto and decltype(auto). This patch wrap co_await expression with indirect_ref which should be same with await_resume expression, and it sink await_resume expression to real call_expr when replace co_await expression. it fix type deduction of auto and decltype(auto) in coroutine. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-21 Jun Ma * coroutines.cc (build_co_await): Wrap co_await with indirect_ref when needed. (co_await_expander): Sink to call_expr if await_resume is wrapped by indirect_ref. gcc/testsuite 2020-01-21 Jun Ma * g++.dg/coroutines/co-await-14-return-ref-to-auto.C: Add label. --- gcc/cp/coroutines.cc | 16 +-- .../torture/co-await-14-return-ref-to-auto.C | 45 +++ 2 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index d25c1887c1e..0a8943d8348 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -743,9 +743,16 @@ build_co_await (location_t loc, tree a, suspend_point_kind suspend_kind) TREE_VEC_ELT (awaiter_calls, 1) = awsp_call; /* await_suspend(). */ TREE_VEC_ELT (awaiter_calls, 2) = awrs_call; /* await_resume(). */ - return build5_loc (loc, CO_AWAIT_EXPR, TREE_TYPE (awrs_call), a, -e_proxy, o, awaiter_calls, -build_int_cst (integer_type_node, (int) suspend_kind)); + tree await_expr = build5_loc (loc, CO_AWAIT_EXPR, + TREE_TYPE (TREE_TYPE (awrs_func)), + a, e_proxy, o, awaiter_calls, + build_int_cst (integer_type_node, + (int) suspend_kind)); + /* Wrap co_await_expr. */ + if (TREE_CODE (awrs_call) == INDIRECT_REF) +await_expr = build1_loc (loc, INDIRECT_REF, TREE_TYPE (awrs_call), +await_expr); + return await_expr; } tree @@ -1503,6 +1510,9 @@ co_await_expander (tree *stmt, int * /*do_subtree*/, void *d) /* This will produce the value (if one is provided) from the co_await expression. */ tree resume_call = TREE_VEC_ELT (awaiter_calls, 2); /* await_resume(). */ + if (TREE_CODE (resume_call) == INDIRECT_REF) +/* Sink to await_resume call_expr. */ +resume_call = TREE_OPERAND (resume_call, 0); switch (stmt_code) { default: /* not likely to work .. but... */ diff --git a/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C new file mode 100644 index 000..0a7c035ef2a --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C @@ -0,0 +1,45 @@ +// { dg-do run } + +/* The simplest valued co_await we can do. */ + +#include "../coro.h" + +// boiler-plate for tests of codegen +#include "../coro1-ret-int-yield-int.h" + + +coro1 +f () +{ + int t1 = 5; + int t2 = 5; + auto gX = co_await coro1::suspend_always_intrefprt{t1}; + if (gX != t1) + abort(); + decltype(auto) gX1 = co_await coro1::suspend_always_intrefprt{t2}; + if (&gX1 != &t2) + abort(); + co_return t1 + 10; +} + +int main () +{ + PRINT ("main: create coro1"); + struct coro1 f_coro = f (); + if (f_coro.handle.done()) +{ + PRINT ("main: we should not be 'done' [1]"); + abort (); +} + PRINT ("main: resuming [1] initial suspend"); + while (!f_coro.handle.done()) +f_coro.handle.resume(); + /* we should now have returned with the co_return (15) */ + if (!f_coro.handle.done()) +{ + PRINT ("main: we should be 'done' "); + abort (); +} + puts ("main: done"); + return 0; +} -- 2.19.1.3.ge56e4f7
Re: [PATCH Coroutines 1/2] Add error messages for missing methods of awaitable class
在 2020/1/21 下午8:06, Nathan Sidwell 写道: On 1/20/20 10:38 PM, JunMa wrote: 在 2020/1/21 上午9:31, JunMa 写道: 在 2020/1/20 下午11:49, Nathan Sidwell 写道: On 1/20/20 12:18 AM, JunMa wrote: Hi This patch adds lookup_awaitable_member, it outputs error messages when any of the await_ready/suspend/resume functions are missing in awaitable class. This patch also add some error check on return value of build_co_await since we may failed to build co_await_expr. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-01-20 Jun Ma * coroutines.cc (lookup_awaitable_member): Lookup an awaitable member. (build_co_await): Use lookup_awaitable_member instead of lookup_member. (finish_co_yield_expr, finish_co_await_expr): Add error check on return value of build_co_await. gcc/testsuite 2020-01-20 Jun Ma * g++.dg/coroutines/coro1-missing-await-method.C: New test. 1) you're mixing two distinct changes, the one about the error message and the one about changing error_mark_node. tree op = build_co_await (kw, a, CO_AWAIT_SUSPEND_POINT); - TREE_SIDE_EFFECTS (op) = 1; - SET_EXPR_LOCATION (op, kw); - + if (op != error_mark_node) + { + TREE_SIDE_EFFECTS (op) = 1; + SET_EXPR_LOCATION (op, kw); + } return op; } these two fragments are ok, as a separate patch. ok, I'll split it. +/* Lookup an Awaitable member, which should be await_ready, await_suspend + or await_resume */ + +static tree +lookup_awaitable_member (tree await_type, tree member_id, location_t loc) +{ + tree aw_memb + = lookup_member (await_type, member_id, + /*protect*/ 1, /*want_type*/ 0, tf_warning_or_error); fwiw '/*foo=*/expr' is the canonical form -- i know Iain wasn't consistent, and it looks like I may have missed a few places in review. ok. + if (aw_memb == NULL_TREE || aw_memb == error_mark_node) + { + error_at (loc, "no member named %qE in %qT", member_id, await_type); + return error_mark_node; + } + return aw_memb; +} This looks wrong. lookup_member is being passed tf_warning_or_error, so it should already be emitting a diagnostic, for the cases where something is found, but is ambiguous or whatever. So, I think you only want a diagnostic here when you get NULL_TREE back. you are right, I follow with same code of lookup_promise_member, I'll update both. Regards JunMa nathan Hi nathan, attachment is the updated patch which add error messages for lookup awaitable member. thanks this is ok. Could you remove the space in '/*protect=*/ 1' and similar? This appear to be one place where our coding style has fewer spaces than expected! ok, I'll update it. Regards JunMa nathan
Re: [PATCH Coroutines] Handle type deduction of auto and decltype(auto) with indirect_ref expression
在 2020/1/28 上午12:01, Nathan Sidwell 写道: On 1/21/20 6:21 AM, JunMa wrote: Hi When test gcc with cppcoro, I find case like: int& awaitable::await_resume() auto x1 = co_await awaitable; decltype(auto) x2 = co_await awaitable; Based on standard, typeof(x1) should be int, typeof(x2) is int&. However, we get both x1 and x2 as int, this because we donot consider indirect_ref which wrap await_resume call expression (convert_from_reference), and it is invoked into type deduction of auto and decltype(auto). This patch wrap co_await expression with indirect_ref which should be same with await_resume expression, and it sink await_resume expression to real call_expr when replace co_await expression. it fix type deduction of auto and decltype(auto) in coroutine. Bootstrap and test on X86_64, is it OK? + /* Wrap co_await_expr. */ + if (TREE_CODE (awrs_call) == INDIRECT_REF) + await_expr = build1_loc (loc, INDIRECT_REF, TREE_TYPE (awrs_call), + await_expr); I think all you want here is: await_expr = convert_from_reference (await_expr); Thanks, I'll update it. Regards JunMa Regards JunMa gcc/cp 2020-01-21 Jun Ma * coroutines.cc (build_co_await): Wrap co_await with indirect_ref when needed. (co_await_expander): Sink to call_expr if await_resume is wrapped by indirect_ref. gcc/testsuite 2020-01-21 Jun Ma * g++.dg/coroutines/co-await-14-return-ref-to-auto.C: Add label.
Re: [PATCH Coroutines v1] Handle type deduction of auto and decltype(auto) with indirect_ref expression
在 2020/2/3 上午9:03, JunMa 写道: 在 2020/1/28 上午12:01, Nathan Sidwell 写道: On 1/21/20 6:21 AM, JunMa wrote: Hi When test gcc with cppcoro, I find case like: int& awaitable::await_resume() auto x1 = co_await awaitable; decltype(auto) x2 = co_await awaitable; Based on standard, typeof(x1) should be int, typeof(x2) is int&. However, we get both x1 and x2 as int, this because we donot consider indirect_ref which wrap await_resume call expression (convert_from_reference), and it is invoked into type deduction of auto and decltype(auto). This patch wrap co_await expression with indirect_ref which should be same with await_resume expression, and it sink await_resume expression to real call_expr when replace co_await expression. it fix type deduction of auto and decltype(auto) in coroutine. Bootstrap and test on X86_64, is it OK? + /* Wrap co_await_expr. */ + if (TREE_CODE (awrs_call) == INDIRECT_REF) + await_expr = build1_loc (loc, INDIRECT_REF, TREE_TYPE (awrs_call), + await_expr); I think all you want here is: await_expr = convert_from_reference (await_expr); Thanks, I'll update it. Regards JunMa Hi nathan, Here is the update. Regards JunMa gcc/cp 2020-02-03 Jun Ma * coroutines.cc (build_co_await): Call convert_from_reference to wrap co_await_expr with indirect_ref which avoid reference/non-reference type confusion. (co_await_expander): Sink to call_expr if await_resume is wrapped by indirect_ref. gcc/testsuite 2020-02-03 Jun Ma * g++.dg/coroutines/co-await-14-return-ref-to-auto.C: New test. Regards JunMa gcc/cp 2020-01-21 Jun Ma * coroutines.cc (build_co_await): Wrap co_await with indirect_ref when needed. (co_await_expander): Sink to call_expr if await_resume is wrapped by indirect_ref. gcc/testsuite 2020-01-21 Jun Ma * g++.dg/coroutines/co-await-14-return-ref-to-auto.C: Add label. --- gcc/cp/coroutines.cc | 12 +++-- .../torture/co-await-14-return-ref-to-auto.C | 45 +++ 2 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 62d92d9fc98..e7a150d257f 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -749,9 +749,12 @@ build_co_await (location_t loc, tree a, suspend_point_kind suspend_kind) TREE_VEC_ELT (awaiter_calls, 1) = awsp_call; /* await_suspend(). */ TREE_VEC_ELT (awaiter_calls, 2) = awrs_call; /* await_resume(). */ - return build5_loc (loc, CO_AWAIT_EXPR, TREE_TYPE (awrs_call), a, -e_proxy, o, awaiter_calls, -build_int_cst (integer_type_node, (int) suspend_kind)); + tree await_expr = build5_loc (loc, CO_AWAIT_EXPR, + TREE_TYPE (TREE_TYPE (awrs_func)), + a, e_proxy, o, awaiter_calls, + build_int_cst (integer_type_node, + (int) suspend_kind)); + return convert_from_reference (await_expr); } tree @@ -1512,6 +1515,9 @@ co_await_expander (tree *stmt, int * /*do_subtree*/, void *d) /* This will produce the value (if one is provided) from the co_await expression. */ tree resume_call = TREE_VEC_ELT (awaiter_calls, 2); /* await_resume(). */ + if (TREE_CODE (resume_call) == INDIRECT_REF) +/* Sink to await_resume call_expr. */ +resume_call = TREE_OPERAND (resume_call, 0); switch (stmt_code) { default: /* not likely to work .. but... */ diff --git a/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C new file mode 100644 index 000..0a7c035ef2a --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C @@ -0,0 +1,45 @@ +// { dg-do run } + +/* The simplest valued co_await we can do. */ + +#include "../coro.h" + +// boiler-plate for tests of codegen +#include "../coro1-ret-int-yield-int.h" + + +coro1 +f () +{ + int t1 = 5; + int t2 = 5; + auto gX = co_await coro1::suspend_always_intrefprt{t1}; + if (gX != t1) + abort(); + decltype(auto) gX1 = co_await coro1::suspend_always_intrefprt{t2}; + if (&gX1 != &t2) + abort(); + co_return t1 + 10; +} + +int main () +{ + PRINT ("main: create coro1"); + struct coro1 f_coro = f (); + if (f_coro.handle.done()) +{ + PRINT ("main: we should not be 'done' [1]"); + abort (); +} + PRINT ("main: resuming [1] initial suspend"); + while (!f_coro.handle.done()) +f_coro.handle.resume(); + /* we should now have returned with the co_return (15) */ + if (!f_coro.handle.done()) +{ + PRINT ("mai
Re: [PATCH Coroutines v2] Handle type deduction of auto and decltype(auto) with indirect_ref expression
在 2020/2/3 下午8:53, Nathan Sidwell 写道: On 2/2/20 9:28 PM, JunMa wrote: 在 2020/2/3 上午9:03, JunMa 写道: I think all you want here is: await_expr = convert_from_reference (await_expr); Thanks, I'll update it. Regards JunMa Hi nathan, Here is the update. /* This will produce the value (if one is provided) from the co_await expression. */ tree resume_call = TREE_VEC_ELT (awaiter_calls, 2); /* await_resume(). */ + if (TREE_CODE (resume_call) == INDIRECT_REF) + /* Sink to await_resume call_expr. */ + resume_call = TREE_OPERAND (resume_call, 0); sorry, I should have realized you still needed this bit. This too is stripping a reference access, right? I.e. TYPE_REF_P (TREE_TYPE (resume_call)) is true. You should that as the condition too. you mean REFERENCE_REF_P (resume_call) ? here is the update. Regards JunMa the other part of the patch is fine nathan --- gcc/cp/coroutines.cc | 12 +++-- .../torture/co-await-14-return-ref-to-auto.C | 45 +++ 2 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 8a0ce384425..0c2ec32d7db 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -800,9 +800,12 @@ build_co_await (location_t loc, tree a, suspend_point_kind suspend_kind) TREE_VEC_ELT (awaiter_calls, 1) = awsp_call; /* await_suspend(). */ TREE_VEC_ELT (awaiter_calls, 2) = awrs_call; /* await_resume(). */ - return build5_loc (loc, CO_AWAIT_EXPR, TREE_TYPE (awrs_call), a, -e_proxy, o, awaiter_calls, -build_int_cst (integer_type_node, (int) suspend_kind)); + tree await_expr = build5_loc (loc, CO_AWAIT_EXPR, + TREE_TYPE (TREE_TYPE (awrs_func)), + a, e_proxy, o, awaiter_calls, + build_int_cst (integer_type_node, + (int) suspend_kind)); + return convert_from_reference (await_expr); } tree @@ -1563,6 +1566,9 @@ co_await_expander (tree *stmt, int * /*do_subtree*/, void *d) /* This will produce the value (if one is provided) from the co_await expression. */ tree resume_call = TREE_VEC_ELT (awaiter_calls, 2); /* await_resume(). */ + if (REFERENCE_REF_P (resume_call)) +/* Sink to await_resume call_expr. */ +resume_call = TREE_OPERAND (resume_call, 0); switch (stmt_code) { default: /* not likely to work .. but... */ diff --git a/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C new file mode 100644 index 000..0a7c035ef2a --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-return-ref-to-auto.C @@ -0,0 +1,45 @@ +// { dg-do run } + +/* The simplest valued co_await we can do. */ + +#include "../coro.h" + +// boiler-plate for tests of codegen +#include "../coro1-ret-int-yield-int.h" + + +coro1 +f () +{ + int t1 = 5; + int t2 = 5; + auto gX = co_await coro1::suspend_always_intrefprt{t1}; + if (gX != t1) + abort(); + decltype(auto) gX1 = co_await coro1::suspend_always_intrefprt{t2}; + if (&gX1 != &t2) + abort(); + co_return t1 + 10; +} + +int main () +{ + PRINT ("main: create coro1"); + struct coro1 f_coro = f (); + if (f_coro.handle.done()) +{ + PRINT ("main: we should not be 'done' [1]"); + abort (); +} + PRINT ("main: resuming [1] initial suspend"); + while (!f_coro.handle.done()) +f_coro.handle.resume(); + /* we should now have returned with the co_return (15) */ + if (!f_coro.handle.done()) +{ + PRINT ("main: we should be 'done' "); + abort (); +} + puts ("main: done"); + return 0; +} -- 2.19.1.3.ge56e4f7
[PATCH coroutines] Change lowering behavior of alias variable from copy to substitute
Hi When testing coroutines with lambda function, I find we copy each captured variable to frame. However, according to gimplify pass, for each declaration that is an alias for another expression(DECL_VALUE_EXPR), we can substitute them directly. Since lambda captured variables is one of this kind. It is better to replace them rather than copy them, This can reduce frame size (all of the captured variables are field of closure class) and avoid extra copy behavior as well. This patch remove all of the code related to copy captured variable. Instead, we first rewrite DECL_VALUE_EXPR with frame field, then we check every variable whether it has DECL_VALUE_EXPR, and then substitute it, this patch does not create frame field for such variables. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-04 Jun Ma * coroutines.cc (morph_fn_to_coro): Remove code related to copy captured variable. (register_local_var_uses): Ditto. (register_param_uses): Collect use of parameters inside DECL_VALUE_EXPR. (transform_local_var_uses): Substitute the alias variable with DECL_VALUE_EXPR if it has one. gcc/testsuite 2020-02-04 Jun Ma * g++.dg/coroutines/lambda-07-multi-capture.C: New test. --- gcc/cp/coroutines.cc | 117 +- .../torture/lambda-07-multi-capture.C | 55 2 files changed, 85 insertions(+), 87 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/lambda-07-multi-capture.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 0c2ec32d7db..1bc2ed7f89c 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -1790,6 +1790,11 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) cp_walk_tree (&DECL_SIZE (lvar), transform_local_var_uses, d, NULL); cp_walk_tree (&DECL_SIZE_UNIT (lvar), transform_local_var_uses, d, NULL); + if (DECL_HAS_VALUE_EXPR_P (lvar)) + { + tree t = DECL_VALUE_EXPR (lvar); + cp_walk_tree (&t, transform_local_var_uses, d, NULL); + } /* TODO: implement selective generation of fields when vars are known not-used. */ @@ -1815,9 +1820,9 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) gcc_checking_assert (existed); if (local_var.field_id == NULL_TREE) - pvar = &DECL_CHAIN (*pvar); /* Wasn't used. */ - - *pvar = DECL_CHAIN (*pvar); /* discard this one, we replaced it. */ + pvar = &DECL_CHAIN (*pvar); /* Wasn't used or was an alias. */ + else + *pvar = DECL_CHAIN (*pvar); /* discard this one, we replaced it. */ } *do_subtree = 0; /* We've done the body already. */ @@ -1847,8 +1852,16 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) if (local_var_i == NULL) return NULL_TREE; - /* This is our revised 'local' i.e. a frame slot. */ - tree revised = local_var_i->field_idx; + /* This is our revised 'local' i.e. a frame slot or an alias. */ + tree revised = NULL_TREE; + if (local_var_i->field_id == NULL_TREE) +{ + gcc_checking_assert (DECL_HAS_VALUE_EXPR_P (var_decl)); + /* If the decl is an alias for another expression, substitute it. */ + revised = DECL_VALUE_EXPR (var_decl); +} + else +revised = local_var_i->field_idx; gcc_checking_assert (DECL_CONTEXT (var_decl) == lvd->context); if (decl_expr_p && DECL_INITIAL (var_decl)) @@ -2796,6 +2809,12 @@ register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d) { param_frame_data *data = (param_frame_data *) d; + if (VAR_P (*stmt) && DECL_HAS_VALUE_EXPR_P (*stmt)) +{ + tree x = DECL_VALUE_EXPR (*stmt); + cp_walk_tree (&x, register_param_uses, d, NULL); +} + if (TREE_CODE (*stmt) != PARM_DECL) return NULL_TREE; @@ -2840,7 +2859,6 @@ struct local_vars_frame_data { tree *field_list; hash_map *local_var_uses; - vec *captures; unsigned int nest_depth, bind_indx; location_t loc; bool saw_capture; @@ -2869,26 +2887,27 @@ register_local_var_uses (tree *stmt, int *do_subtree, void *d) local_var_info &local_var = lvd->local_var_uses->get_or_insert (lvar, &existed); gcc_checking_assert (!existed); + /* For var as an alias, just leave it. */ + if (DECL_HAS_VALUE_EXPR_P (lvar)) + continue; tree lvtype = TREE_TYPE (lvar); tree lvname = DECL_NAME (lvar); - bool captured = is_normal_capture_proxy (lvar); /* Make names depth+index unique, so that we can support nested scopes with identically named locals. */ char *buf; size_t namsize = sizeof ("__lv...") + 18; - co
[PATCH coroutines] Build co_await/yield_expr with unknown_type in processing_template_decl phase
Hi This patch builds co_await/yield_expr with unknown_type when we can not know the promise type in processing_template_decl phase. it avoid to confuse compiler when handing type deduction and conversion. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-05 Jun Ma * coroutines.cc (finish_co_await_expr): Build co_await_expr with unknown_type_node. (finish_co_yield_expr): Ditto. *pt.c (type_dependent_expression_p): Set co_await/yield_expr with unknown type as dependent. gcc/testsuite 2020-02-05 Jun Ma * g++.dg/coroutines/torture/co-await-14-template-traits.C: New test. --- gcc/cp/coroutines.cc | 8 +++ gcc/cp/pt.c | 5 .../torture/co-await-14-template-traits.C | 24 +++ 3 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/co-await-14-template-traits.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 7525d7c035a..e380ee24c5f 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -834,9 +834,8 @@ finish_co_await_expr (location_t kw, tree expr) /* If we don't know the promise type, we can't proceed. */ tree functype = TREE_TYPE (current_function_decl); if (dependent_type_p (functype) || type_dependent_expression_p (expr)) - return build5_loc (kw, CO_AWAIT_EXPR, TREE_TYPE (expr), expr, NULL_TREE, - NULL_TREE, NULL_TREE, integer_zero_node); -} + return build5_loc (kw, CO_AWAIT_EXPR, unknown_type_node, expr, + NULL_TREE, NULL_TREE, NULL_TREE, integer_zero_node); /* We must be able to look up the "await_transform" method in the scope of the promise type, and obtain its return type. */ @@ -912,9 +911,8 @@ finish_co_yield_expr (location_t kw, tree expr) tree functype = TREE_TYPE (current_function_decl); /* If we don't know the promise type, we can't proceed. */ if (dependent_type_p (functype) || type_dependent_expression_p (expr)) - return build2_loc (kw, CO_YIELD_EXPR, TREE_TYPE (expr), expr, + return build2_loc (kw, CO_YIELD_EXPR, unknown_type_node, expr, NULL_TREE); -} if (!coro_promise_type_found_p (current_function_decl, kw)) /* We must be able to look up the "yield_value" method in the scope of diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 40ff3c3a089..cfc3393991e 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -26743,6 +26743,11 @@ type_dependent_expression_p (tree expression) if (TREE_CODE (expression) == SCOPE_REF) return false; + /* CO_AWAIT/YIELD_EXPR with unknown type is always dependent. */ + if (TREE_CODE (expression) == CO_AWAIT_EXPR + || TREE_CODE (expression) == CO_YIELD_EXPR) + return true; + if (BASELINK_P (expression)) { if (BASELINK_OPTYPE (expression) diff --git a/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-template-traits.C b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-template-traits.C new file mode 100644 index 000..4e670b1c308 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-template-traits.C @@ -0,0 +1,24 @@ +// { dg-do compile } +// Test we create co_await_expr with dependent type rather than type of awaitable class + +#include "../coro.h" +#include "../coro1-ret-int-yield-int.h" +#include + +struct TestAwaiter { +int recent_test; +TestAwaiter(int test) : recent_test{test} {} +bool await_ready() { return true; } +void await_suspend(coro::coroutine_handle<>) {} +int await_resume() { return recent_test;} +void return_value(int x) { recent_test = x;} +}; + +template +coro1 test_temparg (std::chrono::duration dur) +{ + auto sum = co_await TestAwaiter(1); + if (!sum) +dur.count(); + co_return 0; +} -- 2.19.1.3.ge56e4f7
Re: [PATCH coroutines v1] Build co_await/yield_expr with unknown_type in processing_template_decl phase
在 2020/2/5 下午2:14, JunMa 写道: Hi This patch builds co_await/yield_expr with unknown_type when we can not know the promise type in processing_template_decl phase. it avoid to confuse compiler when handing type deduction and conversion. Bootstrap and test on X86_64, is it OK? Regards JunMa Hi sorry for that '}' was removed, here is the update patch:) Regards JunMa gcc/cp 2020-02-05 Jun Ma * coroutines.cc (finish_co_await_expr): Build co_await_expr with unknown_type_node. (finish_co_yield_expr): Ditto. *pt.c (type_dependent_expression_p): Set co_await/yield_expr with unknown type as dependent. gcc/testsuite 2020-02-05 Jun Ma * g++.dg/coroutines/torture/co-await-14-template-traits.C: New test. --- gcc/cp/coroutines.cc | 6 ++--- gcc/cp/pt.c | 5 .../torture/co-await-14-template-traits.C | 24 +++ 3 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/co-await-14-template-traits.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 7525d7c035a..04e56b9a636 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -834,8 +834,8 @@ finish_co_await_expr (location_t kw, tree expr) /* If we don't know the promise type, we can't proceed. */ tree functype = TREE_TYPE (current_function_decl); if (dependent_type_p (functype) || type_dependent_expression_p (expr)) - return build5_loc (kw, CO_AWAIT_EXPR, TREE_TYPE (expr), expr, NULL_TREE, - NULL_TREE, NULL_TREE, integer_zero_node); + return build5_loc (kw, CO_AWAIT_EXPR, unknown_type_node, expr, + NULL_TREE, NULL_TREE, NULL_TREE, integer_zero_node); } /* We must be able to look up the "await_transform" method in the scope of @@ -912,7 +912,7 @@ finish_co_yield_expr (location_t kw, tree expr) tree functype = TREE_TYPE (current_function_decl); /* If we don't know the promise type, we can't proceed. */ if (dependent_type_p (functype) || type_dependent_expression_p (expr)) - return build2_loc (kw, CO_YIELD_EXPR, TREE_TYPE (expr), expr, + return build2_loc (kw, CO_YIELD_EXPR, unknown_type_node, expr, NULL_TREE); } diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 40ff3c3a089..cfc3393991e 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -26743,6 +26743,11 @@ type_dependent_expression_p (tree expression) if (TREE_CODE (expression) == SCOPE_REF) return false; + /* CO_AWAIT/YIELD_EXPR with unknown type is always dependent. */ + if (TREE_CODE (expression) == CO_AWAIT_EXPR + || TREE_CODE (expression) == CO_YIELD_EXPR) + return true; + if (BASELINK_P (expression)) { if (BASELINK_OPTYPE (expression) diff --git a/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-template-traits.C b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-template-traits.C new file mode 100644 index 000..4e670b1c308 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/co-await-14-template-traits.C @@ -0,0 +1,24 @@ +// { dg-do compile } +// Test we create co_await_expr with dependent type rather than type of awaitable class + +#include "../coro.h" +#include "../coro1-ret-int-yield-int.h" +#include + +struct TestAwaiter { +int recent_test; +TestAwaiter(int test) : recent_test{test} {} +bool await_ready() { return true; } +void await_suspend(coro::coroutine_handle<>) {} +int await_resume() { return recent_test;} +void return_value(int x) { recent_test = x;} +}; + +template +coro1 test_temparg (std::chrono::duration dur) +{ + auto sum = co_await TestAwaiter(1); + if (!sum) +dur.count(); + co_return 0; +} -- 2.19.1.3.ge56e4f7
Re: [PATCH coroutines] Change lowering behavior of alias variable from copy to substitute
在 2020/2/4 下午8:17, JunMa 写道: Hi When testing coroutines with lambda function, I find we copy each captured variable to frame. However, according to gimplify pass, for each declaration that is an alias for another expression(DECL_VALUE_EXPR), we can substitute them directly. Since lambda captured variables is one of this kind. It is better to replace them rather than copy them, This can reduce frame size (all of the captured variables are field of closure class) and avoid extra copy behavior as well. This patch remove all of the code related to copy captured variable. Instead, we first rewrite DECL_VALUE_EXPR with frame field, then we check every variable whether it has DECL_VALUE_EXPR, and then substitute it, this patch does not create frame field for such variables. Bootstrap and test on X86_64, is it OK? Regards JunMa Hi minor update: only handle var_decl when iterate BIND_EXPR_VARS in register_local_var_uses. Regards JunMa gcc/cp 2020-02-04 Jun Ma * coroutines.cc (morph_fn_to_coro): Remove code related to copy captured variable. (register_local_var_uses): Ditto. (register_param_uses): Collect use of parameters inside DECL_VALUE_EXPR. (transform_local_var_uses): Substitute the alias variable with DECL_VALUE_EXPR if it has one. gcc/testsuite 2020-02-04 Jun Ma * g++.dg/coroutines/lambda-07-multi-capture.C: New test. --- gcc/cp/coroutines.cc | 117 +- .../torture/lambda-07-multi-capture.C | 55 2 files changed, 85 insertions(+), 87 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/lambda-07-multi-capture.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 0c2ec32d7db..1bc2ed7f89c 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -1790,6 +1790,11 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) cp_walk_tree (&DECL_SIZE (lvar), transform_local_var_uses, d, NULL); cp_walk_tree (&DECL_SIZE_UNIT (lvar), transform_local_var_uses, d, NULL); + if (VAR_P (lvar) && DECL_HAS_VALUE_EXPR_P (lvar)) + { + tree t = DECL_VALUE_EXPR (lvar); + cp_walk_tree (&t, transform_local_var_uses, d, NULL); + } /* TODO: implement selective generation of fields when vars are known not-used. */ @@ -1815,9 +1820,9 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) gcc_checking_assert (existed); if (local_var.field_id == NULL_TREE) - pvar = &DECL_CHAIN (*pvar); /* Wasn't used. */ - - *pvar = DECL_CHAIN (*pvar); /* discard this one, we replaced it. */ + pvar = &DECL_CHAIN (*pvar); /* Wasn't used or was an alias. */ + else + *pvar = DECL_CHAIN (*pvar); /* discard this one, we replaced it. */ } *do_subtree = 0; /* We've done the body already. */ @@ -1847,8 +1852,16 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) if (local_var_i == NULL) return NULL_TREE; - /* This is our revised 'local' i.e. a frame slot. */ - tree revised = local_var_i->field_idx; + /* This is our revised 'local' i.e. a frame slot or an alias. */ + tree revised = NULL_TREE; + if (local_var_i->field_id == NULL_TREE) +{ + gcc_checking_assert (DECL_HAS_VALUE_EXPR_P (var_decl)); + /* If the decl is an alias for another expression, substitute it. */ + revised = DECL_VALUE_EXPR (var_decl); +} + else +revised = local_var_i->field_idx; gcc_checking_assert (DECL_CONTEXT (var_decl) == lvd->context); if (decl_expr_p && DECL_INITIAL (var_decl)) @@ -2796,6 +2809,12 @@ register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d) { param_frame_data *data = (param_frame_data *) d; + if (VAR_P (*stmt) && DECL_HAS_VALUE_EXPR_P (*stmt)) +{ + tree x = DECL_VALUE_EXPR (*stmt); + cp_walk_tree (&x, register_param_uses, d, NULL); +} + if (TREE_CODE (*stmt) != PARM_DECL) return NULL_TREE; @@ -2840,7 +2859,6 @@ struct local_vars_frame_data { tree *field_list; hash_map *local_var_uses; - vec *captures; unsigned int nest_depth, bind_indx; location_t loc; bool saw_capture; @@ -2869,26 +2887,27 @@ register_local_var_uses (tree *stmt, int *do_subtree, void *d) local_var_info &local_var = lvd->local_var_uses->get_or_insert (lvar, &existed); gcc_checking_assert (!existed); + /* For non-VAR_DECL or var as an alias, just leave it. */ + if (!VAR_P (lvar) || DECL_HAS_VALUE_EXPR_P (lvar)) + continue; tree lvtype = TREE_TYPE (lvar); tree lvname = DECL_NAME (lvar); - bool captured = is_normal_capture_proxy (lvar); /* M
Re: [PATCH coroutines] Change lowering behavior of alias variable from copy to substitute
在 2020/2/6 下午5:12, Iain Sandoe 写道: Hi JunMa, JunMa wrote: 在 2020/2/4 下午8:17, JunMa 写道: Hi When testing coroutines with lambda function, I find we copy each captured variable to frame. However, according to gimplify pass, for each declaration that is an alias for another expression(DECL_VALUE_EXPR), we can substitute them directly. Since lambda captured variables is one of this kind. It is better to replace them rather than copy them, This can reduce frame size (all of the captured variables are field of closure class) and avoid extra copy behavior as well. This patch remove all of the code related to copy captured variable. Instead, we first rewrite DECL_VALUE_EXPR with frame field, then we check every variable whether it has DECL_VALUE_EXPR, and then substitute it, this patch does not create frame field for such variables. Bootstrap and test on X86_64, is it OK? minor update: only handle var_decl when iterate BIND_EXPR_VARS in register_local_var_uses. Do you have any other local patches applied along with this? Testing locally (on Darwin), I see regressions with optimisation O2/O3/Os e.g: class-05-lambda-capture-copy-local.C -O2 (internal compiler error) class-06-lambda-capture-ref.C -O2 (internal compiler error) lambda-05-capture-copy-local.C -O2 (internal compiler error) lambda-06-multi-capture.C -O2 (internal compiler error) lambda-07-multi-capture.C -O2 (internal compiler error) lambda-08-co-ret-parm-ref.C -O3 -g (internal compiler error) I have applied this to master, and on top of the patches posted by you and Bin, but the results are the same. +Bin This is known issue which has been fixed by Bin, he will send the patch. Regards JunMa thanks Iain gcc/cp 2020-02-04 Jun Ma * coroutines.cc (morph_fn_to_coro): Remove code related to copy captured variable. (register_local_var_uses): Ditto. (register_param_uses): Collect use of parameters inside DECL_VALUE_EXPR. (transform_local_var_uses): Substitute the alias variable with DECL_VALUE_EXPR if it has one. gcc/testsuite 2020-02-04 Jun Ma * g++.dg/coroutines/lambda-07-multi-capture.C: New test. <0001-fix-alias-variable.patch>
Re: [PATCH coroutines] Change lowering behavior of alias variable from copy to substitute
在 2020/2/6 下午7:09, JunMa 写道: 在 2020/2/6 下午5:12, Iain Sandoe 写道: Hi JunMa, JunMa wrote: 在 2020/2/4 下午8:17, JunMa 写道: Hi When testing coroutines with lambda function, I find we copy each captured variable to frame. However, according to gimplify pass, for each declaration that is an alias for another expression(DECL_VALUE_EXPR), we can substitute them directly. Since lambda captured variables is one of this kind. It is better to replace them rather than copy them, This can reduce frame size (all of the captured variables are field of closure class) and avoid extra copy behavior as well. This patch remove all of the code related to copy captured variable. Instead, we first rewrite DECL_VALUE_EXPR with frame field, then we check every variable whether it has DECL_VALUE_EXPR, and then substitute it, this patch does not create frame field for such variables. Bootstrap and test on X86_64, is it OK? minor update: only handle var_decl when iterate BIND_EXPR_VARS in register_local_var_uses. Do you have any other local patches applied along with this? Testing locally (on Darwin), I see regressions with optimisation O2/O3/Os e.g: class-05-lambda-capture-copy-local.C -O2 (internal compiler error) class-06-lambda-capture-ref.C -O2 (internal compiler error) lambda-05-capture-copy-local.C -O2 (internal compiler error) lambda-06-multi-capture.C -O2 (internal compiler error) lambda-07-multi-capture.C -O2 (internal compiler error) lambda-08-co-ret-parm-ref.C -O3 -g (internal compiler error) I have applied this to master, and on top of the patches posted by you and Bin, but the results are the same. +Bin This is known issue which has been fixed by Bin, he will send the patch. Hi Iain, After dig into these regression, I find the patch needs some fix: rather than replace these alias in front end, we can just leave them to gimplify pass. This change fixes the regressions and simplifies the implementation as well. Also, no more patches are needed. Here is the updated patch. Regards JunMa Regards JunMa thanks Iain gcc/cp 2020-02-04 Jun Ma * coroutines.cc (morph_fn_to_coro): Remove code related to copy captured variable. (register_local_var_uses): Ditto. (register_param_uses): Collect use of parameters inside DECL_VALUE_EXPR. (transform_local_var_uses): Substitute the alias variable with DECL_VALUE_EXPR if it has one. gcc/testsuite 2020-02-04 Jun Ma * g++.dg/coroutines/lambda-07-multi-capture.C: New test. <0001-fix-alias-variable.patch> --- gcc/cp/coroutines.cc | 112 -- .../torture/lambda-07-multi-capture.C | 55 + 2 files changed, 79 insertions(+), 88 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/lambda-07-multi-capture.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 0879d57b060..decec4550af 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -1793,6 +1793,11 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) cp_walk_tree (&DECL_SIZE (lvar), transform_local_var_uses, d, NULL); cp_walk_tree (&DECL_SIZE_UNIT (lvar), transform_local_var_uses, d, NULL); + if (VAR_P (lvar) && DECL_HAS_VALUE_EXPR_P (lvar)) + { + tree t = DECL_VALUE_EXPR (lvar); + cp_walk_tree (&t, transform_local_var_uses, d, NULL); + } /* TODO: implement selective generation of fields when vars are known not-used. */ @@ -1818,9 +1823,9 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) gcc_checking_assert (existed); if (local_var.field_id == NULL_TREE) - pvar = &DECL_CHAIN (*pvar); /* Wasn't used. */ - - *pvar = DECL_CHAIN (*pvar); /* discard this one, we replaced it. */ + pvar = &DECL_CHAIN (*pvar); /* Wasn't used or was an alias. */ + else + *pvar = DECL_CHAIN (*pvar); /* discard this one, we replaced it. */ } *do_subtree = 0; /* We've done the body already. */ @@ -1844,10 +1849,11 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) return NULL_TREE; /* VAR_DECLs that are not recorded can belong to the proxies we've placed - for the promise and coroutine handle(s), to global vars or to compiler - temporaries. Skip past these, we will handle them later. */ + for the promise and coroutine handle(s), to global vars, to compiler + temporaries or to vars as alias. Skip past these, we will handle + them later. */ local_var_info *local_var_i = lvd->local_var_uses->get (var_decl); - if (local_var_i == NULL) + if (local_var_i == NULL || local_var_i->field_id == NULL_TREE) return NULL_TREE; /* This is our revised 'local' i.e. a fr
Re: [PATCH coroutines v1] Build co_await/yield_expr with unknown_type in processing_template_decl phase
Kindly ping. Regards JunMa 在 2020/2/5 下午5:17, JunMa 写道: 在 2020/2/5 下午2:14, JunMa 写道: Hi This patch builds co_await/yield_expr with unknown_type when we can not know the promise type in processing_template_decl phase. it avoid to confuse compiler when handing type deduction and conversion. Bootstrap and test on X86_64, is it OK? Regards JunMa Hi sorry for that '}' was removed, here is the update patch:) Regards JunMa gcc/cp 2020-02-05 Jun Ma * coroutines.cc (finish_co_await_expr): Build co_await_expr with unknown_type_node. (finish_co_yield_expr): Ditto. *pt.c (type_dependent_expression_p): Set co_await/yield_expr with unknown type as dependent. gcc/testsuite 2020-02-05 Jun Ma * g++.dg/coroutines/torture/co-await-14-template-traits.C: New test.
[PATCH coroutines] Do not strip cleanup_point when promote temporaries out of current stmt
Hi In maybe_promote_captured_temps, the cleanup_point_stmt has been stripped when handle temporaries captured by reference. However, maybe there are non-reference temporaries in current stmt which cause ice in gimpilify pass. This patch fix this. The testcase comes from cppcoro and is reduced by creduce. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-11 Jun Ma * coroutines.cc (maybe_promote_captured_temps): Do not strip cleanup_point_stmt. gcc/testsuite 2020-02-11 Jun Ma * g++.dg/coroutines/torture/lambda-09-capture-object.C: New test. --- gcc/cp/coroutines.cc | 8 +- .../torture/lambda-09-capture-object.C| 132 ++ 2 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/lambda-09-capture-object.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 9fcbb647201..3095b46ecb1 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -2681,11 +2681,9 @@ maybe_promote_captured_temps (tree *stmt, void *d) location_t sloc = EXPR_LOCATION (*stmt); tree aw_bind = build3_loc (sloc, BIND_EXPR, void_type_node, NULL, NULL, NULL); - tree aw_statement_current; - if (TREE_CODE (*stmt) == CLEANUP_POINT_EXPR) - aw_statement_current = TREE_OPERAND (*stmt, 0); - else - aw_statement_current = *stmt; + /* Do not skip cleanup_point since maybe there are other temporaries + need cleanup. */ + tree aw_statement_current = *stmt; /* Collected the scope vars we need move the temps to regular. */ tree aw_bind_body = push_stmt_list (); tree varlist = NULL_TREE; diff --git a/gcc/testsuite/g++.dg/coroutines/torture/lambda-09-capture-object.C b/gcc/testsuite/g++.dg/coroutines/torture/lambda-09-capture-object.C new file mode 100644 index 000..1297b72cc8f --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/lambda-09-capture-object.C @@ -0,0 +1,132 @@ +// { dg-do compile } +// { dg-additional-options "-Wno-return-type" } + +#include "../coro.h" +#include +#include + +template > struct a; +template +struct a().await_resume())>> +: std::conjunction<> {}; +template +auto c(b value, int) -> decltype(value.operator co_await()); +template ::value, int> = 0> b c(b, int); +template auto d(b value) -> decltype(c(value, 3)) {} +template struct f { + using e = decltype(d(std::declval())); + using h = decltype(std::declval().await_resume()); +}; +template class s; +template class s> { +public: + s(std::tuple); + auto operator co_await() { +struct aa { + std::tuple await_resume() {}; + s i; +}; +return aa{*this}; + } +}; +template class k { +public: + using l = std::coroutine_handle; + j m(); +}; +template class ab { +public: + using promise_type = k; + using l = typename promise_type::l; + auto m() { return n.promise().m(); } + auto ac() { return m(); } + l n; +}; +template ::h, + std::enable_if_t, int> = 0> +ab p(o); +template struct u { using ad = b; }; +template using t = typename u::ad; +template , int> = 0> +auto af(ae... ag) { + return s>>::h>...>>( + std::make_tuple(p(ag)...)); +} +template class ah { + using e = typename f::e; + +public: + ah(q ai, o aj) : r(ai), ak(d(aj)) {} + auto await_ready() { return 0; } + template auto await_suspend(std::coroutine_handle) {} + template , int> = 0> + auto await_resume() { +return invoke(r, ak.await_resume()); + } + q r; + e ak; +}; +template class x { +public: + template , int> = 0> + x(y ai, al aj) : r(ai), i(aj) {} + auto operator co_await() { return ah(r, i); } + q r; + o i; +}; +template auto am(q ai, o aj) { + return x>, + std::remove_cv_t>>(ai, aj); +} +template , int> = 0> +auto an(ae... ag) { + return am( + [](auto ao) { +auto ap = +apply([](auto... aq) { return std::make_tuple(aq.ac()...); }, ao); +return ap; + }, + af(ag...)); +} +class ar; +class z { +public: + ar as(); +}; +class at { +public: + ~at(); +}; +class ar { +public: + at await_resume(); +}; +class au; +class av { + struct aw { +bool await_ready(); +template void await_suspend(std::coroutine_handle); +void await_resume(); + }; + +public: + auto initial_suspend() { return std::suspend_always{}; } + auto final_suspend() { return aw{}; } +}; +class ax : public av { +public: + au get_return_object(); + void unhandled_exception(); +}; +class au { +public: + using promise_type = ax; +}; +void d() { + []() -> au { +z ay; +co_await an(ay.as()); + }; +} -- 2.19.1.3.ge56e4f7
[PATCH coroutines] Set side effects flag for BIND_EXPR which build in maybe_promote_captured_temps
Hi As title. in maybe_promote_captured_temps, we promote captured temporaries and co_await_expr into a new BIND_EXPR. As the BIND_EXPR contains co_await_expr and maybe other function calls, the side effects flag should be set. This patch fix one mismatch in cppcoro, the testcase comes from cppcoro and is reduced by creduce. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-11 Jun Ma * coroutines.cc (maybe_promote_captured_temps): Set side effects flag for BIND_EXPR. gcc/testsuite 2020-02-11 Jun Ma * g++.dg/coroutines/torture/lambda-10-co-await-lambda.C: New test.
Re: [PATCH coroutines] Set side effects flag for BIND_EXPR which build in maybe_promote_captured_temps
在 2020/2/11 上午10:50, JunMa 写道: Hi As title. in maybe_promote_captured_temps, we promote captured temporaries and co_await_expr into a new BIND_EXPR. As the BIND_EXPR contains co_await_expr and maybe other function calls, the side effects flag should be set. This patch fix one mismatch in cppcoro, the testcase comes from cppcoro and is reduced by creduce. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-11 Jun Ma * coroutines.cc (maybe_promote_captured_temps): Set side effects flag for BIND_EXPR. gcc/testsuite 2020-02-11 Jun Ma * g++.dg/coroutines/torture/lambda-10-co-await-lambda.C: New test. Hi Sorry for forgetting the patch. Here it is. Regards JunMa --- gcc/cp/coroutines.cc | 2 + .../torture/lambda-10-co-await-lambda.C | 47 +++ 2 files changed, 49 insertions(+) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/lambda-10-co-await-lambda.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index decec4550af..004e0a1a659 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -2735,6 +2735,8 @@ maybe_promote_captured_temps (tree *stmt, void *d) } BIND_EXPR_BLOCK (aw_bind) = b_block; + /* Set side effects flag since the BIND_EXPR contains co_await expr. */ + TREE_SIDE_EFFECTS (aw_bind) = 1; *stmt = aw_bind; } return res; diff --git a/gcc/testsuite/g++.dg/coroutines/torture/lambda-10-co-await-lambda.C b/gcc/testsuite/g++.dg/coroutines/torture/lambda-10-co-await-lambda.C new file mode 100644 index 000..43d4ff6b77e --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/lambda-10-co-await-lambda.C @@ -0,0 +1,47 @@ +// { dg-do run } + +#include "../coro.h" +template struct a; +struct b { + struct c { +int await_ready() {return 0;} +template void await_suspend(d) {} +void await_resume() noexcept {} + }; + auto initial_suspend() { return std::suspend_never{}; } + auto final_suspend() { return c{}; } +}; +template struct e { + int f() {return 1;} +}; +template <> struct e : b { + a get_return_object() noexcept; + void unhandled_exception(); +}; +template struct a { + using promise_type = e; + struct h { +std::coroutine_handle j; +int await_ready() {return 0;} +bool await_suspend(std::coroutine_handle<>) { return false;} + }; + auto operator co_await() { +struct awaitable : h { + auto await_resume() { return this->j.promise().f(); } +}; +return awaitable{}; + } +}; +a<> e::get_return_object() noexcept { return a<>{};} + +int main() { + auto k = []() -> a { return a{};}; + int l = 0 ; + auto m = [&]() -> a<> { +for (int i; i < 100; ++i) + l += co_await k(); + }; + m(); + if (l != 100) +abort(); +} -- 2.19.1.3.ge56e4f7
[PATCH coroutines] Handle component_ref in captures_temporary
Hi In captures_temporary, the current implementation fails to handle component_ref. This causes ice with case co_await A while operator co_await is defined in base class of A. Also it is necessary to capture the object of base class as if it is temporary object. This patch strips component_ref to its base object and check it as usual. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-12 Jun Ma * coroutines.cc (captures_temporary): Strip component_ref to its base object. gcc/testsuite 2020-02-12 Jun Ma * g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C: New test. --- gcc/cp/coroutines.cc | 15 ++- .../torture/co-await-15-capture-comp-ref.C| 99 +++ 2 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 1a77f5dbfce..a6adb946df1 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -2474,11 +2474,24 @@ captures_temporary (tree *stmt, int *do_subtree, void *d) continue; parm = TREE_OPERAND (parm, 0); + + /* In case of component_ref, we need to capture the object of base +class as if it is temporary object. There are two possibilities: +(*base).field and base->field. */ + while (TREE_CODE (parm) == COMPONENT_REF) + { + parm = TREE_OPERAND (parm, 0); + if (TREE_CODE (parm) == INDIRECT_REF) + parm = TREE_OPERAND (parm, 0); + while (TREE_CODE (parm) == NOP_EXPR) + parm = TREE_OPERAND (parm, 0); + } + if (TREE_CODE (parm) == VAR_DECL && !DECL_ARTIFICIAL (parm)) /* This isn't a temporary... */ continue; - if (TREE_CODE (parm) == PARM_DECL) + if (TREE_CODE (parm) == PARM_DECL || TREE_CODE (parm) == NON_LVALUE_EXPR) /* .. nor is this... */ continue; diff --git a/gcc/testsuite/g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C b/gcc/testsuite/g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C new file mode 100644 index 000..93a43fbd298 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C @@ -0,0 +1,99 @@ +// { dg-do run } + +#include "../coro.h" + +class resumable { +public: + struct promise_type; + using coro_handle = std::coroutine_handle; + resumable(coro_handle handle) : handle_(handle) { } + resumable(resumable&) = delete; + resumable(resumable&&) = delete; + ~resumable() { handle_.destroy(); } + coro_handle handle_; +}; + +struct resumable::promise_type { + using coro_handle = std::coroutine_handle; + int used; + auto get_return_object() { +return coro_handle::from_promise(*this); + } + auto initial_suspend() { return std::suspend_never(); } + auto final_suspend() { return std::suspend_always(); } + void return_value(int x) {used = x;} + void unhandled_exception() {} + + struct TestAwaiter { +int recent_test; +TestAwaiter(int test) : recent_test{test} {} +bool await_ready() { return false; } +void await_suspend(std::coroutine_handle) {} +int await_resume() { + return recent_test; +} +auto operator co_await() { + return *this; +} + }; + + struct TestAwaiterCH :TestAwaiter { +TestAwaiterCH(int test) : TestAwaiter(test) {}; + }; + + struct TestAwaiterCHCH :TestAwaiterCH { +TestAwaiterCHCH(int test) : TestAwaiterCH(test) {}; + +resumable foo(){ +int x = co_await *this; +co_return x; +} + }; +}; + +struct TestP { + resumable::promise_type::TestAwaiterCHCH tp = resumable::promise_type::TestAwaiterCHCH(6); +}; + +resumable foo1(int t){ + int x = co_await resumable::promise_type::TestAwaiterCH(t); + co_return x; +} + +resumable foo2(){ + struct TestP TP; + int x = co_await TP.tp; + co_return x; +} + +resumable foo3(){ + int x = co_await TestP{}.tp; + co_return x; +} + +int main(){ + auto t = resumable::promise_type::TestAwaiterCHCH(4); + resumable res = t.foo(); + while (!res.handle_.done()) +res.handle_.resume(); + if (res.handle_.promise().used != 4) +abort(); + + resumable res1 = foo1(5); + while (!res1.handle_.done()) +res1.handle_.resume(); + if (res1.handle_.promise().used != 5) +abort(); + + resumable res2 = foo2(); + while (!res2.handle_.done()) +res2.handle_.resume(); + if (res2.handle_.promise().used != 6) +abort(); + + resumable res3 = foo2(); + while (!res3.handle_.done()) +res3.handle_.resume(); + if (res3.handle_.promise().used != 6) +abort(); +} -- 2.19.1.3.ge56e4f7
Re: [C++ coroutines 6/6] Testsuite.
在 2019/11/20 下午9:11, JunMa 写道: 在 2019/11/20 下午7:22, Iain Sandoe 写道: Hello JunMa, JunMa wrote: 在 2019/11/17 下午6:28, Iain Sandoe 写道: I find that the patches donot support 'for co_await'. And it is quiet simple to implement range based 'for co_await' based on your patches, since it's just need few more works on range for source to source transform. Any reason for that? If I understand your question correctly, for example TS n4775, there was: [stmt.stmt] …. for co_await ( for-range-declaration : for-range-initializer ) statement yes? This was removed by a later committee resolution, and was *not* merged to the C++20 Working Draft (I am currently working to n4835). So, the reason it is not implemented in GCC at present, is that it is not clear exactly what form it might take if introduced in some future proposal for enhanced coroutines. hope that answers the question, thanks Iain Hi Iain, Thanks, that make sense. Regards JunMa Hi Iain In current implementation of the allocating storage for coroutine, it does not follow the rules in n4835 which look up in the scope of the promise type first. Is there any reason? If not, would youimplement this following the rules of n4835? Thanks. Regards JunMa
Re: [PATCH] coroutines: Amend parameter handling to match n4849.
在 2020/2/26 下午9:43, Iain Sandoe 写道: This is the second in the series to bring the GCC implementation into line with the current standard. @JunMa I believe that this should solve the problems you were looking at in “[PATCH Coroutines] Fix issue with unused corutine function parameters” and supercedes that patch (since we needed to alter the ordering as well). If any parameter issues remain, please file a PR (or a patch, of course). OK for trunk? thanks Iain = In n4849 and preceding versions, [class.copy.elision] (1.3) appears to confer additional permissions on coroutines to elide parameter copies. After considerable discussion on this topic by email and during the February 2020 WG21 meeting, it has been determined that there are no additional permissions applicable to coroutine parameter copy elision. The content of that clause in the standard is expected to be amended eventually to clarify this. Other than this, the handling of parameter lifetimes is expected to be as per n4849: * A copy is made before the promise is constructed * If the promise CTOR uses the parms, then it should use the copy where appropriate. * The param copy lifetimes end after the promise is destroyed (during the coroutine frame destruction). * Otherwise, C++20 copy elision rules apply. (as an aside) In practice, we expect that copy elision can only occur when the coroutine body is fully inlined, possibly in conjunction with heap allocation elision. The patch: * Reorders the copying process to precede the promise CTOR and ensures the correct use. * Copies all params into the frame regardless of whether the coro body uses them (this is a bit unfortunate, and we should figure out an amendment for C++23). Hi iain, This makes sense to me. Regards JunMa gcc/cp/ChangeLog: 2020-02-26 Iain Sandoe * coroutines.cc (struct param_info): Keep track of params that are references, and cache the original type and whether the DTOR is trivial. (build_actor_fn): Handle param copies always, and adjust the handling for references. (register_param_uses): Only handle uses here. (classtype_has_non_deleted_copy_ctor): New. (morph_fn_to_coro): Adjust param copy handling to match n4849 by reordering ahead of the promise CTOR and always making a frame copy, even if the param is unused in the coroutine body. gcc/testsuite/ChangeLog: 2020-02-26 Iain Sandoe * g++.dg/coroutines/coro1-refs-and-ctors.h: New. * g++.dg/coroutines/torture/func-params-07.C: New test. * g++.dg/coroutines/torture/func-params-08.C: New test. --- gcc/cp/coroutines.cc | 313 +++--- .../g++.dg/coroutines/coro1-refs-and-ctors.h | 144 .../coroutines/torture/func-params-07.C | 81 + .../coroutines/torture/func-params-08.C | 112 +++ 4 files changed, 524 insertions(+), 126 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/coro1-refs-and-ctors.h create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/func-params-07.C create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/func-params-08.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 524d4872804..e0e7e66fe5e 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -1742,6 +1742,11 @@ struct param_info tree field_id; vec *body_uses; tree frame_type; + tree orig_type; + bool by_ref; + bool rv_ref; + bool pt_ref; + bool trivial_dtor; }; struct local_var_info @@ -1941,26 +1946,37 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, /* Re-write param references in the body, no code should be generated here. */ - if (DECL_ARGUMENTS (orig) && param_uses != NULL) + if (DECL_ARGUMENTS (orig)) { tree arg; for (arg = DECL_ARGUMENTS (orig); arg != NULL; arg = DECL_CHAIN (arg)) { bool existed; param_info &parm = param_uses->get_or_insert (arg, &existed); - if (parm.field_id == NULL_TREE) - continue; /* Wasn't used. */ + if (!parm.body_uses) + continue; /* Wasn't used in the orignal function body. */ + tree fld_ref = lookup_member (coro_frame_type, parm.field_id, /*protect=*/1, /*want_type=*/0, tf_warning_or_error); - tree fld_idx = build3_loc (loc, COMPONENT_REF, TREE_TYPE (arg), + tree fld_idx = build3_loc (loc, COMPONENT_REF, parm.frame_type, actor_frame, fld_ref, NULL_TREE); + + /* We keep these in the frame as a regular pointer, so convert that + back to the type expected. */ + if (parm.pt_ref) + fld_idx = build1_loc (loc, CONVERT_EXPR, TREE_TYPE (arg), fld_idx); + + /*
Re: [PING PATCH coroutines v1] Build co_await/yield_expr with unknown_type in processing_template_decl phase
在 2020/2/10 下午7:42, JunMa 写道: Ping~ Regards JunMa Kindly ping. Regards JunMa 在 2020/2/5 下午5:17, JunMa 写道: 在 2020/2/5 下午2:14, JunMa 写道: Hi This patch builds co_await/yield_expr with unknown_type when we can not know the promise type in processing_template_decl phase. it avoid to confuse compiler when handing type deduction and conversion. Bootstrap and test on X86_64, is it OK? Regards JunMa Hi sorry for that '}' was removed, here is the update patch:) Regards JunMa gcc/cp 2020-02-05 Jun Ma * coroutines.cc (finish_co_await_expr): Build co_await_expr with unknown_type_node. (finish_co_yield_expr): Ditto. *pt.c (type_dependent_expression_p): Set co_await/yield_expr with unknown type as dependent. gcc/testsuite 2020-02-05 Jun Ma * g++.dg/coroutines/torture/co-await-14-template-traits.C: New test.
Re: [PING PATCH coroutines] Set side effects flag for BIND_EXPR which build in maybe_promote_captured_temps
在 2020/2/11 上午10:50, JunMa 写道: Hi kindly ping~ Regards JunMa Hi As title. in maybe_promote_captured_temps, we promote captured temporaries and co_await_expr into a new BIND_EXPR. As the BIND_EXPR contains co_await_expr and maybe other function calls, the side effects flag should be set. This patch fix one mismatch in cppcoro, the testcase comes from cppcoro and is reduced by creduce. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-11 Jun Ma * coroutines.cc (maybe_promote_captured_temps): Set side effects flag for BIND_EXPR. gcc/testsuite 2020-02-11 Jun Ma * g++.dg/coroutines/torture/lambda-10-co-await-lambda.C: New test.
Re: [PING PATCH coroutines] Do not strip cleanup_point when promote temporaries out of current stmt
在 2020/2/11 上午10:14, JunMa 写道: Kindly ping Regards JunMa Hi In maybe_promote_captured_temps, the cleanup_point_stmt has been stripped when handle temporaries captured by reference. However, maybe there are non-reference temporaries in current stmt which cause ice in gimpilify pass. This patch fix this. The testcase comes from cppcoro and is reduced by creduce. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-11 Jun Ma * coroutines.cc (maybe_promote_captured_temps): Do not strip cleanup_point_stmt. gcc/testsuite 2020-02-11 Jun Ma * g++.dg/coroutines/torture/lambda-09-capture-object.C: New test.
Re: [PING PATCH coroutines] Handle component_ref in captures_temporary
在 2020/2/12 下午3:23, JunMa 写道: Kindly ping Regards JunMa Hi In captures_temporary, the current implementation fails to handle component_ref. This causes ice with case co_await A while operator co_await is defined in base class of A. Also it is necessary to capture the object of base class as if it is temporary object. This patch strips component_ref to its base object and check it as usual. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-12 Jun Ma * coroutines.cc (captures_temporary): Strip component_ref to its base object. gcc/testsuite 2020-02-12 Jun Ma * g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C: New test.
Re: [PATCH] coroutines: Update lambda capture handling to n4849.
在 2020/3/2 下午5:43, Iain Sandoe 写道: Hi, In the absence of specific comment on the handling of closures I'd implemented something more than was intended (extending the lifetime of lambda capture-by-copy vars to the duration of the coro). After discussion at WG21 in February and by email, the correct handling is to treat the closure "this" pointer the same way as for a regular one, and thus it is the user's responsibility to ensure that the lambda capture object has suitable lifetime for the coroutine. It is noted that users frequently get this wrong, so it would be a good thing to revisit for C++23. This patch removes the additional copying behaviour for lambda capture-by- copy vars. @JunMa, this supercedes your fix to the aliases, which should no longer be necessary, but i’ve added your testcases to this patch. Hi Iain Most part of your patch are same idea as my patch, so this LGTM with some comments. Regards JunMa gcc/cp/ChangeLog: 2020-03-02 Iain Sandoe * coroutines.cc (struct local_var_info): Adjust to remove the reference to the captured var, and just to note that this is a lambda capture proxy. (transform_local_var_uses): Handle lambda captures specially. (struct param_frame_data): Add a visited set. (register_param_uses): Also check for param uses in lambda capture proxies. (struct local_vars_frame_data): Remove captures list. (register_local_var_uses): Handle lambda capture proxies by noting and bypassing them. (morph_fn_to_coro): Update to remove lifetime extension of lambda capture-by-copy vars. gcc/testsuite/ChangeLog: 2020-03-02 Iain Sandoe Jun Ma * g++.dg/coroutines/torture/class-05-lambda-capture-copy-local.C: Update to have multiple uses for the lambda parm. * g++.dg/coroutines/torture/lambda-09-init-captures.C: New test. * g++.dg/coroutines/torture/lambda-10-mutable.C: New test. --- gcc/cp/coroutines.cc | 174 +++--- .../class-05-lambda-capture-copy-local.C | 4 +- .../torture/lambda-09-init-captures.C | 55 ++ .../coroutines/torture/lambda-10-mutable.C| 48 + 4 files changed, 171 insertions(+), 110 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/lambda-09-init-captures.C create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/lambda-10-mutable.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 3e06f079787..303e6e83d54 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -1783,7 +1783,7 @@ struct local_var_info tree field_id; tree field_idx; tree frame_type; - tree captured; + bool is_lambda_capture; location_t def_loc; }; @@ -1828,6 +1828,14 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) cp_walk_tree (&DECL_SIZE_UNIT (lvar), transform_local_var_uses, d, NULL); + /* For capture proxies, this could include the decl value expr. */ + if (local_var.is_lambda_capture) + { + tree ve = DECL_VALUE_EXPR (lvar); + cp_walk_tree (&ve, transform_local_var_uses, d, NULL); + continue; /* No frame entry for this. */ + } + /* TODO: implement selective generation of fields when vars are known not-used. */ if (local_var.field_id == NULL_TREE) @@ -1842,8 +1850,9 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) local_var.field_idx = fld_idx; } cp_walk_tree (&BIND_EXPR_BODY (*stmt), transform_local_var_uses, d, NULL); + /* Now we have processed and removed references to the original vars, -we can drop those from the bind. */ +we can drop those from the bind - leaving capture proxies alone. */ for (tree *pvar = &BIND_EXPR_VARS (*stmt); *pvar != NULL;) { bool existed; @@ -1851,10 +1860,24 @@ transform_local_var_uses (tree *stmt, int *do_subtree, void *d) = lvd->local_var_uses->get_or_insert (*pvar, &existed); gcc_checking_assert (existed); + /* Leave lambda closure captures alone, we replace the *this +pointer with the frame version and let the normal process +deal with the rest. */ + if (local_var.is_lambda_capture) + { + pvar = &DECL_CHAIN (*pvar); + continue; + } + + /* It's not used, but we can let the optimizer deal with that. */ if (local_var.field_id == NULL_TREE) - pvar = &DECL_CHAIN (*pvar); /* Wasn't used. */ + { + pvar = &DECL_CHAIN (*pvar); + continue; + } Merge ifs maybe better. - *pvar = DECL_CHAIN (*pvar); /* discard this one, we replaced it. */ + /* Disca
Re: [PATCH coroutines] Handle component_ref in captures_temporary
在 2020/3/2 下午10:49, Nathan Sidwell 写道: On 2/12/20 2:23 AM, JunMa wrote: Hi In captures_temporary, the current implementation fails to handle component_ref. This causes ice with case co_await A while operator co_await is defined in base class of A. Also it is necessary to capture the object of base class as if it is temporary object. This patch strips component_ref to its base object and check it as usual. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-12 Jun Ma * coroutines.cc (captures_temporary): Strip component_ref to its base object. gcc/testsuite 2020-02-12 Jun Ma * g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C: New test. + + /* In case of component_ref, we need to capture the object of base + class as if it is temporary object. There are two possibilities: + (*base).field and base->field. */ + while (TREE_CODE (parm) == COMPONENT_REF) + { + parm = TREE_OPERAND (parm, 0); + if (TREE_CODE (parm) == INDIRECT_REF) + parm = TREE_OPERAND (parm, 0); + while (TREE_CODE (parm) == NOP_EXPR) + parm = TREE_OPERAND (parm, 0); Use STRIP_NOPS. + } + if (TREE_CODE (parm) == VAR_DECL && !DECL_ARTIFICIAL (parm)) /* This isn't a temporary... */ continue; - if (TREE_CODE (parm) == PARM_DECL) + if (TREE_CODE (parm) == PARM_DECL || TREE_CODE (parm) == NON_LVALUE_EXPR) /* .. nor is this... */ continue; Either a separate if, or merging both ifs (my preference) would be better. nathan Hi nathan Here is the updated patch Regards JunMa --- gcc/cp/coroutines.cc | 20 +++- .../torture/co-await-15-capture-comp-ref.C| 99 +++ 2 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 966ec0583aa..2a54bcefc1e 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -2613,12 +2613,22 @@ captures_temporary (tree *stmt, int *do_subtree, void *d) continue; parm = TREE_OPERAND (parm, 0); - if (TREE_CODE (parm) == VAR_DECL && !DECL_ARTIFICIAL (parm)) - /* This isn't a temporary... */ - continue; - if (TREE_CODE (parm) == PARM_DECL) - /* .. nor is this... */ + /* In case of component_ref, we need to capture the object of base +class as if it is temporary object. There are two possibilities: +(*base).field and base->field. */ + while (TREE_CODE (parm) == COMPONENT_REF) + { + parm = TREE_OPERAND (parm, 0); + if (TREE_CODE (parm) == INDIRECT_REF) + parm = TREE_OPERAND (parm, 0); + parm = STRIP_NOPS (parm); + } + + /* This isn't a temporary or argument. */ + if ((TREE_CODE (parm) == VAR_DECL && !DECL_ARTIFICIAL (parm)) + || TREE_CODE (parm) == PARM_DECL + || TREE_CODE (parm) == NON_LVALUE_EXPR) continue; if (TREE_CODE (parm) == TARGET_EXPR) diff --git a/gcc/testsuite/g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C b/gcc/testsuite/g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C new file mode 100644 index 000..93a43fbd298 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/co-await-15-capture-comp-ref.C @@ -0,0 +1,99 @@ +// { dg-do run } + +#include "../coro.h" + +class resumable { +public: + struct promise_type; + using coro_handle = std::coroutine_handle; + resumable(coro_handle handle) : handle_(handle) { } + resumable(resumable&) = delete; + resumable(resumable&&) = delete; + ~resumable() { handle_.destroy(); } + coro_handle handle_; +}; + +struct resumable::promise_type { + using coro_handle = std::coroutine_handle; + int used; + auto get_return_object() { +return coro_handle::from_promise(*this); + } + auto initial_suspend() { return std::suspend_never(); } + auto final_suspend() { return std::suspend_always(); } + void return_value(int x) {used = x;} + void unhandled_exception() {} + + struct TestAwaiter { +int recent_test; +TestAwaiter(int test) : recent_test{test} {} +bool await_ready() { return false; } +void await_suspend(std::coroutine_handle) {} +int await_resume() { + return recent_test; +} +auto operator co_await() { + return *this; +} + }; + + struct TestAwaiterCH :TestAwaiter { +TestAwaiterCH(int test) : TestAwaiter(test) {}; + }; + + struct TestAwaiterCHCH :TestAwaiterCH { +TestAwaiterCHCH(int test) : TestAwaiterCH(test) {}; + +resumable foo(){ +int x = co_await *this; +co_return x; +} + }; +}; + +struct TestP { + resumable::promise_type::TestAwaiterCHCH tp = resumable::promise_type::TestAwaiterCHCH(6); +}; + +resumable foo1(int t){ + int x = co_await resumable::promise_type::TestAw
Re: [PATCH coroutines] Handle component_ref in captures_temporary
在 2020/3/3 下午8:15, Nathan Sidwell 写道: On 3/3/20 12:42 AM, JunMa wrote: 在 2020/3/2 下午10:49, Nathan Sidwell 写道: On 2/12/20 2:23 AM, JunMa wrote: Hi nathan Here is the updated patch This is ok, with a correction in a comment: + /* This isn't a temporary or argument. */ /* This isn't a temporary. */ is sufficient. Otherwise it reads as 'this is neither a temporary nor an argument' which isn't the case. Thanks, will check in later. Regards JunMa nathan
Re: [PING PATCH coroutines] Set side effects flag for BIND_EXPR which build in maybe_promote_captured_temps
Ping Regards JunMa 在 2020/2/27 上午10:17, JunMa 写道: 在 2020/2/11 上午10:50, JunMa 写道: Hi kindly ping~ Regards JunMa Hi As title. in maybe_promote_captured_temps, we promote captured temporaries and co_await_expr into a new BIND_EXPR. As the BIND_EXPR contains co_await_expr and maybe other function calls, the side effects flag should be set. This patch fix one mismatch in cppcoro, the testcase comes from cppcoro and is reduced by creduce. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-11 Jun Ma * coroutines.cc (maybe_promote_captured_temps): Set side effects flag for BIND_EXPR. gcc/testsuite 2020-02-11 Jun Ma * g++.dg/coroutines/torture/lambda-10-co-await-lambda.C: New test.
Re: [PING PATCH coroutines] Do not strip cleanup_point when promote temporaries out of current stmt
Ping Regards JunMa 在 2020/2/27 上午10:18, JunMa 写道: 在 2020/2/11 上午10:14, JunMa 写道: Kindly ping Regards JunMa Hi In maybe_promote_captured_temps, the cleanup_point_stmt has been stripped when handle temporaries captured by reference. However, maybe there are non-reference temporaries in current stmt which cause ice in gimpilify pass. This patch fix this. The testcase comes from cppcoro and is reduced by creduce. Bootstrap and test on X86_64, is it OK? Regards JunMa gcc/cp 2020-02-11 Jun Ma * coroutines.cc (maybe_promote_captured_temps): Do not strip cleanup_point_stmt. gcc/testsuite 2020-02-11 Jun Ma * g++.dg/coroutines/torture/lambda-09-capture-object.C: New test.
Re: [PING PATCH coroutines] Set side effects flag for BIND_EXPR which build in maybe_promote_captured_temps
在 2020/3/5 下午9:51, Iain Sandoe 写道: Hello JunMa, JunMa wrote: Ping Thanks for your patch(es) and I am sorry this has taken some time to review. (right now, we’re trying to ensure that we have the latest standard represented in GCC10, so updating to n4849). 在 2020/2/27 上午10:17, JunMa 写道: 在 2020/2/11 上午10:50, JunMa 写道: Hi kindly ping~ Regards JunMa Hi As title. in maybe_promote_captured_temps, we promote captured temporaries and co_await_expr into a new BIND_EXPR. As the BIND_EXPR contains co_await_expr and maybe other function calls, the side effects flag should be set. This patch fix one mismatch in cppcoro, the testcase comes from cppcoro and is reduced by creduce. With the following test conditions; r10-7040-ga2ec7c4aafbcd517 + the two approved patches by Bin Cheng applied. 1/ the test case in this patch (lambda-10-co-await-lambda.C) fails both with and without the patch. 2/ the patch regresses one of my local testcases. The test case fails because of ICE which is fixed by the [PING PATCH coroutines] Do not strip cleanup_point when promote temporaries out of current stmt This patch fix the runtime mismatch. The extra regression is co-await-syntax-11.C which comes from Bin's patch and also is fixed by that patch. Regards JunMa So, it appears that the testcase might show a bug - but the fix is not the right one for current trunk? Please could you re-check ? thanks Iain
Re: [PING PATCH coroutines] Do not strip cleanup_point when promote temporaries out of current stmt
在 2020/3/5 下午10:18, Iain Sandoe 写道: Hello JunMa, JunMa wrote: Ping Once again, sorry for taking time to review this. 在 2020/2/27 上午10:18, JunMa 写道: 在 2020/2/11 上午10:14, JunMa 写道: Kindly ping Regards JunMa Hi In maybe_promote_captured_temps, the cleanup_point_stmt has been stripped when handle temporaries captured by reference. However, maybe there are non-reference temporaries in current stmt which cause ice in gimpilify pass. This patch fix this. The testcase comes from cppcoro and is reduced by creduce. With current trunk + Bin’s two approved patches. I see no change in the testcase (lambda-09-capture-object.C) before / after the patch (it fails for me at -O0 only - in both cases). please could you check? As I said at previous mail, this patch fix the ICE in gimpilify pass. I test with current trunk + Bin's two patches, the testcase passes with the patch and fails without the patch. It also fix co-await-syntax-11.C which caused by same ICE. could you do double check? Regards JunMa thanks Iain
Re: [C++ coroutines 6/6] Testsuite.
在 2019/11/20 下午7:22, Iain Sandoe 写道: Hello JunMa, JunMa wrote: 在 2019/11/17 下午6:28, Iain Sandoe 写道: I find that the patches donot support 'for co_await'. And it is quiet simple to implement range based 'for co_await' based on your patches, since it's just need few more works on range for source to source transform. Any reason for that? If I understand your question correctly, for example TS n4775, there was: [stmt.stmt] …. for co_await ( for-range-declaration : for-range-initializer ) statement yes? This was removed by a later committee resolution, and was *not* merged to the C++20 Working Draft (I am currently working to n4835). So, the reason it is not implemented in GCC at present, is that it is not clear exactly what form it might take if introduced in some future proposal for enhanced coroutines. hope that answers the question, thanks Iain Hi Iain, Thanks, that make sense. Regards JunMa
Re: [PATCH][PR89341]Fix ICE on function definition with weakref/alias attributes attached
在 2019/6/19 上午4:38, Jeff Law 写道: On 3/26/19 5:40 AM, JunMa wrote: Hi According to gnu document of function attributes, neither weakref nor alias could be attached to a function defined in current translation unit. Although GCC checks the attributes under some circumstances, it still fails on some cases and even causes ICE. This patch checks whether alias/weakref attribute attaches on a function declaration which has body, and removes the attribute later. This also avoid the ICE. Bootstrapped/regtested on x86_64-linux, ok for GCC10? Regards JunMa gcc/ChangeLog 2019-03-26 Jun Ma PR89341 * cgraphunit.c (handle_alias_pairs): Check whether alias attribute attaches on a function declaration which has body. gcc/testsuite/ChangeLog 2019-03-26 Jun Ma PR89341 * gcc.dg/attr-alias-6.c: New test. * gcc.dg/attr-weakref-5.c: Likewise. Based on my reading of the BZ, this should result in a hard error, rather than an "attribute ignored" warning. Sorry for the late reply. Yes, It should error out with message like "'foo' defined both normally and as 'alias' attribute". However, this patch just tries to fix ICE, and keeps remain unchanged. I do have a patch to fix the message and the testcases, I'll send it later. Regards JunMa Jeff
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/6/18 上午6:46, Jeff Law 写道: On 5/9/19 2:01 AM, JunMa wrote: 在 2019/5/9 上午10:22, JunMa 写道: 在 2019/5/9 上午3:02, Bernd Edlinger 写道: On 5/8/19 4:31 PM, Richard Biener wrote: On Tue, May 7, 2019 at 4:34 AM JunMa wrote: 在 2019/5/6 下午7:58, JunMa 写道: 在 2019/5/6 下午6:02, Richard Biener 写道: On Thu, Mar 21, 2019 at 5:57 AM JunMa wrote: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? It's probably better if gimple_fold_builtin_memchr uses string_constant directly instead? We can use string_constant in gimple_fold_builtin_memchr, however it is a bit complex to use it in memchr/memcmp constant folding. You are changing memcmp constant folding but only have a testcase for memchr. I'll add later. If we decide to amend c_getstr I would rather make it return the total length instead of the number of trailing zeros. I think return the total length is safe in other place as well. I used the argument in patch since I do not want any impact on other part at all. Although it is safe to use total length, I found that function inline_expand_builtin_string_cmp() which used c_getstr() may emit redundant rtls for trailing null chars when total length is returned. Also it is trivial to handle constant string with trailing null chars. So this updated patch follow richard's suggestion: using string_constant directly. Bootstrapped/regtested on x86_64-linux, ok for trunk? Doesn't this fold to NULL for the case where seaching for '0' and it doesn't occur in the string constant but only the zero-padding? So I'm not sure if JunMa addressed this question specifically. What happens is we'll get back a NULL terminated string from c_getstr, but the returned length will include the NUL terminator. Then we call memchr on the result with a length that would include that NUL terminator. So I'm pretty sure the current patch will DTRT in that case. It'd be better to have a stronger test which verified not only that the call was folded away, but what the resultant value was and whether or not it was the right value. That would include testing for NUL in the string as well as testing for NUL in the tail padding. I'll ack the change to gimple-fold, but please improve the testcase a bit and commit the change to gimple-fold and the improved testcase together. Thanks, and sorry for the delays. Thanks, I'll update the testcase and check in after pass the regtest. Regards Jun jeff
[PATCH GCC10] ipa-inline.c: Trivial fix on function not declared inline check in want_inline_small_function_p
Hi Since MAX_INLINE_INSNS_AUTO should be below or equal to MAX_INLINE_INSNS_SINGLE (see params.def), there is no need to do second inlining limit check on growth when function not declared inline, this patch removes it. Bootstrapped and tested on x86_64-unknown-linux-gnu, is it ok for trunk? Regards Jun 2019-03-01 Jun Ma *ipa-inline.c(want_inline_small_function_p): Remove redundant growth check when function not declared inline 0001-remove-redundant-check-on-growth.patch Description: Binary data
回复:[PATCH GCC10] ipa-inline.c: Trivial fix on function not declared inline check in want_inline_small_function_p
-- 发件人:Segher Boessenkool 发送时间:2019年3月1日(星期五) 22:18 收件人:JunMa 抄 送:gcc-patches 主 题:Re: [PATCH GCC10] ipa-inline.c: Trivial fix on function not declared inline check in want_inline_small_function_p Hi! On Fri, Mar 01, 2019 at 04:39:38PM +0800, JunMa wrote: >Since MAX_INLINE_INSNS_AUTO should be below or equal to >MAX_INLINE_INSNS_SINGLE (see params.def), there is no need >to do second inlining limit check on growth when function not >declared inline, this patch removes it. >Bootstrapped and tested on x86_64-unknown-linux-gnu, is it ok for trunk? Your mail subject says this is for GCC 10, but you are asking for GCC 9 now; which is it? Sorry. Since we are in GCC9 stage4 now, also it's not for regression fix.So, it's for GCC 10. > 2019-03-01 Jun Ma > > *ipa-inline.c(want_inline_small_function_p): Remove > redundant growth check when function not declared > inline Some spaces were lost in the first line. Trailing space. Sentences should end with a full stop (or similar). Don't send patches (or pretty much anything else) as application/octet-stream attachments. Segher Sorry again for this. Here is the full change. JunMa 2019-03-01 Jun Ma * ipa-inline.c(want_inline_small_function_p): Remove redundant growth check when function not declared inline. diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 360c3de..ff9bc9e 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -837,15 +837,11 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) ? MAX (MAX_INLINE_INSNS_AUTO, MAX_INLINE_INSNS_SINGLE) : MAX_INLINE_INSNS_AUTO) - && !(big_speedup == -1 ? big_speedup_p (e) : big_speedup)) + && !(big_speedup == -1 ? big_speedup_p (e) : big_speedup) + && growth_likely_positive (callee, growth)) { - /* growth_likely_positive is expensive, always test it last. */ - if (growth >= MAX_INLINE_INSNS_SINGLE - || growth_likely_positive (callee, growth)) - { - e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; - want_inline = false; - } + e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; + want_inline = false; } /* If call is cold, do not inline when function body would grow. */ else if (!e->maybe_hot_p ()
Re: [PATCH GCC10] ipa-inline.c: Trivial fix on function not declared inline check in want_inline_small_function_p
Hi Please ignore the previous mail. 在 2019/3/1 下午10:17, Segher Boessenkool 写道: Hi! On Fri, Mar 01, 2019 at 04:39:38PM +0800, JunMa wrote: Since MAX_INLINE_INSNS_AUTO should be below or equal to MAX_INLINE_INSNS_SINGLE (see params.def), there is no need to do second inlining limit check on growth when function not declared inline, this patch removes it. Bootstrapped and tested on x86_64-unknown-linux-gnu, is it ok for trunk? Your mail subject says this is for GCC 10, but you are asking for GCC 9 now; which is it? Since we are in GCC9 stage4 now, also it's not for regression fix. So, it's for GCC 10. 2019-03-01 Jun Ma *ipa-inline.c(want_inline_small_function_p): Remove redundant growth check when function not declared inline Some spaces were lost in the first line. Trailing space. Sentences should end with a full stop (or similar). Don't send patches (or pretty much anything else) as application/octet-stream attachments. Segher Sorry again for this. Here is the full change. JunMa 2019-03-01 Jun Ma * ipa-inline.c(want_inline_small_function_p): Remove redundant growth check when function not declared inline. diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 360c3de..ff9bc9e 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -837,15 +837,11 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) ? MAX (MAX_INLINE_INSNS_AUTO, MAX_INLINE_INSNS_SINGLE) : MAX_INLINE_INSNS_AUTO) - && !(big_speedup == -1 ? big_speedup_p (e) : big_speedup)) + && !(big_speedup == -1 ? big_speedup_p (e) : big_speedup) + && growth_likely_positive (callee, growth)) { - /* growth_likely_positive is expensive, always test it last. */ - if (growth >= MAX_INLINE_INSNS_SINGLE - || growth_likely_positive (callee, growth)) - { - e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; - want_inline = false; - } + e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; + want_inline = false; } /* If call is cold, do not inline when function body would grow. */ else if (!e->maybe_hot_p ()
[PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test. --- gcc/fold-const-call.c | 14 +++--- gcc/fold-const.c| 14 +++--- gcc/fold-const.h| 3 ++- gcc/gimple-fold.c | 5 +++-- gcc/testsuite/gcc.dg/builtin-memchr-4.c | 30 ++ 5 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/builtin-memchr-4.c diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c index 702c8b4..ea81f6a 100644 --- a/gcc/fold-const-call.c +++ b/gcc/fold-const-call.c @@ -1720,7 +1720,7 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2) { const char *p0, *p1; char c; - unsigned HOST_WIDE_INT s0, s1; + unsigned HOST_WIDE_INT s0, s1, s3, s4; size_t s2 = 0; switch (fn) { @@ -1756,10 +1756,10 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2) && !TREE_SIDE_EFFECTS (arg0) && !TREE_SIDE_EFFECTS (arg1)) return build_int_cst (type, 0); - if ((p0 = c_getstr (arg0, &s0)) - && (p1 = c_getstr (arg1, &s1)) - && s2 <= s0 - && s2 <= s1) + if ((p0 = c_getstr (arg0, &s0, &s3)) + && (p1 = c_getstr (arg1, &s1, &s4)) + && s2 <= s0 + s3 + && s2 <= s1 + s4) return build_cmp_result (type, memcmp (p0, p1, s2)); return NULL_TREE; @@ -1770,8 +1770,8 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2) && !TREE_SIDE_EFFECTS (arg0) && !TREE_SIDE_EFFECTS (arg1)) return build_int_cst (type, 0); - if ((p0 = c_getstr (arg0, &s0)) - && s2 <= s0 + if ((p0 = c_getstr (arg0, &s0, &s3)) + && s2 <= s0 + s3 && target_char_cst_p (arg1, &c)) { const char *r = (const char *) memchr (p0, c, s2); diff --git a/gcc/fold-const.c b/gcc/fold-const.c index ec28b43..413f0f0 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -14607,10 +14607,13 @@ fold_build_pointer_plus_hwi_loc (location_t loc, tree ptr, HOST_WIDE_INT off) characters within it if SRC is a reference to a string plus some constant offset). If STRLEN is non-null, store the number of bytes in the string constant including the terminating NUL char. *STRLEN is - typically strlen(P) + 1 in the absence of embedded NUL characters. */ + typically strlen(P) + 1 in the absence of embedded NUL characters. + If TRAILINGNULSLEN is non-null, store the number of trailing NUL chars + after terminating NUL char of pointer P. */ const char * -c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */) +c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */, + unsigned HOST_WIDE_INT *trailingnulslen /* =NULL */) { tree offset_node; tree mem_size; @@ -14639,16 +14642,13 @@ c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */) literal is stored in. */ unsigned HOST_WIDE_INT string_length = TREE_STRING_LENGTH (src); unsigned HOST_WIDE_INT string_size = tree_to_uhwi (mem_size); - - /* Ideally this would turn into a gcc_checking_assert over time. */ - if (string_length > string_size) -string_length = string_size; - const char *string = TREE_STRING_POINTER (src); /* Ideally this would turn into a gcc_checking_assert over time. */ if (string_length > string_size) string_length = string_size; + if (trailingnulslen) +*trailingnulslen = string_size - string_length; if (string_length == 0 || offset >= string_size) diff --git a/gcc/fold-const.h b/gcc/fold-const.h index 049fee9..5073138 100644 --- a/gcc/fold-const.h +++ b/gcc/fold-const.h @@ -187,7 +187,8 @@ extern bool expr_not_equal_to (tree t, const wide_int &); extern tree const_unop (enum tree_code, tree, tree); extern tree const_
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/3/21 下午1:06, Bin.Cheng 写道: On Thu, Mar 21, 2019 at 12:57 PM JunMa wrote: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? I suppose that it's for GCC10? Thanks, bin Since it's a P3 normal bug, so the patch is for GCC10. Sorry for the misleading, and thanks for pointing it out. Regards Jun Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test.
[PATCH][PR89341]Fix ICE on function definition with weakref/alias attributes attached
Hi According to gnu document of function attributes, neither weakref nor alias could be attached to a function defined in current translation unit. Although GCC checks the attributes under some circumstances, it still fails on some cases and even causes ICE. This patch checks whether alias/weakref attribute attaches on a function declaration which has body, and removes the attribute later. This also avoid the ICE. Bootstrapped/regtested on x86_64-linux, ok for GCC10? Regards JunMa gcc/ChangeLog 2019-03-26 Jun Ma PR89341 * cgraphunit.c (handle_alias_pairs): Check whether alias attribute attaches on a function declaration which has body. gcc/testsuite/ChangeLog 2019-03-26 Jun Ma PR89341 * gcc.dg/attr-alias-6.c: New test. * gcc.dg/attr-weakref-5.c: Likewise. --- gcc/cgraphunit.c | 11 --- gcc/testsuite/gcc.dg/attr-alias-6.c | 4 gcc/testsuite/gcc.dg/attr-weakref-5.c | 4 3 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/attr-alias-6.c create mode 100644 gcc/testsuite/gcc.dg/attr-weakref-5.c diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 8bfbd0b..733e184 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -1405,14 +1405,20 @@ handle_alias_pairs (void) for (i = 0; alias_pairs && alias_pairs->iterate (i, &p);) { symtab_node *target_node = symtab_node::get_for_asmname (p->target); - + symtab_node *node = symtab_node::get (p->decl); + if (node && TREE_CODE (p->decl) == FUNCTION_DECL + && DECL_SAVED_TREE (p->decl)) + { + node->alias = false; + alias_pairs->unordered_remove (i); + continue; + } /* Weakrefs with target not defined in current unit are easy to handle: they behave just as external variables except we need to note the alias flag to later output the weakref pseudo op into asm file. */ if (!target_node && lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl)) != NULL) { - symtab_node *node = symtab_node::get (p->decl); if (node) { node->alias_target = p->target; @@ -1426,7 +1432,6 @@ handle_alias_pairs (void) else if (!target_node) { error ("%q+D aliased to undefined symbol %qE", p->decl, p->target); - symtab_node *node = symtab_node::get (p->decl); if (node) node->alias = false; alias_pairs->unordered_remove (i); diff --git a/gcc/testsuite/gcc.dg/attr-alias-6.c b/gcc/testsuite/gcc.dg/attr-alias-6.c new file mode 100644 index 000..a3ec311 --- /dev/null +++ b/gcc/testsuite/gcc.dg/attr-alias-6.c @@ -0,0 +1,4 @@ +/* { dg-do compile } */ +/* { dg-require-alias "" } */ +static void __attribute__((alias("bar"))) foo(void) { } /* { dg-warning "attribute ignored because function is defined" } */ +void bar(void); diff --git a/gcc/testsuite/gcc.dg/attr-weakref-5.c b/gcc/testsuite/gcc.dg/attr-weakref-5.c new file mode 100644 index 000..bb21126 --- /dev/null +++ b/gcc/testsuite/gcc.dg/attr-weakref-5.c @@ -0,0 +1,4 @@ +/* { dg-do compile } */ +/* { dg-require-weak "" } */ +static void __attribute__((weakref("bar"))) foo(void) { } /* { dg-warning "attribute ignored because function is defined" } */ +void foo(void); -- 1.8.3.1
Re: [PATCH GCC10] ipa-inline.c: Trivial fix on function not declared inline check in want_inline_small_function_p
在 2019/4/30 上午5:57, Jeff Law 写道: On 3/1/19 1:39 AM, JunMa wrote: Hi Since MAX_INLINE_INSNS_AUTO should be below or equal to MAX_INLINE_INSNS_SINGLE (see params.def), there is no need to do second inlining limit check on growth when function not declared inline, this patch removes it. Bootstrapped and tested on x86_64-unknown-linux-gnu, is it ok for trunk? Regards Jun 2019-03-01 Jun Ma *ipa-inline.c(want_inline_small_function_p): Remove redundant growth check when function not declared inline So I don't think anything in the option processing ensures MAX_INLINE_INSNS_AUTO is <= MAX_INLINE_INSNS_SINGLE. Furthermore, it doesn't look like the restriction is documented in any user documentation. Thus I'm not particularly comfortable removing this additional test. Jeff The description at gcc onlinedocs says MAX_INLINE_INSNS_AUTO is more restrictive compare to limit applies to functions declared inline which is MAX_INLINE_INSNS_SINGLE. Thus, I think it is the user's responsibility to keep this restriction. Regards JunMa
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/3/21 下午12:51, JunMa 写道: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test. Ping. Regards JunMa
Re: [PATCH][PR89341]Fix ICE on function definition with weakref/alias attributes attached
在 2019/3/26 下午7:40, JunMa 写道: Hi According to gnu document of function attributes, neither weakref nor alias could be attached to a function defined in current translation unit. Although GCC checks the attributes under some circumstances, it still fails on some cases and even causes ICE. This patch checks whether alias/weakref attribute attaches on a function declaration which has body, and removes the attribute later. This also avoid the ICE. Bootstrapped/regtested on x86_64-linux, ok for GCC10? Regards JunMa gcc/ChangeLog 2019-03-26 Jun Ma PR89341 * cgraphunit.c (handle_alias_pairs): Check whether alias attribute attaches on a function declaration which has body. gcc/testsuite/ChangeLog 2019-03-26 Jun Ma PR89341 * gcc.dg/attr-alias-6.c: New test. * gcc.dg/attr-weakref-5.c: Likewise. Ping. Regards JunMa
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/5/6 下午6:02, Richard Biener 写道: On Thu, Mar 21, 2019 at 5:57 AM JunMa wrote: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? It's probably better if gimple_fold_builtin_memchr uses string_constant directly instead? We can use string_constant in gimple_fold_builtin_memchr, however it is a bit complex to use it in memchr/memcmp constant folding. You are changing memcmp constant folding but only have a testcase for memchr. I'll add later. If we decide to amend c_getstr I would rather make it return the total length instead of the number of trailing zeros. I think return the total length is safe in other place as well. I used the argument in patch since I do not want any impact on other part at all. Thanks JunMa Richard. Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test.
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/5/6 下午7:58, JunMa 写道: 在 2019/5/6 下午6:02, Richard Biener 写道: On Thu, Mar 21, 2019 at 5:57 AM JunMa wrote: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? It's probably better if gimple_fold_builtin_memchr uses string_constant directly instead? We can use string_constant in gimple_fold_builtin_memchr, however it is a bit complex to use it in memchr/memcmp constant folding. You are changing memcmp constant folding but only have a testcase for memchr. I'll add later. If we decide to amend c_getstr I would rather make it return the total length instead of the number of trailing zeros. I think return the total length is safe in other place as well. I used the argument in patch since I do not want any impact on other part at all. Although it is safe to use total length, I found that function inline_expand_builtin_string_cmp() which used c_getstr() may emit redundant rtls for trailing null chars when total length is returned. Also it is trivial to handle constant string with trailing null chars. So this updated patch follow richard's suggestion: using string_constant directly. Bootstrapped/regtested on x86_64-linux, ok for trunk? Regards JunMa gcc/ChangeLog 2019-05-07 Jun Ma PR Tree-optimization/89772 * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. gcc/testsuite/ChangeLog 2019-05-07 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test. Thanks JunMa Richard. Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test. --- gcc/gimple-fold.c | 9 - gcc/testsuite/gcc.dg/builtin-memchr-4.c | 30 ++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/builtin-memchr-4.c diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index 1b10bae..7ee5aea 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -2557,7 +2557,14 @@ gimple_fold_builtin_memchr (gimple_stmt_iterator *gsi) const char *r = (const char *)memchr (p1, c, MIN (length, string_length)); if (r == NULL) { - if (length <= string_length) + tree mem_size, offset_node; + string_constant (arg1, &offset_node, &mem_size, NULL); + unsigned HOST_WIDE_INT offset = (offset_node == NULL_TREE) + ? 0 : tree_to_uhwi (offset_node); + /* MEM_SIZE is the size of the array the string literal +is stored in. */ + unsigned HOST_WIDE_INT string_size = tree_to_uhwi (mem_size) - offset; + if (length <= string_size) { replace_call_with_value (gsi, build_int_cst (ptr_type_node, 0)); return true; diff --git a/gcc/testsuite/gcc.dg/builtin-memchr-4.c b/gcc/testsuite/gcc.dg/builtin-memchr-4.c new file mode 100644 index 000..3ef424c --- /dev/null +++ b/gcc/testsuite/gcc.dg/builtin-memchr-4.c @@ -0,0 +1,30 @@ +/* PR tree-optimization/89772 + Verify that memchr calls with a pointer to a constant character + are folded as expected. + { dg-do compile } + { dg-options "-O1 -Wall -fdump-tree-lower" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __WCHAR_TYPE__ wchar_t; + +extern void* memchr (const void*, int, size_t); +extern int printf (const char*, ...); +extern void abort (void); + +#define A(expr)\ + ((expr) \ + ? (void)0 \ + : (printf ("assertion failed on line %i: %s\n", \ + __LINE__, #expr), \ + abort ())) + +const char a[8] = {'a',0,'b'}; + +void test_memchr_cst_char (void) +{ + A (!memchr (a, 'c', 2)); + A (!memchr (a, 'c', 5)); + A (!memchr (a, 'c', sizeof a)); +} + +/* { dg-final { scan-tree-dump-not "abort" "gimple" } } */ -- 1.8.3.1
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/5/7 上午2:05, Martin Sebor 写道: On 5/6/19 5:58 AM, JunMa wrote: 在 2019/5/6 下午6:02, Richard Biener 写道: On Thu, Mar 21, 2019 at 5:57 AM JunMa wrote: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? It's probably better if gimple_fold_builtin_memchr uses string_constant directly instead? We can use string_constant in gimple_fold_builtin_memchr, however it is a bit complex to use it in memchr/memcmp constant folding. You are changing memcmp constant folding but only have a testcase for memchr. I'll add later. If we decide to amend c_getstr I would rather make it return the total length instead of the number of trailing zeros. I think return the total length is safe in other place as well. I used the argument in patch since I do not want any impact on other part at all. Using c_getstr is simpler than string_constant so it seems fine to me but I agree that returning a size of the array rather than the number of trailing nuls would make the API more intuitive. I would also suggest to use a name for the variable/parameter that makes that clear. E.g., string_size or array_size. (Since trailing nuls don't contribute to the length of a string referring to length in the name is misleading.) I found that the return length of c_getstr() is used in function inline_expand_builtin_string_cmp(). It may emit redundant rtls for trailing null chars when total length is returned. So Although it is safe to return total length, It seems that using string_constant is better. Thanks JunMa Martin Thanks JunMa Richard. Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test.
[PATCH][PR90106] Builtin call transformation changes in cdce pass
Hi As PR90106 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90106), when gcc meets builtin function call like: y = sqrt (x); The cdce pass tries to transform the call into an internal function call and conditionally executes call with a simple range check on the arguments which can detect most cases and the errno does not need to be set. It looks like: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) sqrt (x); However, If the call is in tail position, for example: y = sqrt (x); return y; will become: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) sqrt (x); return y; This transformation breaks tailcall pattern, and prevents later tailcall optimizations. So This patch transform builtin call with return value into if-then-else part, which looks like: y = sqrt (x); ==> if (__builtin_isless (x, 0)) y = sqrt (x); else y = IFN_SQRT (x); BTW, y = sqrt (x) can also transform like: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) y = sqrt (x); We don‘t choose this pattern because it emits worse assemble code(more move instruction and use more registers) in x86_64. Bootstrapped/regtested on x86_64-linux, ok for trunk? Regards JunMa gcc/ChangeLog 2019-05-07 Jun Ma PR tree-optimization/90106 * tree-call-cdce.c (shrink_wrap_one_built_in_call_with_conds): Add new parameter as new internal function call, also move it to new basic block. (use_internal_fn): Pass internal function call to shrink_wrap_one_built_in_call_with_conds. gcc/testsuite/ChangeLog 2019-05-07 Jun Ma PR tree-optimization/90106 * gcc.dg/cdce1.c: Check tailcall code generation after cdce pass. * gcc.dg/cdce2.c: Likewise. --- gcc/testsuite/gcc.dg/cdce1.c | 3 +- gcc/testsuite/gcc.dg/cdce2.c | 3 +- gcc/tree-call-cdce.c | 90 +--- 3 files changed, 71 insertions(+), 25 deletions(-) diff --git a/gcc/testsuite/gcc.dg/cdce1.c b/gcc/testsuite/gcc.dg/cdce1.c index b23ad63..424d80f 100644 --- a/gcc/testsuite/gcc.dg/cdce1.c +++ b/gcc/testsuite/gcc.dg/cdce1.c @@ -1,7 +1,8 @@ /* { dg-do run } */ /* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -lm" } */ /* { dg-require-effective-target int32plus } */ -/* { dg-final { scan-tree-dump "cdce1.c:16: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-tree-dump "cdce1.c:17: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-assembler "jmp pow" } } */ /* { dg-require-effective-target large_double } */ #include diff --git a/gcc/testsuite/gcc.dg/cdce2.c b/gcc/testsuite/gcc.dg/cdce2.c index 30e7cb1..2af2893 100644 --- a/gcc/testsuite/gcc.dg/cdce2.c +++ b/gcc/testsuite/gcc.dg/cdce2.c @@ -1,7 +1,8 @@ /* { dg-do run } */ /* { dg-skip-if "doubles are floats" { "avr-*-*" } } */ /* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -lm" } */ -/* { dg-final { scan-tree-dump "cdce2.c:15: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-tree-dump "cdce2.c:16: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-assembler "jmp log" } } */ #include #include diff --git a/gcc/tree-call-cdce.c b/gcc/tree-call-cdce.c index 2e482b3..9e3372f 100644 --- a/gcc/tree-call-cdce.c +++ b/gcc/tree-call-cdce.c @@ -93,10 +93,10 @@ along with GCC; see the file COPYING3. If not see y = sqrt (x); ==> - y = IFN_SQRT (x); if (__builtin_isless (x, 0)) - sqrt (x); - + y = sqrt (x); + else + y = IFN_SQRT (x); In the vast majority of cases we should then never need to call sqrt. Note that library functions are not supposed to clear errno to zero without @@ -793,14 +793,16 @@ gen_shrink_wrap_conditions (gcall *bi_call, vec conds, } /* Shrink-wrap BI_CALL so that it is only called when one of the NCONDS - conditions in CONDS is false. */ + conditions in CONDS is false. Also move BI_NEWCALL to a new basic block + when it is non-null, it is called while all of the CONDS are true. */ static void shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec conds, - unsigned int nconds) + unsigned int nconds, + gcall *bi_newcall = NULL) { gimple_stmt_iterator bi_call_bsi; - basic_block bi_call_bb, join_tgt_bb, guard_bb; + basic_block bi_call_bb, bi_newcall_bb, join_tgt_bb, guard_bb; edge join_tgt_in_edge_from_call, join_tgt_in_edge_fall_thru; edge bi_call_in_edge0, guard_bb_in_edge; unsigned tn_cond_stmts; @@ -809,27 +811,26 @@ shrink_wrap_one_built_in_call_with_conds (gcall *bi_call,
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/5/8 下午10:31, Richard Biener 写道: On Tue, May 7, 2019 at 4:34 AM JunMa wrote: 在 2019/5/6 下午7:58, JunMa 写道: 在 2019/5/6 下午6:02, Richard Biener 写道: On Thu, Mar 21, 2019 at 5:57 AM JunMa wrote: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? It's probably better if gimple_fold_builtin_memchr uses string_constant directly instead? We can use string_constant in gimple_fold_builtin_memchr, however it is a bit complex to use it in memchr/memcmp constant folding. You are changing memcmp constant folding but only have a testcase for memchr. I'll add later. If we decide to amend c_getstr I would rather make it return the total length instead of the number of trailing zeros. I think return the total length is safe in other place as well. I used the argument in patch since I do not want any impact on other part at all. Although it is safe to use total length, I found that function inline_expand_builtin_string_cmp() which used c_getstr() may emit redundant rtls for trailing null chars when total length is returned. Also it is trivial to handle constant string with trailing null chars. So this updated patch follow richard's suggestion: using string_constant directly. Bootstrapped/regtested on x86_64-linux, ok for trunk? Doesn't this fold to NULL for the case where seaching for '0' and it doesn't occur in the string constant but only the zero-padding? For C case like: const char str[100] = “hello world” const char ch = '\0'; ret = (char*)memchr(str, ch, strlen(str)); ret2 = (char*)memchr(str, ch, sizeof str); then ret is null, ret2 is not So you'd need to conditionalize on c being not zero (or handle that case specially by actually finding the first zero in the padding)? Yes, the string length return from c_getstr() including the termination NULL most of the time(when string_length <= string_size). If ch is NULL and the string length >= 3rd argument of memchr, then we donot need to handle this. Thanks JunMa I think there was work to have all string constants zero terminated but I'm not sure if we can rely on that here. Bernd? Richard. Regards JunMa gcc/ChangeLog 2019-05-07 Jun Ma PR Tree-optimization/89772 * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. gcc/testsuite/ChangeLog 2019-05-07 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test. Thanks JunMa Richard. Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test.
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/5/9 上午3:02, Bernd Edlinger 写道: On 5/8/19 4:31 PM, Richard Biener wrote: On Tue, May 7, 2019 at 4:34 AM JunMa wrote: 在 2019/5/6 下午7:58, JunMa 写道: 在 2019/5/6 下午6:02, Richard Biener 写道: On Thu, Mar 21, 2019 at 5:57 AM JunMa wrote: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? It's probably better if gimple_fold_builtin_memchr uses string_constant directly instead? We can use string_constant in gimple_fold_builtin_memchr, however it is a bit complex to use it in memchr/memcmp constant folding. You are changing memcmp constant folding but only have a testcase for memchr. I'll add later. If we decide to amend c_getstr I would rather make it return the total length instead of the number of trailing zeros. I think return the total length is safe in other place as well. I used the argument in patch since I do not want any impact on other part at all. Although it is safe to use total length, I found that function inline_expand_builtin_string_cmp() which used c_getstr() may emit redundant rtls for trailing null chars when total length is returned. Also it is trivial to handle constant string with trailing null chars. So this updated patch follow richard's suggestion: using string_constant directly. Bootstrapped/regtested on x86_64-linux, ok for trunk? Doesn't this fold to NULL for the case where seaching for '0' and it doesn't occur in the string constant but only the zero-padding? So you'd need to conditionalize on c being not zero (or handle that case specially by actually finding the first zero in the padding)? I think there was work to have all string constants zero terminated but I'm not sure if we can rely on that here. Bernd? It turned out that there is a number of languages that don't have zero-terminated strings by default, which would have complicated the patch just too much for too little benefit. In the end, it was more important to guarantee that mem_size >= string_length holds. The string_length returned form c_getstr() is equal to mem_size when string_length > string_size, so I'll add assert in the patch. In C it is just a convention that string constants have usually a zero-termination, but even with C there are ways how strings constants can be not zero-terminated. There can in theory be optimizations that introduce not zero-terminated strings, like tree-ssa-forwprop.c, where a not zero-terminated string constant is folded in simplify_builtin_call. In such a case, c_getstr might in theory return a string without zero-termination, but I think it will be rather difficult to find a C test case for that. Well, if I had a test case for that I would probably fix it in c_getstr to consider the implicit padding as equivalent to an explicit zero-termination. c_getstr() makes sure the returned string has zero-termination when 2nd argument is NULL, but not when string_length is returned. I think it is the caller's responsibility to take care of both of the returned string and length. Thanks JunMa Bernd. Richard. Regards JunMa gcc/ChangeLog 2019-05-07 Jun Ma PR Tree-optimization/89772 * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. gcc/testsuite/ChangeLog 2019-05-07 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test. Thanks JunMa Richard. Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test.
Re: [PATCH][PR90106] Builtin call transformation changes in cdce pass
在 2019/5/9 上午9:20, Bin.Cheng 写道: On Thu, May 9, 2019 at 5:31 AM Jeff Law wrote: On 5/8/19 6:28 AM, Richard Biener wrote: On Wed, May 8, 2019 at 12:09 PM JunMa wrote: Hi As PR90106 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90106), when gcc meets builtin function call like: y = sqrt (x); The cdce pass tries to transform the call into an internal function call and conditionally executes call with a simple range check on the arguments which can detect most cases and the errno does not need to be set. It looks like: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) sqrt (x); However, If the call is in tail position, for example: y = sqrt (x); return y; will become: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) sqrt (x); return y; This transformation breaks tailcall pattern, and prevents later tailcall optimizations. So This patch transform builtin call with return value into if-then-else part, which looks like: y = sqrt (x); ==> if (__builtin_isless (x, 0)) y = sqrt (x); else y = IFN_SQRT (x); BTW, y = sqrt (x) can also transform like: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) y = sqrt (x); We don‘t choose this pattern because it emits worse assemble code(more move instruction and use more registers) in x86_64. Bootstrapped/regtested on x86_64-linux, ok for trunk? OK. JunMa -- do you have a copyright assignment on file and write access to the repository? If not we should take care of that before proceeding further. Hi Jeff, Thanks very much for helping. Yes, he is under Alibaba's copyright assignment. What else should we do other than noticing in this mailing list message? BTW, I think JunMa needs to apply write-access though. Yes, I do not have write access, FYI. Thanks JunMa Thanks, bin Jeff
Re: [PATCH][Tree-optimization/PR89772]fold memchr builtins for character not in constant nul-padded string
在 2019/5/9 上午10:22, JunMa 写道: 在 2019/5/9 上午3:02, Bernd Edlinger 写道: On 5/8/19 4:31 PM, Richard Biener wrote: On Tue, May 7, 2019 at 4:34 AM JunMa wrote: 在 2019/5/6 下午7:58, JunMa 写道: 在 2019/5/6 下午6:02, Richard Biener 写道: On Thu, Mar 21, 2019 at 5:57 AM JunMa wrote: Hi For now, gcc can not fold code like: const char a[5] = "123" __builtin_memchr (a, '7', sizeof a) It tries to avoid folding out of string length although length of a is 5. This is a bit conservative, it's safe to folding memchr/bcmp/memcmp builtins when constant string stores in array with some trailing nuls. This patch folds these cases by exposing additional length of trailing nuls in c_getstr(). Bootstrapped/regtested on x86_64-linux, ok for trunk? It's probably better if gimple_fold_builtin_memchr uses string_constant directly instead? We can use string_constant in gimple_fold_builtin_memchr, however it is a bit complex to use it in memchr/memcmp constant folding. You are changing memcmp constant folding but only have a testcase for memchr. I'll add later. If we decide to amend c_getstr I would rather make it return the total length instead of the number of trailing zeros. I think return the total length is safe in other place as well. I used the argument in patch since I do not want any impact on other part at all. Although it is safe to use total length, I found that function inline_expand_builtin_string_cmp() which used c_getstr() may emit redundant rtls for trailing null chars when total length is returned. Also it is trivial to handle constant string with trailing null chars. So this updated patch follow richard's suggestion: using string_constant directly. Bootstrapped/regtested on x86_64-linux, ok for trunk? Doesn't this fold to NULL for the case where seaching for '0' and it doesn't occur in the string constant but only the zero-padding? So you'd need to conditionalize on c being not zero (or handle that case specially by actually finding the first zero in the padding)? I think there was work to have all string constants zero terminated but I'm not sure if we can rely on that here. Bernd? It turned out that there is a number of languages that don't have zero-terminated strings by default, which would have complicated the patch just too much for too little benefit. In the end, it was more important to guarantee that mem_size >= string_length holds. The string_length returned form c_getstr() is equal to mem_size when string_length > string_size, so I'll add assert in the patch. In C it is just a convention that string constants have usually a zero-termination, but even with C there are ways how strings constants can be not zero-terminated. There can in theory be optimizations that introduce not zero-terminated strings, like tree-ssa-forwprop.c, where a not zero-terminated string constant is folded in simplify_builtin_call. In such a case, c_getstr might in theory return a string without zero-termination, but I think it will be rather difficult to find a C test case for that. Well, if I had a test case for that I would probably fix it in c_getstr to consider the implicit padding as equivalent to an explicit zero-termination. c_getstr() makes sure the returned string has zero-termination when 2nd argument is NULL, but not when string_length is returned. I think it is the caller's responsibility to take care of both of the returned string and length. Update the patch with assert checking on condition string_length <= string_size. FYI. Also Bootstrapped/regtested on x86_64-linux. Regards JunMa Thanks JunMa Bernd. Richard. Regards JunMa gcc/ChangeLog 2019-05-07 Jun Ma PR Tree-optimization/89772 * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. gcc/testsuite/ChangeLog 2019-05-07 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test. Thanks JunMa Richard. Regards JunMa gcc/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * fold-const.c (c_getstr): Add new parameter to get length of additional trailing nuls after constant string. * gimple-fold.c (gimple_fold_builtin_memchr): consider trailing nuls in out-of-bound accesses checking. * fold-const-call.c (fold_const_call): Likewise. gcc/testsuite/ChangeLog 2019-03-21 Jun Ma PR Tree-optimization/89772 * gcc.dg/builtin-memchr-4.c: New test. --- gcc/gimple-fold.c | 10 +- gcc/testsuite/gcc.dg/builtin-memchr-4.c | 30 ++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/builtin-memchr-4.c diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index 1b10bae..c4fd5b1 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -2557,7 +2
Re: [PATCH][PR90106] Builtin call transformation changes in cdce pass
在 2019/5/10 上午4:00, Jeff Law 写道: On 5/8/19 8:25 PM, JunMa wrote: 在 2019/5/9 上午9:20, Bin.Cheng 写道: On Thu, May 9, 2019 at 5:31 AM Jeff Law wrote: On 5/8/19 6:28 AM, Richard Biener wrote: On Wed, May 8, 2019 at 12:09 PM JunMa wrote: Hi As PR90106 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90106), when gcc meets builtin function call like: y = sqrt (x); The cdce pass tries to transform the call into an internal function call and conditionally executes call with a simple range check on the arguments which can detect most cases and the errno does not need to be set. It looks like: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) sqrt (x); However, If the call is in tail position, for example: y = sqrt (x); return y; will become: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) sqrt (x); return y; This transformation breaks tailcall pattern, and prevents later tailcall optimizations. So This patch transform builtin call with return value into if-then-else part, which looks like: y = sqrt (x); ==> if (__builtin_isless (x, 0)) y = sqrt (x); else y = IFN_SQRT (x); BTW, y = sqrt (x) can also transform like: y = IFN_SQRT (x); if (__builtin_isless (x, 0)) y = sqrt (x); We don‘t choose this pattern because it emits worse assemble code(more move instruction and use more registers) in x86_64. Bootstrapped/regtested on x86_64-linux, ok for trunk? OK. JunMa -- do you have a copyright assignment on file and write access to the repository? If not we should take care of that before proceeding further. Hi Jeff, Thanks very much for helping. Yes, he is under Alibaba's copyright assignment. What else should we do other than noticing in this mailing list message? BTW, I think JunMa needs to apply write-access though. Yes, I do not have write access, FYI. OK. So in my response to Bin I gave you a link to the form to fill out to get write access. List me as your sponsor. Once that's all taken care of and working, go ahead and commit this change to the trunk. Thanks! JunMa Jeff
Re: [PATCH][PR89341]Fix ICE on function definition with weakref/alias attributes attached
在 2019/3/26 下午7:40, JunMa 写道: Hi According to gnu document of function attributes, neither weakref nor alias could be attached to a function defined in current translation unit. Although GCC checks the attributes under some circumstances, it still fails on some cases and even causes ICE. This patch checks whether alias/weakref attribute attaches on a function declaration which has body, and removes the attribute later. This also avoid the ICE. Bootstrapped/regtested on x86_64-linux, ok for GCC10? Regards JunMa gcc/ChangeLog 2019-03-26 Jun Ma PR89341 * cgraphunit.c (handle_alias_pairs): Check whether alias attribute attaches on a function declaration which has body. gcc/testsuite/ChangeLog 2019-03-26 Jun Ma PR89341 * gcc.dg/attr-alias-6.c: New test. * gcc.dg/attr-weakref-5.c: Likewise. Hi Jakub Since you are owner of this part, so I add you to cc list. Ping? Regards JunMa
Re: [PATCH][PR89341]Fix ICE on function definition with weakref/alias attributes attached
在 2019/5/13 下午5:02, Jakub Jelinek 写道: On Mon, May 13, 2019 at 04:53:52PM +0800, JunMa wrote: According to gnu document of function attributes, neither weakref nor alias could be attached to a function defined in current translation unit. Although GCC checks the attributes under some circumstances, it still fails on some cases and even causes ICE. This patch checks whether alias/weakref attribute attaches on a function declaration which has body, and removes the attribute later. This also avoid the ICE. Bootstrapped/regtested on x86_64-linux, ok for GCC10? 2019-03-26 Jun Ma PR89341 * cgraphunit.c (handle_alias_pairs): Check whether alias attribute attaches on a function declaration which has body. gcc/testsuite/ChangeLog 2019-03-26 Jun Ma PR89341 * gcc.dg/attr-alias-6.c: New test. * gcc.dg/attr-weakref-5.c: Likewise. Hi Jakub Since you are owner of this part, so I add you to cc list. Ping? I'm not the maintainer of the callgraph, Honza is: callgraph Jan Hubicka Hi Jakub Thanks for your reminder. Honza, Ping? Regards JunMa Jakub
[PATCH] Add myself to MAINTAINERS
Already committed, as per https://www.gnu.org/software/gcc/svnwrite.html. Regards Jun 2019-05-16 Jun Ma * MAINTAINERS (Write After Approval): Add myself. diff --git a/MAINTAINERS b/MAINTAINERS index fb7fd34..7c5942a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -514,6 +514,7 @@ Brooks Moses Dirk Mueller Phil Muldoon Steven Munroe +Jun Ma Szabolcs Nagy Quentin Neill Adam Nemet -- 1.8.3.1
Re: [PATCH][PR90106] Builtin call transformation changes in cdce pass
在 2019/5/17 上午6:04, Jakub Jelinek 写道: On Thu, May 16, 2019 at 11:39:38PM +0200, Jakub Jelinek wrote: One possibility is to add -fdump-tree-optimized and scan for /* { dg-final { scan-tree-dump "pow \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ resp. /* { dg-final { scan-tree-dump "log \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ Here it is in patch form. That said, I'm not convinced your patch does what you wanted, because comparing a month old trunk with today's trunk generates the same assembly except for .ident, generates as many [tail call] lines in *.optimized dump as before, emits the same number of jmp\tpow and jmp\tlog instructions as before (one in a separate routine). Thanks for point out the mistake and fix it. For these two tests, cdce pass doesn't transform the builtin math functions in foo with or without the patch because they cannot use internal functions. I'll add another testcase to verify the patch. Regards Jun But at least the tests aren't UNSUPPORTED anymore. 2019-05-16 Jakub Jelinek PR tree-optimization/90106 * gcc.dg/cdce1.c: Don't scan-assembler, instead -fdump-tree-optimized and scan-tree-dump for tail call. * gcc.dg/cdce2.c: Likewise. --- gcc/testsuite/gcc.dg/cdce1.c.jj 2019-05-16 11:28:22.750177582 +0200 +++ gcc/testsuite/gcc.dg/cdce1.c2019-05-16 23:50:23.618450891 +0200 @@ -1,9 +1,9 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -lm" } */ +/* { dg-do run } */ +/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -fdump-tree-optimized -lm" } */ /* { dg-require-effective-target int32plus } */ -/* { dg-final { scan-tree-dump "cdce1.c:17: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ -/* { dg-final { scan-assembler "jmp pow" } } */ /* { dg-require-effective-target large_double } */ +/* { dg-final { scan-tree-dump "cdce1.c:17: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-tree-dump "pow \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ #include #include --- gcc/testsuite/gcc.dg/cdce2.c.jj 2019-05-16 11:28:22.781177075 +0200 +++ gcc/testsuite/gcc.dg/cdce2.c2019-05-16 23:50:58.505880845 +0200 @@ -1,8 +1,8 @@ -/* { dg-do run } */ +/* { dg-do run } */ /* { dg-skip-if "doubles are floats" { "avr-*-*" } } */ -/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -lm" } */ -/* { dg-final { scan-tree-dump "cdce2.c:16: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ -/* { dg-final { scan-assembler "jmp log" } } */ +/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -fdump-tree-optimized -lm" } */ +/* { dg-final { scan-tree-dump "cdce2.c:16: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-tree-dump "log \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ #include #include Jakub
Re: [PATCH][PR90106] Builtin call transformation changes in cdce pass
在 2019/5/17 上午11:09, JunMa 写道: 在 2019/5/17 上午6:04, Jakub Jelinek 写道: On Thu, May 16, 2019 at 11:39:38PM +0200, Jakub Jelinek wrote: One possibility is to add -fdump-tree-optimized and scan for /* { dg-final { scan-tree-dump "pow \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ resp. /* { dg-final { scan-tree-dump "log \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ Here it is in patch form. That said, I'm not convinced your patch does what you wanted, because comparing a month old trunk with today's trunk generates the same assembly except for .ident, generates as many [tail call] lines in *.optimized dump as before, emits the same number of jmp\tpow and jmp\tlog instructions as before (one in a separate routine). Thanks for point out the mistake and fix it. For these two tests, cdce pass doesn't transform the builtin math functions in foo with or without the patch because they cannot use internal functions. I'll add another testcase to verify the patch. Here is the new testcase. The sqrtf function call keeps as tailcall with the patch but not without the patch. For more details, you can read https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90106. Bootstrapped/regtested on x86_64-linux, ok for trunk? Regards Jun gcc/testsuite/ChangeLog 2019-05-17 Jun Ma PR tree-optimization/90106 * gcc.dg/cdce3.c: New test. Regards Jun But at least the tests aren't UNSUPPORTED anymore. 2019-05-16 Jakub Jelinek PR tree-optimization/90106 * gcc.dg/cdce1.c: Don't scan-assembler, instead -fdump-tree-optimized and scan-tree-dump for tail call. * gcc.dg/cdce2.c: Likewise. --- gcc/testsuite/gcc.dg/cdce1.c.jj 2019-05-16 11:28:22.750177582 +0200 +++ gcc/testsuite/gcc.dg/cdce1.c 2019-05-16 23:50:23.618450891 +0200 @@ -1,9 +1,9 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -lm" } */ +/* { dg-do run } */ +/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -fdump-tree-optimized -lm" } */ /* { dg-require-effective-target int32plus } */ -/* { dg-final { scan-tree-dump "cdce1.c:17: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ -/* { dg-final { scan-assembler "jmp pow" } } */ /* { dg-require-effective-target large_double } */ +/* { dg-final { scan-tree-dump "cdce1.c:17: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-tree-dump "pow \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ #include #include --- gcc/testsuite/gcc.dg/cdce2.c.jj 2019-05-16 11:28:22.781177075 +0200 +++ gcc/testsuite/gcc.dg/cdce2.c 2019-05-16 23:50:58.505880845 +0200 @@ -1,8 +1,8 @@ -/* { dg-do run } */ +/* { dg-do run } */ /* { dg-skip-if "doubles are floats" { "avr-*-*" } } */ -/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -lm" } */ -/* { dg-final { scan-tree-dump "cdce2.c:16: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ -/* { dg-final { scan-assembler "jmp log" } } */ +/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -fdump-tree-optimized -lm" } */ +/* { dg-final { scan-tree-dump "cdce2.c:16: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-tree-dump "log \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ #include #include Jakub --- gcc/testsuite/gcc.dg/cdce3.c | 12 1 file changed, 12 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/cdce3.c diff --git a/gcc/testsuite/gcc.dg/cdce3.c b/gcc/testsuite/gcc.dg/cdce3.c new file mode 100644 index 000..0062c4f --- /dev/null +++ b/gcc/testsuite/gcc.dg/cdce3.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -fdump-tree-optimized -lm" } */ +/* { dg-final { scan-tree-dump "cdce3.c:10: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-tree-dump "sqrtf \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ + +#include + +float foo ( float x ) +{ + return sqrtf( x ); +} + -- 1.8.3.1
Re: [PATCH][PR90106] Builtin call transformation changes in cdce pass
在 2019/5/17 下午2:34, Jakub Jelinek 写道: On Fri, May 17, 2019 at 02:24:22PM +0800, JunMa wrote: 2019-05-17 Jun Ma Two spaces before < rather than one. PR tree-optimization/90106 * gcc.dg/cdce3.c: New test. --- /dev/null +++ b/gcc/testsuite/gcc.dg/cdce3.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ Just use one space instead of two. +/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -fdump-tree-optimized -lm" } */ For compile time test, no need to add " -lm" (well, no need to add it even for link/run tests). +/* { dg-final { scan-tree-dump "cdce3.c:10: .* function call is shrink-wrapped into error conditions\." "cdce" } } */ Please use \[^\n\r]* instead of .*, you don't want newlines matched in there. +/* { dg-final { scan-tree-dump "sqrtf \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ + +#include Wouldn't it be better to just declare it yourself: float sqrtf (float); ? You really don't know what the target math.h includes. + +float foo ( float x ) +{ + return sqrtf( x ); +} + -- 1.8.3.1 Thanks for review so carefully. Update the patch based on your suggestion. Regards Jun gcc/testsuite/ChangeLog 2019-05-17 Jun Ma PR tree-optimization/90106 * gcc.dg/cdce3.c: New test. Jakub --- gcc/testsuite/gcc.dg/cdce3.c | 11 +++ 1 file changed, 11 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/cdce3.c diff --git a/gcc/testsuite/gcc.dg/cdce3.c b/gcc/testsuite/gcc.dg/cdce3.c new file mode 100644 index 000..8a74379 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cdce3.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump "cdce3.c:9: \[^\n\r]* function call is shrink-wrapped into error conditions\." "cdce" } } */ +/* { dg-final { scan-tree-dump "sqrtf \\(\[^\n\r]*\\); \\\[tail call\\\]" "optimized" } } */ + +float sqrtf (float); +float foo (float x) +{ + return sqrtf (x); +} + -- 1.8.3.1