So, I have a rough cut of a new feature that adds #pragma GCC unroll support to GCC. It is safe with respect to the test suite and code generation, but there are some corners that I need help or suggestions on how to round off nicely.
Things to think about: Can we put this in even if C++ template support is still a TODO? I think that is only 2-5 lines away from completion, but one needs to play peek-a-boo with some data (the unroll count), and I wanted to let the pt.c folks decide where to hide the data. See RANGE_FOR_IVDEP for how I want to do it. Can I increase the size of all annotations from 2 to 3? It seemed safer and easier than trying to subdivide it. I didn’t engineer ivdep and unroll together. Does it sound reasonable to allow both to be used at the same time on the same loop? If so, I can add the two other cases; presently I just handle one of them, then the loop. Fortran support is left to the Fortran people, if they want to do it. I wired it up tantalizingly close for them to complete. See ICK in the code. I was unsure how to resolve that code. Does unroll 8 mean that the loop is repeated 8 times? Can I turn on peeling in try_peel_loop simply because it is wanted for one loop? I support using -1 for a directive that says, don’t peel, don’t unroll. As a UI issue, I think this is wrong. I want it to be either 0 or 1; those two seem better. But I am not sure which is the right one of the two. Which number says, don’t unroll, I’m smarter than you think? If we have a loop that we know can only be unrolled 7 times, and the user says unroll 8, should we unroll it 7 times? Presently I do. The other option is to ignore the directive when we know it is nonsensical. Yes, I’m aware that this isn’t the right phase for this, but such are business cycles. It would not go in until we reenter stage 1. I see no value in trying to squeeze it in past stage 1.
Index: ada/gcc-interface/trans.c =================================================================== --- ada/gcc-interface/trans.c (revision 219031) +++ ada/gcc-interface/trans.c (working copy) @@ -7850,17 +7850,20 @@ gnat_gimplify_stmt (tree *stmt_p) { /* Deal with the optimization hints. */ if (LOOP_STMT_IVDEP (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); if (LOOP_STMT_NO_VECTOR (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_no_vector_kind)); + annot_expr_no_vector_kind), + integer_zero_node); if (LOOP_STMT_VECTOR (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_vector_kind)); + annot_expr_vector_kind), + integer_zero_node); gnu_cond = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE, Index: c/c-parser.c =================================================================== --- c/c-parser.c (revision 219031) +++ c/c-parser.c (working copy) @@ -1206,9 +1206,9 @@ static void c_parser_statement (c_parser static void c_parser_statement_after_labels (c_parser *); static void c_parser_if_statement (c_parser *); static void c_parser_switch_statement (c_parser *); -static void c_parser_while_statement (c_parser *, bool); -static void c_parser_do_statement (c_parser *, bool); -static void c_parser_for_statement (c_parser *, bool); +static void c_parser_while_statement (c_parser *, bool, int); +static void c_parser_do_statement (c_parser *, bool, int); +static void c_parser_for_statement (c_parser *, bool, int); static tree c_parser_asm_statement (c_parser *); static tree 
c_parser_asm_operands (c_parser *); static tree c_parser_asm_goto_operands (c_parser *); @@ -4925,13 +4925,13 @@ c_parser_statement_after_labels (c_parse c_parser_switch_statement (parser); break; case RID_WHILE: - c_parser_while_statement (parser, false); + c_parser_while_statement (parser, false, 0); break; case RID_DO: - c_parser_do_statement (parser, false); + c_parser_do_statement (parser, false, 0); break; case RID_FOR: - c_parser_for_statement (parser, false); + c_parser_for_statement (parser, false, 0); break; case RID_CILK_FOR: if (!flag_cilkplus) @@ -5293,7 +5293,7 @@ c_parser_switch_statement (c_parser *par */ static void -c_parser_while_statement (c_parser *parser, bool ivdep) +c_parser_while_statement (c_parser *parser, bool ivdep, int unroll) { tree block, cond, body, save_break, save_cont; location_t loc; @@ -5307,9 +5307,15 @@ c_parser_while_statement (c_parser *pars "%<_Cilk_spawn%> statement cannot be used as a condition for while statement")) cond = error_mark_node; if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); + if (unroll && cond != error_mark_node) + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, + annot_expr_unroll_kind), + build_int_cst (integer_type_node, unroll)); save_break = c_break_label; c_break_label = NULL_TREE; save_cont = c_cont_label; @@ -5328,7 +5334,7 @@ c_parser_while_statement (c_parser *pars */ static void -c_parser_do_statement (c_parser *parser, bool ivdep) +c_parser_do_statement (c_parser *parser, bool ivdep, int unroll) { tree block, cond, body, save_break, save_cont, new_break, new_cont; location_t loc; @@ -5356,9 +5362,16 @@ c_parser_do_statement (c_parser *parser, "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement")) cond = 
error_mark_node; if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, + annot_expr_ivdep_kind), + integer_zero_node); + if (unroll && cond != error_mark_node) + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_unroll_kind), + build_int_cst (integer_type_node, + unroll)); if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>")) c_parser_skip_to_end_of_block_or_statement (parser); c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false); @@ -5422,7 +5435,7 @@ c_parser_do_statement (c_parser *parser, */ static void -c_parser_for_statement (c_parser *parser, bool ivdep) +c_parser_for_statement (c_parser *parser, bool ivdep, int unroll) { tree block, cond, incr, save_break, save_cont, body; /* The following are only used when parsing an ObjC foreach statement. */ @@ -5540,6 +5553,12 @@ c_parser_for_statement (c_parser *parser "%<GCC ivdep%> pragma"); cond = error_mark_node; } + else if (unroll) + { + c_parser_error (parser, "missing loop condition in loop with " + "%<GCC unroll%> pragma"); + cond = error_mark_node; + } else { c_parser_consume_token (parser); @@ -5557,9 +5576,15 @@ c_parser_for_statement (c_parser *parser "expected %<;%>"); } if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, + annot_expr_ivdep_kind), + integer_zero_node); + if (unroll && cond != error_mark_node) + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_unroll_kind), + build_int_cst (integer_type_node, unroll)); } /* Parse the increment expression (the third expression in a for-statement). 
In the case of a foreach-statement, this is @@ -9658,12 +9683,42 @@ c_parser_pragma (c_parser *parser, enum return false; } if (c_parser_next_token_is_keyword (parser, RID_FOR)) - c_parser_for_statement (parser, true); + c_parser_for_statement (parser, true, 0); else if (c_parser_next_token_is_keyword (parser, RID_WHILE)) - c_parser_while_statement (parser, true); + c_parser_while_statement (parser, true, 0); else - c_parser_do_statement (parser, true); + c_parser_do_statement (parser, true, 0); return false; + case PRAGMA_UNROLL: + { + c_parser_consume_pragma (parser); + tree expr = c_parser_expr_no_commas (parser, NULL).value; + mark_exp_read (expr); + expr = c_fully_fold (expr, false, NULL); + if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)) + || TREE_CODE (expr) != INTEGER_CST) + { + c_parser_error (parser, "%<#pragma GCC unroll%> requires a number"); + expr = integer_zero_node; + } + int unroll = tree_to_shwi (expr); + + c_parser_skip_to_pragma_eol (parser); + if (!c_parser_next_token_is_keyword (parser, RID_FOR) + && !c_parser_next_token_is_keyword (parser, RID_WHILE) + && !c_parser_next_token_is_keyword (parser, RID_DO)) + { + c_parser_error (parser, "for, while or do statement expected"); + return false; + } + if (c_parser_next_token_is_keyword (parser, RID_FOR)) + c_parser_for_statement (parser, false, unroll); + else if (c_parser_next_token_is_keyword (parser, RID_WHILE)) + c_parser_while_statement (parser, false, unroll); + else + c_parser_do_statement (parser, false, unroll); + return false; + } case PRAGMA_GCC_PCH_PREPROCESS: c_parser_error (parser, "%<#pragma GCC pch_preprocess%> must be first"); Index: c-family/c-pragma.c =================================================================== --- c-family/c-pragma.c (revision 219031) +++ c-family/c-pragma.c (working copy) @@ -1415,6 +1415,10 @@ init_pragma (void) cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false, false); + if (!flag_preprocess_only) + cpp_register_deferred_pragma 
(parse_in, "GCC", "unroll", PRAGMA_UNROLL, false, + false); + if (flag_cilkplus && !flag_preprocess_only) cpp_register_deferred_pragma (parse_in, "cilk", "grainsize", PRAGMA_CILK_GRAINSIZE, true, false); Index: c-family/c-pragma.h =================================================================== --- c-family/c-pragma.h (revision 219031) +++ c-family/c-pragma.h (working copy) @@ -60,6 +60,7 @@ typedef enum pragma_kind { PRAGMA_GCC_PCH_PREPROCESS, PRAGMA_IVDEP, + PRAGMA_UNROLL, PRAGMA_FIRST_EXTERNAL } pragma_kind; Index: cfgloop.h =================================================================== --- cfgloop.h (revision 219031) +++ cfgloop.h (working copy) @@ -189,6 +189,11 @@ struct GTY ((chain_next ("%h.next"))) lo of the loop can be safely evaluated concurrently. */ int safelen; + + /* The number of times to unroll the loop. 0, means no information + given, just do what we always do. */ + int unroll; + /* True if this loop should never be vectorized. */ bool dont_vectorize; Index: cp/cp-array-notation.c =================================================================== --- cp/cp-array-notation.c (revision 219031) +++ cp/cp-array-notation.c (working copy) @@ -71,7 +71,7 @@ create_an_loop (tree init, tree cond, tr finish_expr_stmt (init); for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE); finish_for_init_stmt (for_stmt); - finish_for_cond (cond, for_stmt, false); + finish_for_cond (cond, for_stmt, false, 0); finish_for_expr (incr, for_stmt); finish_expr_stmt (body); finish_for_stmt (for_stmt); Index: cp/cp-tree.h =================================================================== --- cp/cp-tree.h (revision 219031) +++ cp/cp-tree.h (working copy) @@ -5645,7 +5645,7 @@ extern tree implicitly_declare_fn extern bool maybe_clone_body (tree); /* In parser.c */ -extern tree cp_convert_range_for (tree, tree, tree, bool); +extern tree cp_convert_range_for (tree, tree, tree, bool, int); extern bool parsing_nsdmi (void); extern void inject_this_parameter (tree, 
cp_cv_quals); @@ -5881,16 +5881,16 @@ extern void begin_else_clause (tree); extern void finish_else_clause (tree); extern void finish_if_stmt (tree); extern tree begin_while_stmt (void); -extern void finish_while_stmt_cond (tree, tree, bool); +extern void finish_while_stmt_cond (tree, tree, bool, int); extern void finish_while_stmt (tree); extern tree begin_do_stmt (void); extern void finish_do_body (tree); -extern void finish_do_stmt (tree, tree, bool); +extern void finish_do_stmt (tree, tree, bool, int); extern tree finish_return_stmt (tree); extern tree begin_for_scope (tree *); extern tree begin_for_stmt (tree, tree); extern void finish_for_init_stmt (tree); -extern void finish_for_cond (tree, tree, bool); +extern void finish_for_cond (tree, tree, bool, int); extern void finish_for_expr (tree, tree); extern void finish_for_stmt (tree); extern tree begin_range_for_stmt (tree, tree); Index: cp/init.c =================================================================== --- cp/init.c (revision 219031) +++ cp/init.c (working copy) @@ -3689,7 +3689,7 @@ build_vec_init (tree base, tree maxindex finish_for_init_stmt (for_stmt); finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator, build_int_cst (TREE_TYPE (iterator), -1)), - for_stmt, false); + for_stmt, false, 0); elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0, complain); if (elt_init == error_mark_node) Index: cp/parser.c =================================================================== --- cp/parser.c (revision 219031) +++ cp/parser.c (working copy) @@ -2038,15 +2038,15 @@ static tree cp_parser_selection_statemen static tree cp_parser_condition (cp_parser *); static tree cp_parser_iteration_statement - (cp_parser *, bool); + (cp_parser *, bool, int); static bool cp_parser_for_init_statement (cp_parser *, tree *decl); static tree cp_parser_for - (cp_parser *, bool); + (cp_parser *, bool, int); static tree cp_parser_c_for - (cp_parser *, tree, tree, bool); + (cp_parser *, tree, tree, bool, 
int); static tree cp_parser_range_for - (cp_parser *, tree, tree, tree, bool); + (cp_parser *, tree, tree, tree, bool, int); static void do_range_for_auto_deduction (tree, tree); static tree cp_parser_perform_range_for_lookup @@ -9652,7 +9652,7 @@ cp_parser_statement (cp_parser* parser, case RID_WHILE: case RID_DO: case RID_FOR: - statement = cp_parser_iteration_statement (parser, false); + statement = cp_parser_iteration_statement (parser, false, 0); break; case RID_CILK_FOR: @@ -10344,7 +10344,7 @@ cp_parser_condition (cp_parser* parser) not included. */ static tree -cp_parser_for (cp_parser *parser, bool ivdep) +cp_parser_for (cp_parser *parser, bool ivdep, int unroll) { tree init, scope, decl; bool is_range_for; @@ -10356,13 +10356,13 @@ cp_parser_for (cp_parser *parser, bool i is_range_for = cp_parser_for_init_statement (parser, &decl); if (is_range_for) - return cp_parser_range_for (parser, scope, init, decl, ivdep); + return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll); else - return cp_parser_c_for (parser, scope, init, ivdep); + return cp_parser_c_for (parser, scope, init, ivdep, unroll); } static tree -cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep) +cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep, int unroll) { /* Normal for loop */ tree condition = NULL_TREE; @@ -10383,7 +10383,13 @@ cp_parser_c_for (cp_parser *parser, tree "%<GCC ivdep%> pragma"); condition = error_mark_node; } - finish_for_cond (condition, stmt, ivdep); + else if (unroll) + { + cp_parser_error (parser, "missing loop condition in loop with " + "%<GCC unroll%> pragma"); + condition = error_mark_node; + } + finish_for_cond (condition, stmt, ivdep, unroll); /* Look for the `;'. 
*/ cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON); @@ -10407,7 +10413,7 @@ cp_parser_c_for (cp_parser *parser, tree static tree cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl, - bool ivdep) + bool ivdep, int unroll) { tree stmt, range_expr; @@ -10428,6 +10434,8 @@ cp_parser_range_for (cp_parser *parser, stmt = begin_range_for_stmt (scope, init); if (ivdep) RANGE_FOR_IVDEP (stmt) = 1; + if (unroll) + /* TODO */(void)0; finish_range_for_decl (stmt, range_decl, range_expr); if (!type_dependent_expression_p (range_expr) /* do_auto_deduction doesn't mess with template init-lists. */ @@ -10437,7 +10445,7 @@ cp_parser_range_for (cp_parser *parser, else { stmt = begin_for_stmt (scope, init); - stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep); + stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll); } return stmt; } @@ -10529,7 +10537,7 @@ do_range_for_auto_deduction (tree decl, tree cp_convert_range_for (tree statement, tree range_decl, tree range_expr, - bool ivdep) + bool ivdep, int unroll) { tree begin, end; tree iter_type, begin_expr, end_expr; @@ -10586,7 +10594,7 @@ cp_convert_range_for (tree statement, tr begin, ERROR_MARK, end, ERROR_MARK, NULL, tf_warning_or_error); - finish_for_cond (condition, statement, ivdep); + finish_for_cond (condition, statement, ivdep, unroll); /* The new increment expression. */ expression = finish_unary_op_expr (input_location, @@ -10747,7 +10755,7 @@ cp_parser_range_for_member_function (tre Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT. */ static tree -cp_parser_iteration_statement (cp_parser* parser, bool ivdep) +cp_parser_iteration_statement (cp_parser* parser, bool ivdep, int unroll) { cp_token *token; enum rid keyword; @@ -10777,7 +10785,7 @@ cp_parser_iteration_statement (cp_parser cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN); /* Parse the condition. 
*/ condition = cp_parser_condition (parser); - finish_while_stmt_cond (condition, statement, ivdep); + finish_while_stmt_cond (condition, statement, ivdep, unroll); /* Look for the `)'. */ cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN); /* Parse the dependent statement. */ @@ -10807,7 +10815,7 @@ cp_parser_iteration_statement (cp_parser /* Parse the expression. */ expression = cp_parser_expression (parser); /* We're done with the do-statement. */ - finish_do_stmt (expression, statement, ivdep); + finish_do_stmt (expression, statement, ivdep, unroll); /* Look for the `)'. */ cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN); /* Look for the `;'. */ @@ -10820,7 +10828,7 @@ cp_parser_iteration_statement (cp_parser /* Look for the `('. */ cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN); - statement = cp_parser_for (parser, ivdep); + statement = cp_parser_for (parser, ivdep, unroll); /* Look for the `)'. */ cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN); @@ -32135,7 +32143,32 @@ cp_parser_pragma (cp_parser *parser, enu cp_parser_error (parser, "for, while or do statement expected"); return false; } - cp_parser_iteration_statement (parser, true); + cp_parser_iteration_statement (parser, true, 0); + return true; + } + + case PRAGMA_UNROLL: + { + tree expr = cp_parser_constant_expression (parser); + expr = maybe_constant_value (expr); + cp_parser_skip_to_pragma_eol (parser, pragma_tok); + if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)) + || TREE_CODE (expr) != INTEGER_CST) + { + cp_parser_error (parser, "%<#pragma GCC unroll%> requires a number"); + expr = integer_zero_node; + } + int unroll = tree_to_shwi (expr); + cp_token *tok; + tok = cp_lexer_peek_token (the_parser->lexer); + if (tok->type != CPP_KEYWORD + || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE + && tok->keyword != RID_DO)) + { + cp_parser_error (parser, "for, while or do statement expected"); + return false; + } + cp_parser_iteration_statement (parser, false, 
unroll); return true; } Index: cp/pt.c =================================================================== --- cp/pt.c (revision 219031) +++ cp/pt.c (working copy) @@ -13876,7 +13876,7 @@ tsubst_expr (tree t, tree args, tsubst_f RECUR (FOR_INIT_STMT (t)); finish_for_init_stmt (stmt); tmp = RECUR (FOR_COND (t)); - finish_for_cond (tmp, stmt, false); + finish_for_cond (tmp, stmt, false, 0); tmp = RECUR (FOR_EXPR (t)); finish_for_expr (tmp, stmt); RECUR (FOR_BODY (t)); @@ -13891,7 +13891,7 @@ tsubst_expr (tree t, tree args, tsubst_f decl = tsubst (decl, args, complain, in_decl); maybe_push_decl (decl); expr = RECUR (RANGE_FOR_EXPR (t)); - stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t)); + stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0); RECUR (RANGE_FOR_BODY (t)); finish_for_stmt (stmt); } @@ -13900,7 +13900,7 @@ tsubst_expr (tree t, tree args, tsubst_f case WHILE_STMT: stmt = begin_while_stmt (); tmp = RECUR (WHILE_COND (t)); - finish_while_stmt_cond (tmp, stmt, false); + finish_while_stmt_cond (tmp, stmt, false, 0); RECUR (WHILE_BODY (t)); finish_while_stmt (stmt); break; @@ -13910,7 +13910,7 @@ tsubst_expr (tree t, tree args, tsubst_f RECUR (DO_BODY (t)); finish_do_body (stmt); tmp = RECUR (DO_COND (t)); - finish_do_stmt (tmp, stmt, false); + finish_do_stmt (tmp, stmt, false, 0); break; case IF_STMT: @@ -14348,8 +14348,8 @@ tsubst_expr (tree t, tree args, tsubst_f case ANNOTATE_EXPR: tmp = RECUR (TREE_OPERAND (t, 0)); - RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR, - TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1)))); + RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR, + TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1)), RECUR (TREE_OPERAND (t, 2)))); default: gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t))); Index: cp/semantics.c =================================================================== --- cp/semantics.c (revision 219031) +++ cp/semantics.c (working copy) @@ -796,7 +796,7 @@ begin_while_stmt (void) 
WHILE_STMT. */ void -finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep) +finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep, int unroll) { if (check_no_cilk (cond, "Cilk array notation cannot be used as a condition for while statement", @@ -806,11 +806,19 @@ finish_while_stmt_cond (tree cond, tree finish_cond (&WHILE_COND (while_stmt), cond); begin_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR, + WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR, TREE_TYPE (WHILE_COND (while_stmt)), WHILE_COND (while_stmt), build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); + if (unroll && cond != error_mark_node) + WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR, + TREE_TYPE (WHILE_COND (while_stmt)), + WHILE_COND (while_stmt), + build_int_cst (integer_type_node, + annot_expr_unroll_kind), + build_int_cst (integer_type_node, unroll)); simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt)); } @@ -855,7 +863,7 @@ finish_do_body (tree do_stmt) COND is as indicated. 
*/ void -finish_do_stmt (tree cond, tree do_stmt, bool ivdep) +finish_do_stmt (tree cond, tree do_stmt, bool ivdep, int unroll) { if (check_no_cilk (cond, "Cilk array notation cannot be used as a condition for a do-while statement", @@ -864,8 +872,13 @@ finish_do_stmt (tree cond, tree do_stmt, cond = maybe_convert_cond (cond); end_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, - build_int_cst (integer_type_node, annot_expr_ivdep_kind)); + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, annot_expr_ivdep_kind), + integer_zero_node); + if (unroll && cond != error_mark_node) + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, annot_expr_unroll_kind), + build_int_cst (integer_type_node, unroll)); DO_COND (do_stmt) = cond; } @@ -968,7 +981,7 @@ finish_for_init_stmt (tree for_stmt) FOR_STMT. */ void -finish_for_cond (tree cond, tree for_stmt, bool ivdep) +finish_for_cond (tree cond, tree for_stmt, bool ivdep, int unroll) { if (check_no_cilk (cond, "Cilk array notation cannot be used in a condition for a for-loop", @@ -978,11 +991,20 @@ finish_for_cond (tree cond, tree for_stm finish_cond (&FOR_COND (for_stmt), cond); begin_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR, + FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR, TREE_TYPE (FOR_COND (for_stmt)), FOR_COND (for_stmt), build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); + if (unroll && cond != error_mark_node) + FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR, + TREE_TYPE (FOR_COND (for_stmt)), + FOR_COND (for_stmt), + build_int_cst (integer_type_node, + annot_expr_unroll_kind), + build_int_cst (integer_type_node, + unroll)); simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt)); } Index: fortran/trans-stmt.c 
=================================================================== --- fortran/trans-stmt.c (revision 219031) +++ fortran/trans-stmt.c (working copy) @@ -2790,9 +2790,10 @@ gfc_trans_forall_loop (forall_info *fora cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node, count, build_int_cst (TREE_TYPE (count), 0)); if (forall_tmp->do_concurrent) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); tmp = build1_v (GOTO_EXPR, exit_label); tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, Index: gimplify.c =================================================================== --- gimplify.c (revision 219031) +++ gimplify.c (working copy) @@ -2888,6 +2888,7 @@ gimple_boolify (tree expr) case annot_expr_ivdep_kind: case annot_expr_no_vector_kind: case annot_expr_vector_kind: + case annot_expr_unroll_kind: TREE_OPERAND (expr, 0) = gimple_boolify (TREE_OPERAND (expr, 0)); if (TREE_CODE (type) != BOOLEAN_TYPE) TREE_TYPE (expr) = boolean_type_node; @@ -7784,6 +7785,7 @@ gimplify_expr (tree *expr_p, gimple_seq { tree cond = TREE_OPERAND (*expr_p, 0); tree kind = TREE_OPERAND (*expr_p, 1); + tree data = TREE_OPERAND (*expr_p, 2); tree type = TREE_TYPE (cond); if (!INTEGRAL_TYPE_P (type)) { @@ -7794,7 +7796,7 @@ gimplify_expr (tree *expr_p, gimple_seq tree tmp = create_tmp_var (type); gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p)); gcall *call - = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind); + = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data); gimple_call_set_lhs (call, tmp); gimplify_seq_add_stmt (pre_p, call); *expr_p = tmp; Index: loop-unroll.c =================================================================== --- loop-unroll.c (revision 219031) +++ loop-unroll.c (working copy) @@ -405,6 +405,19 @@ decide_unroll_constant_iterations (struc return; } + 
if (loop->unroll > 0) + { + loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; + loop->lpt_decision.times = loop->unroll - 1; + if (loop->lpt_decision.times > desc->niter - 2) /* ICK: see ICK below. */ + { + /* They won't do this for us. */ + loop->lpt_decision.decision = LPT_NONE; + loop->lpt_decision.times = desc->niter - 2; + } + return; + } + /* Check whether the loop rolls enough to consider. Consult also loop bounds and profile; in the case the loop has more than one exit it may well loop less than determined maximal number @@ -426,7 +439,7 @@ decide_unroll_constant_iterations (struc best_copies = 2 * nunroll + 10; i = 2 * nunroll + 2; - if (i - 1 >= desc->niter) + if (i > desc->niter - 2) /* ICK, compare the first assert in unroll_loop_constant_iterations */ i = desc->niter - 2; for (; i >= nunroll - 1; i--) @@ -678,6 +691,9 @@ decide_unroll_runtime_iterations (struct if (targetm.loop_unroll_adjust) nunroll = targetm.loop_unroll_adjust (nunroll, loop); + if (loop->unroll > 0) + nunroll = loop->unroll; + /* Skip big loops. */ if (nunroll <= 1) { @@ -707,9 +723,11 @@ decide_unroll_runtime_iterations (struct } /* Check whether the loop rolls. */ - if ((get_estimated_loop_iterations (loop, &iterations) - || get_max_loop_iterations (loop, &iterations)) - && wi::ltu_p (iterations, 2 * nunroll)) + if (loop->unroll) + ; + else if ((get_estimated_loop_iterations (loop, &iterations) + || get_max_loop_iterations (loop, &iterations)) + && wi::ltu_p (iterations, 2 * nunroll)) { if (dump_file) fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); @@ -1125,6 +1143,9 @@ decide_unroll_stupid (struct loop *loop, if (targetm.loop_unroll_adjust) nunroll = targetm.loop_unroll_adjust (nunroll, loop); + if (loop->unroll > 0) + nunroll = loop->unroll; + /* Skip big loops. 
*/ if (nunroll <= 1) { Index: lto-streamer-in.c =================================================================== --- lto-streamer-in.c (revision 219031) +++ lto-streamer-in.c (working copy) @@ -734,6 +734,7 @@ input_cfg (struct lto_input_block *ib, s /* Read OMP SIMD related info. */ loop->safelen = streamer_read_hwi (ib); + loop->unroll = streamer_read_hwi (ib); loop->dont_vectorize = streamer_read_hwi (ib); loop->force_vectorize = streamer_read_hwi (ib); loop->simduid = stream_read_tree (ib, data_in); Index: lto-streamer-out.c =================================================================== --- lto-streamer-out.c (revision 219031) +++ lto-streamer-out.c (working copy) @@ -1863,6 +1863,7 @@ output_cfg (struct output_block *ob, str /* Write OMP SIMD related info. */ streamer_write_hwi (ob, loop->safelen); + streamer_write_hwi (ob, loop->unroll); streamer_write_hwi (ob, loop->dont_vectorize); streamer_write_hwi (ob, loop->force_vectorize); stream_write_tree (ob, loop->simduid, true); Index: tree-cfg.c =================================================================== --- tree-cfg.c (revision 219031) +++ tree-cfg.c (working copy) @@ -299,6 +299,9 @@ replace_loop_annotate_in_block (basic_bl loop->force_vectorize = true; cfun->has_force_vectorize_loops = true; break; + case annot_expr_unroll_kind: + loop->unroll = tree_to_shwi (gimple_call_arg (stmt, 2)); + break; default: gcc_unreachable (); } @@ -348,6 +351,7 @@ replace_loop_annotate (void) case annot_expr_ivdep_kind: case annot_expr_no_vector_kind: case annot_expr_vector_kind: + case annot_expr_unroll_kind: break; default: gcc_unreachable (); Index: tree-core.h =================================================================== --- tree-core.h (revision 219031) +++ tree-core.h (working copy) @@ -685,6 +685,7 @@ enum annot_expr_kind { annot_expr_ivdep_kind, annot_expr_no_vector_kind, annot_expr_vector_kind, + annot_expr_unroll_kind, annot_expr_kind_last }; Index: tree-pretty-print.c 
=================================================================== --- tree-pretty-print.c (revision 219031) +++ tree-pretty-print.c (working copy) @@ -2165,6 +2165,10 @@ dump_generic_node (pretty_printer *pp, t case annot_expr_vector_kind: pp_string (pp, ", vector"); break; + case annot_expr_unroll_kind: + pp_printf (buffer, ", unroll %ld", + (long)TREE_INT_CST_LOW (TREE_OPERAND (node, 2))); + break; default: gcc_unreachable (); } Index: tree-ssa-loop-ivcanon.c =================================================================== --- tree-ssa-loop-ivcanon.c (revision 219031) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -341,7 +341,10 @@ tree_estimate_loop_size (struct loop *lo if (likely_eliminated || likely_eliminated_last) size->last_iteration_eliminated_by_peeling += num; } - if ((size->overall * 3 / 2 - size->eliminated_by_peeling + /* A loop that we want to unroll is never too large. */ + if (loop->unroll > 0) + ; + else if ((size->overall * 3 / 2 - size->eliminated_by_peeling - size->last_iteration_eliminated_by_peeling) > upper_bound) { free (body); @@ -725,7 +728,8 @@ try_unroll_loop_completely (struct loop if (!n_unroll_found) return false; - if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) + if (loop->unroll <= 0 && + n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Not unrolling loop %d " @@ -747,11 +751,19 @@ try_unroll_loop_completely (struct loop if (ul == UL_SINGLE_ITER) return false; + if (loop->unroll == -1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: already peeled for the user.\n", + loop->num); + return false; + } + large = tree_estimate_loop_size (loop, exit, edge_to_cancel, &size, PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); ninsns = size.overall; - if (large) + if (loop->unroll == 0 && large) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Not unrolling 
loop %d: it is too large.\n", @@ -767,13 +779,27 @@ try_unroll_loop_completely (struct loop (int) unr_insns); } - /* If the code is going to shrink, we don't need to be extra cautious - on guessing if the unrolling is going to be profitable. */ - if (unr_insns - /* If there is IV variable that will become constant, we save - one instruction in the loop prologue we do not account - otherwise. */ - <= ninsns + (size.constant_iv != false)) + /* If the user asked us to do it, we don't need to be cautious. + Even if the user said unroll more than what the code says, + trust the code. */ + if (loop->unroll > 0 && n_unroll <= (unsigned)loop->unroll - 1) + n_unroll = MIN ((unsigned)loop->unroll - 1, n_unroll); + else if (loop->unroll) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "user didn't want the entire thing unrolled.\n", + loop->num); + return false; + } + /* If the code is going to shrink, we don't need to be extra + cautious on guessing if the unrolling is going to be + profitable. */ + else if (unr_insns + /* If there is IV variable that will become constant, + we save one instruction in the loop prologue we do + not account otherwise. */ + <= ninsns + (size.constant_iv != false)) ; /* We unroll only inner loops, because we do not consider it profitable otheriwse. We still can cancel loopback edge of not rolling loop; @@ -965,18 +991,26 @@ try_peel_loop (struct loop *loop, if (TREE_CODE (niter) != INTEGER_CST) exit = NULL; - if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0) + if ((!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0) + && loop->unroll <= 0) return false; /* Peel only innermost loops. 
*/ - if (loop->inner) + if (loop->unroll == 0 && loop->inner) { if (dump_file) fprintf (dump_file, "Not peeling: outer loop\n"); return false; } - if (!optimize_loop_for_speed_p (loop)) + if (loop->unroll < 0) + { + if (dump_file) + fprintf (dump_file, "Not peeling: user didn't want it peeled.\n"); + return false; + } + + if (loop->unroll == 0 && !optimize_loop_for_speed_p (loop)) { if (dump_file) fprintf (dump_file, "Not peeling: cold loop\n"); @@ -985,6 +1019,10 @@ try_peel_loop (struct loop *loop, /* Check if there is an estimate on the number of iterations. */ npeel = estimated_loop_iterations_int (loop); + + if (loop->unroll > 0 && npeel > 0 && npeel > loop->unroll) + npeel = loop->unroll - 1; + if (npeel < 0) { if (dump_file) @@ -992,7 +1030,7 @@ try_peel_loop (struct loop *loop, "estimated\n"); return false; } - if (maxiter >= 0 && maxiter <= npeel) + if (loop->unroll == 0 && maxiter >= 0 && maxiter <= npeel) { if (dump_file) fprintf (dump_file, "Not peeling: upper bound is known so can " @@ -1003,7 +1041,8 @@ try_peel_loop (struct loop *loop, /* We want to peel estimated number of iterations + 1 (so we never enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES and be sure to avoid overflows. */ - if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) + if (loop->unroll == 0 + && npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) { if (dump_file) fprintf (dump_file, "Not peeling: rolls too much " @@ -1015,7 +1054,9 @@ try_peel_loop (struct loop *loop, /* Check peeled loops size. 
*/ tree_estimate_loop_size (loop, exit, NULL, &size, PARAM_VALUE (PARAM_MAX_PEELED_INSNS)); - if ((peeled_size = estimated_peeled_sequence_size (&size, npeel)) + if (loop->unroll > 0) + ; + else if ((peeled_size = estimated_peeled_sequence_size (&size, npeel)) > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) { if (dump_file) @@ -1051,6 +1092,8 @@ try_peel_loop (struct loop *loop, fprintf (dump_file, "Peeled loop %d, %i times.\n", loop->num, npeel); } + /* If we peeled things, don't try and expand it further later. */ + loop->unroll = -1; if (loop->any_upper_bound) loop->nb_iterations_upper_bound -= npeel; loop->nb_iterations_estimate = 0; @@ -1296,7 +1339,9 @@ tree_unroll_loops_completely_1 (bool may if (!loop_father) return false; - if (may_increase_size && optimize_loop_nest_for_speed_p (loop) + if (loop->unroll > 0) + ul = UL_ALL; + else if (may_increase_size && optimize_loop_nest_for_speed_p (loop) /* Unroll outermost loops only if asked to do so or they do not cause code growth. */ && (unroll_outer || loop_outer (loop_father))) Index: tree.def =================================================================== --- tree.def (revision 219031) +++ tree.def (working copy) @@ -1315,8 +1315,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target /* ANNOTATE_EXPR. Operand 0 is the expression to be annotated. - Operand 1 is the annotation kind. */ -DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2) + Operand 1 is the annotation kind. + Operand 2 is optional data. */ +DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3) /* Cilk spawn statement Operand 0 is the CALL_EXPR. */