This adds support for a new optimization hint pertaining to loops, namely
Ivdep, which, like its counterparts in C/C++/Fortran, tells the compiler
that there are no loop-carried dependencies in a given loop, thus making
it possible for the compiler to generate better vectorized code.
The typical example is:
type Sarray is array (1 .. 4) of Float;
procedure Add (X, Y : not null access Sarray; R : not null access Sarray)
is
begin
for I in Sarray'Range loop
pragma Loop_Optimize (Ivdep);
R(I) := X(I) + Y(I);
end loop;
end;
where the hint eliminates the non-vectorized version of the loop.
Tested on x86_64-suse-linux, applied on the mainline.
2014-04-14 Eric Botcazou <ebotca...@adacore.com>
* snames.ads-tmpl (Name_Ivdep): New pragma-related name.
* sem_prag.adb (Analyze_Pragma) <Pragma_Loop_Optimize>: Add support
for Ivdep hint.
* gnat_rm.texi (Implementation Defined Pragmas): Document new Ivdep
hint for Loop_Optimize.
* gnat_ugn.texi (Vectorization of loops): Mention new Ivdep hint.
* gcc-interface/ada-tree.h (LOOP_STMT_IVDEP): New macro.
* gcc-interface/trans.c (Pragma_to_gnu) <Pragma_Loop_Optimize>: Deal
with new Ivdep hint.
(gnat_gimplify_stmt) <LOOP_STMT>: Propagate the loop hints.
2014-04-14 Eric Botcazou <ebotca...@adacore.com>
* gnat.dg/vect11.ad[sb]: New test.
--
Eric Botcazou
Index: ada/gnat_rm.texi
===================================================================
--- ada/gnat_rm.texi (revision 209334)
+++ ada/gnat_rm.texi (working copy)
@@ -4417,7 +4417,7 @@ Syntax:
@smallexample @c ada
pragma Loop_Optimize (OPTIMIZATION_HINT @{, OPTIMIZATION_HINT@});
-OPTIMIZATION_HINT ::= No_Unroll | Unroll | No_Vector | Vector
+OPTIMIZATION_HINT ::= Ivdep | No_Unroll | Unroll | No_Vector | Vector
@end smallexample
@noindent
@@ -4426,8 +4426,13 @@ programmer to specify optimization hints
are not mutually exclusive and can be freely mixed, but not all combinations
will yield a sensible outcome.
-There are four supported optimization hints for a loop:
+There are five supported optimization hints for a loop:
+
@itemize @bullet
+@item Ivdep
+
+The programmer asserts that there are no loop-carried dependencies which would prevent consecutive iterations of the loop from being executed simultaneously.
+
@item No_Unroll
The loop must not be unrolled. This is a strong hint: the compiler will not
Index: ada/sem_prag.adb
===================================================================
--- ada/sem_prag.adb (revision 209334)
+++ ada/sem_prag.adb (working copy)
@@ -16560,7 +16560,8 @@ package body Sem_Prag is
-- pragma Loop_Optimize ( OPTIMIZATION_HINT {, OPTIMIZATION_HINT } );
- -- OPTIMIZATION_HINT ::= No_Unroll | Unroll | No_Vector | Vector
+ -- OPTIMIZATION_HINT ::=
+ -- Ivdep | No_Unroll | Unroll | No_Vector | Vector
when Pragma_Loop_Optimize => Loop_Optimize : declare
Hint : Node_Id;
@@ -16572,7 +16573,7 @@ package body Sem_Prag is
Hint := First (Pragma_Argument_Associations (N));
while Present (Hint) loop
- Check_Arg_Is_One_Of (Hint,
+ Check_Arg_Is_One_Of (Hint, Name_Ivdep,
Name_No_Unroll, Name_Unroll, Name_No_Vector, Name_Vector);
Next (Hint);
end loop;
Index: ada/gnat_ugn.texi
===================================================================
--- ada/gnat_ugn.texi (revision 209334)
+++ ada/gnat_ugn.texi (working copy)
@@ -10780,6 +10780,38 @@ preferably to other optimizations by mea
placed immediately within the loop will convey the appropriate hint to the
compiler for this loop.
+It is also possible to help the compiler generate better vectorized code
+for a given loop by asserting that there are no loop-carried dependencies
+in the loop. Consider for example the procedure:
+
+@smallexample @c ada
+ type Arr is array (1 .. 4) of Long_Float;
+
+ procedure Add (X, Y : not null access Arr; R : not null access Arr) is
+ begin
+ for I in Arr'Range loop
+ R(I) := X(I) + Y(I);
+ end loop;
+ end;
+@end smallexample
+
+@noindent
+By default, the compiler cannot unconditionally vectorize the loop because
+assigning to a component of the array designated by R in one iteration could
+change the value read from the components of the arrays designated by X or Y
+in a later iteration. As a result, the compiler will generate two versions
+of the loop in the object code, one vectorized and the other not vectorized,
+as well as a test to select the appropriate version at run time. This can
+be overcome by another hint:
+
+@smallexample @c ada
+ pragma Loop_Optimize (Ivdep);
+@end smallexample
+
+@noindent
+placed immediately within the loop will tell the compiler that it can safely
+omit the non-vectorized version of the loop as well as the run-time test.
+
@node Other Optimization Switches
@subsection Other Optimization Switches
@cindex Optimization Switches
Index: ada/gcc-interface/ada-tree.h
===================================================================
--- ada/gcc-interface/ada-tree.h (revision 209334)
+++ ada/gcc-interface/ada-tree.h (working copy)
@@ -6,7 +6,7 @@
* *
* C Header File *
* *
- * Copyright (C) 1992-2013, Free Software Foundation, Inc. *
+ * Copyright (C) 1992-2014, Free Software Foundation, Inc. *
* *
* GNAT is free software; you can redistribute it and/or modify it under *
* terms of the GNU General Public License as published by the Free Soft- *
@@ -504,10 +504,11 @@ do { \
#define LOOP_STMT_TOP_UPDATE_P(NODE) TREE_LANG_FLAG_1 (LOOP_STMT_CHECK (NODE))
/* Optimization hints on loops. */
-#define LOOP_STMT_NO_UNROLL(NODE) TREE_LANG_FLAG_2 (LOOP_STMT_CHECK (NODE))
-#define LOOP_STMT_UNROLL(NODE) TREE_LANG_FLAG_3 (LOOP_STMT_CHECK (NODE))
-#define LOOP_STMT_NO_VECTOR(NODE) TREE_LANG_FLAG_4 (LOOP_STMT_CHECK (NODE))
-#define LOOP_STMT_VECTOR(NODE) TREE_LANG_FLAG_5 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_IVDEP(NODE) TREE_LANG_FLAG_2 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_NO_UNROLL(NODE) TREE_LANG_FLAG_3 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_UNROLL(NODE) TREE_LANG_FLAG_4 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_NO_VECTOR(NODE) TREE_LANG_FLAG_5 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_VECTOR(NODE) TREE_LANG_FLAG_6 (LOOP_STMT_CHECK (NODE))
#define EXIT_STMT_COND(NODE) TREE_OPERAND_CHECK_CODE (NODE, EXIT_STMT, 0)
#define EXIT_STMT_LABEL(NODE) TREE_OPERAND_CHECK_CODE (NODE, EXIT_STMT, 1)
Index: ada/gcc-interface/trans.c
===================================================================
--- ada/gcc-interface/trans.c (revision 209371)
+++ ada/gcc-interface/trans.c (working copy)
@@ -1268,10 +1268,14 @@ Pragma_to_gnu (Node_Id gnat_node)
Present (gnat_temp);
gnat_temp = Next (gnat_temp))
{
- tree gnu_loop_stmt = gnu_loop_stack ->last ()->stmt;
+ tree gnu_loop_stmt = gnu_loop_stack->last ()->stmt;
switch (Chars (Expression (gnat_temp)))
{
+ case Name_Ivdep:
+ LOOP_STMT_IVDEP (gnu_loop_stmt) = 1;
+ break;
+
case Name_No_Unroll:
LOOP_STMT_NO_UNROLL (gnu_loop_stmt) = 1;
break;
@@ -7747,13 +7751,20 @@ gnat_gimplify_stmt (tree *stmt_p)
tree gnu_cond = LOOP_STMT_COND (stmt);
tree gnu_update = LOOP_STMT_UPDATE (stmt);
tree gnu_end_label = LOOP_STMT_LABEL (stmt);
- tree t;
/* Build the condition expression from the test, if any. */
if (gnu_cond)
- gnu_cond
- = build3 (COND_EXPR, void_type_node, gnu_cond, alloc_stmt_list (),
- build1 (GOTO_EXPR, void_type_node, gnu_end_label));
+ {
+ /* Deal with the optimization hints. */
+ if (LOOP_STMT_IVDEP (stmt))
+ gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+ build_int_cst (integer_type_node,
+ annot_expr_ivdep_kind));
+
+ gnu_cond
+ = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
+ build1 (GOTO_EXPR, void_type_node, gnu_end_label));
+ }
/* Set to emit the statements of the loop. */
*stmt_p = NULL_TREE;
@@ -7782,7 +7793,7 @@ gnat_gimplify_stmt (tree *stmt_p)
if (gnu_update && !LOOP_STMT_TOP_UPDATE_P (stmt))
append_to_statement_list (gnu_update, stmt_p);
- t = build1 (GOTO_EXPR, void_type_node, gnu_start_label);
+ tree t = build1 (GOTO_EXPR, void_type_node, gnu_start_label);
SET_EXPR_LOCATION (t, DECL_SOURCE_LOCATION (gnu_end_label));
append_to_statement_list (t, stmt_p);
Index: ada/snames.ads-tmpl
===================================================================
--- ada/snames.ads-tmpl (revision 209334)
+++ ada/snames.ads-tmpl (working copy)
@@ -730,6 +730,7 @@ package Snames is
Name_Increases : constant Name_Id := N + $;
Name_Info : constant Name_Id := N + $;
Name_Internal : constant Name_Id := N + $;
+ Name_Ivdep : constant Name_Id := N + $;
Name_Link_Name : constant Name_Id := N + $;
Name_Lowercase : constant Name_Id := N + $;
Name_Max_Entry_Queue_Depth : constant Name_Id := N + $;
-- { dg-do compile { target i?86-*-* x86_64-*-* } }
-- { dg-options "-O3 -msse2 -fdump-tree-optimized" }
package body Vect11 is
-- Three component-wise additions over Sarray that differ only in how the
-- operands and the result are passed.  The dg-final directive after this
-- body expects no "goto" in the optimized tree dump.

-- Returns the component-wise sum of X and Y, built in the local array R.
function "+" (X, Y : Sarray) return Sarray is
R : Sarray;
begin
for I in Sarray'Range loop
R(I) := X(I) + Y(I);
end loop;
return R;
end;

-- Stores the component-wise sum of X and Y into the out parameter R.
procedure Add (X, Y : Sarray; R : out Sarray) is
begin
for I in Sarray'Range loop
R(I) := X(I) + Y(I);
end loop;
end;

-- Same sum, but the three arrays are reached through access values, so
-- the array designated by R could overlap those designated by X or Y.
procedure Add (X, Y : not null access Sarray; R : not null access Sarray) is
begin
for I in Sarray'Range loop
-- Assert that the loop has no loop-carried dependencies, so that the
-- compiler can omit the non-vectorized version of the loop and the
-- run-time test selecting between the two versions.
pragma Loop_Optimize (Ivdep);
R(I) := X(I) + Y(I);
end loop;
end;
end Vect11;
-- { dg-final { scan-tree-dump-not "goto" "optimized" } }
-- { dg-final { cleanup-tree-dump "optimized" } }
package Vect11 is
-- Declarations for the Loop_Optimize (Ivdep) vectorization test.

-- Constrained array types are vectorizable
-- Sarray has four Float components; its alignment is forced to 16 by the
-- representation clause below.
type Sarray is array (1 .. 4) of Float;
for Sarray'Alignment use 16;

-- Component-wise sum of X and Y, returned as a new array.
function "+" (X, Y : Sarray) return Sarray;

-- Component-wise sum of X and Y, stored into R.
procedure Add (X, Y : Sarray; R : out Sarray);

-- Component-wise sum where operands and result are designated by access
-- values; the body marks its loop with pragma Loop_Optimize (Ivdep).
procedure Add (X, Y : not null access Sarray; R : not null access Sarray);
end Vect11;