This adds support for a new optimization hint pertaining to loops, namely
Ivdep, which, like its counterparts in C/C++/Fortran, tells the compiler
that there are no loop-carried dependencies in a given loop, thus making
it possible for the compiler to generate better vectorized code.

The typical example is:

   type Sarray is array (1 .. 4) of Float;

   procedure Add (X, Y : not null access Sarray; R : not null access Sarray) is
   begin
      for I in Sarray'Range loop
         pragma Loop_Optimize (Ivdep);
         R(I) := X(I) + Y(I);
      end loop;
   end;

where the hint lets the compiler omit both the non-vectorized version of the
loop and the run-time test that would otherwise select between the two versions.

Tested on x86_64-suse-linux, applied on the mainline.


2014-04-14  Eric Botcazou  <ebotca...@adacore.com>

        * snames.ads-tmpl (Name_Ivdep): New pragma-related name.
        * sem_prag.adb (Analyze_Pragma) <Pragma_Loop_Optimize>: Add support
        for Ivdep hint.
        * gnat_rm.texi (Implementation Defined Pragmas): Document new Ivdep
        hint for Loop_Optimize.
        * gnat_ugn.texi (Vectorization of loops): Mention new Ivdep hint.
        * gcc-interface/ada-tree.h (LOOP_STMT_IVDEP): New macro.
        * gcc-interface/trans.c (Pragma_to_gnu) <Pragma_Loop_Optimize>: Deal
        with new Ivdep hint.
        (gnat_gimplify_stmt) <LOOP_STMT>: Propagate the loop hints.


2014-04-14  Eric Botcazou  <ebotca...@adacore.com>

        * gnat.dg/vect11.ad[sb]: New test.


-- 
Eric Botcazou
Index: ada/gnat_rm.texi
===================================================================
--- ada/gnat_rm.texi	(revision 209334)
+++ ada/gnat_rm.texi	(working copy)
@@ -4417,7 +4417,7 @@ Syntax:
 @smallexample @c ada
 pragma Loop_Optimize (OPTIMIZATION_HINT @{, OPTIMIZATION_HINT@});
 
-OPTIMIZATION_HINT ::= No_Unroll | Unroll | No_Vector | Vector
+OPTIMIZATION_HINT ::= Ivdep | No_Unroll | Unroll | No_Vector | Vector
 @end smallexample
 
 @noindent
@@ -4426,8 +4426,13 @@ programmer to specify optimization hints
 are not mutually exclusive and can be freely mixed, but not all combinations
 will yield a sensible outcome.
 
-There are four supported optimization hints for a loop:
+There are five supported optimization hints for a loop:
+
 @itemize @bullet
+@item Ivdep
+
+The programmer asserts that there are no loop-carried dependencies which would prevent consecutive iterations of the loop from being executed simultaneously.
+
 @item No_Unroll
 
 The loop must not be unrolled.  This is a strong hint: the compiler will not
Index: ada/sem_prag.adb
===================================================================
--- ada/sem_prag.adb	(revision 209334)
+++ ada/sem_prag.adb	(working copy)
@@ -16560,7 +16560,8 @@ package body Sem_Prag is
 
          --  pragma Loop_Optimize ( OPTIMIZATION_HINT {, OPTIMIZATION_HINT } );
 
-         --  OPTIMIZATION_HINT ::= No_Unroll | Unroll | No_Vector | Vector
+         --  OPTIMIZATION_HINT ::=
+         --    Ivdep | No_Unroll | Unroll | No_Vector | Vector
 
          when Pragma_Loop_Optimize => Loop_Optimize : declare
             Hint : Node_Id;
@@ -16572,7 +16573,7 @@ package body Sem_Prag is
 
             Hint := First (Pragma_Argument_Associations (N));
             while Present (Hint) loop
-               Check_Arg_Is_One_Of (Hint,
+               Check_Arg_Is_One_Of (Hint, Name_Ivdep,
                  Name_No_Unroll, Name_Unroll, Name_No_Vector, Name_Vector);
                Next (Hint);
             end loop;
Index: ada/gnat_ugn.texi
===================================================================
--- ada/gnat_ugn.texi	(revision 209334)
+++ ada/gnat_ugn.texi	(working copy)
@@ -10780,6 +10780,38 @@ preferably to other optimizations by mea
 placed immediately within the loop will convey the appropriate hint to the
 compiler for this loop.
 
+It is also possible to help the compiler generate better vectorized code
+for a given loop by asserting that there are no loop-carried dependencies
+in the loop.  Consider for example the procedure:
+
+@smallexample @c ada
+  type Arr is array (1 .. 4) of Long_Float;
+
+  procedure Add (X, Y : not null access Arr; R : not null access Arr) is
+  begin
+    for I in Arr'Range loop
+      R(I) := X(I) + Y(I);
+    end loop;
+  end;
+@end smallexample
+
+@noindent
+By default, the compiler cannot unconditionally vectorize the loop because
+assigning to a component of the array designated by R in one iteration could
+change the value read from the components of the arrays designated by X or Y
+in a later iteration.  As a result, the compiler will generate two versions
+of the loop in the object code, one vectorized and the other not vectorized,
+as well as a test to select the appropriate version at run time.  This can
+be overcome by another hint:
+
+@smallexample @c ada
+  pragma Loop_Optimize (Ivdep);
+@end smallexample
+
+@noindent
+placed immediately within the loop will tell the compiler that it can safely
+omit the non-vectorized version of the loop as well as the run-time test.
+
 @node Other Optimization Switches
 @subsection Other Optimization Switches
 @cindex Optimization Switches
Index: ada/gcc-interface/ada-tree.h
===================================================================
--- ada/gcc-interface/ada-tree.h	(revision 209334)
+++ ada/gcc-interface/ada-tree.h	(working copy)
@@ -6,7 +6,7 @@
  *                                                                          *
  *                              C Header File                               *
  *                                                                          *
- *          Copyright (C) 1992-2013, Free Software Foundation, Inc.         *
+ *          Copyright (C) 1992-2014, Free Software Foundation, Inc.         *
  *                                                                          *
  * GNAT is free software;  you can  redistribute it  and/or modify it under *
  * terms of the  GNU General Public License as published  by the Free Soft- *
@@ -504,10 +504,11 @@ do {						   \
 #define LOOP_STMT_TOP_UPDATE_P(NODE)  TREE_LANG_FLAG_1 (LOOP_STMT_CHECK (NODE))
 
 /* Optimization hints on loops.  */
-#define LOOP_STMT_NO_UNROLL(NODE) TREE_LANG_FLAG_2 (LOOP_STMT_CHECK (NODE))
-#define LOOP_STMT_UNROLL(NODE)    TREE_LANG_FLAG_3 (LOOP_STMT_CHECK (NODE))
-#define LOOP_STMT_NO_VECTOR(NODE) TREE_LANG_FLAG_4 (LOOP_STMT_CHECK (NODE))
-#define LOOP_STMT_VECTOR(NODE)    TREE_LANG_FLAG_5 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_IVDEP(NODE)     TREE_LANG_FLAG_2 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_NO_UNROLL(NODE) TREE_LANG_FLAG_3 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_UNROLL(NODE)    TREE_LANG_FLAG_4 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_NO_VECTOR(NODE) TREE_LANG_FLAG_5 (LOOP_STMT_CHECK (NODE))
+#define LOOP_STMT_VECTOR(NODE)    TREE_LANG_FLAG_6 (LOOP_STMT_CHECK (NODE))
 
 #define EXIT_STMT_COND(NODE)     TREE_OPERAND_CHECK_CODE (NODE, EXIT_STMT, 0)
 #define EXIT_STMT_LABEL(NODE)    TREE_OPERAND_CHECK_CODE (NODE, EXIT_STMT, 1)
Index: ada/gcc-interface/trans.c
===================================================================
--- ada/gcc-interface/trans.c	(revision 209371)
+++ ada/gcc-interface/trans.c	(working copy)
@@ -1268,10 +1268,14 @@ Pragma_to_gnu (Node_Id gnat_node)
 	   Present (gnat_temp);
 	   gnat_temp = Next (gnat_temp))
 	{
-	  tree gnu_loop_stmt = gnu_loop_stack ->last ()->stmt;
+	  tree gnu_loop_stmt = gnu_loop_stack->last ()->stmt;
 
 	  switch (Chars (Expression (gnat_temp)))
 	    {
+	    case Name_Ivdep:
+	      LOOP_STMT_IVDEP (gnu_loop_stmt) = 1;
+	      break;
+
 	    case Name_No_Unroll:
 	      LOOP_STMT_NO_UNROLL (gnu_loop_stmt) = 1;
 	      break;
@@ -7747,13 +7751,20 @@ gnat_gimplify_stmt (tree *stmt_p)
 	tree gnu_cond = LOOP_STMT_COND (stmt);
 	tree gnu_update = LOOP_STMT_UPDATE (stmt);
 	tree gnu_end_label = LOOP_STMT_LABEL (stmt);
-	tree t;
 
 	/* Build the condition expression from the test, if any.  */
 	if (gnu_cond)
-	  gnu_cond
-	    = build3 (COND_EXPR, void_type_node, gnu_cond, alloc_stmt_list (),
-		      build1 (GOTO_EXPR, void_type_node, gnu_end_label));
+	  {
+	    /* Deal with the optimization hints.  */
+	    if (LOOP_STMT_IVDEP (stmt))
+	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+				 build_int_cst (integer_type_node,
+						annot_expr_ivdep_kind));
+
+	    gnu_cond
+	      = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
+			build1 (GOTO_EXPR, void_type_node, gnu_end_label));
+	  }
 
 	/* Set to emit the statements of the loop.  */
 	*stmt_p = NULL_TREE;
@@ -7782,7 +7793,7 @@ gnat_gimplify_stmt (tree *stmt_p)
         if (gnu_update && !LOOP_STMT_TOP_UPDATE_P (stmt))
 	  append_to_statement_list (gnu_update, stmt_p);
 
-	t = build1 (GOTO_EXPR, void_type_node, gnu_start_label);
+	tree t = build1 (GOTO_EXPR, void_type_node, gnu_start_label);
 	SET_EXPR_LOCATION (t, DECL_SOURCE_LOCATION (gnu_end_label));
 	append_to_statement_list (t, stmt_p);
 
Index: ada/snames.ads-tmpl
===================================================================
--- ada/snames.ads-tmpl	(revision 209334)
+++ ada/snames.ads-tmpl	(working copy)
@@ -730,6 +730,7 @@ package Snames is
    Name_Increases                      : constant Name_Id := N + $;
    Name_Info                           : constant Name_Id := N + $;
    Name_Internal                       : constant Name_Id := N + $;
+   Name_Ivdep                          : constant Name_Id := N + $;
    Name_Link_Name                      : constant Name_Id := N + $;
    Name_Lowercase                      : constant Name_Id := N + $;
    Name_Max_Entry_Queue_Depth          : constant Name_Id := N + $;
-- { dg-do compile { target i?86-*-* x86_64-*-* } }
-- { dg-options "-O3 -msse2 -fdump-tree-optimized" }

-- Test body for the Ivdep hint of pragma Loop_Optimize.  Each subprogram
-- below adds two Sarray values component by component; they differ only in
-- how the operands and the result are passed.  The dump scan at the end of
-- the file checks the optimized code generated for this unit.

package body Vect11 is

   -- Component-wise sum returned by value: the result is built in the
   -- local object R and then returned.
   function "+" (X, Y : Sarray) return Sarray is
      R : Sarray;
   begin
      for I in Sarray'Range loop
         R(I) := X(I) + Y(I);
      end loop;
      return R;
   end;

   -- Component-wise sum stored into the out parameter R.  No optimization
   -- hint is given for this loop.
   procedure Add (X, Y : Sarray; R : out Sarray) is
   begin
      for I in Sarray'Range loop
         R(I) := X(I) + Y(I);
      end loop;
   end;

   -- Component-wise sum through access parameters.  An assignment to the
   -- array designated by R could change what is later read through X or Y,
   -- so without a hint the compiler cannot vectorize this loop
   -- unconditionally.  The Ivdep hint asserts that there are no
   -- loop-carried dependencies in the loop.
   procedure Add (X, Y : not null access Sarray; R : not null access Sarray) is
   begin
      for I in Sarray'Range loop
         -- The pragma is placed immediately within the loop it applies to
         pragma Loop_Optimize (Ivdep);
         R(I) := X(I) + Y(I);
      end loop;
   end;

end Vect11;

-- { dg-final { scan-tree-dump-not "goto" "optimized" } }
-- { dg-final { cleanup-tree-dump "optimized" } }
-- Spec of the Ivdep vectorization test: a small constrained array type and
-- three ways of adding two such arrays component by component.

package Vect11 is

   -- Constrained array types are vectorizable
   type Sarray is array (1 .. 4) of Float;
   -- Alignment of 16 storage elements; presumably chosen so that the whole
   -- array can be accessed as one aligned vector -- TODO confirm
   for Sarray'Alignment use 16;

   -- Sum returned by value
   function "+" (X, Y : Sarray) return Sarray;
   -- Sum stored into an out parameter
   procedure Add (X, Y : Sarray; R : out Sarray);
   -- Sum through access parameters; the body of this one carries the
   -- Loop_Optimize (Ivdep) hint
   procedure Add (X, Y : not null access Sarray; R : not null access Sarray);

end Vect11;

Reply via email to