fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Prathamesh Kulkarni
This patch folds strchr (e, 0) to e + strlen (e), if e has no side-effects.
Bootstrapped, regtested on x86_64-unknown-linux-gnu
Ok for trunk ?

[gcc]
* gcc/builtins.c (fold_builtin_strchr):  returns tree for s1 + strlen (s1)
if TREE_SIDE_EFFECTS (s1) is false and integer_zerop (s2) is true.

[gcc/testsuite]
* gcc.dg/strlenopt-5.c: modified dg-final
scan-tree-dump-times for strchr and strlen
* gcc.dg/strlenopt-7.c: Likewise
* gcc.dg/strlenopt-9.c: Likewise
* gcc.dg/strlenopt-20.c: Likewise
* gcc.dg/strlenopt-21.c: Likewise
* gcc.dg/strlenopt-22.c: Likewise
* gcc.dg/strlenopt-22g.c: Likewise
* gcc.dg/strlenopt-26.c: Likewise
* gcc.c-torture/execute/builtins/strchr.c: added test case

Thanks and Regards,
Prathamesh
Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 207700)
+++ gcc/builtins.c	(working copy)
@@ -11587,6 +11587,17 @@ fold_builtin_strchr (location_t loc, tre
 	  tem = fold_build_pointer_plus_hwi_loc (loc, s1, r - p1);
 	  return fold_convert_loc (loc, type, tem);
 	}
+  else if (integer_zerop (s2) && !TREE_SIDE_EFFECTS (s1))  // simplify strchr (s1, '\0') to s1 + strlen (s1)
+  {
+tree fn = builtin_decl_implicit (BUILT_IN_STRLEN);
+
+if (!fn)
+  return NULL_TREE;
+
+tree call_expr = build_call_expr_loc (loc, fn, 1, s1);
+tree ptr_plus_expr = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (s1), s1, call_expr);
+return fold_convert_loc (loc, type, ptr_plus_expr);
+  }
   return NULL_TREE;
 }
 }
Index: gcc/testsuite/gcc.dg/strlenopt-20.c
===
--- gcc/testsuite/gcc.dg/strlenopt-20.c	(revision 207700)
+++ gcc/testsuite/gcc.dg/strlenopt-20.c	(working copy)
@@ -48,7 +48,7 @@ __attribute__((noinline, noclone)) char
 fn3 (char *p)
 {
   char *c;
-  /* The strcpy call can be optimized into memcpy, strchr needs to stay,
+  /* The strcpy call can be optimized into memcpy, strchr (p, '\0') is converted to p + strlen (p), 
  strcat is optimized into memcpy.  */
   strcpy (p, "abc");
   p[3] = 'd';
@@ -86,10 +86,10 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "strlen \\(" 1 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "memcpy \\(" 4 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
-/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */
 /* { dg-final { cleanup-tree-dump "strlen" } } */
Index: gcc/testsuite/gcc.dg/strlenopt-21.c
===
--- gcc/testsuite/gcc.dg/strlenopt-21.c	(revision 207700)
+++ gcc/testsuite/gcc.dg/strlenopt-21.c	(working copy)
@@ -10,6 +10,7 @@ foo (char *x, int n)
 {
   int i;
   char a[64];
+  /* strchr (x, '\0') is converted to x + strlen (x) */
   char *p = strchr (x, '\0');
   struct S s;
   /* strcpy here is optimized into memcpy, length computed as p - x + 1.  */
@@ -57,10 +58,10 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "strlen \\(" 1 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "memcpy \\(" 3 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
-/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */
 /* { dg-final { cleanup-tree-dump "strlen" } } */
Index: gcc/testsuite/gcc.dg/strlenopt-22.c
===
--- gcc/testsuite/gcc.dg/strlenopt-22.c	(revision 207700)
+++ gcc/testsuite/gcc.dg/strlenopt-22.c	(working copy)
@@ -9,7 +9,7 @@ bar (char *p, char *q)
   size_t l1, l2, l3;
   char *r = strchr (p, '\0');
   strcpy (r, "abcde");
-  char *s = strchr (r, '\0');
+  char *s = strchr (r, '\0');  /* strchr (r, '\0') is converted to r + strlen (r) */
   strcpy (s, q);
   l1 = strlen (p);
   l2 = strlen (r);
@@ -31,10 +31,10 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "strlen \\(" 3 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strlen \\(" 4 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "memcpy \\(" 1 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strcpy \\(" 1 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
-/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */
-/* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } *
+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
+/* { dg-final { sca

Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Jakub Jelinek
On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
> This patch folds strchr (e, 0) to e + strlen (e), if e has no side-effects.
> Bootstrapped, regtested on x86_64-unknown-linux-gnu
> Ok for trunk ?

Why do you think it is a good idea?  It is often very much the opposite.

Jakub


Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Prathamesh Kulkarni
On Sat, Feb 15, 2014 at 2:28 PM, Jakub Jelinek  wrote:
> On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
>> This patch folds strchr (e, 0) to e + strlen (e), if e has no side-effects.
>> Bootstrapped, regtested on x86_64-unknown-linux-gnu
>> Ok for trunk ?
>
> Why do you think it is a good idea?  It is often very much the opposite.
I may be completely wrong, but since strchr(p, 0) matches each
character of p with c until '\0' is found, I thought it would be
faster to call strlen, since strlen would just skip over characters up to '\0'.
Also, I saw this committed in llvm trunk recently, and thought it
might be a good idea:
http://llvm-reviews.chandlerc.com/rL200736

>
> Jakub


Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Richard Biener
On Sat, Feb 15, 2014 at 10:45 AM, Prathamesh Kulkarni
 wrote:
> On Sat, Feb 15, 2014 at 2:28 PM, Jakub Jelinek  wrote:
>> On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
>>> This patch folds strchr (e, 0) to e + strlen (e), if e has no side-effects.
>>> Bootstrapped, regtested on x86_64-unknown-linux-gnu
>>> Ok for trunk ?
>>
>> Why do you think it is a good idea?  It is often very much the opposite.
> I maybe completely wrong, but since  strchr(p, 0), matches each
> character of p with c until '\0' is found, I thought it would be
> faster to call strlen, since strlen would just skip over characters upto '\0'.
> Also, I saw this committed in llvm trunk recently, and thought it
> might be a good idea:
> http://llvm-reviews.chandlerc.com/rL200736

If it ends up being a good idea then please add this transform to
tree-ssa-strlen.c,
not to GENERIC builtins folding.

Richard.

>>
>> Jakub


RE: [PATCH] Fix Cilk+ ICEs in the alias oracle

2014-02-15 Thread Richard Biener
On Fri, 14 Feb 2014, Iyer, Balaji V wrote:

> 
> 
> > -Original Message-
> > From: Jeff Law [mailto:l...@redhat.com]
> > Sent: Friday, February 14, 2014 12:34 PM
> > To: Richard Biener; gcc-patches@gcc.gnu.org
> > Cc: Iyer, Balaji V
> > Subject: Re: [PATCH] Fix Cilk+ ICEs in the alias oracle
> > 
> > On 02/13/14 05:47, Richard Biener wrote:
> > > On Thu, 13 Feb 2014, Richard Biener wrote:
> > >
> > >>
> > >> Cilk+ builds INDIRECT_REFs when expanding builtins (oops) and thus
> > >> those can leak into MEM_EXRs which will lead to ICEs later.
> > >> The following patch properly builds a MEM_REF instead.  Grepping for
> > >> INDIRECT_REF I found another suspicious use (just removed, it cannot
> > >> have triggered and it looks bogus) and the use of a langhook instead
> > >> of proper GIMPLE interfaces (function also used during expansion).
> > >>
> > >> Bootstrap / testing in progress together with some other stuff.
> > >>
> > >> Ok?
> > >
> > > Btw, this exposes that Cilk+ is LTO-ignorant - it doesn't properly
> > > register its global trees (bah, more global trees...).  So the
> > > types_compatible_p call ICEs.  Trying to process them in
> > > lto/lto.c:read_cgraph_and_symbols doesn't seem to work though.
> > >
> > > So I'm opting to remove the assert and leave fixing LTO for somebody
> > > who cares about Cilk+.
> > >
> > > Simpifies the patch as follows, bootstrapped & tested on
> > > x86_64-unknown-linux-gnu.
> > >
> > > Richard.
> > >
> > > 2014-02-13  Richard Biener  
> > >
> > >   * cilk-common.c (cilk_arrow): Build a MEM_REF, not an
> > INDIRECT_REF.
> > >   (get_frame_arg): Drop the assert with langhook
> > types_compatible_p.
> > >   Do not strip INDIRECT_REFs.
> > FWIW, I see a recurring issue here.  Specifically I'm regularly seeing
> > cases where submissions are not playing well with LTO.   Speaking
> > strictly for myself, I'm not LTO-aware enough to spot them in patches as 
> > they
> > fly by.
> 
> I thought I had handled LTO correctly. I apologize if I made a mistake. 
> I assure you that it was not deliberate. I even had my tests use -flto 
> flags to make sure it is going through it correctly...

By using the langhook types_compatible_p you by-passed the failure
on LTO (because that langhook is not implemented there).

As it's only builtins expansion the mismatches don't really matter.

Richard.


Re: [Patch, fortran] PR 59599 ICE on intrinsic ichar

2014-02-15 Thread Richard Biener
On Fri, 14 Feb 2014, Mikael Morin wrote:

> Hello,
> 
> this bug is not a regression, but the patch shouldn't wreck the compiler
> too much on the other hand.
> The problem is a wrong number of arguments while generating code for the
> ichar intrinsic.  The correct number is 2 without the kind argument and
> 3 with it.
> The attached patch uses the gfc_intrinsic_argument_list_length function
> like it's done for other intrinsics.
> 
> Regression tested on x86_64-unknown-linux-gnu. OK for trunk/4.8/4.7?

Generally wrong-code non-regression fixes for Fortran are fine
if Fortran maintainers think so.

Richard.


Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Ondřej Bílka
On Sat, Feb 15, 2014 at 10:50:02AM +0100, Richard Biener wrote:
> On Sat, Feb 15, 2014 at 10:45 AM, Prathamesh Kulkarni
>  wrote:
> > On Sat, Feb 15, 2014 at 2:28 PM, Jakub Jelinek  wrote:
> >> On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
> >>> This patch folds strchr (e, 0) to e + strlen (e), if e has no 
> >>> side-effects.
> >>> Bootstrapped, regtested on x86_64-unknown-linux-gnu
> >>> Ok for trunk ?
> >>
> >> Why do you think it is a good idea?  It is often very much the opposite.
> > I maybe completely wrong, but since  strchr(p, 0), matches each
> > character of p with c until '\0' is found, I thought it would be
> > faster to call strlen, since strlen would just skip over characters upto 
> > '\0'.
> > Also, I saw this committed in llvm trunk recently, and thought it
> > might be a good idea:
> > http://llvm-reviews.chandlerc.com/rL200736
> 
> If it ends up being a good idea then please add this transform to
> tree-ssa-strlen.c,
> not to GENERIC builtins folding.
> 
No, if that transform is a good idea it should be added to the headers. Also,
this will not match anything, because at -O1 and higher the call is already
transformed to rawmemchr(x, 0), as in the program below.

#include <string.h>
char *
foo (char *x)
{
   return strchr (x, 0);
}



Re: [Patch, fortran] PR 59599 ICE on intrinsic ichar

2014-02-15 Thread Mikael Morin
Le 15/02/2014 11:04, Richard Biener a écrit :
> On Fri, 14 Feb 2014, Mikael Morin wrote:
> 
>> Hello,
>>
>> this bug is not a regression, but the patch shouldn't wreck the compiler
>> too much on the other hand.
>> The problem is a wrong number of arguments while generating code for the
>> ichar intrinsic.  The correct number is 2 without the kind argument and
>> 3 with it.
>> The attached patch uses the gfc_intrinsic_argument_list_length function
>> like it's done for other intrinsics.
>>
>> Regression tested on x86_64-unknown-linux-gnu. OK for trunk/4.8/4.7?
> 
> Generally wrong-code non-regression fixes for Fortran are fine
> if Fortran maintainers think so.
> 
Technically it's an ICE, not a wrong-code; but I bet it would be a
wrong-code if the scalarizer didn't notice a problem.
I'll proceed with the committal. Thanks for the review(s).

Mikael


Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Jakub Jelinek
On Sat, Feb 15, 2014 at 12:15:22PM +0100, Ondřej Bílka wrote:
> On Sat, Feb 15, 2014 at 10:50:02AM +0100, Richard Biener wrote:
> > On Sat, Feb 15, 2014 at 10:45 AM, Prathamesh Kulkarni
> >  wrote:
> > > On Sat, Feb 15, 2014 at 2:28 PM, Jakub Jelinek  wrote:
> > >> On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
> > >>> This patch folds strchr (e, 0) to e + strlen (e), if e has no 
> > >>> side-effects.
> > >>> Bootstrapped, regtested on x86_64-unknown-linux-gnu
> > >>> Ok for trunk ?
> > >>
> > >> Why do you think it is a good idea?  It is often very much the opposite.
> > > I maybe completely wrong, but since  strchr(p, 0), matches each
> > > character of p with c until '\0' is found, I thought it would be
> > > faster to call strlen, since strlen would just skip over characters upto 
> > > '\0'.
> > > Also, I saw this committed in llvm trunk recently, and thought it
> > > might be a good idea:
> > > http://llvm-reviews.chandlerc.com/rL200736
> > 
> > If it ends up being a good idea then please add this transform to
> > tree-ssa-strlen.c,
> > not to GENERIC builtins folding.
> > 
> No, if that transform is good idea it should be added to headers. Also
> this will not match anything as with O1 and higher it already is
> transformed to rawmemchr(x, 0) as in program below.
> 
> #include 
> char *
> foo (char *x)
> {
>return strchr (x, 0);
> }

Of course rawmemchr(x, 0) is much better implementation of strchr(x, 0) than
x + strlen(x).  The reason why gcc doesn't transform it that way is because
rawmemchr is just a GNU extension, not part of C or POSIX.

Jakub


Re: [PATCH RFC] MIPS add support for MIPS SIMD ARCHITECTURE V1.07

2014-02-15 Thread Richard Sandiford
Graham Stott  writes:
> +(define_constraint "YC"
> +  "@internal
> +   A constant vector with each element is a unsigned bitimm-bit integer with 
> only one bit set"

Maybe:

  A replicated vector constant in which the replicated value has a single
  bit set

Likewise YZ and clear bits.

> +(define_constraint "Y5"
> +  "@internal
> +   A constant vector with each element is a signed 6-bit integer"
> +  (and (match_code "const_vector")
> +   (match_test "mips_const_vector_any_int_p (op, mode, -32, 31)")))

Maybe use Usv6.

  A replicated vector constant in which the replicated value is a signed
  6-bit number.

> +(define_constraint "Y6"
> +  "@internal
> +   A constant vector with each element a unsigned 6-bit integer"
> +  (and (match_code "const_vector")
> +   (match_test "mips_const_vector_any_int_p (op, mode, 0, 31)")))

Similarly here for Uuv6.  Upper bound should be 63 for a 6-bit integer.
Would be good to have a test for that.

> +(define_constraint "Y8"
> +  "@internal
> +   A constant vector with each element a unsigned 0-bit integer"
> +  (and (match_code "const_vector")
> +   (match_test "mips_const_vector_any_int_p (op, mode, 0, 255)")))

Similarly here for Uuv8.

> @@ -127,3 +351,4 @@
>  DEF_MIPS_FTYPE (1, (VOID, USI))
>  DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI))
>  DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI))
> +

No newline here.

> +(define_c_enum "unspec" [
> +UNSPEC_MSA_ADDVI
> +UNSPEC_MSA_ANDI_B
> +UNSPEC_MSA_ASUB_S
> +  UNSPEC_MSA_ASUB_U
> +  UNSPEC_MSA_AVE_S
> +  UNSPEC_MSA_AVE_U

Formatting (second is right).

> +(define_mode_iterator MODE128_2 [V2DF V4SF V2DI V4SI V8HI V16QI])
> +(define_mode_iterator IMODE128 [V2DI V4SI V8HI V16QI])

These two aren't used and I can't see where MODE128_2 would come in useful.
Let's drop these for now.

> +(define_mode_attr VHALFMODE 
> +  [(V8HI "V16QI")
> +   (V4SI "V8HI")
> +   (V2SI "V4SI")
> +   (V2DI "V4SI")
> +   (V2DF "V4SF")])
> +
> +;; This attribute gives the integer mode for selection mask in vec_perm.
> +;; vcond also uses MSA_I for operand 0, 1, and 2.
> +(define_mode_attr MSA_I
> +  [(V2DF "V2DI")
> +   (V4SF "V4SI")
> +   (V2DI "V2DI")
> +   (V4SI "V4SI")
> +   (V8HI "V8HI")
> +   (V16QI "V16QI")])
> +
> +;; The attribute give the integer vector mode with same size
> +(define_mode_attr MODE_I
> +  [(V2DF "V2DI")
> +   (V4SF "V4SI")
> +   (V2DI "V2DI")
> +   (V4SI "V4SI")
> +   (V8HI "V8HI")
> +   (V16QI "V16QI")])

Let's call this "VIMODE" for consistency with both "IMODE" in mips.md
and the HALFMODE/VHALFMODE pair.  VIMODE can be used in place of MSA_I;
no need for both.

> +;; This attribute qives suffix gives the mode of the result for "copy_s_b, 
> copy_u_b" etc.
> +(define_mode_attr RES
> +  [(V2DF "DF")
> +   (V4SF "SF")
> +   (V2DI "DI")
> +   (V4SI "SI")
> +   (V8HI "SI")
> +   (V16QI "SI")])

Why we do need to promote sub-SI values to SI for this?  I'd prefer
that we use the "correct" mode (i.e. UNITMODE) instead.

> +;; This is used in msa_cast* to output mov.s or mov.d.
> +(define_mode_attr msafmt2
> +  [(V2DF "d")
> +   (V4SF "s")])

Not really an MSA format.  Maybe "unitfmt"?

> +;; This attribute qives define_insn suffix for MSA instructions 
> +;; with need distinction between integer and floating point.
> +(define_mode_attr msafmt3
> +  [(V2DF "d_f")
> +   (V4SF "w_f")
> +   (V2DI "d")
> +   (V4SI "w")
> +   (V8HI "h")
> +   (V16QI "b")])

msafmt_f might be more mnemonic than msafmt3.

> +;; The maximum index inside a vector.
> +(define_mode_attr max_elem_index
> +  [(V2DF "1")
> +   (V4SF "3")
> +   (V2DI "1")
> +   (V4SI "3")
> +   (V8HI "7")
> +   (V16QI "15")])

In the asserts where this is used it could just be
"GET_MODE_NUNITS (mode)"

> +;; This is used to form an immediate operand constraint 
> +;; using "const__operand".
> +(define_mode_attr imm
> +  [(V2DF "0_or_1")
> +   (V4SF "0_to_3")
> +   (V2DI "0_or_1")
> +   (V4SI "0_to_3")
> +   (V8HI "uimm3")
> +   (V16QI "uimm4")])

Maybe indeximm rather than imm, for consistency with bitimm?

> +;; This attribute is used to form the MODE for reg_or_0_operand
> +;; constraint.
> +(define_mode_attr REGOR0
> +  [(V2DF "DF")
> +   (V4SF "SF")
> +   (V2DI "DI")
> +   (V4SI "SI")
> +   (V8HI "SI")
> +   (V16QI "SI")])

Same as RES, and same comment.

> +(define_expand "vec_extract"
> +  [(match_operand: 0 "register_operand")
> +   (match_operand:IMSA 1 "register_operand")
> +   (match_operand 2 "const_int_operand")]
> +  "ISA_HAS_MSA"
> +{
> +  gcc_assert (UINTVAL (operands[2]) <= );
> +  enum machine_mode mode0 = GET_MODE (operands[0]);
> +  if (mode0 == QImode || mode0 == HImode)
> +emit_move_insn (operands[0],
> + gen_lowpart (mode0, gen_reg_rtx (SImode)));
> +  else
> +emit_insn (gen_msa_copy_s_ (operands[0], operands[1], 
> operands[2]));
> +  DONE;
> +})

The QImode/HImode case isn't right -- the source of the move is an
uninitialised register.  Please make sure there's a testcase for this.

You should be able to use mode instead of mode0.

[PATCH] Fixing SEH exceptions for languages != C++

2014-02-15 Thread Jonathan Schleifer
Hi!

The following patch fixes a bug in SEH exception handling that made it
crash with ObjC (and most likely other languages as well). The problem
is that the SEH exception handler always passes the unwind exception as
4th parameter to RtlUnwindEx, which RtlUnwindEx then later passes to
the landing pad as argument. This works for C++, as libstdc++ sets data
register 0 to the unwind exception anyway, but it crashes for ObjC as
the landing pad expects the thrown object to be in data register 0. The
solution is of course to fix the SEH wrapper to get the value that was
set for data register 0 using _Unwind_SetGR and pass that to
RtlUnwindEx, so that later on the correct value is passed to the
landing pad.

The patch was tested for C++ and ObjC, the latter with both the GNU
libobjc runtime and my own. (With -O0, it still crashed and complained
about invalid frames, but that is another issue.)

I don't think this patch needs transfer of copyright, as it is small
enough, so would it be possible to please include it in GCC 4.8.3?
This would finally make ObjC usable on Windows again - and most likely
other languages using exceptions as well.

Thanks!

PS: Please CC me as I'm not on the list!

-- 
Jonathan
--- libgcc/unwind-seh.c.orig	2014-02-15 17:01:59.012396423 +0100
+++ libgcc/unwind-seh.c	2014-02-15 17:03:54.064755427 +0100
@@ -313,8 +313,9 @@
 	  ms_exc->ExceptionInformation[3] = gcc_context.reg[1];
 
 	  /* Begin phase 2.  Perform the unwinding.  */
-	  RtlUnwindEx (this_frame, gcc_context.ra, ms_exc, gcc_exc,
-		   ms_orig_context, ms_disp->HistoryTable);
+	  RtlUnwindEx (this_frame, gcc_context.ra, ms_exc,
+		   (PVOID)gcc_context.reg[0], ms_orig_context,
+		   ms_disp->HistoryTable);
 	}
 
   /* In _Unwind_RaiseException we return _URC_FATAL_PHASE1_ERROR.  */


Re: [PATCH][ARM] add HFmode to arm_preferred_simd_mode

2014-02-15 Thread Kugan


On 14/02/14 14:34, Kugan wrote:
> 
> 
> On 14/02/14 11:24, Andrew Pinski wrote:
>> On Thu, Feb 13, 2014 at 4:15 PM, Kugan
>>  wrote:
>>> Hi,
>>>
>>> Is there any reason why HFmode is not there in arm_preferred_simd_mode?
>>> NEON does support this.
>>
>> Most likely because there is no support for Half-float in the vectorizer.
>>
> 
> I can see that get_vectype_for_scalar_type_and_size failing while
> building vector type (with build_vector_type) for Half-float. I guess we
> should add support there first.

Just for the record, I was wrong here. ARM NEON, where it supports half
float, does not support vector arithmetic operations in half float; it only
supports vector conversions to float and then operates on 32-bit values.
Therefore HFmode is not really a preferred mode.

Also, the GCC vectorizer can support half floats. It is not valid in
this case, and hence the ARM backend does not provide the required patterns
and hooks.

Thanks,
Kugan