[committed] Use new expand interface for MIPS

2011-03-27 Thread Richard Sandiford
Fresh from breaking everyone's build with the target-independent changes,
this patch makes MIPS use the new expand interfaces.  It's not much of an
improvement, but it is at least more "-" than "+".

Tested on mipsisa64-elf and applied.

Richard


gcc/
* config/mips/mips.c (mips_prepare_builtin_arg): Replace icode and
opno arguments with an expand_operand.  Use create_input_operand.
(mips_prepare_builtin_target): Delete.
(mips_expand_builtin_insn, mips_expand_builtin_compare_1): New
functions.
(mips_expand_builtin_direct): Use create_output_operand and
mips_expand_builtin_insn.  Update call to mips_prepare_builtin_arg.
(mips_expand_builtin_movtf): Likewise.  Use mips_expand_fp_comparison.
(mips_expand_builtin_compare): Use mips_expand_fp_comparison.

Index: gcc/config/mips/mips.c
===
--- gcc/config/mips/mips.c  2011-03-27 10:06:51.0 +0100
+++ gcc/config/mips/mips.c  2011-03-27 10:07:15.0 +0100
@@ -13184,56 +13184,64 @@ mips_builtin_decl (unsigned int code, bo
   return mips_builtin_decls[code];
 }
 
-/* Take argument ARGNO from EXP's argument list and convert it into a
-   form suitable for input operand OPNO of instruction ICODE.  Return the
-   value.  */
+/* Take argument ARGNO from EXP's argument list and convert it into
+   an expand operand.  Store the operand in *OP.  */
 
-static rtx
-mips_prepare_builtin_arg (enum insn_code icode,
- unsigned int opno, tree exp, unsigned int argno)
+static void
+mips_prepare_builtin_arg (struct expand_operand *op, tree exp,
+ unsigned int argno)
 {
   tree arg;
   rtx value;
-  enum machine_mode mode;
 
   arg = CALL_EXPR_ARG (exp, argno);
   value = expand_normal (arg);
-  mode = insn_data[icode].operand[opno].mode;
-  if (!insn_data[icode].operand[opno].predicate (value, mode))
-{
-  /* We need to get the mode from ARG for two reasons:
+  create_input_operand (op, value, TYPE_MODE (TREE_TYPE (arg)));
+}
 
-  - to cope with address operands, where MODE is the mode of the
-memory, rather than of VALUE itself.
+/* Expand instruction ICODE as part of a built-in function sequence.
+   Use the first NOPS elements of OPS as the instruction's operands.
+   HAS_TARGET_P is true if operand 0 is a target; it is false if the
+   instruction has no target.
 
-  - to cope with special predicates like pmode_register_operand,
-where MODE is VOIDmode.  */
-  value = copy_to_mode_reg (TYPE_MODE (TREE_TYPE (arg)), value);
-
-  /* Check the predicate again.  */
-  if (!insn_data[icode].operand[opno].predicate (value, mode))
-   {
- error ("invalid argument to built-in function");
- return const0_rtx;
-   }
-}
+   Return the target rtx if HAS_TARGET_P, otherwise return const0_rtx.  */
 
-  return value;
+static rtx
+mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
+ struct expand_operand *ops, bool has_target_p)
+{
+  if (!maybe_expand_insn (icode, nops, ops))
+{
+  error ("invalid argument to built-in function");
+  return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx;
+}
+  return has_target_p ? ops[0].value : const0_rtx;
 }
 
-/* Return an rtx suitable for output operand OP of instruction ICODE.
-   If TARGET is non-null, try to use it where possible.  */
+/* Expand a floating-point comparison for built-in function call EXP.
+   The first NARGS arguments are the values to be compared.  ICODE is
+   the .md pattern that does the comparison and COND is the condition
+   that is being tested.  Return an rtx for the result.  */
 
 static rtx
-mips_prepare_builtin_target (enum insn_code icode, unsigned int op, rtx target)
+mips_expand_builtin_compare_1 (enum insn_code icode,
+  enum mips_fp_condition cond,
+  tree exp, int nargs)
 {
-  enum machine_mode mode;
+  struct expand_operand ops[MAX_RECOG_OPERANDS];
+  int opno, argno;
 
-  mode = insn_data[icode].operand[op].mode;
-  if (target == 0 || !insn_data[icode].operand[op].predicate (target, mode))
-target = gen_reg_rtx (mode);
+  /* The instruction should have a target operand, an operand for each
+ argument, and an operand for COND.  */
+  gcc_assert (nargs + 2 == insn_data[(int) icode].n_operands);
 
-  return target;
+  opno = 0;
+  create_output_operand (&ops[opno++], NULL_RTX,
+insn_data[(int) icode].operand[0].mode);
+  for (argno = 0; argno < nargs; argno++)
+mips_prepare_builtin_arg (&ops[opno++], exp, argno);
+  create_integer_operand (&ops[opno++], (int) cond);
+  return mips_expand_builtin_insn (icode, opno, ops, true);
 }
 
 /* Expand a MIPS_BUILTIN_DIRECT or MIPS_BUILTIN_DIRECT_NO_TARGET function;
@@ -13245,44 +13253,23 @@ mips_prepare_builtin_target (enum insn_c
 mips_expan

[committed] PR target/38598: extendisi2 for MIPS

2011-03-27 Thread Richard Sandiford
This patch fixes PR38598, which is about a missed optimisation.
To quote from the PR:

  mipsisa64-elf-gcc -S -O2 gcc.target/mips/madd-7.c -DNOMIPS16=

  does not produce the expected MADD.  On long64 ABIs like EABI64, the
  loop starts out with an extra extendsidi2 instruction, and although
  this instruction gets removed after reload by a "split to nothing", it
  is still around at register allocation time.

  extendsidi2 does not allow LO operands, so its presence discourages
  the register allocator from using LO for the accumulator.  The fix is
  to add a LO alternative to extendsidi2.

I'd filed this two years ago with the idea of applying it once 4.5 opened,
but I forgot.  Thanks to Andrew for the prod.

Tested on mipsisa64-elf.  Applied.

Richard


gcc/
PR target/38598
* config/mips/mips.md (extendsidi2): Add an "l" alternative.
Update commentary.

gcc/testsuite/
PR target/38598
* gcc.target/mips/madd-7.c: Remove -mlong32.
* gcc.target/mips/msub-7.c: Likewise.

Index: gcc/config/mips/mips.md
===
--- gcc/config/mips/mips.md 2011-03-26 18:30:10.0 +
+++ gcc/config/mips/mips.md 2011-03-27 10:26:41.0 +0100
@@ -2963,20 +2963,26 @@ (define_insn "*zero_extendhi_truncqi"
 ;; Extension insns.
 ;; Those for integer source operand are ordered widest source type first.
 
-;; When TARGET_64BIT, all SImode integer registers should already be in
-;; sign-extended form (see TRULY_NOOP_TRUNCATION and truncdisi2).  We can
-;; therefore get rid of register->register instructions if we constrain
-;; the source to be in the same register as the destination.
+;; When TARGET_64BIT, all SImode integer and accumulator registers
+;; should already be in sign-extended form (see TRULY_NOOP_TRUNCATION
+;; and truncdisi2).  We can therefore get rid of register->register
+;; instructions if we constrain the source to be in the same register as
+;; the destination.
 ;;
-;; The register alternative has type "arith" so that the pre-reload
-;; scheduler will treat it as a move.  This reflects what happens if
-;; the register alternative needs a reload.
+;; Only the pre-reload scheduler sees the type of the register alternatives;
+;; we split them into nothing before the post-reload scheduler runs.
+;; These alternatives therefore have type "move" in order to reflect
+;; what happens if the two pre-reload operands cannot be tied, and are
+;; instead allocated two separate GPRs.  We don't distinguish between
+;; the GPR and LO cases because we don't usually know during pre-reload
+;; scheduling whether an operand will be LO or not.
 (define_insn_and_split "extendsidi2"
-  [(set (match_operand:DI 0 "register_operand" "=d,d")
-(sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,m")))]
+  [(set (match_operand:DI 0 "register_operand" "=d,l,d")
+(sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,0,m")))]
   "TARGET_64BIT"
   "@
#
+   #
lw\t%0,%1"
   "&& reload_completed && register_operand (operands[1], VOIDmode)"
   [(const_int 0)]
@@ -2984,7 +2990,7 @@ (define_insn_and_split "extendsidi2"
   emit_note (NOTE_INSN_DELETED);
   DONE;
 }
-  [(set_attr "move_type" "move,load")
+  [(set_attr "move_type" "move,move,load")
(set_attr "mode" "DI")])
 
 (define_expand "extend2"
Index: gcc/testsuite/gcc.target/mips/madd-7.c
===
--- gcc/testsuite/gcc.target/mips/madd-7.c  2011-03-26 18:30:10.0 
+
+++ gcc/testsuite/gcc.target/mips/madd-7.c  2011-03-27 10:11:25.0 
+0100
@@ -1,5 +1,4 @@
-/* -mlong32 added because of PR target/38598.  */
-/* { dg-options "-O2 -march=5kc -mlong32" } */
+/* { dg-options "-O2 -march=5kc" } */
 /* { dg-final { scan-assembler-not "\tmul\t" } } */
 /* { dg-final { scan-assembler "\tmadd\t" } } */
 
Index: gcc/testsuite/gcc.target/mips/msub-7.c
===
--- gcc/testsuite/gcc.target/mips/msub-7.c  2011-03-26 18:30:10.0 
+
+++ gcc/testsuite/gcc.target/mips/msub-7.c  2011-03-27 10:11:25.0 
+0100
@@ -1,5 +1,4 @@
-/* -mlong32 added because of PR target/38598.  */
-/* { dg-options "-O2 -march=5kc -mlong32" } */
+/* { dg-options "-O2 -march=5kc" } */
 /* { dg-final { scan-assembler-not "\tmul\t" } } */
 /* { dg-final { scan-assembler "\tmsub\t" } } */
 


Re: *ping* Re: [Patch, Fortran, 4.7] PR 18918 - Add initial support for a coarray communication library

2011-03-27 Thread Tobias Burnus

On 26.03.2011 16:56, Jerry DeLisle wrote:

On 19.03.2011 17:23, Tobias Burnus wrote:

Build and regtested on x86-64-linux.
(a) Is the patch OK for the 4.7 trunk?

OK


Committed as Rev. 171568.


(b) Are the libgfortrancaf.h, libgfortrancaf_mpi.c and
libgfortrancaf_single.c OK for inclusion at libgfortran/caf?


I like the idea of segregating this into a subdirectory. This will 
help keep things isolated.

So OK by me.


Committed as Rev. 171570. I followed Steve's suggestion to rename the 
files to something shorter. I now use: libcaf.h, mpi.c, single.c.


The usage of the coarray library is now described at 
http://gcc.gnu.org/wiki/CoarrayLib


 * * *

The next step is to fill the gaps to make the coarray support really 
useful. As written before: The coarray support is very limited and as of 
now no communication for coarrays themselves is supported, which limits 
the current support to programs which do not need any cross-image 
communication ;-)


Tobias


Re: [patch, Fortran] Some more trim optimizatins

2011-03-27 Thread Tobias Burnus

Thomas Koenig wrote:
the following patch extends the trim optimization to variables like 
trim(a%x).


Regression-tested. OK for trunk?


OK.

Tobias


PATCH: Split AVX 32byte unalignd load/store

2011-03-27 Thread H.J. Lu
Hi,

Here is a patch to split AVX 32byte unaligned load/store:

http://gcc.gnu.org/ml/gcc-patches/2011-02/msg00743.html

It speeds up some SPEC CPU 2006 benchmarks by up to 6%.
OK for trunk?

Thanks.


-- 
H.J.


[patch] Fix SSA corruption with SLP

2011-03-27 Thread Eric Botcazou
Hi,

the attached testcase exhibits a corruption of the SSA form:

t.c: In function 'foo':
t.c:9:10: error: definition in block 2 follows the use
for SSA_NAME: vect_p.7_12 in statement:
# VUSE <.MEM_6(D)>
vect_var_.8_13 = MEM[(struct R *)vect_p.7_12];
t.c:9:10: internal compiler error: verify_ssa failed
Please submit a full bug report,
with preprocessed source if appropriate.
See <http://gcc.gnu.org/bugs.html> for instructions.

introduced by SLP on x86-64 at -O2 -ftree-vectorize -fno-vect-cost-model.

This is a wrong statement rewriting in vectorizable_load.  Before SLP we have:

  D.2687_1 = arg.d2;
  D.2688_2 = arg.d1;
  D.2689_3 = 0.0 - D.2688_2;
  D.2690_4 = D.2687_1 * D.2689_3;
  D.2692.d1 = D.2690_4;

vect_check_interleaving computes that field d1 is accessed before field d2 
because the structure is defined as

struct R {
  double d1;
  double d2;
};

but it's the opposite in the code.  So, in vectorizable_load, first_stmt is the 
load of d1 and new statements are wrongly inserted _after_ the load of d2.
Note that, on release branches (4.5 and 4.6 at least), you get wrong code.

Proposed fix attached.  It adds a GSI parameter to vect_create_data_ref_ptr.
Tested on {i586,x86_64}-suse-linux, OK for the mainline?  And the branches?


2011-03-27  Eric Botcazou  

* tree-vectorizer.h (vect_create_data_ref_ptr): Adjust prototype.
* tree-vect-data-refs.c (vect_create_data_ref_ptr): Add GSI parameter.
Insert new statements at it in lieu of STMT.
(vect_setup_realignment): Adjust call to vect_create_data_ref_ptr.
* tree-vect-stmts.c (vectorizable_store): Likewise.
(vectorizable_load): Likewise.


2011-03-27  Eric Botcazou  

* gcc.dg/slp-1.c: New test.


-- 
Eric Botcazou
Index: tree-vectorizer.h
===
--- tree-vectorizer.h	(revision 171572)
+++ tree-vectorizer.h	(working copy)
@@ -824,7 +824,8 @@ extern bool vect_analyze_data_ref_access
 extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
 extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
 extern tree vect_create_data_ref_ptr (gimple, struct loop *, tree, tree *,
-  gimple *, bool, bool *);
+  gimple_stmt_iterator *, gimple *,
+  bool, bool *);
 extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree);
 extern tree vect_create_destination_var (tree, tree);
 extern bool vect_strided_store_supported (tree);
Index: tree-vect-data-refs.c
===
--- tree-vect-data-refs.c	(revision 171572)
+++ tree-vect-data-refs.c	(working copy)
@@ -2922,9 +2922,10 @@ vect_create_addr_base_for_vector_ref (gi
2. AT_LOOP: the loop where the vector memref is to be created.
3. OFFSET (optional): an offset to be added to the initial address accessed
 by the data-ref in STMT.
-   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
+   4. BSI: location where the new stmts are to be placed if there is no loop
+   5. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
 pointing to the initial address.
-   5. TYPE: if not NULL indicates the required type of the data-ref.
+   6. TYPE: if not NULL indicates the required type of the data-ref.
 
Output:
1. Declare a new ptr to vector_type, and have it point to the base of the
@@ -2952,9 +2953,9 @@ vect_create_addr_base_for_vector_ref (gi
4. Return the pointer.  */
 
 tree
-vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
-			  tree offset, tree *initial_address, gimple *ptr_incr,
-			  bool only_init, bool *inv_p)
+vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, tree offset,
+			  tree *initial_address, gimple_stmt_iterator *gsi,
+			  gimple *ptr_incr, bool only_init, bool *inv_p)
 {
   tree base_name;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -2980,7 +2981,6 @@ vect_create_data_ref_ptr (gimple stmt, s
   gimple incr;
   tree step;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
   tree base;
 
   if (loop_vinfo)
@@ -3125,7 +3125,7 @@ vect_create_data_ref_ptr (gimple stmt, s
   gcc_assert (!new_bb);
 }
   else
-gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
+gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
 }
 
   *initial_address = new_temp;
@@ -3147,7 +3147,7 @@ vect_create_data_ref_ptr (gimple stmt, s
 	  gcc_assert (!new_bb);
 	}
   else
-	gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
+	gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
 }
   else
 vect_ptr_init = new_temp;
@@ -3672,7 +3672,7 @@ vect_setup_realignment (gimple stmt, gim
   gcc_assert (!compute_in_loop);
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
   ptr = vec

Re: [testsuite, build] Convert boehm-gc testsuite to DejaGnu (PR boehm-gc/11412)

2011-03-27 Thread H.J. Lu
On Fri, Mar 25, 2011 at 10:13 AM, Rainer Orth
 wrote:
> I meant to include the final patch.  Here it is.
>
>        Rainer
>
>
> 2011-03-25  Rainer Orth  
>
>        PR boehm-gc/11412
>        * configure.ac (THREADCFLAGS): New variable.
>        Use it instead of INCLUDES, AM_CPPFLAGS.
>        <*-*-kfreebsd*-gnu> (THREADDLLIBS): Rename to THREADLIBS.
>        Remove DG/UX support.
>        (AC_CONFIG_FILES): Add testsuite/Makefile.
>        * Makefile.am (AUTOMAKE_OPTIONS): Use foreign instead of cygnus.
>        (SUBDIRS): Add testsuite.
>        (libgcjgc_la_LIBADD): Remove $(UNWINDLIBS).
>        (AM_CXXFLAGS): Add $(THREADCFLAGS).
>        (AM_CFLAGS): Likewise.
>        Remove TESTS related variables.
>        * Makefile.in: Regenerate.
>        * configure: Regenerate.
>        * testsuite/Makefile.am: New file.
>        * testsuite/Makefile.in: New file.
>        * testsuite/lib/boehm-gc.exp: New file.
>        * testsuite/config/default.exp: New file.
>        * testsuite/boehm-gc.c/c.exp: New file.
>        * testsuite/boehm-gc.lib/lib.exp: New file.
>        * tests/test.c: Move ...
>        * testsuite/boehm-gc.c/gctest.c: ... here.
>        * tests/leak_test.c, tests/middle.c, tests/thread_leak_test.c,
>        tests/trace_test.c: Move ...
>        * testsuite/boehm-gc.c: ... here.
>        * testsuite/boehm-gc.c/trace_test.c: Skip everywhere.
>        * tests/staticrootslib.c, tests/staticrootstest.c: Move ...
>        * testsuite/boehm-gc.lib: ... here.
>        * testsuite/boehm-gc.lib/staticrootstest.c: Use dg-add-shlib
>        staticrootslib.c.
>        * tests/test_cpp.cc: Move ...
>        * testsuite/boehm-gc.c++: ... here.
>

This may have caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48299

-- 
H.J.


Re: PATCH: Split AVX 32byte unalignd load/store

2011-03-27 Thread Andi Kleen
"H.J. Lu"  writes:

> Hi,
>
> Here is a patch to split AVX 32byte unalignd load/store:
>
> http://gcc.gnu.org/ml/gcc-patches/2011-02/msg00743.html
>
> It speeds up some SPEC CPU 2006 benchmarks by up to 6%.
> OK for trunk?

This should be also made default for sandy bridge
(e.g. with -mtune=native and -mtune=corei7-2ndgen/avx)

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only


Re: PATCH: Split AVX 32byte unalignd load/store

2011-03-27 Thread H.J. Lu
On Sun, Mar 27, 2011 at 9:33 AM, Andi Kleen  wrote:
> "H.J. Lu"  writes:
>
>> Hi,
>>
>> Here is a patch to split AVX 32byte unalignd load/store:
>>
>> http://gcc.gnu.org/ml/gcc-patches/2011-02/msg00743.html
>>
>> It speeds up some SPEC CPU 2006 benchmarks by up to 6%.
>> OK for trunk?
>
> This should be also made default for sandy bridge
> (e.g. with -mtune=native and -mtune=corei7-2ndgen/avx)
>

It is turned on for TARGET_AVX, including SNB.

-- 
H.J.


Go patch committed: Better error about unsafe.Pointer indirect

2011-03-27 Thread Ian Lance Taylor
In Go you can not indirect through an unsafe.Pointer type.  You have to
convert it to some real pointer type first.  This patch to the Go
frontend gives a better error for trying to do this, rather than trying
to handle the void type in other places.  Bootstrapped and ran Go
testsuite on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r b426ff8fb580 go/expressions.cc
--- a/go/expressions.cc	Sat Mar 26 12:04:00 2011 -0700
+++ b/go/expressions.cc	Sat Mar 26 22:33:57 2011 -0700
@@ -3669,6 +3669,14 @@
 	}
 }
 
+  // Catching an invalid indirection of unsafe.Pointer here avoid
+  // having to deal with TYPE_VOID in other places.
+  if (op == OPERATOR_MULT && expr->type()->is_unsafe_pointer_type())
+{
+  error_at(this->location(), "invalid indirect of %");
+  return Expression::make_error(this->location());
+}
+
   if (op == OPERATOR_PLUS || op == OPERATOR_MINUS
   || op == OPERATOR_NOT || op == OPERATOR_XOR)
 {


Re: [patch, Fortran] Some more trim optimizatins

2011-03-27 Thread Thomas Koenig

Hi Tobias,


Thomas Koenig wrote:

the following patch extends the trim optimization to variables like
trim(a%x).

Regression-tested. OK for trunk?


OK.


Sending fortran/ChangeLog
Sending fortran/frontend-passes.c
Sending testsuite/ChangeLog
Adding testsuite/gfortran.dg/trim_optimize_5.f90
Adding testsuite/gfortran.dg/trim_optimize_6.f90
Transmitting file data .
Committed revision 171575.

Thanks for the review!

Thomas


Go patch committed: Error if no-value return with no result names

2011-03-27 Thread Ian Lance Taylor
This patch makes the gccgo frontend give an error if a return statement
with no values is used in a function with results for which the result
parameters have no names.  Bootstrapped and ran Go testsuite on
x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r 015f1f9a7958 go/statements.cc
--- a/go/statements.cc	Sat Mar 26 22:34:39 2011 -0700
+++ b/go/statements.cc	Sun Mar 27 10:48:22 2011 -0700
@@ -2581,10 +2581,20 @@
 void
 Return_statement::do_check_types(Gogo*)
 {
+  const Typed_identifier_list* results = this->results_;
   if (this->vals_ == NULL)
-return;
-
-  const Typed_identifier_list* results = this->results_;
+{
+  if (results != NULL
+	  && !results->empty()
+	  && results->front().name().empty())
+	{
+	  // The result parameters are not named, which means that we
+	  // need to supply values for them.
+	  this->report_error(_("not enough arguments to return"));
+	}
+  return;
+}
+
   if (results == NULL)
 {
   this->report_error(_("return with value in function "
@@ -2621,7 +2631,7 @@
 }
 
   if (pt != results->end())
-this->report_error(_("not enough values in return statement"));
+this->report_error(_("not enough arguments to return"));
 }
 
 // Build a RETURN_EXPR tree.


Re: PATCH: Split AVX 32byte unalignd load/store

2011-03-27 Thread Uros Bizjak
On Sun, Mar 27, 2011 at 3:44 PM, H.J. Lu  wrote:

> Here is a patch to split AVX 32byte unalignd load/store:
>
> http://gcc.gnu.org/ml/gcc-patches/2011-02/msg00743.html
>
> It speeds up some SPEC CPU 2006 benchmarks by up to 6%.
> OK for trunk?

> 2011-02-11  H.J. Lu  
>
>   * config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
>   and -mavx256-split-unaligned-store.
>   (ix86_option_override_internal): Split 32-byte AVX unaligned
>   load/store by default.
>   (ix86_avx256_split_vector_move_misalign): New.
>   (ix86_expand_vector_move_misalign): Use it.
>
>   * config/i386/i386.opt: Add -mavx256-split-unaligned-load and
>   -mavx256-split-unaligned-store.
>
>   * config/i386/sse.md (*avx_mov_internal): Verify unaligned
>   256bit load/store.  Generate unaligned store on misaligned memory
>   operand.
>   (*avx_movu): Verify unaligned
>   256bit load/store.
>   (*avx_movdqu): Likewise.
>
>   * doc/invoke.texi: Document -mavx256-split-unaligned-load and
>   -mavx256-split-unaligned-store.
>
> gcc/testsuite/
>
> 2011-02-11  H.J. Lu  
>
>   * gcc.target/i386/avx256-unaligned-load-1.c: New.
>   * gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
>   * gcc.target/i386/avx256-unaligned-store-7.c: Likewise.
>



> @@ -203,19 +203,37 @@
>return standard_sse_constant_opcode (insn, operands[1]);
>  case 1:
>  case 2:
> +  if (GET_MODE_ALIGNMENT (mode) == 256
> +   && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
> +&& MEM_P (operands[0])
> +&& MEM_ALIGN (operands[0]) < 256)
> +   || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
> +   && MEM_P (operands[1])
> +   && MEM_ALIGN (operands[1]) < 256)))
> + gcc_unreachable ();

Please use "misaligned_operand (operands[...], mode)" instead of
MEM_P && MEM_ALIGN combo in a couple of places.

OK with that change.

Thanks,
Uros.


libgo patch committed: Remove closedchan function

2011-03-27 Thread Ian Lance Taylor
With the removal of the predeclared function 'closed' from Go, and the
update to the current Go library, the 'closedchan' function is no longer
needed.  This patch removes it from libgo.  Bootstrapped and ran Go
testsuite on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r b5434877887e libgo/runtime/go-reflect-chan.c
--- a/libgo/runtime/go-reflect-chan.c	Sun Mar 27 10:51:19 2011 -0700
+++ b/libgo/runtime/go-reflect-chan.c	Sun Mar 27 11:02:17 2011 -0700
@@ -120,17 +120,6 @@
 }
 }
 
-extern _Bool chanclosed (unsigned char *)
-  asm ("libgo_reflect.reflect.chanclosed");
-
-_Bool
-chanclosed (unsigned char *ch)
-{
-  struct __go_channel *channel = (struct __go_channel *) ch;
-
-  return __go_builtin_closed (channel);
-}
-
 extern void chanclose (unsigned char *)
   asm ("libgo_reflect.reflect.chanclose");
 


Re: PATCH: Split AVX 32byte unalignd load/store

2011-03-27 Thread H.J. Lu
On Sun, Mar 27, 2011 at 10:53 AM, Uros Bizjak  wrote:
> On Sun, Mar 27, 2011 at 3:44 PM, H.J. Lu  wrote:
>
>> Here is a patch to split AVX 32byte unalignd load/store:
>>
>> http://gcc.gnu.org/ml/gcc-patches/2011-02/msg00743.html
>>
>> It speeds up some SPEC CPU 2006 benchmarks by up to 6%.
>> OK for trunk?
>
>> 2011-02-11  H.J. Lu  
>>
>>       * config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
>>       and -mavx256-split-unaligned-store.
>>       (ix86_option_override_internal): Split 32-byte AVX unaligned
>>       load/store by default.
>>       (ix86_avx256_split_vector_move_misalign): New.
>>       (ix86_expand_vector_move_misalign): Use it.
>>
>>       * config/i386/i386.opt: Add -mavx256-split-unaligned-load and
>>       -mavx256-split-unaligned-store.
>>
>>       * config/i386/sse.md (*avx_mov_internal): Verify unaligned
>>       256bit load/store.  Generate unaligned store on misaligned memory
>>       operand.
>>       (*avx_movu): Verify unaligned
>>       256bit load/store.
>>       (*avx_movdqu): Likewise.
>>
>>       * doc/invoke.texi: Document -mavx256-split-unaligned-load and
>>       -mavx256-split-unaligned-store.
>>
>> gcc/testsuite/
>>
>> 2011-02-11  H.J. Lu  
>>
>>       * gcc.target/i386/avx256-unaligned-load-1.c: New.
>>       * gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
>>       * gcc.target/i386/avx256-unaligned-store-7.c: Likewise.
>>
>
>
>
>> @@ -203,19 +203,37 @@
>>        return standard_sse_constant_opcode (insn, operands[1]);
>>      case 1:
>>      case 2:
>> +      if (GET_MODE_ALIGNMENT (mode) == 256
>> +       && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
>> +            && MEM_P (operands[0])
>> +            && MEM_ALIGN (operands[0]) < 256)
>> +           || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
>> +               && MEM_P (operands[1])
>> +               && MEM_ALIGN (operands[1]) < 256)))
>> +     gcc_unreachable ();
>
> Please use "misaligned_operand (operands[...], mode)" instead of
> MEM_P && MEM_ALIGN combo in a couple of places.
>
> OK with that change.
>

This is the patch I checked in.

Thanks.


-- 
H.J.
gcc/

2011-03-27  H.J. Lu  

* config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
and -mavx256-split-unaligned-store.
(ix86_option_override_internal): Split 32-byte AVX unaligned
load/store by default.
(ix86_avx256_split_vector_move_misalign): New.
(ix86_expand_vector_move_misalign): Use it.

* config/i386/i386.opt: Add -mavx256-split-unaligned-load and
-mavx256-split-unaligned-store.

* config/i386/sse.md (*avx_mov_internal): Verify unaligned
256bit load/store.  Generate unaligned store on misaligned memory
operand.
(*avx_movu): Verify unaligned
256bit load/store.
(*avx_movdqu): Likewise.

* doc/invoke.texi: Document -mavx256-split-unaligned-load and
-mavx256-split-unaligned-store.

gcc/testsuite/

2011-03-27  H.J. Lu  

* gcc.target/i386/avx256-unaligned-load-1.c: New.
* gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-7.c: Likewise.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4e8ca69..a4ca762 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3130,6 +3130,8 @@ ix86_target_string (int isa, int flags, const char *arch, 
const char *tune,
 { "-mvect8-ret-in-mem",MASK_VECT8_RETURNS },
 { "-m8bit-idiv",   MASK_USE_8BIT_IDIV },
 { "-mvzeroupper",  MASK_VZEROU

Re[2]: [MIPS] Hookize FUNCTION_VALUE, LIBCALL_VALUE and FUNCTION_VALUE_REGNO_P

2011-03-27 Thread Anatoly Sokolov
Hi, Richard.

Richard Sandiford writes:
>> +mips_function_value_regno_p (const unsigned int regno)

> Let's drop the "const".

> OK with those changes, thanks.

  The 'regno' argument of the TARGET_FUNCTION_VALUE_REGNO_P target hook has 'const 
unsigned int' type, so I left it unchanged.


  Bootstrapped and regression tested on mips64el-unknown-linux-gnu.

  Committed as:

* config/mips/mips.h (LIBCALL_VALUE, FUNCTION_VALUE,
FUNCTION_VALUE_REGNO_P): Remove macros.
* config/mips/mips-protos.h (mips_function_value): Remove.
* config/mips/mips.c (mips_function_value): Rename to...
(mips_function_value_1): ... this. Make static.  Handle receiving
the function type in 'fn_decl_or_type' argument.
(mips_function_value, mips_libcall_value,
mips_function_value_regno_p): New functions.
(TARGET_FUNCTION_VALUE, TARGET_LIBCALL_VALUE,
TARGET_FUNCTION_VALUE_REGNO_P): Define.

Index: gcc/config/mips/mips-protos.h
===
--- gcc/config/mips/mips-protos.h   (revision 171367)
+++ gcc/config/mips/mips-protos.h   (working copy)
@@ -277,7 +277,6 @@
 extern void mips_expand_before_return (void);
 extern void mips_expand_epilogue (bool);
 extern bool mips_can_use_return_insn (void);
-extern rtx mips_function_value (const_tree, const_tree, enum machine_mode);
 
 extern bool mips_cannot_change_mode_class (enum machine_mode,
   enum machine_mode, enum reg_class);
Index: gcc/config/mips/mips.c
===
--- gcc/config/mips/mips.c  (revision 171367)
+++ gcc/config/mips/mips.c  (working copy)
@@ -5247,18 +5247,25 @@
 
 }
 
-/* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
-   VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
-   VALTYPE is null and MODE is the mode of the return value.  */
+/* Implement TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE.
+   For normal calls, VALTYPE is the return type and MODE is VOIDmode.
+   For libcalls, VALTYPE is null and MODE is the mode of the return value.  */
 
-rtx
-mips_function_value (const_tree valtype, const_tree func, enum machine_mode 
mode)
+static rtx
+mips_function_value_1 (const_tree valtype, const_tree fn_decl_or_type,
+  enum machine_mode mode)
 {
   if (valtype)
 {
   tree fields[2];
   int unsigned_p;
+  const_tree func;
 
+  if (fn_decl_or_type && DECL_P (fn_decl_or_type))
+   func = fn_decl_or_type;
+  else
+   func = NULL;
+
   mode = TYPE_MODE (valtype);
   unsigned_p = TYPE_UNSIGNED (valtype);
 
@@ -5324,6 +5331,41 @@
   return gen_rtx_REG (mode, GP_RETURN);
 }
 
+/* Implement TARGET_FUNCTION_VALUE. */
+
+static rtx
+mips_function_value (const_tree valtype, const_tree fn_decl_or_type,
+bool outgoing ATTRIBUTE_UNUSED)
+{
+  return mips_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
+}
+
+/* Implement TARGET_LIBCALL_VALUE. */
+
+static rtx
+mips_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+  return mips_function_value_1 (NULL_TREE, NULL_TREE, mode);
+}
+
+/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
+
+   On the MIPS, R2 R3 and F0 F2 are the only register thus used.
+   Currently, R2 and F0 are only implemented here (C has no complex type)  */
+
+static bool
+mips_function_value_regno_p (const unsigned int regno)
+{
+  if (regno == GP_RETURN
+  || regno == FP_RETURN
+  || (LONG_DOUBLE_TYPE_SIZE == 128
+ && FP_RETURN != GP_RETURN
+ && regno == FP_RETURN + 2))
+return true;
+
+  return false;
+}
+
 /* Implement TARGET_RETURN_IN_MEMORY.  Under the o32 and o64 ABIs,
all BLKmode objects are returned in memory.  Under the n32, n64
and embedded ABIs, small structures are returned in a register.
@@ -16521,6 +16563,12 @@
 #undef TARGET_PROMOTE_PROTOTYPES
 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
 
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE mips_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE mips_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P mips_function_value_regno_p
 #undef TARGET_RETURN_IN_MEMORY
 #define TARGET_RETURN_IN_MEMORY mips_return_in_memory
 #undef TARGET_RETURN_IN_MSB
Index: gcc/config/mips/mips.h
===
--- gcc/config/mips/mips.h  (revision 171367)
+++ gcc/config/mips/mips.h  (working copy)
@@ -2150,20 +2150,6 @@
 #define FP_ARG_FIRST (FP_REG_FIRST + 12)
 #define FP_ARG_LAST  (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1)
 
-#define LIBCALL_VALUE(MODE) \
-  mips_function_value (NULL_TREE, NULL_TREE, MODE)
-
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
-  mips_function_value (VALTYPE, FUNC, VOIDmode)
-
-/* 1 if N is a possible register number for a function value.
-   On the MIPS

libgo patch committed: Runtime profiling support (not working yet)

2011-03-27 Thread Ian Lance Taylor
This patch to libgo adds the runtime profiling infrastructure from the
master Go library.  This does not yet work for gccgo, because I haven't
written the code to get a stack traceback.  However, that should come in
time, and adding the infrastructure provides some symbols which code in
the Go library expects to exist.  Bootstrapped and ran Go testsuite on
x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r d71c93d01702 libgo/Makefile.am
--- a/libgo/Makefile.am	Sun Mar 27 11:06:07 2011 -0700
+++ b/libgo/Makefile.am	Sun Mar 27 12:07:37 2011 -0700
@@ -423,6 +423,7 @@
 	runtime/go-unsafe-newarray.c \
 	runtime/go-unsafe-pointer.c \
 	runtime/go-unwind.c \
+	runtime/cpuprof.c \
 	runtime/mcache.c \
 	runtime/mcentral.c \
 	$(runtime_mem_file) \
diff -r d71c93d01702 libgo/runtime/cpuprof.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +
+++ b/libgo/runtime/cpuprof.c	Sun Mar 27 12:07:37 2011 -0700
@@ -0,0 +1,432 @@
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// CPU profiling.
+// Based on algorithms and data structures used in
+// http://code.google.com/p/google-perftools/.
+//
+// The main difference between this code and the google-perftools
+// code is that this code is written to allow copying the profile data
+// to an arbitrary io.Writer, while the google-perftools code always
+// writes to an operating system file.
+//
+// The signal handler for the profiling clock tick adds a new stack trace
+// to a hash table tracking counts for recent traces.  Most clock ticks
+// hit in the cache.  In the event of a cache miss, an entry must be 
+// evicted from the hash table, copied to a log that will eventually be
+// written as profile data.  The google-perftools code flushed the
+// log itself during the signal handler.  This code cannot do that, because
+// the io.Writer might block or need system calls or locks that are not
+// safe to use from within the signal handler.  Instead, we split the log
+// into two halves and let the signal handler fill one half while a goroutine
+// is writing out the other half.  When the signal handler fills its half, it
+// offers to swap with the goroutine.  If the writer is not done with its half,
+// we lose the stack trace for this clock tick (and record that loss).
+// The goroutine interacts with the signal handler by calling getprofile() to
+// get the next log piece to write, implicitly handing back the last log
+// piece it obtained.
+//
+// The state of this dance between the signal handler and the goroutine
+// is encoded in the Profile.handoff field.  If handoff == 0, then the goroutine
+// is not using either log half and is waiting (or will soon be waiting) for
+// a new piece by calling notesleep(&p->wait).  If the signal handler
+// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
+// to wake the goroutine.  The value indicates the number of entries in the
+// log half being handed off.  The goroutine leaves the non-zero value in
+// place until it has finished processing the log half and then flips the number
+// back to zero.  Setting the high bit in handoff means that the profiling is over, 
+// and the goroutine is now in charge of flushing the data left in the hash table
+// to the log and returning that data.  
+//
+// The handoff field is manipulated using atomic operations.
+// For the most part, the manipulation of handoff is orderly: if handoff == 0
+// then the signal handler owns it and can change it to non-zero.  
+// If handoff != 0 then the goroutine owns it and can change it to zero.
+// If that were the end of the story then we would not need to manipulate
+// handoff using atomic operations.  The operations are needed, however,
+// in order to let the log closer set the high bit to indicate "EOF" safely
+// in the situation when normally the goroutine "owns" handoff.
+
+#include "runtime.h"
+#include "malloc.h"
+
+#include "array.h"
+typedef struct __go_open_array Slice;
+#define array __values
+#define len __count
+#define cap __capacity
+
+enum
+{
+	HashSize = 1<<10,
+	LogSize = 1<<17,
+	Assoc = 4,
+	MaxStack = 64,
+};
+
+typedef struct Profile Profile;
+typedef struct Bucket Bucket;
+typedef struct Entry Entry;
+
+struct Entry {
+	uintptr count;
+	uintptr depth;
+	uintptr stack[MaxStack];
+};
+
+struct Bucket {
+	Entry entry[Assoc];
+};
+
+struct Profile {
+	bool on;		// profiling is on
+	Note wait;		// goroutine waits here
+	uintptr count;		// tick count
+	uintptr evicts;		// eviction count
+	uintptr lost;		// lost ticks that need to be logged
+	uintptr totallost;	// total lost ticks
+
+	// Active recent stack traces.
+	Bucket hash[HashSize];
+
+	// Log of traces evicted from hash.
+	// Signal handler has filled log[toggle][:nlog].
+	// Goroutine is writing log[1-toggle][:handoff].
+	uintptr log[2][LogSize/2];
+	uintptr nlog;
+	int32 toggle;
+	uint32 handoff;
+	
+	// Writer state.
+	// Write

[h8300] Hookize FUNCTION_VALUE, LIBCALL_VALUE and FUNCTION_VALUE_REGNO_P

2011-03-27 Thread Anatoly Sokolov
Hello.

  This patch removes the obsolete FUNCTION_VALUE, LIBCALL_VALUE and
FUNCTION_VALUE_REGNO_P macros from the H8300 back end in GCC and introduces
the equivalent TARGET_FUNCTION_VALUE, TARGET_LIBCALL_VALUE and
TARGET_FUNCTION_VALUE_REGNO_P target hooks.

  Regression tested on h8300-unknown-elf with no new failure.

  OK to install? 

* config/h8300/h8300.h (FUNCTION_VALUE_REGNO_P, FUNCTION_VALUE,
LIBCALL_VALUE): Remove macros.
* config/h8300/h8300.c (TARGET_FUNCTION_VALUE, TARGET_LIBCALL_VALUE,
TARGET_FUNCTION_VALUE_REGNO_P): Define.
(h8300_function_value, h8300_libcall_value,
h8300_function_value_regno_p): New functions

Index: gcc/config/h8300/h8300.c
===
--- gcc/config/h8300/h8300.c(revision 171345)
+++ gcc/config/h8300/h8300.c(working copy)
@@ -5860,6 +5860,38 @@
   set_optab_libfunc (umod_optab, HImode, "__umodhi3");
 }
 
+/* Worker function for TARGET_FUNCTION_VALUE.
+
+   On the H8 the return value is in R0/R1.  */
+
+static rtx
+h8300_function_value (const_tree ret_type,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+  return gen_rtx_REG (TYPE_MODE (ret_type), R0_REG);
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE.
+
+   On the H8 the return value is in R0/R1.  */
+
+static rtx
+h8300_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+  return gen_rtx_REG (mode, R0_REG);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P.
+
+   On the H8, R0 is the only register thus used.  */
+
+static bool
+h8300_function_value_regno_p (const unsigned int regno)
+{
+  return (regno == R0_REG);
+}
+
 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
 
 static bool
@@ -5946,6 +5978,15 @@
 #undef TARGET_INIT_LIBFUNCS
 #define TARGET_INIT_LIBFUNCS h8300_init_libfuncs
 
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE h8300_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE h8300_libcall_value
+
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P h8300_function_value_regno_p
+
 #undef TARGET_RETURN_IN_MEMORY
 #define TARGET_RETURN_IN_MEMORY h8300_return_in_memory
 
Index: gcc/config/h8300/h8300.h
===
--- gcc/config/h8300/h8300.h(revision 171345)
+++ gcc/config/h8300/h8300.h(working copy)
@@ -527,29 +527,6 @@
 #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET)   \
   ((OFFSET) = h8300_initial_elimination_offset ((FROM), (TO)))
 
-/* Define how to find the value returned by a function.
-   VALTYPE is the data type of the value (as a tree).
-   If the precise function being called is known, FUNC is its FUNCTION_DECL;
-   otherwise, FUNC is 0.
-
-   On the H8 the return value is in R0/R1.  */
-
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
-  gen_rtx_REG (TYPE_MODE (VALTYPE), R0_REG)
-
-/* Define how to find the value returned by a library function
-   assuming the value has mode MODE.  */
-
-/* On the H8 the return value is in R0/R1.  */
-
-#define LIBCALL_VALUE(MODE) \
-  gen_rtx_REG (MODE, R0_REG)
-
-/* 1 if N is a possible register number for a function value.
-   On the H8, R0 is the only register thus used.  */
-
-#define FUNCTION_VALUE_REGNO_P(N) ((N) == R0_REG)
-
 /* Define this if PCC uses the nonreentrant convention for returning
structure and union values.  */
 

Anatoly.



[H8300] Remove ASM_OUTPUT_BSS

2011-03-27 Thread Anatoly Sokolov
Hi.

  In the config/h8300/h8300.h file both the ASM_OUTPUT_BSS and
ASM_OUTPUT_ALIGNED_BSS macros are defined, but the ASM_OUTPUT_BSS macro is
not used when ASM_OUTPUT_ALIGNED_BSS is defined. This patch removes the
ASM_OUTPUT_BSS macro from the H8300 target.

  Regression tested on h8300-unknown-elf with no new failure.

  OK to install? 

* config/h8300/h8300.h (ASM_OUTPUT_BSS): Remove macro.

Index: gcc/config/h8300/h8300.h
===
--- gcc/config/h8300/h8300.h(revision 171427)
+++ gcc/config/h8300/h8300.h(working copy)
@@ -1015,13 +987,6 @@
   assemble_name ((FILE), (NAME)),  \
   fprintf ((FILE), ",%lu\n", (unsigned long)(SIZE)))
 
-/* This says how to output the assembler to define a global
-   uninitialized but not common symbol.
-   Try to use asm_output_bss to implement this macro.  */
-
-#define ASM_OUTPUT_BSS(FILE, DECL, NAME, SIZE, ROUNDED)\
-  asm_output_bss ((FILE), (DECL), (NAME), (SIZE), (ROUNDED))
-
 #define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
   asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
 


Anatoly.



[committed] Fix PA ior expanders

2011-03-27 Thread John David Anglin
The following patch fixes a thinko in the iordi3 expander -- the
expander didn't allow register operands.  I also changed the iorsi3
expander to use the new predicate.

Tested on hppa64-hp-hpux11.11 and hppa-unknown-linux-gnu with no
regressions.  Committed to trunk.

Dave
-- 
J. David Anglin  dave.ang...@nrc-cnrc.gc.ca
National Research Council of Canada  (613) 990-0752 (FAX: 952-6602)

2011-03-27  John David Anglin  

PR target/48288
* config/pa/predicates.md (reg_or_ior_operand): New predicate.
* config/pa/pa.md (iordi3): Use new predicate in expander.
(iorsi3): Likewise.

Index: config/pa/predicates.md
===
--- config/pa/predicates.md (revision 171556)
+++ config/pa/predicates.md (working copy)
@@ -409,6 +409,15 @@
  || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op;
 })
 
+;; True iff OP can be used to compute (reg | OP).
+
+(define_predicate "reg_or_ior_operand"
+  (match_code "subreg,reg,const_int")
+{
+  return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op))));
+})
+
 ;; True iff depi can be used to compute (reg | OP).
 
 (define_predicate "ior_operand"
Index: config/pa/pa.md
===
--- config/pa/pa.md (revision 171556)
+++ config/pa/pa.md (working copy)
@@ -5686,7 +5686,7 @@
 (define_expand "iordi3"
   [(set (match_operand:DI 0 "register_operand" "")
(ior:DI (match_operand:DI 1 "register_operand" "")
-   (match_operand:DI 2 "ior_operand" "")))]
+   (match_operand:DI 2 "reg_or_ior_operand" "")))]
   ""
   "
 {
@@ -5726,14 +5726,9 @@
 (define_expand "iorsi3"
   [(set (match_operand:SI 0 "register_operand" "")
(ior:SI (match_operand:SI 1 "register_operand" "")
-   (match_operand:SI 2 "arith32_operand" "")))]
+   (match_operand:SI 2 "reg_or_ior_operand" "")))]
   ""
-  "
-{
-  if (! (ior_operand (operands[2], SImode)
- || register_operand (operands[2], SImode)))
-operands[2] = force_reg (SImode, operands[2]);
-}")
+  "")
 
 (define_insn ""
   [(set (match_operand:SI 0 "register_operand" "=r,r")


Re: C++ PATCH for c++/48289 (-pedantic breaks std::move)

2011-03-27 Thread Eric Botcazou
> Tested x86_64-pc-linux-gnu, applied to trunk and 4.5, will apply to 4.6
> after 4.6.0.

The test doesn't pass on the 4.5 branch:
  FAIL: g++.dg/cpp0x/move1.C (test for excess errors)

-- 
Eric Botcazou


Re: ira-improv patch has been committed

2011-03-27 Thread H.J. Lu
On Sun, Mar 27, 2011 at 4:25 PM, Vladimir Makarov  wrote:
>  I submitted the following patch.  The patch contains original
> patches from ira-improv branch, changes addressing all Kenneth Zadeck's
> comments in http://gcc.gnu.org/ml/gcc-patches/2010-12/msg00457.html,
> and a minor change fixing a few regressions in the gcc testsuite for
> x86/x86-64 and ppc.
>
>  The patch was successfully tested and bootstrapped on x86/x86-64,
> ppc64, and ia64.

It breaks bootstrap on Linux/x86:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48307


H.J.

> 2011-03-27  Vladimir Makarov 
>
>    * regmove.c (regmove_optimize): Move ira_set_pseudo_classes call
>    after regstat_init_n_sets_and_refs.
>
>    * ira.c: Add more comments at the top.
>    (setup_stack_reg_pressure_class, setup_pressure_classes):
>    Add comments how we compute the register pressure classes.
>    (setup_allocno_and_important_classes): Add more comments.
>    (setup_class_translate_array, reorder_important_classes)
>    (setup_reg_class_relations): Add comments.
>
>    * ira-emit.c: Add 2011 to the Copyright line.  Add comments at the
>    start of the file.
>
>    * ira-color.c: Add 2011 to the Copyright line.
>    (assign_hard_reg):  Add more comments.
>    (improve_allocation): Ditto.
>
>    * ira-costs.c: Add 2011 to the Copyright line.
>    (setup_cost_classes, setup_regno_cost_classes_by_aclass): Add more
>    comments.
>    (setup_regno_cost_classes_by_mode): Ditto.
>
>    Initial patches from ira-improv branch:
>
>    2010-08-13  Vladimir Makarov 
>
>    * ira-build.c: (ira_create_object): Remove initialization of
>    OBJECT_PROFITABLE_HARD_REGS.  Initialize OBJECT_ADD_DATA.
>    (ira_create_allocno): Remove initialization of
>    ALLOCNO_MEM_OPTIMIZED_DEST, ALLOCNO_MEM_OPTIMIZED_DEST_P,
>    ALLOCNO_SOMEWHERE_RENAMED_P, ALLOCNO_CHILD_RENAMED_P,
>    ALLOCNO_IN_GRAPH_P, ALLOCNO_MAY_BE_SPILLED_P, ALLOCNO_COLORABLE_P,
>    ALLOCNO_NEXT_BUCKET_ALLOCNO, ALLOCNO_PREV_BUCKET_ALLOCNO,
>    ALLOCNO_FIRST_COALESCED_ALLOCNO, ALLOCNO_NEXT_COALESCED_ALLOCNO.
>    Initialize ALLOCNO_ADD_DATA.
>    (copy_info_to_removed_store_destinations): Use ALLOCNO_EMIT_DATA
>    and allocno_emit_reg instead of ALLOCNO_MEM_OPTIMIZED_DEST_P and
>    ALLOCNO_REG.
>    (ira_flattening): Ditto.  Use ALLOCNO_EMIT_DATA instead of
>    ALLOCNO_MEM_OPTIMIZED_DEST and ALLOCNO_SOMEWHERE_RENAMED_P.
>
>    * ira.c (ira_reallocate): Remove.
>    (setup_pressure_classes): Call
>    ira_init_register_move_cost_if_necessary.  Use
>    ira_register_move_cost instead of ira_get_register_move_cost.
>    (setup_allocno_assignment_flags): Use ALLOCNO_EMIT_DATA.
>    (ira): Call ira_initiate_emit_data and ira_finish_emit_data.
>
>    * ira-color.c: Use ALLOCNO_COLOR_DATA instead of
>    ALLOCNO_IN_GRAPH_P, ALLOCNO_MAY_BE_SPILLED_P, ALLOCNO_COLORABLE_P,
>    ALLOCNO_AVAILABLE_REGS_NUM, ALLOCNO_NEXT_BUCKET_ALLOCNO,
>    ALLOCNO_PREV_BUCKET_ALLOCNO. ALLOCNO_TEMP. Use OBJECT_COLOR_DATA
>    instead of OBJECT_PROFITABLE_HARD_REGS, OBJECT_HARD_REGS_NODE,
>    OBJECT_HARD_REGS_SUBNODES_START, OBJECT_HARD_REGS_SUBNODES_NUM.
>    Fix formatting.
>    (object_hard_regs_t, object_hard_regs_node_t): Move from
>    ira-int.h.
>    (struct object_hard_regs, struct object_hard_regs_node): Ditto.
>    (struct allocno_color_data): New.
>    (allocno_color_data_t): New typedef.
>    (allocno_color_data): New definition.
>    (ALLOCNO_COLOR_DATA): New macro.
>    (struct object_color_data): New.
>    (object_color_data_t): New typedef.
>    (object_color_data): New definition.
>    (OBJECT_COLOR_DATA): New macro.
>    (update_copy_costs, calculate_allocno_spill_cost): Call
>    ira_init_register_move_cost_if_necessary.  Use
>    ira_register_move_cost instead of ira_get_register_move_cost.
>    (move_spill_restore, update_curr_costs): Ditto.
>    (allocno_spill_priority): Make it inline.
>    (color_pass): Allocate and free allocno_color_dat and
>    object_color_data.
>    (struct coalesce_data, coalesce_data_t): New.
>    (allocno_coalesce_data): New definition.
>    (ALLOCNO_COALESCE_DATA): New macro.
>    (merge_allocnos, coalesced_allocno_conflict_p): Use
>    ALLOCNO_COALESCED_DATA instead of ALLOCNO_FIRST_COALESCED_ALLOCNO,
>    ALLOCNO_NEXT_COALESCED_ALLOCNO, ALLOCNO_TEMP.
>    (coalesce_allocnos): Ditto.
>    (setup_coalesced_allocno_costs_and_nums): Ditto.
>    (collect_spilled_coalesced_allocnos): Ditto.
>    (slot_coalesced_allocno_live_ranges_intersect_p): Ditto.
>    (setup_slot_coalesced_allocno_live_ranges): Ditto.
>    (coalesce_spill_slots): Ditto.
>    (ira_sort_regnos_for_alter_reg): Ditto.  Allocate, initialize and
>    free allocno_coalesce_data.
>
>    * ira-conflicts.c: Fix formatting.
>    (process_regs_for_copy): Call
>    ira_init_register_move_cost_if_necessary.  Use
>    ira_register_move_cost instead of ira_get_register_move_cost.
>    (build_object_conflicts): Optimize.
>
>    * ira-costs.c (record_reg_classes): Optimize.  Call
>    ira_init_register_move_cost_if_necessary.  Use

Re: PATCH: Split AVX 32byte unalignd load/store

2011-03-27 Thread H.J. Lu
On Sun, Mar 27, 2011 at 11:57 AM, H.J. Lu  wrote:
> On Sun, Mar 27, 2011 at 10:53 AM, Uros Bizjak  wrote:
>> On Sun, Mar 27, 2011 at 3:44 PM, H.J. Lu  wrote:
>>
>>> Here is a patch to split AVX 32byte unalignd load/store:
>>>
>>> http://gcc.gnu.org/ml/gcc-patches/2011-02/msg00743.html
>>>
>>> It speeds up some SPEC CPU 2006 benchmarks by up to 6%.
>>> OK for trunk?
>>
>>> 2011-02-11  H.J. Lu  
>>>
>>>       * config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
>>>       and -mavx256-split-unaligned-store.
>>>       (ix86_option_override_internal): Split 32-byte AVX unaligned
>>>       load/store by default.
>>>       (ix86_avx256_split_vector_move_misalign): New.
>>>       (ix86_expand_vector_move_misalign): Use it.
>>>
>>>       * config/i386/i386.opt: Add -mavx256-split-unaligned-load and
>>>       -mavx256-split-unaligned-store.
>>>
>>>       * config/i386/sse.md (*avx_mov_internal): Verify unaligned
>>>       256bit load/store.  Generate unaligned store on misaligned memory
>>>       operand.
>>>       (*avx_movu): Verify unaligned
>>>       256bit load/store.
>>>       (*avx_movdqu): Likewise.
>>>
>>>       * doc/invoke.texi: Document -mavx256-split-unaligned-load and
>>>       -mavx256-split-unaligned-store.
>>>
>>> gcc/testsuite/
>>>
>>> 2011-02-11  H.J. Lu  
>>>
>>>       * gcc.target/i386/avx256-unaligned-load-1.c: New.
>>>       * gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
>>>       * gcc.target/i386/avx256-unaligned-store-7.c: Likewise.
>>>
>>
>>
>>
>>> @@ -203,19 +203,37 @@
>>>        return standard_sse_constant_opcode (insn, operands[1]);
>>>      case 1:
>>>      case 2:
>>> +      if (GET_MODE_ALIGNMENT (mode) == 256
>>> +       && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
>>> +            && MEM_P (operands[0])
>>> +            && MEM_ALIGN (operands[0]) < 256)
>>> +           || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
>>> +               && MEM_P (operands[1])
>>> +               && MEM_ALIGN (operands[1]) < 256)))
>>> +     gcc_unreachable ();
>>
>> Please use "misaligned_operand (operands[...], mode)" instead of
>> MEM_P && MEM_ALIGN combo in a couple of places.
>>
>> OK with that change.
>>
>
> This is the patch I checked in.
>

I checked in this patch to revert the assertions on unaligned 256bit
load/store, since such loads/stores may be generated by intrinsics:

http://gcc.gnu.org/ml/gcc-regression/2011-03/msg00477.html


-- 
H.J.
---
Index: ChangeLog
===
--- ChangeLog   (revision 171589)
+++ ChangeLog   (working copy)
@@ -1,3 +1,10 @@
+2011-03-27  H.J. Lu  
+
+   * config/i386/sse.md (*avx_mov_internal): Don't assert
+   unaligned 256bit load/store.
+   (*avx_movu): Likewise.
+   (*avx_movdqu): Likewise.
+
 2011-03-27  Vladimir Makarov  

PR bootstrap/48307
Index: config/i386/sse.md
===
--- config/i386/sse.md  (revision 171589)
+++ config/i386/sse.md  (working copy)
@@ -203,12 +203,6 @@
   return standard_sse_constant_opcode (insn, operands[1]);
 case 1:
 case 2:
-  if (GET_MODE_ALIGNMENT (mode) == 256
- && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
-  && misaligned_operand (operands[0], mode))
- || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
- && misaligned_operand (operands[1], mode))))
-   gcc_unreachable ();
   switch (get_attr_mode (insn))
 {
case MODE_V8SF:
@@ -416,15 +410,7 @@
  UNSPEC_MOVU))]
   "AVX_VEC_FLOAT_MODE_P (mode)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  if (GET_MODE_ALIGNMENT (mode) == 256
-  && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
-  && misaligned_operand (operands[0], mode))
- || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
- && misaligned_operand (operands[1], mode))))
-gcc_unreachable ();
-  return "vmovu\t{%1, %0|%0, %1}";
-}
+  "vmovu\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "vex")
@@ -483,15 +469,7 @@
  [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
  UNSPEC_MOVU))]
   "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  if (GET_MODE_ALIGNMENT (mode) == 256
-  && ((TARGET_AVX256_SPLIT_U

fix for 48208 and 48260 on darwin

2011-03-27 Thread Christian Schüler

Hallo list,
please review the following patch (and besides, bear with me as this is the 
first patch proposal from me). For gcc 4.5 and earlier it was possible to 
configure xcode via a modified xcplugin to use the newer gcc directly (yes, 
the -arch flag was ignored during link time, but since I had -m32/-m64 
anyways, this didn't matter). With the new argument checking of version 4.6 
during the linking stage this is no longer possible. The proposed patch makes 
gcc on darwin recognize the -arch flag to the extent necessary to reinstate 
the old functionality. Besides, it fixes the already existing support for the
-F flag.

Thanks,
Christian


2011-03-28  Christian Schüler   gmx.de>

PR 48208
* c-family/c.opt: Added 'Driver' to -F option
PR 48260
* config/darwin-driver.c: Add '-arch' to handler function
* config/darwin.opt: Added '-arch' option


diff -Naurw gcc-4.6.0-RC-20110321/gcc/c-family/c.opt \
gcc-4.6.0-RC-20110321-patched/gcc/c-family/c.opt
--- gcc-4.6.0-RC-20110321/gcc/c-family/c.opt2011-02-17 22:34:10 +0100
+++ gcc-4.6.0-RC-20110321-patched/gcc/c-family/c.opt  2011-03-25 19:04:08 
+0100
@@ -201,7 +201,7 @@
 C ObjC C++ ObjC++ Undocumented Var(flag_preprocess_only)
 
 F
-C ObjC C++ ObjC++ Joined Separate MissingArgError(missing path after %qs)
+Driver C ObjC C++ ObjC++ Joined Separate MissingArgError(missing path after \
%qs)
 -FAdd  to the end of the main framework include path
 
 H
diff -Naurw gcc-4.6.0-RC-20110321/gcc/config/darwin-driver.c \
gcc-4.6.0-RC-20110321-patched/gcc/config/darwin-driver.c
--- gcc-4.6.0-RC-20110321/gcc/config/darwin-driver.c2010-11-11 00:23:15 
+0100
+++ gcc-4.6.0-RC-20110321-patched/gcc/config/darwin-driver.c2011-03-26 
07:01:02.0 +0100
@@ -161,6 +161,13 @@
continue;
   switch ((*decoded_options)[i].opt_index)
{
+#if DARWIN_X86
+   case OPT_arch:
+ if (!strcmp ((*decoded_options)[i].arg, "i386"))
+   generate_option (OPT_m32, NULL, 1, CL_DRIVER, 
&(*decoded_options)[i]);
+ else if (!strcmp ((*decoded_options)[i].arg, "x86_64"))
+   generate_option (OPT_m64, NULL, 1, CL_DRIVER, 
&(*decoded_options)[i]);
+ break;
+#endif
+   
case OPT_filelist:
case OPT_framework:
  ++*decoded_options_count;
diff -Naurw gcc-4.6.0-RC-20110321/gcc/config/darwin.opt \
gcc-4.6.0-RC-20110321-patched/gcc/config/darwin.opt
--- gcc-4.6.0-RC-20110321/gcc/config/darwin.opt 2011-02-07 21:11:45 +0100
+++ gcc-4.6.0-RC-20110321-patched/gcc/config/darwin.opt 2011-03-25 19:04:08
+0100
@@ -31,6 +31,9 @@
 allowable_client
 Driver Separate Alias(Zallowable_client)
 
+arch
+Driver RejectNegative Separate 
+
 arch_errors_fatal
 Driver Alias(Zarch_errors_fatal)