The following patch fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67609
The patch was bootstrapped and tested on x86/x86-64, ARM, and AArch64. It was also tested (but not bootstrapped) on ppc64, because bootstrap with LRA is unfortunately broken there at the moment.
Committed as rev. 229087.
Index: ChangeLog
===================================================================
--- ChangeLog (revision 228844)
+++ ChangeLog (working copy)
@@ -1,3 +1,9 @@
+2015-10-20 Vladimir Makarov <vmaka...@redhat.com>
+
+        PR rtl-optimization/67609
+        * lra-spills.c (lra_final_code_change): Don't remove all
+        sub-registers.
+
 2015-10-15 Marek Polacek <pola...@redhat.com>
 
         * tree-ssa-reassoc.c (attempt_builtin_copysign): Call
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog (revision 228844)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,8 @@
+2015-10-20 Vladimir Makarov <vmaka...@redhat.com>
+
+        PR rtl-optimization/67609
+        * gcc.target/i386/pr67609.c: New.
+
 2015-10-15 Marek Polacek <pola...@redhat.com>
 
         * gcc.dg/tree-ssa/reassoc-42.c: New test.
Index: lra-spills.c
===================================================================
--- lra-spills.c (revision 228844)
+++ lra-spills.c (working copy)
@@ -727,14 +727,44 @@ lra_final_code_change (void)
           lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
           struct lra_static_insn_data *static_id = id->insn_static_data;
           bool insn_change_p = false;
-
-          for (i = id->insn_static_data->n_operands - 1; i >= 0; i--)
-            if ((DEBUG_INSN_P (insn) || ! static_id->operand[i].is_operator)
-                && alter_subregs (id->operand_loc[i], ! DEBUG_INSN_P (insn)))
-              {
-                lra_update_dup (id, i);
-                insn_change_p = true;
-              }
+
+          for (i = id->insn_static_data->n_operands - 1; i >= 0; i--)
+            {
+              if (! DEBUG_INSN_P (insn) && static_id->operand[i].is_operator)
+                continue;
+
+              rtx op = *id->operand_loc[i];
+
+              if (static_id->operand[i].type == OP_OUT
+                  && GET_CODE (op) == SUBREG && REG_P (SUBREG_REG (op))
+                  && ! LRA_SUBREG_P (op))
+                {
+                  hard_regno = REGNO (SUBREG_REG (op));
+                  /* We can not always remove sub-registers of
+                     hard-registers as we may lose information that
+                     only a part of registers is changed and
+                     subsequent optimizations may do wrong
+                     transformations (e.g. dead code eliminations).
+                     We can not also keep all sub-registers as the
+                     subsequent optimizations can not handle all such
+                     cases. Here is a compromise which works. */
+                  if ((GET_MODE_SIZE (GET_MODE (op))
+                       < GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
+                      && (hard_regno_nregs[hard_regno][GET_MODE (SUBREG_REG (op))]
+                          == hard_regno_nregs[hard_regno][GET_MODE (op)])
+#ifdef STACK_REGS
+                      && (hard_regno < FIRST_STACK_REG
+                          || hard_regno > LAST_STACK_REG)
+#endif
+                      )
+                    continue;
+                }
+              if (alter_subregs (id->operand_loc[i], ! DEBUG_INSN_P (insn)))
+                {
+                  lra_update_dup (id, i);
+                  insn_change_p = true;
+                }
+            }
           if (insn_change_p)
             lra_update_operator_dups (id);
         }
Index: testsuite/gcc.target/i386/pr67609.c
===================================================================
--- testsuite/gcc.target/i386/pr67609.c (revision 0)
+++ testsuite/gcc.target/i386/pr67609.c (working copy)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+#include <emmintrin.h>
+__m128d reg;
+void set_lower(double b)
+{
+  double v[2];
+  _mm_store_pd(v, reg);
+  v[0] = b;
+  reg = _mm_load_pd(v);
+}