On Tue, Apr 19, 2016 at 8:27 AM, Uros Bizjak <ubiz...@gmail.com> wrote:
> On Tue, Apr 19, 2016 at 5:18 PM, H.J. Lu <hjl.to...@gmail.com> wrote:
>> On Tue, Apr 19, 2016 at 8:08 AM, Uros Bizjak <ubiz...@gmail.com> wrote:
>>> On Tue, Apr 19, 2016 at 4:49 PM, H.J. Lu <hongjiu...@intel.com> wrote:
>>>>
>>>> From INSTRUCTION EXCEPTION SPECIFICATION section in Intel software
>>>> developer manual volume 2, only legacy SSE instructions with memory
>>>> operand not 16-byte aligned get General Protection fault.  There is
>>>> no need to check 1, 2, 4, 8 byte alignments.  Since x86 backend has
>>>> accurate constraints and predicates for 16-byte alignment, we can
>>>> remove ix86_legitimate_combined_insn.
>>>>
>>>> Tested on x86-64.  OK for trunk?
>>>
>>> No. This function also handles cases where invalid hard register gets
>>> propagated into the insn during the combine pass, leading to spill
>>> failure later.
>>>
>>
>> ix86_legitimate_combined_insn was added to work around the
>> reload issue:
>
> Sorry, I'm not convinced. Please see [1].
>
> You should remove only this part, together with now unused ssememalign
> attribute.
>
> -      /* For pre-AVX disallow unaligned loads/stores where the
> -         instructions don't support it.  */
> -      if (!TARGET_AVX
> -          && VECTOR_MODE_P (mode)
> -          && misaligned_operand (op, mode))
> -        {
> -          unsigned int min_align = get_attr_ssememalign (insn);
> -          if (min_align == 0
> -              || MEM_ALIGN (op) < min_align)
> -            return false;
> -        }
>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46829
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46843
>
>> LRA doesn't have those limitation.  Removing
>> ix86_legitimate_combined_insn causes no regressions.
>
> [1] https://gcc.gnu.org/ml/gcc-patches/2012-08/msg01195.html
>
> Uros.
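
To make the exception rule above concrete, here is a minimal sketch
(illustration only, not part of the patch; the file and function names
are made up): built with -msse2 and without AVX, the 8-byte scalar load
below never needs any particular alignment, while the 16-byte load
raises #GP unless its operand is 16-byte aligned, so 16 bytes is the
only alignment the backend has to model.

/* align-16.c: hypothetical example, not part of the patch.
   Build with something like: gcc -O2 -msse2 -mno-avx -S align-16.c  */
#include <emmintrin.h>

/* 8-byte memory operand (movsd): per the SDM this form does not fault
   on a misaligned address, so P needs no 16-byte alignment.  */
__m128d
load_low (const double *p)
{
  return _mm_load_sd (p);
}

/* 16-byte memory operand (movapd): per the SDM this legacy SSE form
   raises #GP unless P is 16-byte aligned; this is the one case the
   backend still describes with constraints and predicates.  */
__m128d
load_full (const double *p)
{
  return _mm_load_pd (p);
}
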
Here is the updated patch.  OK for trunk if there is no regression on
x86-64?

-- 
H.J.
From d558242ecf4f9acd9ae4b0e1cda28fed6a75d99d Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sat, 2 Jan 2016 14:57:09 -0800
Subject: [PATCH] Remove ssememalign

Per the INSTRUCTION EXCEPTION SPECIFICATION section in volume 2 of the
Intel software developer manual, only legacy SSE instructions with a
memory operand that is not 16-byte aligned raise a General Protection
fault.  There is no need to check 1-, 2-, 4- or 8-byte alignments.
Since the x86 backend has accurate constraints and predicates for
16-byte alignment, we can remove the alignment check in
ix86_legitimate_combined_insn.

	* config/i386/i386.c (ix86_legitimate_combined_insn): Remove
	alignment check.
	* config/i386/i386.md (ssememalign): Removed.
	* config/i386/sse.md: Remove ssememalign attribute from patterns.
---
 gcc/config/i386/i386.c  | 12 ------------
 gcc/config/i386/i386.md |  7 -------
 gcc/config/i386/sse.md  | 30 ------------------------------
 3 files changed, 49 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d6c9200..6379313 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7317,18 +7317,6 @@ ix86_legitimate_combined_insn (rtx_insn *insn)
       bool win;
       int j;
 
-      /* For pre-AVX disallow unaligned loads/stores where the
-         instructions don't support it.  */
-      if (!TARGET_AVX
-          && VECTOR_MODE_P (mode)
-          && misaligned_operand (op, mode))
-        {
-          unsigned int min_align = get_attr_ssememalign (insn);
-          if (min_align == 0
-              || MEM_ALIGN (op) < min_align)
-            return false;
-        }
-
       /* A unary operator may be accepted by the predicate, but it
          is irrelevant for matching constraints.  */
       if (UNARY_P (op))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6596a1d..38eb98c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -460,13 +460,6 @@
            (const_string "unknown")]
          (const_string "integer")))
 
-;; The minimum required alignment of vector mode memory operands of the SSE
-;; (non-VEX/EVEX) instruction in bits, if it is different from
-;; GET_MODE_ALIGNMENT of the operand, otherwise 0.  If an instruction has
-;; multiple alternatives, this should be conservative maximum of those minimum
-;; required alignments.
-(define_attr "ssememalign" "" (const_int 0))
-
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
   (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ed0a1a6..78c28c5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1181,7 +1181,6 @@
   "%vlddqu\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
    (set (attr "prefix_data16")
      (if_then_else
        (match_test "TARGET_AVX")
@@ -1446,7 +1445,6 @@
    vrcpss\t{%1, %2, %0|%0, %2, %k1}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
-   (set_attr "ssememalign" "32")
    (set_attr "atom_sse_attr" "rcp")
    (set_attr "btver2_sse_attr" "rcp")
    (set_attr "prefix" "orig,vex")
@@ -1588,7 +1586,6 @@
    vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
-   (set_attr "ssememalign" "32")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "SF")])
 
@@ -4690,7 +4687,6 @@
   "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "ssememalign" "64")
    (set_attr "mode" "V2DF")])
 
 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
@@ -5751,7 +5747,6 @@
    %vmovhps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
    (set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
@@ -5797,7 +5792,6 @@
    %vmovlps\t{%2, %H0|%H0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
    (set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
@@ -6255,7 +6249,6 @@
    %vmovhlps\t{%1, %d0|%d0, %1}
    %vmovlps\t{%H1, %d0|%d0, %H1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
@@ -6295,7 +6288,6 @@
    %vmovlps\t{%2, %H0|%H0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
    (set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
 
@@ -6310,7 +6302,6 @@
    %vmovaps\t{%1, %0|%0, %1}
    %vmovlps\t{%1, %d0|%d0, %q1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
@@ -6350,7 +6341,6 @@
    %vmovlps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
    (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "length_immediate" "1,1,*,*,*")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
@@ -7492,7 +7482,6 @@
    %vmovhpd\t{%1, %0|%q0, %1}"
   [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
    (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix_data16" "*,*,*,1,*,1")
    (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
    (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
@@ -7652,7 +7641,6 @@
    %vmovlpd\t{%2, %H0|%H0, %2}"
   [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
    (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix_data16" "*,*,*,1,*,1")
    (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
    (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
@@ -8322,7 +8310,6 @@
    movhlps\t{%1, %0|%0, %1}
    movlps\t{%H1, %0|%0, %H1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
 ;; Avoid combining registers from different units in a single alternative,
@@ -8410,7 +8397,6 @@
    #"
   [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
    (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix_data16" "1,*,*,*,*,*,*")
    (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
    (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
@@ -8479,7 +8465,6 @@
               (const_string "imov")
            ]
            (const_string "ssemov")))
-   (set_attr "ssememalign" "64")
    (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
    (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
    (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
@@ -8524,7 +8509,6 @@
          (const_string "1")
          (const_string "*")))
    (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
    (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
 
@@ -14567,7 +14551,6 @@
   "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
@@ -14608,7 +14591,6 @@
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "32")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
@@ -14644,7 +14626,6 @@
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
@@ -14687,7 +14668,6 @@
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "16")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
@@ -14725,7 +14705,6 @@
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "32")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
@@ -14760,7 +14739,6 @@
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
   [(set_attr "type" "ssemov")
-   (set_attr "ssememalign" "64")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
@@ -15048,7 +15026,6 @@
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,load")
    (set_attr "mode" "TI")])
@@ -15076,7 +15053,6 @@
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "ssememalign" "8")
    (set_attr "length_immediate" "1")
    (set_attr "btver2_decode" "vector")
    (set_attr "memory" "none,load")
@@ -15104,7 +15080,6 @@
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "btver2_decode" "vector")
@@ -15131,7 +15106,6 @@
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,load,none,load")
    (set_attr "btver2_decode" "vector,vector,vector,vector")
@@ -15185,7 +15159,6 @@
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,load")
    (set_attr "mode" "TI")])
@@ -15208,7 +15181,6 @@
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "memory" "none,load")
@@ -15233,7 +15205,6 @@
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "memory" "none,load")
@@ -15258,7 +15229,6 @@
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,load,none,load")
    (set_attr "prefix" "maybe_vex")
-- 
2.5.5