Hello Uroš,

On 23 Aug 09:44, Uros Bizjak wrote:
> On Fri, Aug 22, 2014 at 1:51 PM, Kirill Yukhin <kirill.yuk...@gmail.com> wrote:
> > This patch extends the unaligned load and store patterns.
>
> At this stage, I'd still prefer simple constraints (the solution
> proposed above), even at the price of additional patterns. Looking at
> the patterns, it is quite hard to calculate the final condition for a
> particular mode/target combo, even without the enable attribute and
> conditional operand constraints/predicates. With the solution above,
> the complexity is conveniently pushed to the mask define_subst
> attribute.

The patch at the bottom does exactly that: it splits the unaligned
load/store patterns into separate ones with simple constraints.

Bootstrapped and AVX-512-regtested on simulator.
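For readers following along: the "mask" define_subst Uros refers to
lives in gcc/config/i386/subst.md. The sketch below is a simplified
version, not the exact in-tree definition; it only illustrates where
the vec_merge and the extra operands in the new define_expands come
from when a <mask_name> pattern is instantiated:

  ;; Simplified sketch of the "mask" define_subst (see
  ;; gcc/config/i386/subst.md for the real definition).  Applying the
  ;; subst appends "_mask" via <mask_name>, wraps the source in a
  ;; vec_merge, and adds operand 2 (merge source) and operand 3 (mask
  ;; register), which is why the expands below index
  ;; operands[2 * <mask_applied>] and operands[3 * <mask_applied>].
  (define_subst "mask"
    [(set (match_operand:SUBST_V 0)
          (match_operand:SUBST_V 1))]
    "TARGET_AVX512F"
    [(set (match_dup 0)
          (vec_merge:SUBST_V
            (match_dup 1)
            (match_operand:SUBST_V 2 "vector_move_operand" "0C")
            (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))])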
gcc/
	* config/i386/sse.md (define_mode_iterator VI48_AVX512VL): New.
	(define_mode_iterator VI_UNALIGNED_LOADSTORE): Delete.
	(define_mode_iterator VI_ULOADSTORE_AVX512BW): New.
	(define_mode_iterator VI_ULOADSTORE_AVX512F): Ditto.
	(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" with
	VI1): Change mode iterator.
	(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" with
	VI_ULOADSTORE_AVX512BW): New.
	(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" with
	VI_ULOADSTORE_AVX512F): Ditto.
	(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>" with
	VI1): Change mode iterator.
	(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>" with
	VI_ULOADSTORE_AVX512BW): New.
	(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>" with
	VI_ULOADSTORE_AVX512F): Ditto.
	(define_insn "<sse2_avx_avx512f>_storedqu<mode>" with VI1): Change
	mode iterator.
	(define_insn "<sse2_avx_avx512f>_storedqu<mode>" with
	VI_ULOADSTORE_AVX512BW): New.
	(define_insn "<sse2_avx_avx512f>_storedqu<mode>" with
	VI_ULOADSTORE_AVX512F): Ditto.
	(define_insn "avx512f_storedqu<mode>_mask"): Delete.
	(define_insn "<avx512>_storedqu<mode>_mask" with VI48_AVX512VL): New.
	(define_insn "<avx512>_storedqu<mode>_mask" with VI12_AVX512VL):
	Ditto.

Is it ok for trunk?

--
Thanks, K

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0624582..0245ec4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -235,6 +235,10 @@
 (define_mode_iterator VF_512
   [V16SF V8DF])
 
+(define_mode_iterator VI48_AVX512VL
+  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
 (define_mode_iterator VF2_AVX512VL
   [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
@@ -259,9 +263,13 @@
 (define_mode_iterator VI1
   [(V32QI "TARGET_AVX") V16QI])
 
-(define_mode_iterator VI_UNALIGNED_LOADSTORE
-  [(V32QI "TARGET_AVX") V16QI
-   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+(define_mode_iterator VI_ULOADSTORE_AVX512BW
+  [V64QI
+   V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI_ULOADSTORE_AVX512F
+  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
 
 ;; All DImode vector integer modes
 (define_mode_iterator VI8
@@ -1172,18 +1180,18 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
+   just fine if misaligned_operand is true, and without the UNSPEC it can
+   be combined with arithmetic instructions.  If misaligned_operand is
+   false, still emit UNSPEC_LOADU insn to honor user's request for
+   misaligned load.  */
 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
-	(unspec:VI_UNALIGNED_LOADSTORE
-	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
+  [(set (match_operand:VI1 0 "register_operand")
+	(unspec:VI1
+	  [(match_operand:VI1 1 "nonimmediate_operand")]
 	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
+  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
 {
-  /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
-     just fine if misaligned_operand is true, and without the UNSPEC it can
-     be combined with arithmetic instructions.  If misaligned_operand is
-     false, still emit UNSPEC_LOADU insn to honor user's request for
-     misaligned load.  */
   if (TARGET_AVX
       && misaligned_operand (operands[1], <MODE>mode))
     {
@@ -1197,25 +1205,61 @@
     }
 })
 
+(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512BW 0 "register_operand")
+	(unspec:VI_ULOADSTORE_AVX512BW
+	  [(match_operand:VI_ULOADSTORE_AVX512BW 1 "nonimmediate_operand")]
+	  UNSPEC_LOADU))]
+  "TARGET_AVX512BW"
+{
+  if (misaligned_operand (operands[1], <MODE>mode))
+    {
+      rtx src = operands[1];
+      if (<mask_applied>)
+	src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
+				 operands[2 * <mask_applied>],
+				 operands[3 * <mask_applied>]);
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
+      DONE;
+    }
+})
+
+(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512F 0 "register_operand")
+	(unspec:VI_ULOADSTORE_AVX512F
+	  [(match_operand:VI_ULOADSTORE_AVX512F 1 "nonimmediate_operand")]
+	  UNSPEC_LOADU))]
+  "TARGET_AVX512F"
+{
+  if (misaligned_operand (operands[1], <MODE>mode))
+    {
+      rtx src = operands[1];
+      if (<mask_applied>)
+	src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
+				 operands[2 * <mask_applied>],
+				 operands[3 * <mask_applied>]);
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
+      DONE;
+    }
+})
+
 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
-	(unspec:VI_UNALIGNED_LOADSTORE
-	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
+  [(set (match_operand:VI1 0 "register_operand" "=v")
+	(unspec:VI1
+	  [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
 	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
+  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
 {
   switch (get_attr_mode (insn))
     {
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
-    case MODE_XI:
-      if (<MODE>mode == V8DImode)
-	return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-      else
-	return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
     default:
-      return "%vmovdqu\t{%1, %0|%0, %1}";
+      if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	return "%vmovdqu\t{%1, %0|%0, %1}";
+      else
+	return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
     }
 }
   [(set_attr "type" "ssemov")
@@ -1238,10 +1282,34 @@
    ]
    (const_string "<sseinsnmode>")))])
 
+(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512BW 0 "register_operand" "=v")
+	(unspec:VI_ULOADSTORE_AVX512BW
+	  [(match_operand:VI_ULOADSTORE_AVX512BW 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_LOADU))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
+   (set_attr "prefix" "maybe_evex")])
+
+(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512F 0 "register_operand" "=v")
+	(unspec:VI_ULOADSTORE_AVX512F
+	  [(match_operand:VI_ULOADSTORE_AVX512F 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_LOADU))]
+  "TARGET_AVX512F"
+  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
+   (set_attr "prefix" "maybe_evex")])
+
 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
-  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
-	(unspec:VI_UNALIGNED_LOADSTORE
-	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
+  [(set (match_operand:VI1 0 "memory_operand" "=m")
+	(unspec:VI1
+	  [(match_operand:VI1 1 "register_operand" "v")]
 	  UNSPEC_STOREU))]
   "TARGET_SSE2"
 {
@@ -1251,13 +1319,16 @@
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
-    case MODE_XI:
-      if (<MODE>mode == V8DImode)
-	return "vmovdqu64\t{%1, %0|%0, %1}";
-      else
-	return "vmovdqu32\t{%1, %0|%0, %1}";
     default:
-      return "%vmovdqu\t{%1, %0|%0, %1}";
+      switch (<MODE>mode)
+      {
+      case V32QImode:
+      case V16QImode:
+	if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	  return "%vmovdqu\t{%1, %0|%0, %1}";
+      default:
+	return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
+      }
     }
 }
   [(set_attr "type" "ssemov")
@@ -1281,21 +1352,56 @@
    ]
    (const_string "<sseinsnmode>")))])
 
-(define_insn "avx512f_storedqu<mode>_mask"
-  [(set (match_operand:VI48_512 0 "memory_operand" "=m")
-	(vec_merge:VI48_512
-	  (unspec:VI48_512
-	    [(match_operand:VI48_512 1 "register_operand" "v")]
+(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512BW 0 "memory_operand" "=m")
+	(unspec:VI_ULOADSTORE_AVX512BW
+	  [(match_operand:VI_ULOADSTORE_AVX512BW 1 "register_operand" "v")]
+	  UNSPEC_STOREU))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
+   (set_attr "prefix" "maybe_evex")])
+
+(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512F 0 "memory_operand" "=m")
+	(unspec:VI_ULOADSTORE_AVX512F
+	  [(match_operand:VI_ULOADSTORE_AVX512F 1 "register_operand" "v")]
+	  UNSPEC_STOREU))]
+  "TARGET_AVX512F"
+  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
+   (set_attr "prefix" "maybe_vex")])
+
+(define_insn "<avx512>_storedqu<mode>_mask"
+  [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI48_AVX512VL
+	  (unspec:VI48_AVX512VL
+	    [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
 	    UNSPEC_STOREU)
 	  (match_dup 0)
 	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
   "TARGET_AVX512F"
-{
-  if (<MODE>mode == V8DImode)
-    return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-  else
-    return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-}
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "memory" "store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_storedqu<mode>_mask"
+  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI12_AVX512VL
+	  (unspec:VI12_AVX512VL
+	    [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
+	    UNSPEC_STOREU)
+	  (match_dup 0)
+	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "memory" "store")
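
FWIW, a quick way to see the new 256-bit EVEX forms in action. This is
a hypothetical sanity check, not part of the patch; it uses the
standard AVX-512VL intrinsics from immintrin.h and assumes something
like -O2 -mavx512vl on the command line:

  #include <immintrin.h>

  /* With the VI_ULOADSTORE_AVX512F patterns above, a masked unaligned
     256-bit integer load/store should now come out as EVEX vmovdqu32
     with a mask annotation, rather than being limited to the 512-bit
     modes the old VI_UNALIGNED_LOADSTORE iterator allowed.  */
  void
  copy_masked (int *dst, const int *src, __mmask8 m)
  {
    __m256i v = _mm256_maskz_loadu_epi32 (m, src);
    _mm256_mask_storeu_epi32 (dst, m, v);
  }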