Hello! Attached patch implements scalar unsigned int->float conversions with AVX512F.
2018-05-22 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.md (*floatuns<SWI48:mode><MODEF:mode>2_avx512): New insn pattern. (floatunssi<mode>2): Also enable for AVX512F and TARGET_SSE_MATH. Rewrite expander pattern. Emit gen_floatunssi<mode>2_i387_with_xmm for non-SSE modes. (floatunsdisf2): Rewrite expander pattern. Handle TARGET_AVX512F. (floatunsdidf2): Ditto. testsuite/ChangeLog: 2018-05-22 Uros Bizjak <ubiz...@gmail.com> * gcc.target/i386/cvt-3.c: New test. Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}, but not tested on an AVX512 target. Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 260441) +++ config/i386/i386.md (working copy) @@ -5615,16 +5615,26 @@ DONE; }) +(define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512" + [(set (match_operand:MODEF 0 "register_operand" "=v") + (unsigned_float:MODEF + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_AVX512F && TARGET_SSE_MATH" + "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODEF:MODE>")]) + ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two ;; SImode values to stack. Also note that fild loads from memory only. -(define_insn_and_split "*floatunssi<mode>2_i387_with_xmm" +(define_insn_and_split "floatunssi<mode>2_i387_with_xmm" [(set (match_operand:X87MODEF 0 "register_operand" "=f") (unsigned_float:X87MODEF (match_operand:SI 1 "nonimmediate_operand" "rm"))) - (clobber (match_scratch:DI 3 "=x")) - (clobber (match_operand:DI 2 "memory_operand" "=m"))] + (clobber (match_operand:DI 2 "memory_operand" "=m")) + (clobber (match_scratch:DI 3 "=x"))] "!TARGET_64BIT && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC" @@ -5639,43 +5649,59 @@ (set_attr "mode" "<MODE>")]) (define_expand "floatunssi<mode>2" - [(parallel - [(set (match_operand:X87MODEF 0 "register_operand") - (unsigned_float:X87MODEF - (match_operand:SI 1 "nonimmediate_operand"))) - (clobber (match_scratch:DI 3)) - (clobber (match_dup 2))])] - "!TARGET_64BIT - && ((TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) - || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))" + [(set (match_operand:X87MODEF 0 "register_operand") + (unsigned_float:X87MODEF + (match_operand:SI 1 
"nonimmediate_operand")))] + "(!TARGET_64BIT + && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) + || ((!TARGET_64BIT || TARGET_AVX512F) + && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" { - if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) { + emit_insn (gen_floatunssi<mode>2_i387_with_xmm + (operands[0], operands[1], + assign_386_stack_local (DImode, SLOT_TEMP))); + DONE; + } + if (!TARGET_AVX512F) + { ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]); DONE; } - else - operands[2] = assign_386_stack_local (DImode, SLOT_TEMP); }) (define_expand "floatunsdisf2" - [(use (match_operand:SF 0 "register_operand")) - (use (match_operand:DI 1 "nonimmediate_operand"))] + [(set (match_operand:SF 0 "register_operand") + (unsigned_float:SF + (match_operand:DI 1 "nonimmediate_operand")))] "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH" - "x86_emit_floatuns (operands); DONE;") +{ + if (!TARGET_AVX512F) + { + x86_emit_floatuns (operands); + DONE; + } +}) (define_expand "floatunsdidf2" - [(use (match_operand:DF 0 "register_operand")) - (use (match_operand:DI 1 "nonimmediate_operand"))] - "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK) + [(set (match_operand:DF 0 "register_operand") + (unsigned_float:DF + (match_operand:DI 1 "nonimmediate_operand")))] + "(TARGET_KEEPS_VECTOR_ALIGNED_STACK || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH" { - if (TARGET_64BIT) - x86_emit_floatuns (operands); - else - ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); - DONE; + if (!TARGET_64BIT) + { + ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); + DONE; + } + if (!TARGET_AVX512F) + { + x86_emit_floatuns (operands); + DONE; + } }) ;; Load effective address instructions Index: testsuite/gcc.target/i386/cvt-3.c =================================================================== --- testsuite/gcc.target/i386/cvt-3.c 
(nonexistent) +++ testsuite/gcc.target/i386/cvt-3.c (working copy) @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mfpmath=sse" } */ + +float ui2f (unsigned int x) { return x; } +double ui2d (unsigned int x) { return x; } + +#ifdef __x86_64__ +float ul2f (unsigned long x) { return x; } +double ul2d (unsigned long x) { return x; } +#endif + +/* { dg-final { scan-assembler-times "vcvtusi2ss" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vcvtusi2sd" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vcvtusi2ss" 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvtusi2sd" 2 { target { ! ia32 } } } } */