https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118460

Christophe Lyon <clyon at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |clyon at gcc dot gnu.org

--- Comment #5 from Christophe Lyon <clyon at gcc dot gnu.org> ---
Adding a thumb2_movhfcc_vfp pattern similar to thumb2_movsfcc_vfp does the trick:

============================================
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 379f5f7b3dc..23a1e38bb92 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -816,6 +816,29 @@ (define_insn "*thumb2_movsfcc_vfp"
     (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")]
 )

+(define_insn "*thumb2_movhfcc_vfp"
+  [(set (match_operand:HF   0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r")
+       (if_then_else:HF
+         (match_operator   3 "arm_comparison_operator"
+           [(match_operand 4 "cc_register" "") (const_int 0)])
+         (match_operand:HF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
+         (match_operand:HF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
+  "TARGET_VFP_FP16INST && !arm_restrict_it"
+  "@
+   it\\t%D3\;vmov%D3.f16\\t%0, %2
+   it\\t%d3\;vmov%d3.f16\\t%0, %1
+   ite\\t%D3\;vmov%D3.f16\\t%0, %2\;vmov%d3.f16\\t%0, %1
+   it\\t%D3\;vmov%D3\\t%0, %2
+   it\\t%d3\;vmov%d3\\t%0, %1
+   ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1
+   it\\t%D3\;vmov%D3\\t%0, %2
+   it\\t%d3\;vmov%d3\\t%0, %1
+   ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1"
+   [(set_attr "conds" "use")
+    (set_attr "length" "6,6,10,6,6,10,6,6,10")
+    (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")]
+)
+
 (define_insn "*movdfcc_vfp"
   [(set (match_operand:DF   0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r")
        (if_then_else:DF
================================================

With that patch, I generate:
bar:
    vcvtb.f32.f16   s14, s0 @ 7     [c=4 l=4]  extendhfsf2
    vcvtb.f32.f16   s15, s1 @ 8     [c=4 l=4]  extendhfsf2
    vcmpe.f32       s14, s15        @ 20    [c=4 l=4]  *cmpsf_trap_vfp/0
    vmrs    APSR_nzcv, FPSCR        @ 21    [c=4 l=4]  *movcc_vfp
    it      pl      @ 15    [c=4 l=6]  *thumb2_movhfcc_vfp/0
    vmovpl.f16      s0, s1
    bx lr

for the reduced test case, which is similar to what we generate for float.

Reply via email to