Hello, The attached patch fixes PR 54680. Tested on rev 192200 with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
and no new failures. OK? Cheers, Oleg gcc/ChangeLog: PR target/54680 * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_int2sf): Fix swapped comments. * config/sh/predicates.md (fpul_operand): Add comment. (fpul_fsca_operand, fsca_scale_factor): New predicates. * config/sh/sh.md (fsca): Move below sincossf3 expander. Convert to insn_and_split. Use fpul_fsca_operand and fsca_scale_factor predicates. Simplify fpul operand in splitter. testsuite/ChangeLog: PR target/54680 * gcc.target/sh/pr54680.c: New.
Index: gcc/testsuite/gcc.target/sh/pr54680.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54680.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54680.c (revision 0) @@ -0,0 +1,66 @@ +/* Verify that the fsca input value is not converted to float and then back + to int. Notice that we can't count just "lds" insns because mode switches + use "lds.l". */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O2 -mfsca -funsafe-math-optimizations" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2*" "-m3*" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-times "fsca" 7 } } */ +/* { dg-final { scan-assembler-times "shad" 1 } } */ +/* { dg-final { scan-assembler-times "lds\t" 6 } } */ +/* { dg-final { scan-assembler-times "fmul" 2 } } */ +/* { dg-final { scan-assembler-times "ftrc" 1 } } */ + +#include <math.h> + +static const float pi = 3.14159265359f; + +float +test00 (int x) +{ + /* 1x shad, 1x lds, 1x fsca */ + return sinf ( (x >> 8) * (2*pi) / (1 << 16)); +} + +float +test01 (int x) +{ + /* 1x lds, 1x fsca */ + return sinf (x * (2*pi) / 65536); +} + +float +test02 (int x) +{ + /* 1x lds, 1x fsca */ + return sinf (x * (2*pi / 65536)); +} + +float +test03 (int x) +{ + /* 1x lds, 1x fsca */ + float scale = 2*pi / 65536; + return sinf (x * scale); +} + +float +test04 (int x) +{ + /* 1x lds, 1x fsca */ + return cosf (x / 65536.0f * 2*pi); +} + +float +test05 (int x) +{ + /* 1x lds, 1x fsca, 1x fmul */ + float scale = 2*pi / 65536; + return sinf (x * scale) * cosf (x * scale); +} + +float +test_06 (float x) +{ + /* 1x fmul, 1x ftrc, 1x fsca */ + return sinf (x); +} Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 192200) +++ gcc/config/sh/sh.c (working copy) @@ -12628,11 +12628,9 @@ gcc_unreachable (); } -/* This function returns a constant rtx that represents pi / 2**15 in - 
SFmode. it's used to scale SFmode angles, in radians, to a - fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi - maps to 0x10000). */ - +/* This function returns a constant rtx that represents 2**15 / pi in + SFmode. It's used to scale a fixed-point signed 16.16-bit fraction + of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */ static GTY(()) rtx sh_fsca_sf2int_rtx; rtx @@ -12649,11 +12647,10 @@ return sh_fsca_sf2int_rtx; } -/* This function returns a constant rtx that represents 2**15 / pi in - SFmode. it's used to scale a fixed-point signed 16.16-bit fraction - of a full circle back to a SFmode value, i.e., 0x10000 maps to - 2*pi). */ - +/* This function returns a constant rtx that represents pi / 2**15 in + SFmode. It's used to scale SFmode angles, in radians, to a + fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi + maps to 0x10000. */ static GTY(()) rtx sh_fsca_int2sf_rtx; rtx Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 192200) +++ gcc/config/sh/sh.md (working copy) @@ -12055,22 +12055,6 @@ [(set_attr "type" "fsrra") (set_attr "fp_mode" "single")]) -(define_insn "fsca" - [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") - (vec_concat:V2SF - (unspec:SF [(mult:SF - (float:SF (match_operand:SI 1 "fpul_operand" "y")) - (match_operand:SF 2 "immediate_operand" "i")) - ] UNSPEC_FSINA) - (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2)) - ] UNSPEC_FCOSA))) - (use (match_operand:PSI 3 "fpscr_operand" "c"))] - "TARGET_FPU_ANY && TARGET_FSCA - && operands[2] == sh_fsca_int2sf ()" - "fsca fpul,%d0" - [(set_attr "type" "fsca") - (set_attr "fp_mode" "single")]) - ;; When the sincos pattern is defined, the builtin functions sin and cos ;; will be expanded to the sincos pattern and one of the output values will ;; remain unused. 
@@ -12097,6 +12081,38 @@ DONE; }) +(define_insn_and_split "fsca" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_concat:V2SF + (unspec:SF [(mult:SF + (float:SF (match_operand:SI 1 "fpul_fsca_operand" "y")) + (match_operand:SF 2 "fsca_scale_factor" "i")) + ] UNSPEC_FSINA) + (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2)) + ] UNSPEC_FCOSA))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_FPU_ANY && TARGET_FSCA" + "fsca fpul,%d0" + "&& !fpul_operand (operands[1], SImode)" + [(const_int 0)] +{ + /* If operands[1] is something like (fix:SI (float:SF (reg:SI))) reduce it + to a simple reg, otherwise reload will have trouble reloading the + pseudo into fpul. */ + rtx x = XEXP (operands[1], 0); + while (x != NULL_RTX && !fpul_operand (x, SImode)) + { + gcc_assert (GET_CODE (x) == FIX || GET_CODE (x) == FLOAT); + x = XEXP (x, 0); + } + + gcc_assert (x != NULL_RTX && fpul_operand (x, SImode)); + emit_insn (gen_fsca (operands[0], x, operands[2], operands[3])); + DONE; +} + [(set_attr "type" "fsca") + (set_attr "fp_mode" "single")]) + (define_expand "abssf2" [(set (match_operand:SF 0 "fp_arith_reg_operand" "") (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] Index: gcc/config/sh/predicates.md =================================================================== --- gcc/config/sh/predicates.md (revision 192200) +++ gcc/config/sh/predicates.md (working copy) @@ -345,8 +345,10 @@ && GET_MODE (op) == PSImode); }) -;; TODO: Add a comment here. - +;; Returns true if OP is an operand that is either the fpul hard reg or +;; a pseudo. This prevents combine from propagating function arguments +;; in hard regs into insns that need the operand in fpul. If it's a pseudo +;; reload can fix it up. (define_predicate "fpul_operand" (match_code "reg") { @@ -358,6 +360,29 @@ && GET_MODE (op) == mode); }) +;; Returns true if OP is a valid fpul input operand for the fsca insn. 
+;; The value in fpul is a fixed-point value and its scaling is described +;; in the fsca insn by a mult:SF. To allow pre-scaled fixed-point inputs +;; in fpul we have to permit things like +;; (reg:SI) +;; (fix:SI (float:SF (reg:SI))) +(define_predicate "fpul_fsca_operand" + (match_code "fix,reg") +{ + if (fpul_operand (op, SImode)) + return true; + if (GET_CODE (op) == FIX && GET_MODE (op) == SImode + && GET_CODE (XEXP (op, 0)) == FLOAT && GET_MODE (XEXP (op, 0)) == SFmode) + return fpul_fsca_operand (XEXP (XEXP (op, 0), 0), + GET_MODE (XEXP (XEXP (op, 0), 0))); + return false; +}) + +;; Returns true if OP is a valid constant scale factor for the fsca insn. +(define_predicate "fsca_scale_factor" + (and (match_code "const_double") + (match_test "op == sh_fsca_int2sf ()"))) + ;; TODO: Add a comment here. (define_predicate "general_extend_operand"