Hi!
The following patch adds patterns and splitters for {,u}divmodsi4 followed
by zero-extension.  Like other instructions with 32-bit operands, divl and
idivl zero-extend both results to 64 bits, so there is no need to extend
them again.  The REE pass ignores instructions that have more than one SET,
but at least the combiner doesn't.  The patch adds both patterns/splitters
that zero-extend the quotient and patterns/splitters that zero-extend the
modulo (in the latter case the combiner wants the modulo to be the first
operation in the parallel).
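To illustrate (this is essentially f1 from the new pr82361-1.c test below),
the kind of source affected is:

  void
  f1 (unsigned int a, unsigned int b)
  {
    /* With 32-bit operands a single divl computes both results and leaves
       them already zero-extended to 64 bits in %rax/%rdx.  */
    unsigned long long c = a / b;
    unsigned long long d = a % b;
    asm volatile ("" : : "r" (c), "r" (d));
  }

With the patch the combiner merges the quotient zero-extension into the
divmod pattern, so no separate movl %eax, %eax is emitted; as the FIXME in
the test notes, the modulo extension in this both-results case isn't
removed yet.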
I have a patch, which I'll attach to the PR, that also has patterns for
both results zero-extended, but as neither the combiner nor anything else
is able to match them right now, I'm not including it here.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2017-09-29 Jakub Jelinek <[email protected]>
PR target/82361
* config/i386/i386.md
(TARGET_USE_8BIT_IDIV zext divmodsi4 splitter): New define_split.
(divmodsi4_zext_1, divmodsi4_zext_2, *divmodsi4_zext_1,
*divmodsi4_zext_2): New define_insn_and_split.
(*divmodsi4_noext_zext_1, *divmodsi4_noext_zext_2): New define_insn.
(TARGET_USE_8BIT_IDIV zext udivmodsi4 splitter): New define_split.
(udivmodsi4_zext_1, udivmodsi4_zext_2, *udivmodsi4_zext_1,
*udivmodsi4_zext_2, *udivmodsi4_pow2_zext_1, *udivmodsi4_pow2_zext_2):
New define_insn_and_split.
(*udivmodsi4_noext_zext_1, *udivmodsi4_noext_zext_2): New define_insn.
* config/i386/i386.c (ix86_split_idivmod): Handle operands[0] or
operands[1] having DImode when mode is SImode.
* gcc.target/i386/pr82361-1.c: New test.
* gcc.target/i386/pr82361-2.c: New test.
--- gcc/config/i386/i386.md.jj 2017-09-29 09:19:42.000000000 +0200
+++ gcc/config/i386/i386.md 2017-09-29 19:19:34.795293575 +0200
@@ -7635,6 +7635,36 @@ (define_split
[(const_int 0)]
"ix86_split_idivmod (<MODE>mode, operands, true); DONE;")
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (div:SI (match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "nonimmediate_operand"))))
+ (set (match_operand:SI 1 "register_operand")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_8BIT_IDIV
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (SImode, operands, true); DONE;")
+
+(define_split
+ [(set (match_operand:DI 1 "register_operand")
+ (zero_extend:DI
+ (mod:SI (match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "nonimmediate_operand"))))
+ (set (match_operand:SI 0 "register_operand")
+ (div:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_8BIT_IDIV
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (SImode, operands, true); DONE;")
+
(define_insn_and_split "divmod<mode>4_1"
[(set (match_operand:SWI48 0 "register_operand" "=a")
(div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
@@ -7670,6 +7700,79 @@ (define_insn_and_split "divmod<mode>4_1"
[(set_attr "type" "multi")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "divmodsi4_zext_1"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (zero_extend:DI
+ (div:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 1 "register_operand" "=&d")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 1)
+ (ashiftrt:SI (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 1)
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+
+ if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ operands[4] = operands[2];
+ else
+ {
+ /* Avoid use of cltd in favor of a mov+shift. */
+ emit_move_insn (operands[1], operands[2]);
+ operands[4] = operands[1];
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "divmodsi4_zext_2"
+ [(set (match_operand:DI 1 "register_operand" "=&d")
+ (zero_extend:DI
+ (mod:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_dup 2) (match_dup 3)))
+ (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 6)
+ (ashiftrt:SI (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 1)
+ (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 0)
+ (div:SI (match_dup 2) (match_dup 3)))
+ (use (match_dup 6))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+ operands[6] = gen_lowpart (SImode, operands[1]);
+
+ if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ operands[4] = operands[2];
+ else
+ {
+ /* Avoid use of cltd in favor of a mov+shift. */
+ emit_move_insn (operands[6], operands[2]);
+ operands[4] = operands[6];
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
(define_insn_and_split "*divmod<mode>4"
[(set (match_operand:SWIM248 0 "register_operand" "=a")
(div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7705,6 +7808,77 @@ (define_insn_and_split "*divmod<mode>4"
[(set_attr "type" "multi")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*divmodsi4_zext_1"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (zero_extend:DI
+ (div:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 1 "register_operand" "=&d")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 1)
+ (ashiftrt:SI (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 1)
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+
+ if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ operands[4] = operands[2];
+ else
+ {
+ /* Avoid use of cltd in favor of a mov+shift. */
+ emit_move_insn (operands[1], operands[2]);
+ operands[4] = operands[1];
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*divmodsi4_zext_2"
+ [(set (match_operand:DI 1 "register_operand" "=&d")
+ (zero_extend:DI
+ (mod:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 6)
+ (ashiftrt:SI (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 1)
+ (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 0)
+ (div:SI (match_dup 2) (match_dup 3)))
+ (use (match_dup 6))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+ operands[6] = gen_lowpart (SImode, operands[1]);
+
+ if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ operands[4] = operands[2];
+ else
+ {
+ /* Avoid use of cltd in favor of a mov+shift. */
+ emit_move_insn (operands[6], operands[2]);
+ operands[4] = operands[6];
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
(define_insn "*divmod<mode>4_noext"
[(set (match_operand:SWIM248 0 "register_operand" "=a")
(div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7718,6 +7892,34 @@ (define_insn "*divmod<mode>4_noext"
[(set_attr "type" "idiv")
(set_attr "mode" "<MODE>")])
+(define_insn "*divmodsi4_noext_zext_1"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (zero_extend:DI
+ (div:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 1 "register_operand" "=d")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (use (match_operand:SI 4 "register_operand" "1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "idiv{l}\t%3"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")])
+
+(define_insn "*divmodsi4_noext_zext_2"
+ [(set (match_operand:DI 1 "register_operand" "=d")
+ (zero_extend:DI
+ (mod:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_dup 2) (match_dup 3)))
+ (use (match_operand:SI 4 "register_operand" "1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "idiv{l}\t%3"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")])
+
(define_expand "divmodqi4"
[(parallel [(set (match_operand:QI 0 "register_operand")
(div:QI
@@ -7808,6 +8010,38 @@ (define_split
[(const_int 0)]
"ix86_split_idivmod (<MODE>mode, operands, false); DONE;")
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (udiv:SI (match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "nonimmediate_operand"))))
+ (set (match_operand:SI 1 "register_operand")
+ (umod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && TARGET_USE_8BIT_IDIV
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (SImode, operands, false); DONE;")
+
+(define_split
+ [(set (match_operand:DI 1 "register_operand")
+ (zero_extend:DI
+ (umod:SI (match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "nonimmediate_operand"))))
+ (set (match_operand:SI 0 "register_operand")
+ (udiv:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && TARGET_USE_8BIT_IDIV
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (SImode, operands, false); DONE;")
+
(define_insn_and_split "udivmod<mode>4_1"
[(set (match_operand:SWI48 0 "register_operand" "=a")
(udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
@@ -7830,6 +8064,52 @@ (define_insn_and_split "udivmod<mode>4_1
[(set_attr "type" "multi")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "udivmodsi4_zext_1"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (zero_extend:DI
+ (udiv:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 1 "register_operand" "=&d")
+ (umod:SI (match_dup 2) (match_dup 3)))
+ (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(set (match_dup 1) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 1)
+ (umod:SI (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "udivmodsi4_zext_2"
+ [(set (match_operand:DI 1 "register_operand" "=&d")
+ (zero_extend:DI
+ (umod:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_dup 2) (match_dup 3)))
+ (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(set (match_dup 4) (const_int 0))
+ (parallel [(set (match_dup 1)
+ (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 0)
+ (udiv:SI (match_dup 2) (match_dup 3)))
+ (use (match_dup 4))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[4] = gen_lowpart (SImode, operands[1]);"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
(define_insn_and_split "*udivmod<mode>4"
[(set (match_operand:SWIM248 0 "register_operand" "=a")
(udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7851,6 +8131,50 @@ (define_insn_and_split "*udivmod<mode>4"
[(set_attr "type" "multi")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*udivmodsi4_zext_1"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (zero_extend:DI
+ (udiv:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 1 "register_operand" "=&d")
+ (umod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(set (match_dup 1) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 1)
+ (umod:SI (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*udivmodsi4_zext_2"
+ [(set (match_operand:DI 1 "register_operand" "=&d")
+ (zero_extend:DI
+ (umod:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(set (match_dup 4) (const_int 0))
+ (parallel [(set (match_dup 1)
+ (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 0)
+ (udiv:SI (match_dup 2) (match_dup 3)))
+ (use (match_dup 4))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[4] = gen_lowpart (SImode, operands[1]);"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
;; Optimize division or modulo by constant power of 2, if the constant
;; materializes only after expansion.
(define_insn_and_split "*udivmod<mode>4_pow2"
@@ -7877,6 +8201,60 @@ (define_insn_and_split "*udivmod<mode>4_
[(set_attr "type" "multi")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*udivmodsi4_pow2_zext_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (udiv:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "const_int_operand" "n"))))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (umod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000))
+ && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0"
+ "#"
+ "&& 1"
+ [(set (match_dup 1) (match_dup 2))
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ int v = exact_log2 (UINTVAL (operands[3]));
+ operands[4] = GEN_INT (v);
+ operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*udivmodsi4_pow2_zext_2"
+ [(set (match_operand:DI 1 "register_operand" "=r")
+ (zero_extend:DI
+ (umod:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "const_int_operand" "n"))))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (udiv:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000))
+ && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0"
+ "#"
+ "&& 1"
+ [(set (match_dup 1) (match_dup 2))
+ (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 1)
+ (zero_extend:DI (and:SI (match_dup 1) (match_dup 5))))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ int v = exact_log2 (UINTVAL (operands[3]));
+ operands[4] = GEN_INT (v);
+ operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")])
+
(define_insn "*udivmod<mode>4_noext"
[(set (match_operand:SWIM248 0 "register_operand" "=a")
(udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7890,6 +8268,34 @@ (define_insn "*udivmod<mode>4_noext"
[(set_attr "type" "idiv")
(set_attr "mode" "<MODE>")])
+(define_insn "*udivmodsi4_noext_zext_1"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (zero_extend:DI
+ (udiv:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 1 "register_operand" "=d")
+ (umod:SI (match_dup 2) (match_dup 3)))
+ (use (match_operand:SI 4 "register_operand" "1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "div{l}\t%3"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")])
+
+(define_insn "*udivmodsi4_noext_zext_2"
+ [(set (match_operand:DI 1 "register_operand" "=d")
+ (zero_extend:DI
+ (umod:SI (match_operand:SI 2 "register_operand" "0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+ (set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_dup 2) (match_dup 3)))
+ (use (match_operand:SI 4 "register_operand" "1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "div{l}\t%3"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")])
+
(define_expand "udivmodqi4"
[(parallel [(set (match_operand:QI 0 "register_operand")
(udiv:QI
--- gcc/config/i386/i386.c.jj 2017-09-29 19:15:27.822267844 +0200
+++ gcc/config/i386/i386.c 2017-09-29 19:29:00.276483787 +0200
@@ -21927,9 +21927,22 @@ ix86_split_idivmod (machine_mode mode, r
switch (mode)
{
case E_SImode:
- gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
+ if (GET_MODE (operands[0]) == SImode)
+ {
+ if (GET_MODE (operands[1]) == SImode)
+ gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
+ else
+ gen_divmod4_1
+ = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
+ gen_zero_extend = gen_zero_extendqisi2;
+ }
+ else
+ {
+ gen_divmod4_1
+ = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
+ gen_zero_extend = gen_zero_extendqidi2;
+ }
gen_test_ccno_1 = gen_testsi_ccno_1;
- gen_zero_extend = gen_zero_extendqisi2;
break;
case E_DImode:
gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
@@ -21988,16 +22001,24 @@ ix86_split_idivmod (machine_mode mode, r
div = gen_rtx_UDIV (mode, operands[2], operands[3]);
mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
}
+ if (mode == SImode)
+ {
+ if (GET_MODE (operands[0]) != SImode)
+ div = gen_rtx_ZERO_EXTEND (DImode, div);
+ if (GET_MODE (operands[1]) != SImode)
+ mod = gen_rtx_ZERO_EXTEND (DImode, mod);
+ }
/* Extract remainder from AH. */
- tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
+ tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
+ tmp0, GEN_INT (8), GEN_INT (8));
if (REG_P (operands[1]))
insn = emit_move_insn (operands[1], tmp1);
else
{
/* Need a new scratch register since the old one has result
of 8bit divide. */
- scratch = gen_reg_rtx (mode);
+ scratch = gen_reg_rtx (GET_MODE (operands[1]));
emit_move_insn (scratch, tmp1);
insn = emit_move_insn (operands[1], scratch);
}
--- gcc/testsuite/gcc.target/i386/pr82361-1.c.jj 2017-09-29 19:21:12.744113987 +0200
+++ gcc/testsuite/gcc.target/i386/pr82361-1.c 2017-09-29 19:25:27.465046411 +0200
@@ -0,0 +1,53 @@
+/* PR target/82361 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mtune=generic -masm=att -mno-8bit-idiv" } */
+/* We should be able to optimize all %eax to %rax zero extensions, because
+   div and idiv instructions with 32-bit operands zero-extend both results.  */
+/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
+   one.  */
+/* { dg-final { scan-assembler-times "movl\t%edx, %edx" 2 } } */
+
+void
+f1 (unsigned int a, unsigned int b)
+{
+ unsigned long long c = a / b;
+ unsigned long long d = a % b;
+ asm volatile ("" : : "r" (c), "r" (d));
+}
+
+void
+f2 (int a, int b)
+{
+ unsigned long long c = (unsigned int) (a / b);
+ unsigned long long d = (unsigned int) (a % b);
+ asm volatile ("" : : "r" (c), "r" (d));
+}
+
+void
+f3 (unsigned int a, unsigned int b)
+{
+ unsigned long long c = a / b;
+ asm volatile ("" : : "r" (c));
+}
+
+void
+f4 (int a, int b)
+{
+ unsigned long long c = (unsigned int) (a / b);
+ asm volatile ("" : : "r" (c));
+}
+
+void
+f5 (unsigned int a, unsigned int b)
+{
+ unsigned long long d = a % b;
+ asm volatile ("" : : "r" (d));
+}
+
+void
+f6 (int a, int b)
+{
+ unsigned long long d = (unsigned int) (a % b);
+ asm volatile ("" : : "r" (d));
+}
--- gcc/testsuite/gcc.target/i386/pr82361-2.c.jj 2017-09-29 19:25:40.344891300 +0200
+++ gcc/testsuite/gcc.target/i386/pr82361-2.c 2017-09-29 19:31:56.725359101 +0200
@@ -0,0 +1,10 @@
+/* PR target/82361 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mtune=generic -masm=att -m8bit-idiv" } */
+/* We should be able to optimize all %eax to %rax zero extensions, because
+   div and idiv instructions with 32-bit operands zero-extend both results.  */
+/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+/* Ditto %edx to %rdx zero extensions. */
+/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
+
+#include "pr82361-1.c"
Jakub