[llvm-branch-commits] [llvm] Backport #85277 (PR #85422)
https://github.com/benshi001 updated https://github.com/llvm/llvm-project/pull/85422 >From bb49a9f39c5643ed7503c78b56bfc28388e21709 Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Fri, 15 Mar 2024 12:07:54 +0100 Subject: [PATCH] [AVR] Remove earlyclobber from LDDRdPtrQ (#85277) LDDRdPtrQ was marked as `earlyclobber`, which doesn't play well with GreedyRA (which can generate this instruction through `loadRegFromStackSlot()`). This seems to be the same case as: https://github.com/llvm/llvm-project/blob/a99b912c9b74f6ef91786b4dfbc25160c27d3b41/llvm/lib/Target/AVR/AVRInstrInfo.td#L1421 Closes https://github.com/llvm/llvm-project/issues/81911. --- llvm/lib/Target/AVR/AVRInstrInfo.td | 2 +- llvm/test/CodeGen/AVR/bug-81911.ll | 163 2 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AVR/bug-81911.ll diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index efaaec32ee6bb1..0a77c7c1d418a1 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1398,7 +1398,7 @@ let mayLoad = 1, hasSideEffects = 0, // Load indirect with displacement operations. 
let canFoldAsLoad = 1, isReMaterializable = 1 in { - let Constraints = "@earlyclobber $reg" in def LDDRdPtrQ + def LDDRdPtrQ : FSTDLDD<0, (outs GPR8 : $reg), diff --git a/llvm/test/CodeGen/AVR/bug-81911.ll b/llvm/test/CodeGen/AVR/bug-81911.ll new file mode 100644 index 00..2a22666a1ff927 --- /dev/null +++ b/llvm/test/CodeGen/AVR/bug-81911.ll @@ -0,0 +1,163 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=avr -mcpu=atmega328 -O1 -verify-machineinstrs | FileCheck %s + +define internal i8 @main() { +; CHECK-LABEL: main: +; CHECK: ; %bb.0: ; %bb0 +; CHECK-NEXT:push r2 +; CHECK-NEXT:push r3 +; CHECK-NEXT:push r4 +; CHECK-NEXT:push r5 +; CHECK-NEXT:push r6 +; CHECK-NEXT:push r7 +; CHECK-NEXT:push r8 +; CHECK-NEXT:push r9 +; CHECK-NEXT:push r10 +; CHECK-NEXT:push r11 +; CHECK-NEXT:push r12 +; CHECK-NEXT:push r13 +; CHECK-NEXT:push r14 +; CHECK-NEXT:push r15 +; CHECK-NEXT:push r16 +; CHECK-NEXT:push r17 +; CHECK-NEXT:push r28 +; CHECK-NEXT:push r29 +; CHECK-NEXT:in r28, 61 +; CHECK-NEXT:in r29, 62 +; CHECK-NEXT:sbiw r28, 13 +; CHECK-NEXT:in r0, 63 +; CHECK-NEXT:cli +; CHECK-NEXT:out 62, r29 +; CHECK-NEXT:out 63, r0 +; CHECK-NEXT:out 61, r28 +; CHECK-NEXT:ldi r16, 0 +; CHECK-NEXT:ldi r17, 0 +; CHECK-NEXT:ldi r18, -1 +; CHECK-NEXT:;APP +; CHECK-NEXT:ldi r24, 123 +; CHECK-NEXT:;NO_APP +; CHECK-NEXT:std Y+1, r24 ; 1-byte Folded Spill +; CHECK-NEXT:movw r24, r28 +; CHECK-NEXT:adiw r24, 6 +; CHECK-NEXT:std Y+3, r25 ; 2-byte Folded Spill +; CHECK-NEXT:std Y+2, r24 ; 2-byte Folded Spill +; CHECK-NEXT:movw r8, r16 +; CHECK-NEXT:movw r6, r16 +; CHECK-NEXT:movw r4, r16 +; CHECK-NEXT:movw r2, r16 +; CHECK-NEXT:rjmp .LBB0_2 +; CHECK-NEXT: .LBB0_1: ; %bb1 +; CHECK-NEXT:; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT:andi r30, 1 +; CHECK-NEXT:ldd r31, Y+4 ; 1-byte Folded Reload +; CHECK-NEXT:dec r31 +; CHECK-NEXT:cpi r30, 0 +; CHECK-NEXT:movw r8, r18 +; CHECK-NEXT:movw r6, r20 +; CHECK-NEXT:movw r4, r22 +; 
CHECK-NEXT:movw r2, r24 +; CHECK-NEXT:mov r18, r31 +; CHECK-NEXT:brne .LBB0_2 +; CHECK-NEXT:rjmp .LBB0_4 +; CHECK-NEXT: .LBB0_2: ; %bb1 +; CHECK-NEXT:; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:std Y+4, r18 ; 1-byte Folded Spill +; CHECK-NEXT:movw r18, r8 +; CHECK-NEXT:movw r20, r6 +; CHECK-NEXT:movw r22, r4 +; CHECK-NEXT:movw r24, r2 +; CHECK-NEXT:ldi r26, 10 +; CHECK-NEXT:ldi r27, 0 +; CHECK-NEXT:movw r10, r26 +; CHECK-NEXT:movw r12, r16 +; CHECK-NEXT:movw r14, r16 +; CHECK-NEXT:call __udivdi3 +; CHECK-NEXT:std Y+13, r25 +; CHECK-NEXT:std Y+12, r24 +; CHECK-NEXT:std Y+11, r23 +; CHECK-NEXT:std Y+10, r22 +; CHECK-NEXT:std Y+9, r21 +; CHECK-NEXT:std Y+8, r20 +; CHECK-NEXT:std Y+7, r19 +; CHECK-NEXT:std Y+6, r18 +; CHECK-NEXT:ldd r30, Y+2 ; 2-byte Folded Reload +; CHECK-NEXT:ldd r31, Y+3 ; 2-byte Folded Reload +; CHECK-NEXT:;APP +; CHECK-NEXT:;NO_APP +; CHECK-NEXT:ldi r30, 1 +; CHECK-NEXT:cp r8, r1 +; CHECK-NEXT:cpc r9, r1 +; CHECK-NEXT:cpc r6, r16 +; CHECK-NEXT:cpc r7, r17 +; CHECK-NEXT:cpc r4, r16 +; CHECK-NEXT:cpc r5, r17 +; CHECK-NEXT:cpc r2, r16 +; CHECK-NEXT:cpc r3, r17 +; CHECK-NEXT:breq .LBB0_3 +; CHECK-NEXT:rjmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: ; %bb1 +; CHECK-NEXT:; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT:mov r30, r1 +; CHECK-NEXT:rjmp .LBB0_1 +; CHECK-NEXT: .LBB0_4: ; %bb3 +; CHECK-NEXT:ldd r24, Y+1 ; 1-
[llvm-branch-commits] [llvm] release/18.x: [AVR] Remove earlyclobber from LDDRdPtrQ (#85277) (PR #85512)
benshi001 wrote: > @benshi001 What do you think about merging this PR to the release branch? Yes, I think so, because this is a bug fix. https://github.com/llvm/llvm-project/pull/85512 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Backport #85277 (PR #85422)
https://github.com/benshi001 approved this pull request. https://github.com/llvm/llvm-project/pull/85422 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AVR] Fix 16-bit LDDs with immediate overflows (#104923) (PR #106993)
https://github.com/benshi001 approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/106993 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 25531a1 - [AVR] Optimize 8-bit logic left/right shifts
Author: Ben Shi Date: 2021-01-23T23:54:16+08:00 New Revision: 25531a1d9657897e648d93f776a3abb70e9816ef URL: https://github.com/llvm/llvm-project/commit/25531a1d9657897e648d93f776a3abb70e9816ef DIFF: https://github.com/llvm/llvm-project/commit/25531a1d9657897e648d93f776a3abb70e9816ef.diff LOG: [AVR] Optimize 8-bit logic left/right shifts Reviewed By: dylanmckay Differential Revision: https://reviews.llvm.org/D89047 Added: Modified: llvm/lib/Target/AVR/AVRISelLowering.cpp llvm/lib/Target/AVR/AVRISelLowering.h llvm/lib/Target/AVR/AVRInstrInfo.td llvm/test/CodeGen/AVR/ctlz.ll llvm/test/CodeGen/AVR/ctpop.ll llvm/test/CodeGen/AVR/cttz.ll llvm/test/CodeGen/AVR/shift.ll Removed: diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 9a464d0a52d8..bd5fd266d395 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -334,6 +334,24 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Invalid shift opcode"); } + // Optimize int8 shifts. + if (VT.getSizeInBits() == 8) { +if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) { + // Optimize LSL when 4 <= ShiftAmount <= 6. + Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim); + Victim = + DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT)); + ShiftAmount -= 4; +} else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount && + ShiftAmount < 7) { + // Optimize LSR when 4 <= ShiftAmount <= 6. 
+ Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim); + Victim = + DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT)); + ShiftAmount -= 4; +} + } + while (ShiftAmount--) { Victim = DAG.getNode(Opc8, dl, VT, Victim); } diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h index d1eaf53b15e9..ed9aea7a3297 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/llvm/lib/Target/AVR/AVRISelLowering.h @@ -56,6 +56,8 @@ enum NodeType { CMPC, /// Test for zero or minus instruction. TST, + /// Swap Rd[7:4] <-> Rd[3:0]. + SWAP, /// Operand 0 and operand 1 are selection variable, operand 2 /// is condition code and operand 3 is flag operand. SELECT_CC diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 8de85f6b36c5..926d1f853a37 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -67,6 +67,9 @@ def AVRrolLoop : SDNode<"AVRISD::ROLLOOP", SDTIntShiftOp>; def AVRrorLoop : SDNode<"AVRISD::RORLOOP", SDTIntShiftOp>; def AVRasrLoop : SDNode<"AVRISD::ASRLOOP", SDTIntShiftOp>; +// SWAP node. +def AVRSwap : SDNode<"AVRISD::SWAP", SDTIntUnaryOp>; + //===--===// // AVR Operands, Complex Patterns and Transformations Definitions. //===--===// @@ -1729,7 +1732,7 @@ def SWAPRd : FRd<0b1001, (outs GPR8:$rd), (ins GPR8:$src), "swap\t$rd", - [(set i8:$rd, (bswap i8:$src))]>; + [(set i8:$rd, (AVRSwap i8:$src))]>; // IO register bit set/clear operations. 
//:TODO: add patterns when popcount(imm)==2 to be expanded with 2 sbi/cbi diff --git a/llvm/test/CodeGen/AVR/ctlz.ll b/llvm/test/CodeGen/AVR/ctlz.ll index 8681b8a3f1f5..93c2f0bdfa41 100644 --- a/llvm/test/CodeGen/AVR/ctlz.ll +++ b/llvm/test/CodeGen/AVR/ctlz.ll @@ -10,8 +10,7 @@ declare i8 @llvm.ctlz.i8(i8) ; CHECK-LABEL: count_leading_zeros: ; CHECK: cpi[[RESULT:r[0-9]+]], 0 -; CHECK: brne .LBB0_1 -; CHECK: rjmp .LBB0_2 +; CHECK: breq .LBB0_2 ; CHECK: mov[[SCRATCH:r[0-9]+]], {{.*}}[[RESULT]] ; CHECK: lsr{{.*}}[[SCRATCH]] ; CHECK: or {{.*}}[[SCRATCH]], {{.*}}[[RESULT]] @@ -20,10 +19,8 @@ declare i8 @llvm.ctlz.i8(i8) ; CHECK: lsr{{.*}}[[RESULT]] ; CHECK: or {{.*}}[[RESULT]], {{.*}}[[SCRATCH]] ; CHECK: mov{{.*}}[[SCRATCH]], {{.*}}[[RESULT]] -; CHECK: lsr{{.*}}[[SCRATCH]] -; CHECK: lsr{{.*}}[[SCRATCH]] -; CHECK: lsr{{.*}}[[SCRATCH]] -; CHECK: lsr{{.*}}[[SCRATCH]] +; CHECK: swap {{.*}}[[SCRATCH]] +; CHECK: andi {{.*}}[[SCRATCH]], 15 ; CHECK: or {{.*}}[[SCRATCH]], {{.*}}[[RESULT]] ; CHECK: com{{.*}}[[SCRATCH]] ; CHECK: mov{{.*}}[[RESULT]], {{.*}}[[SCRATCH]] @@ -37,10 +34,7 @@ declare i8 @llvm.ctlz.i8(i8) ; CHECK: andi {{.*}}[[SCRATCH]], 51 ; CHECK: add{{.*}}[[SCRATCH]], {{.*}}[[RESULT]] ; CHECK: mov{{.*}}[[RESULT]], {{.*}}[[SCRATCH]] -; CHECK: lsr{{.*}}[[RESULT]] -; CHECK: lsr{{.*}}[[RESULT]] -; CHECK: lsr{{.*}}[[RESULT]] -; CHECK: lsr{{.*}}[[RESULT]] +; CHECK: swap
[llvm-branch-commits] [llvm] 1eb8c5c - [AVR] Optimize 16-bit comparison with constant
Author: Ben Shi Date: 2021-01-24T00:38:57+08:00 New Revision: 1eb8c5cd35ed0f3e06ea77a93824901f680ca1ed URL: https://github.com/llvm/llvm-project/commit/1eb8c5cd35ed0f3e06ea77a93824901f680ca1ed DIFF: https://github.com/llvm/llvm-project/commit/1eb8c5cd35ed0f3e06ea77a93824901f680ca1ed.diff LOG: [AVR] Optimize 16-bit comparison with constant Reviewed By: dylanmckay Differential Revision: https://reviews.llvm.org/D93976 Added: Modified: llvm/lib/Target/AVR/AVRISelLowering.cpp llvm/lib/Target/AVR/AVRISelLowering.h llvm/test/CodeGen/AVR/cmp.ll Removed: diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index bd5fd266d395..d919e08e468a 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -455,6 +455,36 @@ static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) { } } +/// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands. +SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, + SelectionDAG &DAG, SDLoc DL) const { + assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) && + "LHS and RHS have different types"); + assert(((LHS.getSimpleValueType() == MVT::i16) || + (LHS.getSimpleValueType() == MVT::i8)) && "invalid comparison type"); + + SDValue Cmp; + + if (LHS.getSimpleValueType() == MVT::i16 && dyn_cast<ConstantSDNode>(RHS)) { +// Generate a CPI/CPC pair if RHS is a 16-bit constant. 
+SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS, +DAG.getIntPtrConstant(0, DL)); +SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS, +DAG.getIntPtrConstant(1, DL)); +SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS, +DAG.getIntPtrConstant(0, DL)); +SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS, +DAG.getIntPtrConstant(1, DL)); +Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo); +Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp); + } else { +// Generate ordinary 16-bit comparison. +Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS); + } + + return Cmp; +} + /// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for /// the given operands. SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, @@ -567,7 +597,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, DAG.getIntPtrConstant(1, DL)); Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top); } else { - Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo); + Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL); Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp); } } else if (VT == MVT::i64) { @@ -605,7 +635,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, DAG.getIntPtrConstant(1, DL)); Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top); } else { - Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS0, RHS0); + Cmp = getAVRCmp(LHS0, RHS0, DAG, DL); Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp); Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp); Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp); @@ -619,7 +649,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS, DAG.getIntPtrConstant(1, DL))); } else { - Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS); + Cmp 
= getAVRCmp(LHS, RHS, DAG, DL); } } else { llvm_unreachable("Invalid comparison size"); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h index ed9aea7a3297..29d814b6c952 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/llvm/lib/Target/AVR/AVRISelLowering.h @@ -138,6 +138,8 @@ class AVRTargetLowering : public TargetLowering { private: SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc, SelectionDAG &DAG, SDLoc dl) const; + SDValue getAVRCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, +SDLoc dl) const; SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AVR/cmp.ll b/llvm/test/CodeGen/AVR/cmp.ll index b3224087a689..e9769068f911 100644 --- a/llvm/test/CodeGen/AVR/cmp.ll +++ b/ll
[llvm-branch-commits] [llvm] 2a4acf3 - [AVR] Optimize 8-bit int shift
Author: Ben Shi Date: 2021-01-24T11:04:37+08:00 New Revision: 2a4acf3ea8db19981284468c354aea2835fbfa08 URL: https://github.com/llvm/llvm-project/commit/2a4acf3ea8db19981284468c354aea2835fbfa08 DIFF: https://github.com/llvm/llvm-project/commit/2a4acf3ea8db19981284468c354aea2835fbfa08.diff LOG: [AVR] Optimize 8-bit int shift Reviewed By: dylanmckay Differential Revision: https://reviews.llvm.org/D90678 Added: Modified: llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp llvm/lib/Target/AVR/AVRISelLowering.cpp llvm/lib/Target/AVR/AVRISelLowering.h llvm/lib/Target/AVR/AVRInstrInfo.td llvm/test/CodeGen/AVR/shift.ll llvm/test/CodeGen/AVR/smul-with-overflow.ll Removed: diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index d10f0148cb58..a48d3d134bb5 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -1476,6 +1476,111 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return true; } +template <> +bool AVRExpandPseudo::expand<AVR::LSLB7Rd>(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(2).isDead(); + + // ror r24 + // clr r24 + // ror r24 + + buildMI(MBB, MBBI, AVR::RORRd) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + buildMI(MBB, MBBI, AVR::EORRdRr) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + auto MIRRC = + buildMI(MBB, MBBI, AVR::RORRd) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) +MIRRC->getOperand(2).setIsDead(); + + // SREG is always implicitly killed + MIRRC->getOperand(3).setIsKill(); + + MI.eraseFromParent(); + 
return true; +} + +template <> +bool AVRExpandPseudo::expand<AVR::LSRB7Rd>(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(2).isDead(); + + // rol r24 + // clr r24 + // rol r24 + + buildMI(MBB, MBBI, AVR::ADCRdRr) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + buildMI(MBB, MBBI, AVR::EORRdRr) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + auto MIRRC = + buildMI(MBB, MBBI, AVR::ADCRdRr) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) +MIRRC->getOperand(3).setIsDead(); + + // SREG is always implicitly killed + MIRRC->getOperand(4).setIsKill(); + + MI.eraseFromParent(); + return true; +} + +template <> +bool AVRExpandPseudo::expand<AVR::ASRB7Rd>(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(2).isDead(); + + // lsl r24 + // sbc r24, r24 + + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + auto MIRRC = buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) +MIRRC->getOperand(3).setIsDead(); + + // SREG is always implicitly killed + MIRRC->getOperand(4).setIsKill(); + + MI.eraseFromParent(); + 
return true; +} + template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; @@ -1697,6 +1802,9 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) { EXPAND(AVR::RORWRd); EXPAND(AVR::ROLWRd); EXPAND(AVR::ASRWRd); +EXPAND(AVR::LSLB7Rd); +EXPAND(AVR::LSRB7Rd); +EXPAND(AVR::ASRB7Rd); EXPAND(AVR::SEXT); EXPAND(AVR::ZEXT); EXPAND(AVR::SPREAD); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index d919e08e468a..3e7c2984655a 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AV
[llvm-branch-commits] [clang] 89a5147 - [clang][AVR] Improve avr-ld command line options
Author: Ben Shi Date: 2021-01-25T12:01:26+08:00 New Revision: 89a5147e5a0c2e886cdf7ffa34799c069d825940 URL: https://github.com/llvm/llvm-project/commit/89a5147e5a0c2e886cdf7ffa34799c069d825940 DIFF: https://github.com/llvm/llvm-project/commit/89a5147e5a0c2e886cdf7ffa34799c069d825940.diff LOG: [clang][AVR] Improve avr-ld command line options Added: clang/test/Driver/Inputs/basic_avr_tree/bin/avr-ld clang/test/Driver/Inputs/basic_avr_tree/lib/avr/lib/libavr.a clang/test/Driver/Inputs/basic_avr_tree/lib/gcc/avr/5.4.0/libgcc.a clang/test/Driver/avr-ld.c Modified: clang/lib/Driver/ToolChains/AVR.cpp Removed: diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index ae56b7b5249e..c999c3b87f84 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -32,247 +32,248 @@ const struct { StringRef Name; std::string SubPath; StringRef Family; + unsigned DataAddr; } MCUInfo[] = { -{"at90s1200", "", "avr1"}, -{"attiny11", "", "avr1"}, -{"attiny12", "", "avr1"}, -{"attiny15", "", "avr1"}, -{"attiny28", "", "avr1"}, -{"at90s2313", "tiny-stack", "avr2"}, -{"at90s2323", "tiny-stack", "avr2"}, -{"at90s2333", "tiny-stack", "avr2"}, -{"at90s2343", "tiny-stack", "avr2"}, -{"at90s4433", "tiny-stack", "avr2"}, -{"attiny22", "tiny-stack", "avr2"}, -{"attiny26", "tiny-stack", "avr2"}, -{"at90s4414", "", "avr2"}, -{"at90s4434", "", "avr2"}, -{"at90s8515", "", "avr2"}, -{"at90c8534", "", "avr2"}, -{"at90s8535", "", "avr2"}, -{"attiny13", "avr25/tiny-stack", "avr25"}, -{"attiny13a", "avr25/tiny-stack", "avr25"}, -{"attiny2313", "avr25/tiny-stack", "avr25"}, -{"attiny2313a", "avr25/tiny-stack", "avr25"}, -{"attiny24", "avr25/tiny-stack", "avr25"}, -{"attiny24a", "avr25/tiny-stack", "avr25"}, -{"attiny25", "avr25/tiny-stack", "avr25"}, -{"attiny261", "avr25/tiny-stack", "avr25"}, -{"attiny261a", "avr25/tiny-stack", "avr25"}, -{"at86rf401", "avr25", "avr25"}, -{"ata5272", "avr25", "avr25"}, -{"attiny4313", "avr25", "avr25"}, 
-{"attiny44", "avr25", "avr25"}, -{"attiny44a", "avr25", "avr25"}, -{"attiny84", "avr25", "avr25"}, -{"attiny84a", "avr25", "avr25"}, -{"attiny45", "avr25", "avr25"}, -{"attiny85", "avr25", "avr25"}, -{"attiny441", "avr25", "avr25"}, -{"attiny461", "avr25", "avr25"}, -{"attiny461a", "avr25", "avr25"}, -{"attiny841", "avr25", "avr25"}, -{"attiny861", "avr25", "avr25"}, -{"attiny861a", "avr25", "avr25"}, -{"attiny87", "avr25", "avr25"}, -{"attiny43u", "avr25", "avr25"}, -{"attiny48", "avr25", "avr25"}, -{"attiny88", "avr25", "avr25"}, -{"attiny828", "avr25", "avr25"}, -{"at43usb355", "avr3", "avr3"}, -{"at76c711", "avr3", "avr3"}, -{"atmega103", "avr31", "avr31"}, -{"at43usb320", "avr31", "avr31"}, -{"attiny167", "avr35", "avr35"}, -{"at90usb82", "avr35", "avr35"}, -{"at90usb162", "avr35", "avr35"}, -{"ata5505", "avr35", "avr35"}, -{"atmega8u2", "avr35", "avr35"}, -{"atmega16u2", "avr35", "avr35"}, -{"atmega32u2", "avr35", "avr35"}, -{"attiny1634", "avr35", "avr35"}, -{"atmega8", "avr4", "avr4"}, -{"ata6289", "avr4", "avr4"}, -{"atmega8a", "avr4", "avr4"}, -{"ata6285", "avr4", "avr4"}, -{"ata6286", "avr4", "avr4"}, -{"atmega48", "avr4", "avr4"}, -{"atmega48a", "avr4", "avr4"}, -{"atmega48pa", "avr4", "avr4"}, -{"atmega48pb", "avr4", "avr4"}, -{"atmega48p", "avr4", "avr4"}, -{"atmega88", "avr4", "avr4"}, -{"atmega88a", "avr4", "avr4"}, -{"atmega88p", "avr4", "avr4"}, -{"atmega88pa", "avr4", "avr4"}, -{"atmega88pb", "avr4", "avr4"}, -{"atmega8515", "avr4", "avr4"}, -{"atmega8535", "avr4", "avr4"}, -{"atmega8hva", "avr4", "avr4"}, -{"at90pwm1", "avr4", "avr4"}, -{"at90pwm2", "avr4", "avr4"}, -{"at90pwm2b", "avr4", "avr4"}, -{"at90pwm3", "avr4", "avr4"}, -{"at90pwm3b", "avr4", "avr4"}, -{"at90pwm81", "avr4", "avr4"}, -{"ata5790", "avr5", "avr5"}, -{"ata5795", "avr5", "avr5"}, -{"atmega16", "avr5", "avr5"}, -{"atmega16a", "avr5", "avr5"}, -{"atmega161", "avr5", "avr5"}, -{"atmega162", "avr5", "avr5"}, -{"atmega163", "avr5", "avr5"}, -{"atmega164a", "avr5", "avr5"}, 
-{"atmega164p", "avr5", "avr5"}, -{"atmega164pa", "avr5", "avr5"}, -{"atmega165", "avr5", "avr5"}, -{"atmega165a", "avr5", "avr5"}, -{"atmega165p", "avr5", "avr5"}, -{"atmega165pa", "avr5", "avr5"}, -{"atmega168", "avr5", "avr5"}, -{"atmega168a", "avr5", "avr5"}, -{"atmega168p", "avr5", "avr5"}, -{"atmega168pa", "avr5", "avr5"}, -{"atmega168pb", "avr5", "avr5"}, -{"atmega169", "avr5", "avr5"}, -{"atmega169a", "avr5", "avr5"}, -{"atmega169p", "avr5", "avr5"
[llvm-branch-commits] [clang] 01d9f13 - Revert "[clang][AVR] Improve avr-ld command line options"
Author: Ben Shi Date: 2021-01-25T16:33:58+08:00 New Revision: 01d9f13c3a5914baf9739348ef666e348a7b2a2f URL: https://github.com/llvm/llvm-project/commit/01d9f13c3a5914baf9739348ef666e348a7b2a2f DIFF: https://github.com/llvm/llvm-project/commit/01d9f13c3a5914baf9739348ef666e348a7b2a2f.diff LOG: Revert "[clang][AVR] Improve avr-ld command line options" This reverts commit 89a5147e5a0c2e886cdf7ffa34799c069d825940. Added: Modified: clang/lib/Driver/ToolChains/AVR.cpp Removed: clang/test/Driver/Inputs/basic_avr_tree/bin/avr-ld clang/test/Driver/Inputs/basic_avr_tree/lib/avr/lib/libavr.a clang/test/Driver/Inputs/basic_avr_tree/lib/gcc/avr/5.4.0/libgcc.a clang/test/Driver/avr-ld.c diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index c999c3b87f84..ae56b7b5249e 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -32,248 +32,247 @@ const struct { StringRef Name; std::string SubPath; StringRef Family; - unsigned DataAddr; } MCUInfo[] = { -{"at90s1200", "", "avr1", 0}, -{"attiny11", "", "avr1", 0}, -{"attiny12", "", "avr1", 0}, -{"attiny15", "", "avr1", 0}, -{"attiny28", "", "avr1", 0}, -{"at90s2313", "tiny-stack", "avr2", 0x800060}, -{"at90s2323", "tiny-stack", "avr2", 0x800060}, -{"at90s2333", "tiny-stack", "avr2", 0x800060}, -{"at90s2343", "tiny-stack", "avr2", 0x800060}, -{"at90s4433", "tiny-stack", "avr2, 0x800060"}, -{"attiny22", "tiny-stack", "avr2", 0x800060}, -{"attiny26", "tiny-stack", "avr2", 0x800060}, -{"at90s4414", "", "avr2", 0x800060}, -{"at90s4434", "", "avr2", 0x800060}, -{"at90s8515", "", "avr2", 0x800060}, -{"at90c8534", "", "avr2", 0x800060}, -{"at90s8535", "", "avr2", 0x800060}, -{"attiny13", "avr25/tiny-stack", "avr25", 0x800060}, -{"attiny13a", "avr25/tiny-stack", "avr25", 0x800060}, -{"attiny2313", "avr25/tiny-stack", "avr25", 0x800060}, -{"attiny2313a", "avr25/tiny-stack", "avr25", 0x800060}, -{"attiny24", "avr25/tiny-stack", "avr25", 0x800060}, -{"attiny24a", 
"avr25/tiny-stack", "avr25", 0x800060}, -{"attiny25", "avr25/tiny-stack", "avr25", 0x800060}, -{"attiny261", "avr25/tiny-stack", "avr25", 0x800060}, -{"attiny261a", "avr25/tiny-stack", "avr25", 0x800060}, -{"at86rf401", "avr25", "avr25", 0x800060}, -{"ata5272", "avr25", "avr25", 0x800100}, -{"attiny4313", "avr25", "avr25", 0x800060}, -{"attiny44", "avr25", "avr25", 0x800060}, -{"attiny44a", "avr25", "avr25", 0x800060}, -{"attiny84", "avr25", "avr25", 0x800060}, -{"attiny84a", "avr25", "avr25", 0x800060}, -{"attiny45", "avr25", "avr25", 0x800060}, -{"attiny85", "avr25", "avr25", 0x800060}, -{"attiny441", "avr25", "avr25", 0x800100}, -{"attiny461", "avr25", "avr25", 0x800060}, -{"attiny461a", "avr25", "avr25", 0x800060}, -{"attiny841", "avr25", "avr25", 0x800100}, -{"attiny861", "avr25", "avr25", 0x800060}, -{"attiny861a", "avr25", "avr25", 0x800060}, -{"attiny87", "avr25", "avr25", 0x800100}, -{"attiny43u", "avr25", "avr25", 0x800060}, -{"attiny48", "avr25", "avr25", 0x800100}, -{"attiny88", "avr25", "avr25", 0x800100}, -{"attiny828", "avr25", "avr25", 0x800100}, -{"at43usb355", "avr3", "avr3", 0x800100}, -{"at76c711", "avr3", "avr3", 0x800060}, -{"atmega103", "avr31", "avr31", 0x800060}, -{"at43usb320", "avr31", "avr31", 0x800060}, -{"attiny167", "avr35", "avr35", 0x800100}, -{"at90usb82", "avr35", "avr35", 0x800100}, -{"at90usb162", "avr35", "avr35", 0x800100}, -{"ata5505", "avr35", "avr35", 0x800100}, -{"atmega8u2", "avr35", "avr35", 0x800100}, -{"atmega16u2", "avr35", "avr35", 0x800100}, -{"atmega32u2", "avr35", "avr35", 0x800100}, -{"attiny1634", "avr35", "avr35", 0x800100}, -{"atmega8", "avr4", "avr4", 0x800060}, -{"ata6289", "avr4", "avr4", 0x800100}, -{"atmega8a", "avr4", "avr4", 0x800060}, -{"ata6285", "avr4", "avr4", 0x800100}, -{"ata6286", "avr4", "avr4", 0x800100}, -{"atmega48", "avr4", "avr4", 0x800100}, -{"atmega48a", "avr4", "avr4", 0x800100}, -{"atmega48pa", "avr4", "avr4", 0x800100}, -{"atmega48pb", "avr4", "avr4", 0x800100}, -{"atmega48p", "avr4", 
"avr4", 0x800100}, -{"atmega88", "avr4", "avr4", 0x800100}, -{"atmega88a", "avr4", "avr4", 0x800100}, -{"atmega88p", "avr4", "avr4", 0x800100}, -{"atmega88pa", "avr4", "avr4", 0x800100}, -{"atmega88pb", "avr4", "avr4", 0x800100}, -{"atmega8515", "avr4", "avr4", 0x800060}, -{"atmega8535", "avr4", "avr4", 0x800060}, -{"atmega8hva", "avr4", "avr4", 0x800100}, -{"at90pwm1", "avr4", "avr4", 0x800100}, -{"at90pwm2", "avr4", "avr4", 0x800100}, -{"at90pwm2b", "avr4", "avr4", 0x800100}, -{"at90pwm3", "avr4", "avr4", 0x800100}, -{"at90pwm3b", "avr4", "avr4",
[llvm-branch-commits] [llvm] 351a45c - [RISCV][NFC] Add new test cases for mul
Author: Ben Shi Date: 2021-01-06T18:55:56+08:00 New Revision: 351a45ca73de3ec4524d2194674e2a92f8b7d9b5 URL: https://github.com/llvm/llvm-project/commit/351a45ca73de3ec4524d2194674e2a92f8b7d9b5 DIFF: https://github.com/llvm/llvm-project/commit/351a45ca73de3ec4524d2194674e2a92f8b7d9b5.diff LOG: [RISCV][NFC] Add new test cases for mul Added: Modified: llvm/test/CodeGen/RISCV/mul.ll Removed: diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 9104b4089ab9..3703e4128c69 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -972,3 +972,207 @@ define i64 @muli64_m3840(i64 %a) nounwind { %1 = mul i64 %a, -3840 ret i64 %1 } + +define i128 @muli128_m3840(i128 %a) nounwind { +; RV32I-LABEL: muli128_m3840: +; RV32I: # %bb.0: +; RV32I-NEXT:addi sp, sp, -64 +; RV32I-NEXT:sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT:sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT:lw a3, 0(a1) +; RV32I-NEXT:lw a2, 4(a1) +; RV32I-NEXT:lw a4, 8(a1) +; RV32I-NEXT:lw a1, 12(a1) +; RV32I-NEXT:mv s0, a0 +; RV32I-NEXT:addi a0, zero, -1 +; RV32I-NEXT:sw a0, 20(sp) +; RV32I-NEXT:sw a0, 16(sp) +; RV32I-NEXT:sw a0, 12(sp) +; RV32I-NEXT:lui a0, 1048575 +; RV32I-NEXT:addi a0, a0, 256 +; RV32I-NEXT:sw a0, 8(sp) +; RV32I-NEXT:sw a1, 36(sp) +; RV32I-NEXT:sw a4, 32(sp) +; RV32I-NEXT:sw a2, 28(sp) +; RV32I-NEXT:addi a0, sp, 40 +; RV32I-NEXT:addi a1, sp, 24 +; RV32I-NEXT:addi a2, sp, 8 +; RV32I-NEXT:sw a3, 24(sp) +; RV32I-NEXT:call __multi3@plt +; RV32I-NEXT:lw a0, 52(sp) +; RV32I-NEXT:lw a1, 48(sp) +; RV32I-NEXT:lw a2, 44(sp) +; RV32I-NEXT:lw a3, 40(sp) +; RV32I-NEXT:sw a0, 12(s0) +; RV32I-NEXT:sw a1, 8(s0) +; RV32I-NEXT:sw a2, 4(s0) +; RV32I-NEXT:sw a3, 0(s0) +; RV32I-NEXT:lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT:lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT:addi sp, sp, 64 +; RV32I-NEXT:ret +; +; RV32IM-LABEL: muli128_m3840: +; RV32IM: # %bb.0: +; RV32IM-NEXT:addi sp, sp, -64 +; RV32IM-NEXT:sw ra, 60(sp) # 4-byte Folded Spill +; 
RV32IM-NEXT:sw s0, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT:lw a3, 0(a1) +; RV32IM-NEXT:lw a2, 4(a1) +; RV32IM-NEXT:lw a4, 8(a1) +; RV32IM-NEXT:lw a1, 12(a1) +; RV32IM-NEXT:mv s0, a0 +; RV32IM-NEXT:addi a0, zero, -1 +; RV32IM-NEXT:sw a0, 20(sp) +; RV32IM-NEXT:sw a0, 16(sp) +; RV32IM-NEXT:sw a0, 12(sp) +; RV32IM-NEXT:lui a0, 1048575 +; RV32IM-NEXT:addi a0, a0, 256 +; RV32IM-NEXT:sw a0, 8(sp) +; RV32IM-NEXT:sw a1, 36(sp) +; RV32IM-NEXT:sw a4, 32(sp) +; RV32IM-NEXT:sw a2, 28(sp) +; RV32IM-NEXT:addi a0, sp, 40 +; RV32IM-NEXT:addi a1, sp, 24 +; RV32IM-NEXT:addi a2, sp, 8 +; RV32IM-NEXT:sw a3, 24(sp) +; RV32IM-NEXT:call __multi3@plt +; RV32IM-NEXT:lw a0, 52(sp) +; RV32IM-NEXT:lw a1, 48(sp) +; RV32IM-NEXT:lw a2, 44(sp) +; RV32IM-NEXT:lw a3, 40(sp) +; RV32IM-NEXT:sw a0, 12(s0) +; RV32IM-NEXT:sw a1, 8(s0) +; RV32IM-NEXT:sw a2, 4(s0) +; RV32IM-NEXT:sw a3, 0(s0) +; RV32IM-NEXT:lw s0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT:lw ra, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT:addi sp, sp, 64 +; RV32IM-NEXT:ret +; +; RV64I-LABEL: muli128_m3840: +; RV64I: # %bb.0: +; RV64I-NEXT:addi sp, sp, -16 +; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT:lui a2, 1048575 +; RV64I-NEXT:addiw a2, a2, 256 +; RV64I-NEXT:addi a3, zero, -1 +; RV64I-NEXT:call __multi3@plt +; RV64I-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT:addi sp, sp, 16 +; RV64I-NEXT:ret +; +; RV64IM-LABEL: muli128_m3840: +; RV64IM: # %bb.0: +; RV64IM-NEXT:lui a2, 1048575 +; RV64IM-NEXT:addiw a2, a2, 256 +; RV64IM-NEXT:mul a1, a1, a2 +; RV64IM-NEXT:mulhu a3, a0, a2 +; RV64IM-NEXT:sub a3, a3, a0 +; RV64IM-NEXT:add a1, a3, a1 +; RV64IM-NEXT:mul a0, a0, a2 +; RV64IM-NEXT:ret + %1 = mul i128 %a, -3840 + ret i128 %1 +} + +define i128 @muli128_m63(i128 %a) nounwind { +; RV32I-LABEL: muli128_m63: +; RV32I: # %bb.0: +; RV32I-NEXT:addi sp, sp, -64 +; RV32I-NEXT:sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT:sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT:lw a3, 0(a1) +; RV32I-NEXT:lw a2, 4(a1) +; 
RV32I-NEXT:lw a4, 8(a1) +; RV32I-NEXT:lw a1, 12(a1) +; RV32I-NEXT:mv s0, a0 +; RV32I-NEXT:addi a0, zero, -1 +; RV32I-NEXT:sw a0, 20(sp) +; RV32I-NEXT:sw a0, 16(sp) +; RV32I-NEXT:sw a0, 12(sp) +; RV32I-NEXT:addi a0, zero, -63 +; RV32I-NEXT:sw a0, 8(sp) +; RV32I-NEXT:sw a1, 36(sp) +; RV32I-NEXT:sw a4, 32(sp) +; RV32I-NEXT:sw a2, 28(sp) +; RV32I-NEXT:addi a0, sp, 40 +; RV32I-NEXT:addi a1, sp, 24 +; RV32I-NEXT:addi a2, sp, 8 +; RV3
[llvm-branch-commits] [llvm] 55f0a1b - [RISCV] Optimize multiplication with constant
Author: Ben Shi Date: 2021-01-09T10:37:21+08:00 New Revision: 55f0a1b06632688f08eb616fe02674cf2e666080 URL: https://github.com/llvm/llvm-project/commit/55f0a1b06632688f08eb616fe02674cf2e666080 DIFF: https://github.com/llvm/llvm-project/commit/55f0a1b06632688f08eb616fe02674cf2e666080.diff LOG: [RISCV] Optimize multiplication with constant 1. Break MUL with specific constant to a SLLI and an ADD/SUB on riscv32 with the M extension. 2. Break MUL with specific constant to two SLLI and an ADD/SUB, if the constant needs a pair of LUI/ADDI to construct. Reviewed by: craig.topper Differential Revision: https://reviews.llvm.org/D93619 Added: Modified: llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/test/CodeGen/RISCV/mul.ll Removed: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ba6548e48303..22d15bc8586b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3899,16 +3899,28 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const { // Check integral scalar types. if (VT.isScalarInteger()) { -// Do not perform the transformation on riscv32 with the M extension. -if (!Subtarget.is64Bit() && Subtarget.hasStdExtM()) +// Omit the optimization if the sub target has the M extension and the data +// size exceeds XLen. +if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) return false; if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { - if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t)) + // Break the MUL to a SLLI and an ADD/SUB. + const APInt &Imm = ConstNode->getAPIntValue(); + if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || + (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) +return true; + // Omit the following optimization if the sub target has the M extension + // and the data size >= XLen. 
+ if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) return false; - int64_t Imm = ConstNode->getSExtValue(); - if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) || - isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm)) + // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs + // a pair of LUI/ADDI. + if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) { +APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); +if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || +(1 - ImmS).isPowerOf2()) return true; + } } } diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 3703e4128c69..647004df3f0c 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -85,8 +85,8 @@ define signext i32 @mul_constant(i32 %a) nounwind { ; ; RV32IM-LABEL: mul_constant: ; RV32IM: # %bb.0: -; RV32IM-NEXT:addi a1, zero, 5 -; RV32IM-NEXT:mul a0, a0, a1 +; RV32IM-NEXT:slli a1, a0, 2 +; RV32IM-NEXT:add a0, a1, a0 ; RV32IM-NEXT:ret ; ; RV64I-LABEL: mul_constant: @@ -182,10 +182,12 @@ define i64 @mul64_constant(i64 %a) nounwind { ; RV32IM-LABEL: mul64_constant: ; RV32IM: # %bb.0: ; RV32IM-NEXT:addi a2, zero, 5 -; RV32IM-NEXT:mul a1, a1, a2 -; RV32IM-NEXT:mulhu a3, a0, a2 +; RV32IM-NEXT:mulhu a2, a0, a2 +; RV32IM-NEXT:slli a3, a1, 2 ; RV32IM-NEXT:add a1, a3, a1 -; RV32IM-NEXT:mul a0, a0, a2 +; RV32IM-NEXT:add a1, a2, a1 +; RV32IM-NEXT:slli a2, a0, 2 +; RV32IM-NEXT:add a0, a2, a0 ; RV32IM-NEXT:ret ; ; RV64I-LABEL: mul64_constant: @@ -300,8 +302,8 @@ define i32 @muli32_p65(i32 %a) nounwind { ; ; RV32IM-LABEL: muli32_p65: ; RV32IM: # %bb.0: -; RV32IM-NEXT:addi a1, zero, 65 -; RV32IM-NEXT:mul a0, a0, a1 +; RV32IM-NEXT:slli a1, a0, 6 +; RV32IM-NEXT:add a0, a1, a0 ; RV32IM-NEXT:ret ; ; RV64I-LABEL: muli32_p65: @@ -328,8 +330,8 @@ define i32 @muli32_p63(i32 %a) nounwind { ; ; RV32IM-LABEL: muli32_p63: ; RV32IM: # %bb.0: -; RV32IM-NEXT:addi a1, zero, 63 -; RV32IM-NEXT:mul a0, a0, a1 +; RV32IM-NEXT:slli a1, 
a0, 6 +; RV32IM-NEXT:sub a0, a1, a0 ; RV32IM-NEXT:ret ; ; RV64I-LABEL: muli32_p63: @@ -364,10 +366,12 @@ define i64 @muli64_p65(i64 %a) nounwind { ; RV32IM-LABEL: muli64_p65: ; RV32IM: # %bb.0: ; RV32IM-NEXT:addi a2, zero, 65 -; RV32IM-NEXT:mul a1, a1, a2 -; RV32IM-NEXT:mulhu a3, a0, a2 +; RV32IM-NEXT:mulhu a2, a0, a2 +; RV32IM-NEXT:slli a3, a1, 6 ; RV32IM-NEXT:add a1, a3, a1 -; RV32IM-NEXT:mul a0, a0, a2 +; RV32IM-NEXT:add a1, a2, a1 +; RV32IM-NEXT:slli a2, a0, 6 +; RV32IM-NEXT:add a0, a2, a0 ; RV32IM-NEXT:ret ;
[llvm-branch-commits] [llvm] 9e4b682 - [RISCV][NFC] Add tests for multiplication with constant
Author: Ben Shi Date: 2020-12-21T10:20:36+08:00 New Revision: 9e4b682baf2c1eab8f47b8eaa8ffea2d846a68b2 URL: https://github.com/llvm/llvm-project/commit/9e4b682baf2c1eab8f47b8eaa8ffea2d846a68b2 DIFF: https://github.com/llvm/llvm-project/commit/9e4b682baf2c1eab8f47b8eaa8ffea2d846a68b2.diff LOG: [RISCV][NFC] Add tests for multiplication with constant Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D93589 Added: Modified: llvm/test/CodeGen/RISCV/mul.ll Removed: diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 958417e5e748..9104b4089ab9 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -562,3 +562,413 @@ define i64 @muli64_m65(i64 %a) nounwind { %1 = mul i64 %a, -65 ret i64 %1 } + +define i32 @muli32_p384(i32 %a) nounwind { +; RV32I-LABEL: muli32_p384: +; RV32I: # %bb.0: +; RV32I-NEXT:addi sp, sp, -16 +; RV32I-NEXT:sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT:addi a1, zero, 384 +; RV32I-NEXT:call __mulsi3@plt +; RV32I-NEXT:lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT:addi sp, sp, 16 +; RV32I-NEXT:ret +; +; RV32IM-LABEL: muli32_p384: +; RV32IM: # %bb.0: +; RV32IM-NEXT:addi a1, zero, 384 +; RV32IM-NEXT:mul a0, a0, a1 +; RV32IM-NEXT:ret +; +; RV64I-LABEL: muli32_p384: +; RV64I: # %bb.0: +; RV64I-NEXT:addi sp, sp, -16 +; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT:addi a1, zero, 384 +; RV64I-NEXT:call __muldi3@plt +; RV64I-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT:addi sp, sp, 16 +; RV64I-NEXT:ret +; +; RV64IM-LABEL: muli32_p384: +; RV64IM: # %bb.0: +; RV64IM-NEXT:addi a1, zero, 384 +; RV64IM-NEXT:mul a0, a0, a1 +; RV64IM-NEXT:ret + %1 = mul i32 %a, 384 + ret i32 %1 +} + +define i32 @muli32_p12288(i32 %a) nounwind { +; RV32I-LABEL: muli32_p12288: +; RV32I: # %bb.0: +; RV32I-NEXT:addi sp, sp, -16 +; RV32I-NEXT:sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT:lui a1, 3 +; RV32I-NEXT:call __mulsi3@plt +; RV32I-NEXT:lw ra, 12(sp) # 4-byte Folded 
Reload +; RV32I-NEXT:addi sp, sp, 16 +; RV32I-NEXT:ret +; +; RV32IM-LABEL: muli32_p12288: +; RV32IM: # %bb.0: +; RV32IM-NEXT:lui a1, 3 +; RV32IM-NEXT:mul a0, a0, a1 +; RV32IM-NEXT:ret +; +; RV64I-LABEL: muli32_p12288: +; RV64I: # %bb.0: +; RV64I-NEXT:addi sp, sp, -16 +; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT:lui a1, 3 +; RV64I-NEXT:call __muldi3@plt +; RV64I-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT:addi sp, sp, 16 +; RV64I-NEXT:ret +; +; RV64IM-LABEL: muli32_p12288: +; RV64IM: # %bb.0: +; RV64IM-NEXT:lui a1, 3 +; RV64IM-NEXT:mul a0, a0, a1 +; RV64IM-NEXT:ret + %1 = mul i32 %a, 12288 + ret i32 %1 +} + +define i32 @muli32_p4352(i32 %a) nounwind { +; RV32I-LABEL: muli32_p4352: +; RV32I: # %bb.0: +; RV32I-NEXT:addi sp, sp, -16 +; RV32I-NEXT:sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT:lui a1, 1 +; RV32I-NEXT:addi a1, a1, 256 +; RV32I-NEXT:call __mulsi3@plt +; RV32I-NEXT:lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT:addi sp, sp, 16 +; RV32I-NEXT:ret +; +; RV32IM-LABEL: muli32_p4352: +; RV32IM: # %bb.0: +; RV32IM-NEXT:lui a1, 1 +; RV32IM-NEXT:addi a1, a1, 256 +; RV32IM-NEXT:mul a0, a0, a1 +; RV32IM-NEXT:ret +; +; RV64I-LABEL: muli32_p4352: +; RV64I: # %bb.0: +; RV64I-NEXT:addi sp, sp, -16 +; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT:lui a1, 1 +; RV64I-NEXT:addiw a1, a1, 256 +; RV64I-NEXT:call __muldi3@plt +; RV64I-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT:addi sp, sp, 16 +; RV64I-NEXT:ret +; +; RV64IM-LABEL: muli32_p4352: +; RV64IM: # %bb.0: +; RV64IM-NEXT:lui a1, 1 +; RV64IM-NEXT:addiw a1, a1, 256 +; RV64IM-NEXT:mul a0, a0, a1 +; RV64IM-NEXT:ret + %1 = mul i32 %a, 4352 + ret i32 %1 +} + +define i32 @muli32_p3840(i32 %a) nounwind { +; RV32I-LABEL: muli32_p3840: +; RV32I: # %bb.0: +; RV32I-NEXT:addi sp, sp, -16 +; RV32I-NEXT:sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT:lui a1, 1 +; RV32I-NEXT:addi a1, a1, -256 +; RV32I-NEXT:call __mulsi3@plt +; RV32I-NEXT:lw ra, 12(sp) # 4-byte Folded Reload 
+; RV32I-NEXT:addi sp, sp, 16 +; RV32I-NEXT:ret +; +; RV32IM-LABEL: muli32_p3840: +; RV32IM: # %bb.0: +; RV32IM-NEXT:lui a1, 1 +; RV32IM-NEXT:addi a1, a1, -256 +; RV32IM-NEXT:mul a0, a0, a1 +; RV32IM-NEXT:ret +; +; RV64I-LABEL: muli32_p3840: +; RV64I: # %bb.0: +; RV64I-NEXT:addi sp, sp, -16 +; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT:lui a1, 1 +; RV64I-NEXT:addiw a1, a1, -256 +; RV64I-NEXT:call __muldi3@plt +; RV64I-NEXT:ld ra, 8(sp) #