[llvm-branch-commits] [llvm] Backport #85277 (PR #85422)

2024-03-15 Thread Ben Shi via llvm-branch-commits

https://github.com/benshi001 updated 
https://github.com/llvm/llvm-project/pull/85422

>From bb49a9f39c5643ed7503c78b56bfc28388e21709 Mon Sep 17 00:00:00 2001
From: Patryk Wychowaniec 
Date: Fri, 15 Mar 2024 12:07:54 +0100
Subject: [PATCH] [AVR] Remove earlyclobber from LDDRdPtrQ (#85277)

LDDRdPtrQ was marked as `earlyclobber`, which doesn't play well with
GreedyRA (which can generate this instruction through `loadRegFromStackSlot()`).

This seems to be the same case as:

https://github.com/llvm/llvm-project/blob/a99b912c9b74f6ef91786b4dfbc25160c27d3b41/llvm/lib/Target/AVR/AVRInstrInfo.td#L1421

Closes https://github.com/llvm/llvm-project/issues/81911.
---
 llvm/lib/Target/AVR/AVRInstrInfo.td |   2 +-
 llvm/test/CodeGen/AVR/bug-81911.ll  | 163 
 2 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AVR/bug-81911.ll

diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td 
b/llvm/lib/Target/AVR/AVRInstrInfo.td
index efaaec32ee6bb1..0a77c7c1d418a1 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1398,7 +1398,7 @@ let mayLoad = 1, hasSideEffects = 0,
 
 // Load indirect with displacement operations.
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-  let Constraints = "@earlyclobber $reg" in def LDDRdPtrQ
+  def LDDRdPtrQ
   : FSTDLDD<0,
 (outs GPR8
  : $reg),
diff --git a/llvm/test/CodeGen/AVR/bug-81911.ll 
b/llvm/test/CodeGen/AVR/bug-81911.ll
new file mode 100644
index 00000000000000..2a22666a1ff927
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/bug-81911.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=avr -mcpu=atmega328 -O1 -verify-machineinstrs | FileCheck %s
+
+define internal i8 @main() {
+; CHECK-LABEL: main:
+; CHECK:   ; %bb.0: ; %bb0
+; CHECK-NEXT:push r2
+; CHECK-NEXT:push r3
+; CHECK-NEXT:push r4
+; CHECK-NEXT:push r5
+; CHECK-NEXT:push r6
+; CHECK-NEXT:push r7
+; CHECK-NEXT:push r8
+; CHECK-NEXT:push r9
+; CHECK-NEXT:push r10
+; CHECK-NEXT:push r11
+; CHECK-NEXT:push r12
+; CHECK-NEXT:push r13
+; CHECK-NEXT:push r14
+; CHECK-NEXT:push r15
+; CHECK-NEXT:push r16
+; CHECK-NEXT:push r17
+; CHECK-NEXT:push r28
+; CHECK-NEXT:push r29
+; CHECK-NEXT:in r28, 61
+; CHECK-NEXT:in r29, 62
+; CHECK-NEXT:sbiw r28, 13
+; CHECK-NEXT:in r0, 63
+; CHECK-NEXT:cli
+; CHECK-NEXT:out 62, r29
+; CHECK-NEXT:out 63, r0
+; CHECK-NEXT:out 61, r28
+; CHECK-NEXT:ldi r16, 0
+; CHECK-NEXT:ldi r17, 0
+; CHECK-NEXT:ldi r18, -1
+; CHECK-NEXT:;APP
+; CHECK-NEXT:ldi r24, 123
+; CHECK-NEXT:;NO_APP
+; CHECK-NEXT:std Y+1, r24 ; 1-byte Folded Spill
+; CHECK-NEXT:movw r24, r28
+; CHECK-NEXT:adiw r24, 6
+; CHECK-NEXT:std Y+3, r25 ; 2-byte Folded Spill
+; CHECK-NEXT:std Y+2, r24 ; 2-byte Folded Spill
+; CHECK-NEXT:movw r8, r16
+; CHECK-NEXT:movw r6, r16
+; CHECK-NEXT:movw r4, r16
+; CHECK-NEXT:movw r2, r16
+; CHECK-NEXT:rjmp .LBB0_2
+; CHECK-NEXT:  .LBB0_1: ; %bb1
+; CHECK-NEXT:; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:andi r30, 1
+; CHECK-NEXT:ldd r31, Y+4 ; 1-byte Folded Reload
+; CHECK-NEXT:dec r31
+; CHECK-NEXT:cpi r30, 0
+; CHECK-NEXT:movw r8, r18
+; CHECK-NEXT:movw r6, r20
+; CHECK-NEXT:movw r4, r22
+; CHECK-NEXT:movw r2, r24
+; CHECK-NEXT:mov r18, r31
+; CHECK-NEXT:brne .LBB0_2
+; CHECK-NEXT:rjmp .LBB0_4
+; CHECK-NEXT:  .LBB0_2: ; %bb1
+; CHECK-NEXT:; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:std Y+4, r18 ; 1-byte Folded Spill
+; CHECK-NEXT:movw r18, r8
+; CHECK-NEXT:movw r20, r6
+; CHECK-NEXT:movw r22, r4
+; CHECK-NEXT:movw r24, r2
+; CHECK-NEXT:ldi r26, 10
+; CHECK-NEXT:ldi r27, 0
+; CHECK-NEXT:movw r10, r26
+; CHECK-NEXT:movw r12, r16
+; CHECK-NEXT:movw r14, r16
+; CHECK-NEXT:call __udivdi3
+; CHECK-NEXT:std Y+13, r25
+; CHECK-NEXT:std Y+12, r24
+; CHECK-NEXT:std Y+11, r23
+; CHECK-NEXT:std Y+10, r22
+; CHECK-NEXT:std Y+9, r21
+; CHECK-NEXT:std Y+8, r20
+; CHECK-NEXT:std Y+7, r19
+; CHECK-NEXT:std Y+6, r18
+; CHECK-NEXT:ldd r30, Y+2 ; 2-byte Folded Reload
+; CHECK-NEXT:ldd r31, Y+3 ; 2-byte Folded Reload
+; CHECK-NEXT:;APP
+; CHECK-NEXT:;NO_APP
+; CHECK-NEXT:ldi r30, 1
+; CHECK-NEXT:cp r8, r1
+; CHECK-NEXT:cpc r9, r1
+; CHECK-NEXT:cpc r6, r16
+; CHECK-NEXT:cpc r7, r17
+; CHECK-NEXT:cpc r4, r16
+; CHECK-NEXT:cpc r5, r17
+; CHECK-NEXT:cpc r2, r16
+; CHECK-NEXT:cpc r3, r17
+; CHECK-NEXT:breq .LBB0_3
+; CHECK-NEXT:rjmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: ; %bb1
+; CHECK-NEXT:; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:mov r30, r1
+; CHECK-NEXT:rjmp .LBB0_1
+; CHECK-NEXT:  .LBB0_4: ; %bb3
+; CHECK-NEXT:ldd r24, Y+1 ; 1-

[llvm-branch-commits] [llvm] release/18.x: [AVR] Remove earlyclobber from LDDRdPtrQ (#85277) (PR #85512)

2024-03-17 Thread Ben Shi via llvm-branch-commits

benshi001 wrote:

> @benshi001 What do you think about merging this PR to the release branch?

Yes. I think so. Because this is a bug fix.

https://github.com/llvm/llvm-project/pull/85512
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Backport #85277 (PR #85422)

2024-03-17 Thread Ben Shi via llvm-branch-commits

https://github.com/benshi001 approved this pull request.


https://github.com/llvm/llvm-project/pull/85422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [AVR] Fix 16-bit LDDs with immediate overflows (#104923) (PR #106993)

2024-09-16 Thread Ben Shi via llvm-branch-commits

https://github.com/benshi001 approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/106993
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 25531a1 - [AVR] Optimize 8-bit logic left/right shifts

2021-01-23 Thread Ben Shi via llvm-branch-commits

Author: Ben Shi
Date: 2021-01-23T23:54:16+08:00
New Revision: 25531a1d9657897e648d93f776a3abb70e9816ef

URL: 
https://github.com/llvm/llvm-project/commit/25531a1d9657897e648d93f776a3abb70e9816ef
DIFF: 
https://github.com/llvm/llvm-project/commit/25531a1d9657897e648d93f776a3abb70e9816ef.diff

LOG: [AVR] Optimize 8-bit logic left/right shifts

Reviewed By: dylanmckay

Differential Revision: https://reviews.llvm.org/D89047

Added: 


Modified: 
llvm/lib/Target/AVR/AVRISelLowering.cpp
llvm/lib/Target/AVR/AVRISelLowering.h
llvm/lib/Target/AVR/AVRInstrInfo.td
llvm/test/CodeGen/AVR/ctlz.ll
llvm/test/CodeGen/AVR/ctpop.ll
llvm/test/CodeGen/AVR/cttz.ll
llvm/test/CodeGen/AVR/shift.ll

Removed: 




diff  --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp 
b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 9a464d0a52d8..bd5fd266d395 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -334,6 +334,24 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, 
SelectionDAG &DAG) const {
 llvm_unreachable("Invalid shift opcode");
   }
 
+  // Optimize int8 shifts.
+  if (VT.getSizeInBits() == 8) {
+if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
+  // Optimize LSL when 4 <= ShiftAmount <= 6.
+  Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+  Victim =
+  DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));
+  ShiftAmount -= 4;
+} else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount &&
+   ShiftAmount < 7) {
+  // Optimize LSR when 4 <= ShiftAmount <= 6.
+  Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+  Victim =
+  DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
+  ShiftAmount -= 4;
+}
+  }
+
   while (ShiftAmount--) {
 Victim = DAG.getNode(Opc8, dl, VT, Victim);
   }

diff  --git a/llvm/lib/Target/AVR/AVRISelLowering.h 
b/llvm/lib/Target/AVR/AVRISelLowering.h
index d1eaf53b15e9..ed9aea7a3297 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -56,6 +56,8 @@ enum NodeType {
   CMPC,
   /// Test for zero or minus instruction.
   TST,
+  /// Swap Rd[7:4] <-> Rd[3:0].
+  SWAP,
   /// Operand 0 and operand 1 are selection variable, operand 2
   /// is condition code and operand 3 is flag operand.
   SELECT_CC

diff  --git a/llvm/lib/Target/AVR/AVRInstrInfo.td 
b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 8de85f6b36c5..926d1f853a37 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -67,6 +67,9 @@ def AVRrolLoop : SDNode<"AVRISD::ROLLOOP", SDTIntShiftOp>;
 def AVRrorLoop : SDNode<"AVRISD::RORLOOP", SDTIntShiftOp>;
 def AVRasrLoop : SDNode<"AVRISD::ASRLOOP", SDTIntShiftOp>;
 
+// SWAP node.
+def AVRSwap : SDNode<"AVRISD::SWAP", SDTIntUnaryOp>;
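+
 //===----------------------------------------------------------------------===//
 // AVR Operands, Complex Patterns and Transformations Definitions.
 //===----------------------------------------------------------------------===//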
@@ -1729,7 +1732,7 @@ def SWAPRd : FRd<0b1001,
  (outs GPR8:$rd),
  (ins GPR8:$src),
  "swap\t$rd",
- [(set i8:$rd, (bswap i8:$src))]>;
+ [(set i8:$rd, (AVRSwap i8:$src))]>;
 
 // IO register bit set/clear operations.
 //:TODO: add patterns when popcount(imm)==2 to be expanded with 2 sbi/cbi

diff  --git a/llvm/test/CodeGen/AVR/ctlz.ll b/llvm/test/CodeGen/AVR/ctlz.ll
index 8681b8a3f1f5..93c2f0bdfa41 100644
--- a/llvm/test/CodeGen/AVR/ctlz.ll
+++ b/llvm/test/CodeGen/AVR/ctlz.ll
@@ -10,8 +10,7 @@ declare i8 @llvm.ctlz.i8(i8)
 
 ; CHECK-LABEL: count_leading_zeros:
 ; CHECK: cpi[[RESULT:r[0-9]+]], 0
-; CHECK: brne   .LBB0_1
-; CHECK: rjmp   .LBB0_2
+; CHECK: breq   .LBB0_2
 ; CHECK: mov[[SCRATCH:r[0-9]+]], {{.*}}[[RESULT]]
 ; CHECK: lsr{{.*}}[[SCRATCH]]
 ; CHECK: or {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
@@ -20,10 +19,8 @@ declare i8 @llvm.ctlz.i8(i8)
 ; CHECK: lsr{{.*}}[[RESULT]]
 ; CHECK: or {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
 ; CHECK: mov{{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
-; CHECK: lsr{{.*}}[[SCRATCH]]
-; CHECK: lsr{{.*}}[[SCRATCH]]
-; CHECK: lsr{{.*}}[[SCRATCH]]
-; CHECK: lsr{{.*}}[[SCRATCH]]
+; CHECK: swap   {{.*}}[[SCRATCH]]
+; CHECK: andi   {{.*}}[[SCRATCH]], 15
 ; CHECK: or {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
 ; CHECK: com{{.*}}[[SCRATCH]]
 ; CHECK: mov{{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
@@ -37,10 +34,7 @@ declare i8 @llvm.ctlz.i8(i8)
 ; CHECK: andi   {{.*}}[[SCRATCH]], 51
 ; CHECK: add{{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
 ; CHECK: mov{{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
-; CHECK: lsr{{.*}}[[RESULT]]
-; CHECK: lsr{{.*}}[[RESULT]]
-; CHECK: lsr{{.*}}[[RESULT]]
-; CHECK: lsr{{.*}}[[RESULT]]
+; CHECK: swap 

[llvm-branch-commits] [llvm] 1eb8c5c - [AVR] Optimize 16-bit comparison with constant

2021-01-23 Thread Ben Shi via llvm-branch-commits

Author: Ben Shi
Date: 2021-01-24T00:38:57+08:00
New Revision: 1eb8c5cd35ed0f3e06ea77a93824901f680ca1ed

URL: 
https://github.com/llvm/llvm-project/commit/1eb8c5cd35ed0f3e06ea77a93824901f680ca1ed
DIFF: 
https://github.com/llvm/llvm-project/commit/1eb8c5cd35ed0f3e06ea77a93824901f680ca1ed.diff

LOG: [AVR] Optimize 16-bit comparison with constant

Reviewed By: dylanmckay

Differential Revision: https://reviews.llvm.org/D93976

Added: 


Modified: 
llvm/lib/Target/AVR/AVRISelLowering.cpp
llvm/lib/Target/AVR/AVRISelLowering.h
llvm/test/CodeGen/AVR/cmp.ll

Removed: 




diff  --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp 
b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index bd5fd266d395..d919e08e468a 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -455,6 +455,36 @@ static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
   }
 }
 
+/// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands.
+SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, SDLoc DL) const {
+  assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
+ "LHS and RHS have different types");
+  assert(((LHS.getSimpleValueType() == MVT::i16) ||
+  (LHS.getSimpleValueType() == MVT::i8)) && "invalid comparison type");
+
+  SDValue Cmp;
+
+  if (LHS.getSimpleValueType() == MVT::i16 && dyn_cast<ConstantSDNode>(RHS)) {
+// Generate a CPI/CPC pair if RHS is a 16-bit constant.
+SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
+DAG.getIntPtrConstant(0, DL));
+SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
+DAG.getIntPtrConstant(1, DL));
+SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
+DAG.getIntPtrConstant(0, DL));
+SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
+DAG.getIntPtrConstant(1, DL));
+Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
+Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
+  } else {
+// Generate ordinary 16-bit comparison.
+Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
+  }
+
+  return Cmp;
+}
+
 /// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
 /// the given operands.
 SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode 
CC,
@@ -567,7 +597,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue 
RHS, ISD::CondCode CC,
 DAG.getIntPtrConstant(1, DL));
   Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
 } else {
-  Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
+  Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL);
   Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
 }
   } else if (VT == MVT::i64) {
@@ -605,7 +635,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue 
RHS, ISD::CondCode CC,
 DAG.getIntPtrConstant(1, DL));
   Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
 } else {
-  Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS0, RHS0);
+  Cmp = getAVRCmp(LHS0, RHS0, DAG, DL);
   Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
   Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
   Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
@@ -619,7 +649,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue 
RHS, ISD::CondCode CC,
 : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
   LHS, DAG.getIntPtrConstant(1, DL)));
 } else {
-  Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
+  Cmp = getAVRCmp(LHS, RHS, DAG, DL);
 }
   } else {
 llvm_unreachable("Invalid comparison size");

diff  --git a/llvm/lib/Target/AVR/AVRISelLowering.h 
b/llvm/lib/Target/AVR/AVRISelLowering.h
index ed9aea7a3297..29d814b6c952 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -138,6 +138,8 @@ class AVRTargetLowering : public TargetLowering {
 private:
   SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
 SelectionDAG &DAG, SDLoc dl) const;
+  SDValue getAVRCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+SDLoc dl) const;
   SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;

diff  --git a/llvm/test/CodeGen/AVR/cmp.ll b/llvm/test/CodeGen/AVR/cmp.ll
index b3224087a689..e9769068f911 100644
--- a/llvm/test/CodeGen/AVR/cmp.ll
+++ b/ll

[llvm-branch-commits] [llvm] 2a4acf3 - [AVR] Optimize 8-bit int shift

2021-01-23 Thread Ben Shi via llvm-branch-commits

Author: Ben Shi
Date: 2021-01-24T11:04:37+08:00
New Revision: 2a4acf3ea8db19981284468c354aea2835fbfa08

URL: 
https://github.com/llvm/llvm-project/commit/2a4acf3ea8db19981284468c354aea2835fbfa08
DIFF: 
https://github.com/llvm/llvm-project/commit/2a4acf3ea8db19981284468c354aea2835fbfa08.diff

LOG: [AVR] Optimize 8-bit int shift

Reviewed By: dylanmckay

Differential Revision: https://reviews.llvm.org/D90678

Added: 


Modified: 
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
llvm/lib/Target/AVR/AVRISelLowering.cpp
llvm/lib/Target/AVR/AVRISelLowering.h
llvm/lib/Target/AVR/AVRInstrInfo.td
llvm/test/CodeGen/AVR/shift.ll
llvm/test/CodeGen/AVR/smul-with-overflow.ll

Removed: 




diff  --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp 
b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index d10f0148cb58..a48d3d134bb5 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -1476,6 +1476,111 @@ bool AVRExpandPseudo::expand(Block &MBB, 
BlockIt MBBI) {
   return true;
 }
 
+template <>
+bool AVRExpandPseudo::expand<AVR::LSLB7Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+
+  // ror r24
+  // clr r24
+  // ror r24
+
+  buildMI(MBB, MBBI, AVR::RORRd)
+  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+  .addReg(DstReg, getKillRegState(DstIsKill));
+
+  buildMI(MBB, MBBI, AVR::EORRdRr)
+  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+  .addReg(DstReg, getKillRegState(DstIsKill))
+  .addReg(DstReg, getKillRegState(DstIsKill));
+
+  auto MIRRC =
+  buildMI(MBB, MBBI, AVR::RORRd)
+  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+  .addReg(DstReg, getKillRegState(DstIsKill));
+
+  if (ImpIsDead)
+MIRRC->getOperand(2).setIsDead();
+
+  // SREG is always implicitly killed
+  MIRRC->getOperand(3).setIsKill();
+
+  MI.eraseFromParent();
+  return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LSRB7Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+
+  // rol r24
+  // clr r24
+  // rol r24
+
+  buildMI(MBB, MBBI, AVR::ADCRdRr)
+  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+  .addReg(DstReg, getKillRegState(DstIsKill))
+  .addReg(DstReg, getKillRegState(DstIsKill));
+
+  buildMI(MBB, MBBI, AVR::EORRdRr)
+  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+  .addReg(DstReg, getKillRegState(DstIsKill))
+  .addReg(DstReg, getKillRegState(DstIsKill));
+
+  auto MIRRC =
+  buildMI(MBB, MBBI, AVR::ADCRdRr)
+  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+  .addReg(DstReg, getKillRegState(DstIsKill))
+  .addReg(DstReg, getKillRegState(DstIsKill));
+
+  if (ImpIsDead)
+MIRRC->getOperand(3).setIsDead();
+
+  // SREG is always implicitly killed
+  MIRRC->getOperand(4).setIsKill();
+
+  MI.eraseFromParent();
+  return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ASRB7Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+
+  // lsl r24
+  // sbc r24, r24
+
+  buildMI(MBB, MBBI, AVR::ADDRdRr)
+  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+  .addReg(DstReg, getKillRegState(DstIsKill))
+  .addReg(DstReg, getKillRegState(DstIsKill));
+
+  auto MIRRC = buildMI(MBB, MBBI, AVR::SBCRdRr)
+  .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+  .addReg(DstReg, getKillRegState(DstIsKill))
+  .addReg(DstReg, getKillRegState(DstIsKill));
+
+  if (ImpIsDead)
+MIRRC->getOperand(3).setIsDead();
+
+  // SREG is always implicitly killed
+  MIRRC->getOperand(4).setIsKill();
+
+  MI.eraseFromParent();
+  return true;
+}
+
 template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
   MachineInstr &MI = *MBBI;
   Register DstLoReg, DstHiReg;
@@ -1697,6 +1802,9 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
 EXPAND(AVR::RORWRd);
 EXPAND(AVR::ROLWRd);
 EXPAND(AVR::ASRWRd);
+EXPAND(AVR::LSLB7Rd);
+EXPAND(AVR::LSRB7Rd);
+EXPAND(AVR::ASRB7Rd);
 EXPAND(AVR::SEXT);
 EXPAND(AVR::ZEXT);
 EXPAND(AVR::SPREAD);

diff  --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp 
b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index d919e08e468a..3e7c2984655a 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AV

[llvm-branch-commits] [clang] 89a5147 - [clang][AVR] Improve avr-ld command line options

2021-01-24 Thread Ben Shi via llvm-branch-commits

Author: Ben Shi
Date: 2021-01-25T12:01:26+08:00
New Revision: 89a5147e5a0c2e886cdf7ffa34799c069d825940

URL: 
https://github.com/llvm/llvm-project/commit/89a5147e5a0c2e886cdf7ffa34799c069d825940
DIFF: 
https://github.com/llvm/llvm-project/commit/89a5147e5a0c2e886cdf7ffa34799c069d825940.diff

LOG: [clang][AVR] Improve avr-ld command line options

Added: 
clang/test/Driver/Inputs/basic_avr_tree/bin/avr-ld
clang/test/Driver/Inputs/basic_avr_tree/lib/avr/lib/libavr.a
clang/test/Driver/Inputs/basic_avr_tree/lib/gcc/avr/5.4.0/libgcc.a
clang/test/Driver/avr-ld.c

Modified: 
clang/lib/Driver/ToolChains/AVR.cpp

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/AVR.cpp 
b/clang/lib/Driver/ToolChains/AVR.cpp
index ae56b7b5249e..c999c3b87f84 100644
--- a/clang/lib/Driver/ToolChains/AVR.cpp
+++ b/clang/lib/Driver/ToolChains/AVR.cpp
@@ -32,247 +32,248 @@ const struct {
   StringRef Name;
   std::string SubPath;
   StringRef Family;
+  unsigned DataAddr;
 } MCUInfo[] = {
-{"at90s1200", "", "avr1"},
-{"attiny11", "", "avr1"},
-{"attiny12", "", "avr1"},
-{"attiny15", "", "avr1"},
-{"attiny28", "", "avr1"},
-{"at90s2313", "tiny-stack", "avr2"},
-{"at90s2323", "tiny-stack", "avr2"},
-{"at90s2333", "tiny-stack", "avr2"},
-{"at90s2343", "tiny-stack", "avr2"},
-{"at90s4433", "tiny-stack", "avr2"},
-{"attiny22", "tiny-stack", "avr2"},
-{"attiny26", "tiny-stack", "avr2"},
-{"at90s4414", "", "avr2"},
-{"at90s4434", "", "avr2"},
-{"at90s8515", "", "avr2"},
-{"at90c8534", "", "avr2"},
-{"at90s8535", "", "avr2"},
-{"attiny13", "avr25/tiny-stack", "avr25"},
-{"attiny13a", "avr25/tiny-stack", "avr25"},
-{"attiny2313", "avr25/tiny-stack", "avr25"},
-{"attiny2313a", "avr25/tiny-stack", "avr25"},
-{"attiny24", "avr25/tiny-stack", "avr25"},
-{"attiny24a", "avr25/tiny-stack", "avr25"},
-{"attiny25", "avr25/tiny-stack", "avr25"},
-{"attiny261", "avr25/tiny-stack", "avr25"},
-{"attiny261a", "avr25/tiny-stack", "avr25"},
-{"at86rf401", "avr25", "avr25"},
-{"ata5272", "avr25", "avr25"},
-{"attiny4313", "avr25", "avr25"},
-{"attiny44", "avr25", "avr25"},
-{"attiny44a", "avr25", "avr25"},
-{"attiny84", "avr25", "avr25"},
-{"attiny84a", "avr25", "avr25"},
-{"attiny45", "avr25", "avr25"},
-{"attiny85", "avr25", "avr25"},
-{"attiny441", "avr25", "avr25"},
-{"attiny461", "avr25", "avr25"},
-{"attiny461a", "avr25", "avr25"},
-{"attiny841", "avr25", "avr25"},
-{"attiny861", "avr25", "avr25"},
-{"attiny861a", "avr25", "avr25"},
-{"attiny87", "avr25", "avr25"},
-{"attiny43u", "avr25", "avr25"},
-{"attiny48", "avr25", "avr25"},
-{"attiny88", "avr25", "avr25"},
-{"attiny828", "avr25", "avr25"},
-{"at43usb355", "avr3", "avr3"},
-{"at76c711", "avr3", "avr3"},
-{"atmega103", "avr31", "avr31"},
-{"at43usb320", "avr31", "avr31"},
-{"attiny167", "avr35", "avr35"},
-{"at90usb82", "avr35", "avr35"},
-{"at90usb162", "avr35", "avr35"},
-{"ata5505", "avr35", "avr35"},
-{"atmega8u2", "avr35", "avr35"},
-{"atmega16u2", "avr35", "avr35"},
-{"atmega32u2", "avr35", "avr35"},
-{"attiny1634", "avr35", "avr35"},
-{"atmega8", "avr4", "avr4"},
-{"ata6289", "avr4", "avr4"},
-{"atmega8a", "avr4", "avr4"},
-{"ata6285", "avr4", "avr4"},
-{"ata6286", "avr4", "avr4"},
-{"atmega48", "avr4", "avr4"},
-{"atmega48a", "avr4", "avr4"},
-{"atmega48pa", "avr4", "avr4"},
-{"atmega48pb", "avr4", "avr4"},
-{"atmega48p", "avr4", "avr4"},
-{"atmega88", "avr4", "avr4"},
-{"atmega88a", "avr4", "avr4"},
-{"atmega88p", "avr4", "avr4"},
-{"atmega88pa", "avr4", "avr4"},
-{"atmega88pb", "avr4", "avr4"},
-{"atmega8515", "avr4", "avr4"},
-{"atmega8535", "avr4", "avr4"},
-{"atmega8hva", "avr4", "avr4"},
-{"at90pwm1", "avr4", "avr4"},
-{"at90pwm2", "avr4", "avr4"},
-{"at90pwm2b", "avr4", "avr4"},
-{"at90pwm3", "avr4", "avr4"},
-{"at90pwm3b", "avr4", "avr4"},
-{"at90pwm81", "avr4", "avr4"},
-{"ata5790", "avr5", "avr5"},
-{"ata5795", "avr5", "avr5"},
-{"atmega16", "avr5", "avr5"},
-{"atmega16a", "avr5", "avr5"},
-{"atmega161", "avr5", "avr5"},
-{"atmega162", "avr5", "avr5"},
-{"atmega163", "avr5", "avr5"},
-{"atmega164a", "avr5", "avr5"},
-{"atmega164p", "avr5", "avr5"},
-{"atmega164pa", "avr5", "avr5"},
-{"atmega165", "avr5", "avr5"},
-{"atmega165a", "avr5", "avr5"},
-{"atmega165p", "avr5", "avr5"},
-{"atmega165pa", "avr5", "avr5"},
-{"atmega168", "avr5", "avr5"},
-{"atmega168a", "avr5", "avr5"},
-{"atmega168p", "avr5", "avr5"},
-{"atmega168pa", "avr5", "avr5"},
-{"atmega168pb", "avr5", "avr5"},
-{"atmega169", "avr5", "avr5"},
-{"atmega169a", "avr5", "avr5"},
-{"atmega169p", "avr5", "avr5"

[llvm-branch-commits] [clang] 01d9f13 - Revert "[clang][AVR] Improve avr-ld command line options"

2021-01-25 Thread Ben Shi via llvm-branch-commits

Author: Ben Shi
Date: 2021-01-25T16:33:58+08:00
New Revision: 01d9f13c3a5914baf9739348ef666e348a7b2a2f

URL: 
https://github.com/llvm/llvm-project/commit/01d9f13c3a5914baf9739348ef666e348a7b2a2f
DIFF: 
https://github.com/llvm/llvm-project/commit/01d9f13c3a5914baf9739348ef666e348a7b2a2f.diff

LOG: Revert "[clang][AVR] Improve avr-ld command line options"

This reverts commit 89a5147e5a0c2e886cdf7ffa34799c069d825940.

Added: 


Modified: 
clang/lib/Driver/ToolChains/AVR.cpp

Removed: 
clang/test/Driver/Inputs/basic_avr_tree/bin/avr-ld
clang/test/Driver/Inputs/basic_avr_tree/lib/avr/lib/libavr.a
clang/test/Driver/Inputs/basic_avr_tree/lib/gcc/avr/5.4.0/libgcc.a
clang/test/Driver/avr-ld.c



diff  --git a/clang/lib/Driver/ToolChains/AVR.cpp 
b/clang/lib/Driver/ToolChains/AVR.cpp
index c999c3b87f84..ae56b7b5249e 100644
--- a/clang/lib/Driver/ToolChains/AVR.cpp
+++ b/clang/lib/Driver/ToolChains/AVR.cpp
@@ -32,248 +32,247 @@ const struct {
   StringRef Name;
   std::string SubPath;
   StringRef Family;
-  unsigned DataAddr;
 } MCUInfo[] = {
-{"at90s1200", "", "avr1", 0},
-{"attiny11", "", "avr1", 0},
-{"attiny12", "", "avr1", 0},
-{"attiny15", "", "avr1", 0},
-{"attiny28", "", "avr1", 0},
-{"at90s2313", "tiny-stack", "avr2", 0x800060},
-{"at90s2323", "tiny-stack", "avr2", 0x800060},
-{"at90s2333", "tiny-stack", "avr2", 0x800060},
-{"at90s2343", "tiny-stack", "avr2", 0x800060},
-{"at90s4433", "tiny-stack", "avr2, 0x800060"},
-{"attiny22", "tiny-stack", "avr2", 0x800060},
-{"attiny26", "tiny-stack", "avr2", 0x800060},
-{"at90s4414", "", "avr2", 0x800060},
-{"at90s4434", "", "avr2", 0x800060},
-{"at90s8515", "", "avr2", 0x800060},
-{"at90c8534", "", "avr2", 0x800060},
-{"at90s8535", "", "avr2", 0x800060},
-{"attiny13", "avr25/tiny-stack", "avr25", 0x800060},
-{"attiny13a", "avr25/tiny-stack", "avr25", 0x800060},
-{"attiny2313", "avr25/tiny-stack", "avr25", 0x800060},
-{"attiny2313a", "avr25/tiny-stack", "avr25", 0x800060},
-{"attiny24", "avr25/tiny-stack", "avr25", 0x800060},
-{"attiny24a", "avr25/tiny-stack", "avr25", 0x800060},
-{"attiny25", "avr25/tiny-stack", "avr25", 0x800060},
-{"attiny261", "avr25/tiny-stack", "avr25", 0x800060},
-{"attiny261a", "avr25/tiny-stack", "avr25", 0x800060},
-{"at86rf401", "avr25", "avr25", 0x800060},
-{"ata5272", "avr25", "avr25", 0x800100},
-{"attiny4313", "avr25", "avr25", 0x800060},
-{"attiny44", "avr25", "avr25", 0x800060},
-{"attiny44a", "avr25", "avr25", 0x800060},
-{"attiny84", "avr25", "avr25", 0x800060},
-{"attiny84a", "avr25", "avr25", 0x800060},
-{"attiny45", "avr25", "avr25", 0x800060},
-{"attiny85", "avr25", "avr25", 0x800060},
-{"attiny441", "avr25", "avr25", 0x800100},
-{"attiny461", "avr25", "avr25", 0x800060},
-{"attiny461a", "avr25", "avr25", 0x800060},
-{"attiny841", "avr25", "avr25", 0x800100},
-{"attiny861", "avr25", "avr25", 0x800060},
-{"attiny861a", "avr25", "avr25", 0x800060},
-{"attiny87", "avr25", "avr25", 0x800100},
-{"attiny43u", "avr25", "avr25", 0x800060},
-{"attiny48", "avr25", "avr25", 0x800100},
-{"attiny88", "avr25", "avr25", 0x800100},
-{"attiny828", "avr25", "avr25", 0x800100},
-{"at43usb355", "avr3", "avr3", 0x800100},
-{"at76c711", "avr3", "avr3", 0x800060},
-{"atmega103", "avr31", "avr31", 0x800060},
-{"at43usb320", "avr31", "avr31", 0x800060},
-{"attiny167", "avr35", "avr35", 0x800100},
-{"at90usb82", "avr35", "avr35", 0x800100},
-{"at90usb162", "avr35", "avr35", 0x800100},
-{"ata5505", "avr35", "avr35", 0x800100},
-{"atmega8u2", "avr35", "avr35", 0x800100},
-{"atmega16u2", "avr35", "avr35", 0x800100},
-{"atmega32u2", "avr35", "avr35", 0x800100},
-{"attiny1634", "avr35", "avr35", 0x800100},
-{"atmega8", "avr4", "avr4", 0x800060},
-{"ata6289", "avr4", "avr4", 0x800100},
-{"atmega8a", "avr4", "avr4", 0x800060},
-{"ata6285", "avr4", "avr4", 0x800100},
-{"ata6286", "avr4", "avr4", 0x800100},
-{"atmega48", "avr4", "avr4", 0x800100},
-{"atmega48a", "avr4", "avr4", 0x800100},
-{"atmega48pa", "avr4", "avr4", 0x800100},
-{"atmega48pb", "avr4", "avr4", 0x800100},
-{"atmega48p", "avr4", "avr4", 0x800100},
-{"atmega88", "avr4", "avr4", 0x800100},
-{"atmega88a", "avr4", "avr4", 0x800100},
-{"atmega88p", "avr4", "avr4", 0x800100},
-{"atmega88pa", "avr4", "avr4", 0x800100},
-{"atmega88pb", "avr4", "avr4", 0x800100},
-{"atmega8515", "avr4", "avr4", 0x800060},
-{"atmega8535", "avr4", "avr4", 0x800060},
-{"atmega8hva", "avr4", "avr4", 0x800100},
-{"at90pwm1", "avr4", "avr4", 0x800100},
-{"at90pwm2", "avr4", "avr4", 0x800100},
-{"at90pwm2b", "avr4", "avr4", 0x800100},
-{"at90pwm3", "avr4", "avr4", 0x800100},
-{"at90pwm3b", "avr4", "avr4", 

[llvm-branch-commits] [llvm] 351a45c - [RISCV][NFC] Add new test cases for mul

2021-01-06 Thread Ben Shi via llvm-branch-commits

Author: Ben Shi
Date: 2021-01-06T18:55:56+08:00
New Revision: 351a45ca73de3ec4524d2194674e2a92f8b7d9b5

URL: 
https://github.com/llvm/llvm-project/commit/351a45ca73de3ec4524d2194674e2a92f8b7d9b5
DIFF: 
https://github.com/llvm/llvm-project/commit/351a45ca73de3ec4524d2194674e2a92f8b7d9b5.diff

LOG: [RISCV][NFC] Add new test cases for mul

Added: 


Modified: 
llvm/test/CodeGen/RISCV/mul.ll

Removed: 




diff  --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 9104b4089ab9..3703e4128c69 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -972,3 +972,207 @@ define i64 @muli64_m3840(i64 %a) nounwind {
   %1 = mul i64 %a, -3840
   ret i64 %1
 }
+
+define i128 @muli128_m3840(i128 %a) nounwind {
+; RV32I-LABEL: muli128_m3840:
+; RV32I:   # %bb.0:
+; RV32I-NEXT:addi sp, sp, -64
+; RV32I-NEXT:sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:sw s0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:lw a3, 0(a1)
+; RV32I-NEXT:lw a2, 4(a1)
+; RV32I-NEXT:lw a4, 8(a1)
+; RV32I-NEXT:lw a1, 12(a1)
+; RV32I-NEXT:mv s0, a0
+; RV32I-NEXT:addi a0, zero, -1
+; RV32I-NEXT:sw a0, 20(sp)
+; RV32I-NEXT:sw a0, 16(sp)
+; RV32I-NEXT:sw a0, 12(sp)
+; RV32I-NEXT:lui a0, 1048575
+; RV32I-NEXT:addi a0, a0, 256
+; RV32I-NEXT:sw a0, 8(sp)
+; RV32I-NEXT:sw a1, 36(sp)
+; RV32I-NEXT:sw a4, 32(sp)
+; RV32I-NEXT:sw a2, 28(sp)
+; RV32I-NEXT:addi a0, sp, 40
+; RV32I-NEXT:addi a1, sp, 24
+; RV32I-NEXT:addi a2, sp, 8
+; RV32I-NEXT:sw a3, 24(sp)
+; RV32I-NEXT:call __multi3@plt
+; RV32I-NEXT:lw a0, 52(sp)
+; RV32I-NEXT:lw a1, 48(sp)
+; RV32I-NEXT:lw a2, 44(sp)
+; RV32I-NEXT:lw a3, 40(sp)
+; RV32I-NEXT:sw a0, 12(s0)
+; RV32I-NEXT:sw a1, 8(s0)
+; RV32I-NEXT:sw a2, 4(s0)
+; RV32I-NEXT:sw a3, 0(s0)
+; RV32I-NEXT:lw s0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:addi sp, sp, 64
+; RV32I-NEXT:ret
+;
+; RV32IM-LABEL: muli128_m3840:
+; RV32IM:   # %bb.0:
+; RV32IM-NEXT:addi sp, sp, -64
+; RV32IM-NEXT:sw ra, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:sw s0, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT:lw a3, 0(a1)
+; RV32IM-NEXT:lw a2, 4(a1)
+; RV32IM-NEXT:lw a4, 8(a1)
+; RV32IM-NEXT:lw a1, 12(a1)
+; RV32IM-NEXT:mv s0, a0
+; RV32IM-NEXT:addi a0, zero, -1
+; RV32IM-NEXT:sw a0, 20(sp)
+; RV32IM-NEXT:sw a0, 16(sp)
+; RV32IM-NEXT:sw a0, 12(sp)
+; RV32IM-NEXT:lui a0, 1048575
+; RV32IM-NEXT:addi a0, a0, 256
+; RV32IM-NEXT:sw a0, 8(sp)
+; RV32IM-NEXT:sw a1, 36(sp)
+; RV32IM-NEXT:sw a4, 32(sp)
+; RV32IM-NEXT:sw a2, 28(sp)
+; RV32IM-NEXT:addi a0, sp, 40
+; RV32IM-NEXT:addi a1, sp, 24
+; RV32IM-NEXT:addi a2, sp, 8
+; RV32IM-NEXT:sw a3, 24(sp)
+; RV32IM-NEXT:call __multi3@plt
+; RV32IM-NEXT:lw a0, 52(sp)
+; RV32IM-NEXT:lw a1, 48(sp)
+; RV32IM-NEXT:lw a2, 44(sp)
+; RV32IM-NEXT:lw a3, 40(sp)
+; RV32IM-NEXT:sw a0, 12(s0)
+; RV32IM-NEXT:sw a1, 8(s0)
+; RV32IM-NEXT:sw a2, 4(s0)
+; RV32IM-NEXT:sw a3, 0(s0)
+; RV32IM-NEXT:lw s0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:lw ra, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:addi sp, sp, 64
+; RV32IM-NEXT:ret
+;
+; RV64I-LABEL: muli128_m3840:
+; RV64I:   # %bb.0:
+; RV64I-NEXT:addi sp, sp, -16
+; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:lui a2, 1048575
+; RV64I-NEXT:addiw a2, a2, 256
+; RV64I-NEXT:addi a3, zero, -1
+; RV64I-NEXT:call __multi3@plt
+; RV64I-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:addi sp, sp, 16
+; RV64I-NEXT:ret
+;
+; RV64IM-LABEL: muli128_m3840:
+; RV64IM:   # %bb.0:
+; RV64IM-NEXT:lui a2, 1048575
+; RV64IM-NEXT:addiw a2, a2, 256
+; RV64IM-NEXT:mul a1, a1, a2
+; RV64IM-NEXT:mulhu a3, a0, a2
+; RV64IM-NEXT:sub a3, a3, a0
+; RV64IM-NEXT:add a1, a3, a1
+; RV64IM-NEXT:mul a0, a0, a2
+; RV64IM-NEXT:ret
+  %1 = mul i128 %a, -3840
+  ret i128 %1
+}
+
+define i128 @muli128_m63(i128 %a) nounwind {
+; RV32I-LABEL: muli128_m63:
+; RV32I:   # %bb.0:
+; RV32I-NEXT:addi sp, sp, -64
+; RV32I-NEXT:sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:sw s0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:lw a3, 0(a1)
+; RV32I-NEXT:lw a2, 4(a1)
+; RV32I-NEXT:lw a4, 8(a1)
+; RV32I-NEXT:lw a1, 12(a1)
+; RV32I-NEXT:mv s0, a0
+; RV32I-NEXT:addi a0, zero, -1
+; RV32I-NEXT:sw a0, 20(sp)
+; RV32I-NEXT:sw a0, 16(sp)
+; RV32I-NEXT:sw a0, 12(sp)
+; RV32I-NEXT:addi a0, zero, -63
+; RV32I-NEXT:sw a0, 8(sp)
+; RV32I-NEXT:sw a1, 36(sp)
+; RV32I-NEXT:sw a4, 32(sp)
+; RV32I-NEXT:sw a2, 28(sp)
+; RV32I-NEXT:addi a0, sp, 40
+; RV32I-NEXT:addi a1, sp, 24
+; RV32I-NEXT:addi a2, sp, 8
+; RV3

[llvm-branch-commits] [llvm] 55f0a1b - [RISCV] Optimize multiplication with constant

2021-01-08 Thread Ben Shi via llvm-branch-commits

Author: Ben Shi
Date: 2021-01-09T10:37:21+08:00
New Revision: 55f0a1b06632688f08eb616fe02674cf2e666080

URL: 
https://github.com/llvm/llvm-project/commit/55f0a1b06632688f08eb616fe02674cf2e666080
DIFF: 
https://github.com/llvm/llvm-project/commit/55f0a1b06632688f08eb616fe02674cf2e666080.diff

LOG: [RISCV] Optimize multiplication with constant

1. Break a MUL by a specific constant into an SLLI and an ADD/SUB on riscv32
   with the M extension.
2. Break a MUL by a specific constant into two SLLIs and an ADD/SUB, if the
   constant needs a pair of LUI/ADDI to construct.

Reviewed by: craig.topper

Differential Revision: https://reviews.llvm.org/D93619

Added: 


Modified: 
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/mul.ll

Removed: 




diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp 
b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ba6548e48303..22d15bc8586b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3899,16 +3899,28 @@ bool 
RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
  SDValue C) const {
   // Check integral scalar types.
   if (VT.isScalarInteger()) {
-// Do not perform the transformation on riscv32 with the M extension.
-if (!Subtarget.is64Bit() && Subtarget.hasStdExtM())
+// Omit the optimization if the sub target has the M extension and the data
+// size exceeds XLen.
+if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
   return false;
 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
-  if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t))
+  // Break the MUL to a SLLI and an ADD/SUB.
+  const APInt &Imm = ConstNode->getAPIntValue();
+  if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
+  (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
+return true;
+  // Omit the following optimization if the sub target has the M extension
+  // and the data size >= XLen.
+  if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
 return false;
-  int64_t Imm = ConstNode->getSExtValue();
-  if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) ||
-  isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm))
+  // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
+  // a pair of LUI/ADDI.
+  if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
+APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
+if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
+(1 - ImmS).isPowerOf2())
 return true;
+  }
 }
   }
 

diff  --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 3703e4128c69..647004df3f0c 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -85,8 +85,8 @@ define signext i32 @mul_constant(i32 %a) nounwind {
 ;
 ; RV32IM-LABEL: mul_constant:
 ; RV32IM:   # %bb.0:
-; RV32IM-NEXT:addi a1, zero, 5
-; RV32IM-NEXT:mul a0, a0, a1
+; RV32IM-NEXT:slli a1, a0, 2
+; RV32IM-NEXT:add a0, a1, a0
 ; RV32IM-NEXT:ret
 ;
 ; RV64I-LABEL: mul_constant:
@@ -182,10 +182,12 @@ define i64 @mul64_constant(i64 %a) nounwind {
 ; RV32IM-LABEL: mul64_constant:
 ; RV32IM:   # %bb.0:
 ; RV32IM-NEXT:addi a2, zero, 5
-; RV32IM-NEXT:mul a1, a1, a2
-; RV32IM-NEXT:mulhu a3, a0, a2
+; RV32IM-NEXT:mulhu a2, a0, a2
+; RV32IM-NEXT:slli a3, a1, 2
 ; RV32IM-NEXT:add a1, a3, a1
-; RV32IM-NEXT:mul a0, a0, a2
+; RV32IM-NEXT:add a1, a2, a1
+; RV32IM-NEXT:slli a2, a0, 2
+; RV32IM-NEXT:add a0, a2, a0
 ; RV32IM-NEXT:ret
 ;
 ; RV64I-LABEL: mul64_constant:
@@ -300,8 +302,8 @@ define i32 @muli32_p65(i32 %a) nounwind {
 ;
 ; RV32IM-LABEL: muli32_p65:
 ; RV32IM:   # %bb.0:
-; RV32IM-NEXT:addi a1, zero, 65
-; RV32IM-NEXT:mul a0, a0, a1
+; RV32IM-NEXT:slli a1, a0, 6
+; RV32IM-NEXT:add a0, a1, a0
 ; RV32IM-NEXT:ret
 ;
 ; RV64I-LABEL: muli32_p65:
@@ -328,8 +330,8 @@ define i32 @muli32_p63(i32 %a) nounwind {
 ;
 ; RV32IM-LABEL: muli32_p63:
 ; RV32IM:   # %bb.0:
-; RV32IM-NEXT:addi a1, zero, 63
-; RV32IM-NEXT:mul a0, a0, a1
+; RV32IM-NEXT:slli a1, a0, 6
+; RV32IM-NEXT:sub a0, a1, a0
 ; RV32IM-NEXT:ret
 ;
 ; RV64I-LABEL: muli32_p63:
@@ -364,10 +366,12 @@ define i64 @muli64_p65(i64 %a) nounwind {
 ; RV32IM-LABEL: muli64_p65:
 ; RV32IM:   # %bb.0:
 ; RV32IM-NEXT:addi a2, zero, 65
-; RV32IM-NEXT:mul a1, a1, a2
-; RV32IM-NEXT:mulhu a3, a0, a2
+; RV32IM-NEXT:mulhu a2, a0, a2
+; RV32IM-NEXT:slli a3, a1, 6
 ; RV32IM-NEXT:add a1, a3, a1
-; RV32IM-NEXT:mul a0, a0, a2
+; RV32IM-NEXT:add a1, a2, a1
+; RV32IM-NEXT:slli a2, a0, 6
+; RV32IM-NEXT:add a0, a2, a0
 ; RV32IM-NEXT:ret
 ;

[llvm-branch-commits] [llvm] 9e4b682 - [RISCV][NFC] Add tests for multiplication with constant

2020-12-20 Thread Ben Shi via llvm-branch-commits

Author: Ben Shi
Date: 2020-12-21T10:20:36+08:00
New Revision: 9e4b682baf2c1eab8f47b8eaa8ffea2d846a68b2

URL: 
https://github.com/llvm/llvm-project/commit/9e4b682baf2c1eab8f47b8eaa8ffea2d846a68b2
DIFF: 
https://github.com/llvm/llvm-project/commit/9e4b682baf2c1eab8f47b8eaa8ffea2d846a68b2.diff

LOG: [RISCV][NFC] Add tests for multiplication with constant

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D93589

Added: 


Modified: 
llvm/test/CodeGen/RISCV/mul.ll

Removed: 




diff  --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 958417e5e748..9104b4089ab9 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -562,3 +562,413 @@ define i64 @muli64_m65(i64 %a) nounwind {
   %1 = mul i64 %a, -65
   ret i64 %1
 }
+
+define i32 @muli32_p384(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p384:
+; RV32I:   # %bb.0:
+; RV32I-NEXT:addi sp, sp, -16
+; RV32I-NEXT:sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:addi a1, zero, 384
+; RV32I-NEXT:call __mulsi3@plt
+; RV32I-NEXT:lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:addi sp, sp, 16
+; RV32I-NEXT:ret
+;
+; RV32IM-LABEL: muli32_p384:
+; RV32IM:   # %bb.0:
+; RV32IM-NEXT:addi a1, zero, 384
+; RV32IM-NEXT:mul a0, a0, a1
+; RV32IM-NEXT:ret
+;
+; RV64I-LABEL: muli32_p384:
+; RV64I:   # %bb.0:
+; RV64I-NEXT:addi sp, sp, -16
+; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:addi a1, zero, 384
+; RV64I-NEXT:call __muldi3@plt
+; RV64I-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:addi sp, sp, 16
+; RV64I-NEXT:ret
+;
+; RV64IM-LABEL: muli32_p384:
+; RV64IM:   # %bb.0:
+; RV64IM-NEXT:addi a1, zero, 384
+; RV64IM-NEXT:mul a0, a0, a1
+; RV64IM-NEXT:ret
+  %1 = mul i32 %a, 384
+  ret i32 %1
+}
+
+define i32 @muli32_p12288(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p12288:
+; RV32I:   # %bb.0:
+; RV32I-NEXT:addi sp, sp, -16
+; RV32I-NEXT:sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:lui a1, 3
+; RV32I-NEXT:call __mulsi3@plt
+; RV32I-NEXT:lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:addi sp, sp, 16
+; RV32I-NEXT:ret
+;
+; RV32IM-LABEL: muli32_p12288:
+; RV32IM:   # %bb.0:
+; RV32IM-NEXT:lui a1, 3
+; RV32IM-NEXT:mul a0, a0, a1
+; RV32IM-NEXT:ret
+;
+; RV64I-LABEL: muli32_p12288:
+; RV64I:   # %bb.0:
+; RV64I-NEXT:addi sp, sp, -16
+; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:lui a1, 3
+; RV64I-NEXT:call __muldi3@plt
+; RV64I-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:addi sp, sp, 16
+; RV64I-NEXT:ret
+;
+; RV64IM-LABEL: muli32_p12288:
+; RV64IM:   # %bb.0:
+; RV64IM-NEXT:lui a1, 3
+; RV64IM-NEXT:mul a0, a0, a1
+; RV64IM-NEXT:ret
+  %1 = mul i32 %a, 12288
+  ret i32 %1
+}
+
+define i32 @muli32_p4352(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p4352:
+; RV32I:   # %bb.0:
+; RV32I-NEXT:addi sp, sp, -16
+; RV32I-NEXT:sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:lui a1, 1
+; RV32I-NEXT:addi a1, a1, 256
+; RV32I-NEXT:call __mulsi3@plt
+; RV32I-NEXT:lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:addi sp, sp, 16
+; RV32I-NEXT:ret
+;
+; RV32IM-LABEL: muli32_p4352:
+; RV32IM:   # %bb.0:
+; RV32IM-NEXT:lui a1, 1
+; RV32IM-NEXT:addi a1, a1, 256
+; RV32IM-NEXT:mul a0, a0, a1
+; RV32IM-NEXT:ret
+;
+; RV64I-LABEL: muli32_p4352:
+; RV64I:   # %bb.0:
+; RV64I-NEXT:addi sp, sp, -16
+; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:lui a1, 1
+; RV64I-NEXT:addiw a1, a1, 256
+; RV64I-NEXT:call __muldi3@plt
+; RV64I-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:addi sp, sp, 16
+; RV64I-NEXT:ret
+;
+; RV64IM-LABEL: muli32_p4352:
+; RV64IM:   # %bb.0:
+; RV64IM-NEXT:lui a1, 1
+; RV64IM-NEXT:addiw a1, a1, 256
+; RV64IM-NEXT:mul a0, a0, a1
+; RV64IM-NEXT:ret
+  %1 = mul i32 %a, 4352
+  ret i32 %1
+}
+
+define i32 @muli32_p3840(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p3840:
+; RV32I:   # %bb.0:
+; RV32I-NEXT:addi sp, sp, -16
+; RV32I-NEXT:sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:lui a1, 1
+; RV32I-NEXT:addi a1, a1, -256
+; RV32I-NEXT:call __mulsi3@plt
+; RV32I-NEXT:lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:addi sp, sp, 16
+; RV32I-NEXT:ret
+;
+; RV32IM-LABEL: muli32_p3840:
+; RV32IM:   # %bb.0:
+; RV32IM-NEXT:lui a1, 1
+; RV32IM-NEXT:addi a1, a1, -256
+; RV32IM-NEXT:mul a0, a0, a1
+; RV32IM-NEXT:ret
+;
+; RV64I-LABEL: muli32_p3840:
+; RV64I:   # %bb.0:
+; RV64I-NEXT:addi sp, sp, -16
+; RV64I-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:lui a1, 1
+; RV64I-NEXT:addiw a1, a1, -256
+; RV64I-NEXT:call __muldi3@plt
+; RV64I-NEXT:ld ra, 8(sp) #