aemerson updated this revision to Diff 191745.
aemerson added a comment.
Minor test tweak.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D59655/new/
https://reviews.llvm.org/D59655
Files:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-neon-intrinsics.c
clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/IR/AutoUpgrade.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
llvm/test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
llvm/test/CodeGen/AArch64/arm64-vadd.ll
llvm/test/CodeGen/AArch64/autoupgrade-aarch64-neon-addp-float.ll
Index: llvm/test/CodeGen/AArch64/autoupgrade-aarch64-neon-addp-float.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/autoupgrade-aarch64-neon-addp-float.ll
@@ -0,0 +1,9 @@
+; RUN: opt -S < %s -mtriple=arm64 | FileCheck %s
+declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
+
+; CHECK: call <4 x float> @llvm.aarch64.neon.faddp.v4f32
+define <4 x float> @upgrade_aarch64_neon_addp_float(<4 x float> %a, <4 x float> %b) {
+ %res = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %res
+}
+
Index: llvm/test/CodeGen/AArch64/arm64-vadd.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -712,7 +712,7 @@
;CHECK: faddp.2s
%tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = load <2 x float>, <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -721,7 +721,7 @@
;CHECK: faddp.4s
%tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = load <4 x float>, <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -730,13 +730,13 @@
;CHECK: faddp.2d
%tmp1 = load <2 x double>, <2 x double>* %A
%tmp2 = load <2 x double>, <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
-declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>) nounwind readnone
define <2 x i64> @uaddl_duprhs(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: uaddl_duprhs
Index: llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -65,27 +65,27 @@
ret <2 x i64> %val
}
-declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>)
+declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_faddp_v2f32:
- %val = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+ %val = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: faddp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_faddp_v4f32:
- %val = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+ %val = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: faddp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_faddp_v2f64:
- %val = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+ %val = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: faddp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -151,7 +151,7 @@
# DEBUG: .. the first uncovered type index: 1, OK
#
# DEBUG-NEXT: G_INTRINSIC (opcode {{[0-9]+}}): 0 type indices
-# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG: .. type index coverage check SKIPPED: no rules defined
#
# DEBUG-NEXT: G_INTRINSIC_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices
# DEBUG: .. type index coverage check SKIPPED: no rules defined
Index: llvm/test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir
+++ /dev/null
@@ -1,32 +0,0 @@
-# RUN: llc -mtriple aarch64-unknown-unknown -O0 -start-before=legalizer -pass-remarks-missed=gisel* %s -o - 2>&1 | FileCheck %s
-#
-# Check that we fall back on @llvm.aarch64.neon.addp and ensure that we get the
-# correct instruction.
-# https://bugs.llvm.org/show_bug.cgi?id=40968
-
---- |
- define <2 x float> @foo(<2 x float> %v1, <2 x float> %v2) {
- entry:
- %v3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %v1, <2 x float> %v2)
- ret <2 x float> %v3
- }
- declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)
-...
----
-name: foo
-alignment: 2
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $d0, $d1
- ; CHECK: remark:
- ; CHECK-SAME: unable to legalize instruction: %2:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.addp), %0:_(<2 x s32>), %1:_(<2 x s32>)
- ; CHECK: faddp
- ; CHECK-NOT: addp
- %0:_(<2 x s32>) = COPY $d0
- %1:_(<2 x s32>) = COPY $d1
- %2:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.addp), %0(<2 x s32>), %1(<2 x s32>)
- $d0 = COPY %2(<2 x s32>)
- RET_ReallyLR implicit $d0
-
-...
Index: llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -34,9 +34,6 @@
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
-
- bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
};
} // End llvm namespace.
#endif
Index: llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -64,11 +64,6 @@
return std::make_pair(0, EltTy);
});
- // HACK: Check that the intrinsic isn't ambiguous.
- // (See: https://bugs.llvm.org/show_bug.cgi?id=40968)
- getActionDefinitionsBuilder(G_INTRINSIC)
- .custom();
-
getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s16, s32, s64})
.clampScalar(0, s16, s64)
@@ -517,30 +512,11 @@
return false;
case TargetOpcode::G_VAARG:
return legalizeVaArg(MI, MRI, MIRBuilder);
- case TargetOpcode::G_INTRINSIC:
- return legalizeIntrinsic(MI, MRI, MIRBuilder);
}
llvm_unreachable("expected switch to return");
}
-bool AArch64LegalizerInfo::legalizeIntrinsic(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- // HACK: Don't allow faddp/addp for now. We don't pass down the type info
- // necessary to get this right today.
- //
- // It looks like addp/faddp is the only intrinsic that's impacted by this.
- // All other intrinsics fully describe the required types in their names.
- //
- // (See: https://bugs.llvm.org/show_bug.cgi?id=40968)
- const MachineOperand &IntrinOp = MI.getOperand(1);
- if (IntrinOp.isIntrinsicID() &&
- IntrinOp.getIntrinsicID() == Intrinsic::aarch64_neon_addp)
- return false;
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3499,7 +3499,7 @@
}
defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
Index: llvm/lib/IR/AutoUpgrade.cpp
===================================================================
--- llvm/lib/IR/AutoUpgrade.cpp
+++ llvm/lib/IR/AutoUpgrade.cpp
@@ -568,6 +568,17 @@
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
+ if (Name.startswith("aarch64.neon.addp")) {
+ VectorType *ArgTy = cast<VectorType>(F->arg_begin()->getType());
+ if (ArgTy->getElementType()->isFloatingPointTy()) {
+ auto fArgs = F->getFunctionType()->params();
+ Type *Tys[] = {fArgs[0], fArgs[1]};
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::aarch64_neon_faddp, Tys);
+ return true;
+ }
+ return false;
+ }
break;
}
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -289,6 +289,7 @@
// Pairwise Add
def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
+ def int_aarch64_neon_faddp : AdvSIMD_2VectorArg_Intrinsic;
// Long Pairwise Add
// FIXME: In theory, we shouldn't need intrinsics for saddlp or
Index: clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
===================================================================
--- clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -736,14 +736,14 @@
}
// CHECK-LABEL: test_vpadd_f16
-// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[ADD]]
float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
return vpadd_f16(a, b);
}
// CHECK-LABEL: test_vpaddq_f16
-// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[ADD]]
float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
return vpaddq_f16(a, b);
Index: clang/test/CodeGen/aarch64-neon-intrinsics.c
===================================================================
--- clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -4411,7 +4411,7 @@
// CHECK-LABEL: @test_vpadd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b)
+// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
@@ -4475,7 +4475,7 @@
// CHECK-LABEL: @test_vpaddq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VPADDQ_V2_I]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
@@ -4485,7 +4485,7 @@
// CHECK-LABEL: @test_vpaddq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b)
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VPADDQ_V2_I]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -5095,6 +5095,12 @@
switch (BuiltinID) {
default: break;
+ case NEON::BI__builtin_neon_vpadd_v:
+ case NEON::BI__builtin_neon_vpaddq_v:
+ // We don't allow fp/int overloading of intrinsics.
+ if (VTy->getElementType()->isFloatingPointTy())
+ Int = Intrinsic::aarch64_neon_faddp;
+ break;
case NEON::BI__builtin_neon_vabs_v:
case NEON::BI__builtin_neon_vabsq_v:
if (VTy->getElementType()->isFloatingPointTy())
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits