jmciver created this revision.
Herald added a project: All.
jmciver added reviewers: vitalybuka, jdoerfert, rjmccall, aqjune, efriedma,
nikic.
jmciver added subscribers: mattd, asavonic, pengfei, pcwang-thead, sstefan1,
xbolva00, nlopes.
jmciver edited the summary of this revision.
jmciver published this revision for review.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
Add noundef attribute support for matrix and vector type loads used by
element-access or shufflevector operations.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D138983
Files:
clang/lib/CodeGen/CGExpr.cpp
clang/test/CodeGen/matrix-type-operators.c
clang/test/CodeGen/vector-noundef.c
clang/test/CodeGenCXX/vector-noundef.cpp
Index: clang/test/CodeGenCXX/vector-noundef.cpp
===================================================================
--- clang/test/CodeGenCXX/vector-noundef.cpp
+++ clang/test/CodeGenCXX/vector-noundef.cpp
@@ -6,6 +6,9 @@
using VecOfThreeChars __attribute__((ext_vector_type(3))) = char;
using VecOfThreeUChars __attribute__((ext_vector_type(3))) = unsigned char;
+using VecOfFourFloats __attribute__((ext_vector_type(4))) = float;
+using VecOfTwoFloats __attribute__((ext_vector_type(2))) = float;
+
// CHECK-LABEL: @_Z15getElement4BoolRDv4_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
@@ -82,3 +85,33 @@
{
return a[0];
}
+
+// CHECK-LABEL: @_Z16vectorSubsectionRDv2_fRDv4_f(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VEC2_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VEC4_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[VEC2:%.*]], ptr [[VEC2_ADDR]], align 8
+// CHECK-NEXT: store ptr [[VEC4:%.*]], ptr [[VEC4_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC4_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[TMP0]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VEC2_ADDR]], align 8
+// CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT: ret void
+//
+// DISABLE-LABEL: @_Z16vectorSubsectionRDv2_fRDv4_f(
+// DISABLE-NEXT: entry:
+// DISABLE-NEXT: [[VEC2_ADDR:%.*]] = alloca ptr, align 8
+// DISABLE-NEXT: [[VEC4_ADDR:%.*]] = alloca ptr, align 8
+// DISABLE-NEXT: store ptr [[VEC2:%.*]], ptr [[VEC2_ADDR]], align 8
+// DISABLE-NEXT: store ptr [[VEC4:%.*]], ptr [[VEC4_ADDR]], align 8
+// DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC4_ADDR]], align 8
+// DISABLE-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
+// DISABLE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+// DISABLE-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VEC2_ADDR]], align 8
+// DISABLE-NEXT: store <2 x float> [[TMP2]], ptr [[TMP3]], align 8
+// DISABLE-NEXT: ret void
+//
+void vectorSubsection(VecOfTwoFloats& vec2, VecOfFourFloats& vec4) {
+ vec2 = vec4.xy;
+}
Index: clang/test/CodeGen/vector-noundef.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/vector-noundef.c
@@ -0,0 +1,107 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding -triple x86_64-gnu-linux -target-feature +avx512f -O0 -enable-noundef-load-analysis -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding -triple x86_64-gnu-linux -target-feature +avx512f -O0 -no-enable-noundef-load-analysis -emit-llvm -o - %s | FileCheck %s --check-prefix=DISABLE
+
+#include <immintrin.h>
+
+// CHECK-LABEL: @test_mm_mask_div_ss(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[__W_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[__U_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[__A_ADDR_I2:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[__B_ADDR_I2:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[__U_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[__A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[__B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[__W:%.*]], <4 x float>* [[__W_ADDR]], align 16
+// CHECK-NEXT: store i8 [[__U:%.*]], i8* [[__U_ADDR]], align 1
+// CHECK-NEXT: store <4 x float> [[__A:%.*]], <4 x float>* [[__A_ADDR]], align 16
+// CHECK-NEXT: store <4 x float> [[__B:%.*]], <4 x float>* [[__B_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR]], align 16, !noundef [[NOUNDEF2:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[__U_ADDR]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: store <4 x float> [[TMP0]], <4 x float>* [[__W_ADDR_I]], align 16
+// CHECK-NEXT: store i8 [[TMP1]], i8* [[__U_ADDR_I]], align 1
+// CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[__A_ADDR_I2]], align 16
+// CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[__B_ADDR_I2]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I2]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[__A_ADDR_I]], align 16
+// CHECK-NEXT: store <4 x float> [[TMP5]], <4 x float>* [[__B_ADDR_I]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[TMP7]], i32 0
+// CHECK-NEXT: [[DIV_I:%.*]] = fdiv float [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP8]], float [[DIV_I]], i32 0
+// CHECK-NEXT: store <4 x float> [[VECINS_I]], <4 x float>* [[__A_ADDR_I]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[__A_ADDR_I2]], align 16
+// CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[__U_ADDR_I]], align 1
+// CHECK-NEXT: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: [[TMP12:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR_I]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i64 0
+// CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP12]], i64 0
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[TMP10]] to <8 x i1>
+// CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
+// CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP14]]
+// CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP17]], i64 0
+// CHECK-NEXT: ret <4 x float> [[TMP18]]
+//
+// DISABLE-LABEL: @test_mm_mask_div_ss(
+// DISABLE-NEXT: entry:
+// DISABLE-NEXT: [[__A_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT: [[__B_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT: [[__W_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT: [[__U_ADDR_I:%.*]] = alloca i8, align 1
+// DISABLE-NEXT: [[__A_ADDR_I2:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT: [[__B_ADDR_I2:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT: [[__W_ADDR:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT: [[__U_ADDR:%.*]] = alloca i8, align 1
+// DISABLE-NEXT: [[__A_ADDR:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT: [[__B_ADDR:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT: store <4 x float> [[__W:%.*]], <4 x float>* [[__W_ADDR]], align 16
+// DISABLE-NEXT: store i8 [[__U:%.*]], i8* [[__U_ADDR]], align 1
+// DISABLE-NEXT: store <4 x float> [[__A:%.*]], <4 x float>* [[__A_ADDR]], align 16
+// DISABLE-NEXT: store <4 x float> [[__B:%.*]], <4 x float>* [[__B_ADDR]], align 16
+// DISABLE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR]], align 16
+// DISABLE-NEXT: [[TMP1:%.*]] = load i8, i8* [[__U_ADDR]], align 1
+// DISABLE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR]], align 16
+// DISABLE-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR]], align 16
+// DISABLE-NEXT: store <4 x float> [[TMP0]], <4 x float>* [[__W_ADDR_I]], align 16
+// DISABLE-NEXT: store i8 [[TMP1]], i8* [[__U_ADDR_I]], align 1
+// DISABLE-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[__A_ADDR_I2]], align 16
+// DISABLE-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[__B_ADDR_I2]], align 16
+// DISABLE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16
+// DISABLE-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I2]], align 16
+// DISABLE-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT: store <4 x float> [[TMP5]], <4 x float>* [[__B_ADDR_I]], align 16
+// DISABLE-NEXT: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I]], align 16
+// DISABLE-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
+// DISABLE-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[TMP7]], i32 0
+// DISABLE-NEXT: [[DIV_I:%.*]] = fdiv float [[VECEXT1_I]], [[VECEXT_I]]
+// DISABLE-NEXT: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP8]], float [[DIV_I]], i32 0
+// DISABLE-NEXT: store <4 x float> [[VECINS_I]], <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[__A_ADDR_I2]], align 16
+// DISABLE-NEXT: [[TMP10:%.*]] = load i8, i8* [[__U_ADDR_I]], align 1
+// DISABLE-NEXT: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16
+// DISABLE-NEXT: [[TMP12:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR_I]], align 16
+// DISABLE-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i64 0
+// DISABLE-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP12]], i64 0
+// DISABLE-NEXT: [[TMP15:%.*]] = bitcast i8 [[TMP10]] to <8 x i1>
+// DISABLE-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
+// DISABLE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP14]]
+// DISABLE-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP17]], i64 0
+// DISABLE-NEXT: ret <4 x float> [[TMP18]]
+//
+__m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return _mm_mask_div_ss(__W,__U,__A,__B);
+}
Index: clang/test/CodeGen/matrix-type-operators.c
===================================================================
--- clang/test/CodeGen/matrix-type-operators.c
+++ clang/test/CodeGen/matrix-type-operators.c
@@ -1217,7 +1217,7 @@
void insert_compound_stmt(dx5x5_t a) {
// CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
- // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8
+ // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8, !noundef [[NOUNDEF]]
// CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
// CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
// CHECK-NEXT: [[A2:%.*]] = load <25 x double>, <25 x double>* [[A_PTR]], align 8
@@ -1245,7 +1245,7 @@
// CHECK-NEXT: [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>*
// OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
// OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
- // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
+ // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4, !noundef [[NOUNDEF]]
// CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
// CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
// OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -1963,6 +1963,8 @@
if (LV.isVectorElt()) {
llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(),
LV.isVolatileQualified());
+ applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr,
+ LV.getType(), Load);
return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(),
"vecext"));
}
@@ -1986,6 +1988,8 @@
}
llvm::LoadInst *Load =
Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
+ applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr,
+ LV.getType(), Load);
return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext"));
}
@@ -2033,6 +2037,8 @@
RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
llvm::Value *Vec = Builder.CreateLoad(LV.getExtVectorAddress(),
LV.isVolatileQualified());
+ applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr,
+ LV.getType(), dyn_cast<llvm::LoadInst>(Vec));
const llvm::Constant *Elts = LV.getExtVectorElts();
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits