aqjune updated this revision to Diff 354678.
aqjune added a comment.
Minor fixes
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D104790/new/
https://reviews.llvm.org/D104790
Files:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/X86/avx-builtins.c
clang/test/CodeGen/X86/avx2-builtins.c
clang/test/CodeGen/X86/avx512f-builtins.c
clang/test/CodeGen/X86/sse-builtins.c
clang/test/CodeGen/X86/sse2-builtins.c
llvm/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
Index: llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
===================================================================
--- llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -6386,18 +6386,30 @@
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
+define <4 x float> @test_mm_undefined_ps() {
+; CHECK-LABEL: test_mm_undefined_ps:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %v = freeze <2 x double> poison
+ %w = bitcast <2 x double> %v to <4 x float>
+ ret <4 x float> %w
+}
+
define <2 x double> @test_mm_undefined_pd() {
; CHECK-LABEL: test_mm_undefined_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- ret <2 x double> undef
+ %v = freeze <2 x double> poison
+ ret <2 x double> %v
}
define <2 x i64> @test_mm_undefined_si128() {
; CHECK-LABEL: test_mm_undefined_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- ret <2 x i64> undef
+ %v = freeze <2 x double> poison
+ %w = bitcast <2 x double> %v to <2 x i64>
+ ret <2 x i64> %w
}
define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
Index: llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
===================================================================
--- llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -2965,32 +2965,55 @@
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
-define <2 x double> @test_mm_undefined_pd() nounwind {
-; CHECK-LABEL: test_mm_undefined_pd:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
- ret <2 x double> undef
-}
-
define <4 x double> @test_mm256_undefined_pd() nounwind {
; CHECK-LABEL: test_mm256_undefined_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
- ret <4 x double> undef
+ %v = freeze <4 x double> poison
+ ret <4 x double> %v
}
define <8 x float> @test_mm256_undefined_ps() nounwind {
; CHECK-LABEL: test_mm256_undefined_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
- ret <8 x float> undef
+ %v = freeze <4 x double> poison
+ %w = bitcast <4 x double> %v to <8 x float>
+ ret <8 x float> %w
}
define <4 x i64> @test_mm256_undefined_si256() nounwind {
; CHECK-LABEL: test_mm256_undefined_si256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
- ret <4 x i64> undef
+ %v = freeze <4 x double> poison
+ %w = bitcast <4 x double> %v to <4 x i64>
+ ret <4 x i64> %w
+}
+
+define <16 x float> @test_mm512_undefined() nounwind {
+; CHECK-LABEL: test_mm512_undefined:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}}
+ %v = freeze <8 x double> poison
+ %w = bitcast <8 x double> %v to <16 x float>
+ ret <16 x float> %w
+}
+
+define <8 x double> @test_mm512_undefined_pd() nounwind {
+; CHECK-LABEL: test_mm512_undefined_pd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}}
+ %v = freeze <8 x double> poison
+ ret <8 x double> %v
+}
+
+define <8 x i64> @test_mm512_undefined_epi32() nounwind {
+; CHECK-LABEL: test_mm512_undefined_epi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}}
+ %v = freeze <8 x i64> poison
+ ret <8 x i64> %v
}
define <4 x double> @test_mm256_unpackhi_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26011,10 +26011,11 @@
TLI.getPointerTy(DAG.getDataLayout()));
EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
- // If source is undef or we know it won't be used, use a zero vector
- // to break register dependency.
+ // If source is undef, frozen undef with one use only, or we
+ // know it won't be used, use a zero vector to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
- if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
+ if (Src.isUndef() || (Src.isFreezeUndef() && Src.hasOneUse()) ||
+ ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
// Cast mask to an integer type.
@@ -26052,10 +26053,11 @@
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
- // If source is undef or we know it won't be used, use a zero vector
- // to break register dependency.
+ // If source is undef, frozen undef with one use only, or we
+ // know it won't be used, use a zero vector to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
- if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
+ if (Src.isUndef() || (Src.isFreezeUndef() && Src.hasOneUse()) ||
+ ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12497,6 +12497,10 @@
if (N0.isUndef())
return DAG.getUNDEF(VT);
+ // bitcast (freeze undef) -> freeze undef
+ if (N0.isFreezeUndef() && N0.hasOneUse())
+ return DAG.getFreeze(DAG.getUNDEF(VT));
+
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize types, unless both types are integer and the
// scalar type is legal. Only do this before legalize ops, since the target
Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -207,6 +207,7 @@
inline bool isTargetOpcode() const;
inline bool isMachineOpcode() const;
inline bool isUndef() const;
+ inline bool isFreezeUndef() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc &getDebugLoc() const;
inline void dump() const;
@@ -1150,6 +1151,10 @@
return Node->isUndef();
}
+inline bool SDValue::isFreezeUndef() const {
+ return Node->getOpcode() == ISD::FREEZE && Node->getOperand(0).isUndef();
+}
+
inline bool SDValue::use_empty() const {
return !Node->hasAnyUseOfValue(ResNo);
}
Index: clang/test/CodeGen/X86/sse2-builtins.c
===================================================================
--- clang/test/CodeGen/X86/sse2-builtins.c
+++ clang/test/CodeGen/X86/sse2-builtins.c
@@ -1630,13 +1630,16 @@
__m128d test_mm_undefined_pd() {
// CHECK-LABEL: test_mm_undefined_pd
- // CHECK: ret <2 x double> zeroinitializer
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+ // CHECK: ret <2 x double> %[[FR]]
return _mm_undefined_pd();
}
__m128i test_mm_undefined_si128() {
// CHECK-LABEL: test_mm_undefined_si128
- // CHECK: ret <2 x i64> zeroinitializer
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+ // CHECK: ret <2 x i64> %[[FR_BC]]
return _mm_undefined_si128();
}
Index: clang/test/CodeGen/X86/sse-builtins.c
===================================================================
--- clang/test/CodeGen/X86/sse-builtins.c
+++ clang/test/CodeGen/X86/sse-builtins.c
@@ -786,7 +786,9 @@
__m128 test_mm_undefined_ps() {
// CHECK-LABEL: test_mm_undefined_ps
- // CHECK: ret <4 x float> zeroinitializer
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
+ // CHECK: ret <4 x float> %[[FR_BC]]
return _mm_undefined_ps();
}
Index: clang/test/CodeGen/X86/avx512f-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3780,25 +3780,32 @@
__m512 test_mm512_undefined() {
// CHECK-LABEL: @test_mm512_undefined
- // CHECK: ret <16 x float> zeroinitializer
+ // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float>
+ // CHECK: ret <16 x float> %[[FR_BC]]
return _mm512_undefined();
}
__m512 test_mm512_undefined_ps() {
// CHECK-LABEL: @test_mm512_undefined_ps
- // CHECK: ret <16 x float> zeroinitializer
+ // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float>
+ // CHECK: ret <16 x float> %[[FR_BC]]
return _mm512_undefined_ps();
}
__m512d test_mm512_undefined_pd() {
// CHECK-LABEL: @test_mm512_undefined_pd
- // CHECK: ret <8 x double> zeroinitializer
+ // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+ // CHECK: ret <8 x double> %[[FR]]
return _mm512_undefined_pd();
}
__m512i test_mm512_undefined_epi32() {
// CHECK-LABEL: @test_mm512_undefined_epi32
- // CHECK: ret <8 x i64> zeroinitializer
+ // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <8 x i64>
+ // CHECK: ret <8 x i64> %[[FR_BC]]
return _mm512_undefined_epi32();
}
Index: clang/test/CodeGen/X86/avx2-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx2-builtins.c
+++ clang/test/CodeGen/X86/avx2-builtins.c
@@ -455,7 +455,9 @@
__m128i test_mm_i32gather_epi64(long long const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_epi64
- // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+ // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
return _mm_i32gather_epi64(b, c, 2);
}
@@ -467,7 +469,9 @@
__m256i test_mm256_i32gather_epi64(long long const *b, __m128i c) {
// CHECK-LABEL: test_mm256_i32gather_epi64
- // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
+ // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+ // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
return _mm256_i32gather_epi64(b, c, 2);
}
@@ -479,10 +483,11 @@
__m128d test_mm_i32gather_pd(double const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_pd
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
// CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
- // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
return _mm_i32gather_pd(b, c, 2);
}
@@ -494,10 +499,11 @@
__m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
// CHECK-LABEL: test_mm256_i32gather_pd
+ // CHECK: %[[FR:.*]] = freeze <4 x double> poison
// CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
// CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
- // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+ // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
return _mm256_i32gather_pd(b, c, 2);
}
@@ -509,10 +515,12 @@
__m128 test_mm_i32gather_ps(float const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_ps
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
// CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
// CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
- // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm_i32gather_ps(b, c, 2);
}
@@ -524,10 +532,12 @@
__m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i32gather_ps
+ // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <8 x float>
// CHECK: [[CMP:%.*]] = fcmp oeq <8 x float>
// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
// CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
- // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
+ // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %[[FR_BC]], i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
return _mm256_i32gather_ps(b, c, 2);
}
@@ -563,7 +573,9 @@
__m128i test_mm_i64gather_epi64(long long const *b, __m128i c) {
// CHECK-LABEL: test_mm_i64gather_epi64
- // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+ // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
return _mm_i64gather_epi64(b, c, 2);
}
@@ -575,7 +587,9 @@
__m256i test_mm256_i64gather_epi64(long long const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i64gather_epi64
- // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
+ // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+ // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
return _mm256_i64gather_epi64(b, c, 2);
}
@@ -587,10 +601,11 @@
__m128d test_mm_i64gather_pd(double const *b, __m128i c) {
// CHECK-LABEL: test_mm_i64gather_pd
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
// CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
- // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %[[FR]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
return _mm_i64gather_pd(b, c, 2);
}
@@ -602,8 +617,9 @@
__m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i64gather_pd
+ // CHECK: %[[FR:.*]] = freeze <4 x double> poison
// CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
- // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+ // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
return _mm256_i64gather_pd(b, c, 2);
}
@@ -615,10 +631,12 @@
__m128 test_mm_i64gather_ps(float const *b, __m128i c) {
// CHECK-LABEL: test_mm_i64gather_ps
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
// CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
// CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
- // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm_i64gather_ps(b, c, 2);
}
@@ -630,10 +648,12 @@
__m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i64gather_ps
+ // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+ // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
// CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
// CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
- // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm256_i64gather_ps(b, c, 2);
}
Index: clang/test/CodeGen/X86/avx-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx-builtins.c
+++ clang/test/CodeGen/X86/avx-builtins.c
@@ -2063,19 +2063,24 @@
__m256 test_mm256_undefined_ps() {
// CHECK-LABEL: test_mm256_undefined_ps
- // CHECK: ret <8 x float> zeroinitializer
+ // CHECK: freeze <4 x double> poison
+ // CHECK: bitcast <4 x double> %{{.*}} to <8 x float>
+ // CHECK: ret <8 x float> %{{.*}}
return _mm256_undefined_ps();
}
__m256d test_mm256_undefined_pd() {
// CHECK-LABEL: test_mm256_undefined_pd
- // CHECK: ret <4 x double> zeroinitializer
+ // CHECK: freeze <4 x double> poison
+ // CHECK: ret <4 x double> %{{.*}}
return _mm256_undefined_pd();
}
__m256i test_mm256_undefined_si256() {
// CHECK-LABEL: test_mm256_undefined_si256
- // CHECK: ret <4 x i64> zeroinitializer
+ // CHECK: freeze <4 x double> poison
+ // CHECK: bitcast <4 x double> %{{.*}} to <4 x i64>
+ // CHECK: ret <4 x i64> %{{.*}}
return _mm256_undefined_si256();
}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -12491,12 +12491,9 @@
case X86::BI__builtin_ia32_undef128:
case X86::BI__builtin_ia32_undef256:
case X86::BI__builtin_ia32_undef512:
- // The x86 definition of "undef" is not the same as the LLVM definition
- // (PR32176). We leave optimizing away an unnecessary zero constant to the
- // IR optimizer and backend.
- // TODO: If we had a "freeze" IR instruction to generate a fixed undef
- // value, we should use that here instead of a zero.
- return llvm::Constant::getNullValue(ConvertType(E->getType()));
+ // The x86 definition of "undef" is equivalent to "freeze poison" in LLVM
+ // (PR32176).
+ return Builder.CreateFreeze(PoisonValue::get(ConvertType(E->getType())));
case X86::BI__builtin_ia32_vec_init_v8qi:
case X86::BI__builtin_ia32_vec_init_v4hi:
case X86::BI__builtin_ia32_vec_init_v2si:
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits