[PATCH] D104790: [x86] fix mm_undefined intrinsics to use arbitrary frozen bit pattern

Juneyoung Lee via Phabricator via cfe-commits Wed, 23 Jun 2021 09:03:39 -0700

aqjune created this revision.
aqjune added reviewers: efriedma, spatel, craig.topper, RKSimon.
Herald added a subscriber: pengfei.
aqjune requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.


This fixes lowering of `mm*_undefined*` intrinsics to use `freeze poison` 
instead of zeroinitializer.
(mentioned & discussed in D103874 <https://reviews.llvm.org/D103874>)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D104790

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/X86/avx-builtins.c
  clang/test/CodeGen/X86/avx2-builtins.c
  clang/test/CodeGen/X86/avx512f-builtins.c
  clang/test/CodeGen/X86/sse-builtins.c
  clang/test/CodeGen/X86/sse2-builtins.c

Index: clang/test/CodeGen/X86/sse2-builtins.c
===================================================================
--- clang/test/CodeGen/X86/sse2-builtins.c
+++ clang/test/CodeGen/X86/sse2-builtins.c
@@ -1630,13 +1630,16 @@
 
 __m128d test_mm_undefined_pd() {
   // CHECK-LABEL: test_mm_undefined_pd
-  // CHECK: ret <2 x double> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: ret <2 x double> %[[FR]]
   return _mm_undefined_pd();
 }
 
 __m128i test_mm_undefined_si128() {
   // CHECK-LABEL: test_mm_undefined_si128
-  // CHECK: ret <2 x i64> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: ret <2 x i64> %[[FR_BC]]
   return _mm_undefined_si128();
 }
 
Index: clang/test/CodeGen/X86/sse-builtins.c
===================================================================
--- clang/test/CodeGen/X86/sse-builtins.c
+++ clang/test/CodeGen/X86/sse-builtins.c
@@ -786,7 +786,9 @@
 
 __m128 test_mm_undefined_ps() {
   // CHECK-LABEL: test_mm_undefined_ps
-  // CHECK: ret <4 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
+  // CHECK: ret <4 x float> %[[FR_BC]]
   return _mm_undefined_ps();
 }
 
Index: clang/test/CodeGen/X86/avx512f-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3780,25 +3780,32 @@
 
 __m512 test_mm512_undefined() {
   // CHECK-LABEL: @test_mm512_undefined
-  // CHECK: ret <16 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float>
+  // CHECK: ret <16 x float> %[[FR_BC]]
   return _mm512_undefined();
 }
 
 __m512 test_mm512_undefined_ps() {
   // CHECK-LABEL: @test_mm512_undefined_ps
-  // CHECK: ret <16 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float>
+  // CHECK: ret <16 x float> %[[FR_BC]]
   return _mm512_undefined_ps();
 }
 
 __m512d test_mm512_undefined_pd() {
   // CHECK-LABEL: @test_mm512_undefined_pd
-  // CHECK: ret <8 x double> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: ret <8 x double> %[[FR]]
   return _mm512_undefined_pd();
 }
 
 __m512i test_mm512_undefined_epi32() {
   // CHECK-LABEL: @test_mm512_undefined_epi32
-  // CHECK: ret <8 x i64> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <8 x i64>
+  // CHECK: ret <8 x i64> %[[FR_BC]]
   return _mm512_undefined_epi32();
 }
 
Index: clang/test/CodeGen/X86/avx2-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx2-builtins.c
+++ clang/test/CodeGen/X86/avx2-builtins.c
@@ -455,7 +455,9 @@
 
 __m128i test_mm_i32gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_epi64
-  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
   return _mm_i32gather_epi64(b, c, 2);
 }
 
@@ -467,7 +469,9 @@
 
 __m256i test_mm256_i32gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
   return _mm256_i32gather_epi64(b, c, 2);
 }
 
@@ -479,10 +483,11 @@
 
 __m128d test_mm_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_pd
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
   // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
-  // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i32gather_pd(b, c, 2);
 }
 
@@ -494,10 +499,11 @@
 
 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_pd
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
   // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x double>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
-  // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+  // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i32gather_pd(b, c, 2);
 }
 
@@ -509,10 +515,12 @@
 
 __m128 test_mm_i32gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i32gather_ps(b, c, 2);
 }
 
@@ -524,10 +532,12 @@
 
 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i32gather_ps
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <8 x float>
   // CHECK:         [[CMP:%.*]] = fcmp oeq <8 x float>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
-  // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
+  // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %[[FR_BC]], i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
   return _mm256_i32gather_ps(b, c, 2);
 }
 
@@ -563,7 +573,9 @@
 
 __m128i test_mm_i64gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_epi64
-  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
   return _mm_i64gather_epi64(b, c, 2);
 }
 
@@ -575,7 +587,9 @@
 
 __m256i test_mm256_i64gather_epi64(long long const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
   return _mm256_i64gather_epi64(b, c, 2);
 }
 
@@ -587,10 +601,11 @@
 
 __m128d test_mm_i64gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_pd
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
   // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
-  // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %[[FR]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i64gather_pd(b, c, 2);
 }
 
@@ -602,8 +617,9 @@
 
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_pd
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
   // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
-  // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+  // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i64gather_pd(b, c, 2);
 }
 
@@ -615,10 +631,12 @@
 
 __m128 test_mm_i64gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i64gather_ps(b, c, 2);
 }
 
@@ -630,10 +648,12 @@
 
 __m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm256_i64gather_ps(b, c, 2);
 }
 
Index: clang/test/CodeGen/X86/avx-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx-builtins.c
+++ clang/test/CodeGen/X86/avx-builtins.c
@@ -2063,19 +2063,24 @@
 
 __m256 test_mm256_undefined_ps() {
   // CHECK-LABEL: test_mm256_undefined_ps
-  // CHECK: ret <8 x float> zeroinitializer
+  // CHECK: freeze <4 x double> poison
+  // CHECK: bitcast <4 x double> %{{.*}} to <8 x float>
+  // CHECK: ret <8 x float> %{{.*}}
   return _mm256_undefined_ps();
 }
 
 __m256d test_mm256_undefined_pd() {
   // CHECK-LABEL: test_mm256_undefined_pd
-  // CHECK: ret <4 x double> zeroinitializer
+  // CHECK: freeze <4 x double> poison
+  // CHECK: ret <4 x double> %{{.*}}
   return _mm256_undefined_pd();
 }
 
 __m256i test_mm256_undefined_si256() {
   // CHECK-LABEL: test_mm256_undefined_si256
-  // CHECK: ret <4 x i64> zeroinitializer
+  // CHECK: freeze <4 x double> poison
+  // CHECK: bitcast <4 x double> %{{.*}} to <4 x i64>
+  // CHECK: ret <4 x i64> %{{.*}}
   return _mm256_undefined_si256();
 }
 
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -12491,12 +12491,9 @@
   case X86::BI__builtin_ia32_undef128:
   case X86::BI__builtin_ia32_undef256:
   case X86::BI__builtin_ia32_undef512:
-    // The x86 definition of "undef" is not the same as the LLVM definition
-    // (PR32176). We leave optimizing away an unnecessary zero constant to the
-    // IR optimizer and backend.
-    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
-    // value, we should use that here instead of a zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
+    // The x86 definition of "undef" is equivalent to "freeze poison" in LLVM
+    // (PR32176).
+    return Builder.CreateFreeze(PoisonValue::get(ConvertType(E->getType())));
   case X86::BI__builtin_ia32_vec_init_v8qi:
   case X86::BI__builtin_ia32_vec_init_v4hi:
   case X86::BI__builtin_ia32_vec_init_v2si:

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D104790: [x86] fix mm*_undefined* intrinsics to use arbitrary frozen bit pattern

Reply via email to

[PATCH] D104790: [x86] fix mm_undefined intrinsics to use arbitrary frozen bit pattern