sdmitriev created this revision.
sdmitriev added a reviewer: andrew.w.kaylor.
Herald added subscribers: cfe-commits, javed.absar.

This patch enables Mem2Reg to handle loads/stores from/to bitcasted alloca
values, as long as the loaded/stored value is bitcastable to the allocated
type (see the example below). Such instruction sequences can be introduced
by the InstCombine pass as part of load canonicalization.

    
  %f = alloca float, align 4
  ...
  %0 = getelementptr inbounds float, float* %A, i64 %idx
  %1 = bitcast float* %0 to i32*
  %2 = load i32, i32* %1, align 4
  %3 = bitcast float* %f to i32*
  store i32 %2, i32* %3, align 4


Repository:
  rC Clang

https://reviews.llvm.org/D56811

Files:
  test/CodeGen/aarch64-neon-vget.c
  test/CodeGen/arm_neon_intrinsics.c

Index: test/CodeGen/aarch64-neon-vget.c
===================================================================
--- test/CodeGen/aarch64-neon-vget.c
+++ test/CodeGen/aarch64-neon-vget.c
@@ -80,18 +80,12 @@
 }
 
 // CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
-// CHECK:   [[__REINT_242:%.*]] = alloca <4 x half>, align 8
-// CHECK:   [[__REINT1_242:%.*]] = alloca i16, align 2
-// CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
-// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
-// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
-// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
-// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
-// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
-// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
+// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1
+// CHECK:   [[TMP3:%.*]] = bitcast i16 [[VGET_LANE]] to half
+// CHECK:   [[CONV:%.*]] = fpext half [[TMP3]] to float
 // CHECK:   ret float [[CONV]]
 float32_t test_vget_lane_f16(float16x4_t a) {
   return vget_lane_f16(a, 1);
@@ -173,18 +167,12 @@
 }
 
 // CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 {
-// CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
-// CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
-// CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
-// CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
-// CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
-// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
-// CHECK:   store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
-// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
-// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
-// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
+// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP2]], i32 3
+// CHECK:   [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
+// CHECK:   [[CONV:%.*]] = fpext half [[TMP3]] to float
 // CHECK:   ret float [[CONV]]
 float32_t test_vgetq_lane_f16(float16x8_t a) {
   return vgetq_lane_f16(a, 3);
@@ -303,23 +291,14 @@
 }
 
 // CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
-// CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
-// CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
-// CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
 // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
-// CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
-// CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
-// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
-// CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
-// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK:   [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3
-// CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
-// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
-// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
-// CHECK:   ret <4 x half> [[TMP8]]
+// CHECK:   [[TMP1:%.*]] = bitcast half [[TMP0]] to i16
+// CHECK:   [[TMP2:%.*]] = bitcast <4 x half> %b to <4 x i16>
+// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP1]], i32 3
+// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[VSET_LANE]] to <4 x half>
+// CHECK:   ret <4 x half> [[TMP5]]
 float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
   return vset_lane_f16(*a, b, 3);
 }
@@ -400,23 +379,14 @@
 }
 
 // CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 {
-// CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
-// CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
-// CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
 // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
-// CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
-// CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
-// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK:   [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7
-// CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
-// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
-// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
-// CHECK:   ret <8 x half> [[TMP8]]
+// CHECK:   [[TMP1:%.*]] = bitcast half [[TMP0]] to i16
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x half> %b to <8 x i16>
+// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP1]], i32 7
+// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[VSET_LANE]] to <8 x half>
+// CHECK:   ret <8 x half> [[TMP5]]
 float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
   return vsetq_lane_f16(*a, b, 7);
 }
Index: test/CodeGen/arm_neon_intrinsics.c
===================================================================
--- test/CodeGen/arm_neon_intrinsics.c
+++ test/CodeGen/arm_neon_intrinsics.c
@@ -3387,18 +3387,12 @@
 }
 
 // CHECK-LABEL: @test_vget_lane_f16(
-// CHECK:   [[__REINT_242:%.*]] = alloca <4 x half>, align 8
-// CHECK:   [[__REINT1_242:%.*]] = alloca i16, align 2
-// CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
-// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
-// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
-// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
-// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
-// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
-// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
+// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1
+// CHECK:   [[TMP4:%.*]] = bitcast i16 [[VGET_LANE]] to half
+// CHECK:   [[CONV:%.*]] = fpext half [[TMP4]] to float
 // CHECK:   ret float [[CONV]]
 float32_t test_vget_lane_f16(float16x4_t a) {
   return vget_lane_f16(a, 1);
@@ -3480,18 +3474,12 @@
 }
 
 // CHECK-LABEL: @test_vgetq_lane_f16(
-// CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
-// CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
-// CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
-// CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
-// CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
-// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
-// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
-// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
-// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
-// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
+// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP2]], i32 3
+// CHECK:   [[TMP4:%.*]] = bitcast i16 [[VGET_LANE]] to half
+// CHECK:   [[CONV:%.*]] = fpext half [[TMP4]] to float
 // CHECK:   ret float [[CONV]]
 float32_t test_vgetq_lane_f16(float16x8_t a) {
   return vgetq_lane_f16(a, 3);
@@ -14129,23 +14117,14 @@
 }
 
 // CHECK-LABEL: @test_vset_lane_f16(
-// CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
-// CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
-// CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
 // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
-// CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
-// CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
-// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
-// CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
-// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK:   [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 1
-// CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
-// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
-// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
-// CHECK:   ret <4 x half> [[TMP8]]
+// CHECK:   [[TMP1:%.*]] = bitcast half [[TMP0]] to i16
+// CHECK:   [[TMP2:%.*]] = bitcast <4 x half> %b to <4 x i16>
+// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP1]], i32 1
+// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[VSET_LANE]] to <4 x half>
+// CHECK:   ret <4 x half> [[TMP5]]
 float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
   return vset_lane_f16(*a, b, 1);
 }
@@ -14226,23 +14205,14 @@
 }
 
 // CHECK-LABEL: @test_vsetq_lane_f16(
-// CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
-// CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
-// CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
 // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
-// CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
-// CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
-// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK:   [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3
-// CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
-// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
-// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
-// CHECK:   ret <8 x half> [[TMP8]]
+// CHECK:   [[TMP1:%.*]] = bitcast half [[TMP0]] to i16
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x half> %b to <8 x i16>
+// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP1]], i32 3
+// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[VSET_LANE]] to <8 x half>
+// CHECK:   ret <8 x half> [[TMP5]]
 float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
   return vsetq_lane_f16(*a, b, 3);
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to