Author: David Sherwood
Date: 2022-02-28T10:51:43-08:00
New Revision: 64534d2f406c0cb87502b617940cb0d39b7ff489
URL: https://github.com/llvm/llvm-project/commit/64534d2f406c0cb87502b617940cb0d39b7ff489
DIFF: https://github.com/llvm/llvm-project/commit/64534d2f406c0cb87502b617940cb0d39b7ff489.diff

LOG: [InstCombine] Bail out of load-store forwarding for scalable vector types

This patch fixes an invalid TypeSize->uint64_t implicit conversion in
FoldReinterpretLoadFromConst. If the size of the constant is scalable,
we bail out of the optimisation for now. (A standalone sketch of the
new guard appears after the diff below.)

Tests added here:

  Transforms/InstCombine/load-store-forward.ll

Differential Revision: https://reviews.llvm.org/D120240

(cherry picked from commit 47eff645d8e873ba531014751c1c06a716a367e9)

Added: 

Modified: 
    llvm/lib/Analysis/ConstantFolding.cpp
    llvm/test/Transforms/InstCombine/load-store-forward.ll

Removed: 


################################################################################
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 7cf69f613c669..c0a9304b6257f 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -589,14 +589,17 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
   if (BytesLoaded > 32 || BytesLoaded == 0)
     return nullptr;
 
-  int64_t InitializerSize = DL.getTypeAllocSize(C->getType()).getFixedSize();
-
   // If we're not accessing anything in this constant, the result is undefined.
   if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
     return UndefValue::get(IntType);
 
+  // TODO: We should be able to support scalable types.
+  TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
+  if (InitializerSize.isScalable())
+    return nullptr;
+
   // If we're not accessing anything in this constant, the result is undefined.
-  if (Offset >= InitializerSize)
+  if (Offset >= InitializerSize.getFixedValue())
     return UndefValue::get(IntType);
 
   unsigned char RawBytes[32] = {0};
diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll
index c1a01454772f2..c7afe3c4c79fc 100644
--- a/llvm/test/Transforms/InstCombine/load-store-forward.ll
+++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll
@@ -120,3 +120,172 @@ define i32 @vec_store_load_overlap(i32* %p) {
   %load = load i32, i32* %p5, align 2
   ret i32 %load
 }
+
+define i32 @load_i32_store_nxv4i32(i32* %a) {
+; CHECK-LABEL: @load_i32_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %1 = load i32, i32* %a, align 4
+  ret i32 %1
+}
+
+define i64 @load_i64_store_nxv8i8(i8* %a) {
+; CHECK-LABEL: @load_i64_store_nxv8i8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[A:%.*]] to <vscale x 8 x i8>*
+; CHECK-NEXT:    store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8>* [[TMP0]], align 16
+; CHECK-NEXT:    [[A2:%.*]] = bitcast i8* [[A]] to i64*
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, i64* [[A2]], align 8
+; CHECK-NEXT:    ret i64 [[LOAD]]
+;
+entry:
+  %0 = bitcast i8* %a to <vscale x 8 x i8>*
+  store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8>* %0, align 16
+  %a2 = bitcast i8* %a to i64*
+  %load = load i64, i64* %a2, align 8
+  ret i64 %load
+}
+
+define i64 @load_i64_store_nxv4i32(i32* %a) {
+; CHECK-LABEL: @load_i64_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    [[A2:%.*]] = bitcast i32* [[A]] to i64*
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, i64* [[A2]], align 8
+; CHECK-NEXT:    ret i64 [[LOAD]]
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %a2 = bitcast i32* %a to i64*
+  %load = load i64, i64* %a2, align 8
+  ret i64 %load
+}
+
+define i8 @load_i8_store_nxv4i32(i32* %a) {
+; CHECK-LABEL: @load_i8_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    [[A2:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[A2]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %a2 = bitcast i32* %a to i8*
+  %load = load i8, i8* %a2, align 1
+  ret i8 %load
+}
+
+define float @load_f32_store_nxv4f32(float* %a) {
+; CHECK-LABEL: @load_f32_store_nxv4f32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <vscale x 4 x float>*
+; CHECK-NEXT:    store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[A]], align 4
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+entry:
+  %0 = bitcast float* %a to <vscale x 4 x float>*
+  store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.0, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* %0, align 16
+  %1 = load float, float* %a, align 4
+  ret float %1
+}
+
+define i32 @load_i32_store_nxv4f32(float* %a) {
+; CHECK-LABEL: @load_i32_store_nxv4f32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <vscale x 4 x float>*
+; CHECK-NEXT:    store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* [[TMP0]], align 16
+; CHECK-NEXT:    [[A2:%.*]] = bitcast float* [[A]] to i32*
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[A2]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+entry:
+  %0 = bitcast float* %a to <vscale x 4 x float>*
+  store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.0, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* %0, align 16
+  %a2 = bitcast float* %a to i32*
+  %load = load i32, i32* %a2, align 4
+  ret i32 %load
+}
+
+define <4 x i32> @load_v4i32_store_nxv4i32(i32* %a) {
+; CHECK-LABEL: @load_v4i32_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %1 = bitcast i32* %a to <4 x i32>*
+  %2 = load <4 x i32>, <4 x i32>* %1, align 16
+  ret <4 x i32> %2
+}
+
+define <4 x i16> @load_v4i16_store_nxv4i32(i32* %a) {
+; CHECK-LABEL: @load_v4i16_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to <4 x i16>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 16
+; CHECK-NEXT:    ret <4 x i16> [[TMP2]]
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %1 = bitcast i32* %a to <4 x i16>*
+  %2 = load <4 x i16>, <4 x i16>* %1, align 16
+  ret <4 x i16> %2
+}
+
+; Loaded data type exceeds the known minimum size of the store.
+define i64 @load_i64_store_nxv4i8(i8* %a) {
+; CHECK-LABEL: @load_i64_store_nxv4i8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[A:%.*]] to <vscale x 4 x i8>*
+; CHECK-NEXT:    store <vscale x 4 x i8> shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i32 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i8>* [[TMP0]], align 16
+; CHECK-NEXT:    [[A2:%.*]] = bitcast i8* [[A]] to i64*
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, i64* [[A2]], align 8
+; CHECK-NEXT:    ret i64 [[LOAD]]
+;
+entry:
+  %0 = bitcast i8* %a to <vscale x 4 x i8>*
+  store <vscale x 4 x i8> shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i32 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i8>* %0, align 16
+  %a2 = bitcast i8* %a to i64*
+  %load = load i64, i64* %a2, align 8
+  ret i64 %load
+}
+
+; Loaded data size is unknown - we cannot guarantee it won't
+; exceed the store size.
+define <vscale x 4 x i8> @load_nxv4i8_store_nxv4i32(i32* %a) {
+; CHECK-LABEL: @load_nxv4i8_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to <vscale x 4 x i8>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i8> [[TMP2]]
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %1 = bitcast i32* %a to <vscale x 4 x i8>*
+  %2 = load <vscale x 4 x i8>, <vscale x 4 x i8>* %1, align 16
+  ret <vscale x 4 x i8> %2
+}
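For readers less familiar with TypeSize, below is a minimal standalone
sketch of the guard pattern this patch introduces, lifted out of its
InstCombine surroundings. The function foldLoadSketch and its trimmed
signature are illustrative stand-ins, not the actual
FoldReinterpretLoadFromConst; TypeSize, DataLayout::getTypeAllocSize,
TypeSize::isScalable, TypeSize::getFixedValue and UndefValue::get are
the real LLVM APIs the patch relies on.

  // Sketch: bail out of constant-load folding when the initializer's
  // allocation size is scalable (a multiple of the runtime vscale).
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/Support/TypeSize.h"

  using namespace llvm;

  static Constant *foldLoadSketch(Constant *C, Type *IntType, int64_t Offset,
                                  const DataLayout &DL) {
    // getTypeAllocSize returns a TypeSize, not a plain integer. For a
    // scalable vector such as <vscale x 4 x i32>, only the known minimum
    // size is available at compile time, so an implicit conversion to
    // uint64_t would be invalid; bail out instead.
    TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
    if (InitializerSize.isScalable())
      return nullptr;

    // The size is now known to be fixed, so the comparison is safe.
    if (Offset >= static_cast<int64_t>(InitializerSize.getFixedValue()))
      return UndefValue::get(IntType);

    // ... byte-level extraction would continue here as in the original ...
    return nullptr;
  }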

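As a usage note, the updated test file can be exercised on its own; the
build-tree paths below are illustrative and assume a configured LLVM
checkout:

  build/bin/llvm-lit llvm/test/Transforms/InstCombine/load-store-forward.ll
  build/bin/opt -passes=instcombine -S llvm/test/Transforms/InstCombine/load-store-forward.ll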