DylanFleming-arm updated this revision to Diff 360879.
DylanFleming-arm added a comment.
Removed the changes to the RISC-V code.
Added a check that the target is AArch64 before adding the default
vscale_range attribute.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D106277/new/
https://reviews.llvm.org/D106277
Files:
clang/lib/CodeGen/CodeGenFunction.cpp
clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -12,7 +12,7 @@
; that we can use gather instructions with the correct offsets, taking
; vscale into account.
-define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) {
+define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @widen_ptr_phi_unrolled(
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ %c, %vector.ph ], [ %[[PTR_IND:.*]], %vector.body ]
@@ -122,7 +122,7 @@
; because it is stored to memory.
;
-define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) {
+define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
; CHECK-LABEL: @pointer_iv_mixed(
; CHECK: vector.body
; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -170,7 +170,7 @@
ret i32 %tmp5
}
-
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -1,6 +1,6 @@
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
-define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
+define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
; CHECK-LABEL: @stride7_i32(
; CHECK: vector.body
; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -27,7 +27,7 @@
ret void
}
-define void @stride7_f64(double* noalias nocapture %dst, i64 %n) {
+define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 {
; CHECK-LABEL: @stride7_f64(
; CHECK: vector.body
; CHECK: %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -55,7 +55,7 @@
}
-define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) {
+define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 {
; CHECK-LABEL: @cond_stride7_f64(
; CHECK: vector.body
; CHECK: %[[MASK:.*]] = icmp ne <vscale x 2 x i64>
@@ -90,7 +90,7 @@
ret void
}
-
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -59,7 +59,7 @@
ret void
}
-attributes #0 = { "target-features"="+neon,+sve" }
+attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,6 +1,6 @@
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
-define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
+define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
; CHECK-LABEL: @gather_nxv4i32_ind64
; CHECK: vector.body:
; CHECK: %[[IND:.*]] = load <vscale x 4 x i64>, <vscale x 4 x i64>*
@@ -29,7 +29,7 @@
; NOTE: I deliberately chose '%b' as an array of i32 indices, since the
; additional 'sext' in the for.body loop exposes additional code paths
; during vectorisation.
-define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) {
+define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @scatter_nxv4i32_ind32
; CHECK: vector.body:
; CHECK: %[[VALS:.*]] = load <vscale x 4 x float>
@@ -57,7 +57,7 @@
ret void
}
-define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) {
+define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-LABEL: @scatter_inv_nxv4i32
; CHECK: vector.ph:
; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -89,7 +89,7 @@
ret void
}
-define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) {
+define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @gather_inv_nxv4i32
; CHECK: vector.ph:
; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -122,6 +122,7 @@
ret void
}
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -1,6 +1,6 @@
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
-define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @cond_inv_load_i32i32i16
; CHECK: vector.ph:
; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %inv, i32 0
@@ -39,7 +39,7 @@
ret void
}
-define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @cond_inv_load_f64f64f64
; CHECK: vector.ph:
; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x double*> poison, double* %inv, i32 0
@@ -76,7 +76,7 @@
ret void
}
-define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) {
+define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 {
; CHECK-LABEL: @invariant_load_cond
; CHECK: vector.body
; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
@@ -117,6 +117,7 @@
ret void
}
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -44,7 +44,7 @@
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test1
; CHECK: <4 x i32>
-define void @test1(i32* %a, i32* %b) {
+define void @test1(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -88,7 +88,7 @@
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test2
; CHECK: <4 x i32>
-define void @test2(i32* %a, i32* %b) {
+define void @test2(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -135,7 +135,7 @@
; CHECK-DBG: LV: Using user VF vscale x 2.
; CHECK-LABEL: @test3
; CHECK: <vscale x 2 x i32>
-define void @test3(i32* %a, i32* %b) {
+define void @test3(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -186,7 +186,7 @@
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test4
; CHECK: <4 x i32>
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -233,7 +233,7 @@
; CHECK-DBG: LV: Using user VF vscale x 4
; CHECK-LABEL: @test5
; CHECK: <vscale x 4 x i32>
-define void @test5(i32* %a, i32* %b) {
+define void @test5(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -283,7 +283,7 @@
; CHECK-DBG: Selecting VF: 4.
; CHECK-LABEL: @test6
; CHECK: <4 x i32>
-define void @test6(i32* %a, i32* %b) {
+define void @test6(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -317,7 +317,7 @@
; CHECK-NO-SVE-LABEL: @test_no_sve
; CHECK-NO-SVE: <4 x i32>
; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
-define void @test_no_sve(i32* %a, i32* %b) {
+define void @test_no_sve(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -350,7 +350,7 @@
; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
; CHECK-NO-SVE-LABEL: @test_no_max_vscale
; CHECK-NO-SVE: <4 x i32>
-define void @test_no_max_vscale(i32* %a, i32* %b) {
+define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -372,6 +372,7 @@
ret void
}
+attributes #0 = { vscale_range(0, 16) }
!21 = !{!21, !22, !23}
!22 = !{!"llvm.loop.vectorize.width", i32 4}
!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
@@ -7,7 +7,7 @@
; Test that the MaxVF for the following loop, that has no dependence distances,
; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
; (maximized bandwidth for i8 in the loop).
-define void @test0(i32* %a, i8* %b, i32* %c) {
+define void @test0(i32* %a, i8* %b, i32* %c) #0 {
; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -40,7 +40,7 @@
; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as (maxvscale = 16) * 4.
-define void @test1(i32* %a, i8* %b) {
+define void @test1(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -74,7 +74,7 @@
; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as (maxvscale = 16) * 2.
-define void @test2(i32* %a, i8* %b) {
+define void @test2(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -108,7 +108,7 @@
; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as (maxvscale = 16) * 1.
-define void @test3(i32* %a, i8* %b) {
+define void @test3(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -142,7 +142,7 @@
; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance.
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -172,3 +172,5 @@
exit:
ret void
}
+
+attributes #0 = { vscale_range(0, 16) }
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -3,7 +3,7 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
-define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -48,7 +48,7 @@
ret float %add
}
-define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict_unroll
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.*]], %vector.body ]
@@ -112,7 +112,7 @@
ret float %add
}
-define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict_interleave
; CHECK-ORDERED: entry
; CHECK-ORDERED: %[[ARRAYIDX:.*]] = getelementptr inbounds float, float* %a, i64 1
@@ -205,7 +205,7 @@
ret void
}
-define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_of_sum
; CHECK-ORDERED: vector.body
; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -267,7 +267,7 @@
ret float %res
}
-define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_conditional
; CHECK-ORDERED: vector.body
; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -342,7 +342,7 @@
}
; Negative test - loop contains multiple fadds which we cannot safely reorder
-define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
+define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_multiple
; CHECK-ORDERED-NOT: vector.body
@@ -389,6 +389,7 @@
ret float %rdx
}
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !3, !6, !8}
!1 = distinct !{!1, !3, !7, !8}
!2 = distinct !{!2, !4, !6, !8}
Index: llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -18,7 +18,7 @@
; return a;
; }
;
-define i32 @PR33613(double* %b, double %j, i32 %d) {
+define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
; CHECK-VF4UF2-LABEL: @PR33613
; CHECK-VF4UF2: vector.body
; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
@@ -66,7 +66,7 @@
; }
;
; Check that the sext sank after the load in the vector loop.
-define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
; CHECK-VF4UF1-LABEL: @PR34711
; CHECK-VF4UF1: vector.body
; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
@@ -100,5 +100,6 @@
ret void
}
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
Index: llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
===================================================================
--- llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
+++ llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
@@ -2,7 +2,7 @@
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
-define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
; CHECK-LABEL: 'masked_scatters'
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32
Index: llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
===================================================================
--- llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
+++ llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
@@ -2,7 +2,7 @@
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
-define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
; CHECK-LABEL: 'masked_gathers'
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5694,6 +5694,11 @@
// Limit MaxScalableVF by the maximum safe dependence distance.
Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+ if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+ Attribute VScaleRangeAttr =
+ TheFunction->getFnAttribute(Attribute::VScaleRange);
+ MaxVScale = VScaleRangeAttr.getVScaleRangeArgs().second;
+ }
MaxScalableVF = ElementCount::getScalable(
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
if (!MaxScalableVF)
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -125,12 +125,6 @@
return ST->getMinVectorRegisterBitWidth();
}
- Optional<unsigned> getMaxVScale() const {
- if (ST->hasSVE())
- return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
- return BaseT::getMaxVScale();
- }
-
unsigned getMaxInterleaveFactor(unsigned VF);
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1421,7 +1421,12 @@
return InstructionCost::getInvalid();
ElementCount LegalVF = LT.second.getVectorElementCount();
- Optional<unsigned> MaxNumVScale = getMaxVScale();
+ Optional<unsigned> MaxNumVScale;
+ if (I->getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+ Attribute VScaleRangeAttr =
+ I->getFunction()->getFnAttribute(Attribute::VScaleRange);
+ MaxNumVScale = VScaleRangeAttr.getVScaleRangeArgs().second;
+ }
assert(MaxNumVScale && "Expected valid max vscale value");
InstructionCost MemOpCost =
Index: clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
===================================================================
--- clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
+++ clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
@@ -3,10 +3,13 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=512
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=1024
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=2048
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=128 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=256 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
// CHECK-LABEL: @func() #0
// CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
-// CHECK-NONE-NOT: vscale_range
+// CHECK-NONE: attributes #0 = { {{.*}} vscale_range(0,16) {{.*}} }
void func() {}
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -501,6 +501,10 @@
unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
VScale, VScale));
+ } else if (getContext().getTargetInfo().getTriple().isAArch64() &&
+ getContext().getTargetInfo().hasFeature("sve")) {
+ CurFn->addFnAttr(
+ llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(), 0, 16));
}
// If we generated an unreachable return block, delete it now.
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits