DylanFleming-arm created this revision.
Herald added subscribers: frasercrmck, luismarques, apazos, sameer.abuasal, 
s.egerton, Jim, psnobl, jocewei, PkmX, the_o, brucehoult, MartinMosbeck, 
rogfer01, edward-jones, zzheng, jrtc27, niosHD, sabuasal, simoncook, johnrusso, 
rbar, asb, hiraditya, tschuett.
Herald added a reviewer: efriedma.
DylanFleming-arm requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, MaskRay.
Herald added projects: clang, LLVM.
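
This patch removes TTI::getMaxVScale() and its target overrides, and instead
derives the maximum vscale from the vscale_range IR function attribute. Clang
is updated to emit vscale_range(0, 16) for SVE targets when no explicit
-msve-vector-bits value is given. The query pattern shared by the
LoopVectorize.cpp and AArch64TargetTransformInfo.cpp hunks below is, in
sketch form (where F stands for whichever llvm::Function is being analysed,
i.e. TheFunction or I->getFunction() in the hunks):

  Optional<unsigned> MaxVScale;
  if (F.hasFnAttribute(Attribute::VScaleRange)) {
    // getVScaleRangeArgs() returns the (min, max) pair encoded in the
    // attribute; only the max is used here.
    Attribute Attr = F.getFnAttribute(Attribute::VScaleRange);
    MaxVScale = Attr.getVScaleRangeArgs().second;
  }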

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106277

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  llvm/include/llvm/Analysis/TargetTransformInfo.h
  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/lib/Analysis/TargetTransformInfo.cpp
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
  llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
  llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -12,7 +12,7 @@
 ; that we can use gather instructions with the correct offsets, taking
 ; vscale into account.
 
-define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) {
+define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) #0 {
 ; CHECK-LABEL: @widen_ptr_phi_unrolled(
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ %c, %vector.ph ], [ %[[PTR_IND:.*]], %vector.body ]
@@ -122,7 +122,7 @@
 ; because it is stored to memory.
 ;
 
-define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) {
+define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
 ; CHECK-LABEL: @pointer_iv_mixed(
 ; CHECK:     vector.body
 ; CHECK:       %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -170,7 +170,7 @@
   ret i32 %tmp5
 }
 
-
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
 
-define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
+define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_i32(
 ; CHECK:      vector.body
 ; CHECK:        %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -27,7 +27,7 @@
   ret void
 }
 
-define void @stride7_f64(double* noalias nocapture %dst, i64 %n) {
+define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_f64(
 ; CHECK:      vector.body
 ; CHECK:        %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -55,7 +55,7 @@
 }
 
 
-define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) {
+define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 {
 ; CHECK-LABEL: @cond_stride7_f64(
 ; CHECK:      vector.body
 ; CHECK:        %[[MASK:.*]] = icmp ne <vscale x 2 x i64>
@@ -90,7 +90,7 @@
   ret void
 }
 
-
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -59,7 +59,7 @@
   ret void
 }
 
-attributes #0 = { "target-features"="+neon,+sve" }
+attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }
 
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
 
-define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
+define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
 ; CHECK-LABEL: @gather_nxv4i32_ind64
 ; CHECK: vector.body:
 ; CHECK:   %[[IND:.*]] = load <vscale x 4 x i64>, <vscale x 4 x i64>*
@@ -29,7 +29,7 @@
 ; NOTE: I deliberately chose '%b' as an array of i32 indices, since the
 ; additional 'sext' in the for.body loop exposes additional code paths
 ; during vectorisation.
-define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) {
+define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 {
 ; CHECK-LABEL: @scatter_nxv4i32_ind32
 ; CHECK: vector.body:
 ; CHECK:   %[[VALS:.*]] = load <vscale x 4 x float>
@@ -57,7 +57,7 @@
   ret void
 }
 
-define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) {
+define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @scatter_inv_nxv4i32
 ; CHECK: vector.ph:
 ; CHECK:   %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -89,7 +89,7 @@
   ret void
 }
 
-define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) {
+define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @gather_inv_nxv4i32
 ; CHECK: vector.ph:
 ; CHECK:   %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -122,6 +122,7 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
 
-define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @cond_inv_load_i32i32i16
 ; CHECK:     vector.ph:
 ; CHECK:       %[[INVINS:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %inv, i32 0
@@ -39,7 +39,7 @@
   ret void
 }
 
-define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @cond_inv_load_f64f64f64
 ; CHECK:     vector.ph:
 ; CHECK:       %[[INVINS:.*]] = insertelement <vscale x 4 x double*> poison, double* %inv, i32 0
@@ -76,7 +76,7 @@
   ret void
 }
 
-define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) {
+define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 {
 ; CHECK-LABEL: @invariant_load_cond
 ; CHECK: vector.body
 ; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
@@ -117,6 +117,7 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -44,7 +44,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test1
 ; CHECK: <4 x i32>
-define void @test1(i32* %a, i32* %b) {
+define void @test1(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -88,7 +88,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test2
 ; CHECK: <4 x i32>
-define void @test2(i32* %a, i32* %b) {
+define void @test2(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -135,7 +135,7 @@
 ; CHECK-DBG: LV: Using user VF vscale x 2.
 ; CHECK-LABEL: @test3
 ; CHECK: <vscale x 2 x i32>
-define void @test3(i32* %a, i32* %b) {
+define void @test3(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -186,7 +186,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test4
 ; CHECK: <4 x i32>
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -233,7 +233,7 @@
 ; CHECK-DBG: LV: Using user VF vscale x 4
 ; CHECK-LABEL: @test5
 ; CHECK: <vscale x 4 x i32>
-define void @test5(i32* %a, i32* %b) {
+define void @test5(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -283,7 +283,7 @@
 ; CHECK-DBG: Selecting VF: 4.
 ; CHECK-LABEL: @test6
 ; CHECK: <4 x i32>
-define void @test6(i32* %a, i32* %b) {
+define void @test6(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -317,7 +317,7 @@
 ; CHECK-NO-SVE-LABEL: @test_no_sve
 ; CHECK-NO-SVE: <4 x i32>
 ; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
-define void @test_no_sve(i32* %a, i32* %b) {
+define void @test_no_sve(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -350,7 +350,7 @@
 ; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
 ; CHECK-NO-SVE-LABEL: @test_no_max_vscale
 ; CHECK-NO-SVE: <4 x i32>
-define void @test_no_max_vscale(i32* %a, i32* %b) {
+define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -372,6 +372,7 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !21 = !{!21, !22, !23}
 !22 = !{!"llvm.loop.vectorize.width", i32 4}
 !23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
@@ -7,7 +7,7 @@
 ; Test that the MaxVF for the following loop, that has no dependence distances,
 ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
 ; (maximized bandwidth for i8 in the loop).
-define void @test0(i32* %a, i8* %b, i32* %c) {
+define void @test0(i32* %a, i8* %b, i32* %c) #0 {
 ; CHECK: LV: Checking a loop in "test0"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -40,7 +40,7 @@
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 64 elements, is calculated as (maxvscale = 16) * 4.
-define void @test1(i32* %a, i8* %b) {
+define void @test1(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test1"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -74,7 +74,7 @@
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 32 elements, is calculated as (maxvscale = 16) * 2.
-define void @test2(i32* %a, i8* %b) {
+define void @test2(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test2"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -108,7 +108,7 @@
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 16 elements, is calculated as (maxvscale = 16) * 1.
-define void @test3(i32* %a, i8* %b) {
+define void @test3(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test3"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -142,7 +142,7 @@
 
 ; Test the fallback mechanism when scalable vectors are not feasible due
 ; to e.g. dependence distance.
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
 ; CHECK: LV: Checking a loop in "test4"
 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -172,3 +172,5 @@
 exit:
   ret void
 }
+
+attributes #0 = { vscale_range(0, 16) }
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions=true  -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions=true  -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
 
-define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -48,7 +48,7 @@
   ret float %add
 }
 
-define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict_unroll
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.*]], %vector.body ]
@@ -112,7 +112,7 @@
   ret float %add
 }
 
-define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict_interleave
 ; CHECK-ORDERED: entry
 ; CHECK-ORDERED: %[[ARRAYIDX:.*]] = getelementptr inbounds float, float* %a, i64 1
@@ -205,7 +205,7 @@
   ret void
 }
 
-define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_of_sum
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -267,7 +267,7 @@
   ret float %res
 }
 
-define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_conditional
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -342,7 +342,7 @@
 }
 
 ; Negative test - loop contains multiple fadds which we cannot safely reorder
-define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
+define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_multiple
 ; CHECK-ORDERED-NOT: vector.body
 
@@ -389,6 +389,7 @@
   ret float %rdx
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !3, !6, !8}
 !1 = distinct !{!1, !3, !7, !8}
 !2 = distinct !{!2, !4, !6, !8}
Index: llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -18,7 +18,7 @@
 ;   return a;
 ; }
 ;
-define i32 @PR33613(double* %b, double %j, i32 %d) {
+define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
 ; CHECK-VF4UF2-LABEL: @PR33613
 ; CHECK-VF4UF2: vector.body
 ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
@@ -66,7 +66,7 @@
 ; }
 ;
 ; Check that the sext sank after the load in the vector loop.
-define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
 ; CHECK-VF4UF1-LABEL: @PR34711
 ; CHECK-VF4UF1: vector.body
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
@@ -100,5 +100,6 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
Index: llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
===================================================================
--- llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
+++ llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
@@ -2,7 +2,7 @@
 
 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s | FileCheck %s
 
-define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
 ; CHECK-LABEL: 'masked_scatters'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32
Index: llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
===================================================================
--- llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
+++ llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
@@ -2,7 +2,7 @@
 
 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s | FileCheck %s
 
-define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
 ; CHECK-LABEL: 'masked_gathers'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction:   %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction:   %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5693,7 +5693,12 @@
     return MaxScalableVF;
 
   // Limit MaxScalableVF by the maximum safe dependence distance.
-  Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+  Optional<unsigned> MaxVScale;
+  if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+    Attribute VScaleRangeAttr =
+        TheFunction->getFnAttribute(Attribute::VScaleRange);
+    MaxVScale = VScaleRangeAttr.getVScaleRangeArgs().second;
+  }
   MaxScalableVF = ElementCount::getScalable(
       MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
   if (!MaxScalableVF)
Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -55,7 +55,6 @@
 
   bool shouldExpandReduction(const IntrinsicInst *II) const;
   bool supportsScalableVectors() const { return ST->hasStdExtV(); }
-  Optional<unsigned> getMaxVScale() const;
 
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
     switch (K) {
Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -116,20 +116,6 @@
   }
 }
 
-Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
-  // There is no assumption of the maximum vector length in V specification.
-  // We use the value specified by users as the maximum vector length.
-  // This function will use the assumed maximum vector length to get the
-  // maximum vscale for LoopVectorizer.
-  // If users do not specify the maximum vector length, we have no way to
-  // know whether the LoopVectorizer is safe to do or not.
-  // We only consider to use single vector register (LMUL = 1) to vectorize.
-  unsigned MaxVectorSizeInBits = ST->getMaxRVVVectorSizeInBits();
-  if (ST->hasStdExtV() && MaxVectorSizeInBits != 0)
-    return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
-  return BaseT::getMaxVScale();
-}
-
 InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
     unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
     Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -125,12 +125,6 @@
     return ST->getMinVectorRegisterBitWidth();
   }
 
-  Optional<unsigned> getMaxVScale() const {
-    if (ST->hasSVE())
-      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
-    return BaseT::getMaxVScale();
-  }
-
   unsigned getMaxInterleaveFactor(unsigned VF);
 
   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1421,7 +1421,12 @@
     return InstructionCost::getInvalid();
 
   ElementCount LegalVF = LT.second.getVectorElementCount();
-  Optional<unsigned> MaxNumVScale = getMaxVScale();
+  Optional<unsigned> MaxNumVScale;
+  if (I->getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+    Attribute VScaleRangeAttr =
+        I->getFunction()->getFnAttribute(Attribute::VScaleRange);
+    MaxNumVScale = VScaleRangeAttr.getVScaleRangeArgs().second;
+  }
   assert(MaxNumVScale && "Expected valid max vscale value");
 
   InstructionCost MemOpCost =
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -594,10 +594,6 @@
   return TTIImpl->getMinVectorRegisterBitWidth();
 }
 
-Optional<unsigned> TargetTransformInfo::getMaxVScale() const {
-  return TTIImpl->getMaxVScale();
-}
-
 bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
   return TTIImpl->shouldMaximizeVectorBandwidth();
 }
Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -652,8 +652,6 @@
     return TypeSize::getFixed(32);
   }
 
-  Optional<unsigned> getMaxVScale() const { return None; }
-
   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
   /// are set if the demanded result elements need to be inserted and/or
   /// extracted from vectors.
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -393,8 +393,6 @@
 
   unsigned getMinVectorRegisterBitWidth() const { return 128; }
 
-  Optional<unsigned> getMaxVScale() const { return None; }
-
   bool shouldMaximizeVectorBandwidth() const { return false; }
 
   ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -903,9 +903,6 @@
   /// \return The width of the smallest vector register type.
   unsigned getMinVectorRegisterBitWidth() const;
 
-  /// \return The maximum value of vscale if the target specifies an
-  ///  architectural maximum vector length, and None otherwise.
-  Optional<unsigned> getMaxVScale() const;
 
   /// \return True if the vectorization factor should be chosen to
   /// make the vector of the smallest element type match the size of a
@@ -1536,7 +1533,6 @@
   virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
   virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;
-  virtual Optional<unsigned> getMaxVScale() const = 0;
   virtual bool shouldMaximizeVectorBandwidth() const = 0;
   virtual ElementCount getMinimumVF(unsigned ElemWidth,
                                     bool IsScalable) const = 0;
@@ -1985,9 +1981,6 @@
   unsigned getMinVectorRegisterBitWidth() override {
     return Impl.getMinVectorRegisterBitWidth();
   }
-  Optional<unsigned> getMaxVScale() const override {
-    return Impl.getMaxVScale();
-  }
   bool shouldMaximizeVectorBandwidth() const override {
     return Impl.shouldMaximizeVectorBandwidth();
   }
Index: clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
===================================================================
--- clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
+++ clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
@@ -3,10 +3,13 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=2048
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=128 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=256 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 
 // CHECK-LABEL: @func() #0
 // CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
-// CHECK-NONE-NOT: vscale_range
+// CHECK-NONE: attributes #0 = { {{.*}} vscale_range(0,16) {{.*}} }
 void func() {}
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -501,6 +501,9 @@
     unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
     CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
                                                              VScale, VScale));
+  } else if (getContext().getTargetInfo().hasFeature("sve")) {
+    CurFn->addFnAttr(
+        llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(), 0, 16));
   }
 
   // If we generated an unreachable return block, delete it now.
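
For reference, the arithmetic behind the attribute values: with
-msve-vector-bits=512 the CodeGenFunction.cpp hunk above computes
VScale = 512 / 128 = 4 and emits vscale_range(4,4), which is what the
[[#div(VBITS,128)]] check in the updated clang test expects. With no fixed
vector width, the (0, 16) default corresponds to the SVE architectural
maximum of 2048 bits (2048 / 128 = 16), i.e. the
AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock value previously
returned by the deleted AArch64 getMaxVScale().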