[llvm-branch-commits] [llvm] [LoopInterchange] Add tests for the vectorization profitability (NFC) (PR #133665)

via llvm-branch-commits Sun, 30 Mar 2025 18:17:57 -0700

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Ryotaro Kasuga (kasuga-fj)

<details>
<summary>Changes</summary>

There is a problem with the current profitability check for vectorization in 
LoopInterchange. There are both false positives and false negatives. The former 
means that the heuristic may say that "an exchange is necessary to vectorize 
the innermost loop" even though it's already possible. The latter means that 
the heuristic may miss a case where an exchange is necessary to vectorize the 
innermost loop. Note that this is not a dependency analysis problem.  These 
problems can occur even if the analysis is accurate (no overestimation).

This patch adds tests to clarify the cases that should be fixed. The root cause 
of these cases is that the heuristic doesn't handle the direction of a 
dependency correctly.

---
Full diff: https://github.com/llvm/llvm-project/pull/133665.diff


1 Files Affected:

- (added) 
llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll 
(+108) 


``````````diff
diff --git 
a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll 
b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
new file mode 100644
index 0000000000000..606117e70db86
--- /dev/null
+++ 
b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
+; RUN:     -pass-remarks-output=%t -disable-output 
-loop-interchange-profitabilities=vectorize
+; RUN: FileCheck -input-file %t %s
+
+@A = dso_local global [256 x [256 x float]] zeroinitializer
+@B = dso_local global [256 x [256 x float]] zeroinitializer
+@C = dso_local global [256 x [256 x float]] zeroinitializer
+
+; Check that the below loops are exchanged for vectorization.
+;
+; for (int i = 0; i < 256; i++) {
+;   for (int j = 1; j < 256; j++) {
+;     A[i][j] = A[i][j-1] + B[i][j];
+;     C[i][j] += 1;
+;   }
+; }
+;
+; FIXME: These loops are not exchanged at this time due to the problem of
+; profitablity heuristic for vectorization.
+
+; CHECK:      --- !Missed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            InterchangeNotProfitable
+; CHECK-NEXT: Function:        interchange_necesasry_for_vectorization
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Interchanging loops is not considered to 
improve cache locality nor vectorization.
+; CHECK-NEXT: ...
+define void @interchange_necesasry_for_vectorization() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
+  br label %for.j.body
+
+for.j.body:
+  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
+  %j.dec = add nsw i64 %j, -1
+  %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j.dec
+  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, 
i64 %j
+  %c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i, 
i64 %j
+  %a = load float, ptr %a.load.index, align 4
+  %b = load float, ptr %b.index, align 4
+  %c = load float, ptr %c.index, align 4
+  %add.0 = fadd float %a, %b
+  %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j
+  store float %add.0, ptr %a.store.index, align 4
+  %add.1 = fadd float %c, 1.0
+  store float %add.1, ptr %c.index, align 4
+  %j.next = add nuw nsw i64 %j, 1
+  %cmp.j = icmp eq i64 %j.next, 256
+  br i1 %cmp.j, label %for.i.inc, label %for.j.body
+
+for.i.inc:
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp.i = icmp eq i64 %i.next, 256
+  br i1 %cmp.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}
+
+; Check that the following innermost loop can be vectorized so that
+; interchangig is unnecessary.
+;
+; for (int i = 0; i < 256; i++)
+;   for (int j = 1; j < 256; j++)
+;     A[i][j-1] = A[i][j] + B[i][j];
+;
+; FIXME: These loops are exchanged at this time due to the problem of
+; profitablity heuristic for vectorization.
+
+; CHECK:      --- !Passed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Interchanged
+; CHECK-NEXT: Function:        interchange_unnecesasry_for_vectorization
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Loop interchanged with enclosing loop.
+define void @interchange_unnecesasry_for_vectorization() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
+  br label %for.j.body
+
+for.j.body:
+  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
+  %j.dec = add nsw i64 %j, -1
+  %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j
+  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, 
i64 %j
+  %a = load float, ptr %a.load.index, align 4
+  %b = load float, ptr %b.index, align 4
+  %add = fadd float %a, %b
+  %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j.dec
+  store float %add, ptr %a.store.index, align 4
+  %j.next = add nuw nsw i64 %j, 1
+  %cmp.j = icmp eq i64 %j.next, 256
+  br i1 %cmp.j, label %for.i.inc, label %for.j.body
+
+for.i.inc:
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp.i = icmp eq i64 %i.next, 256
+  br i1 %cmp.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/133665
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LoopInterchange] Add tests for the vectorization profitability (NFC) (PR #133665)

Reply via email to