From 935d08fcd38b9e1399f0d7b3f42646ec04b7f3e1 Mon Sep 17 00:00:00 2001
From: Tyler Nowicki <tnowicki@apple.com>
Date: Thu, 20 Aug 2015 16:19:55 -0700
Subject: [PATCH 4/4] Modify vectorization requirement test for memcheck
 threshold to allow vectorize(enable) pragma to override the small threshold.

---
 lib/Transforms/Vectorize/LoopVectorize.cpp     | 21 ++++++++++++++++-----
 test/Transforms/LoopVectorize/runtime-limit.ll | 18 ++++++++++++++++--
 2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 13cc665..f387411 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -214,6 +214,11 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
     cl::desc("The maximum interleave count to use when interleaving a scalar "
              "reduction in a nested loop."));
 
+static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
+    "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
+    cl::desc("The maximum allowed number of runtime memory checks with a "
+             "vectorize(enable) pragma."));
+
 namespace {
 
 // Forward declarations.
@@ -926,6 +931,9 @@ public:
       return DiagnosticInfo::AlwaysPrint;
     return LV_NAME;
   }
+  bool allowReordering() const {
+    return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
+  }
 
 private:
   /// Find hints specified in the loop metadata and update local values.
@@ -1505,9 +1513,7 @@ public:
   bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
     const char *Name = Hints.vectorizeAnalysisPassName();
     bool Failed = false;
-    if (UnsafeAlgebraInst &&
-        Hints.getForce() == LoopVectorizeHints::FK_Undefined &&
-        Hints.getWidth() == 0) {
+    if (UnsafeAlgebraInst && !Hints.allowReordering()) {
       emitOptimizationRemarkAnalysisFPCommute(
           F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(),
           VectorizationReport() << "cannot prove it is safe to reorder "
@@ -1515,8 +1521,13 @@ public:
       Failed = true;
     }
 
-    if (NumRuntimePointerChecks >
-        VectorizerParams::RuntimeMemoryCheckThreshold) {
+    // Test if runtime memcheck thresholds are exceeded.
+    bool PragmaThresholdReached =
+        NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
+    bool ThresholdReached = NumRuntimePointerChecks >
+                                VectorizerParams::RuntimeMemoryCheckThreshold &&
+                            !Hints.allowReordering();
+    if (ThresholdReached || PragmaThresholdReached) {
       emitOptimizationRemarkAnalysisAliasing(
           F->getContext(), Name, *F, L->getStartLoc(),
           VectorizationReport()
diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll
index a2593eb..e583e6b 100644
--- a/test/Transforms/LoopVectorize/runtime-limit.ll
+++ b/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -1,4 +1,6 @@
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -pragma-vectorize-memory-check-threshold=6 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -8,10 +10,18 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; Second loop produces diagnostic analysis remark.
 ;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations
 
+; First loop produces diagnostic pass remark.
+;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: {{[0-9]}}, interleaved count: 1)
+; Second loop produces diagnostic pass remark.
+;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: {{[0-9]}}, interleaved count: 1)
+
 ; We are vectorizing with 6 runtime checks.
 ;CHECK-LABEL: func1x6(
-;CHECK: <4 x i32>
+;CHECK: <{{[0-9]}} x i32>
 ;CHECK: ret
+;OVERRIDE-LABEL: func1x6(
+;OVERRIDE: <4 x i32>
+;OVERRIDE: ret
 define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
 entry:
   br label %for.body
@@ -44,8 +54,12 @@ for.end:                                          ; preds = %for.body
 
 ; We are not vectorizing with 12 runtime checks.
 ;CHECK-LABEL: func2x6(
-;CHECK-NOT: <4 x i32>
+;CHECK-NOT: <{{[0-9]}} x i32>
 ;CHECK: ret
+; We vectorize with 12 checks if a vectorization hint is provided.
+;OVERRIDE-LABEL: func2x6(
+;OVERRIDE: <4 x i32>
+;OVERRIDE: ret
 define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
 entry:
   br label %for.body
-- 
2.3.2 (Apple Git-55)