Successfully identified regression in *llvm* in CI configuration 
tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3.  So far, this commit has 
regressed CI configurations:
 - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3

Culprit:
<cut>
commit 5c315bee8c9db27d12cead928eea5a3fef97f34f
Author: Dawid Jurczak <dawid_ju...@vp.pl>
Date:   Mon Jul 5 11:42:17 2021 +0200

    [DSE] Transform memset + malloc --> calloc (PR25892)
    
    After this change DSE can eliminate malloc + memset and emit calloc.
    It's https://reviews.llvm.org/D101440 follow-up.
    
    Differential Revision: https://reviews.llvm.org/D103009
</cut>

Results regressed to (for first_bad == 5c315bee8c9db27d12cead928eea5a3fef97f34f)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O3 
artifacts/build-5c315bee8c9db27d12cead928eea5a3fef97f34f/results_id:
1
# 464.h264ref,h264ref_base.default                              regressed by 105
# 464.h264ref,[.] FastFullPelBlockMotionSearch                  regressed by 146

from (for last_good == bc5b5ea037dbadd281c59248ae9d2742b51c69ed)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O3 
artifacts/build-bc5b5ea037dbadd281c59248ae9d2742b51c69ed/results_id:
1

Artifacts of last_good build: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O3/17/artifact/artifacts/build-bc5b5ea037dbadd281c59248ae9d2742b51c69ed/
Results ID of last_good: 
tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3221
Artifacts of first_bad build: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O3/17/artifact/artifacts/build-5c315bee8c9db27d12cead928eea5a3fef97f34f/
Results ID of first_bad: 
tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3210
Build top page/logs: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O3/17/

Configuration details:


Reproduce builds:
<cut>
mkdir investigate-llvm-5c315bee8c9db27d12cead928eea5a3fef97f34f
cd investigate-llvm-5c315bee8c9db27d12cead928eea5a3fef97f34f

git clone https://git.linaro.org/toolchain/jenkins-scripts

mkdir -p artifacts/manifests
curl --fail -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O3/17/artifact/artifacts/manifests/build-baseline.sh
curl --fail -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O3/17/artifact/artifacts/manifests/build-parameters.sh
curl --fail -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O3/17/artifact/artifacts/test.sh
chmod +x artifacts/test.sh

# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh

# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/

cd llvm

# Reproduce first_bad build
git checkout --detach 5c315bee8c9db27d12cead928eea5a3fef97f34f
../artifacts/test.sh

# Reproduce last_good build
git checkout --detach bc5b5ea037dbadd281c59248ae9d2742b51c69ed
../artifacts/test.sh

cd ..
</cut>

History of pending regressions and results: 
https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/ci/tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3

Artifacts: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O3/17/artifact/artifacts/
Build log: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O3/17/consoleText

Full commit (up to 1000 lines):
<cut>
commit 5c315bee8c9db27d12cead928eea5a3fef97f34f
Author: Dawid Jurczak <dawid_ju...@vp.pl>
Date:   Mon Jul 5 11:42:17 2021 +0200

    [DSE] Transform memset + malloc --> calloc (PR25892)
    
    After this change DSE can eliminate malloc + memset and emit calloc.
    It's https://reviews.llvm.org/D101440 follow-up.
    
    Differential Revision: https://reviews.llvm.org/D103009
---
 .../lib/Transforms/Scalar/DeadStoreElimination.cpp |  81 +++++++++--
 .../Transforms/DeadStoreElimination/noop-stores.ll | 153 ++++++++++++++++++++-
 2 files changed, 219 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp 
b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index d22b3f409585..0ada5c6e72c9 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -56,6 +56,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
@@ -78,6 +79,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
@@ -505,7 +507,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, 
Instruction *SecondI,
   BasicBlock::iterator SecondBBI(SecondI);
   BasicBlock *FirstBB = FirstI->getParent();
   BasicBlock *SecondBB = SecondI->getParent();
-  MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+  MemoryLocation MemLoc;
+  if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
+    MemLoc = MemoryLocation::getForDest(MemSet);
+  else
+    MemLoc = MemoryLocation::get(SecondI);
+
   auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
 
   // Start checking the SecondBB.
@@ -819,14 +826,17 @@ bool isNoopIntrinsic(Instruction *I) {
 }
 
 // Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller,
+                const TargetLibraryInfo &TLI) {
   Instruction *DI = D->getMemoryInst();
   // Calls that only access inaccessible memory cannot read or write any memory
   // locations we consider for elimination.
   if (auto *CB = dyn_cast<CallBase>(DI))
-    if (CB->onlyAccessesInaccessibleMemory())
+    if (CB->onlyAccessesInaccessibleMemory()) {
+      if (isAllocLikeFn(DI, &TLI))
+        return false;
       return true;
-
+    }
   // We can eliminate stores to locations not visible to the caller across
   // throwing instructions.
   if (DI->mayThrow() && !DefVisibleToCaller)
@@ -841,7 +851,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
     return true;
 
   // Skip intrinsics that do not really read or modify memory.
-  if (isNoopIntrinsic(D->getMemoryInst()))
+  if (isNoopIntrinsic(DI))
     return true;
 
   return false;
@@ -1389,7 +1399,7 @@ struct DSEState {
       MemoryDef *CurrentDef = cast<MemoryDef>(Current);
       Instruction *CurrentI = CurrentDef->getMemoryInst();
 
-      if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO)))
+      if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO), TLI))
         continue;
 
       // Before we try to remove anything, check for any extra throwing
@@ -1816,13 +1826,58 @@ struct DSEState {
 
     if (StoredConstant && StoredConstant->isNullValue()) {
       auto *DefUOInst = dyn_cast<Instruction>(DefUO);
-      if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
-        auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
-        // If UnderlyingDef is the clobbering access of Def, no instructions
-        // between them can modify the memory location.
-        auto *ClobberDef =
-            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
-        return UnderlyingDef == ClobberDef;
+      if (DefUOInst) {
+        if (isCallocLikeFn(DefUOInst, &TLI)) {
+          auto *UnderlyingDef =
+              cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
+          // If UnderlyingDef is the clobbering access of Def, no instructions
+          // between them can modify the memory location.
+          auto *ClobberDef =
+              MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
+          return UnderlyingDef == ClobberDef;
+        }
+
+        if (MemSet) {
+          if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
+              F.hasFnAttribute(Attribute::SanitizeAddress) ||
+              F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+              F.getName() == "calloc")
+            return false;
+          auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst));
+          if (!Malloc)
+            return false;
+          auto *InnerCallee = Malloc->getCalledFunction();
+          if (!InnerCallee)
+            return false;
+          LibFunc Func;
+          if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+              Func != LibFunc_malloc)
+            return false;
+          if (Malloc->getOperand(0) == MemSet->getLength()) {
+            if (DT.dominates(Malloc, MemSet) &&
+                memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) {
+              IRBuilder<> IRB(Malloc);
+              const auto &DL = Malloc->getModule()->getDataLayout();
+              AttributeList EmptyList;
+              if (auto *Calloc = emitCalloc(
+                      ConstantInt::get(IRB.getIntPtrTy(DL), 1),
+                      Malloc->getArgOperand(0), EmptyList, IRB, TLI)) {
+                MemorySSAUpdater Updater(&MSSA);
+                auto *LastDef = cast<MemoryDef>(
+                    Updater.getMemorySSA()->getMemoryAccess(Malloc));
+                auto *NewAccess = Updater.createMemoryAccessAfter(
+                    cast<Instruction>(Calloc), LastDef, LastDef);
+                auto *NewAccessMD = cast<MemoryDef>(NewAccess);
+                Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
+                Updater.removeMemoryAccess(Malloc);
+                Malloc->replaceAllUsesWith(Calloc);
+                Malloc->eraseFromParent();
+                return true;
+              }
+              return false;
+            }
+          }
+        }
       }
     }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll 
b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 184653982a6a..12534b6047c5 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -1,9 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='dse,verify<memoryssa>' -S | 
FileCheck %s
 target datalayout = 
"E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-declare i8* @calloc(i64, i64)
 declare void @memset_pattern16(i8*, i8*, i64)
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
@@ -309,6 +308,156 @@ entry:
   ret void
 }
 
+declare noalias i8* @malloc(i64)
+declare noalias i8* @_Znwm(i64)
+declare void @clobber_memory(float*)
+
+; based on pr25892_lite
+define i8* @zero_memset_after_malloc(i64 %size) {
+; CHECK-LABEL: @zero_memset_after_malloc(
+; CHECK-NEXT:    [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %call = call i8* @malloc(i64 %size) inaccessiblememonly
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+  ret i8* %call
+}
+
+; based on pr25892_lite
+define i8* @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
+; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering(
+; CHECK-NEXT:    [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]])
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i8* [[CALL]] to float*
+; CHECK-NEXT:    call void @clobber_memory(float* [[BC]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 
[[SIZE]], i1 false)
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %call = call i8* @malloc(i64 %size) inaccessiblememonly
+  %bc = bitcast i8* %call to float*
+  call void @clobber_memory(float* %bc)
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+  ret i8* %call
+}
+
+; based on pr25892_lite
+define i8* @zero_memset_after_malloc_with_different_sizes(i64 %size) {
+; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes(
+; CHECK-NEXT:    [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]])
+; CHECK-NEXT:    [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 
[[SIZE2]], i1 false)
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %call = call i8* @malloc(i64 %size) inaccessiblememonly
+  %size2 = add nsw i64 %size, -1
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size2, i1 false)
+  ret i8* %call
+}
+
+; based on pr25892_lite
+define i8* @zero_memset_after_new(i64 %size) {
+; CHECK-LABEL: @zero_memset_after_new(
+; CHECK-NEXT:    [[CALL:%.*]] = call i8* @_Znwm(i64 [[SIZE:%.*]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 
[[SIZE]], i1 false)
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %call = call i8* @_Znwm(i64 %size)
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+  ret i8* %call
+}
+
+; This should not create a calloc and should not crash the compiler.
+define i8* @notmalloc_memset(i64 %size, i8*(i64)* %notmalloc) {
+; CHECK-LABEL: @notmalloc_memset(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i8* [[NOTMALLOC:%.*]](i64 [[SIZE:%.*]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL1]], i8 0, i64 
[[SIZE]], i1 false)
+; CHECK-NEXT:    ret i8* [[CALL1]]
+;
+  %call1 = call i8* %notmalloc(i64 %size)
+  call void @llvm.memset.p0i8.i64(i8* %call1, i8 0, i64 %size, i1 false)
+  ret i8* %call1
+}
+
+; This should not create recursive call to calloc.
+define i8* @calloc(i64 %nmemb, i64 %size) {
+; CHECK-LABEL: @calloc(
+; CHECK:       entry:
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[SIZE:%.*]], [[NMEMB:%.*]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias align 16 i8* @malloc(i64 
[[MUL]])
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label 
[[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 
[[CALL]], i8 0, i64 [[MUL]], i1 false)
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+entry:
+  %mul = mul i64 %size, %nmemb
+  %call = tail call noalias align 16 i8* @malloc(i64 %mul)
+  %tobool.not = icmp eq i8* %call, null
+  br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %call, i8 0, i64 
%mul, i1 false)
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i8* %call
+}
+
+define float* @pr25892(i64 %size) {
+; CHECK-LABEL: @pr25892(
+; CHECK:       entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT:    br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i8* [[CALL]] to float*
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, 
[[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret float* [[RETVAL_0]]
+;
+entry:
+  %call = call i8* @malloc(i64 %size) inaccessiblememonly
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cleanup, label %if.end
+if.end:
+  %bc = bitcast i8* %call to float*
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+  br label %cleanup
+cleanup:
+  %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+  ret float* %retval.0
+}
+
+define float* @pr25892_with_extra_store(i64 %size) {
+; CHECK-LABEL: @pr25892_with_extra_store(
+; CHECK:       entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT:    br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i8* [[CALL]] to float*
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, 
[[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret float* [[RETVAL_0]]
+;
+entry:
+  %call = call i8* @malloc(i64 %size) inaccessiblememonly
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cleanup, label %if.end
+if.end:
+  %bc = bitcast i8* %call to float*
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+  store i8 0, i8* %call, align 1
+  br label %cleanup
+cleanup:
+  %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+  ret float* %retval.0
+}
+
 ; PR50143
 define i8* @store_zero_after_calloc_inaccessiblememonly() {
 ; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(
</cut>
_______________________________________________
linaro-toolchain mailing list
linaro-toolchain@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/linaro-toolchain

Reply via email to