Successfully identified a regression in *llvm* in CI configuration
tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO. So far, this commit has
regressed the following CI configurations:
 - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO

Culprit:
<cut>
commit 428a62f65f16f1640b1bfe033d20e6a4f545dd3e
Author: thomasraoux <thomasra...@google.com>
Date:   Wed Jun 9 09:42:32 2021 -0700

    [mlir][gpu] Add op to create MMA constant matrix
    
    This allow creating a matrix with all elements set to a given value. This is
    needed to be able to implement a simple dot op.
    
    Differential Revision: https://reviews.llvm.org/D103870
</cut>
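
Note: the culprit commit touches only files under mlir/ (the MLIR GPU
dialect), which is not on the clang/LLVM code path used to compile SPEC
CPU2006, so the reported perlbench slowdown may be benchmarking noise that
the bisect attributed to this commit rather than a genuine code-generation
change. A quick check for this is sketched after the full commit below.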

Results regressed to (for first_bad == 428a62f65f16f1640b1bfe033d20e6a4f545dd3e)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -O2_LTO -- artifacts/build-428a62f65f16f1640b1bfe033d20e6a4f545dd3e/results_id:
1
# 400.perlbench,perlbench_base.default                          regressed by 103

from (for last_good == 3b46283c1539f89619f2b40ab7732f434d7c68ff)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -O2_LTO -- artifacts/build-3b46283c1539f89619f2b40ab7732f434d7c68ff/results_id:
1
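
(The "# <step>:" lines above are stage markers emitted by the tcwg build
scripts: each negative number appears to mark a successfully completed
prerequisite stage, and 1 a completed benchmark run. The "regressed by 103"
figure appears to be the benchmark metric relative to the last_good baseline,
i.e. 400.perlbench is at roughly 103% of its baseline, about a 3% regression.)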

Artifacts of last_good build: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/build-3b46283c1539f89619f2b40ab7732f434d7c68ff/
Results ID of last_good: 
tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2_LTO/1827
Artifacts of first_bad build: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/build-428a62f65f16f1640b1bfe033d20e6a4f545dd3e/
Results ID of first_bad: 
tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2_LTO/1831
Build top page/logs: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O2_LTO/18/

Reproduce builds:
<cut>
mkdir investigate-llvm-428a62f65f16f1640b1bfe033d20e6a4f545dd3e
cd investigate-llvm-428a62f65f16f1640b1bfe033d20e6a4f545dd3e

git clone https://git.linaro.org/toolchain/jenkins-scripts

mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/manifests/build-baseline.sh --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/manifests/build-parameters.sh --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/test.sh --fail
chmod +x artifacts/test.sh

# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh

# Save baseline build state (which is then restored in artifacts/test.sh)
rsync -a --del --delete-excluded --exclude bisect/ --exclude artifacts/ --exclude llvm/ ./ ./bisect/baseline/

cd llvm

# Reproduce first_bad build
git checkout --detach 428a62f65f16f1640b1bfe033d20e6a4f545dd3e
../artifacts/test.sh

# Reproduce last_good build
git checkout --detach 3b46283c1539f89619f2b40ab7732f434d7c68ff
../artifacts/test.sh

cd ..
</cut>
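
For reference, the same culprit range can also be narrowed down with stock
git bisect from the directory set up above, reusing artifacts/test.sh as the
test script. This is a minimal sketch, assuming test.sh exits 0 for a good
build and non-zero for a regressed one (its exact exit-code convention is an
assumption here):
<cut>
cd llvm

# Mark first_bad as bad and last_good as good, then let git drive test.sh.
git bisect start 428a62f65f16f1640b1bfe033d20e6a4f545dd3e 3b46283c1539f89619f2b40ab7732f434d7c68ff
git bisect run ../artifacts/test.sh

# Clean up the bisection state when done.
git bisect reset

cd ..
</cut>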

History of pending regressions and results: 
https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/ci/tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO

Artifacts: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/
Build log: 
https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-aarch64-spec2k6-O2_LTO/18/consoleText

Full commit (up to 1000 lines):
<cut>
commit 428a62f65f16f1640b1bfe033d20e6a4f545dd3e
Author: thomasraoux <thomasra...@google.com>
Date:   Wed Jun 9 09:42:32 2021 -0700

    [mlir][gpu] Add op to create MMA constant matrix
    
    This allow creating a matrix with all elements set to a given value. This is
    needed to be able to implement a simple dot op.
    
    Differential Revision: https://reviews.llvm.org/D103870
---
 mlir/include/mlir/Dialect/GPU/GPUOps.td            | 45 ++++++++++++++++++++++
 mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp    | 42 +++++++++++++++++++-
 .../Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir     | 25 ++++++++++++
 mlir/test/Dialect/GPU/ops.mlir                     |  4 ++
 4 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 8e2520b675ae..1e78e4af4d51 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -1022,4 +1022,49 @@ def GPU_SubgroupMmaComputeOp : GPU_Op<"subgroup_mma_compute",
   let verifier = [{ return ::verify(*this); }];
 }
 
+def GPU_SubgroupMmaConstantMatrixOp : GPU_Op<"subgroup_mma_constant_matrix",
+    [NoSideEffect,
+     TypesMatchWith<"value type matches element type of mma_matrix",
+                    "res", "value",
+                    "$_self.cast<gpu::MMAMatrixType>().getElementType()">]>{
+
+  let summary = "GPU warp synchronous constant matrix";
+
+  let description = [{
+    The `gpu.subgroup_mma_constant_matrix` creates a `!gpu.mma_matrix` with
+    constant elements.
+
+    The operation takes a scalar input and return a `!gpu.mma_matrix` where each
+    element of is equal to the operand constant. The destination mma_matrix type
+    must have elememt type equal to the constant type. Since the layout of
+    `!gpu.mma_matrix` is opaque this only support setting all the elements to
+    the same value.
+
+    This op is meant to be used along with `gpu.subgroup_mma_compute`.
+    
+    Example:
+
+    ```mlir
+     %0 = gpu.subgroup_mma_constant_matrix %a :
+       !gpu.mma_matrix<16x16xf16, "AOp">
+     %1 = gpu.subgroup_mma_constant_matrix %b :
+       !gpu.mma_matrix<16x16xf32, "COp">
+    ```
+  }];
+
+  let arguments = (ins AnyTypeOf<[F16, F32]>:$value);
+
+  let results = (outs GPU_MMAMatrix:$res);
+
+  let extraClassDeclaration = [{
+    gpu::MMAMatrixType getType() {
+      return res().getType().cast<gpu::MMAMatrixType>();
+    }
+  }];
+
+  let assemblyFormat = [{
+    $value attr-dict `:` type($res)
+  }];
+}
+
 #endif // GPU_OPS
diff --git a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
index d72c8c217f86..d46a185dec22 100644
--- a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
@@ -348,12 +348,52 @@ struct WmmaMmaOpToNVVMLowering
   }
 };
 
+/// Convert GPU MMA ConstantMatrixOp to a chain of InsertValueOp.
+struct WmmaConstantOpToNVVMLowering
+    : public ConvertOpToLLVMPattern<gpu::SubgroupMmaConstantMatrixOp> {
+  using ConvertOpToLLVMPattern<
+      gpu::SubgroupMmaConstantMatrixOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(gpu::SubgroupMmaConstantMatrixOp subgroupMmaConstantOp,
+                  ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (failed(areAllLLVMTypes(subgroupMmaConstantOp.getOperation(), operands,
+                               rewriter)))
+      return failure();
+    Location loc = subgroupMmaConstantOp.getLoc();
+    Value cst = operands[0];
+    LLVM::LLVMStructType type = convertMMAToLLVMType(
+        subgroupMmaConstantOp.getType().cast<gpu::MMAMatrixType>());
+    // If the element type is a vector create a vector from the operand.
+    if (auto vecType = type.getBody()[0].dyn_cast<VectorType>()) {
+      Value vecCst = rewriter.create<LLVM::UndefOp>(loc, vecType);
+      for (int64_t vecEl = 0; vecEl < vecType.getNumElements(); vecEl++) {
+        Value idx = rewriter.create<LLVM::ConstantOp>(
+            loc, typeConverter->convertType(rewriter.getIntegerType(32)),
+            rewriter.getI32ArrayAttr(vecEl));
+        vecCst = rewriter.create<LLVM::InsertElementOp>(loc, vecType, vecCst,
+                                                        cst, idx);
+      }
+      cst = vecCst;
+    }
+    Value matrixStruct = rewriter.create<LLVM::UndefOp>(loc, type);
+    for (size_t i : llvm::seq(size_t(0), type.getBody().size())) {
+      matrixStruct = rewriter.create<LLVM::InsertValueOp>(
+          loc, matrixStruct, cst, rewriter.getI32ArrayAttr(i));
+    }
+    rewriter.replaceOp(subgroupMmaConstantOp, matrixStruct);
+    return success();
+  }
+};
+
 } // anonymous namespace
 
 namespace mlir {
 void populateGpuWMMAToNVVMConversionPatterns(LLVMTypeConverter &converter,
                                              RewritePatternSet &patterns) {
   patterns.insert<WmmaLoadOpToNVVMLowering, WmmaMmaOpToNVVMLowering,
-                  WmmaStoreOpToNVVMLowering>(converter);
+                  WmmaStoreOpToNVVMLowering, WmmaConstantOpToNVVMLowering>(
+      converter);
 }
 } // namespace mlir
diff --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
index de5d0d3fcf1c..f692dffdfcba 100644
--- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
@@ -151,3 +151,28 @@ gpu.module @test_module {
       return
     }
 }
+
+
+// -----
+
+gpu.module @test_module {
+
+// CHECK-LABEL: func @gpu_wmma_constant_op
+//       CHECK: %[[CST:.+]] = llvm.mlir.constant(1.000000e+00 : f16) : f16
+//       CHECK: %[[V0:.+]] = llvm.mlir.undef : vector<2xf16>
+//       CHECK: %[[C0:.+]] = llvm.mlir.constant([0 : i32]) : i32
+//       CHECK: %[[V1:.+]] = llvm.insertelement %[[CST]], %[[V0]][%[[C0]] : i32] : vector<2xf16>
+//       CHECK: %[[C1:.+]] = llvm.mlir.constant([1 : i32]) : i32
+//       CHECK: %[[V2:.+]] = llvm.insertelement %[[CST]], %[[V1]][%[[C1]] : i32] : vector<2xf16>
+//       CHECK: %[[M0:.+]] = llvm.mlir.undef : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
+//       CHECK: %[[M1:.+]] = llvm.insertvalue %[[V2]], %[[M0]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
+//       CHECK: %[[M2:.+]] = llvm.insertvalue %[[V2]], %[[M1]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
+//       CHECK: %[[M3:.+]] = llvm.insertvalue %[[V2]], %[[M2]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
+//       CHECK: %[[M4:.+]] = llvm.insertvalue %[[V2]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
+//       CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
+  func @gpu_wmma_constant_op()  ->(!gpu.mma_matrix<16x16xf16, "COp">) {
+    %cst = constant 1.0 : f16
+    %C = gpu.subgroup_mma_constant_matrix %cst : !gpu.mma_matrix<16x16xf16, "COp">
+    return %C : !gpu.mma_matrix<16x16xf16, "COp">
+  }
+}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index a98fe1c49683..1bed13c4b21a 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -201,8 +201,12 @@ module attributes {gpu.container_module} {
     // CHECK: %[[wg:.*]] = memref.alloca()
     %i = constant 16 : index
     // CHECK: %[[i:.*]] = constant 16 : index
+     %cst = constant 1.000000e+00 : f32
+    // CHECK: %[[cst:.*]] = constant 1.000000e+00 : f32
     %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xf16, "AOp">
     // CHECK: gpu.subgroup_mma_load_matrix %[[wg]][%[[i]], %[[i]]] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xf16, "AOp">
+    %1 = gpu.subgroup_mma_constant_matrix %cst : !gpu.mma_matrix<16x16xf32, "COp">
+    // CHECK: gpu.subgroup_mma_constant_matrix %[[cst]] : !gpu.mma_matrix<16x16xf32, "COp">
     return
   }
 }
</cut>
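
As noted above, a quick way to confirm that this commit only modifies MLIR
sources (and therefore cannot affect the clang code generation used for the
SPEC build) is to list the files it touches with plain git, run from the
investigate-llvm-428a62f65f16f1640b1bfe033d20e6a4f545dd3e directory created
in the reproduce steps:
<cut>
# Every path printed should be under mlir/ (cf. the diffstat above).
git -C llvm show --oneline --name-only 428a62f65f16f1640b1bfe033d20e6a4f545dd3e
</cut>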