[llvm-branch-commits] [mlir] [MLIR][AArch64] Add integration test for lowering of `vector.contract` to Neon FEAT_I8MM (PR #144699)
@@ -0,0 +1,336 @@ +// REQUIRES: arm-emulator + +// DEFINE: %{compile} = mlir-opt %s \ +// DEFINE: --convert-vector-to-scf --convert-scf-to-cf --convert-vector-to-llvm='enable-arm-neon enable-arm-i8mm' \ +// DEFINE: --expand-strided-metadata --convert-to-llvm --finalize-memref-to-llvm \ +// DEFINE: --lower-affine --convert-arith-to-llvm --reconcile-unrealized-casts \ +// DEFINE: -o %t + +// DEFINE: %{entry_point} = main + +// DEFINE: %{run} = %mcr_aarch64_cmd %t -e %{entry_point} -entry-point-result=void --march=aarch64 --mattr="+neon,+i8mm" \ +// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%native_mlir_arm_runner_utils + +// RUN: rm -f %t && %{compile} && FileCheck %s --input-file=%t -check-prefix CHECK-IR && %{run} | FileCheck %s + +#packed_maps = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (n, k)>, + affine_map<(m, n, k) -> (m, n)> +] + +// +// Test the lowering of `vector.contract` using the `LowerContractionToNeonI8MMPattern` +// +// The operation that the `vector.contract` in this test performs is matrix +// multiplication with accumulate +// OUT = ACC + LHS * RHS +// of two 8-bit integer matrices LHS and RHS, and a 32-bit integer matrix ACC +// into a 32-bit integer matrix OUT. The LHS and RHS can be sign- or zero-extended; +// this test covers all the possible variants. +// +// Tested are the calculations, as well as that the relevant `ArmNeon` dialect +// operations (`arm_neon.smmla`, `arm_neon.ummla`, etc.) are emitted. +// +// The pattern above handles (therefore this test prepares) input/output vectors with +// specific shapes: +// * LHS: vector<MxKxi8> +// * RHS: vector<NxKxi8> +// * ACC, OUT: vector<MxNxi32> +// where M and N are even and K is divisible by 8. +// Note that the RHS is transposed. +// This data layout makes it efficient to load data into SIMD +// registers in the layout expected by FEAT_I8MM instructions. +// Such a `vector.contract` is representative of the code we aim to generate +// by vectorisation of `linalg.mmt4d`. +// +// In this specific test we use M == 4, N == 4, and K == 8. banach-space wrote: Isn't K = 16 in the code below? https://github.com/llvm/llvm-project/pull/144699
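For reference, the expected output the test checks can be reproduced with a short numpy sketch along the lines the review below asks about. Everything here is an assumption for illustration: the variable names, the random inputs, and the choice of K (per the comment above, with the open question of whether K is 8 or 16) are not the test's actual data.

```python
# Hypothetical reference-output generator for OUT = ACC + LHS * RHS^T.
# Shapes follow the test comment: LHS is MxK, RHS is NxK (stored transposed).
import numpy as np

M, N, K = 4, 4, 8  # illustrative only; the review notes K may be 16

rng = np.random.default_rng(42)
lhs = rng.integers(-128, 128, size=(M, K), dtype=np.int8)   # sign-extended operand
rhs_t = rng.integers(0, 256, size=(N, K), dtype=np.uint8)   # zero-extended operand
acc = rng.integers(-1000, 1000, size=(M, N), dtype=np.int32)

# Widen to i32 before multiplying, mirroring the sign/zero extension the
# smmla/ummla/usmmla instructions apply, so the i8 products cannot overflow.
out = acc + lhs.astype(np.int32) @ rhs_t.astype(np.int32).T
print(out)
```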
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/142584 >From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Wed, 4 Jun 2025 09:21:02 + Subject: [PATCH 1/2] Move one check to beginning of function Created using spr 1.3.5-bogner --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index e96f3f8193b09..2dbabfe345d5e 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( if (SuccChain.UnscheduledPredecessors == 0) return false; + // Compile-time optimization: runtime is quadratic in the number of + // predecessors. For such uncommon cases, exit early. + if (Succ->pred_size() > PredecessorLimit) +return false; + // There are two basic scenarios here: // - // Case 1: triangular shape CFG (if-then): @@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; bool BadCFGConflict = false; - // Compile-time optimization: runtime is quadratic in the number of - // predecessors. For such uncommon cases, exit early. - if (Succ->pred_size() > PredecessorLimit) -return false; - for (MachineBasicBlock *Pred : Succ->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (Pred == Succ || PredChain == &SuccChain || >From e90cfcb5740fc7297e05a876172ad8c25f596a33 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Fri, 13 Jun 2025 15:43:00 + Subject: [PATCH 2/2] Test new command line flag Created using spr 1.3.5-bogner --- llvm/test/CodeGen/RISCV/branch.ll | 49 +++ 1 file changed, 49 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/branch.ll b/llvm/test/CodeGen/RISCV/branch.ll index 578080cd3a240..ed86ca8ca4dd1 100644 --- a/llvm/test/CodeGen/RISCV/branch.ll +++ b/llvm/test/CodeGen/RISCV/branch.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -block-placement-predecessor-limit=10 < %s \ +; RUN: | FileCheck -check-prefix=RV32I-MBPLIMIT %s define void @foo(i32 %a, ptr %b, i1 %c) nounwind { ; RV32I-LABEL: foo: @@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind { ; RV32I-NEXT:lw zero, 0(a1) ; RV32I-NEXT: .LBB0_14: # %end ; RV32I-NEXT:ret +; +; RV32I-MBPLIMIT-LABEL: foo: +; RV32I-MBPLIMIT: # %bb.0: +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_2 +; RV32I-MBPLIMIT-NEXT: .LBB0_1: # %end +; RV32I-MBPLIMIT-NEXT:ret +; RV32I-MBPLIMIT-NEXT: .LBB0_2: # %test2 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.3: # %test3 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:blt a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.4: # %test4 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bge a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.5: # %test5 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bltu a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.6: # %test6 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgeu a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.7: # %test7 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:blt a0, a3, 
.LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.8: # %test8 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bge a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.9: # %test9 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bltu a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.10: # %test10 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgeu a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.11: # %test11 +; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1) +; RV32I-MBPLIMIT-NEXT:andi a2, a2, 1 +; RV32I-MBPLIMIT-NEXT:bnez a2, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.12: # %test12 +; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgez a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.13: # %test13 +; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1) +; RV32I-MBPLIMIT-NEXT:blez a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.14: # %test14 +; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1) +; RV32I-MBPLIMIT-NEXT:ret %val1 = load volatile i32, ptr %b %tst1 = icmp eq i32 %val1, %a br i1 %tst1, label %end, label %test2
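The relocated early exit (and the new -block-placement-predecessor-limit flag exercised above) targets CFGs in which one block has a very large number of predecessors. A throwaway sketch for generating such a function to experiment with; it is hypothetical and not part of the patch:

```python
# Emit LLVM IR for a function whose %end block has N + 1 predecessors,
# the shape in which hasBetterLayoutPredecessor's quadratic scan bites.
N = 500
lines = ["define void @many_preds(i32 %x) {", "entry:",
         "  switch i32 %x, label %end ["]
lines += [f"    i32 {i}, label %bb{i}" for i in range(N)]
lines.append("  ]")
for i in range(N):
    lines += [f"bb{i}:", "  br label %end"]
lines += ["end:", "  ret void", "}"]
print("\n".join(lines))
```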
[llvm-branch-commits] [llvm] XCore: Declare libcalls used for align 4 memcpy (PR #144976)
https://github.com/nigelp-xmos approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/144976
[llvm-branch-commits] [mlir] [MLIR][AArch64] Add integration test for lowering of `vector.contract` to Neon FEAT_I8MM (PR #144699)
https://github.com/banach-space approved this pull request. Thanks, it's great to see more tests for `i8mm`. The documentation makes it relatively easy to follow (despite this being fairly complex!) - that's much appreciated! Overall LGTM, but I have one request. Could you unify the input data between SVE and NEON? I am happy for actual code to be duplicated. Btw, could you share how you generated the expected output? If that's some short numpy snippet, could you include it for future reference? (should these tests start to fail) Thank you! https://github.com/llvm/llvm-project/pull/144699
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -0,0 +1,262 @@ +// RUN: mlir-opt --arm-sve-legalize-vector-storage --split-input-file %s | FileCheck %s + +// - + +// CHECK-LABEL: @test_base_case +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]]: +// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1], [2, 3]] +// CHECK-SAME:: memref into memref +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSE]][%[[I]], %[[J]], %c0], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref, vector<[32]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[32]xi8> to vector<[4]x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x8xi8> + +func.func @test_base_case(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// - + +// CHECK-LABEL: @test_using_strided_layout +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1], [2, 3]] +// CHECK-SAME:: memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSE]][%[[I]], %[[J]], %c0], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref>, vector<[32]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[32]xi8> to vector<[4]x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x8xi8> + +#s0 = strided<[?, ?, 8, 1]> + +func.func @test_using_strided_layout(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// - + +// CHECK-LABEL: @test_3d_vector +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSED:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1, 2, 3]] +// CHECK-SAME:: memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSED]][%[[I]], %[[J]]], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref>, vector<[64]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[64]xi8> to vector<[4]x2x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x2x8xi8> + +#s1 = strided<[?, 16, 8, 1]> + +func.func @test_3d_vector(%i : index, %j : index, %M : memref) -> vector<[4]x2x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true, true]} : memref, vector<[4]x2x8xi8> + + return %A : vector<[4]x2x8xi8> +} + +// - + +// CHECK-LABEL: @test_4d_vector +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSED:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1, 2, 3]] +// CHECK-SAME: : memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSED]][%[[I]], %[[J]]], %c0_i8 {in_bounds = [false, true]} +// CHECK-SAME: : memref>, vector<2x[64]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<2x[4]x2x8xi8> + +#s2 = strided<[?, 16, 8, 1]> + +func.func @test_4d_vector(%i : index, %j : index, %M : memref) -> vector<2x[4]x2x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = 
arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [false, true, true, true]} : memref, vector<2x[4]x2x8xi8> + + return %A : vector<2x[4]x2x8xi8> +} + +// - + +// CHECK-LABEL: @negative_test_vector_legal_non_scalable +// CHECK-NOT: memref.collapse + +func.func @negative_test_vector_legal_non_scalable(%i : index, %j : index, %M : memref) -> vector<8x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<8x8xi8> + + return %A : vector<8x8xi8> +} + +// - + +// CHECK-LABEL: @negative_test_vector_legal_scalable_0 +// CHECK-NOT: memref.collapse + +func.func @negative_test_vector_legal_scalable_0(%i : index, %j : index, %M : memref) -> vector<[8]xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true]} : memref, vector<[8]xi8> + + return %A : ve
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. momchil-velikov wrote: Comment added. https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
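The mask observation in the comment above is easy to reproduce; a minimal sketch, assuming the 4x4 example mask with a 3x3 block of ones:

```python
# Flattening a 2-D create_mask-style mask does not generally yield a
# leading-ones 1-D mask, which is why the pattern bails out when masked.
import numpy as np

m = (np.arange(4)[:, None] < 3) & (np.arange(4)[None, :] < 3)
print(*m.astype(int).ravel())  # 1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0
```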
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. +VectorType origVT = readOp.getVectorType(); +ArrayRef origScalableDims = origVT.getScalableDims(); +const int64_t origVRank = origVT.getRank(); +if (origVRank < 2 || llvm::count(origScalableDims, true) != 1) momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. +VectorType origVT = readOp.getVectorType(); +ArrayRef origScalableDims = origVT.getScalableDims(); +const int64_t origVRank = origVT.getRank(); +if (origVRank < 2 || llvm::count(origScalableDims, true) != 1) + return rewriter.notifyMatchFailure(readOp, "wrong dimensions"); + +// Number of trailing dimensions to collapse, including the scalable +// dimension. Nothing to do if the single scalable dimension is already the +// last one. +const int64_t numCollapseDims = std::distance( +llvm::find(origScalableDims, true), origScalableDims.end()); +if (numCollapseDims < 2) + return rewriter.notifyMatchFailure(readOp, + "scalable dimension is trailing"); + +// We want a simple memref (not a tensor) with contiguous elements for at +// least all the trailing dimensions up to and including the scalable one. +auto memTy = dyn_cast(readOp.getBase().getType()); +if (!(memTy && memTy.areTrailingDimsContiguous(numCollapseDims))) + return rewriter.notifyMatchFailure( + readOp, "non-contiguous memref dimensions to collapse"); + +// The collapsed dimensions (excluding the scalable one) of the vector and +// the memref must match and the corresponding indices must be in-bounds (it +// follows these indices would be zero). This guarantees that the operation +// transfers a contiguous block. momchil-velikov wrote: This part wasn't tested at all. Test cases added. https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/143146 >From 198ed819841270aeec7159fe2a9a4c092b8d8af7 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 14 May 2025 09:03:49 + Subject: [PATCH 1/4] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors This patch adds a transform of the `transfer_read` operation to change the vector type to one that can be mapped to an LLVM type. This is done by collapsing trailing dimensions so we obtain a vector type with a single scalable dimension in the rightmost position. --- .../Transforms/LegalizeVectorStorage.cpp | 110 - .../ArmSVE/legalize-transfer-read.mlir| 226 ++ .../transfer-read-scalable-not-rightmost.mlir | 72 ++ 3 files changed, 407 insertions(+), 1 deletion(-) create mode 100644 mlir/test/Dialect/ArmSVE/legalize-transfer-read.mlir create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/transfer-read-scalable-not-rightmost.mlir diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp index d2ac850a5f70b..f16d33c004fec 100644 --- a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp +++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp @@ -298,6 +298,113 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. +VectorType origVT = readOp.getVectorType(); +ArrayRef origScalableDims = origVT.getScalableDims(); +const int64_t origVRank = origVT.getRank(); +if (origVRank < 2 || llvm::count(origScalableDims, true) != 1) + return rewriter.notifyMatchFailure(readOp, "wrong dimensions"); + +// Number of trailing dimensions to collapse, including the scalable +// dimension. Nothing to do if the single scalable dimension is already the +// last one. +const int64_t numCollapseDims = std::distance( +llvm::find(origScalableDims, true), origScalableDims.end()); +if (numCollapseDims < 2) + return rewriter.notifyMatchFailure(readOp, + "scalable dimension is trailing"); + +// We want a simple memref (not a tensor) with contiguous elements for at +// least all the trailing dimensions up to and including the scalable one. +auto memTy = dyn_cast(readOp.getBase().getType()); +if (!(memTy && memTy.areTrailingDimsContiguous(numCollapseDims))) + return rewriter.notifyMatchFailure( + readOp, "non-contiguous memref dimensions to collapse"); + +// The collapsed dimensions (excluding the scalable one) of the vector and +// the memref must match and the corresponding indices must be in-bounds (it +// follows these indices would be zero). This guarantees that the operation +// transfers a contiguous block. +if (!llvm::equal(memTy.getShape().take_back(numCollapseDims - 1), + origVT.getShape().take_back(numCollapseDims - 1))) + return rewriter.notifyMatchFailure( + readOp, "memref and vector dimensions do not match"); + +SmallVector origInBounds = readOp.getInBoundsValues(); +if (!llvm::all_of( +ArrayRef(origInBounds).take_back(numCollapseDims - 1), +[](bool v) { return v; })) + return rewriter.notifyMatchFailure(readOp, + "out-of-bounds index to collapse"); + +// Collapse the trailing dimensions of the memref. +SmallVector reassoc; +for (int64_t i = 0; i < memTy.getRank() - numCollapseDims + 1; ++i) + reassoc.push_back({i}); +for (int64_t i = memTy.getRank() - numCollapseDims + 1; i < memTy.getRank(); + ++i) + reassoc.
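The reassociation the two loops at the end of the quoted hunk build reduces to a simple grouping. A sketch of the equivalent computation, assuming the truncated second loop appends to the last group, as the expected [[0], [1, 2, 3]] output in the tests indicates:

```python
# Leading dims stay singleton groups; the trailing num_collapse dims
# (from the scalable one to the end) collapse into a single group.
def collapse_reassociation(memref_rank, num_collapse):
    lead = memref_rank - num_collapse
    return [[i] for i in range(lead)] + [list(range(lead, memref_rank))]

assert collapse_reassociation(4, 3) == [[0], [1, 2, 3]]    # the 3-d vector case
assert collapse_reassociation(4, 2) == [[0], [1], [2, 3]]  # the base case
```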
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/Pierre-vh approved this pull request. https://github.com/llvm/llvm-project/pull/142789
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner { return {MatchMI, MatchMI->getOperand(1).getReg()}; } + std::pair tryMatchRALFromUnmerge(Register Src) { +MachineInstr *ReadAnyLane = MRI.getVRegDef(Src); +if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) { Pierre-vh wrote: use early return here? https://github.com/llvm/llvm-project/pull/142789
[llvm-branch-commits] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke created https://github.com/llvm/llvm-project/pull/145009 Similar to the existing implementations for X86 and PPC, support symbolizing branch targets for AArch64. Do not omit the address for ADRP as the target is typically not at an intended location.
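As a rough illustration of what operand symbolization does, a toy model follows; it is not llvm-objdump's actual algorithm or output format, and the addresses and mnemonics are made up:

```python
# One pass collects local branch targets and names them <L0>, <L1>, ...;
# printing then emits the labels and rewrites branch operands to use them.
disasm = [
    (0x00, "cbz w0,", 0x0C),          # (address, text, branch target or None)
    (0x04, "add x1, x1, #1", None),
    (0x08, "b", 0x00),
    (0x0C, "ret", None),
]
targets = sorted({t for _, _, t in disasm if t is not None})
labels = {a: f"<L{i}>" for i, a in enumerate(targets)}
for addr, text, tgt in disasm:
    if addr in labels:
        print(f"{labels[addr]}:")
    suffix = f" {labels[tgt]}" if tgt is not None else ""
    print(f"  {addr:#06x}: {text}{suffix}")
```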
[llvm-branch-commits] [llvm] AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR (PR #145024)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/145024
[llvm-branch-commits] [llvm] AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR (PR #145024)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/145024
[llvm-branch-commits] [llvm] AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR (PR #145024)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests Stack: * **#145025** * **#145024** 👈 (this PR; view in Graphite) * **#145023** * `main` This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/ https://github.com/llvm/llvm-project/pull/145024
[llvm-branch-commits] [llvm] AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR (PR #145024)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR In gfx90a-gfx950, it's possible to emit MFMAs which use AGPRs or VGPRs for vdst and src2. We do not want to do use the AGPR form, unless required by register pressure as it requires cross bank register copies from most other instructions. Currently we select the AGPR or VGPR version depending on a crude heuristic for whether it's possible AGPRs will be required. We really need the register allocation to be complete to make a good decision, which is what this pass is for. This adds the pass, but does not yet remove the selection patterns for AGPRs. Add test XXX - Add test Debug register count Test cleanup xxx Pass work Debug print pass Compute class constraints pass Add test with rewritable source pass work pass work Copy is gone delete the copy junk Add another test not sure if useful test Comment based on vague memories More tests skipFunction Comment moretest comment skipFunction untied test Untied earlyclobber test shitwork I am dummy junK Fix newpm cleanup Comment Revert debug junk in RegAllocBase Revert debug junk Update llc-pipeline Rename test cleanup early exit comment sanitize --- Patch is 24.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145024.diff 10 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+11) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) - (added) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+300) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3) - (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (+1) - (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.h (+4) - (modified) llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir (+10-12) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+4) - (modified) llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll (+4) - (modified) llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn (+1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 68a3caf595449..3464856267b17 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -541,6 +541,17 @@ extern char &GCNRewritePartialRegUsesID; void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &); extern char &AMDGPUWaitSGPRHazardsLegacyID; +class AMDGPURewriteAGPRCopyMFMAPass +: public PassInfoMixin { +public: + AMDGPURewriteAGPRCopyMFMAPass() = default; + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +void initializeAMDGPURewriteAGPRCopyMFMALegacyPass(PassRegistry &); +extern char &AMDGPURewriteAGPRCopyMFMALegacyID; + namespace AMDGPU { enum TargetIndex { TI_CONSTDATA_START, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 13453963eec6d..b61216c5e5e92 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -102,6 +102,7 @@ MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this)) MACHINE_FUNCTION_PASS("amdgpu-mark-last-scratch-load", AMDGPUMarkLastScratchLoadPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass()) MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass()) +MACHINE_FUNCTION_PASS("amdgpu-rewrite-agpr-copy-mfma", AMDGPURewriteAGPRCopyMFMAPass()) MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass()) MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp new file mode 100644 index 0..f877858413505 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -0,0 +1,300 @@ +//===-- AMDGPURewriteAGPRCopyMFMA.cpp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA +/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and +/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have +/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This +/// pass will attempt to delete the cross register bank copy and replace the +/// MFMA opcode. +/// +/// TODO: +/// - Handle non-tied dst+src2 cases. We need
[llvm-branch-commits] [llvm] WIP: AMDGPU: Always select the VGPR version of MFMAs (PR #145025)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/145025 We do not want to use AGPRs unless absolutely required due to register pressure. Rely on a post-regalloc pass to replace VGPR MFMAs with the AGPR version if it avoids the copies introduced due to live range splitting. >From 7f38e2b194c6ef4e4d4b34af6dc93f8d67e81434 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 9 Dec 2024 15:41:44 -0600 Subject: [PATCH] WIP: AMDGPU: Always select the VGPR version of MFMAs We do not want to use AGPRs unless absolutely required due to register pressure. Rely on a post-regalloc pass to replace VGPR MFMAs with the AGPR version if it avoids the copies introduced due to live range splitting. --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 10 ++-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 20 +-- .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 6 -- .../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 6 -- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 55 ++- 5 files changed, 35 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index dca55dafcc5e3..8331fe333e637 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4865,31 +4865,29 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // for srcA/srcB? // // vdst, srcA, srcB, srcC - const SIMachineFunctionInfo *Info = MF.getInfo(); OpdsMapping[0] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); break; } case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { - const SIMachineFunctionInfo *Info = MF.getInfo(); OpdsMapping[0] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 07d79d677104a..11c9adb3371d5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16076,7 +16076,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - SIMachineFunctionInfo *Info = MF->getInfo(); if (TII->isVOP3(MI.getOpcode())) { // Make sure constant bus requirements are respected. @@ -16087,7 +16086,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // use between vgpr and agpr as agpr tuples tend to be big. 
if (!MI.getDesc().operands().empty()) { unsigned Opc = MI.getOpcode(); - bool HasAGPRs = Info->mayNeedAGPRs(); const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); for (auto I : @@ -16095,7 +16093,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) { if (I == -1) break; -if ((I == Src2Idx) && (HasAGPRs)) +if (I == Src2Idx) break; MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !Op.getReg().isVirtual()) @@ -16129,22 +16127,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, TII->legalizeOpWithMove(MI, Src1Idx); } } - - if (!HasAGPRs) -return; - - // Resolve the rest of AV operands to AGPRs. - if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) { -if (Src2->isReg() && Src2->getReg().isVirtual()) { - auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg()); - if (TRI->isVectorSup
[llvm-branch-commits] [llvm] WIP: AMDGPU: Always select the VGPR version of MFMAs (PR #145025)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes We do not want to use AGPRs unless absolutely required due to register pressure. Rely on a post-regalloc pass to replace VGPR MFMAs with the AGPR version if it avoids the copies introduced due to live range splitting. --- Full diff: https://github.com/llvm/llvm-project/pull/145025.diff 5 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+4-6) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+1-19) - (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (-6) - (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (-6) - (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+30-25) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index dca55dafcc5e3..8331fe333e637 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4865,31 +4865,29 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // for srcA/srcB? // // vdst, srcA, srcB, srcC - const SIMachineFunctionInfo *Info = MF.getInfo(); OpdsMapping[0] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); break; } case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { - const SIMachineFunctionInfo *Info = MF.getInfo(); OpdsMapping[0] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 07d79d677104a..11c9adb3371d5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16076,7 +16076,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - SIMachineFunctionInfo *Info = MF->getInfo(); if (TII->isVOP3(MI.getOpcode())) { // Make sure constant bus requirements are respected. @@ -16087,7 +16086,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // use between vgpr and agpr as agpr tuples tend to be big. 
if (!MI.getDesc().operands().empty()) { unsigned Opc = MI.getOpcode(); - bool HasAGPRs = Info->mayNeedAGPRs(); const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); for (auto I : @@ -16095,7 +16093,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) { if (I == -1) break; -if ((I == Src2Idx) && (HasAGPRs)) +if (I == Src2Idx) break; MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !Op.getReg().isVirtual()) @@ -16129,22 +16127,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, TII->legalizeOpWithMove(MI, Src1Idx); } } - - if (!HasAGPRs) -return; - - // Resolve the rest of AV operands to AGPRs. - if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) { -if (Src2->isReg() && Src2->getReg().isVirtual()) { - auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg()); - if (TRI->isVectorSuperClass(RC)) { -auto *NewRC = TRI->getEquivalentAGPRClass(RC); -MRI.setRegClass(Src2->getReg(), NewRC); -if (Src2->isTied()) - MRI.setRegClass(MI.getOperand(0).getReg(), NewRC); - } -} - } } return; diff --git a/llvm/lib/Target/AMDGPU/S
[llvm-branch-commits] [llvm] WIP: AMDGPU: Always select the VGPR version of MFMAs (PR #145025)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/145025). Learn more: https://graphite.dev/docs/merge-pull-requests * **#145025** 👈 (View in Graphite) * **#145024** * **#145023** * `main` This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/ https://github.com/llvm/llvm-project/pull/145025 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR (PR #145024)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/145024 AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR In gfx90a-gfx950, it's possible to emit MFMAs which use AGPRs or VGPRs for vdst and src2. We do not want to use the AGPR form unless required by register pressure, as it requires cross-bank register copies from most other instructions. Currently we select the AGPR or VGPR version depending on a crude heuristic for whether it's possible AGPRs will be required. We really need the register allocation to be complete to make a good decision, which is what this pass is for. This adds the pass, but does not yet remove the selection patterns for AGPRs. Add test XXX - Add test Debug register count Test cleanup xxx Pass work Debug print pass Compute class constraints pass Add test with rewritable source pass work pass work Copy is gone delete the copy junk Add another test not sure if useful test Comment based on vague memories More tests skipFunction Comment moretest comment skipFunction untied test Untied earlyclobber test shitwork I am dummy junK Fix newpm cleanup Comment Revert debug junk in RegAllocBase Revert debug junk Update llc-pipeline Rename test cleanup early exit comment sanitize >From cfdee22a3782408dbbccac3cd28bca3d9c77692f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 8 Dec 2024 14:24:58 -0500 Subject: [PATCH] AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR In gfx90a-gfx950, it's possible to emit MFMAs which use AGPRs or VGPRs for vdst and src2. We do not want to use the AGPR form unless required by register pressure, as it requires cross-bank register copies from most other instructions. Currently we select the AGPR or VGPR version depending on a crude heuristic for whether it's possible AGPRs will be required. We really need the register allocation to be complete to make a good decision, which is what this pass is for. This adds the pass, but does not yet remove the selection patterns for AGPRs. This is a WIP, and NFC-ish. It should be a no-op on any currently selected code. It also does not yet trigger on the real examples of interest, which require handling batches of MFMAs at once.
--- llvm/lib/Target/AMDGPU/AMDGPU.h | 11 + llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 300 ++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 + llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 + llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 4 + ...class-vgpr-mfma-to-av-with-load-source.mir | 22 +- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 4 + .../CodeGen/AMDGPU/sgpr-regalloc-flags.ll | 4 + .../secondary/llvm/lib/Target/AMDGPU/BUILD.gn | 1 + 10 files changed, 339 insertions(+), 12 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 68a3caf595449..3464856267b17 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -541,6 +541,17 @@ extern char &GCNRewritePartialRegUsesID; void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &); extern char &AMDGPUWaitSGPRHazardsLegacyID; +class AMDGPURewriteAGPRCopyMFMAPass +: public PassInfoMixin { +public: + AMDGPURewriteAGPRCopyMFMAPass() = default; + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +void initializeAMDGPURewriteAGPRCopyMFMALegacyPass(PassRegistry &); +extern char &AMDGPURewriteAGPRCopyMFMALegacyID; + namespace AMDGPU { enum TargetIndex { TI_CONSTDATA_START, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 13453963eec6d..b61216c5e5e92 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -102,6 +102,7 @@ MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this)) MACHINE_FUNCTION_PASS("amdgpu-mark-last-scratch-load", AMDGPUMarkLastScratchLoadPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass()) MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass()) +MACHINE_FUNCTION_PASS("amdgpu-rewrite-agpr-copy-mfma", AMDGPURewriteAGPRCopyMFMAPass()) MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp new file mode 100644 index 0..f877858413505 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -0,0 +1,300 @@ +//===-- AMDGPURewriteAGPRCopyMFMA.cpp ---
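The core idea of the pass, reduced to a minimal standalone sketch. Every type and opcode spelling below is an illustrative stand-in, not the MachineIR API the real AMDGPURewriteAGPRCopyMFMA.cpp works with:

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Miniature model of the rewrite: scan post-RA code and, when a VGPR MFMA
// feeds a COPY into an AGPR (a cross-bank copy created by live-range
// splitting), retarget the MFMA to write the AGPR directly and drop the copy.
struct MI {
  std::string Opc; // "MFMA_VGPR", "COPY", ...
  std::string Def; // defined register, e.g. "v0" or "a0"
  std::string Use; // single use operand, enough for the sketch
};

static bool isAGPR(const std::string &R) { return !R.empty() && R[0] == 'a'; }

void rewriteMfmaCopies(std::vector<MI> &Body) {
  for (size_t I = 0; I + 1 < Body.size(); ++I) {
    MI &Def = Body[I], &Copy = Body[I + 1];
    if (Def.Opc == "MFMA_VGPR" && Copy.Opc == "COPY" &&
        Copy.Use == Def.Def && isAGPR(Copy.Def)) {
      Def.Opc = "MFMA_AGPR"; // switch to the AGPR form of the instruction
      Def.Def = Copy.Def;    // write the AGPR directly
      Copy.Opc = "DEAD";     // the cross-bank copy is now removable
    }
  }
}

int main() {
  std::vector<MI> Body = {{"MFMA_VGPR", "v0", "v1"}, {"COPY", "a0", "v0"}};
  rewriteMfmaCopies(Body);
  for (const MI &I : Body)
    std::printf("%s %s, %s\n", I.Opc.c_str(), I.Def.c_str(), I.Use.c_str());
}
```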
[llvm-branch-commits] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
llvmbot wrote: @llvm/pr-subscribers-llvm-binary-utilities Author: Alexis Engelke (aengelke) Changes Similar to the existing implementations for X86 and PPC, support symbolizing branch targets for AArch64. Do not omit the address for ADRP as the target is typically not at an intended location. --- Full diff: https://github.com/llvm/llvm-project/pull/145009.diff 3 Files Affected: - (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp (+10) - (added) llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml (+42) - (modified) llvm/tools/llvm-objdump/llvm-objdump.cpp (+2-1) ``diff diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index bbe83821eca8e..fa7610db82bfb 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1784,6 +1784,10 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { + // Do not print the numeric target address when symbolizing. + if (SymbolizeOperands) +return; + const MCOperand &Op = MI->getOperand(OpNum); // If the label has already been resolved to an immediate offset (say, when @@ -1813,6 +1817,12 @@ void AArch64InstPrinter::printAdrAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { + // Do not print the numeric target address when symbolizing. + // However, do print for ADRP, as this is typically used together with an ADD + // or an immediate-offset ldr/str and the label is likely at the wrong point. + if (SymbolizeOperands && MI->getOpcode() != AArch64::ADRP) +return; + const MCOperand &Op = MI->getOperand(OpNum); // If the label has already been resolved to an immediate offset (say, when diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml new file mode 100644 index 0..3f3c6f33e620f --- /dev/null +++ b/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml @@ -0,0 +1,42 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ +# RUN: FileCheck %s --match-full-lines + +## Expect to find the branch labels and global variable name. 
+# CHECK: <_start>: +# CHECK-NEXT: ldr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: cmp x1, x2 +# CHECK-NEXT: b.eq +# CHECK-NEXT: b +# CHECK-NEXT: : +# CHECK-NEXT: cbz x2, +# CHECK-NEXT: ret + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data:ELFDATA2LSB + Type:ET_EXEC + Machine: EM_AARCH64 +Sections: + - Name:.text +Type:SHT_PROGBITS +Address: 0x4000 +Flags: [SHF_ALLOC, SHF_EXECINSTR] +Content: '6080005801d0228000103f0002eb4054fc1762b4c0035fd6' + - Name:.data +Type:SHT_PROGBITS +Flags: [SHF_ALLOC, SHF_WRITE] +Address: 0x5000 +Symbols: + - Name:_start +Section: .text +Value: 0x4000 + - Name:symbol +Section: .data +Value: 0x500c diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 5ecb33375943f..c5967cd090eec 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1495,8 +1495,9 @@ collectLocalBranchTargets(ArrayRef Bytes, MCInstrAnalysis *MIA, // Supported by certain targets. const bool isPPC = STI->getTargetTriple().isPPC(); const bool isX86 = STI->getTargetTriple().isX86(); + const bool isAArch64 = STI->getTargetTriple().isAArch64(); const bool isBPF = STI->getTargetTriple().isBPF(); - if (!isPPC && !isX86 && !isBPF) + if (!isPPC && !isX86 && !isAArch64 && !isBPF) return; if (MIA) `` https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
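The ADRP carve-out is easier to see with the page arithmetic written out. A minimal standalone illustration; the addresses are invented for the example rather than taken from the test:

```cpp
#include <cstdint>
#include <cstdio>

// adrp materializes the 4 KiB page base of its target relative to the pc's
// page; the low 12 bits come from a following add or ldr/str immediate. The
// computed value is therefore a page address, not the symbol itself, which is
// why --symbolize-operands keeps printing it numerically instead of a label.
uint64_t adrpResult(uint64_t pc, int64_t immPages) {
  return (pc & ~uint64_t(0xFFF)) + (uint64_t(immPages) << 12);
}

int main() {
  // A symbol at 0x500c referenced from pc 0x4004 yields the page base 0x5000;
  // a label placed at 0x5000 would point 12 bytes before the symbol.
  std::printf("0x%llx\n", (unsigned long long)adrpResult(0x4004, 1));
}
```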
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
https://github.com/ilovepi approved this pull request. LGTM. Overall a really good improvement. Most of my comments are me noticing bad existing code we should fix. I also left a few nit comments to address, but they're rather minor. https://github.com/llvm/llvm-project/pull/144430 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
@@ -662,6 +709,14 @@ void addTemplateSpecialization(TemplateInfo *I, I->Specialization.emplace(std::move(TSI)); } +template static void addConstraint(T I, ConstraintInfo &&C) { + llvm::errs() << "invalid container for constraint info"; + exit(1); +} ilovepi wrote: Well, I was going to leave a comment about this, but I see there's already 4 occurrences. These should probably use exit on err, but that should all be handled separately. I assume this could be rewritten w/ some `constexpr enable_if` magic to be equivalent, but 🤷 most of this code was written when the LLVM codebase was still c++14-only. https://github.com/llvm/llvm-project/pull/144430 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
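For reference, one shape that `constexpr` rewrite could take: a hedged C++17 sketch using the detection idiom, with simplified stand-in Info types rather than the real clang-doc ones:

```cpp
#include <cstdlib>
#include <iostream>
#include <type_traits>
#include <utility>

// Simplified stand-ins: only some Info types can carry a constraint.
struct ConstraintInfo {};
struct ConceptInfo { ConstraintInfo Constraint; };
struct EnumInfo {};

// C++17 detection idiom: does T have a Constraint member?
template <typename T, typename = void>
struct HasConstraint : std::false_type {};
template <typename T>
struct HasConstraint<T, std::void_t<decltype(std::declval<T &>().Constraint)>>
    : std::true_type {};

// One primary template replaces N copies of the exit(1) fallback overload;
// `if constexpr` prunes the invalid branch per instantiation.
template <typename T> void addConstraint(T *I, ConstraintInfo &&C) {
  if constexpr (HasConstraint<T>::value) {
    I->Constraint = std::move(C);
  } else {
    std::cerr << "invalid container for constraint info\n";
    std::exit(1);
  }
}

int main() {
  ConceptInfo CI;
  addConstraint(&CI, ConstraintInfo{}); // stores the constraint
  // EnumInfo EI; addConstraint(&EI, ConstraintInfo{}); // would exit(1)
}
```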
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
@@ -248,6 +257,27 @@ static void serializeCommonChildren(const ScopeChildren &Children, } } +template +static void serializeArray(const std::vector &Records, Object &Obj, + const std::string &Key, + SerializationFunc serializeInfo) { + json::Value RecordsArray = Array(); + auto &RecordsArrayRef = *RecordsArray.getAsArray(); + RecordsArrayRef.reserve(Records.size()); + for (const auto &Item : Records) { +json::Value ItemVal = Object(); +auto &ItemObj = *ItemVal.getAsObject(); +serializeInfo(Item, ItemObj); +RecordsArrayRef.push_back(ItemVal); + } ilovepi wrote: Here's another code pattern I see repeated a lot. https://github.com/llvm/llvm-project/pull/144430 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
@@ -817,6 +872,20 @@ llvm::Error ClangDocBitcodeReader::readSubBlock(unsigned ID, T I) { addChild(I, std::move(TI)); return llvm::Error::success(); } + case BI_CONSTRAINT_BLOCK_ID: { +ConstraintInfo CI; +if (auto Err = readBlock(ID, &CI)) + return Err; +addConstraint(I, std::move(CI)); +return llvm::Error::success(); + } ilovepi wrote: We should have a helper to avoid this kind of boilerplate that can take a callable. Not for this patch, though. If you wouldn't mind, please add a TODO, or file an issue. https://github.com/llvm/llvm-project/pull/144430 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
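One possible shape for such a helper, sketched standalone. All names here are hypothetical, and a `bool` return models `llvm::Error`:

```cpp
#include <cstdio>
#include <utility>

// Stand-ins for two of the block types readSubBlock() handles.
struct ConstraintInfo { int Payload = 0; };
struct TemplateInfo { int Payload = 0; };

// Hypothetical helper: read one sub-block into a fresh object and hand it to
// a callable that attaches it to the parent. Each case label in the reader's
// switch would then shrink to a single call.
template <typename BlockT, typename ReadFn, typename AttachFn>
bool readSubBlockInto(unsigned ID, ReadFn &&read, AttachFn &&attach) {
  BlockT Block;
  if (!read(ID, &Block)) // models `if (auto Err = readBlock(ID, &CI)) return Err;`
    return false;
  attach(std::move(Block));
  return true;
}

int main() {
  int Stored = 0;
  bool OK = readSubBlockInto<ConstraintInfo>(
      /*ID=*/42,
      [](unsigned, ConstraintInfo *CI) { CI->Payload = 7; return true; },
      [&](ConstraintInfo &&CI) { Stored = CI.Payload; });
  std::printf("ok=%d stored=%d\n", OK, Stored);
}
```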
[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)
@@ -4274,6 +4274,7 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { case lltok::kw_bitcast: case lltok::kw_addrspacecast: case lltok::kw_inttoptr: + // ptrtoaddr not supported in constant exprs (yet?). jrtc27 wrote: (i.e. it's a TODO if not implemented here, not a question of whether it should be supported, IMO) https://github.com/llvm/llvm-project/pull/139357 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)
@@ -4274,6 +4274,7 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { case lltok::kw_bitcast: case lltok::kw_addrspacecast: case lltok::kw_inttoptr: + // ptrtoaddr not supported in constant exprs (yet?). jrtc27 wrote: That's something that is needed; we support that on CHERI https://github.com/llvm/llvm-project/pull/139357 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR (PR #145024)
https://github.com/rampitec commented: Do you assume that at this stage there are no accvgpr_write/read instructions, but only COPY? https://github.com/llvm/llvm-project/pull/145024 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)
https://github.com/krzysz00 commented: I think the RFC's at consensus and that things are in a decent state. One comment I have is that the documentation mentions vectors of pointers and I don't see any tests for that. But that minor issue aside, I'd say ... one last call for comments before this gets out of limbo? Let's give it until next Wednesday, PDT afternoon? https://github.com/llvm/llvm-project/pull/139357 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
https://github.com/ilovepi edited https://github.com/llvm/llvm-project/pull/144430 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
@@ -584,6 +613,18 @@ template <> llvm::Error addReference(RecordInfo *I, Reference &&R, FieldId F) { } } +template <> +llvm::Error addReference(ConstraintInfo *I, Reference &&R, FieldId F) { + switch (F) { + case FieldId::F_concept: +I->ConceptRef = std::move(R); +return llvm::Error::success(); + default: +return llvm::createStringError(llvm::inconvertibleErrorCode(), + "invalid type cannot contain Reference"); ilovepi wrote: Can we make it more obvious that this is related to concepts somehow? You just have a single arm in the switch, so it may also be preferable to just use an `if`, unless you plan to support more field types. https://github.com/llvm/llvm-project/pull/144430 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
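Concretely, the one-arm switch collapses to an early-exit `if`. A standalone sketch with stand-in types (a `bool` models `llvm::Error`):

```cpp
#include <string>
#include <utility>

// Minimal stand-ins for the clang-doc types, illustrative only.
enum class FieldId { F_concept, F_default };
struct Reference { std::string Name; };
struct ConstraintInfo { Reference ConceptRef; };

// The one-arm switch from the patch, rewritten as an early-exit `if`.
bool addReference(ConstraintInfo *I, Reference &&R, FieldId F) {
  if (F != FieldId::F_concept)
    return false; // "invalid type cannot contain Reference"
  I->ConceptRef = std::move(R);
  return true;
}

int main() {
  ConstraintInfo CI;
  return addReference(&CI, Reference{"C"}, FieldId::F_concept) ? 0 : 1;
}
```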
[llvm-branch-commits] [BOLT][NFCI] Use heuristic for matching split global functions (PR #90429)
https://github.com/rafaelauler approved this pull request. lgtm https://github.com/llvm/llvm-project/pull/90429 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CI] Test all projects when CI scripts change (PR #144034)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/144034 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CI] Test all projects when CI scripts change (PR #144034)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/144034 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)
arichardson wrote: ping. What can I do to push this forward? I would like to avoid making any further follow-up changes if there is any risk this would not land. @nikic are you still happy with the introduction of this instruction? https://github.com/llvm/llvm-project/pull/139357 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFCI] Use heuristic for matching split global functions (PR #90429)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/90429 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AArch64: Add libcall impl declarations for __arm_sc* memory functions (PR #144977)
https://github.com/dpaoliello approved this pull request. https://github.com/llvm/llvm-project/pull/144977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
https://github.com/banach-space edited https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -203,7 +205,14 @@ class AMDGPURegBankLegalizeCombiner { bool tryEliminateReadAnyLane(MachineInstr &Copy) { Register Dst = Copy.getOperand(0).getReg(); Register Src = Copy.getOperand(1).getReg(); -if (!Src.isVirtual()) + +// Skip non-vgpr Dst +if ((Dst.isVirtual() && MRI.getRegBankOrNull(Dst) != VgprRB) || +(Dst.isPhysical() && !TRI.isVGPR(MRI, Dst))) Pierre-vh wrote: ```suggestion if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB) : !TRI.isVGPR(MRI, Dst)) ``` https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFCI] Use heuristic for matching split global functions (PR #90429)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/90429 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFCI] Use heuristic for matching split global functions (PR #90429)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/90429 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFC] Simplify doTrace in BAT mode (PR #143233)
https://github.com/rafaelauler approved this pull request. lgtm https://github.com/llvm/llvm-project/pull/143233 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFC] Simplify doTrace in BAT mode (PR #143233)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/143233 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFC] Simplify doTrace in BAT mode (PR #143233)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/143233 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Add missing erasure notifications (PR #145030)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/145030 >From d65161f2d8e65512a6924ac96f069ab5acce0fcd Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 20 Jun 2025 12:25:00 + Subject: [PATCH] [mlir][Transforms] Dialect conversion: Add missing erasure notifications --- .../Transforms/Utils/DialectConversion.cpp| 57 --- mlir/test/Transforms/test-legalizer.mlir | 18 +- 2 files changed, 54 insertions(+), 21 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index ff48647f43305..9c75b7436cddc 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -274,6 +274,26 @@ struct RewriterState { // IR rewrites //===--===// +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op); + +/// Notify the listener that the given block and its contents are being erased. +static void notifyIRErased(RewriterBase::Listener *listener, Block &b) { + for (Operation &op : b) +notifyIRErased(listener, op); + listener->notifyBlockErased(&b); +} + +/// Notify the listener that the given operation and its contents are being +/// erased. +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op) { + for (Region &r : op.getRegions()) { +for (Block &b : r) { + notifyIRErased(listener, b); +} + } + listener->notifyOperationErased(&op); +} + /// An IR rewrite that can be committed (upon success) or rolled back (upon /// failure). /// @@ -422,17 +442,20 @@ class EraseBlockRewrite : public BlockRewrite { } void commit(RewriterBase &rewriter) override { -// Erase the block. assert(block && "expected block"); -assert(block->empty() && "expected empty block"); -// Notify the listener that the block is about to be erased. +// Notify the listener that the block and its contents are being erased. if (auto *listener = dyn_cast_or_null(rewriter.getListener())) - listener->notifyBlockErased(block); + notifyIRErased(listener, *block); } void cleanup(RewriterBase &rewriter) override { +// Erase the contents of the block. +for (auto &op : llvm::make_early_inc_range(llvm::reverse(*block))) + rewriter.eraseOp(&op); +assert(block->empty() && "expected empty block"); + // Erase the block. block->dropAllDefinedValueUses(); delete block; @@ -1147,12 +1170,9 @@ void ReplaceOperationRewrite::commit(RewriterBase &rewriter) { if (getConfig().unlegalizedOps) getConfig().unlegalizedOps->erase(op); - // Notify the listener that the operation (and its nested operations) was - // erased. - if (listener) { -op->walk( -[&](Operation *op) { listener->notifyOperationErased(op); }); - } + // Notify the listener that the operation and its contents are being erased. + if (listener) +notifyIRErased(listener, *op); // Do not erase the operation yet. It may still be referenced in `mapping`. // Just unlink it for now and erase it during cleanup. @@ -1605,6 +1625,8 @@ void ConversionPatternRewriterImpl::replaceOp( } void ConversionPatternRewriterImpl::eraseBlock(Block *block) { + assert(!wasOpReplaced(block->getParentOp()) && + "attempting to erase a block within a replaced/erased op"); appendRewrite(block); // Unlink the block from its parent region. The block is kept in the rewrite @@ -1612,12 +1634,16 @@ void ConversionPatternRewriterImpl::eraseBlock(Block *block) { // allows us to keep the operations in the block live and undo the removal by // re-inserting the block. 
block->getParent()->getBlocks().remove(block); + + // Mark all nested ops as erased. + block->walk([&](Operation *op) { replacedOps.insert(op); }); } void ConversionPatternRewriterImpl::notifyBlockInserted( Block *block, Region *previous, Region::iterator previousIt) { - assert(!wasOpReplaced(block->getParentOp()) && - "attempting to insert into a region within a replaced/erased op"); + assert( + (!config.allowPatternRollback || !wasOpReplaced(block->getParentOp())) && + "attempting to insert into a region within a replaced/erased op"); LLVM_DEBUG( { Operation *parent = block->getParentOp(); @@ -1709,13 +1735,6 @@ void ConversionPatternRewriter::eraseOp(Operation *op) { } void ConversionPatternRewriter::eraseBlock(Block *block) { - assert(!impl->wasOpReplaced(block->getParentOp()) && - "attempting to erase a block within a replaced/erased op"); - - // Mark all ops for erasure. - for (Operation &op : *block) -eraseOp(&op); - impl->eraseBlock(block); } diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index 34948ae685f0a..204c8c1456826 100644 --- a/mlir/test/Transforms/tes
[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Add missing erasure notifications (PR #145030)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/145030 >From edb49ecf11faa51847b324d6e43336845e71fcf4 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 20 Jun 2025 12:25:00 + Subject: [PATCH] [mlir][Transforms] Dialect conversion: Add missing erasure notifications --- .../Transforms/Utils/DialectConversion.cpp| 52 +-- mlir/test/Transforms/test-legalizer.mlir | 18 ++- 2 files changed, 51 insertions(+), 19 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index ff48647f43305..ad82a007b7996 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -274,6 +274,26 @@ struct RewriterState { // IR rewrites //===--===// +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op); + +/// Notify the listener that the given block and its contents are being erased. +static void notifyIRErased(RewriterBase::Listener *listener, Block &b) { + for (Operation &op : b) +notifyIRErased(listener, op); + listener->notifyBlockErased(&b); +} + +/// Notify the listener that the given operation and its contents are being +/// erased. +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op) { + for (Region &r : op.getRegions()) { +for (Block &b : r) { + notifyIRErased(listener, b); +} + } + listener->notifyOperationErased(&op); +} + /// An IR rewrite that can be committed (upon success) or rolled back (upon /// failure). /// @@ -422,17 +442,20 @@ class EraseBlockRewrite : public BlockRewrite { } void commit(RewriterBase &rewriter) override { -// Erase the block. assert(block && "expected block"); -assert(block->empty() && "expected empty block"); -// Notify the listener that the block is about to be erased. +// Notify the listener that the block and its contents are being erased. if (auto *listener = dyn_cast_or_null(rewriter.getListener())) - listener->notifyBlockErased(block); + notifyIRErased(listener, *block); } void cleanup(RewriterBase &rewriter) override { +// Erase the contents of the block. +for (auto &op : llvm::make_early_inc_range(llvm::reverse(*block))) + rewriter.eraseOp(&op); +assert(block->empty() && "expected empty block"); + // Erase the block. block->dropAllDefinedValueUses(); delete block; @@ -1147,12 +1170,9 @@ void ReplaceOperationRewrite::commit(RewriterBase &rewriter) { if (getConfig().unlegalizedOps) getConfig().unlegalizedOps->erase(op); - // Notify the listener that the operation (and its nested operations) was - // erased. - if (listener) { -op->walk( -[&](Operation *op) { listener->notifyOperationErased(op); }); - } + // Notify the listener that the operation and its contents are being erased. + if (listener) +notifyIRErased(listener, *op); // Do not erase the operation yet. It may still be referenced in `mapping`. // Just unlink it for now and erase it during cleanup. @@ -1605,6 +1625,8 @@ void ConversionPatternRewriterImpl::replaceOp( } void ConversionPatternRewriterImpl::eraseBlock(Block *block) { + assert(!wasOpReplaced(block->getParentOp()) && + "attempting to erase a block within a replaced/erased op"); appendRewrite(block); // Unlink the block from its parent region. The block is kept in the rewrite @@ -1612,6 +1634,9 @@ void ConversionPatternRewriterImpl::eraseBlock(Block *block) { // allows us to keep the operations in the block live and undo the removal by // re-inserting the block. 
block->getParent()->getBlocks().remove(block); + + // Mark all nested ops as erased. + block->walk([&](Operation *op) { replacedOps.insert(op); }); } void ConversionPatternRewriterImpl::notifyBlockInserted( @@ -1709,13 +1734,6 @@ void ConversionPatternRewriter::eraseOp(Operation *op) { } void ConversionPatternRewriter::eraseBlock(Block *block) { - assert(!impl->wasOpReplaced(block->getParentOp()) && - "attempting to erase a block within a replaced/erased op"); - - // Mark all ops for erasure. - for (Operation &op : *block) -eraseOp(&op); - impl->eraseBlock(block); } diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index 34948ae685f0a..204c8c1456826 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -461,12 +461,26 @@ func.func @convert_detached_signature() { // - +// CHECK: notifyOperationReplaced: test.erase_op +// CHECK: notifyOperationErased: test.dummy_op_lvl_2 +// CHECK: notifyBlockErased +// CHECK: notifyOperationErased: test.dummy_op_lvl_1 +// CHECK: notifyBlockErased +// CHECK: notifyOperationErased: test.erase_op +// CHECK: notifyOperationInser
[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Add missing erasure notifications (PR #145030)
llvmbot wrote: @llvm/pr-subscribers-mlir-core Author: Matthias Springer (matthias-springer) Changes Add missing listener notifications when erasing nested blocks/operations. This commit also moves some of the functionality from `ConversionPatternRewriter` to `ConversionPatternRewriterImpl`. This is in preparation of the One-Shot Dialect Conversion refactoring: The implementations in `ConversionPatternRewriter` should be as simple as possible, so that a switch between "rollback allowed" and "rollback not allowed" can be inserted at that level. (In the latter case, `ConversionPatternRewriterImpl` can be bypassed to some degree, and `PatternRewriter::eraseBlock` etc. can be used.) --- Full diff: https://github.com/llvm/llvm-project/pull/145030.diff 2 Files Affected: - (modified) mlir/lib/Transforms/Utils/DialectConversion.cpp (+43-19) - (modified) mlir/test/Transforms/test-legalizer.mlir (+16-2) ``diff diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index ff48647f43305..7419d79cd8856 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -274,6 +274,26 @@ struct RewriterState { // IR rewrites //===--===// +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op); + +/// Notify the listener that the given block and its contents are being erased. +static void notifyIRErased(RewriterBase::Listener *listener, Block &b) { + for (Operation &op : b) +notifyIRErased(listener, op); + listener->notifyBlockErased(&b); +} + +/// Notify the listener that the given operation and its contents are being +/// erased. +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op) { + for (Region &r : op.getRegions()) { +for (Block &b : r) { + notifyIRErased(listener, b); +} + } + listener->notifyOperationErased(&op); +} + /// An IR rewrite that can be committed (upon success) or rolled back (upon /// failure). /// @@ -422,17 +442,20 @@ class EraseBlockRewrite : public BlockRewrite { } void commit(RewriterBase &rewriter) override { -// Erase the block. assert(block && "expected block"); -assert(block->empty() && "expected empty block"); -// Notify the listener that the block is about to be erased. +// Notify the listener that the block and its contents are being erased. if (auto *listener = dyn_cast_or_null(rewriter.getListener())) - listener->notifyBlockErased(block); + notifyIRErased(listener, *block); } void cleanup(RewriterBase &rewriter) override { +// Erase the contents of the block. +for (auto &op : llvm::make_early_inc_range(llvm::reverse(*block))) + rewriter.eraseOp(&op); +assert(block->empty() && "expected empty block"); + // Erase the block. block->dropAllDefinedValueUses(); delete block; @@ -1147,12 +1170,9 @@ void ReplaceOperationRewrite::commit(RewriterBase &rewriter) { if (getConfig().unlegalizedOps) getConfig().unlegalizedOps->erase(op); - // Notify the listener that the operation (and its nested operations) was - // erased. - if (listener) { -op->walk( -[&](Operation *op) { listener->notifyOperationErased(op); }); - } + // Notify the listener that the operation and its contents are being erased. + if (listener) +notifyIRErased(listener, *op); // Do not erase the operation yet. It may still be referenced in `mapping`. // Just unlink it for now and erase it during cleanup. 
@@ -1605,6 +1625,8 @@ void ConversionPatternRewriterImpl::replaceOp( } void ConversionPatternRewriterImpl::eraseBlock(Block *block) { + assert(!wasOpReplaced(block->getParentOp()) && + "attempting to erase a block within a replaced/erased op"); appendRewrite(block); // Unlink the block from its parent region. The block is kept in the rewrite @@ -1612,12 +1634,16 @@ void ConversionPatternRewriterImpl::eraseBlock(Block *block) { // allows us to keep the operations in the block live and undo the removal by // re-inserting the block. block->getParent()->getBlocks().remove(block); + + // Mark all nested ops as erased. + block->walk([&](Operation *op) { replacedOps.insert(op); }); } void ConversionPatternRewriterImpl::notifyBlockInserted( Block *block, Region *previous, Region::iterator previousIt) { - assert(!wasOpReplaced(block->getParentOp()) && - "attempting to insert into a region within a replaced/erased op"); + assert( + (!config.allowPatternRollback || !wasOpReplaced(block->getParentOp())) && + "attempting to insert into a region within a replaced/erased op"); LLVM_DEBUG( { Operation *parent = block->getParentOp(); @@ -1630,6 +1656,11 @@ void ConversionPatternRewriterImpl::notifyBlockInserted( } }); + if (!config.allowPa
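The notification order the updated test expects (innermost ops first, each block after its contents) falls out of the recursive walk. A standalone model of `notifyIRErased` with toy IR types in place of Operation/Block/Listener:

```cpp
#include <cstdio>
#include <string>
#include <vector>

struct Block;
struct Op {
  std::string Name;
  std::vector<Block> Regions; // one block per region is enough for the toy
};
struct Block {
  std::vector<Op> Ops;
};

struct Listener {
  void notifyOperationErased(const Op &O) {
    std::printf("notifyOperationErased: %s\n", O.Name.c_str());
  }
  void notifyBlockErased(const Block &) { std::printf("notifyBlockErased\n"); }
};

void notifyIRErased(Listener &L, const Op &O);

// A block notifies about each contained op (recursively), then itself.
void notifyIRErased(Listener &L, const Block &B) {
  for (const Op &O : B.Ops)
    notifyIRErased(L, O);
  L.notifyBlockErased(B);
}

// An op notifies about its nested regions' blocks first, then itself.
void notifyIRErased(Listener &L, const Op &O) {
  for (const Block &B : O.Regions)
    notifyIRErased(L, B);
  L.notifyOperationErased(O);
}

int main() {
  Op Inner{"test.dummy_op_lvl_2", {}};
  Op Mid{"test.dummy_op_lvl_1", {Block{{Inner}}}};
  Op Root{"test.erase_op", {Block{{Mid}}}};
  Listener L;
  notifyIRErased(L, Root);
}
```

Running it prints the same sequence the CHECK lines in the updated test-legalizer.mlir expect.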
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -0,0 +1,262 @@ +// RUN: mlir-opt --arm-sve-legalize-vector-storage --split-input-file %s | FileCheck %s + +// - + +// CHECK-LABEL: @test_base_case +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]]: +// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1], [2, 3]] +// CHECK-SAME:: memref into memref +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSE]][%[[I]], %[[J]], %c0], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref, vector<[32]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[32]xi8> to vector<[4]x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x8xi8> + +func.func @test_base_case(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// - + +// CHECK-LABEL: @test_using_strided_layout +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1], [2, 3]] +// CHECK-SAME:: memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSE]][%[[I]], %[[J]], %c0], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref>, vector<[32]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[32]xi8> to vector<[4]x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x8xi8> + +#s0 = strided<[?, ?, 8, 1]> + +func.func @test_using_strided_layout(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// - + +// CHECK-LABEL: @test_3d_vector +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSED:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1, 2, 3]] +// CHECK-SAME:: memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSED]][%[[I]], %[[J]]], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref>, vector<[64]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[64]xi8> to vector<[4]x2x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x2x8xi8> + +#s1 = strided<[?, 16, 8, 1]> + +func.func @test_3d_vector(%i : index, %j : index, %M : memref) -> vector<[4]x2x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true, true]} : memref, vector<[4]x2x8xi8> + + return %A : vector<[4]x2x8xi8> +} + +// - + +// CHECK-LABEL: @test_4d_vector +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSED:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1, 2, 3]] +// CHECK-SAME: : memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSED]][%[[I]], %[[J]]], %c0_i8 {in_bounds = [false, true]} +// CHECK-SAME: : memref>, vector<2x[64]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<2x[4]x2x8xi8> + +#s2 = strided<[?, 16, 8, 1]> + +func.func @test_4d_vector(%i : index, %j : index, %M : memref) -> vector<2x[4]x2x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = 
arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [false, true, true, true]} : memref, vector<2x[4]x2x8xi8> + + return %A : vector<2x[4]x2x8xi8> +} + +// - + +// CHECK-LABEL: @negative_test_vector_legal_non_scalable +// CHECK-NOT: memref.collapse + +func.func @negative_test_vector_legal_non_scalable(%i : index, %j : index, %M : memref) -> vector<8x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<8x8xi8> + + return %A : vector<8x8xi8> +} + +// - + +// CHECK-LABEL: @negative_test_vector_legal_scalable_0 +// CHECK-NOT: memref.collapse + +func.func @negative_test_vector_legal_scalable_0(%i : index, %j : index, %M : memref) -> vector<[8]xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true]} : memref, vector<[8]xi8> + + return %A : ve
[llvm-branch-commits] [BOLT][NFCI] Use heuristic for matching split global functions (PR #90429)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/90429 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
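The "how many trailing dimensions to collapse" step quoted above is self-contained enough to model in isolation; here `std::vector<bool>` stands in for the vector type's scalable-dims array:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Standalone model of the numCollapseDims computation in LegalizeTransferRead:
// find the single scalable dimension and count it plus everything to its
// right; a result below two means the scalable dim is already trailing.
int numCollapseDims(const std::vector<bool> &scalableDims) {
  auto it = std::find(scalableDims.begin(), scalableDims.end(), true);
  return static_cast<int>(std::distance(it, scalableDims.end()));
}

int main() {
  // vector<2x[4]x2x8xi8> has scalable flags {false, true, false, false},
  // so dimensions 1, 2, 3 collapse, matching the reassociation [[0], [1,2,3]].
  std::printf("%d\n", numCollapseDims({false, true, false, false})); // 3
}
```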
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. +VectorType origVT = readOp.getVectorType(); +ArrayRef origScalableDims = origVT.getScalableDims(); +const int64_t origVRank = origVT.getRank(); +if (origVRank < 2 || llvm::count(origScalableDims, true) != 1) + return rewriter.notifyMatchFailure(readOp, "wrong dimensions"); + +// Number of trailing dimensions to collapse, including the scalable +// dimension. Nothing to do if the single scalable dimension is already the +// last one. +const int64_t numCollapseDims = std::distance( +llvm::find(origScalableDims, true), origScalableDims.end()); +if (numCollapseDims < 2) + return rewriter.notifyMatchFailure(readOp, + "scalable dimension is trailing"); + +// We want a simple memref (not a tensor) with contiguous elements for at +// least all the trailing dimensions up to and including the scalable one. +auto memTy = dyn_cast(readOp.getBase().getType()); +if (!(memTy && memTy.areTrailingDimsContiguous(numCollapseDims))) + return rewriter.notifyMatchFailure( + readOp, "non-contiguous memref dimensions to collapse"); + +// The collapsed dimensions (excluding the scalable one) of the vector and +// the memref must match and the corresponding indices must be in-bounds (it +// follows these indices would be zero). This guarantees that the operation +// transfers a contiguous block. banach-space wrote: > // The collapsed dimensions (excluding the scalable one) of the vector and >// the memref must match What about dynamic dim sizes in the memref? If that's not supported, is there a test? https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. +VectorType origVT = readOp.getVectorType(); +ArrayRef origScalableDims = origVT.getScalableDims(); +const int64_t origVRank = origVT.getRank(); +if (origVRank < 2 || llvm::count(origScalableDims, true) != 1) + return rewriter.notifyMatchFailure(readOp, "wrong dimensions"); + +// Number of trailing dimensions to collapse, including the scalable +// dimension. Nothing to do if the single scalable dimension is already the +// last one. +const int64_t numCollapseDims = std::distance( +llvm::find(origScalableDims, true), origScalableDims.end()); +if (numCollapseDims < 2) + return rewriter.notifyMatchFailure(readOp, + "scalable dimension is trailing"); + +// We want a simple memref (not a tensor) with contiguous elements for at +// least all the trailing dimensions up to and including the scalable one. +auto memTy = dyn_cast(readOp.getBase().getType()); +if (!(memTy && memTy.areTrailingDimsContiguous(numCollapseDims))) + return rewriter.notifyMatchFailure( + readOp, "non-contiguous memref dimensions to collapse"); + +// The collapsed dimensions (excluding the scalable one) of the vector and +// the memref must match and the corresponding indices must be in-bounds (it +// follows these indices would be zero). This guarantees that the operation +// transfers a contiguous block. 
+if (!llvm::equal(memTy.getShape().take_back(numCollapseDims - 1), + origVT.getShape().take_back(numCollapseDims - 1))) + return rewriter.notifyMatchFailure( + readOp, "memref and vector dimensions do not match"); + +SmallVector origInBounds = readOp.getInBoundsValues(); +if (!llvm::all_of( +ArrayRef(origInBounds).take_back(numCollapseDims - 1), +[](bool v) { return v; })) + return rewriter.notifyMatchFailure(readOp, + "out-if-bounds index to collapse"); momchil-velikov wrote: Fixed. https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
@@ -649,6 +693,9 @@ template <> void addTemplate(RecordInfo *I, TemplateInfo &&P) { template <> void addTemplate(FunctionInfo *I, TemplateInfo &&P) { I->Template.emplace(std::move(P)); } +template <> void addTemplate(ConceptInfo *I, TemplateInfo &&P) { + I->Template = std::move(P); +} ilovepi wrote: Not something you need to fix here, but dang do we require a lot of boiler-plate changes to the APIs. It's almost like we're not holding templates in the normal way... https://github.com/llvm/llvm-project/pull/144430 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][AArch64] Add integration test for lowering of `vector.contract` to Neon FEAT_I8MM (PR #144699)
https://github.com/banach-space edited https://github.com/llvm/llvm-project/pull/144699 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
https://github.com/banach-space commented: Great work, Momchil - thank you! I've left a number of comments, but nothing major. My main high-level suggestion is to follow the guidance in [MLIR's Testing Guide](https://mlir.llvm.org/getting_started/TestingGuide/#contributor-guidelines) a bit more closely. It’s a relatively new (and long!) document, so I’ve included specific in-line suggestions to make it easier to see where things could align better. For additional context, this [RFC](https://discourse.llvm.org/t/rfc-should-we-aim-for-more-consistency-in-tests/) provides some of the rationale behind that approach. Also - what about memrefs with dynamic dimensions? https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
banach-space wrote: [nit] Avoid using the word `test` in test function names. It's just noise that doesn't add any new info. Instead, try to convey what makes a particular test case unique. See here for MLIR guidelines: https://mlir.llvm.org/getting_started/TestingGuide/#test-formatting-best-practices https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. +VectorType origVT = readOp.getVectorType(); +ArrayRef origScalableDims = origVT.getScalableDims(); +const int64_t origVRank = origVT.getRank(); +if (origVRank < 2 || llvm::count(origScalableDims, true) != 1) + return rewriter.notifyMatchFailure(readOp, "wrong dimensions"); + +// Number of trailing dimensions to collapse, including the scalable +// dimension. Nothing to do if the single scalable dimension is already the +// last one. +const int64_t numCollapseDims = std::distance( +llvm::find(origScalableDims, true), origScalableDims.end()); +if (numCollapseDims < 2) + return rewriter.notifyMatchFailure(readOp, + "scalable dimension is trailing"); + +// We want a simple memref (not a tensor) with contiguous elements for at +// least all the trailing dimensions up to and including the scalable one. +auto memTy = dyn_cast(readOp.getBase().getType()); +if (!(memTy && memTy.areTrailingDimsContiguous(numCollapseDims))) + return rewriter.notifyMatchFailure( + readOp, "non-contiguous memref dimensions to collapse"); + +// The collapsed dimensions (excluding the scalable one) of the vector and +// the memref must match and the corresponding indices must be in-bounds (it +// follows these indices would be zero). This guarantees that the operation +// transfers a contiguous block. 
+if (!llvm::equal(memTy.getShape().take_back(numCollapseDims - 1), + origVT.getShape().take_back(numCollapseDims - 1))) + return rewriter.notifyMatchFailure( + readOp, "memref and vector dimensions do not match"); + +SmallVector origInBounds = readOp.getInBoundsValues(); +if (!llvm::all_of( +ArrayRef(origInBounds).take_back(numCollapseDims - 1), +[](bool v) { return v; })) + return rewriter.notifyMatchFailure(readOp, + "out-if-bounds index to collapse"); banach-space wrote: Note, it's not really index that's out-of-bounds, but the corresponding memory access. So, index could be in-bounds, but we might be reading "more" than there's available to read (starting at that index). For example: ```mlir vector.transfer_read %mem[5] : memref<7xi8>, vector<7xi8> ``` ```suggestion "out-of-bounds index to collapse"); ``` https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
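To make the reviewer's distinction concrete, here is a minimal self-contained expansion of the example above (the wrapper function is invented; the shapes are the ones the reviewer gave):

```mlir
// %c5 is a perfectly valid index into memref<7xi8>, but a vector<7xi8> read
// starting there covers elements 5..11, so it is the *access*, not the
// index, that runs out of bounds.
func.func @in_bounds_index_oob_access(%mem : memref<7xi8>) -> vector<7xi8> {
  %c5 = arith.constant 5 : index
  %pad = arith.constant 0 : i8
  %v = vector.transfer_read %mem[%c5], %pad {in_bounds = [false]}
      : memref<7xi8>, vector<7xi8>
  return %v : vector<7xi8>
}
```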
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -0,0 +1,262 @@ +// RUN: mlir-opt --arm-sve-legalize-vector-storage --split-input-file %s | FileCheck %s + +// - + +// CHECK-LABEL: @test_base_case +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]]: +// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1], [2, 3]] +// CHECK-SAME:: memref into memref +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSE]][%[[I]], %[[J]], %c0], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref, vector<[32]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[32]xi8> to vector<[4]x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x8xi8> + +func.func @test_base_case(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// - + +// CHECK-LABEL: @test_using_strided_layout +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1], [2, 3]] +// CHECK-SAME:: memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSE]][%[[I]], %[[J]], %c0], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref>, vector<[32]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[32]xi8> to vector<[4]x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x8xi8> + +#s0 = strided<[?, ?, 8, 1]> + +func.func @test_using_strided_layout(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// - + +// CHECK-LABEL: @test_3d_vector +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSED:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1, 2, 3]] +// CHECK-SAME:: memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSED]][%[[I]], %[[J]]], %c0_i8 {in_bounds = [true]} +// CHECK-SAME:: memref>, vector<[64]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[64]xi8> to vector<[4]x2x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x2x8xi8> + +#s1 = strided<[?, 16, 8, 1]> + +func.func @test_3d_vector(%i : index, %j : index, %M : memref) -> vector<[4]x2x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true, true]} : memref, vector<[4]x2x8xi8> + + return %A : vector<[4]x2x8xi8> +} + +// - + +// CHECK-LABEL: @test_4d_vector +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]] +// CHECK: %[[COLLAPSED:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1, 2, 3]] +// CHECK-SAME: : memref> into +// CHECK-SAME: memref> +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSED]][%[[I]], %[[J]]], %c0_i8 {in_bounds = [false, true]} +// CHECK-SAME: : memref>, vector<2x[64]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<2x[4]x2x8xi8> + +#s2 = strided<[?, 16, 8, 1]> + +func.func @test_4d_vector(%i : index, %j : index, %M : memref) -> vector<2x[4]x2x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = 
arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [false, true, true, true]} : memref, vector<2x[4]x2x8xi8> + + return %A : vector<2x[4]x2x8xi8> +} + +// - + +// CHECK-LABEL: @negative_test_vector_legal_non_scalable +// CHECK-NOT: memref.collapse + +func.func @negative_test_vector_legal_non_scalable(%i : index, %j : index, %M : memref) -> vector<8x8xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<8x8xi8> + + return %A : vector<8x8xi8> +} + +// - + +// CHECK-LABEL: @negative_test_vector_legal_scalable_0 +// CHECK-NOT: memref.collapse + +func.func @negative_test_vector_legal_scalable_0(%i : index, %j : index, %M : memref) -> vector<[8]xi8> { + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 {in_bounds = [true]} : memref, vector<[8]xi8> + + return %A : ve
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); banach-space wrote: Would supporting non-identity be a problem? It would be good to add a comment, either: * `TODO: We haven't required this, so leaving for later.` or * "Too complex because of <reason>, disabling". Any hint for future developers would be helpful. https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
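As background for the minor-identity check being discussed, a sketch contrasting the two cases. The shapes are invented for illustration and only the first form passes the pattern's check:

```mlir
func.func @permutation_examples(%m : memref<?x?xi8>, %i : index, %j : index)
    -> (vector<[4]x8xi8>, vector<8x[4]xi8>) {
  %pad = arith.constant 0 : i8
  // Minor identity: the vector dims map to the innermost memref dims in
  // order. This is the only form the pattern currently accepts.
  %a = vector.transfer_read %m[%i, %j], %pad
      {in_bounds = [true, true],
       permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
      : memref<?x?xi8>, vector<[4]x8xi8>
  // A transpose: non-identity, so matchAndRewrite bails out on this one.
  %b = vector.transfer_read %m[%i, %j], %pad
      {in_bounds = [true, true],
       permutation_map = affine_map<(d0, d1) -> (d1, d0)>}
      : memref<?x?xi8>, vector<8x[4]xi8>
  return %a, %b : vector<[4]x8xi8>, vector<8x[4]xi8>
}
```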
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -57,6 +57,226 @@ void RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) { lower(MI, Mapping, WaterfallSgprs); } +bool RegBankLegalizeHelper::executeInWaterfallLoop( +MachineIRBuilder &B, iterator_range Range, +SmallSet &SGPROperandRegs) { + // Track use registers which have already been expanded with a readfirstlane + // sequence. This may have multiple uses if moving a sequence. + DenseMap WaterfalledRegMap; + + MachineBasicBlock &MBB = B.getMBB(); + MachineFunction &MF = B.getMF(); + + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass(); + unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg; + if (ST.isWave32()) { Pierre-vh wrote: So it can be a field, right? https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
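A hypothetical sketch of the suggestion: compute the wave-size-dependent opcodes once and keep them as fields instead of re-deriving them inside `executeInWaterfallLoop`. The struct, field, and constructor below are invented for illustration; the opcode names are the standard AMDGPU ones already used in the quoted diff, but the real patch may organize this differently:

```cpp
// Wave-size-dependent opcodes, initialized once per helper instance.
struct WaterfallOpcodes {
  unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc;
  Register ExecReg;

  explicit WaterfallOpcodes(const GCNSubtarget &ST) {
    if (ST.isWave32()) {
      MovExecOpc = AMDGPU::S_MOV_B32;
      MovExecTermOpc = AMDGPU::S_MOV_B32_term;
      XorTermOpc = AMDGPU::S_XOR_B32_term;
      AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
      ExecReg = AMDGPU::EXEC_LO;
    } else {
      MovExecOpc = AMDGPU::S_MOV_B64;
      MovExecTermOpc = AMDGPU::S_MOV_B64_term;
      XorTermOpc = AMDGPU::S_XOR_B64_term;
      AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
      ExecReg = AMDGPU::EXEC;
    }
  }
};
```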
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -0,0 +1,262 @@ +// RUN: mlir-opt --arm-sve-legalize-vector-storage --split-input-file %s | FileCheck %s + +// - + +// CHECK-LABEL: @test_base_case +// CHECK-SAME: %[[I:arg0]]: index, %[[J:arg1]]: index, %[[M:arg2]]: banach-space wrote: Is it guaranteed that `%i` will be renamed as `arg0` after the transformation? AFAIK, no, but perhaps I am missing something? https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
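If the printed argument names are indeed not guaranteed, one conventional FileCheck idiom is to capture whatever name the printer assigns instead of hard-coding `arg0`. A sketch, not the author's fix:

```mlir
// CHECK-LABEL: @test_base_case
// CHECK-SAME:  %[[I:[0-9a-zA-Z_]+]]: index, %[[J:[0-9a-zA-Z_]+]]: index, %[[M:[0-9a-zA-Z_]+]]:
```

The `[[VAR:regex]]` form binds `VAR` to whatever the regex matches, so later uses like `%[[I]]` keep working even if the SSA names change.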
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. +VectorType origVT = readOp.getVectorType(); +ArrayRef origScalableDims = origVT.getScalableDims(); +const int64_t origVRank = origVT.getRank(); +if (origVRank < 2 || llvm::count(origScalableDims, true) != 1) banach-space wrote: [nit] [getNumScalableDims](https://github.com/banach-space/llvm-project/blob/c15e7dddaea765eab4f9ed73e79b762138dc4ac0/mlir/include/mlir/IR/BuiltinTypes.td#L1368-L1371) would be more canonical than `llvm::count` https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
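A sketch of the suggested rewrite, assuming `getNumScalableDims` is available on `VectorType` as the linked `BuiltinTypes.td` declaration indicates:

```cpp
// Query the type directly instead of counting scalable-dim flags by hand.
if (origVRank < 2 || origVT.getNumScalableDims() != 1)
  return rewriter.notifyMatchFailure(readOp, "wrong dimensions");
```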
[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)
@@ -298,16 +298,139 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type. This is done by collapsing trailing +/// dimensions so we obtain a vector type with a single scalable dimension in +/// the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, +PatternRewriter &rewriter) const override { + +// Do not try to transform masked reads. For example, if we have a transfer +// to a `vector<[4]x4xi8>` we could have a mask like +//1 1 1 0 +//1 1 1 0 +//1 1 1 0 +//0 0 0 0 +// Flattening this mask would look like +//1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 +// and we have not yet figured out an efficient way to build such a mask, +// neither from the mask operand, nor from the original `vector.create_mask` +// operation (if visible at all). +if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + +if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + +// We handle transfers of vectors with rank >= 2 and a single scalable +// dimension. banach-space wrote: [nit] It would be helpful to add _why_: * Don't need to worry about 1D, that's supported by default. * More than one scalable dim is tricky (how to collapse e.g. `vscale * vscale`?) https://github.com/llvm/llvm-project/pull/143146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
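One possible wording for the requested comment, sketching the rationale rather than quoting the author's eventual text:

```cpp
// We handle transfers of vectors with rank >= 2 and a single scalable
// dimension: 1-D scalable vectors are already legal as they are, and with
// more than one scalable dimension there is no way to collapse the shape
// into a single dimension (the element count would be a multiple of
// vscale * vscale, which a single scalable dimension cannot express).
```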
[llvm-branch-commits] [flang] [mlir] [flang][OpenMP][NFC] remove globals with mlir::StateStack (PR #144898)
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/144898 >From 392514e4d56491575ec47a1eb5607fd52f5b1ff9 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Wed, 18 Jun 2025 21:01:13 + Subject: [PATCH 1/2] [flang][OpenMP][NFC] remove globals with mlir::StateStack Idea suggested by @skatrak --- flang/include/flang/Lower/AbstractConverter.h | 3 + flang/lib/Lower/Bridge.cpp| 6 ++ flang/lib/Lower/OpenMP/OpenMP.cpp | 102 -- mlir/include/mlir/Support/StateStack.h| 11 ++ 4 files changed, 91 insertions(+), 31 deletions(-) diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 8ae68e143cd2f..de3e833f60699 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -26,6 +26,7 @@ namespace mlir { class SymbolTable; +class StateStack; } namespace fir { @@ -361,6 +362,8 @@ class AbstractConverter { /// functions in order to be in sync). virtual mlir::SymbolTable *getMLIRSymbolTable() = 0; + virtual mlir::StateStack &getStateStack() = 0; + private: /// Options controlling lowering behavior. const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 64b16b3abe991..8506b9a984e58 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -69,6 +69,7 @@ #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Parser/Parser.h" +#include "mlir/Support/StateStack.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" @@ -1237,6 +1238,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::SymbolTable *getMLIRSymbolTable() override { return &mlirSymbolTable; } + mlir::StateStack &getStateStack() override { return stateStack; } + /// Add the symbol to the local map and return `true`. If the symbol is /// already in the map and \p forced is `false`, the map is not updated. /// Instead the value `false` is returned. @@ -6552,6 +6555,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { /// attribute since mlirSymbolTable must pro-actively be maintained when /// new Symbol operations are created. mlir::SymbolTable mlirSymbolTable; + + /// Used to store context while recursing into regions during lowering. + mlir::StateStack stateStack; }; } // namespace diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7ad8869597274..bff3321af2814 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -38,6 +38,7 @@ #include "flang/Support/OpenMP-utils.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/Support/StateStack.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" @@ -200,9 +201,41 @@ class HostEvalInfo { /// the handling of the outer region by keeping a stack of information /// structures, but it will probably still require some further work to support /// reverse offloading. 
-static llvm::SmallVector hostEvalInfo; -static llvm::SmallVector -sectionsStack; +class HostEvalInfoStackFrame +: public mlir::StateStackFrameBase { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(HostEvalInfoStackFrame) + + HostEvalInfo info; +}; + +static HostEvalInfo * +getHostEvalInfoStackTop(lower::AbstractConverter &converter) { + HostEvalInfoStackFrame *frame = + converter.getStateStack().getStackTop(); + return frame ? &frame->info : nullptr; +} + +/// Stack frame for storing the OpenMPSectionsConstruct currently being +/// processed so that it can be referred to when lowering the construct. +class SectionsConstructStackFrame +: public mlir::StateStackFrameBase { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SectionsConstructStackFrame) + + explicit SectionsConstructStackFrame( + const parser::OpenMPSectionsConstruct &sectionsConstruct) + : sectionsConstruct{sectionsConstruct} {} + + const parser::OpenMPSectionsConstruct &sectionsConstruct; +}; + +static const parser::OpenMPSectionsConstruct * +getSectionsConstructStackTop(lower::AbstractConverter &converter) { + SectionsConstructStackFrame *frame = + converter.getStateStack().getStackTop(); + return frame ? &frame->sectionsConstruct : nullptr; +} /// Bind symbols to their corresponding entry block arguments. /// @@ -537,31 +570,32 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, if (!ompEval) return; -HostEvalInfo &hostInfo = hostEvalInfo.back(); +HostEvalInfo *hostInfo = getHostEvalInfoStackTop(converter); +assert(hostInfo && "expected HOST_EVAL info structure"); switch (extractOmpDirective(*ompEval)) {
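For context, a hypothetical sketch of how such a frame would be scoped around recursive lowering. The push/pop method names are assumed from the `mlir::StateStack` API and the wrapper function is invented, so treat this as an illustration of the pattern, not code from the patch:

```cpp
// Push a frame for the duration of lowering one construct; nested lowering
// code retrieves it via getSectionsConstructStackTop(converter) instead of
// consulting a mutable global.
static void lowerSectionsWithFrame(
    lower::AbstractConverter &converter,
    const parser::OpenMPSectionsConstruct &construct) {
  mlir::StateStack &stack = converter.getStateStack();
  stack.stackPush<SectionsConstructStackFrame>(construct); // assumed API
  // ... lower the construct, recursing into nested regions ...
  stack.stackPop(); // assumed API
}
```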
[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Add missing erasure notifications (PR #145030)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: Matthias Springer (matthias-springer) Changes Add missing listener notifications when erasing nested blocks/operations. This commit also moves some of the functionality from `ConversionPatternRewriter` to `ConversionPatternRewriterImpl`. This is in preparation of the One-Shot Dialect Conversion refactoring: The implementations in `ConversionPatternRewriter` should be as simple as possible, so that a switch between "rollback allowed" and "rollback not allowed" can be inserted at that level. (In the latter case, `ConversionPatternRewriterImpl` can be bypassed to some degree, and `PatternRewriter::eraseBlock` etc. can be used.) --- Full diff: https://github.com/llvm/llvm-project/pull/145030.diff 2 Files Affected: - (modified) mlir/lib/Transforms/Utils/DialectConversion.cpp (+43-19) - (modified) mlir/test/Transforms/test-legalizer.mlir (+16-2) ``diff diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index ff48647f43305..7419d79cd8856 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -274,6 +274,26 @@ struct RewriterState { // IR rewrites //===--===// +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op); + +/// Notify the listener that the given block and its contents are being erased. +static void notifyIRErased(RewriterBase::Listener *listener, Block &b) { + for (Operation &op : b) +notifyIRErased(listener, op); + listener->notifyBlockErased(&b); +} + +/// Notify the listener that the given operation and its contents are being +/// erased. +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op) { + for (Region &r : op.getRegions()) { +for (Block &b : r) { + notifyIRErased(listener, b); +} + } + listener->notifyOperationErased(&op); +} + /// An IR rewrite that can be committed (upon success) or rolled back (upon /// failure). /// @@ -422,17 +442,20 @@ class EraseBlockRewrite : public BlockRewrite { } void commit(RewriterBase &rewriter) override { -// Erase the block. assert(block && "expected block"); -assert(block->empty() && "expected empty block"); -// Notify the listener that the block is about to be erased. +// Notify the listener that the block and its contents are being erased. if (auto *listener = dyn_cast_or_null(rewriter.getListener())) - listener->notifyBlockErased(block); + notifyIRErased(listener, *block); } void cleanup(RewriterBase &rewriter) override { +// Erase the contents of the block. +for (auto &op : llvm::make_early_inc_range(llvm::reverse(*block))) + rewriter.eraseOp(&op); +assert(block->empty() && "expected empty block"); + // Erase the block. block->dropAllDefinedValueUses(); delete block; @@ -1147,12 +1170,9 @@ void ReplaceOperationRewrite::commit(RewriterBase &rewriter) { if (getConfig().unlegalizedOps) getConfig().unlegalizedOps->erase(op); - // Notify the listener that the operation (and its nested operations) was - // erased. - if (listener) { -op->walk( -[&](Operation *op) { listener->notifyOperationErased(op); }); - } + // Notify the listener that the operation and its contents are being erased. + if (listener) +notifyIRErased(listener, *op); // Do not erase the operation yet. It may still be referenced in `mapping`. // Just unlink it for now and erase it during cleanup. 
@@ -1605,6 +1625,8 @@ void ConversionPatternRewriterImpl::replaceOp( } void ConversionPatternRewriterImpl::eraseBlock(Block *block) { + assert(!wasOpReplaced(block->getParentOp()) && + "attempting to erase a block within a replaced/erased op"); appendRewrite(block); // Unlink the block from its parent region. The block is kept in the rewrite @@ -1612,12 +1634,16 @@ void ConversionPatternRewriterImpl::eraseBlock(Block *block) { // allows us to keep the operations in the block live and undo the removal by // re-inserting the block. block->getParent()->getBlocks().remove(block); + + // Mark all nested ops as erased. + block->walk([&](Operation *op) { replacedOps.insert(op); }); } void ConversionPatternRewriterImpl::notifyBlockInserted( Block *block, Region *previous, Region::iterator previousIt) { - assert(!wasOpReplaced(block->getParentOp()) && - "attempting to insert into a region within a replaced/erased op"); + assert( + (!config.allowPatternRollback || !wasOpReplaced(block->getParentOp())) && + "attempting to insert into a region within a replaced/erased op"); LLVM_DEBUG( { Operation *parent = block->getParentOp(); @@ -1630,6 +1656,11 @@ void ConversionPatternRewriterImpl::notifyBlockInserted( } }); + if (!config.allowPattern
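To illustrate the observable effect of the added notifications, a minimal sketch of a listener. The counting listener itself is invented; the callbacks are the standard `RewriterBase::Listener` hooks:

```cpp
// With this change, erasing a block through the conversion rewriter reports
// every nested operation via notifyOperationErased (contents before their
// enclosing block/op) and then the block itself via notifyBlockErased,
// instead of silently dropping the nested IR.
struct CountingListener : public mlir::RewriterBase::Listener {
  int NumOpsErased = 0;
  int NumBlocksErased = 0;
  void notifyOperationErased(mlir::Operation *op) override { ++NumOpsErased; }
  void notifyBlockErased(mlir::Block *block) override { ++NumBlocksErased; }
};
```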
[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Add missing erasure notifications (PR #145030)
https://github.com/matthias-springer created https://github.com/llvm/llvm-project/pull/145030 Add missing listener notifications when erasing nested blocks/operations. This commit also moves some of the functionality from `ConversionPatternRewriter` to `ConversionPatternRewriterImpl`. This is in preparation of the One-Shot Dialect Conversion refactoring: The implementations in `ConversionPatternRewriter` should be as simple as possible, so that a switch between "rollback allowed" and "rollback not allowed" can be inserted at that level. (In the latter case, `ConversionPatternRewriterImpl` can be bypassed to some degree, and `PatternRewriter::eraseBlock` etc. can be used.) >From 40dea2a59a6fcc49976488b106109f22df8707f0 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 20 Jun 2025 12:25:00 + Subject: [PATCH] [mlir][Transforms] Dialect conversion: Add missing erasure notifications --- .../Transforms/Utils/DialectConversion.cpp| 62 +-- mlir/test/Transforms/test-legalizer.mlir | 18 +- 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index ff48647f43305..7419d79cd8856 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -274,6 +274,26 @@ struct RewriterState { // IR rewrites //===--===// +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op); + +/// Notify the listener that the given block and its contents are being erased. +static void notifyIRErased(RewriterBase::Listener *listener, Block &b) { + for (Operation &op : b) +notifyIRErased(listener, op); + listener->notifyBlockErased(&b); +} + +/// Notify the listener that the given operation and its contents are being +/// erased. +static void notifyIRErased(RewriterBase::Listener *listener, Operation &op) { + for (Region &r : op.getRegions()) { +for (Block &b : r) { + notifyIRErased(listener, b); +} + } + listener->notifyOperationErased(&op); +} + /// An IR rewrite that can be committed (upon success) or rolled back (upon /// failure). /// @@ -422,17 +442,20 @@ class EraseBlockRewrite : public BlockRewrite { } void commit(RewriterBase &rewriter) override { -// Erase the block. assert(block && "expected block"); -assert(block->empty() && "expected empty block"); -// Notify the listener that the block is about to be erased. +// Notify the listener that the block and its contents are being erased. if (auto *listener = dyn_cast_or_null(rewriter.getListener())) - listener->notifyBlockErased(block); + notifyIRErased(listener, *block); } void cleanup(RewriterBase &rewriter) override { +// Erase the contents of the block. +for (auto &op : llvm::make_early_inc_range(llvm::reverse(*block))) + rewriter.eraseOp(&op); +assert(block->empty() && "expected empty block"); + // Erase the block. block->dropAllDefinedValueUses(); delete block; @@ -1147,12 +1170,9 @@ void ReplaceOperationRewrite::commit(RewriterBase &rewriter) { if (getConfig().unlegalizedOps) getConfig().unlegalizedOps->erase(op); - // Notify the listener that the operation (and its nested operations) was - // erased. - if (listener) { -op->walk( -[&](Operation *op) { listener->notifyOperationErased(op); }); - } + // Notify the listener that the operation and its contents are being erased. + if (listener) +notifyIRErased(listener, *op); // Do not erase the operation yet. It may still be referenced in `mapping`. // Just unlink it for now and erase it during cleanup. 
@@ -1605,6 +1625,8 @@ void ConversionPatternRewriterImpl::replaceOp( } void ConversionPatternRewriterImpl::eraseBlock(Block *block) { + assert(!wasOpReplaced(block->getParentOp()) && + "attempting to erase a block within a replaced/erased op"); appendRewrite(block); // Unlink the block from its parent region. The block is kept in the rewrite @@ -1612,12 +1634,16 @@ void ConversionPatternRewriterImpl::eraseBlock(Block *block) { // allows us to keep the operations in the block live and undo the removal by // re-inserting the block. block->getParent()->getBlocks().remove(block); + + // Mark all nested ops as erased. + block->walk([&](Operation *op) { replacedOps.insert(op); }); } void ConversionPatternRewriterImpl::notifyBlockInserted( Block *block, Region *previous, Region::iterator previousIt) { - assert(!wasOpReplaced(block->getParentOp()) && - "attempting to insert into a region within a replaced/erased op"); + assert( + (!config.allowPatternRollback || !wasOpReplaced(block->getParentOp())) && + "attempting to insert into a region within a replaced/erased op"); LLVM_DEBUG( { Operat
[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Add missing erasure notifications (PR #145030)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/145030 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFCI] Use heuristic for matching split global functions (PR #90429)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/90429 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)
https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/143594 >From afeb26be5f099d384115a55b19707bbb2a730245 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Tue, 10 Jun 2025 20:02:36 +0100 Subject: [PATCH] [DLCov] Origin-Tracking: Add debugify support --- llvm/lib/Transforms/Utils/Debugify.cpp | 83 ++--- llvm/utils/llvm-original-di-preservation.py | 24 +++--- 2 files changed, 88 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index c2dbdc57eb3b5..460b5e50e42d7 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -15,7 +15,10 @@ #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/InstIterator.h" @@ -28,6 +31,11 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" #include +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +// We need the Signals header to operate on stacktraces if we're using DebugLoc +// origin-tracking. +#include "llvm/Support/Signals.h" +#endif #define DEBUG_TYPE "debugify" @@ -59,6 +67,52 @@ cl::opt DebugifyLevel( raw_ostream &dbg() { return Quiet ? nulls() : errs(); } +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +// These maps refer to addresses in this instance of LLVM, so we can reuse them +// everywhere - therefore, we store them at file scope. +static DenseMap> SymbolizedAddrs; +static DenseSet UnsymbolizedAddrs; + +std::string symbolizeStackTrace(const Instruction *I) { + // We flush the set of unsymbolized addresses at the latest possible moment, + // i.e. now. + if (!UnsymbolizedAddrs.empty()) { +sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs); +UnsymbolizedAddrs.clear(); + } + auto OriginStackTraces = I->getDebugLoc().getOriginStackTraces(); + std::string Result; + raw_string_ostream OS(Result); + for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) { +if (TraceIdx != 0) + OS << "\n"; +auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx]; +unsigned VirtualFrameNo = 0; +for (int Frame = 0; Frame < Depth; ++Frame) { + assert(SymbolizedAddrs.contains(StackTrace[Frame]) && + "Expected each address to have been symbolized."); + for (std::string &SymbolizedFrame : SymbolizedAddrs[StackTrace[Frame]]) { +OS << right_justify(formatv("#{0}", VirtualFrameNo++).str(), std::log10(Depth) + 2) + << ' ' << SymbolizedFrame << '\n'; + } +} + } + return Result; +} +void collectStackAddresses(Instruction &I) { + auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces(); + for (auto &[Depth, StackTrace] : OriginStackTraces) { +for (int Frame = 0; Frame < Depth; ++Frame) { + void *Addr = StackTrace[Frame]; + if (!SymbolizedAddrs.contains(Addr)) +UnsymbolizedAddrs.insert(Addr); +} + } +} +#else +void collectStackAddresses(Instruction &I) {} +#endif // LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN + uint64_t getAllocSizeInBits(Module &M, Type *Ty) { return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0; } @@ -373,6 +427,8 @@ bool llvm::collectDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); DebugInfoBeforePass.InstToDelete.insert({&I, &I}); +// Track the addresses to symbolize, if the feature is enabled. 
+collectStackAddresses(I); DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)}); } } @@ -448,14 +504,23 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore, auto BBName = BB->hasName() ? BB->getName() : "no-name"; auto InstName = Instruction::getOpcodeName(Instr->getOpcode()); +auto CreateJSONBugEntry = [&](const char *Action) { + Bugs.push_back(llvm::json::Object({ + {"metadata", "DILocation"}, + {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, + {"instr", InstName}, + {"action", Action}, +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN + {"origin", symbolizeStackTrace(Instr)}, +#endif + })); +}; + auto InstrIt = DILocsBefore.find(Instr); if (InstrIt == DILocsBefore.end()) { if (ShouldWriteIntoJSON) -Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, - {"fn-name", FnName.str()}, - {"bb-name", BBName.str()}, - {"instr", InstName}, - {"action", "not-generate"}})); +CreateJSONBugEntry("not-generate"); else dbg() << "WARNING: " << N
[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Collect stack traces in DebugLoc (PR #143592)
https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/143592 >From 4410b5f351cad4cd611cbc773337197d5fa367b8 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Tue, 10 Jun 2025 20:00:51 +0100 Subject: [PATCH] [DLCov] Origin-Tracking: Core implementation --- llvm/include/llvm/IR/DebugLoc.h| 49 +- llvm/include/llvm/IR/Instruction.h | 2 +- llvm/lib/CodeGen/BranchFolding.cpp | 7 + llvm/lib/IR/DebugLoc.cpp | 22 +- 4 files changed, 71 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index 999e03b6374a5..6d79aa6b2aa01 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -27,6 +27,21 @@ namespace llvm { class Function; #if LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN + struct DbgLocOrigin { +static constexpr unsigned long MaxDepth = 16; +using StackTracesTy = +SmallVector>, 0>; +StackTracesTy StackTraces; +DbgLocOrigin(bool ShouldCollectTrace); +void addTrace(); +const StackTracesTy &getOriginStackTraces() const { return StackTraces; }; + }; +#else + struct DbgLocOrigin { +DbgLocOrigin(bool) {} + }; +#endif // Used to represent different "kinds" of DebugLoc, expressing that the // instruction it is part of is either normal and should contain a valid // DILocation, or otherwise describing the reason why the instruction does @@ -55,22 +70,29 @@ namespace llvm { Temporary }; - // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify - // to ignore intentionally-empty DebugLocs. - class DILocAndCoverageTracking : public TrackingMDNodeRef { + // Extends TrackingMDNodeRef to also store a DebugLocKind and Origin, + // allowing Debugify to ignore intentionally-empty DebugLocs and display the + // code responsible for generating unintentionally-empty DebugLocs. + // Currently we only need to track the Origin of this DILoc when using a + // DebugLoc that is not annotated (i.e. has DebugLocKind::Normal) and has a + // null DILocation, so only collect the origin stacktrace in those cases. + class DILocAndCoverageTracking : public TrackingMDNodeRef, + public DbgLocOrigin { public: DebugLocKind Kind; // Default constructor for empty DebugLocs. DILocAndCoverageTracking() -: TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {} -// Valid or nullptr MDNode*, normal DebugLocKind. +: TrackingMDNodeRef(nullptr), DbgLocOrigin(true), + Kind(DebugLocKind::Normal) {} +// Valid or nullptr MDNode*, no annotative DebugLocKind. DILocAndCoverageTracking(const MDNode *Loc) -: TrackingMDNodeRef(const_cast(Loc)), +: TrackingMDNodeRef(const_cast(Loc)), DbgLocOrigin(!Loc), Kind(DebugLocKind::Normal) {} LLVM_ABI DILocAndCoverageTracking(const DILocation *Loc); // Explicit DebugLocKind, which always means a nullptr MDNode*. DILocAndCoverageTracking(DebugLocKind Kind) -: TrackingMDNodeRef(nullptr), Kind(Kind) {} +: TrackingMDNodeRef(nullptr), + DbgLocOrigin(Kind == DebugLocKind::Normal), Kind(Kind) {} }; template <> struct simplify_type { using SimpleType = MDNode *; @@ -187,6 +209,19 @@ namespace llvm { #endif // LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE } +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +const DbgLocOrigin::StackTracesTy &getOriginStackTraces() const { + return Loc.getOriginStackTraces(); +} +DebugLoc getCopied() const { + DebugLoc NewDL = *this; + NewDL.Loc.addTrace(); + return NewDL; +} +#else +DebugLoc getCopied() const { return *this; } +#endif + /// Get the underlying \a DILocation. /// /// \pre !*this or \c isa(getAsMDNode()). 
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 8e1ef24226789..ef382a9168f24 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -507,7 +507,7 @@ class Instruction : public User, LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const; /// Set the debug location information for this instruction. - void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); } + void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc).getCopied(); } /// Return the debug location for this node as a DebugLoc. const DebugLoc &getDebugLoc() const { return DbgLoc; } diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index ff9f0ff5d5bc3..3b3e7a418feb5 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoM
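The header change leans on a conditional-base trick: when origin tracking is compiled out, DbgLocOrigin collapses to an empty struct, so DILocAndCoverageTracking pays nothing for inheriting it. A reduced sketch of the idiom under illustrative names (not the actual LLVM headers):

```cpp
#include <vector>

#define ENABLE_ORIGIN_TRACKING 1 // flip to 0 to compile the tracking away

#if ENABLE_ORIGIN_TRACKING
struct Origin {
  std::vector<void *> Trace; // real payload when the feature is enabled
  explicit Origin(bool Collect) {
    if (Collect)
      Trace.push_back(nullptr); // placeholder for a captured stack frame
  }
};
#else
struct Origin {
  explicit Origin(bool) {} // empty fallback with the same constructor shape
};
#endif

// The location class unconditionally inherits Origin, so call sites never
// need #ifdefs; when tracking is off, the empty base adds no storage on
// mainstream ABIs (empty-base optimization).
struct Loc : Origin {
  const void *Node = nullptr;
  explicit Loc(const void *N) : Origin(/*Collect=*/N == nullptr), Node(N) {}
};

#if !ENABLE_ORIGIN_TRACKING
static_assert(sizeof(Loc) == sizeof(const void *),
              "empty base keeps Loc at pointer size");
#endif
```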
[llvm-branch-commits] [clang-tools-extra] [clang-doc] document global variables (PR #145070)
https://github.com/evelez7 edited https://github.com/llvm/llvm-project/pull/145070 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] 34db3b6 - Revert "[lldb][DWARF] Remove object_pointer from ParsedDWARFAttributes (#144880)"
Author: Michael Buch Date: 2025-06-20T17:20:32+01:00 New Revision: 34db3b66fd4b406453ed712bf8995eb2ed32dcb7 URL: https://github.com/llvm/llvm-project/commit/34db3b66fd4b406453ed712bf8995eb2ed32dcb7 DIFF: https://github.com/llvm/llvm-project/commit/34db3b66fd4b406453ed712bf8995eb2ed32dcb7.diff LOG: Revert "[lldb][DWARF] Remove object_pointer from ParsedDWARFAttributes (#144880)" This reverts commit b017b4ce9a45d4c5a339e24142da5d4a7e4c5db1. Added: Modified: lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h Removed: diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 3bec89cdf7469..4f79c8aa3f811 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -445,6 +445,15 @@ ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { name.SetCString(form_value.AsCString()); break; +case DW_AT_object_pointer: + // GetAttributes follows DW_AT_specification. + // DW_TAG_subprogram definitions and declarations may both + // have a DW_AT_object_pointer. Don't overwrite the one + // we parsed for the definition with the one from the declaration. + if (!object_pointer.IsValid()) +object_pointer = form_value.Reference(); + break; + case DW_AT_signature: signature = form_value; break; @@ -1107,7 +1116,7 @@ bool DWARFASTParserClang::ParseObjCMethod( std::pair DWARFASTParserClang::ParseCXXMethod( const DWARFDIE &die, CompilerType clang_type, const ParsedDWARFTypeAttributes &attrs, const DWARFDIE &decl_ctx_die, -const DWARFDIE &object_parameter, bool &ignore_containing_context) { +bool is_static, bool &ignore_containing_context) { Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); SymbolFileDWARF *dwarf = die.GetDWARF(); assert(dwarf); @@ -1191,9 +1200,6 @@ std::pair DWARFASTParserClang::ParseCXXMethod( TypeSystemClang::GetDeclContextForType(class_opaque_type), die, attrs.name.GetCString()); - // In DWARF, a C++ method is static if it has no object parameter child. - const bool is_static = !object_parameter.IsValid(); - // We have a C++ member function with no children (this pointer!) and clang // will get mad if we try and make a function that isn't well formed in the // DWARF, so we will just skip it... @@ -1219,7 +1225,9 @@ std::pair DWARFASTParserClang::ParseCXXMethod( ClangASTMetadata metadata; metadata.SetUserID(die.GetID()); -if (char const *object_pointer_name = object_parameter.GetName()) { +char const *object_pointer_name = +attrs.object_pointer ? attrs.object_pointer.GetName() : nullptr; +if (object_pointer_name) { metadata.SetObjectPtrName(object_pointer_name); LLDB_LOGF(log, "Setting object pointer name: %s on method object %p.\n", object_pointer_name, static_cast(cxx_method_decl)); @@ -1315,9 +1323,11 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, type_handled = ParseObjCMethod(*objc_method, die, clang_type, attrs, is_variadic); } else if (is_cxx_method) { +// In DWARF, a C++ method is static if it has no object parameter child. 
+const bool is_static = !object_parameter.IsValid(); auto [handled, type_sp] = -ParseCXXMethod(die, clang_type, attrs, decl_ctx_die, - object_parameter, ignore_containing_context); +ParseCXXMethod(die, clang_type, attrs, decl_ctx_die, is_static, + ignore_containing_context); if (type_sp) return type_sp; @@ -1412,7 +1422,9 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, ClangASTMetadata metadata; metadata.SetUserID(die.GetID()); - if (char const *object_pointer_name = object_parameter.GetName()) { + char const *object_pointer_name = + attrs.object_pointer ? attrs.object_pointer.GetName() : nullptr; + if (object_pointer_name) { metadata.SetObjectPtrName(object_pointer_name); LLDB_LOGF(log, "Setting object pointer name: %s on function " diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index a90f55bcff948..111604ce4068a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -470,8 +470,7 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \param[in] decl_ctx_die The DIE representing the DeclContext of the C++ ///
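The semantic rule this revert restores is easy to lose in the diff: GetAttributes follows DW_AT_specification, so the definition's DW_AT_object_pointer must not be overwritten by the declaration's, and a method with no object parameter is static. A reduced sketch of that first-wins rule — the types are illustrative, not the LLDB API; only the DW_AT_object_pointer constant is real:

```cpp
#include <optional>
#include <vector>

// Reduced model of DWARF attribute parsing. DW_AT_object_pointer is the
// real DWARF constant; Attr and the traversal are illustrative.
constexpr int DW_AT_object_pointer = 0x64;

struct Attr {
  int Kind;
  int Value;
};

// GetAttributes() follows DW_AT_specification, so a subprogram
// definition's attributes are visited before its declaration's. Keeping
// the first DW_AT_object_pointer seen therefore prefers the definition
// and avoids overwriting it with the declaration's copy.
std::optional<int> findObjectPointer(const std::vector<Attr> &Attrs) {
  std::optional<int> ObjectPointer;
  for (const Attr &A : Attrs)
    if (A.Kind == DW_AT_object_pointer && !ObjectPointer)
      ObjectPointer = A.Value; // first (definition) wins
  return ObjectPointer;
}

// In DWARF, a C++ method with no object parameter is a static method.
bool isStaticMethod(const std::vector<Attr> &Attrs) {
  return !findObjectPointer(Attrs).has_value();
}
```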
[llvm-branch-commits] [clang-tools-extra] [clang-doc] document global variables (PR #145070)
https://github.com/evelez7 created https://github.com/llvm/llvm-project/pull/145070 None >From 7c0658cc9cbf5d28125ecbfed4b95667cb1ccecf Mon Sep 17 00:00:00 2001 From: Erick Velez Date: Wed, 18 Jun 2025 16:36:49 -0700 Subject: [PATCH] [clang-doc] document global variables --- clang-tools-extra/clang-doc/BitcodeReader.cpp | 40 ++ clang-tools-extra/clang-doc/BitcodeWriter.cpp | 32 +-- clang-tools-extra/clang-doc/BitcodeWriter.h | 6 +++ clang-tools-extra/clang-doc/HTMLGenerator.cpp | 3 ++ .../clang-doc/HTMLMustacheGenerator.cpp | 2 + clang-tools-extra/clang-doc/JSONGenerator.cpp | 14 +++ clang-tools-extra/clang-doc/MDGenerator.cpp | 4 ++ clang-tools-extra/clang-doc/Mapper.cpp| 6 +++ clang-tools-extra/clang-doc/Mapper.h | 1 + .../clang-doc/Representation.cpp | 16 clang-tools-extra/clang-doc/Representation.h | 14 ++- clang-tools-extra/clang-doc/Serialize.cpp | 27 clang-tools-extra/clang-doc/Serialize.h | 4 ++ clang-tools-extra/clang-doc/YAMLGenerator.cpp | 1 + .../test/clang-doc/json/namespace.cpp | 41 +-- .../unittests/clang-doc/BitcodeTest.cpp | 2 + 16 files changed, 188 insertions(+), 25 deletions(-) diff --git a/clang-tools-extra/clang-doc/BitcodeReader.cpp b/clang-tools-extra/clang-doc/BitcodeReader.cpp index 5b70280e7dba8..063d19f64 100644 --- a/clang-tools-extra/clang-doc/BitcodeReader.cpp +++ b/clang-tools-extra/clang-doc/BitcodeReader.cpp @@ -93,6 +93,7 @@ static llvm::Error decodeRecord(const Record &R, InfoType &Field, case InfoType::IT_enum: case InfoType::IT_typedef: case InfoType::IT_concept: + case InfoType::IT_variable: Field = IT; return llvm::Error::success(); } @@ -416,6 +417,23 @@ static llvm::Error parseRecord(const Record &R, unsigned ID, "invalid field for ConstraintInfo"); } +static llvm::Error parseRecord(const Record &R, unsigned ID, + llvm::StringRef Blob, VarInfo *I) { + switch (ID) { + case VAR_USR: +return decodeRecord(R, I->USR, Blob); + case VAR_NAME: +return decodeRecord(R, I->Name, Blob); + case VAR_DEFLOCATION: +return decodeRecord(R, I->DefLoc, Blob); + case VAR_IS_STATIC: +return decodeRecord(R, I->IsStatic, Blob); + default: +return llvm::createStringError(llvm::inconvertibleErrorCode(), + "invalid field for VarInfo"); + } +} + template static llvm::Expected getCommentInfo(T I) { return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid type cannot contain CommentInfo"); @@ -458,6 +476,10 @@ template <> llvm::Expected getCommentInfo(ConceptInfo *I) { return &I->Description.emplace_back(); } +template <> Expected getCommentInfo(VarInfo *I) { + return &I->Description.emplace_back(); +} + // When readSubBlock encounters a TypeInfo sub-block, it calls addTypeInfo on // the parent block to set it. The template specializations define what to do // for each supported parent block. 
@@ -497,6 +519,11 @@ template <> llvm::Error addTypeInfo(TypedefInfo *I, TypeInfo &&T) { return llvm::Error::success(); } +template <> llvm::Error addTypeInfo(VarInfo *I, TypeInfo &&T) { + I->Type = std::move(T); + return llvm::Error::success(); +} + template static llvm::Error addReference(T I, Reference &&R, FieldId F) { return llvm::createStringError(llvm::inconvertibleErrorCode(), @@ -644,6 +671,9 @@ template <> void addChild(NamespaceInfo *I, TypedefInfo &&R) { template <> void addChild(NamespaceInfo *I, ConceptInfo &&R) { I->Children.Concepts.emplace_back(std::move(R)); } +template <> void addChild(NamespaceInfo *I, VarInfo &&R) { + I->Children.Variables.emplace_back(std::move(R)); +} // Record children: template <> void addChild(RecordInfo *I, FunctionInfo &&R) { @@ -886,6 +916,13 @@ llvm::Error ClangDocBitcodeReader::readSubBlock(unsigned ID, T I) { addChild(I, std::move(CI)); return llvm::Error::success(); } + case BI_VAR_BLOCK_ID: { +VarInfo VI; +if (auto Err = readBlock(ID, &VI)) + return Err; +addChild(I, std::move(VI)); +return llvm::Error::success(); + } default: return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid subblock type"); @@ -995,6 +1032,8 @@ ClangDocBitcodeReader::readBlockToInfo(unsigned ID) { return createInfo(ID); case BI_FUNCTION_BLOCK_ID: return createInfo(ID); + case BI_VAR_BLOCK_ID: +return createInfo(ID); default: return llvm::createStringError(llvm::inconvertibleErrorCode(), "cannot create info"); @@ -1034,6 +1073,7 @@ ClangDocBitcodeReader::readBitcode() { case BI_ENUM_BLOCK_ID: case BI_TYPEDEF_BLOCK_ID: case BI_CONCEPT_BLOCK_ID: +case BI_VAR_BLOCK_ID: case BI_FUNCTION_BLOCK_
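Mechanically, the patch extends clang-doc's usual dispatch idiom: small function templates specialized per Info type, with the primary template serving as the error path. A self-contained sketch of that idiom under illustrative types (not the clang-doc ones):

```cpp
#include <cstdio>
#include <string>
#include <vector>

struct NamespaceInfo {
  std::vector<std::string> Variables;
};
struct RecordInfo {
  std::vector<std::string> Members;
};

// Primary template: parents that cannot hold a variable child hit the
// error path, mirroring the createStringError fallback in the patch.
template <typename Parent>
bool addVarChild(Parent &, std::string &&) {
  std::fputs("invalid parent for a variable child\n", stderr);
  return false;
}

// Opting a parent in is one specialization, like the new
// addChild<NamespaceInfo>(NamespaceInfo *, VarInfo &&) above.
template <>
bool addVarChild(NamespaceInfo &P, std::string &&V) {
  P.Variables.emplace_back(std::move(V));
  return true;
}

int main() {
  NamespaceInfo NS;
  RecordInfo R;
  addVarChild(NS, std::string("Global")); // accepted
  addVarChild(R, std::string("Global"));  // hits the error fallback
}
```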
[llvm-branch-commits] [clang-tools-extra] [clang-doc] document global variables (PR #145070)
evelez7 wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests).

* **#145070** 👈 (this PR; view in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/145070)
* **#145069**
* **#144430**
* **#144160**
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/

https://github.com/llvm/llvm-project/pull/145070 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] Precommit test for global variables (PR #145069)
evelez7 wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests).

* **#145070**
* **#145069** 👈 (this PR; view in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/145069)
* **#144430**
* **#144160**
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/

https://github.com/llvm/llvm-project/pull/145069 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] Precommit test for global variables (PR #145069)
https://github.com/evelez7 created https://github.com/llvm/llvm-project/pull/145069 None >From f572cfc62317c37510288a63cb0fccb6a3a1f000 Mon Sep 17 00:00:00 2001 From: Erick Velez Date: Thu, 19 Jun 2025 21:25:13 -0700 Subject: [PATCH] [clang-doc] Precommit test for global variables --- .../test/clang-doc/json/namespace.cpp | 20 ++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/test/clang-doc/json/namespace.cpp b/clang-tools-extra/test/clang-doc/json/namespace.cpp index 928864be1feb0..248d47351bd38 100644 --- a/clang-tools-extra/test/clang-doc/json/namespace.cpp +++ b/clang-tools-extra/test/clang-doc/json/namespace.cpp @@ -103,5 +103,23 @@ typedef int MyTypedef; // CHECK-NEXT: } // CHECK-NEXT:], // CHECK-NEXT:"USR": "" -// CHECK-NOT: "Variables": [ +// CHECK-NOT:"Variables": [ +// CHECK-NOT: { +// CHECK-NOT:"IsStatic": true, +// CHECK-NOT:"Location": { +// CHECK-NOT: "Filename": "{{.*}}namespace.cpp", +// CHECK-NOT: "LineNumber": 13 +// CHECK-NOT:}, +// CHECK-NOT:"Name": "Global", +// CHECK-NOT:"Type": { +// COM:FIXME: IsBuiltIn emits as its default value +// CHECK-NOT: "IsBuiltIn": false, +// CHECK-NOT: "IsTemplate": false, +// CHECK-NOT: "Name": "int", +// CHECK-NOT: "QualName": "int", +// CHECK-NOT: "USR": "" +// CHECK-NOT:}, +// CHECK-NOT:"USR": "{{[0-9A-F]*}}" +// CHECK-NOT: } +// CHECK-NOT:] // CHECK-NEXT: } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] Precommit test for global variables (PR #145069)
https://github.com/ilovepi approved this pull request. https://github.com/llvm/llvm-project/pull/145069 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR2Vec] Simplifying creation of Embedder (PR #143999)
https://github.com/svkeerthy updated https://github.com/llvm/llvm-project/pull/143999 >From 8b8932b55c8a6a087d516e174e1d57c9908259bd Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Thu, 12 Jun 2025 23:54:10 + Subject: [PATCH] Simplifying creation of Embedder --- llvm/docs/MLGO.rst| 7 +-- llvm/include/llvm/Analysis/IR2Vec.h | 4 +- .../Analysis/FunctionPropertiesAnalysis.cpp | 10 ++--- llvm/lib/Analysis/IR2Vec.cpp | 17 +++ .../FunctionPropertiesAnalysisTest.cpp| 7 ++- llvm/unittests/Analysis/IR2VecTest.cpp| 44 +++ 6 files changed, 33 insertions(+), 56 deletions(-) diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst index 28095447f6a5a..0b849f3382f63 100644 --- a/llvm/docs/MLGO.rst +++ b/llvm/docs/MLGO.rst @@ -482,14 +482,9 @@ embeddings can be computed and accessed via an ``ir2vec::Embedder`` instance. // Assuming F is an llvm::Function& // For example, using IR2VecKind::Symbolic: - Expected> EmbOrErr = + std::unique_ptr Emb = ir2vec::Embedder::create(IR2VecKind::Symbolic, F, Vocabulary); - if (auto Err = EmbOrErr.takeError()) { -// Handle error in embedder creation -return; - } - std::unique_ptr Emb = std::move(*EmbOrErr); 3. **Compute and Access Embeddings**: Call ``getFunctionVector()`` to get the embedding for the function. diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index 2a7a6edda70a8..06312562060aa 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -170,8 +170,8 @@ class Embedder { virtual ~Embedder() = default; /// Factory method to create an Embedder object. - static Expected> - create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary); + static std::unique_ptr create(IR2VecKind Mode, const Function &F, + const Vocab &Vocabulary); /// Returns a map containing instructions and the corresponding embeddings for /// the function F if it has been computed. If not, it computes the embeddings diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp index 29d3aaf46dc06..dd4eb7f0df053 100644 --- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp +++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp @@ -204,16 +204,12 @@ void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB, // We instantiate the IR2Vec embedder each time, as having an unique // pointer to the embedder as member of the class would make it // non-copyable. Instantiating the embedder in itself is not costly. -auto EmbOrErr = ir2vec::Embedder::create(IR2VecKind::Symbolic, +auto Embedder = ir2vec::Embedder::create(IR2VecKind::Symbolic, *BB.getParent(), *IR2VecVocab); -if (Error Err = EmbOrErr.takeError()) { - handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) { -BB.getContext().emitError("Error creating IR2Vec embeddings: " + - EI.message()); - }); +if (!Embedder) { + BB.getContext().emitError("Error creating IR2Vec embeddings"); return; } -auto Embedder = std::move(*EmbOrErr); const auto &BBEmbedding = Embedder->getBBVector(BB); // Subtract BBEmbedding from Function embedding if the direction is -1, // and add it if the direction is +1. 
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp index 7ff7acebedf4e..27cc2a4109879 100644 --- a/llvm/lib/Analysis/IR2Vec.cpp +++ b/llvm/lib/Analysis/IR2Vec.cpp @@ -123,13 +123,14 @@ Embedder::Embedder(const Function &F, const Vocab &Vocabulary) Dimension(Vocabulary.begin()->second.size()), OpcWeight(::OpcWeight), TypeWeight(::TypeWeight), ArgWeight(::ArgWeight) {} -Expected> -Embedder::create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary) { +std::unique_ptr Embedder::create(IR2VecKind Mode, const Function &F, + const Vocab &Vocabulary) { switch (Mode) { case IR2VecKind::Symbolic: return std::make_unique(F, Vocabulary); } - return make_error("Unknown IR2VecKind", errc::invalid_argument); + llvm_unreachable("Unknown IR2Vec kind"); + return nullptr; } // FIXME: Currently lookups are string based. Use numeric Keys @@ -384,17 +385,13 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M, auto Vocab = IR2VecVocabResult.getVocabulary(); for (Function &F : M) { -Expected> EmbOrErr = +std::unique_ptr Emb = Embedder::create(IR2VecKind::Symbolic, F, Vocab); -if (auto Err = EmbOrErr.takeError()) { - handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) { -OS << "Error creating IR2Vec embeddings: " << EI.message() << "\n"; - }); +if (!Emb) { + OS << "Error creating I
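The API shift is clearest at a call site: instead of unwrapping an Expected, callers now just null-check the returned pointer. A pared-down sketch of the new shape — the types are reduced to essentials, and the nullptr return stands in for llvm_unreachable:

```cpp
#include <cstdio>
#include <memory>

enum class IR2VecKind { Symbolic };

struct Embedder {
  // After this change the factory returns a plain unique_ptr: known kinds
  // yield an object, and an unknown kind is a program bug rather than a
  // recoverable llvm::Error.
  static std::unique_ptr<Embedder> create(IR2VecKind Mode) {
    switch (Mode) {
    case IR2VecKind::Symbolic:
      return std::make_unique<Embedder>();
    }
    return nullptr; // stand-in for llvm_unreachable("Unknown IR2Vec kind")
  }
};

int main() {
  // Call sites shrink from takeError()/handleAllErrors to a null check.
  if (auto Emb = Embedder::create(IR2VecKind::Symbolic))
    std::puts("embedder created");
  else
    std::puts("Error creating IR2Vec embeddings");
}
```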
[llvm-branch-commits] [llvm] [NFC] Formatting PassRegistry.def (PR #144139)
https://github.com/svkeerthy updated https://github.com/llvm/llvm-project/pull/144139 >From 7fa87f2e42378d656ba743a4971e5c2ffaee8492 Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Fri, 13 Jun 2025 18:22:10 + Subject: [PATCH] [NFC] Formatting PassRegistry.def --- llvm/lib/Passes/PassRegistry.def | 40 ++-- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index ec14c6a9211d9..5256f1378b64c 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -63,7 +63,8 @@ MODULE_PASS("coro-early", CoroEarlyPass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("ctx-instr-gen", PGOInstrumentationGen(PGOInstrumentationType::CTXPROF)) -MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false)) +MODULE_PASS("ctx-prof-flatten", +PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false)) MODULE_PASS("ctx-prof-flatten-prethinlink", PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true)) MODULE_PASS("noinline-nonprevailing", NoinlineNonPrevailing()) @@ -74,7 +75,8 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass()) MODULE_PASS("dxil-upgrade", DXILUpgradePass()) MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) MODULE_PASS("extract-blocks", BlockExtractorPass({}, false)) -MODULE_PASS("expand-variadics", ExpandVariadicsPass(ExpandVariadicsMode::Disable)) +MODULE_PASS("expand-variadics", +ExpandVariadicsPass(ExpandVariadicsMode::Disable)) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) MODULE_PASS("global-merge-func", GlobalMergeFuncPass()) @@ -104,7 +106,10 @@ MODULE_PASS("lower-ifunc", LowerIFuncPass()) MODULE_PASS("simplify-type-tests", SimplifyTypeTestsPass()) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) MODULE_PASS("fatlto-cleanup", FatLtoCleanup()) -MODULE_PASS("pgo-force-function-attrs", PGOForceFunctionAttrsPass(PGOOpt ? PGOOpt->ColdOptType : PGOOptions::ColdFuncOpt::Default)) +MODULE_PASS("pgo-force-function-attrs", +PGOForceFunctionAttrsPass(PGOOpt + ? 
PGOOpt->ColdOptType + : PGOOptions::ColdFuncOpt::Default)) MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation()) MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("mergefunc", MergeFunctionsPass()) @@ -178,7 +183,7 @@ MODULE_PASS_WITH_PARAMS( parseASanPassOptions, "kernel") MODULE_PASS_WITH_PARAMS( "cg-profile", "CGProfilePass", -[](bool InLTOPostLink) { return CGProfilePass(InLTOPostLink);}, +[](bool InLTOPostLink) { return CGProfilePass(InLTOPostLink); }, parseCGProfilePassOptions, "in-lto-post-link") MODULE_PASS_WITH_PARAMS( "global-merge", "GlobalMergePass", @@ -287,7 +292,8 @@ CGSCC_PASS_WITH_PARAMS( FUNCTION_ANALYSIS("aa", AAManager()) FUNCTION_ANALYSIS("access-info", LoopAccessAnalysis()) FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) -FUNCTION_ANALYSIS("bb-sections-profile-reader", BasicBlockSectionsProfileReaderAnalysis(TM)) +FUNCTION_ANALYSIS("bb-sections-profile-reader", + BasicBlockSectionsProfileReaderAnalysis(TM)) FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) FUNCTION_ANALYSIS("cycles", CycleAnalysis()) @@ -377,7 +383,7 @@ FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM)) FUNCTION_PASS("expand-fp", ExpandFpPass(TM)) FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM)) FUNCTION_PASS("extra-vector-passes", - ExtraFunctionPassManager()) + ExtraFunctionPassManager()) FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flatten-cfg", FlattenCFGPass()) FUNCTION_PASS("float2int", Float2IntPass()) @@ -548,8 +554,7 @@ FUNCTION_PASS_WITH_PARAMS( "max-iterations=N") FUNCTION_PASS_WITH_PARAMS( "lint", "LintPass", -[](bool AbortOnError) { return LintPass(AbortOnError); }, -parseLintOptions, +[](bool AbortOnError) { return LintPass(AbortOnError); }, parseLintOptions, "abort-on-error") FUNCTION_PASS_WITH_PARAMS( "loop-unroll", "LoopUnrollPass", @@ -576,7 +581,8 @@ FUNCTION_PASS_WITH_PARAMS( "normalize", "IRNormalizerPass", [](IRNormalizerOptions Options) { return IRNormalizerPass(Options); }, parseIRNormalizerPassOptions, - "no-preserve-order;preserve-order;no-rename-all;rename-all;no-fold-all;fold-all;no-reorder-operands;reorder-operands") +"no-preserve-order;preserve-order;no-rename-all;rename-all;no-fold-all;" +"fold-all;no-reorder-operands;reorder-operands") FUNCTION_PASS_WITH_PARAMS( "mldst-motion", "MergedLoadStoreMotionPass", [](MergedLoadStoreMotionOptions Opts) { @@ -590,7 +596,7 @@ FUNCTION_PASS_WITH_PARAMS( }, [](StringRe
[llvm-branch-commits] [llvm] Increasing tolerance in ApproximatelyEquals (PR #145117)
https://github.com/svkeerthy created https://github.com/llvm/llvm-project/pull/145117 None >From d05856c47337b3b6e9086a5ee06b7c39412d9103 Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Fri, 20 Jun 2025 22:56:46 + Subject: [PATCH] Increasing tolerance in ApproximatelyEquals --- llvm/include/llvm/Analysis/IR2Vec.h| 2 +- llvm/unittests/Analysis/IR2VecTest.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index 06312562060aa..480b834077b86 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -116,7 +116,7 @@ struct Embedding { /// Returns true if the embedding is approximately equal to the RHS embedding /// within the specified tolerance. - bool approximatelyEquals(const Embedding &RHS, double Tolerance = 1e-6) const; + bool approximatelyEquals(const Embedding &RHS, double Tolerance = 1e-4) const; void print(raw_ostream &OS) const; }; diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp index 05af55b59323b..33ac16828eb6c 100644 --- a/llvm/unittests/Analysis/IR2VecTest.cpp +++ b/llvm/unittests/Analysis/IR2VecTest.cpp @@ -154,14 +154,14 @@ TEST(EmbeddingTest, ApproximatelyEqual) { EXPECT_TRUE(E1.approximatelyEquals(E2)); // Diff = 1e-7 Embedding E3 = {1.2, 2.2, 3.2}; // Diff = 2e-5 - EXPECT_FALSE(E1.approximatelyEquals(E3)); + EXPECT_FALSE(E1.approximatelyEquals(E3, 1e-6)); EXPECT_TRUE(E1.approximatelyEquals(E3, 3e-5)); Embedding E_clearly_within = {1.005, 2.005, 3.005}; // Diff = 5e-7 EXPECT_TRUE(E1.approximatelyEquals(E_clearly_within)); Embedding E_clearly_outside = {1.1, 2.1, 3.1}; // Diff = 1e-5 - EXPECT_FALSE(E1.approximatelyEquals(E_clearly_outside)); + EXPECT_FALSE(E1.approximatelyEquals(E_clearly_outside, 1e-6)); Embedding E4 = {1.0, 2.0, 3.5}; // Large diff EXPECT_FALSE(E1.approximatelyEquals(E4, 0.01)); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
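For context, a standalone sketch of the element-wise comparison the tolerance feeds into — the real Embedding lives in IR2Vec.h and its exact boundary handling may differ, so treat this as illustrative:

```cpp
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Two embeddings are approximately equal when every element differs by
// at most Tolerance. Loosening the default from 1e-6 to 1e-4 absorbs
// floating-point rounding accumulated across vector operations.
bool approximatelyEquals(const std::vector<double> &A,
                         const std::vector<double> &B,
                         double Tolerance = 1e-4) {
  assert(A.size() == B.size() && "Vectors must have the same dimension");
  for (std::size_t I = 0; I < A.size(); ++I)
    if (std::abs(A[I] - B[I]) > Tolerance)
      return false;
  return true;
}

int main() {
  assert(approximatelyEquals({1.0, 2.0}, {1.00002, 2.00002}));        // diff 2e-5 < 1e-4
  assert(!approximatelyEquals({1.0, 2.0}, {1.00002, 2.00002}, 1e-6)); // diff 2e-5 > 1e-6
}
```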
[llvm-branch-commits] [llvm] Overloading operator+ for Embeddings (PR #145118)
https://github.com/svkeerthy created https://github.com/llvm/llvm-project/pull/145118 None >From cbd2c6e77eefb4ba7b8acbf6ea12f21486e7dbc8 Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Fri, 20 Jun 2025 23:00:40 + Subject: [PATCH] Overloading operator+ for Embeddngs --- llvm/include/llvm/Analysis/IR2Vec.h| 1 + llvm/lib/Analysis/IR2Vec.cpp | 8 llvm/unittests/Analysis/IR2VecTest.cpp | 18 ++ 3 files changed, 27 insertions(+) diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index 480b834077b86..f6c40d36f8026 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -106,6 +106,7 @@ struct Embedding { const std::vector &getData() const { return Data; } /// Arithmetic operators + Embedding operator+(const Embedding &RHS) const; Embedding &operator+=(const Embedding &RHS); Embedding &operator-=(const Embedding &RHS); Embedding &operator*=(double Factor); diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp index 27cc2a4109879..d5d27db8bd2bf 100644 --- a/llvm/lib/Analysis/IR2Vec.cpp +++ b/llvm/lib/Analysis/IR2Vec.cpp @@ -71,6 +71,14 @@ inline bool fromJSON(const llvm::json::Value &E, Embedding &Out, // Embedding //===--===// +Embedding Embedding::operator+(const Embedding &RHS) const { + assert(this->size() == RHS.size() && "Vectors must have the same dimension"); + Embedding Result(*this); + std::transform(this->begin(), this->end(), RHS.begin(), Result.begin(), + std::plus()); + return Result; +} + Embedding &Embedding::operator+=(const Embedding &RHS) { assert(this->size() == RHS.size() && "Vectors must have the same dimension"); std::transform(this->begin(), this->end(), RHS.begin(), this->begin(), diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp index 33ac16828eb6c..50eb7f73c6f50 100644 --- a/llvm/unittests/Analysis/IR2VecTest.cpp +++ b/llvm/unittests/Analysis/IR2VecTest.cpp @@ -109,6 +109,18 @@ TEST(EmbeddingTest, ConstructorsAndAccessors) { } } +TEST(EmbeddingTest, AddVectorsOutOfPlace) { + Embedding E1 = {1.0, 2.0, 3.0}; + Embedding E2 = {0.5, 1.5, -1.0}; + + Embedding E3 = E1 + E2; + EXPECT_THAT(E3, ElementsAre(1.5, 3.5, 2.0)); + + // Check that E1 and E2 are unchanged + EXPECT_THAT(E1, ElementsAre(1.0, 2.0, 3.0)); + EXPECT_THAT(E2, ElementsAre(0.5, 1.5, -1.0)); +} + TEST(EmbeddingTest, AddVectors) { Embedding E1 = {1.0, 2.0, 3.0}; Embedding E2 = {0.5, 1.5, -1.0}; @@ -180,6 +192,12 @@ TEST(EmbeddingTest, AccessOutOfBounds) { EXPECT_DEATH(E[4] = 4.0, "Index out of bounds"); } +TEST(EmbeddingTest, MismatchedDimensionsAddVectorsOutOfPlace) { + Embedding E1 = {1.0, 2.0}; + Embedding E2 = {1.0}; + EXPECT_DEATH(E1 + E2, "Vectors must have the same dimension"); +} + TEST(EmbeddingTest, MismatchedDimensionsAddVectors) { Embedding E1 = {1.0, 2.0}; Embedding E2 = {1.0}; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
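Since the new tests assert that the operands survive the addition, a standalone sketch over plain std::vector makes the out-of-place shape, and its contrast with operator+=, concrete:

```cpp
#include <algorithm>
#include <cassert>
#include <functional>
#include <vector>

using Embedding = std::vector<double>;

// Out-of-place addition: copy the left operand, then transform into the
// copy -- the same shape as the patch's Embedding::operator+. Unlike
// operator+=, both operands are left untouched.
Embedding add(const Embedding &LHS, const Embedding &RHS) {
  assert(LHS.size() == RHS.size() && "Vectors must have the same dimension");
  Embedding Result(LHS);
  std::transform(Result.begin(), Result.end(), RHS.begin(), Result.begin(),
                 std::plus<double>());
  return Result;
}

int main() {
  Embedding E1 = {1.0, 2.0, 3.0};
  Embedding E2 = {0.5, 1.5, -1.0};
  Embedding E3 = add(E1, E2);
  assert((E3 == Embedding{1.5, 3.5, 2.0}));
  assert((E1 == Embedding{1.0, 2.0, 3.0})); // operands unchanged
}
```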
[llvm-branch-commits] [llvm] Increasing tolerance in ApproximatelyEquals (PR #145117)
svkeerthy wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests).

* **#145119**
* **#145118**
* **#145117** 👈 (this PR; view in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/145117)
* **#143999**
* **#143986**
* **#143479**: 1 other dependent PR (#144139)
* **#143476**
* **#143200**
* **#143197**
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/

https://github.com/llvm/llvm-project/pull/145117 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] document global variables (PR #145070)
https://github.com/evelez7 updated https://github.com/llvm/llvm-project/pull/145070 >From fa5f1cb09df62f018e5b7b53ccec4b77d94d1828 Mon Sep 17 00:00:00 2001 From: Erick Velez Date: Wed, 18 Jun 2025 16:36:49 -0700 Subject: [PATCH] [clang-doc] document global variables --- clang-tools-extra/clang-doc/BitcodeReader.cpp | 40 ++ clang-tools-extra/clang-doc/BitcodeWriter.cpp | 32 +-- clang-tools-extra/clang-doc/BitcodeWriter.h | 6 +++ clang-tools-extra/clang-doc/HTMLGenerator.cpp | 3 ++ .../clang-doc/HTMLMustacheGenerator.cpp | 2 + clang-tools-extra/clang-doc/JSONGenerator.cpp | 14 +++ clang-tools-extra/clang-doc/MDGenerator.cpp | 4 ++ clang-tools-extra/clang-doc/Mapper.cpp| 6 +++ clang-tools-extra/clang-doc/Mapper.h | 1 + .../clang-doc/Representation.cpp | 16 clang-tools-extra/clang-doc/Representation.h | 14 ++- clang-tools-extra/clang-doc/Serialize.cpp | 27 clang-tools-extra/clang-doc/Serialize.h | 4 ++ clang-tools-extra/clang-doc/YAMLGenerator.cpp | 1 + .../test/clang-doc/json/namespace.cpp | 41 +-- .../unittests/clang-doc/BitcodeTest.cpp | 2 + 16 files changed, 188 insertions(+), 25 deletions(-) diff --git a/clang-tools-extra/clang-doc/BitcodeReader.cpp b/clang-tools-extra/clang-doc/BitcodeReader.cpp index 66852931226bf..cbdd5d245b8de 100644 --- a/clang-tools-extra/clang-doc/BitcodeReader.cpp +++ b/clang-tools-extra/clang-doc/BitcodeReader.cpp @@ -93,6 +93,7 @@ static llvm::Error decodeRecord(const Record &R, InfoType &Field, case InfoType::IT_enum: case InfoType::IT_typedef: case InfoType::IT_concept: + case InfoType::IT_variable: Field = IT; return llvm::Error::success(); } @@ -416,6 +417,23 @@ static llvm::Error parseRecord(const Record &R, unsigned ID, "invalid field for ConstraintInfo"); } +static llvm::Error parseRecord(const Record &R, unsigned ID, + llvm::StringRef Blob, VarInfo *I) { + switch (ID) { + case VAR_USR: +return decodeRecord(R, I->USR, Blob); + case VAR_NAME: +return decodeRecord(R, I->Name, Blob); + case VAR_DEFLOCATION: +return decodeRecord(R, I->DefLoc, Blob); + case VAR_IS_STATIC: +return decodeRecord(R, I->IsStatic, Blob); + default: +return llvm::createStringError(llvm::inconvertibleErrorCode(), + "invalid field for VarInfo"); + } +} + template static llvm::Expected getCommentInfo(T I) { return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid type cannot contain CommentInfo"); @@ -458,6 +476,10 @@ template <> llvm::Expected getCommentInfo(ConceptInfo *I) { return &I->Description.emplace_back(); } +template <> Expected getCommentInfo(VarInfo *I) { + return &I->Description.emplace_back(); +} + // When readSubBlock encounters a TypeInfo sub-block, it calls addTypeInfo on // the parent block to set it. The template specializations define what to do // for each supported parent block. 
@@ -497,6 +519,11 @@ template <> llvm::Error addTypeInfo(TypedefInfo *I, TypeInfo &&T) {
   return llvm::Error::success();
 }
 
+template <> llvm::Error addTypeInfo(VarInfo *I, TypeInfo &&T) {
+  I->Type = std::move(T);
+  return llvm::Error::success();
+}
+
 template <typename T>
 static llvm::Error addReference(T I, Reference &&R, FieldId F) {
   return llvm::createStringError(llvm::inconvertibleErrorCode(),
@@ -643,6 +670,9 @@ template <> void addChild(NamespaceInfo *I, TypedefInfo &&R) {
 template <> void addChild(NamespaceInfo *I, ConceptInfo &&R) {
   I->Children.Concepts.emplace_back(std::move(R));
 }
+template <> void addChild(NamespaceInfo *I, VarInfo &&R) {
+  I->Children.Variables.emplace_back(std::move(R));
+}
 
 // Record children:
 template <> void addChild(RecordInfo *I, FunctionInfo &&R) {
@@ -887,6 +917,13 @@ llvm::Error ClangDocBitcodeReader::readSubBlock(unsigned ID, T I) {
     addChild(I, std::move(CI));
     return llvm::Error::success();
   }
+  case BI_VAR_BLOCK_ID: {
+    VarInfo VI;
+    if (auto Err = readBlock(ID, &VI))
+      return Err;
+    addChild(I, std::move(VI));
+    return llvm::Error::success();
+  }
   default:
     return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                    "invalid subblock type");
@@ -996,6 +1033,8 @@ ClangDocBitcodeReader::readBlockToInfo(unsigned ID) {
     return createInfo(ID);
   case BI_FUNCTION_BLOCK_ID:
     return createInfo<FunctionInfo>(ID);
+  case BI_VAR_BLOCK_ID:
+    return createInfo<VarInfo>(ID);
   default:
     return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                    "cannot create info");
@@ -1035,6 +1074,7 @@ ClangDocBitcodeReader::readBitcode() {
   case BI_ENUM_BLOCK_ID:
   case BI_TYPEDEF_BLOCK_ID:
   case BI_CONCEPT_BLOCK_ID:
+  case BI_VAR_BLOCK_ID:
   case BI_FUNCTION_BLOCK_ID: {
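For context, a minimal sketch of the kind of declarations this patch teaches clang-doc to record (the file name, identifiers, and comments below are illustrative assumptions, not taken from the patch):

  // globals.cpp -- a translation unit clang-doc would index
  static int Counter = 0; // internal linkage: recorded with IsStatic = true
  int Visible = 42;       // external linkage: IsStatic = false, Type captures "int"

Each such declaration would map to one VarInfo (USR, Name, DefLoc, IsStatic, Type) carried through the bitcode reader/writer changes above.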
[llvm-branch-commits] [llvm] Hexagon: Add libcall declarations for special memcpy (PR #144975)
https://github.com/aankit-ca approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/144975
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Introduce a pass to replace VGPR MFMAs with AGPR (PR #145024)
arsenm wrote:

> Do you assume that at this stage there are no accvgpr_write/read
> instructions, but only COPY?

Yes, you should never use those for actual copies. Not using COPY always hurts optimizations.

https://github.com/llvm/llvm-project/pull/145024
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
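To make that point concrete, a hedged MIR sketch (the opcode spelling and registers are assumed from typical AMDGPU tests, not taken from this PR). A plain VGPR-to-AGPR move should stay a generic copy, which the coalescer and machine copy propagation understand:

  $agpr0 = COPY $vgpr0

whereas the target-specific spelling of the same move is opaque to those generic optimizations and should only appear once pseudos are expanded, never as an ordinary copy:

  $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec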
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
@@ -211,6 +214,15 @@ struct TemplateSpecializationInfo {
   std::vector<TemplateParamInfo> Params;
 };
 
+struct ConstraintInfo {
+  ConstraintInfo() = default;
+  ConstraintInfo(SymbolID USR, StringRef Name)
+      : ConceptRef(USR, Name, InfoType::IT_concept) {}
+  Reference ConceptRef;
+
+  SmallString<16> Expression; // The expression that defines the constraint.

ilovepi wrote:

Maybe just name it `ConstraintExpr`?

https://github.com/llvm/llvm-project/pull/144430
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
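A hedged illustration of what the two fields would hold (the example concept is assumed, not from the patch). Given

  template <typename T>
  concept Fooable = requires(T t) { t.foo(); };

  template <Fooable T> void bar(T);

the ConstraintInfo attached to `bar` would point at the `Fooable` concept through `ConceptRef`, while the field under discussion would store the constraint text, roughly `Fooable<T>`.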
[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)
@@ -248,6 +257,27 @@ static void serializeCommonChildren(const ScopeChildren &Children,
   }
 }
 
+template <typename T, typename SerializationFunc>
+static void serializeArray(const std::vector<T> &Records, Object &Obj,
+                           const std::string &Key,
+                           SerializationFunc serializeInfo) {
+  json::Value RecordsArray = Array();
+  auto &RecordsArrayRef = *RecordsArray.getAsArray();
+  RecordsArrayRef.reserve(Records.size());
+  for (const auto &Item : Records) {
+    json::Value ItemVal = Object();
+    auto &ItemObj = *ItemVal.getAsObject();
+    serializeInfo(Item, ItemObj);
+    RecordsArrayRef.push_back(ItemVal);
+  }

evelez7 wrote:

I'm going to refactor a lot of the code in here later to call this function for arrays. And I'll probably make a similar one for objects that just get a value declared and passed to `serializeInfo`.

https://github.com/llvm/llvm-project/pull/144430
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
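A hedged sketch of a call site for the helper (the child list, JSON key, and per-info serializer here are assumptions based on the surrounding patch, not code from it):

  // Serialize a namespace's concepts into Obj["Concepts"], reusing the
  // per-ConceptInfo serializer defined elsewhere in JSONGenerator.cpp.
  serializeArray(Children.Concepts, Obj, "Concepts",
                 [](const ConceptInfo &C, Object &CObj) { serializeInfo(C, CObj); });

The template deduces both the element type and the callable, so each Info kind can reuse the same array-building loop.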
[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Add missing erasure notifications (PR #145030)
https://github.com/j2kun approved this pull request.

LGTM!

https://github.com/llvm/llvm-project/pull/145030
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits