[llvm-branch-commits] [clang] [llvm] [HLSL] Define RasterizerOrderedBuffer resource (PR #74897)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/74897 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL] Define RasterizerOrderedBuffer resource (PR #74897)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/74897 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL] Define RasterizerOrderedBuffer resource (PR #74897)
@@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s bogner wrote: That's a fair point. I went ahead and changed the triple to specify a pixel shader to avoid any confusion / future validation issues. https://github.com/llvm/llvm-project/pull/74897 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [compiler-rt] [llvm] [clang-tools-extra] [libcxx] [clang] [HLSL] Define RasterizerOrderedBuffer resource (PR #74897)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/74897 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [mlir] [libcxx] [compiler-rt] [clang] [llvm] [HLSL] Define RasterizerOrderedBuffer resource (PR #74897)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/74897 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Move OpenMP-related code from `FirConverter` to `OpenMPMixin` (PR #74866)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/74866 >From 27fab0c65445893fb27baead5573bad2dd690dfc Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 8 Dec 2023 09:13:11 -0600 Subject: [PATCH] [flang] Move OpenMP-related code from `FirConverter` to `OpenMPMixin` This improves the separation of the generic Fortran lowering and the lowering of OpenMP constructs. The mixin is intended to be derived from via CRTP: ``` class FirConverter : public OpenMPMixin ... ``` The primary goal of the mixin is to implement `genFIR` functions that the derived converter can then call via ``` std::visit([this](auto &&s) { genFIR(s); }); ``` The mixin is also expecting a handful of functions to be present in the derived class, most importantly `genFIR(Evaluation&)`, plus getter classes for the op builder, symbol table, etc. The pre-existing PFT-lowering functionality is preserved. --- flang/lib/Lower/Bridge.cpp | 84 +- flang/lib/Lower/ConverterMixin.h | 28 flang/lib/Lower/FirConverter.h | 38 +- flang/lib/Lower/OpenMP.cpp | 118 ++- flang/lib/Lower/OpenMPMixin.h| 66 + 5 files changed, 235 insertions(+), 99 deletions(-) create mode 100644 flang/lib/Lower/ConverterMixin.h create mode 100644 flang/lib/Lower/OpenMPMixin.h diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 5aaba233744b2d..0a476a38d8d2de 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -170,7 +170,7 @@ void FirConverter::run(Fortran::lower::pft::Program &pft) { }); finalizeOpenACCLowering(); - finalizeOpenMPLowering(globalOmpRequiresSymbol); + OpenMPBase::finalize(globalOmpRequiresSymbol); } /// Generate FIR for Evaluation \p eval. 
@@ -977,70 +977,6 @@ void FirConverter::genFIR(const Fortran::parser::OpenACCRoutineConstruct &acc) { // Handled by genFIR(const Fortran::parser::OpenACCDeclarativeConstruct &) } -void FirConverter::genFIR(const Fortran::parser::OpenMPConstruct &omp) { - mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); - localSymbols.pushScope(); - genOpenMPConstruct(*this, bridge.getSemanticsContext(), getEval(), omp); - - const Fortran::parser::OpenMPLoopConstruct *ompLoop = - std::get_if(&omp.u); - const Fortran::parser::OpenMPBlockConstruct *ompBlock = - std::get_if(&omp.u); - - // If loop is part of an OpenMP Construct then the OpenMP dialect - // workshare loop operation has already been created. Only the - // body needs to be created here and the do_loop can be skipped. - // Skip the number of collapsed loops, which is 1 when there is a - // no collapse requested. - - Fortran::lower::pft::Evaluation *curEval = &getEval(); - const Fortran::parser::OmpClauseList *loopOpClauseList = nullptr; - if (ompLoop) { -loopOpClauseList = &std::get( -std::get(ompLoop->t).t); -int64_t collapseValue = Fortran::lower::getCollapseValue(*loopOpClauseList); - -curEval = &curEval->getFirstNestedEvaluation(); -for (int64_t i = 1; i < collapseValue; i++) { - curEval = &*std::next(curEval->getNestedEvaluations().begin()); -} - } - - for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) -genFIR(e); - - if (ompLoop) { -genOpenMPReduction(*this, *loopOpClauseList); - } else if (ompBlock) { -const auto &blockStart = -std::get(ompBlock->t); -const auto &blockClauses = -std::get(blockStart.t); -genOpenMPReduction(*this, blockClauses); - } - - localSymbols.popScope(); - builder->restoreInsertionPoint(insertPt); - - // Register if a target region was found - ompDeviceCodeFound = - ompDeviceCodeFound || Fortran::lower::isOpenMPTargetConstruct(omp); -} - -void FirConverter::genFIR( -const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl) { - 
mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); - // Register if a declare target construct intended for a target device was - // found - ompDeviceCodeFound = - ompDeviceCodeFound || - Fortran::lower::isOpenMPDeviceDeclareTarget(*this, getEval(), ompDecl); - genOpenMPDeclarativeConstruct(*this, getEval(), ompDecl); - for (Fortran::lower::pft::Evaluation &e : getEval().getNestedEvaluations()) -genFIR(e); - builder->restoreInsertionPoint(insertPt); -} - void FirConverter::genFIR(const Fortran::parser::OpenStmt &stmt) { mlir::Value iostat = genOpenStatement(*this, stmt); genIoConditionBranches(getEval(), stmt.v, iostat); @@ -3752,13 +3688,7 @@ void FirConverter::instantiateVar(const Fortran::lower::pft::Variable &var, Fortran::lower::AggregateStoreMap &storeMap) { Fortran::lower::instantiateVariable(*this, var, localSymbols, storeMap); if (var.hasSymbol()) { -if (var.getSymbol().test( -Fortran::semantics::Symbol::Flag::OmpThreadprivate)) - For
[llvm-branch-commits] [llvm] [BPF] expand cttz, ctlz for i32, i64 (PR #73668)
https://github.com/inclyc updated https://github.com/llvm/llvm-project/pull/73668 >From c9afc897d51cf7ec93d7d5f7a43ed53346bb282d Mon Sep 17 00:00:00 2001 From: Yingchi Long Date: Wed, 29 Nov 2023 00:23:49 +0800 Subject: [PATCH] [BPF] expand cttz, ctlz for i32, i64 Fixes: https://github.com/llvm/llvm-project/issues/62252 --- llvm/lib/Target/BPF/BPFISelLowering.cpp | 9 +- llvm/test/CodeGen/BPF/cttz-ctlz.ll | 304 2 files changed, 308 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/cttz-ctlz.ll diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 45645df52f26a4..996fb590443b5f 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -111,6 +111,10 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, VT, Expand); setOperationAction(ISD::SRA_PARTS, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); +setOperationAction(ISD::CTTZ, VT, Expand); +setOperationAction(ISD::CTLZ, VT, Expand); +setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); +setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::SETCC, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); @@ -123,11 +127,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, STI.getHasJmp32() ? 
Custom : Promote); } - setOperationAction(ISD::CTTZ, MVT::i64, Custom); - setOperationAction(ISD::CTLZ, MVT::i64, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!STI.hasMovsx()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); diff --git a/llvm/test/CodeGen/BPF/cttz-ctlz.ll b/llvm/test/CodeGen/BPF/cttz-ctlz.ll new file mode 100644 index 00..f42b2e2d10871b --- /dev/null +++ b/llvm/test/CodeGen/BPF/cttz-ctlz.ll @@ -0,0 +1,304 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -march=bpf | FileCheck %s + +; test that we can expand CTTZ & CTLZ + +declare i32 @llvm.cttz.i32(i32, i1) + +define i32 @cttz_i32_zdef(i32 %a) { +; CHECK-LABEL: cttz_i32_zdef: +; CHECK: # %bb.0: +; CHECK-NEXT:r2 = r1 +; CHECK-NEXT:r2 = -r2 +; CHECK-NEXT:r1 &= r2 +; CHECK-NEXT:r1 *= 125613361 +; CHECK-NEXT:r2 = 4160749568 ll +; CHECK-NEXT:r1 &= r2 +; CHECK-NEXT:r1 >>= 27 +; CHECK-NEXT:r2 = {{\.?LCPI[0-9]+_[0-9]+}} ll +; CHECK-NEXT:r2 += r1 +; CHECK-NEXT:r0 = *(u8 *)(r2 + 0) +; CHECK-NEXT:exit +%ret = call i32 @llvm.cttz.i32(i32 %a, i1 1) +ret i32 %ret +} + + +define i32 @cttz_i32(i32 %a) { +; CHECK-LABEL: cttz_i32: +; CHECK: # %bb.0: +; CHECK-NEXT:r0 = 32 +; CHECK-NEXT:r2 = r1 +; CHECK-NEXT:r2 <<= 32 +; CHECK-NEXT:r2 >>= 32 +; CHECK-NEXT:if r2 == 0 goto LBB1_2 +; CHECK-NEXT: # %bb.1: # %cond.false +; CHECK-NEXT:r2 = r1 +; CHECK-NEXT:r2 = -r2 +; CHECK-NEXT:r1 &= r2 +; CHECK-NEXT:r1 *= 125613361 +; CHECK-NEXT:r2 = 4160749568 ll +; CHECK-NEXT:r1 &= r2 +; CHECK-NEXT:r1 >>= 27 +; CHECK-NEXT:r2 = {{\.?LCPI[0-9]+_[0-9]+}} ll +; CHECK-NEXT:r2 += r1 +; CHECK-NEXT:r0 = *(u8 *)(r2 + 0) +; CHECK-NEXT: LBB1_2: # %cond.end +; CHECK-NEXT:exit +%ret = call i32 @llvm.cttz.i32(i32 %a, i1 0) +ret i32 %ret +} + +declare i64 @llvm.cttz.i64(i64, i1) + +define i64 @cttz_i64_zdef(i64 %a) 
{ +; CHECK-LABEL: cttz_i64_zdef: +; CHECK: # %bb.0: +; CHECK-NEXT:r2 = r1 +; CHECK-NEXT:r2 = -r2 +; CHECK-NEXT:r1 &= r2 +; CHECK-NEXT:r2 = 151050438420815295 ll +; CHECK-NEXT:r1 *= r2 +; CHECK-NEXT:r1 >>= 58 +; CHECK-NEXT:r2 = {{\.?LCPI[0-9]+_[0-9]+}} ll +; CHECK-NEXT:r2 += r1 +; CHECK-NEXT:r0 = *(u8 *)(r2 + 0) +; CHECK-NEXT:exit +%ret = call i64 @llvm.cttz.i64(i64 %a, i1 1) +ret i64 %ret +} + + +define i64 @cttz_i64(i64 %a) { +; CHECK-LABEL: cttz_i64: +; CHECK: # %bb.0: +; CHECK-NEXT:r0 = 64 +; CHECK-NEXT:if r1 == 0 goto LBB3_2 +; CHECK-NEXT: # %bb.1: # %cond.false +; CHECK-NEXT:r2 = r1 +; CHECK-NEXT:r2 = -r2 +; CHECK-NEXT:r1 &= r2 +; CHECK-NEXT:r2 = 151050438420815295 ll +; CHECK-NEXT:r1 *= r2 +; CHECK-NEXT:r1 >>= 58 +; CHECK-NEXT:r2 = {{\.?LCPI[0-9]+_[0-9]+}} ll +; CHECK-NEXT:r2 += r1 +; CHECK-NEXT:r0 = *(u8 *)(r2 + 0) +; CHECK-NEXT: LBB3_2: # %cond.end +; CHECK-NEXT:exit +%ret = call i64 @llvm.cttz.i64(i64 %a, i1 0) +ret i64 %ret +} + + +declare i32 @llvm.ctlz.i32(i32, i1) + +define i32 @ctlz_i32_zdef(i32 %a) { +; CHECK-LABEL: ctlz_i32_zdef: +; CHECK: # %bb.0: +; CHECK-NEXT:r2 = 4294967294 ll +; CHECK-NEXT:r3 = r1 +; CHECK-NEXT:r3 &= r2 +; CHECK-NEXT:r3 >>= 1 +; CHECK-NEXT:r1 |= r3 +;