[llvm-branch-commits] [flang] [flang][OpenMP] Parse METADIRECTIVE in specification part (PR #123397)
llvmbot wrote: @llvm/pr-subscribers-flang-parser Author: Krzysztof Parzyszek (kparzysz) Changes Add METADIRECTIVE to the OpenMP declarative constructs as well. Emit a TODO error for both declarative and executable cases. --- Full diff: https://github.com/llvm/llvm-project/pull/123397.diff 7 Files Affected: - (modified) flang/include/flang/Parser/parse-tree.h (+2-2) - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+7-5) - (modified) flang/lib/Parser/openmp-parsers.cpp (+3-1) - (added) flang/test/Lower/OpenMP/Todo/metadirective-exec.f90 (+9) - (added) flang/test/Lower/OpenMP/Todo/metadirective-spec.f90 (+9) - (modified) flang/test/Parser/OpenMP/metadirective-v50.f90 (+2) - (modified) flang/test/Parser/OpenMP/metadirective.f90 (+53) ``diff diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 6053ad5dc0f7ad..2e27b6ea7eafa1 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4537,8 +4537,8 @@ struct OpenMPDeclarativeConstruct { CharBlock source; std::variant + OpenMPThreadprivate, OpenMPRequiresConstruct, OpenMPUtilityConstruct, + OpenMPDeclareTargetConstruct, OmpMetadirectiveDirective> u; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 3a4336fe5b90f9..debab2352abd0a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3137,6 +3137,13 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, // support the case of threadprivate variable declared in module. } +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OmpMetadirectiveDirective &meta) { + TODO(converter.getCurrentLocation(), "METADIRECTIVE"); +} + static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -3229,11 +3236,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct"); } -static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, - const parser::OmpMetadirectiveDirective &construct) {} - static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index e3c9292bc5f91e..f5387dcf4b3c3d 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1283,7 +1283,9 @@ TYPE_PARSER(startOmpLine >> construct( Parser{}) || construct( -Parser{})) / +Parser{}) || +construct( +Parser{})) / endOmpLine)) // Block Construct diff --git a/flang/test/Lower/OpenMP/Todo/metadirective-exec.f90 b/flang/test/Lower/OpenMP/Todo/metadirective-exec.f90 new file mode 100644 index 00..2e160a18966162 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/metadirective-exec.f90 @@ -0,0 +1,9 @@ +!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: METADIRECTIVE +subroutine f00 + continue + !Executable + !$omp metadirective when(user={condition(.true.)}: nothing) +end diff --git 
a/flang/test/Lower/OpenMP/Todo/metadirective-spec.f90 b/flang/test/Lower/OpenMP/Todo/metadirective-spec.f90 new file mode 100644 index 00..a00612a92218a2 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/metadirective-spec.f90 @@ -0,0 +1,9 @@ +!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: METADIRECTIVE +subroutine f00 + !Specification + !$omp metadirective when(user={condition(.true.)}: nothing) + implicit none +end diff --git a/flang/test/Parser/OpenMP/metadirective-v50.f90 b/flang/test/Parser/OpenMP/metadirective-v50.f90 index 73d5077da3d9f1..d7c3121b8f1b80 100644 --- a/flang/test/Parser/OpenMP/metadirective-v50.f90 +++ b/flang/test/Parser/OpenMP/metadirective-v50.f90 @@ -2,12 +2,14 @@ !RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=50 %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine f01 + continue !$omp metadirective & !$omp & when(user={conditi
[llvm-branch-commits] [flang] [flang][OpenMP] Parse METADIRECTIVE in specification part (PR #123397)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/123397 Add METADIRECTIVE to the OpenMP declarative constructs as well. Emit a TODO error for both declarative and executable cases. >From 7fa026c6561020669f42b94fb474648f59fb1e7a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 17 Jan 2025 15:04:52 -0600 Subject: [PATCH] [flang][OpenMP] Parse METADIRECTIVE in specification part Add METADIRECTIVE to the OpenMP declarative constructs as well. Emit a TODO error for both declarative and executable cases. --- flang/include/flang/Parser/parse-tree.h | 4 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 12 +++-- flang/lib/Parser/openmp-parsers.cpp | 4 +- .../Lower/OpenMP/Todo/metadirective-exec.f90 | 9 .../Lower/OpenMP/Todo/metadirective-spec.f90 | 9 .../test/Parser/OpenMP/metadirective-v50.f90 | 2 + flang/test/Parser/OpenMP/metadirective.f90| 53 +++ 7 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 flang/test/Lower/OpenMP/Todo/metadirective-exec.f90 create mode 100644 flang/test/Lower/OpenMP/Todo/metadirective-spec.f90 diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 6053ad5dc0f7ad..2e27b6ea7eafa1 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4537,8 +4537,8 @@ struct OpenMPDeclarativeConstruct { CharBlock source; std::variant + OpenMPThreadprivate, OpenMPRequiresConstruct, OpenMPUtilityConstruct, + OpenMPDeclareTargetConstruct, OmpMetadirectiveDirective> u; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 3a4336fe5b90f9..debab2352abd0a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3137,6 +3137,13 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, // support the case of threadprivate variable declared in module. 
} +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OmpMetadirectiveDirective &meta) { + TODO(converter.getCurrentLocation(), "METADIRECTIVE"); +} + static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -3229,11 +3236,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct"); } -static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, - const parser::OmpMetadirectiveDirective &construct) {} - static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index e3c9292bc5f91e..f5387dcf4b3c3d 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1283,7 +1283,9 @@ TYPE_PARSER(startOmpLine >> construct( Parser{}) || construct( -Parser{})) / +Parser{}) || +construct( +Parser{})) / endOmpLine)) // Block Construct diff --git a/flang/test/Lower/OpenMP/Todo/metadirective-exec.f90 b/flang/test/Lower/OpenMP/Todo/metadirective-exec.f90 new file mode 100644 index 00..2e160a18966162 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/metadirective-exec.f90 @@ -0,0 +1,9 @@ +!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: METADIRECTIVE +subroutine f00 + continue + !Executable + !$omp metadirective when(user={condition(.true.)}: nothing) +end diff --git a/flang/test/Lower/OpenMP/Todo/metadirective-spec.f90 b/flang/test/Lower/OpenMP/Todo/metadirective-spec.f90 new file mode 100644 index 00..a00612a92218a2 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/metadirective-spec.f90 @@ -0,0 +1,9 @@ +!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: METADIRECTIVE +subroutine f00 + !Specification + !$omp metadirective when(user={condition(.true.)}: nothing) + implicit none +end diff --git a/flang/test/Parser/OpenMP/metadirective-v50.f90 b/flang/test/Parser/OpenMP/metadire
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { +// Reverse the order of cold jump tables indices. +for (int L = NextHotJumpTableIndex, R = JT.size() - 1; L < R; ++L, --R) + std::swap(JumpTableIndices[L], JumpTableIndices[R]); ellishg wrote: An alternative option is to pass an iterator of `JumpTableIndices` to `emitJumpTables()` instead of an `ArrayRef`. That way you can use `llvm::reverse()` on the iterator, without needing to modify memory at all. ```suggestion std::reverse(JumpTableIndices.begin() + NextHotJumpTableIndex + 1, JumpTableIndices.end()); ``` https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
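As a rough illustration of the alternative ellishg describes, the emission helper could take an arbitrary range of indices, so the caller hands it either the hot slice in its original order or a reversed view of the cold slice, with no in-place swapping. The sketch below is standalone and hypothetical: it uses `std::span`/`std::views::reverse` so it compiles on its own, whereas in-tree code would presumably use `ArrayRef` and `llvm::reverse` from `llvm/ADT/STLExtras.h`; the `NextHot` split point and the `emitJumpTables` body are stand-ins, not the real AsmPrinter API.

```cpp
#include <cstdio>
#include <ranges>
#include <span>
#include <vector>

// Stand-in for AsmPrinter::emitJumpTables: accept any range of indices.
template <typename Range>
void emitJumpTables(Range &&JumpTableIndices) {
  for (unsigned JTI : JumpTableIndices)
    std::printf("emit jump table %u\n", JTI);
}

int main() {
  std::vector<unsigned> JumpTableIndices = {0, 1, 2, 3, 4, 5};
  unsigned NextHot = 3; // hypothetical split: [0,3) hot, [3,6) cold
  std::span<const unsigned> All(JumpTableIndices);

  emitJumpTables(All.first(NextHot));                         // hot tables, original order
  emitJumpTables(std::views::reverse(All.subspan(NextHot)));  // cold tables, reversed view
  return 0;
}
```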
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } ellishg wrote: `ArrayRef::take_front(0)` will return an empty array, and `emitJumpTables()` handles this case. ```suggestion emitJumpTables( ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); ``` https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; ellishg wrote: ```suggestion if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) { JumpTableIndices[NextColdJumpTableIndex--] = JTI; } else { JumpTableIndices[NextHotJumpTableIndex++] = JTI; } ``` https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { ellishg wrote: `NextHotJumpTableIndex` is initialized to `JT.size() - 1` ```suggestion if (NextHotJumpTableIndex < (int)JT.size() - 1) { ``` https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { +// Reverse the order of cold jump tables indices. +for (int L = NextHotJumpTableIndex, R = JT.size() - 1; L < R; ++L, --R) + std::swap(JumpTableIndices[L], JumpTableIndices[R]); + +emitJumpTables( +ArrayRef(JumpTableIndices) +.take_back(JT.size() - NextHotJumpTableIndex), +TLOF.getSectionForJumpTable( + F, TM, &JT[JumpTableIndices[NextHotJumpTableIndex]]), +JTInDiffSection, *MJTI); + } + + return; +} + +void AsmPrinter::emitJumpTables(ArrayRef JumpTableIndices, +MCSection *JumpTableSection, +bool JTInDiffSection, +const MachineJumpTableInfo &MJTI) { + if (JumpTableIndices.empty()) +return; + + const DataLayout &DL = MF->getDataLayout(); if (JTInDiffSection) { -// Drop it in the readonly section. -MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM); -OutStreamer->switchSection(ReadOnlySection); +OutStreamer->switchSection(JumpTableSection); } - emitAlignment(Align(MJTI->getEntryAlignment(DL))); + emitAlignment(Align(MJTI.getEntryAlignment(MF->getDataLayout(; // Jump tables in code sections are marked with a data_region directive // where that's supported. if (!JTInDiffSection) OutStreamer->emitDataRegion(MCDR_DataRegionJT32); - for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { -const std::vector &JTBBs = JT[JTI].MBBs; + const auto &JT = MJTI.getJumpTables(); + for (unsigned Index = 0, e = JumpTableIndices.size(); Index != e; ++Index) { ellishg wrote: Then you can replace `JT[JumpTableIndices[Index]]` -> `JT[JTIndex]` ```suggestion for (auto JTIndex : JumpTableIndices) { ``` https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { ellishg wrote: Actually, you might be able to omit this check too depending on your changes https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Remove `isF...()` type API for low-precision FP types (PR #123326)
https://github.com/sergey-kozub approved this pull request. https://github.com/llvm/llvm-project/pull/123326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Add address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
https://github.com/hekota created https://github.com/llvm/llvm-project/pull/123411 Introduces a new address space `hlsl_constant(2)` for constant buffer declarations. This address space is applied to declarations inside `cbuffer` block. Later on, it will also be applied to `ConstantBuffer` syntax and the default `$Globals` constant buffer. Clang codegen translates constant buffer declarations to global variables and loads from `hlsl_constant(2)` address space. More work coming soon will include addition of metadata that will map these globals to individual constant buffers and enable their transformation to appropriate constant buffer load intrinsics later on in an LLVM pass. Fixes #123406 >From 6aba475e4af789fc03594560ad9937e3502cce51 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Fri, 17 Jan 2025 13:31:01 -0800 Subject: [PATCH] [HLSL] Add address space `hlsl_constant(2)` for constant buffer declarations --- clang/include/clang/Basic/AddressSpaces.h | 1 + clang/lib/AST/TypePrinter.cpp | 2 + clang/lib/Basic/Targets/AArch64.h | 1 + clang/lib/Basic/Targets/AMDGPU.cpp| 1 + clang/lib/Basic/Targets/DirectX.h | 1 + clang/lib/Basic/Targets/NVPTX.h | 1 + clang/lib/Basic/Targets/SPIR.h| 2 + clang/lib/Basic/Targets/SystemZ.h | 1 + clang/lib/Basic/Targets/TCE.h | 1 + clang/lib/Basic/Targets/WebAssembly.h | 1 + clang/lib/Basic/Targets/X86.h | 1 + clang/lib/CodeGen/CGHLSLRuntime.cpp | 16 clang/lib/Sema/SemaHLSL.cpp | 15 ++-- .../ast-dump-comment-cbuffer-tbuffer.hlsl | 16 clang/test/AST/HLSL/cbuffer.hlsl | 24 ++-- clang/test/AST/HLSL/packoffset.hlsl | 38 +-- clang/test/AST/HLSL/pch_hlsl_buffer.hlsl | 12 +++--- .../test/AST/HLSL/resource_binding_attr.hlsl | 8 ++-- clang/test/CodeGenHLSL/cbuf.hlsl | 13 +-- clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl | 8 +++- .../static_global_and_function_in_cb.hlsl | 14 --- 21 files changed, 97 insertions(+), 80 deletions(-) diff --git a/clang/include/clang/Basic/AddressSpaces.h b/clang/include/clang/Basic/AddressSpaces.h index 7b723d508fff17..d18bfe54931f93 100644 --- a/clang/include/clang/Basic/AddressSpaces.h +++ b/clang/include/clang/Basic/AddressSpaces.h @@ -58,6 +58,7 @@ enum class LangAS : unsigned { // HLSL specific address spaces. hlsl_groupshared, + hlsl_constant, // Wasm specific address spaces. wasm_funcref, diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index a850410ffc8468..6cad74fef3fe33 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -2556,6 +2556,8 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) { return "__funcref"; case LangAS::hlsl_groupshared: return "groupshared"; + case LangAS::hlsl_constant: +return "hlsl_constant"; default: return std::to_string(toTargetAddressSpace(AS)); } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index ecf80b23a508c9..600940f5e4e23c 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -44,6 +44,7 @@ static const unsigned ARM64AddrSpaceMap[] = { static_cast(AArch64AddrSpace::ptr32_uptr), static_cast(AArch64AddrSpace::ptr64), 0, // hlsl_groupshared +0, // hlsl_constant // Wasm address space values for this target are dummy values, // as it is only enabled for Wasm targets. 
20, // wasm_funcref diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 99f8f2944e2796..824134d52ec139 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -83,6 +83,7 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared +llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_constant }; } // namespace targets diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h index ab22d1281a4df7..4e6bc0e040398b 100644 --- a/clang/lib/Basic/Targets/DirectX.h +++ b/clang/lib/Basic/Targets/DirectX.h @@ -42,6 +42,7 @@ static const unsigned DirectXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64 3, // hlsl_groupshared +2, // hlsl_constant // Wasm address space values for this target are dummy values, // as it is only enabled for Wasm targets. 20, // wasm_funcref diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index d81b89a7f24ac0..c6531148fe30ce 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -46,6 +46,7 @@ static const unsigned NVPTXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
https://github.com/hekota edited https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
https://github.com/inbelic edited https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
https://github.com/inbelic updated https://github.com/llvm/llvm-project/pull/122982 >From 58ef8ad2d3d9bfa008745b35f1514222c13b773a Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Tue, 14 Jan 2025 22:23:22 + Subject: [PATCH 1/3] [HLSL][RootSignature] Implement Parsing of Descriptor Tables - Defines the in-memory data layout for the Descriptor Table Clauses, its dependent flags/enums and parent RootElement in HLSLRootSignature.h - Implements a Parser and its required Parsing methods in ParseHLSLRootSignature --- .../clang/Parse/ParseHLSLRootSignature.h | 68 clang/lib/Parse/ParseHLSLRootSignature.cpp| 327 ++ .../Parse/ParseHLSLRootSignatureTest.cpp | 85 + .../llvm/Frontend/HLSL/HLSLRootSignature.h| 140 4 files changed, 620 insertions(+) create mode 100644 llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Parse/ParseHLSLRootSignature.h index 6c534411e754a0..9464bd8f2f9e0f 100644 --- a/clang/include/clang/Parse/ParseHLSLRootSignature.h +++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h @@ -21,6 +21,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Frontend/HLSL/HLSLRootSignature.h" + namespace llvm { namespace hlsl { namespace root_signature { @@ -89,6 +91,72 @@ class RootSignatureLexer { } }; +class RootSignatureParser { +public: + RootSignatureParser(SmallVector &Elements, + const SmallVector &Tokens); + + // Iterates over the provided tokens and constructs the in-memory + // representations of the RootElements. + // + // The return value denotes if there was a failure and the method will + // return on the first encountered failure, or, return false if it + // can sucessfully reach the end of the tokens. + bool Parse(); + +private: + bool ReportError(); // TODO: Implement this to report error through Diags + + // Root Element helpers + bool ParseRootElement(); + bool ParseDescriptorTable(); + bool ParseDescriptorTableClause(); + + // Common parsing helpers + bool ParseRegister(Register &Register); + + // Various flags/enum parsing helpers + bool ParseDescriptorRangeFlags(DescriptorRangeFlags &Flags); + bool ParseShaderVisibility(ShaderVisibility &Flag); + + // Increment the token iterator if we have not reached the end. + // Return value denotes if we were already at the last token. + bool ConsumeNextToken(); + + // Attempt to retrieve the next token, if TokenKind is invalid then there was + // no next token. + RootSignatureToken PeekNextToken(); + + // Peek if the next token is of the expected kind. + // + // Return value denotes if it failed to match the expected kind, either it is + // the end of the stream or it didn't match any of the expected kinds. + bool PeekExpectedToken(TokenKind Expected); + bool PeekExpectedToken(ArrayRef AnyExpected); + + // Consume the next token and report an error if it is not of the expected + // kind. + // + // Return value denotes if it failed to match the expected kind, either it is + // the end of the stream or it didn't match any of the expected kinds. + bool ConsumeExpectedToken(TokenKind Expected); + bool ConsumeExpectedToken(ArrayRef AnyExpected); + + // Peek if the next token is of the expected kind and if it is then consume + // it. + // + // Return value denotes if it failed to match the expected kind, either it is + // the end of the stream or it didn't match any of the expected kinds. It will + // not report an error if there isn't a match. 
+ bool TryConsumeExpectedToken(TokenKind Expected); + bool TryConsumeExpectedToken(ArrayRef Expected); + +private: + SmallVector &Elements; + SmallVector::const_iterator CurTok; + SmallVector::const_iterator LastTok; +}; + } // namespace root_signature } // namespace hlsl } // namespace llvm diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index fac4a92f1920be..c5e6dd112c6fae 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -148,6 +148,333 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { return false; } +// Parser Definitions + +RootSignatureParser::RootSignatureParser( +SmallVector &Elements, +const SmallVector &Tokens) +: Elements(Elements) { + CurTok = Tokens.begin(); + LastTok = Tokens.end(); +} + +bool RootSignatureParser::ReportError() { return true; } + +bool RootSignatureParser::Parse() { + CurTok--; // Decrement once here so we can use the ...ExpectedToken api + + // Iterate as many RootElements as possible + bool HasComma = true; + while (HasComma && + !TryConsumeExpectedToken(ArrayRef{TokenKind::kw_DescriptorTable})) { +if (ParseRootElement()) + return true; +HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma); + } + if (HasComma) +return Report
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -455,14 +456,22 @@ void createHostLayoutStructForBuffer(Sema &S, HLSLBufferDecl *BufDecl) { LS->setImplicit(true); LS->startDefinition(); - for (const Decl *D : BufDecl->decls()) { -const VarDecl *VD = dyn_cast(D); + for (Decl *D : BufDecl->decls()) { +VarDecl *VD = dyn_cast(D); if (!VD || VD->getStorageClass() == SC_Static) continue; const Type *Ty = VD->getType()->getUnqualifiedDesugaredType(); if (FieldDecl *FD = createFieldForHostLayoutStruct( -S, Ty, VD->getIdentifier(), LS, BufDecl)) +S, Ty, VD->getIdentifier(), LS, BufDecl)) { + // add the field decl to the layout struct LS->addDecl(FD); + // update address space of the original decl to hlsl_constant + // and disable initialization + QualType NewTy = + AST.getAddrSpaceQualType(VD->getType(), LangAS::hlsl_constant); + VD->setType(NewTy); + VD->setInit(nullptr); tex3d wrote: Does this `VD->setInit(nullptr);` silently get rid of an initializer if there was one? This feels a bit sketchy, unless I'm missing something. I know we don't currently support capturing initializers for constant buffer values, but it is part of HLSL syntax and could in theory be captured. Silently erasing it from the AST node at this point seems weird. https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
https://github.com/tex3d edited https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -83,6 +83,7 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared +llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_constant tex3d wrote: Note: I noticed this uses `LangASMap`, but doesn't specify a value for `wasm_funcref`, which will invisibly initialize it to `0` (initializer shorter than array type `LangASMap`) and that maps to the dummy value of `FLAT_ADDRESS` likely desired anyway. It doesn't rely on this auto-initialization for sycl_global and beyond though, even though it could. https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
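For readers unfamiliar with the aggregate-initialization rule tex3d is relying on here, a minimal standalone C++ sketch (with a toy map type, not the real clang `LangASMap`) shows why a missing trailing entry silently becomes 0:

```cpp
#include <cassert>

using ToyASMap = unsigned[4]; // stand-in for clang's LangASMap

static const ToyASMap Map = {
    7, // entry 0
    9, // entry 1
    3, // entry 2
       // entry 3 intentionally omitted
};

int main() {
  // Initializers shorter than the array leave the remaining elements
  // value-initialized, i.e. zero -- the "invisible" 0 described above.
  assert(Map[3] == 0);
  return 0;
}
```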
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -2556,6 +2556,8 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) { return "__funcref"; case LangAS::hlsl_groupshared: return "groupshared"; + case LangAS::hlsl_constant: +return "hlsl_constant"; tex3d wrote: Should hlsl addrspace cases be ordered before `wasm_funcref`, like it is in the enum and everywhere else, just for consistency? https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -43,8 +43,8 @@ tbuffer B { // AST-NEXT: FullComment // AST-NEXT: ParagraphComment // AST-NEXT: TextComment {{.*}} Text=" CBuffer decl." -// AST-NEXT: VarDecl {{.*}} a 'float' -// AST-NEXT: VarDecl {{.*}} b 'int' +// AST-NEXT: VarDecl {{.*}} a 'hlsl_constant float' +// AST-NEXT: VarDecl {{.*}} b 'hlsl_constant int' tex3d wrote: Might we want a different address space for tbuffer, potentially? https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -42,6 +42,7 @@ static const unsigned DirectXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64 3, // hlsl_groupshared +2, // hlsl_constant tex3d wrote: I'm surprised we neither use the `LangASMap` type for this array, nor static_assert that the size is the same, to prevent these from getting out-of-sync. Does the assignment of this array pointer to `LangASMap *` cause a compilation failure which can be used to enforce map updates when address spaces are added? https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
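A minimal sketch of the kind of static_assert tex3d is asking about is below; the enum and map are toy stand-ins (the real `LangAS` and per-target maps live under clang/Basic), but the idea is simply to tie the table length to the enum's sentinel so that adding a language address space without updating every map fails to compile:

```cpp
#include <iterator>

// Toy stand-ins for clang's LangAS enum and a target address-space table.
enum class LangAS : unsigned {
  Default,
  hlsl_groupshared,
  hlsl_constant,
  FirstTargetAddressSpace // sentinel: number of language address spaces
};

static const unsigned DirectXAddrSpaceMap[] = {0, 3, 2};

static_assert(std::size(DirectXAddrSpaceMap) ==
                  static_cast<unsigned>(LangAS::FirstTargetAddressSpace),
              "address-space map must have one entry per language address space");

int main() { return 0; }
```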
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -39,7 +39,7 @@ typedef float EmptyArrayTypedef[10][0]; // CHECK: HLSLResourceClassAttr {{.*}} Implicit CBuffer // CHECK: HLSLResourceAttr {{.*}} Implicit CBuffer cbuffer CB { - // CHECK: VarDecl {{.*}} col:9 used a1 'float' + // CHECK: VarDecl {{.*}} col:9 used a1 'hlsl_constant float' tex3d wrote: Will this also impact type names in diagnostics? https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
https://github.com/tex3d commented: I think this looks good, though I had some questions. Looks like a test update is needed: ``` error: 'expected-error' diagnostics expected but not seen: File /var/lib/buildkite-agent/builds/linux-56-59b8f5d88-q72qr-1/llvm-project/github-pull-requests/clang/test/SemaTemplate/address_space-dependent.cpp Line 46: address space is larger than the maximum supported (8388586) error: 'expected-error' diagnostics seen but not expected: File /var/lib/buildkite-agent/builds/linux-56-59b8f5d88-q72qr-1/llvm-project/github-pull-requests/clang/test/SemaTemplate/address_space-dependent.cpp Line 46: address space is larger than the maximum supported (8388585) ``` https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [YAML] Don't validate `Fill::Size` after error (PR #123280)
@@ -1750,7 +1750,9 @@ void MappingTraits>::mapping( std::string MappingTraits>::validate( IO &io, std::unique_ptr &C) { if (const auto *F = dyn_cast(C.get())) { -if (F->Pattern && F->Pattern->binary_size() != 0 && !F->Size) +// Can't check the `Size`, as it's required and may be left uninitialized by vitalybuka wrote: Right, those are just the ones exposed by check-llvm under MSan, but it's a general issue. The inconvenience is that some `Val` cannot be initialized with `Val = {}`; we need some template magic like for `yamlize`. It's existing behavior, but are there any concerns that errors after the first one could be bogus? Like here, for a missing `Size`, we will also print that it `can't be 0`. https://github.com/llvm/llvm-project/pull/123280 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
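A purely illustrative sketch of that sort of "template magic", written in standard C++ and not the actual `yamlize` machinery in LLVM's YAMLTraits, would reset a parsed value only when the type allows it and otherwise leave it untouched:

```cpp
#include <cassert>
#include <string>
#include <type_traits>

// Hypothetical helper: clear a parsed value after an error, but only for
// types that can actually be rebuilt with `Val = T{}`.
template <typename T>
void resetIfPossible(T &Val) {
  if constexpr (std::is_default_constructible_v<T> &&
                std::is_move_assignable_v<T>)
    Val = T{};
  // Otherwise do nothing; callers must treat the value as unusable after an
  // error is reported.
}

struct NotResettable {
  NotResettable() = delete; // cannot be written as `Val = {}`
  explicit NotResettable(int V) : V(V) {}
  int V;
};

int main() {
  std::string S = "stale";
  resetIfPossible(S);
  assert(S.empty());

  NotResettable N(42);
  resetIfPossible(N); // still compiles; silently keeps the old value
  assert(N.V == 42);
  return 0;
}
```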
[llvm-branch-commits] [llvm] [DXIL] Add support for root signature flag element in DXContainer (PR #123147)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/123147 >From 916b2f17afef4e7b79818596551df44c75a55016 Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Thu, 16 Jan 2025 22:16:45 + Subject: [PATCH 01/12] addressing pr comments --- llvm/include/llvm/BinaryFormat/DXContainer.h | 2 +- .../BinaryFormat/DXContainerConstants.def | 24 +-- .../include/llvm/ObjectYAML/DXContainerYAML.h | 2 +- llvm/lib/Object/DXContainer.cpp | 1 - llvm/lib/ObjectYAML/DXContainerYAML.cpp | 6 ++--- llvm/tools/obj2yaml/dxcontainer2yaml.cpp | 3 +-- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h index 74003387a249ef..605281df31ed46 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainer.h +++ b/llvm/include/llvm/BinaryFormat/DXContainer.h @@ -161,7 +161,7 @@ enum class FeatureFlags : uint64_t { static_assert((uint64_t)FeatureFlags::NextUnusedBit <= 1ull << 63, "Shader flag bits exceed enum size."); -#define ROOT_ELEMENT_FLAG(Num, Val) Val = 1ull << Num, +#define ROOT_ELEMENT_FLAG(Num, Val, Str) Val = 1ull << Num, enum class RootElementFlag : uint32_t { #include "DXContainerConstants.def" }; diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def index b351b9a01773c8..a382cd714ce7f3 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def +++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def @@ -56,18 +56,18 @@ SHADER_FEATURE_FLAG(31, 36, NextUnusedBit, "Next reserved shader flag bit (not a #ifdef ROOT_ELEMENT_FLAG -ROOT_ELEMENT_FLAG(0, AllowInputAssemblerInputLayout) -ROOT_ELEMENT_FLAG(1, DenyVertexShaderRootAccess) -ROOT_ELEMENT_FLAG(2, DenyHullShaderRootAccess) -ROOT_ELEMENT_FLAG(3, DenyDomainShaderRootAccess) -ROOT_ELEMENT_FLAG(4, DenyGeometryShaderRootAccess) -ROOT_ELEMENT_FLAG(5, DenyPixelShaderRootAccess) -ROOT_ELEMENT_FLAG(6, AllowStreamOutput) -ROOT_ELEMENT_FLAG(7, LocalRootSignature) -ROOT_ELEMENT_FLAG(8, DenyAmplificationShaderRootAccess) -ROOT_ELEMENT_FLAG(9, DenyMeshShaderRootAccess) -ROOT_ELEMENT_FLAG(10, CBVSRVUAVHeapDirectlyIndexed) -ROOT_ELEMENT_FLAG(11, SamplerHeapDirectlyIndexed) +ROOT_ELEMENT_FLAG(0, AllowInputAssemblerInputLayout, "") +ROOT_ELEMENT_FLAG(1, DenyVertexShaderRootAccess, "") +ROOT_ELEMENT_FLAG(2, DenyHullShaderRootAccess, "") +ROOT_ELEMENT_FLAG(3, DenyDomainShaderRootAccess, "") +ROOT_ELEMENT_FLAG(4, DenyGeometryShaderRootAccess, "") +ROOT_ELEMENT_FLAG(5, DenyPixelShaderRootAccess, "") +ROOT_ELEMENT_FLAG(6, AllowStreamOutput, "") +ROOT_ELEMENT_FLAG(7, LocalRootSignature, "") +ROOT_ELEMENT_FLAG(8, DenyAmplificationShaderRootAccess, "") +ROOT_ELEMENT_FLAG(9, DenyMeshShaderRootAccess, "") +ROOT_ELEMENT_FLAG(10, CBVSRVUAVHeapDirectlyIndexed, "") +ROOT_ELEMENT_FLAG(11, SamplerHeapDirectlyIndexed, "") #undef ROOT_ELEMENT_FLAG #endif // ROOT_ELEMENT_FLAG diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h index bb232543cd3b07..755c81541e5db0 100644 --- a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h +++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h @@ -72,7 +72,7 @@ struct ShaderHash { std::vector Digest; }; -#define ROOT_ELEMENT_FLAG(Num, Val) bool Val = false; +#define ROOT_ELEMENT_FLAG(Num, Val, Str) bool Val = false; struct RootSignatureDesc { RootSignatureDesc() = default; RootSignatureDesc(const dxbc::RootSignatureDesc &Data); diff --git a/llvm/lib/Object/DXContainer.cpp 
b/llvm/lib/Object/DXContainer.cpp index b7eff25ed7b33b..160844f73669ac 100644 --- a/llvm/lib/Object/DXContainer.cpp +++ b/llvm/lib/Object/DXContainer.cpp @@ -206,7 +206,6 @@ Error DXContainer::parsePartOffsets() { case dxbc::PartType::RTS0: if (Error Err = parseRootSignature(PartData)) return Err; - break; } } diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp index 80f4587a06ff5e..682216e5febec0 100644 --- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp +++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp @@ -33,14 +33,14 @@ DXContainerYAML::ShaderFeatureFlags::ShaderFeatureFlags(uint64_t FlagData) { DXContainerYAML::RootSignatureDesc::RootSignatureDesc( const dxbc::RootSignatureDesc &Data) : Version(Data.Version) { -#define ROOT_ELEMENT_FLAG(Num, Val) \ +#define ROOT_ELEMENT_FLAG(Num, Val, Str) \ Val = (Data.Flags & (uint32_t)dxbc::RootElementFlag::Val) > 0; #include "llvm/BinaryFormat/DXContainerConstants.def" } uint32_t DXContainerYAML::RootSignatureDesc::getEncodedFlags() { uint64_t Flag = 0; -#define ROOT_ELEMENT_FLAG(Num, Val) \ +#define ROOT_ELEMENT_FLAG(Num, Val, Str) \
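For context on the pattern this patch leans on: `ROOT_ELEMENT_FLAG` is an x-macro, defined differently at each use site and expanded over a single flag list. The sketch below is a self-contained toy version of that idiom; the real list lives in `DXContainerConstants.def` and is pulled in with `#include`, rather than the local `ROOT_ELEMENT_FLAGS(X)` list macro used here, and only two of the flags are shown.

```cpp
#include <cassert>
#include <cstdint>

// Toy flag list; the real one is DXContainerConstants.def.
#define ROOT_ELEMENT_FLAGS(X)                                                  \
  X(0, AllowInputAssemblerInputLayout, "")                                     \
  X(1, DenyVertexShaderRootAccess, "")

// Expansion 1: the packed bitmask enum.
enum class RootElementFlag : uint32_t {
#define ROOT_ELEMENT_FLAG(Num, Val, Str) Val = 1u << Num,
  ROOT_ELEMENT_FLAGS(ROOT_ELEMENT_FLAG)
#undef ROOT_ELEMENT_FLAG
};

// Expansion 2: one bool per flag, decoded from the packed field.
struct RootSignatureDesc {
#define ROOT_ELEMENT_FLAG(Num, Val, Str) bool Val = false;
  ROOT_ELEMENT_FLAGS(ROOT_ELEMENT_FLAG)
#undef ROOT_ELEMENT_FLAG

  explicit RootSignatureDesc(uint32_t Flags) {
#define ROOT_ELEMENT_FLAG(Num, Val, Str)                                       \
  Val = (Flags & static_cast<uint32_t>(RootElementFlag::Val)) != 0;
    ROOT_ELEMENT_FLAGS(ROOT_ELEMENT_FLAG)
#undef ROOT_ELEMENT_FLAG
  }
};

int main() {
  RootSignatureDesc D(1u << 1); // only DenyVertexShaderRootAccess set
  assert(!D.AllowInputAssemblerInputLayout && D.DenyVertexShaderRootAccess);
  return 0;
}
```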
[llvm-branch-commits] [clang] release/19.x: [clang-format] Correctly annotate braces in macro definitions (#123279) (PR #123439)
llvmbot wrote: @llvm/pr-subscribers-clang-format Author: None (llvmbot) Changes Backport a7bca1861bfcd1490319115c1027166e27f4ae27 Requested by: @owenca --- Full diff: https://github.com/llvm/llvm-project/pull/123439.diff 3 Files Affected: - (modified) clang/lib/Format/UnwrappedLineParser.cpp (+3-3) - (modified) clang/unittests/Format/FormatTest.cpp (-11) - (modified) clang/unittests/Format/TokenAnnotatorTest.cpp (+16-3) ``diff diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index e3fb976ee1cc44..eb520870bb7708 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -505,14 +505,14 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { auto *NextTok = Tokens->getNextNonComment(); if (!Line->InMacroBody && !Style.isTableGen()) { - // Skip PPDirective lines and comments. + // Skip PPDirective lines (except macro definitions) and comments. while (NextTok->is(tok::hash)) { NextTok = Tokens->getNextToken(); -if (NextTok->is(tok::pp_not_keyword)) +if (NextTok->isOneOf(tok::pp_not_keyword, tok::pp_define)) break; do { NextTok = Tokens->getNextToken(); -} while (!NextTok->HasUnescapedNewline && NextTok->isNot(tok::eof)); +} while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); while (NextTok->is(tok::comment)) NextTok = Tokens->getNextToken(); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 7b2947acea4a2e..e3daac5a23359f 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -5725,23 +5725,12 @@ TEST_F(FormatTest, HashInMacroDefinition) { verifyFormat("#define A void # ## #", getLLVMStyleWithColumns(22)); -#if 0 - // FIXME: The correct format is: verifyFormat("{\n" " {\n" "#define GEN_ID(_x) char *_x{#_x}\n" "GEN_ID(one);\n" " }\n" "}"); -#endif - verifyFormat("{\n" - " {\n" - "#define GEN_ID(_x) \\\n" - " char *_x { #_x }\n" - "GEN_ID(one);\n" - " }\n" - "}", - getGoogleStyle()); } TEST_F(FormatTest, RespectWhitespaceInMacroDefinitions) { diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 7d4ff3dfa32490..8f7f86d401edf0 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3214,14 +3214,27 @@ TEST_F(TokenAnnotatorTest, BraceKind) { EXPECT_BRACE_KIND(Tokens[0], BK_Block); EXPECT_TOKEN(Tokens[1], tok::l_brace, TT_BlockLBrace); EXPECT_BRACE_KIND(Tokens[1], BK_Block); -#if 0 - // FIXME: EXPECT_BRACE_KIND(Tokens[11], BK_BracedInit); EXPECT_BRACE_KIND(Tokens[14], BK_BracedInit); -#endif EXPECT_BRACE_KIND(Tokens[20], BK_Block); EXPECT_BRACE_KIND(Tokens[21], BK_Block); + Tokens = annotate("{\n" +"#define FOO \\\n" +" { \\\n" +"case bar: { \\\n" +" break; \\\n" +"} \\\n" +" }\n" +"}"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_BlockLBrace); + EXPECT_BRACE_KIND(Tokens[4], BK_Block); + EXPECT_TOKEN(Tokens[7], tok::colon, TT_CaseLabelColon); + EXPECT_BRACE_KIND(Tokens[8], BK_Block); + EXPECT_BRACE_KIND(Tokens[11], BK_Block); + EXPECT_BRACE_KIND(Tokens[12], BK_Block); + Tokens = annotate("a = class extends goog.a {};", getGoogleStyle(FormatStyle::LK_JavaScript)); ASSERT_EQ(Tokens.size(), 11u) << Tokens; `` https://github.com/llvm/llvm-project/pull/123439 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Correctly annotate braces in macro definitions (#123279) (PR #123439)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/123439 Backport a7bca1861bfcd1490319115c1027166e27f4ae27 Requested by: @owenca >From b81158025235d50f1eb227ded9f5638f039c59e2 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 17 Jan 2025 19:26:00 -0800 Subject: [PATCH] [clang-format] Correctly annotate braces in macro definitions (#123279) Fixes #123179. (cherry picked from commit a7bca1861bfcd1490319115c1027166e27f4ae27) --- clang/lib/Format/UnwrappedLineParser.cpp | 6 +++--- clang/unittests/Format/FormatTest.cpp | 11 --- clang/unittests/Format/TokenAnnotatorTest.cpp | 19 --- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index e3fb976ee1cc44..eb520870bb7708 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -505,14 +505,14 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { auto *NextTok = Tokens->getNextNonComment(); if (!Line->InMacroBody && !Style.isTableGen()) { - // Skip PPDirective lines and comments. + // Skip PPDirective lines (except macro definitions) and comments. while (NextTok->is(tok::hash)) { NextTok = Tokens->getNextToken(); -if (NextTok->is(tok::pp_not_keyword)) +if (NextTok->isOneOf(tok::pp_not_keyword, tok::pp_define)) break; do { NextTok = Tokens->getNextToken(); -} while (!NextTok->HasUnescapedNewline && NextTok->isNot(tok::eof)); +} while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); while (NextTok->is(tok::comment)) NextTok = Tokens->getNextToken(); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 7b2947acea4a2e..e3daac5a23359f 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -5725,23 +5725,12 @@ TEST_F(FormatTest, HashInMacroDefinition) { verifyFormat("#define A void # ## #", getLLVMStyleWithColumns(22)); -#if 0 - // FIXME: The correct format is: verifyFormat("{\n" " {\n" "#define GEN_ID(_x) char *_x{#_x}\n" "GEN_ID(one);\n" " }\n" "}"); -#endif - verifyFormat("{\n" - " {\n" - "#define GEN_ID(_x) \\\n" - " char *_x { #_x }\n" - "GEN_ID(one);\n" - " }\n" - "}", - getGoogleStyle()); } TEST_F(FormatTest, RespectWhitespaceInMacroDefinitions) { diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 7d4ff3dfa32490..8f7f86d401edf0 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3214,14 +3214,27 @@ TEST_F(TokenAnnotatorTest, BraceKind) { EXPECT_BRACE_KIND(Tokens[0], BK_Block); EXPECT_TOKEN(Tokens[1], tok::l_brace, TT_BlockLBrace); EXPECT_BRACE_KIND(Tokens[1], BK_Block); -#if 0 - // FIXME: EXPECT_BRACE_KIND(Tokens[11], BK_BracedInit); EXPECT_BRACE_KIND(Tokens[14], BK_BracedInit); -#endif EXPECT_BRACE_KIND(Tokens[20], BK_Block); EXPECT_BRACE_KIND(Tokens[21], BK_Block); + Tokens = annotate("{\n" +"#define FOO \\\n" +" { \\\n" +"case bar: { \\\n" +" break; \\\n" +"} \\\n" +" }\n" +"}"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_BlockLBrace); + EXPECT_BRACE_KIND(Tokens[4], BK_Block); + EXPECT_TOKEN(Tokens[7], tok::colon, TT_CaseLabelColon); + EXPECT_BRACE_KIND(Tokens[8], BK_Block); + EXPECT_BRACE_KIND(Tokens[11], BK_Block); + EXPECT_BRACE_KIND(Tokens[12], BK_Block); + Tokens = annotate("a = class extends goog.a {};", getGoogleStyle(FormatStyle::LK_JavaScript)); ASSERT_EQ(Tokens.size(), 11u) << Tokens; 
[llvm-branch-commits] [clang] release/19.x: [clang-format] Correctly annotate braces in macro definitions (#123279) (PR #123439)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/123439 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Correctly annotate braces in macro definitions (#123279) (PR #123439)
llvmbot wrote: @mydeveloperday What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/123439 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang-RT] Build libflang_rt.so (PR #121782)
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/121782 >From a3037ab5557dcc4a4deb5bb40f801ca9770e3854 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Jan 2025 16:44:08 +0100 Subject: [PATCH 1/7] Add FLANG_RT_ENABLE_STATIC and FLANG_RT_ENABLE_SHARED --- flang-rt/CMakeLists.txt | 30 ++ flang-rt/cmake/modules/AddFlangRT.cmake | 291 -- .../cmake/modules/AddFlangRTOffload.cmake | 8 +- flang-rt/cmake/modules/GetToolchainDirs.cmake | 254 +++ flang-rt/lib/flang_rt/CMakeLists.txt | 20 +- flang-rt/test/CMakeLists.txt | 2 +- flang-rt/test/lit.cfg.py | 2 +- 7 files changed, 366 insertions(+), 241 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 7b3d22e454a108..7effa6012a078f 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -113,6 +113,15 @@ cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) # Determine subdirectories for build output and install destinations. +# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good +#destination because it is not a ld.so default search path. +#The machine where the executable is eventually executed may not be the +#machine where the Flang compiler and its resource dir is installed, so +#setting RPath by the driver is not an solution. It should belong into +#/usr/lib//libflang_rt.so, like e.g. libgcc_s.so. +#But the linker as invoked by the Flang driver also requires +#libflang_rt.so to be found when linking and the resource lib dir is +#the only reliable location. get_toolchain_library_subdir(toolchain_lib_subdir) extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") @@ -130,6 +139,27 @@ cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) option(FLANG_RT_INCLUDE_TESTS "Generate build targets for the flang-rt unit and regression-tests." "${LLVM_INCLUDE_TESTS}") +option(FLANG_RT_ENABLE_STATIC "Build Flang-RT as a static library." ON) +if (WIN32) + # Windows DLL currently not implemented. + set(FLANG_RT_ENABLE_SHARED OFF) +else () + # TODO: Enable by default to increase test coverage, and which version of the + # library should be the user's choice anyway. + # Currently, the Flang driver adds `-L"libdir" -lflang_rt` as linker + # argument, which leaves the choice which library to use to the linker. + # Since most linkers prefer the shared library, this would constitute a + # breaking change unless the driver is changed. + option(FLANG_RT_ENABLE_SHARED "Build Flang-RT as a shared library." OFF) +endif () +if (NOT FLANG_RT_ENABLE_STATIC AND NOT FLANG_RT_ENABLE_SHARED) + message(FATAL_ERROR " + Must build at least one type of library + (FLANG_RT_ENABLE_STATIC=ON, FLANG_RT_ENABLE_SHARED=ON, or both) +") +endif () + + set(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT "" CACHE STRING "Compile Flang-RT with GPU support (CUDA or OpenMP)") set_property(CACHE FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT PROPERTY STRINGS "" diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index 1f8b5111433825..5f493a80c35f20 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -16,7 +16,8 @@ # STATIC # Build a static (.a/.lib) library # OBJECT -# Create only object files without static/dynamic library +# Always create an object library. 
+# Without SHARED/STATIC, build only the object library. # INSTALL_WITH_TOOLCHAIN # Install library into Clang's resource directory so it can be found by the # Flang driver during compilation, including tests @@ -44,17 +45,73 @@ function (add_flangrt_library name) ") endif () - # Forward libtype to add_library - set(extra_args "") - if (ARG_SHARED) -list(APPEND extra_args SHARED) + # Internal names of libraries. If called with just single type option, use + # the default name for it. Name of targets must only depend on function + # arguments to be predictable for callers. + set(name_static "${name}.static") + set(name_shared "${name}.shared") + set(name_object "obj.${name}") + if (ARG_STATIC AND NOT ARG_SHARED) +set(name_static "${name}") + elseif (NOT ARG_STATIC AND ARG_SHARED) +set(name_shared "${name}") + elseif (NOT ARG_STATIC AND NOT ARG_SHARED AND ARG_OBJECT) +set(name_object "${name}") + elseif (NOT ARG_STATIC AND NOT ARG_SHARED AND NOT ARG_OBJECT) +# Only one of them will actually be built. +set(name_static "${name}") +set(name_shared "${name}") + endif () + + # Determine what to build. If not explicitly
[llvm-branch-commits] [flang] [Flang] Optionally do not compile the runtime in-tree (PR #122336)
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/122336 >From dd3ac2e6d8d8d57cd639c25bea3b8d5c99a2f81e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 9 Jan 2025 15:58:48 +0100 Subject: [PATCH 1/8] Introduce FLANG_INCLUDE_RUNTIME --- flang/CMakeLists.txt| 7 +++- flang/test/CMakeLists.txt | 6 +++- flang/test/Driver/ctofortran.f90| 1 + flang/test/Driver/exec.f90 | 1 + flang/test/Runtime/no-cpp-dep.c | 2 +- flang/test/lit.cfg.py | 5 ++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/f18/CMakeLists.txt | 4 +-- flang/unittests/CMakeLists.txt | 6 ++-- flang/unittests/Evaluate/CMakeLists.txt | 46 ++--- 10 files changed, 50 insertions(+), 29 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 68947eaa9c9bd7..69e963a43d0b97 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -233,6 +233,9 @@ else() include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR}) endif() +option(FLANG_INCLUDE_RUNTIME "Build the runtime in-tree (deprecated; to be replaced with LLVM_ENABLE_RUNTIMES=flang-rt)" ON) +pythonize_bool(FLANG_INCLUDE_RUNTIME) + set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) @@ -473,7 +476,9 @@ if (FLANG_CUF_RUNTIME) find_package(CUDAToolkit REQUIRED) endif() -add_subdirectory(runtime) +if (FLANG_INCLUDE_RUNTIME) + add_subdirectory(runtime) +endif () if (LLVM_INCLUDE_EXAMPLES) add_subdirectory(examples) diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index cab214c2ef4c8c..e398e0786147aa 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -71,9 +71,13 @@ set(FLANG_TEST_DEPENDS llvm-objdump llvm-readobj split-file - FortranRuntime FortranDecimal ) + +if (FLANG_INCLUDE_RUNTIME) + list(APPEND FLANG_TEST_DEPENDS FortranRuntime) +endif () + if (LLVM_ENABLE_PLUGINS AND NOT WIN32) list(APPEND FLANG_TEST_DEPENDS Bye) endif() diff --git a/flang/test/Driver/ctofortran.f90 b/flang/test/Driver/ctofortran.f90 index 78eac32133b18e..10c7adaccc9588 100644 --- a/flang/test/Driver/ctofortran.f90 +++ b/flang/test/Driver/ctofortran.f90 @@ -1,4 +1,5 @@ ! UNSUPPORTED: system-windows +! REQUIRES: flang-rt ! RUN: split-file %s %t ! RUN: chmod +x %t/runtest.sh ! RUN: %t/runtest.sh %t %t/ffile.f90 %t/cfile.c %flang | FileCheck %s diff --git a/flang/test/Driver/exec.f90 b/flang/test/Driver/exec.f90 index fd174005ddf62a..9ca91ee24011c9 100644 --- a/flang/test/Driver/exec.f90 +++ b/flang/test/Driver/exec.f90 @@ -1,4 +1,5 @@ ! UNSUPPORTED: system-windows +! REQUIRES: flang-rt ! Verify that flang can correctly build executables. ! RUN: %flang %s -o %t diff --git a/flang/test/Runtime/no-cpp-dep.c b/flang/test/Runtime/no-cpp-dep.c index b1a5fa004014cc..7303ce63fdec41 100644 --- a/flang/test/Runtime/no-cpp-dep.c +++ b/flang/test/Runtime/no-cpp-dep.c @@ -3,7 +3,7 @@ This test makes sure that flang's runtime does not depend on the C++ runtime library. It tries to link this simple file against libFortranRuntime.a with a C compiler. 
-REQUIRES: c-compiler +REQUIRES: c-compiler, flang-rt RUN: %if system-aix %{ export OBJECT_MODE=64 %} RUN: %cc -std=c99 %s -I%include %libruntime -lm \ diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index c452b6d231c89f..78378bf5f413e8 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -163,10 +163,13 @@ ToolSubst("%not_todo_abort_cmd", command=FindTool("not"), unresolved="fatal") ) +if config.flang_include_runtime: + config.available_features.add("flang-rt") + # Define some variables to help us test that the flang runtime doesn't depend on # the C++ runtime libraries. For this we need a C compiler. If for some reason # we don't have one, we can just disable the test. -if config.cc: +if config.flang_include_runtime and config.cc: libruntime = os.path.join(config.flang_lib_dir, "libFortranRuntime.a") include = os.path.join(config.flang_src_dir, "include") diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index d1a0ac763cf8a0..19f9330f93ae14 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -32,6 +32,7 @@ else: config.openmp_module_dir = None config.flang_runtime_f128_math_lib = "@FLANG_RUNTIME_F128_MATH_LIB@" config.have_ldbl_mant_dig_113 = "@HAVE_LDBL_MANT_DIG_113@" +config.flang_include_runtime = @FLANG_INCLUDE_RUNTIME@ import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 4362fcf0537616..022c346aabdbde 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -72,7 +72,7 @@ if (NOT CMAKE_CROSSCOMPILING) set(depends ${FLANG_
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] Rename libFortranRuntime.a to libflang_rt.a (PR #122341)
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/122341 >From c77098f90a5c20bdbce078a0ee3aec1fe53772e3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 8 Jan 2025 11:23:02 +0100 Subject: [PATCH 1/4] clang-format to sort headers --- flang/tools/f18-parse-demo/f18-parse-demo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/tools/f18-parse-demo/f18-parse-demo.cpp b/flang/tools/f18-parse-demo/f18-parse-demo.cpp index 90bbce246e3f16..a50c88dc840643 100644 --- a/flang/tools/f18-parse-demo/f18-parse-demo.cpp +++ b/flang/tools/f18-parse-demo/f18-parse-demo.cpp @@ -21,7 +21,6 @@ // scaffolding compiler driver that can test some semantic passes of the // F18 compiler under development. -#include "flang/Support/Fortran-features.h" #include "flang/Parser/characters.h" #include "flang/Parser/dump-parse-tree.h" #include "flang/Parser/message.h" @@ -30,6 +29,7 @@ #include "flang/Parser/parsing.h" #include "flang/Parser/provenance.h" #include "flang/Parser/unparse.h" +#include "flang/Support/Fortran-features.h" #include "flang/Support/default-kinds.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" >From 22ed7ebde19d4003fa3036039f75977b1e6b9f60 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 8 Jan 2025 14:15:45 +0100 Subject: [PATCH 2/4] FortranRuntime -> flang_rt --- clang/lib/Driver/ToolChains/CommonArgs.cpp| 4 +- clang/lib/Driver/ToolChains/Flang.cpp | 8 ++-- flang-rt/unittests/Evaluate/CMakeLists.txt| 21 ++ flang/CMakeLists.txt | 2 +- flang/docs/FlangDriver.md | 8 ++-- flang/docs/GettingStarted.md | 6 +-- flang/docs/OpenACC-descriptor-management.md | 2 +- flang/docs/ReleaseNotes.md| 2 + .../ExternalHelloWorld/CMakeLists.txt | 2 +- flang/runtime/CMakeLists.txt | 40 +++ flang/runtime/CUDA/CMakeLists.txt | 2 +- flang/runtime/Float128Math/CMakeLists.txt | 2 +- flang/runtime/time-intrinsic.cpp | 2 +- flang/test/CMakeLists.txt | 6 ++- .../test/Driver/gcc-toolchain-install-dir.f90 | 2 +- flang/test/Driver/linker-flags.f90| 8 ++-- .../test/Driver/msvc-dependent-lib-flags.f90 | 8 ++-- flang/test/Driver/nostdlib.f90| 2 +- flang/test/Runtime/no-cpp-dep.c | 2 +- flang/test/lit.cfg.py | 2 +- flang/tools/f18/CMakeLists.txt| 8 ++-- flang/unittests/CMakeLists.txt| 2 +- flang/unittests/Evaluate/CMakeLists.txt | 9 +++-- flang/unittests/Frontend/CMakeLists.txt | 1 + flang/unittests/Runtime/CMakeLists.txt| 2 +- flang/unittests/Runtime/CUDA/CMakeLists.txt | 2 +- lld/COFF/MinGW.cpp| 2 +- 27 files changed, 97 insertions(+), 60 deletions(-) create mode 100644 flang-rt/unittests/Evaluate/CMakeLists.txt diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 9e9872975de9c2..4c6b9f29f362ca 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1317,7 +1317,7 @@ void tools::addOpenMPHostOffloadingArgs(const Compilation &C, /// Add Fortran runtime libs void tools::addFortranRuntimeLibs(const ToolChain &TC, const ArgList &Args, llvm::opt::ArgStringList &CmdArgs) { - // Link FortranRuntime + // Link flang_rt // These are handled earlier on Windows by telling the frontend driver to // add the correct libraries to link against as dependents in the object // file. 
@@ -1333,7 +1333,7 @@ void tools::addFortranRuntimeLibs(const ToolChain &TC, const ArgList &Args, if (AsNeeded) addAsNeededOption(TC, Args, CmdArgs, /*as_needed=*/false); } -CmdArgs.push_back("-lFortranRuntime"); +CmdArgs.push_back("-lflang_rt"); addArchSpecificRPath(TC, Args, CmdArgs); } diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 749af4ada9a696..2cf1108b28dab3 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -356,26 +356,26 @@ static void processVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args, case options::OPT__SLASH_MT: CmdArgs.push_back("-D_MT"); CmdArgs.push_back("--dependent-lib=libcmt"); -CmdArgs.push_back("--dependent-lib=FortranRuntime.static.lib"); +CmdArgs.push_back("--dependent-lib=flang_rt.static.lib"); break; case options::OPT__SLASH_MTd: CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("--dependent-lib=libcmtd"); -CmdArgs.push_back("--dependent-lib=FortranRuntime.static_dbg.lib"); +CmdArgs.push_back("--dependent-lib=flang_rt.static_dbg.lib"); break; case options::OPT__SLASH_M
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { +// Reverse the order of cold jump tables indices. +for (int L = NextHotJumpTableIndex, R = JT.size() - 1; L < R; ++L, --R) + std::swap(JumpTableIndices[L], JumpTableIndices[R]); + +emitJumpTables( +ArrayRef(JumpTableIndices) +.take_back(JT.size() - NextHotJumpTableIndex), ellishg wrote: I think these are identical, but I guess it's up for debate which one is more clear. ```suggestion ArrayRef(JumpTableIndices) .drop_front(NextHotJumpTableIndex + 1), ``` https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [YAML] Don't validate `Fill::Size` after error (PR #123280)
@@ -1750,7 +1750,9 @@ void MappingTraits>::mapping( std::string MappingTraits>::validate( IO &io, std::unique_ptr &C) { if (const auto *F = dyn_cast(C.get())) { -if (F->Pattern && F->Pattern->binary_size() != 0 && !F->Size) +// Can't check the `Size`, as it's required and may be left uninitialized by vitalybuka wrote: Actually, I don't see a problem with just requiring it: if you implement a custom validate, be aware that required fields can be garbage. On a quick look, only this and `MappingTraits::validate(IO &IO,` are affected. https://github.com/llvm/llvm-project/pull/123280 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
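A minimal sketch of the pitfall under discussion, using a simplified stand-in for `ELFYAML::Fill` (the field types mirror the ones referenced above; the struct and error string are illustrative, not the actual ELFYAML code):
```cpp
#include "llvm/ObjectYAML/YAML.h"    // llvm::yaml::BinaryRef
#include "llvm/Support/YAMLTraits.h" // llvm::yaml::IO, llvm::yaml::Hex64
#include <optional>
#include <string>

// Simplified stand-in for ELFYAML::Fill.
struct FillSketch {
  std::optional<llvm::yaml::BinaryRef> Pattern; // optional key
  llvm::yaml::Hex64 Size;                       // required key
};

// If the "Size" key is missing, YAMLIO reports the missing-required-key error
// itself, but a custom validate() still runs -- and Size holds an
// indeterminate value at that point, so it must not be inspected blindly.
static std::string validateSketch(llvm::yaml::IO &IO, FillSketch &F) {
  (void)IO;
  if (F.Pattern && F.Pattern->binary_size() != 0 && !F.Size)
    return "\"Size\" can't be 0 when \"Pattern\" is not empty";
  return "";
}
```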
[llvm-branch-commits] [clang] ce83c94 - Revert "[clang][DebugInfo] Emit DW_AT_object_pointer on function declarations…"
Author: Michał Górny Date: 2025-01-18T07:59:09Z New Revision: ce83c9470ba1a9860c107d39e60d9148ac009e91 URL: https://github.com/llvm/llvm-project/commit/ce83c9470ba1a9860c107d39e60d9148ac009e91 DIFF: https://github.com/llvm/llvm-project/commit/ce83c9470ba1a9860c107d39e60d9148ac009e91.diff LOG: Revert "[clang][DebugInfo] Emit DW_AT_object_pointer on function declarations…" This reverts commit 10fdd09c3bda8bfc532cecf4f11babaf356554f3. Added: Modified: clang/lib/CodeGen/CGDebugInfo.cpp clang/test/CodeGenCXX/debug-info-object-pointer.cpp llvm/include/llvm-c/DebugInfo.h llvm/include/llvm/IR/DIBuilder.h llvm/lib/IR/DIBuilder.cpp llvm/lib/IR/DebugInfo.cpp Removed: diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 6cbcaf03844102..f88f56c98186da 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2016,15 +2016,13 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( // First element is always return type. For 'void' functions it is NULL. Elts.push_back(Args[0]); - const bool HasExplicitObjectParameter = ThisPtr.isNull(); - - // "this" pointer is always first argument. For explicit "this" - // parameters, it will already be in Args[1]. - if (!HasExplicitObjectParameter) { + // "this" pointer is always first argument. + // ThisPtr may be null if the member function has an explicit 'this' + // parameter. + if (!ThisPtr.isNull()) { llvm::DIType *ThisPtrType = getOrCreateType(ThisPtr, Unit); TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType); -ThisPtrType = -DBuilder.createObjectPointerType(ThisPtrType, /*Implicit=*/true); +ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); Elts.push_back(ThisPtrType); } @@ -2032,13 +2030,6 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( for (unsigned i = 1, e = Args.size(); i != e; ++i) Elts.push_back(Args[i]); - // Attach FlagObjectPointer to the explicit "this" parameter. - if (HasExplicitObjectParameter) { -assert(Elts.size() >= 2 && Args.size() >= 2 && - "Expected at least return type and object parameter."); -Elts[1] = DBuilder.createObjectPointerType(Args[1], /*Implicit=*/false); - } - llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts); return DBuilder.createSubroutineType(EltTypeArray, OriginalFunc->getFlags(), @@ -5127,7 +5118,7 @@ llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, llvm::DIType *CachedTy = getTypeOrNull(QualTy); if (CachedTy) Ty = CachedTy; - return DBuilder.createObjectPointerType(Ty, /*Implicit=*/true); + return DBuilder.createObjectPointerType(Ty); } void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( diff --git a/clang/test/CodeGenCXX/debug-info-object-pointer.cpp b/clang/test/CodeGenCXX/debug-info-object-pointer.cpp index 49079f59909968..594d4da791ee84 100644 --- a/clang/test/CodeGenCXX/debug-info-object-pointer.cpp +++ b/clang/test/CodeGenCXX/debug-info-object-pointer.cpp @@ -5,11 +5,12 @@ // CHECK: !DIDerivedType(tag: DW_TAG_pointer_type // CHECK-SAME: flags: DIFlagArtificial | DIFlagObjectPointer // +// // FIXME: DIFlagObjectPointer not attached to the explicit object +// // argument in the subprogram declaration. 
// CHECK: !DISubprogram(name: "explicit_this", // flags: DIFlagPrototyped -// -// CHECK: !DIDerivedType(tag: DW_TAG_rvalue_reference_type -// CHECK-SAME: flags: DIFlagObjectPointer) +// CHECK-NOT: DIFlagObjectPointer +// CHECK-NOT: DIFlagArtificial // // CHECK: !DILocalVariable(name: "this", arg: 1 // CHECK-SAME: flags: DIFlagArtificial | DIFlagObjectPointer diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index ac7ee5a7cc9a19..07f87d44088e7e 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -870,16 +870,13 @@ LLVMDIBuilderCreateObjCProperty(LLVMDIBuilderRef Builder, LLVMMetadataRef Ty); /** - * Create a uniqued DIType* clone with FlagObjectPointer. If \c Implicit - * is true, then also set FlagArtificial. + * Create a uniqued DIType* clone with FlagObjectPointer and FlagArtificial set. * \param Builder The DIBuilder. * \param Type The underlying type to which this pointer points. - * \param Implicit Indicates whether this pointer was implicitly generated - * (i.e., not spelled out in source). */ -LLVMMetadataRef LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, - LLVMMetadataRef Type, - LLVMBool Implicit); +LLVMMetadataRef +LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, + LLVMM
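For context, the construct exercised by `explicit_this` in the test above is a member function with an explicit object parameter (C++23 "deducing this"). A minimal illustration, with the parameter type chosen to match the rvalue-reference case in the CHECK lines; the class name is made up:
```cpp
// Illustrative only (requires -std=c++23). The reverted change attached
// DIFlagObjectPointer to the debug type of the explicit object parameter
// "self"; after the revert, only the implicit 'this' of ordinary member
// functions carries DIFlagArtificial | DIFlagObjectPointer.
struct S {
  void explicit_this(this S &&self) {} // explicit object parameter
  void implicit_this() {}              // ordinary member function with implicit 'this'
};
```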
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { +// Reverse the order of cold jump tables indices. +for (int L = NextHotJumpTableIndex, R = JT.size() - 1; L < R; ++L, --R) + std::swap(JumpTableIndices[L], JumpTableIndices[R]); + +emitJumpTables( +ArrayRef(JumpTableIndices) +.take_back(JT.size() - NextHotJumpTableIndex), +TLOF.getSectionForJumpTable( + F, TM, &JT[JumpTableIndices[NextHotJumpTableIndex]]), +JTInDiffSection, *MJTI); + } + + return; +} + +void AsmPrinter::emitJumpTables(ArrayRef JumpTableIndices, +MCSection *JumpTableSection, +bool JTInDiffSection, +const MachineJumpTableInfo &MJTI) { + if (JumpTableIndices.empty()) +return; + + const DataLayout &DL = MF->getDataLayout(); if (JTInDiffSection) { -// Drop it in the readonly section. -MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM); -OutStreamer->switchSection(ReadOnlySection); +OutStreamer->switchSection(JumpTableSection); } - emitAlignment(Align(MJTI->getEntryAlignment(DL))); + emitAlignment(Align(MJTI.getEntryAlignment(MF->getDataLayout(; // Jump tables in code sections are marked with a data_region directive // where that's supported. if (!JTInDiffSection) OutStreamer->emitDataRegion(MCDR_DataRegionJT32); - for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { -const std::vector &JTBBs = JT[JTI].MBBs; + const auto &JT = MJTI.getJumpTables(); + for (unsigned Index = 0, e = JumpTableIndices.size(); Index != e; ++Index) { +const std::vector &JTBBs = ellishg wrote: ```suggestion ArrayRef JTBBs = ``` https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
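The suggestion binds the basic-block list to a lightweight `ArrayRef` view instead of a `const std::vector &`. A minimal sketch of the idea, using `int` as a stand-in for the element type:
```cpp
#include "llvm/ADT/ArrayRef.h"
#include <vector>

// ArrayRef is a non-owning view and converts implicitly from std::vector,
// so the caller does not have to name the concrete container type.
int sumEntries(const std::vector<int> &Entries) {
  llvm::ArrayRef<int> View = Entries; // cheap view, no copy
  int Sum = 0;
  for (int E : View)
    Sum += E;
  return Sum;
}
```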
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/122215 >From dd748277dff2b30ed02bfa466eeca7102aa93eb4 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Fri, 10 Jan 2025 13:53:08 -0800 Subject: [PATCH 1/9] rely to upstream --- llvm/include/llvm/CodeGen/MachineFunction.h | 2 +- .../llvm/CodeGen/MachineJumpTableInfo.h | 9 +- llvm/include/llvm/CodeGen/Passes.h| 2 +- llvm/lib/CodeGen/MachineFunction.cpp | 12 +- llvm/lib/CodeGen/StaticDataSplitter.cpp | 87 +++--- llvm/test/CodeGen/X86/jump-table-partition.ll | 251 +++--- 6 files changed, 223 insertions(+), 140 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index c0f983d1c6787b..dcdbcaec168d22 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -91,7 +91,7 @@ template <> struct ilist_callback_traits { // The hotness of static data tracked by a MachineFunction and not represented // as a global object in the module IR / MIR. Typical examples are // MachineJumpTableInfo and MachineConstantPool. -enum class DataHotness { +enum class MachineFunctionDataHotness { Unknown, Cold, Hot, diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h index cc1f54a81b9bb4..e3675d6489b350 100644 --- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -28,7 +28,7 @@ namespace llvm { class MachineBasicBlock; class DataLayout; class raw_ostream; -enum class DataHotness; +enum class MachineFunctionDataHotness; /// MachineJumpTableEntry - One jump table in the jump table info. /// @@ -36,7 +36,7 @@ struct MachineJumpTableEntry { /// MBBs - The vector of basic blocks from which to create the jump table. std::vector MBBs; - DataHotness Hotness; + MachineFunctionDataHotness Hotness; explicit MachineJumpTableEntry(const std::vector &M); }; @@ -109,7 +109,10 @@ class MachineJumpTableInfo { return JumpTables; } - void updateJumpTableHotness(size_t JTI, DataHotness Hotness); + // Update machine jump table entry's hotness. Return true if the hotness is + // updated. + bool updateJumpTableEntryHotness(size_t JTI, + MachineFunctionDataHotness Hotness); /// RemoveJumpTable - Mark the specific index as being dead. This will /// prevent it from being emitted. diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 16423d03ff7018..b5d2a7e6bf035b 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -71,7 +71,7 @@ namespace llvm { /// using profile information. MachineFunctionPass *createMachineFunctionSplitterPass(); - /// createStaticDataSplitterPass - This pass partions static data sections + /// createStaticDataSplitterPass - This pass partitions a static data section /// into a hot and cold section using profile information. MachineFunctionPass *createStaticDataSplitterPass(); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index b5a89f3bcf42f1..d09e93d79aae6c 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1293,7 +1293,7 @@ const unsigned MachineFunction::DebugOperandMemNumber = 100; MachineJumpTableEntry::MachineJumpTableEntry( const std::vector &MBBs) -: MBBs(MBBs), Hotness(DataHotness::Unknown) {} +: MBBs(MBBs), Hotness(MachineFunctionDataHotness::Unknown) {} /// Return the size of each entry in the jump table. 
unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { @@ -1344,13 +1344,17 @@ unsigned MachineJumpTableInfo::createJumpTableIndex( return JumpTables.size()-1; } -void MachineJumpTableInfo::updateJumpTableHotness(size_t JTI, - DataHotness Hotness) { +bool MachineJumpTableInfo::updateJumpTableEntryHotness( +size_t JTI, MachineFunctionDataHotness Hotness) { assert(JTI < JumpTables.size() && "Invalid JTI!"); // Note record the largest hotness is important for mergable data (constant // pools). Even if jump table instances are not merged, record the largest // value seen fwiw. - JumpTables[JTI].Hotness = std::max(JumpTables[JTI].Hotness, Hotness); + if (Hotness <= JumpTables[JTI].Hotness) +return false; + + JumpTables[JTI].Hotness = Hotness; + return true; } /// If Old is the target of any jump tables, update the jump tables to branch diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 482a61027cf985..9e2cfe18256e35 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -6,13 +6,16 @@ // //===-
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
https://github.com/mingmingl-llvm edited https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { +// Reverse the order of cold jump tables indices. +for (int L = NextHotJumpTableIndex, R = JT.size() - 1; L < R; ++L, --R) + std::swap(JumpTableIndices[L], JumpTableIndices[R]); + +emitJumpTables( +ArrayRef(JumpTableIndices) +.take_back(JT.size() - NextHotJumpTableIndex), +TLOF.getSectionForJumpTable( + F, TM, &JT[JumpTableIndices[NextHotJumpTableIndex]]), +JTInDiffSection, *MJTI); + } + + return; +} + +void AsmPrinter::emitJumpTables(ArrayRef JumpTableIndices, +MCSection *JumpTableSection, +bool JTInDiffSection, +const MachineJumpTableInfo &MJTI) { + if (JumpTableIndices.empty()) +return; + + const DataLayout &DL = MF->getDataLayout(); if (JTInDiffSection) { -// Drop it in the readonly section. -MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM); -OutStreamer->switchSection(ReadOnlySection); +OutStreamer->switchSection(JumpTableSection); } - emitAlignment(Align(MJTI->getEntryAlignment(DL))); + emitAlignment(Align(MJTI.getEntryAlignment(MF->getDataLayout(; // Jump tables in code sections are marked with a data_region directive // where that's supported. if (!JTInDiffSection) OutStreamer->emitDataRegion(MCDR_DataRegionJT32); - for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { -const std::vector &JTBBs = JT[JTI].MBBs; + const auto &JT = MJTI.getJumpTables(); + for (unsigned Index = 0, e = JumpTableIndices.size(); Index != e; ++Index) { mingmingl-llvm wrote: done. https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { mingmingl-llvm wrote: removed the if check and updated cold JT function call in a similar way as https://github.com/llvm/llvm-project/pull/122215/files#r1920695368 does. https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { +// Reverse the order of cold jump tables indices. +for (int L = NextHotJumpTableIndex, R = JT.size() - 1; L < R; ++L, --R) + std::swap(JumpTableIndices[L], JumpTableIndices[R]); + +emitJumpTables( +ArrayRef(JumpTableIndices) +.take_back(JT.size() - NextHotJumpTableIndex), mingmingl-llvm wrote: `drop_front` is clearer to me, despite `make_range` supersedes the ArrayRef usage in the updated patch. I became interested in why both `drop` and `take` exists, and it turns out (amusingly) that `take_{front,back}` [handles](https://github.com/llvm/llvm-project/blob/22d4ff155aadf0f098dd5dc48d9038da15108937/llvm/include/llvm/ADT/ArrayRef.h#L414-L426) `N>=size()` by returning the full view while `drop_{front,back}` [asserts](https://github.com/llvm/llvm-project/blob/22d4ff155aadf0f098dd5dc48d9038da15108937/llvm/include/llvm/ADT/ArrayRef.h#L389-L398) `N <= size()` (and slices a bogus length otherwise). https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
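A small standalone illustration of that asymmetry, independent of the patch: `take_front`/`take_back` clamp an oversized count to the full view, while `drop_front`/`drop_back` assert that the count does not exceed the size.
```cpp
#include "llvm/ADT/ArrayRef.h"

void arrayRefTakeVsDrop() {
  int Vals[] = {1, 2, 3};
  llvm::ArrayRef<int> A(Vals);

  llvm::ArrayRef<int> B = A.take_front(5); // N >= size(): returns the whole view {1, 2, 3}
  llvm::ArrayRef<int> C = A.take_back(5);  // likewise clamps to the full array
  // A.drop_front(5) would trip the "Dropping more elements than exist" assertion.
  (void)B;
  (void)C;
}
```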
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } + + if (NextHotJumpTableIndex < (int)JT.size()) { +// Reverse the order of cold jump tables indices. +for (int L = NextHotJumpTableIndex, R = JT.size() - 1; L < R; ++L, --R) + std::swap(JumpTableIndices[L], JumpTableIndices[R]); mingmingl-llvm wrote: This makes sense to not update memory and I like the idea of reverse iterating. I ended up using `llvm::make_range` to specify the iterated range and order, and templated the original `emitJumpTables` method. I also renamed it to `emitJumpTableImpl` and removed `virtual` keyword from it. https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
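As a rough sketch of the `make_range` idea described above (the names are made up, not taken from the patch): the cold suffix of the index vector can be visited in reverse order without swapping any elements in place.
```cpp
#include "llvm/ADT/iterator_range.h"
#include <vector>

// Visit the cold suffix (everything after the first NumHot entries) in
// reverse order, leaving the vector itself untouched.
// Assumes NumHot <= JumpTableIndices.size().
void visitColdInReverse(const std::vector<unsigned> &JumpTableIndices,
                        unsigned NumHot) {
  auto ColdReversed = llvm::make_range(JumpTableIndices.rbegin(),
                                       JumpTableIndices.rend() - NumHot);
  for (unsigned JTI : ColdReversed) {
    (void)JTI; // emit jump table JTI here
  }
}
```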
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
@@ -2876,42 +2875,101 @@ void AsmPrinter::emitJumpTableInfo() { MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, F); + + std::vector JumpTableIndices; + if (!TM.Options.EnableStaticDataPartitioning) { +for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) + JumpTableIndices.push_back(JTI); +emitJumpTables(JumpTableIndices, TLOF.getSectionForJumpTable(F, TM), + JTInDiffSection, *MJTI); +return; + } + + // When static data partitioning is enabled, collect jump table entries that + // go into the same section together to reduce the amount of section switch + // statements. + // + // Iterate all jump tables, put hot jump table indices towards the beginning + // of the vector, and cold jump table indices towards the end. Meanwhile + // retain the relative orders of original jump tables within a hot or unlikely + // section by reversing the cold jump table indices. + int NextHotJumpTableIndex = 0, NextColdJumpTableIndex = JT.size() - 1; + JumpTableIndices.resize(JT.size()); + for (unsigned JTI = 0, JTSize = JT.size(); JTI < JTSize; ++JTI) { +if (JT[JTI].Hotness == MachineFunctionDataHotness::Cold) + JumpTableIndices[NextColdJumpTableIndex--] = JTI; +else + JumpTableIndices[NextHotJumpTableIndex++] = JTI; + } + + if (NextHotJumpTableIndex != 0) { +emitJumpTables( +ArrayRef(JumpTableIndices).take_front(NextHotJumpTableIndex), +TLOF.getSectionForJumpTable(F, TM, &JT[0]), JTInDiffSection, *MJTI); + } mingmingl-llvm wrote: > ArrayRef::take_front(0) will return an empty array, and emitJumpTables() > handles this case. The updated change removed `if (NumHotJT>0)` and `if(NumColdJT)>0)` as suggested. In the original patch, they also gated the call to `TLOF.getSectionForJumpTable(F, TM, &MJTE)`, so that a `MCSection` won't be created if the number of JTs is zero. In the updated patch, `emitJumpTableImpl` reads `TM.Options.EnableStaticDataPartitioning` directly to call one of two `getSectionForJumpTable` functions. https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
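Paraphrased as a fragment rather than copied from the patch, the dispatch described above amounts to choosing between the plain and the hotness-aware `getSectionForJumpTable` overloads once per group of indices:
```cpp
// Sketch only: F, TM, TLOF, JT and JumpTableIndices stand for the surrounding
// AsmPrinter state discussed in this thread.
MCSection *JTSection =
    TM.Options.EnableStaticDataPartitioning
        ? TLOF.getSectionForJumpTable(F, TM, &JT[JumpTableIndices.front()])
        : TLOF.getSectionForJumpTable(F, TM);
```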
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
https://github.com/mingmingl-llvm commented: thanks for the feedback! PTAL. https://github.com/llvm/llvm-project/pull/122215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AsmPrinter][ELF] Support profile-guided section prefix for jump tables' (read-only) data sections (PR #122215)
https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/122215 >From dd748277dff2b30ed02bfa466eeca7102aa93eb4 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Fri, 10 Jan 2025 13:53:08 -0800 Subject: [PATCH 1/7] rely to upstream --- llvm/include/llvm/CodeGen/MachineFunction.h | 2 +- .../llvm/CodeGen/MachineJumpTableInfo.h | 9 +- llvm/include/llvm/CodeGen/Passes.h| 2 +- llvm/lib/CodeGen/MachineFunction.cpp | 12 +- llvm/lib/CodeGen/StaticDataSplitter.cpp | 87 +++--- llvm/test/CodeGen/X86/jump-table-partition.ll | 251 +++--- 6 files changed, 223 insertions(+), 140 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index c0f983d1c6787b..dcdbcaec168d22 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -91,7 +91,7 @@ template <> struct ilist_callback_traits { // The hotness of static data tracked by a MachineFunction and not represented // as a global object in the module IR / MIR. Typical examples are // MachineJumpTableInfo and MachineConstantPool. -enum class DataHotness { +enum class MachineFunctionDataHotness { Unknown, Cold, Hot, diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h index cc1f54a81b9bb4..e3675d6489b350 100644 --- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -28,7 +28,7 @@ namespace llvm { class MachineBasicBlock; class DataLayout; class raw_ostream; -enum class DataHotness; +enum class MachineFunctionDataHotness; /// MachineJumpTableEntry - One jump table in the jump table info. /// @@ -36,7 +36,7 @@ struct MachineJumpTableEntry { /// MBBs - The vector of basic blocks from which to create the jump table. std::vector MBBs; - DataHotness Hotness; + MachineFunctionDataHotness Hotness; explicit MachineJumpTableEntry(const std::vector &M); }; @@ -109,7 +109,10 @@ class MachineJumpTableInfo { return JumpTables; } - void updateJumpTableHotness(size_t JTI, DataHotness Hotness); + // Update machine jump table entry's hotness. Return true if the hotness is + // updated. + bool updateJumpTableEntryHotness(size_t JTI, + MachineFunctionDataHotness Hotness); /// RemoveJumpTable - Mark the specific index as being dead. This will /// prevent it from being emitted. diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 16423d03ff7018..b5d2a7e6bf035b 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -71,7 +71,7 @@ namespace llvm { /// using profile information. MachineFunctionPass *createMachineFunctionSplitterPass(); - /// createStaticDataSplitterPass - This pass partions static data sections + /// createStaticDataSplitterPass - This pass partitions a static data section /// into a hot and cold section using profile information. MachineFunctionPass *createStaticDataSplitterPass(); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index b5a89f3bcf42f1..d09e93d79aae6c 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1293,7 +1293,7 @@ const unsigned MachineFunction::DebugOperandMemNumber = 100; MachineJumpTableEntry::MachineJumpTableEntry( const std::vector &MBBs) -: MBBs(MBBs), Hotness(DataHotness::Unknown) {} +: MBBs(MBBs), Hotness(MachineFunctionDataHotness::Unknown) {} /// Return the size of each entry in the jump table. 
unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { @@ -1344,13 +1344,17 @@ unsigned MachineJumpTableInfo::createJumpTableIndex( return JumpTables.size()-1; } -void MachineJumpTableInfo::updateJumpTableHotness(size_t JTI, - DataHotness Hotness) { +bool MachineJumpTableInfo::updateJumpTableEntryHotness( +size_t JTI, MachineFunctionDataHotness Hotness) { assert(JTI < JumpTables.size() && "Invalid JTI!"); // Note record the largest hotness is important for mergable data (constant // pools). Even if jump table instances are not merged, record the largest // value seen fwiw. - JumpTables[JTI].Hotness = std::max(JumpTables[JTI].Hotness, Hotness); + if (Hotness <= JumpTables[JTI].Hotness) +return false; + + JumpTables[JTI].Hotness = Hotness; + return true; } /// If Old is the target of any jump tables, update the jump tables to branch diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 482a61027cf985..9e2cfe18256e35 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -6,13 +6,16 @@ // //===-
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
llvmbot wrote: @llvm/pr-subscribers-backend-webassembly @llvm/pr-subscribers-clang-codegen Author: Helena Kotas (hekota) Changes Introduces a new address space `hlsl_constant(2)` for constant buffer declarations. This address space is applied to declarations inside `cbuffer` block. Later on, it will also be applied to `ConstantBuffer` syntax and the default `$Globals` constant buffer. Clang codegen translates constant buffer declarations to global variables and loads from `hlsl_constant(2)` address space. More work coming soon will include addition of metadata that will map these globals to individual constant buffers and enable their transformation to appropriate constant buffer load intrinsics later on in an LLVM pass. Fixes #123406 --- Patch is 25.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123411.diff 21 Files Affected: - (modified) clang/include/clang/Basic/AddressSpaces.h (+1) - (modified) clang/lib/AST/TypePrinter.cpp (+2) - (modified) clang/lib/Basic/Targets/AArch64.h (+1) - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+1) - (modified) clang/lib/Basic/Targets/DirectX.h (+1) - (modified) clang/lib/Basic/Targets/NVPTX.h (+1) - (modified) clang/lib/Basic/Targets/SPIR.h (+2) - (modified) clang/lib/Basic/Targets/SystemZ.h (+1) - (modified) clang/lib/Basic/Targets/TCE.h (+1) - (modified) clang/lib/Basic/Targets/WebAssembly.h (+1) - (modified) clang/lib/Basic/Targets/X86.h (+1) - (modified) clang/lib/CodeGen/CGHLSLRuntime.cpp (-16) - (modified) clang/lib/Sema/SemaHLSL.cpp (+12-3) - (modified) clang/test/AST/HLSL/ast-dump-comment-cbuffer-tbuffer.hlsl (+8-8) - (modified) clang/test/AST/HLSL/cbuffer.hlsl (+12-12) - (modified) clang/test/AST/HLSL/packoffset.hlsl (+19-19) - (modified) clang/test/AST/HLSL/pch_hlsl_buffer.hlsl (+6-6) - (modified) clang/test/AST/HLSL/resource_binding_attr.hlsl (+4-4) - (modified) clang/test/CodeGenHLSL/cbuf.hlsl (+9-4) - (modified) clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl (+6-2) - (modified) clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl (+8-6) ``diff diff --git a/clang/include/clang/Basic/AddressSpaces.h b/clang/include/clang/Basic/AddressSpaces.h index 7b723d508fff17..d18bfe54931f93 100644 --- a/clang/include/clang/Basic/AddressSpaces.h +++ b/clang/include/clang/Basic/AddressSpaces.h @@ -58,6 +58,7 @@ enum class LangAS : unsigned { // HLSL specific address spaces. hlsl_groupshared, + hlsl_constant, // Wasm specific address spaces. wasm_funcref, diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index a850410ffc8468..6cad74fef3fe33 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -2556,6 +2556,8 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) { return "__funcref"; case LangAS::hlsl_groupshared: return "groupshared"; + case LangAS::hlsl_constant: +return "hlsl_constant"; default: return std::to_string(toTargetAddressSpace(AS)); } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index ecf80b23a508c9..600940f5e4e23c 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -44,6 +44,7 @@ static const unsigned ARM64AddrSpaceMap[] = { static_cast(AArch64AddrSpace::ptr32_uptr), static_cast(AArch64AddrSpace::ptr64), 0, // hlsl_groupshared +0, // hlsl_constant // Wasm address space values for this target are dummy values, // as it is only enabled for Wasm targets. 
20, // wasm_funcref diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 99f8f2944e2796..824134d52ec139 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -83,6 +83,7 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared +llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_constant }; } // namespace targets diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h index ab22d1281a4df7..4e6bc0e040398b 100644 --- a/clang/lib/Basic/Targets/DirectX.h +++ b/clang/lib/Basic/Targets/DirectX.h @@ -42,6 +42,7 @@ static const unsigned DirectXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64 3, // hlsl_groupshared +2, // hlsl_constant // Wasm address space values for this target are dummy values, // as it is only enabled for Wasm targets. 20, // wasm_funcref diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index d81b89a7f24ac0..c6531148fe30ce 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -46,6 +46,7 @@ static const unsigned NVPTXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64 0, // hlsl_groupshared +0, // hlsl_constant // Wa
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
llvmbot wrote: @llvm/pr-subscribers-backend-directx Author: Helena Kotas (hekota) Changes Introduces a new address space `hlsl_constant(2)` for constant buffer declarations. This address space is applied to declarations inside `cbuffer` block. Later on, it will also be applied to `ConstantBuffer` syntax and the default `$Globals` constant buffer. Clang codegen translates constant buffer declarations to global variables and loads from `hlsl_constant(2)` address space. More work coming soon will include addition of metadata that will map these globals to individual constant buffers and enable their transformation to appropriate constant buffer load intrinsics later on in an LLVM pass. Fixes #123406 --- Patch is 25.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123411.diff 21 Files Affected: - (modified) clang/include/clang/Basic/AddressSpaces.h (+1) - (modified) clang/lib/AST/TypePrinter.cpp (+2) - (modified) clang/lib/Basic/Targets/AArch64.h (+1) - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+1) - (modified) clang/lib/Basic/Targets/DirectX.h (+1) - (modified) clang/lib/Basic/Targets/NVPTX.h (+1) - (modified) clang/lib/Basic/Targets/SPIR.h (+2) - (modified) clang/lib/Basic/Targets/SystemZ.h (+1) - (modified) clang/lib/Basic/Targets/TCE.h (+1) - (modified) clang/lib/Basic/Targets/WebAssembly.h (+1) - (modified) clang/lib/Basic/Targets/X86.h (+1) - (modified) clang/lib/CodeGen/CGHLSLRuntime.cpp (-16) - (modified) clang/lib/Sema/SemaHLSL.cpp (+12-3) - (modified) clang/test/AST/HLSL/ast-dump-comment-cbuffer-tbuffer.hlsl (+8-8) - (modified) clang/test/AST/HLSL/cbuffer.hlsl (+12-12) - (modified) clang/test/AST/HLSL/packoffset.hlsl (+19-19) - (modified) clang/test/AST/HLSL/pch_hlsl_buffer.hlsl (+6-6) - (modified) clang/test/AST/HLSL/resource_binding_attr.hlsl (+4-4) - (modified) clang/test/CodeGenHLSL/cbuf.hlsl (+9-4) - (modified) clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl (+6-2) - (modified) clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl (+8-6) ``diff diff --git a/clang/include/clang/Basic/AddressSpaces.h b/clang/include/clang/Basic/AddressSpaces.h index 7b723d508fff17..d18bfe54931f93 100644 --- a/clang/include/clang/Basic/AddressSpaces.h +++ b/clang/include/clang/Basic/AddressSpaces.h @@ -58,6 +58,7 @@ enum class LangAS : unsigned { // HLSL specific address spaces. hlsl_groupshared, + hlsl_constant, // Wasm specific address spaces. wasm_funcref, diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index a850410ffc8468..6cad74fef3fe33 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -2556,6 +2556,8 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) { return "__funcref"; case LangAS::hlsl_groupshared: return "groupshared"; + case LangAS::hlsl_constant: +return "hlsl_constant"; default: return std::to_string(toTargetAddressSpace(AS)); } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index ecf80b23a508c9..600940f5e4e23c 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -44,6 +44,7 @@ static const unsigned ARM64AddrSpaceMap[] = { static_cast(AArch64AddrSpace::ptr32_uptr), static_cast(AArch64AddrSpace::ptr64), 0, // hlsl_groupshared +0, // hlsl_constant // Wasm address space values for this target are dummy values, // as it is only enabled for Wasm targets. 
20, // wasm_funcref diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 99f8f2944e2796..824134d52ec139 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -83,6 +83,7 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared +llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_constant }; } // namespace targets diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h index ab22d1281a4df7..4e6bc0e040398b 100644 --- a/clang/lib/Basic/Targets/DirectX.h +++ b/clang/lib/Basic/Targets/DirectX.h @@ -42,6 +42,7 @@ static const unsigned DirectXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64 3, // hlsl_groupshared +2, // hlsl_constant // Wasm address space values for this target are dummy values, // as it is only enabled for Wasm targets. 20, // wasm_funcref diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index d81b89a7f24ac0..c6531148fe30ce 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -46,6 +46,7 @@ static const unsigned NVPTXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64 0, // hlsl_groupshared +0, // hlsl_constant // Wasm address space values for this target
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
llvmbot wrote: @llvm/pr-subscribers-backend-systemz Author: Helena Kotas (hekota) Changes Introduces a new address space `hlsl_constant(2)` for constant buffer declarations. This address space is applied to declarations inside `cbuffer` block. Later on, it will also be applied to `ConstantBuffer` syntax and the default `$Globals` constant buffer. Clang codegen translates constant buffer declarations to global variables and loads from `hlsl_constant(2)` address space. More work coming soon will include addition of metadata that will map these globals to individual constant buffers and enable their transformation to appropriate constant buffer load intrinsics later on in an LLVM pass. Fixes #123406 --- Patch is 25.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123411.diff 21 Files Affected: - (modified) clang/include/clang/Basic/AddressSpaces.h (+1) - (modified) clang/lib/AST/TypePrinter.cpp (+2) - (modified) clang/lib/Basic/Targets/AArch64.h (+1) - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+1) - (modified) clang/lib/Basic/Targets/DirectX.h (+1) - (modified) clang/lib/Basic/Targets/NVPTX.h (+1) - (modified) clang/lib/Basic/Targets/SPIR.h (+2) - (modified) clang/lib/Basic/Targets/SystemZ.h (+1) - (modified) clang/lib/Basic/Targets/TCE.h (+1) - (modified) clang/lib/Basic/Targets/WebAssembly.h (+1) - (modified) clang/lib/Basic/Targets/X86.h (+1) - (modified) clang/lib/CodeGen/CGHLSLRuntime.cpp (-16) - (modified) clang/lib/Sema/SemaHLSL.cpp (+12-3) - (modified) clang/test/AST/HLSL/ast-dump-comment-cbuffer-tbuffer.hlsl (+8-8) - (modified) clang/test/AST/HLSL/cbuffer.hlsl (+12-12) - (modified) clang/test/AST/HLSL/packoffset.hlsl (+19-19) - (modified) clang/test/AST/HLSL/pch_hlsl_buffer.hlsl (+6-6) - (modified) clang/test/AST/HLSL/resource_binding_attr.hlsl (+4-4) - (modified) clang/test/CodeGenHLSL/cbuf.hlsl (+9-4) - (modified) clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl (+6-2) - (modified) clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl (+8-6) ``diff diff --git a/clang/include/clang/Basic/AddressSpaces.h b/clang/include/clang/Basic/AddressSpaces.h index 7b723d508fff17..d18bfe54931f93 100644 --- a/clang/include/clang/Basic/AddressSpaces.h +++ b/clang/include/clang/Basic/AddressSpaces.h @@ -58,6 +58,7 @@ enum class LangAS : unsigned { // HLSL specific address spaces. hlsl_groupshared, + hlsl_constant, // Wasm specific address spaces. wasm_funcref, diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index a850410ffc8468..6cad74fef3fe33 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -2556,6 +2556,8 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) { return "__funcref"; case LangAS::hlsl_groupshared: return "groupshared"; + case LangAS::hlsl_constant: +return "hlsl_constant"; default: return std::to_string(toTargetAddressSpace(AS)); } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index ecf80b23a508c9..600940f5e4e23c 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -44,6 +44,7 @@ static const unsigned ARM64AddrSpaceMap[] = { static_cast(AArch64AddrSpace::ptr32_uptr), static_cast(AArch64AddrSpace::ptr64), 0, // hlsl_groupshared +0, // hlsl_constant // Wasm address space values for this target are dummy values, // as it is only enabled for Wasm targets. 
20, // wasm_funcref diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 99f8f2944e2796..824134d52ec139 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -83,6 +83,7 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared +llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_constant }; } // namespace targets diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h index ab22d1281a4df7..4e6bc0e040398b 100644 --- a/clang/lib/Basic/Targets/DirectX.h +++ b/clang/lib/Basic/Targets/DirectX.h @@ -42,6 +42,7 @@ static const unsigned DirectXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64 3, // hlsl_groupshared +2, // hlsl_constant // Wasm address space values for this target are dummy values, // as it is only enabled for Wasm targets. 20, // wasm_funcref diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index d81b89a7f24ac0..c6531148fe30ce 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -46,6 +46,7 @@ static const unsigned NVPTXAddrSpaceMap[] = { 0, // ptr32_uptr 0, // ptr64 0, // hlsl_groupshared +0, // hlsl_constant // Wasm address space values for this target
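Each target keeps one address-space table slot per `LangAS` enumerator, so introducing `hlsl_constant` amounts to appending one entry to every per-target map, as the hunks above show. A minimal stand-alone sketch of that mapping (the enum and table below are illustrative stand-ins, not the actual Clang definitions; only the 3/2 values mirror the DirectX hunk):

```
// Illustrative sketch of a per-target LangAS -> target address space table.
// Real targets keep one slot per LangAS enumerator in declaration order.
#include <cassert>

enum class LangAS : unsigned {
  Default = 0,
  hlsl_groupshared, // existing HLSL address space
  hlsl_constant,    // newly added by this patch
};

static const unsigned ExampleAddrSpaceMap[] = {
    0, // Default
    3, // hlsl_groupshared (DirectX value from the diff)
    2, // hlsl_constant    (DirectX value from the diff)
};

unsigned toTargetAddressSpace(LangAS AS) {
  return ExampleAddrSpaceMap[static_cast<unsigned>(AS)];
}

int main() { assert(toTargetAddressSpace(LangAS::hlsl_constant) == 2); }
```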
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -100,22 +100,6 @@ GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) { llvm::formatv("{0}{1}", Buf.Name, Buf.IsCBuffer ? ".cb." : ".tb."), GlobalValue::NotThreadLocal); - IRBuilder<> B(CBGV->getContext()); - Value *ZeroIdx = B.getInt32(0); - // Replace Const use with CB use. - for (auto &[GV, Offset] : Buf.Constants) { -Value *GEP = -B.CreateGEP(Buf.LayoutStruct, CBGV, {ZeroIdx, B.getInt32(Offset)}); - -assert(Buf.LayoutStruct->getElementType(Offset) == GV->getValueType() && - "constant type mismatch"); - -// Replace. -GV->replaceAllUsesWith(GEP); -// Erase GV. -GV->removeDeadConstantUsers(); -GV->eraseFromParent(); - } hekota wrote: I am removing this part to make sure we preserve the loads in the new address space, but there is more reworking to be done in this area. https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
https://github.com/hekota ready_for_review https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [YAML] Don't validate `Fill::Size` after error (PR #123280)
@@ -1750,7 +1750,9 @@ void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping( std::string MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::validate( IO &io, std::unique_ptr<ELFYAML::Chunk> &C) { if (const auto *F = dyn_cast<ELFYAML::Fill>(C.get())) { -if (F->Pattern && F->Pattern->binary_size() != 0 && !F->Size) +// Can't check the `Size`, as it's required and may be left uninitialized by vitalybuka wrote: However, I can't reproduce this with MachOYAML::Section! Because it calls IO.mapRequired("Size", ...) before the optional fields, so if Size is missing, Section.content will not be set and there is nothing to match. So the following is a fix (a bad one) as well :) ``` static void fillMapping(IO &IO, ELFYAML::Fill &Fill) { + IO.mapRequired("Size", Fill.Size); IO.mapOptional("Name", Fill.Name, StringRef()); IO.mapOptional("Pattern", Fill.Pattern); IO.mapOptional("Offset", Fill.Offset); - IO.mapRequired("Size", Fill.Size); } ``` https://github.com/llvm/llvm-project/pull/123280 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang-RT] Build libflang_rt.so (PR #121782)
@@ -8,6 +8,12 @@ macro(enable_cuda_compilation name files) if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA") +if (NOT FLANG_RT_ENABLE_STATIC) + message(FATAL_ERROR +"FLANG_RT_ENABLE_STATIC is required for CUDA build of Flang-RT" +) +endif() + enable_language(CUDA) set_target_properties(${name} jeanPerier wrote: I think this needs to change to `${name}.static`. I saw you changed the argument at the call site of this function, and that makes sense to me because flang_rt.staticPTX was a bit weird, but I think some remaining references to `name` need to be updated as well. Without this I am getting a new CMake error with the latest update: ``` set_target_properties Can not find target to add properties to: flang_rt ``` On line 65 below, I am also getting an error: ``` Cannot specify include directories for target "flang_rt" which is not built ``` `${name}` in the foreach on line 64 needs to be `${name}.static` https://github.com/llvm/llvm-project/pull/121782 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add test for VALU hoisiting from WWM region. NFC. (PR #123234)
@@ -0,0 +1,43 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s + rampitec wrote: Done https://github.com/llvm/llvm-project/pull/123234 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add test for VALU hoisiting from WWM region. NFC. (PR #123234)
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/123234 >From 7501423b29230f37273094e1b15e8bca0fcc90bd Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 16 Jan 2025 10:49:05 -0800 Subject: [PATCH] [AMDGPU] Add test for VALU hoisiting from WWM region. NFC. The test demonstraits a suboptimal VALU hoisting from a WWM region. As a result we have 2 WWM regions instead of one. --- llvm/test/CodeGen/AMDGPU/licm-wwm.mir | 46 +++ 1 file changed, 46 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/licm-wwm.mir diff --git a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir new file mode 100644 index 00..fc20674971a716 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir @@ -0,0 +1,46 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s + +# Machine LICM may hoist an intruction from a WWM region, which will force SI-WQM pass +# to create a second WWM region. This is an unwanted hoisting. + +--- +name: licm_move_wwm +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: licm_move_wwm + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x8000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_32 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GCN-NEXT: $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]] + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x4000), %bb.2(0x4000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[ENTER_STRICT_WWM1:%[0-9]+]]:sreg_32 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec + ; GCN-NEXT: $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM1]] + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[V_READFIRSTLANE_B32_]] + ; GCN-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: +S_BRANCH %bb.1 + + bb.1: +%0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec +%1:sreg_32 = V_READFIRSTLANE_B32 killed %0:vgpr_32, implicit $exec +early-clobber %2:sreg_32 = STRICT_WWM killed %1:sreg_32, implicit $exec +$exec_lo = S_OR_B32 $exec_lo, %2, implicit-def $scc +S_CBRANCH_EXECNZ %bb.1, implicit $exec +S_BRANCH %bb.2 + + bb.2: +S_ENDPGM 0 +... ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang] Promote FortranEvaluateTesting library (PR #122334)
@@ -1,47 +1,34 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -add_library(FortranEvaluateTesting jeanPerier wrote: I am hitting the following error with my build: `g++: error: unrecognized command line option ‘--offload-arch=native’` at the build step that is linking. ``` [2336/2919] Linking CXX executable tools/flang/unittests/Evaluate/leading-zero-bit-count.test FAILED: tools/flang/unittests/Evaluate/leading-zero-bit-count.test : && g++ -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -fno-lifetime-dse -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -fdiagnostics-color -ffunction-sections -fdata-sections -Wno-deprecated-copy -Wno-ctad-maybe-unsupported -fno-strict-aliasing -fno-semantic-interposition -O3 -DNDEBUG -static-libgcc -static-libstdc++ -static-libstdc++ -fuse-ld=lld -Wl,--color-diagnostics -fopenmp --offload-arch=native tools/flang/unittests/Evaluate/CMakeFiles/leading-zero-bit-count.test.dir/leading-zero-bit-count.cpp.o -o tools/flang/unittests/Evaluate/leading-zero-bit-count.test lib/libLLVMSupport.a lib/libNonGTestTesting.a lib/libLLVMSupport.a -lrt -ldl -lpthread -lm /usr/lib64/libz.so /usr/lib64/libzstd.so lib/libLLVMDemangle.a && : g++: error: unrecognized command line option ‘--offload-arch=native’ ``` ``` cmake -G Ninja \ -DCMAKE_C_COMPILER=gcc \ -DCMAKE_CXX_COMPILER=g++ \ -DLLVM_TARGETS_TO_BUILD="host" \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_PROJECTS="clang;mlir;flang;openmp" ``` Building with gcc 9.3 on linux x86-64. I am seeing the error building from the top patch of the stack (#121782), but I suspect this has to do with this change. https://github.com/llvm/llvm-project/pull/122334 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Disable VALU sinking and hoisting with WWM (PR #123124)
@@ -2773,6 +2773,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_strict_wwm: Opcode = AMDGPU::STRICT_WWM; +CurDAG->getMachineFunction() +.getInfo() +->setInitWholeWave(); rampitec wrote: Ack. I can create a separate property HasWWM, but I really want to hear if we even want to go that way. https://github.com/llvm/llvm-project/pull/123124 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
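The "separate property" floated here would boil down to a per-function flag that is set when a WWM intrinsic is selected and queried by later passes instead of the init-whole-wave flag. A hedged, self-contained sketch of that shape (the names `HasWWM`/`setHasWWM` are hypothetical, not existing AMDGPU API):

```
// Hypothetical sketch of a dedicated "this function contains WWM" flag on the
// machine-function info, as suggested in the comment above. Not actual
// SIMachineFunctionInfo API; names and placement are assumptions.
class ExampleMachineFunctionInfo {
  bool HasWWM = false;

public:
  // Called from instruction selection when a wwm/strict.wwm intrinsic is seen.
  void setHasWWM() { HasWWM = true; }
  // Queried by passes (e.g. Machine LICM heuristics) that must treat WWM
  // regions conservatively.
  bool hasWWM() const { return HasWWM; }
};
```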
[llvm-branch-commits] [flang] 790a6e1 - Revert "Revert "Revert "[Flang][Driver] Add a flag to control zero initializa…"
Author: Kiran Chandramohan Date: 2025-01-17T12:27:25Z New Revision: 790a6e1399ee90ae802878dcb00c0e2a1d464d69 URL: https://github.com/llvm/llvm-project/commit/790a6e1399ee90ae802878dcb00c0e2a1d464d69 DIFF: https://github.com/llvm/llvm-project/commit/790a6e1399ee90ae802878dcb00c0e2a1d464d69.diff LOG: Revert "Revert "Revert "[Flang][Driver] Add a flag to control zero initializa…" This reverts commit 8c63648117f1e1705943903b149f36ab8a4df1e5. Added: Modified: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Flang.cpp flang/include/flang/Lower/LoweringOptions.def flang/lib/Frontend/CompilerInvocation.cpp flang/lib/Lower/ConvertVariable.cpp flang/tools/bbc/bbc.cpp Removed: flang/test/Driver/fno-zero-init.f90 flang/test/Lower/zero_init.f90 flang/test/Lower/zero_init_default_init.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c4b9743597bb2e..d38dd2b4e3cf09 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3505,11 +3505,6 @@ def fno_struct_path_tbaa : Flag<["-"], "fno-struct-path-tbaa">, Group; def fno_strict_enums : Flag<["-"], "fno-strict-enums">, Group; def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, Visibility<[ClangOption, FlangOption]>; -defm init_global_zero : BoolOptionWithoutMarshalling<"f", "init-global-zero", - PosFlag, - NegFlag>; def fno_pointer_tbaa : Flag<["-"], "fno-pointer-tbaa">, Group; def fno_temp_file : Flag<["-"], "fno-temp-file">, Group, Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, HelpText< diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 9c1fd28a3a8a26..86ed25badfa2b7 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -155,10 +155,8 @@ void Flang::addCodegenOptions(const ArgList &Args, options::OPT_flang_deprecated_no_hlfir, options::OPT_fno_ppc_native_vec_elem_order, options::OPT_fppc_native_vec_elem_order, - options::OPT_finit_global_zero, - options::OPT_fno_init_global_zero, options::OPT_ftime_report, - options::OPT_ftime_report_EQ, options::OPT_funroll_loops, - options::OPT_fno_unroll_loops}); + options::OPT_ftime_report, options::OPT_ftime_report_EQ, + options::OPT_funroll_loops, options::OPT_fno_unroll_loops}); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index 396c91948be36b..5a6debfdffe030 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -44,8 +44,5 @@ ENUM_LOWERINGOPT(IntegerWrapAround, unsigned, 1, 0) /// If false, assume that the shapes/types/allocation-status match. ENUM_LOWERINGOPT(ReallocateLHS, unsigned, 1, 1) -/// If true, initialize globals without initialization to zero. -/// On by default. 
-ENUM_LOWERINGOPT(InitGlobalZero, unsigned, 1, 1) #undef LOWERINGOPT #undef ENUM_LOWERINGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 3c6da4687f65d3..15b1e1e0a24881 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1377,14 +1377,6 @@ bool CompilerInvocation::createFromArgs( invoc.loweringOpts.setNoPPCNativeVecElemOrder(true); } - // -f[no-]init-global-zero - if (args.hasFlag(clang::driver::options::OPT_finit_global_zero, - clang::driver::options::OPT_fno_init_global_zero, - /*default=*/true)) -invoc.loweringOpts.setInitGlobalZero(true); - else -invoc.loweringOpts.setInitGlobalZero(false); - // Preserve all the remark options requested, i.e. -Rpass, -Rpass-missed or // -Rpass-analysis. This will be used later when processing and outputting the // remarks generated by LLVM in ExecuteCompilerInvocation.cpp. diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 87236dc293ebbc..9ee42d5cd88002 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -635,11 +635,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, global.setLinkName(builder.createCommonLinkage()); Fortran::lower::createGlobalInitialization( builder, global, [&](fir::FirOpBuilder &builder) { - mlir::Value initValue; - if (converter.getLoweringOptions().getInitGlobalZero()) -initValue = builder.create(loc, symTy); - else -initValue = builder.create(loc, symTy); + mlir::Val
[llvm-branch-commits] [clang-tools-extra] [clang-tidy][NFC] refactor modernize-raw-string-literal fix hint (PR #122909)
HerrCai0907 wrote: ### Merge activity * **Jan 17, 8:39 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/122909). https://github.com/llvm/llvm-project/pull/122909 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang] Promote FortranEvaluateTesting library (PR #122334)
@@ -1,47 +1,34 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -add_library(FortranEvaluateTesting jhuber6 wrote: Does anyone use the omp offload build? I'm hoping to make that unnecessary once we have the flang-rt build working. https://github.com/llvm/llvm-project/pull/122334 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
@@ -151,3 +151,17 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr, ret void } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: @atomic_vec2_ptr_align( +; CHECK-NEXT:[[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X:%.*]], i32 2) +; CHECK-NEXT:[[TMP2:%.*]] = trunc i128 [[TMP1]] to i64 +; CHECK-NEXT:[[TMP3:%.*]] = lshr i128 [[TMP1]], 64 +; CHECK-NEXT:[[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT:[[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; CHECK-NEXT:[[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP4]], i32 1 +; CHECK-NEXT:[[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr> arsenm wrote: bump https://github.com/llvm/llvm-project/pull/120716 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
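The CHECK lines above encode a simple bit-level recipe: the 128-bit payload returned by `__atomic_load_16` is split into a low and a high 64-bit lane before being reassembled into the two-element vector. A plain C++ stand-in for that arithmetic (using the GCC/Clang `unsigned __int128` extension; purely illustrative, not the AtomicExpand code itself):

```
// Scalar illustration of the expansion in the CHECK lines: low lane = trunc,
// high lane = trunc of the value shifted right by 64 bits.
#include <cassert>
#include <cstdint>
#include <utility>

std::pair<uint64_t, uint64_t> splitPayload(unsigned __int128 Payload) {
  uint64_t Lo = static_cast<uint64_t>(Payload);       // trunc i128 -> i64
  uint64_t Hi = static_cast<uint64_t>(Payload >> 64); // lshr 64, then trunc
  return {Lo, Hi};
}

int main() {
  unsigned __int128 P =
      (static_cast<unsigned __int128>(0x1122334455667788ULL) << 64) |
      0x99aabbccddeeff00ULL;
  auto [Lo, Hi] = splitPayload(P);
  assert(Lo == 0x99aabbccddeeff00ULL && Hi == 0x1122334455667788ULL);
}
```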
[llvm-branch-commits] [llvm] 74673b3 - Revert "[AArch64] Combine and lsl into ubfiz (#118974)"
Author: Cullen Rhodes Date: 2025-01-17T15:29:43Z New Revision: 74673b37ef5226ddd1a3216075e48ac75509525a URL: https://github.com/llvm/llvm-project/commit/74673b37ef5226ddd1a3216075e48ac75509525a DIFF: https://github.com/llvm/llvm-project/commit/74673b37ef5226ddd1a3216075e48ac75509525a.diff LOG: Revert "[AArch64] Combine and lsl into ubfiz (#118974)" This reverts commit f1d5efe37ea7891b3fc9f78b6fdbbe1dc207bbd6. Added: Modified: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll llvm/test/CodeGen/AArch64/extract-bits.ll llvm/test/CodeGen/AArch64/fpenv.ll llvm/test/CodeGen/AArch64/xbfiz.ll Removed: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7d3ca46204b673..0c096711bf3bdb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1140,8 +1140,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); - setTargetDAGCombine(ISD::SHL); - // In case of strict alignment, avoid an excessive number of byte wide stores. MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemset = @@ -26474,43 +26472,6 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return NVCAST; } -/// If the operand is a bitwise AND with a constant RHS, and the shift has a -/// constant RHS and is the only use, we can pull it out of the shift, i.e. -/// -/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) -/// -/// We prefer this canonical form to match existing isel patterns. -static SDValue performSHLCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - if (DCI.isBeforeLegalizeOps()) -return SDValue(); - - SDValue Op0 = N->getOperand(0); - if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) -return SDValue(); - - SDValue C1 = Op0->getOperand(1); - SDValue C2 = N->getOperand(1); - if (!isa(C1) || !isa(C2)) -return SDValue(); - - // Might be folded into shifted op, do not lower. 
- if (N->hasOneUse()) { -unsigned UseOpc = N->user_begin()->getOpcode(); -if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || -UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) - return SDValue(); - } - - SDLoc DL(N); - EVT VT = N->getValueType(0); - SDValue X = Op0->getOperand(0); - SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); - SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); - return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); -} - SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -26856,8 +26817,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performCTLZCombine(N, DAG, Subtarget); case ISD::SCALAR_TO_VECTOR: return performScalarToVectorCombine(N, DCI, DAG); - case ISD::SHL: -return performSHLCombine(N, DCI, DAG); } return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll index 1fffcdda4b4167..66a6745cda8f76 100644 --- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll @@ -190,7 +190,8 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) { define i8 @test_i8_7_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_7_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT:ubfiz w0, w0, #1, #3 +; CHECK-NEXT:and w8, w0, #0x7 +; CHECK-NEXT:lsl w0, w8, #1 ; CHECK-NEXT:ret %t0 = and i8 %a0, 7 %t1 = shl i8 %t0, 1 @@ -199,7 +200,8 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) { define i8 @test_i8_7_mask_shl_4(i8 %a0) { ; CHECK-LABEL: test_i8_7_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT:ubfiz w0, w0, #4, #3 +; CHECK-NEXT:and w8, w0, #0x7 +; CHECK-NEXT:lsl w0, w8, #4 ; CHECK-NEXT:ret %t0 = and i8 %a0, 7 %t1 = shl i8 %t0, 4 @@ -227,8 +229,8 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) { define i8 @test_i8_28_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT:lsl w8, w0, #1 -; CHECK-NEXT:and w0, w8, #0x38 +; CHECK-NEXT:and w8, w0, #0x1c +; CHECK-NEXT:lsl w0, w8, #1 ; CHECK-NEXT:ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 1 @@ -237,8 +239,8 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) { define i8 @test_i8_28_mask_shl_2(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_2: ; CHECK: // %bb.0: -; CHECK-NEXT:lsl w8, w0, #2 -; CHECK-NEXT:and w0, w8, #0x70 +; CHECK-NEXT:and w8, w0, #0x1c +; CHECK-NEXT:lsl w0, w8, #2 ;
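For context on what the reverted combine was doing algebraically: `(shl (and X, C1), C2)` is rewritten to `(and (shl X, C2), (shl C1, C2))`, which is an identity in fixed-width arithmetic because shifting commutes with masking when the mask is shifted by the same amount. A tiny exhaustive check of that identity at i8 width (illustrative only, unrelated to the AArch64 selection patterns themselves):

```
// Exhaustively verify (x & c1) << c2 == (x << c2) & (c1 << c2) for i8 values,
// mirroring e.g. the test_i8_7_mask_shl_1 case above (c1 = 7, c2 = 1).
#include <cassert>
#include <cstdint>

int main() {
  const unsigned C1 = 7, C2 = 1;
  for (unsigned X = 0; X <= 0xFF; ++X) {
    uint8_t LHS = static_cast<uint8_t>((X & C1) << C2);
    uint8_t RHS = static_cast<uint8_t>((X << C2) & (C1 << C2));
    assert(LHS == RHS);
  }
}
```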
[llvm-branch-commits] [llvm] [AMDGPU] Disable VALU sinking and hoisting with WWM (PR #123124)
rampitec wrote: > I guess my concern is performance regressions if any use of WWM (e.g. atomic > optimizer) essentially turns off Machine LICM. I agree. But when moving the code, LLVM thinks it is something cheap, and it is not, which is also a performance problem. Things would be much easier if we could tell that an instruction belongs to a WWM region. https://github.com/llvm/llvm-project/pull/123124 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [YAML] Don't validate `Fill::Size` after error (PR #123280)
@@ -1750,7 +1750,9 @@ void MappingTraits>::mapping( std::string MappingTraits>::validate( IO &io, std::unique_ptr &C) { if (const auto *F = dyn_cast(C.get())) { -if (F->Pattern && F->Pattern->binary_size() != 0 && !F->Size) +// Can't check the `Size`, as it's required and may be left uninitialized by jh7370 wrote: At a guess, based on the comment, it's when the `Size` field in the YAML is missing for a `Fill`? I'm beginning to think that `mapRequired` should zero-initialise the value that is being mapped, even on failure. I suspect there are many more cases along these lines too. https://github.com/llvm/llvm-project/pull/123280 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
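A generic illustration of the zero-initialisation idea raised here, written as a stand-alone helper rather than the real `llvm::yaml::IO` (hypothetical names, not LLVM YAML I/O API): the point is simply that a required field gets a well-defined value even when the key is missing, so a later `validate()` never reads uninitialised data.

```
// Stand-alone sketch: a "required" mapping that default-initialises its
// destination on failure instead of leaving it untouched.
#include <cstdint>
#include <map>
#include <string>

using Doc = std::map<std::string, uint64_t>;

bool mapRequiredField(const Doc &D, const std::string &Key, uint64_t &Out,
                      std::string &Err) {
  auto It = D.find(Key);
  if (It == D.end()) {
    Out = 0; // well-defined even on error, so validation can run safely
    Err = "missing required key '" + Key + "'";
    return false;
  }
  Out = It->second;
  return true;
}

int main() {
  Doc D; // no "Size" key present
  uint64_t Size = 0xdeadbeef;
  std::string Err;
  bool OK = mapRequiredField(D, "Size", Size, Err);
  return (!OK && Size == 0) ? 0 : 1;
}
```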
[llvm-branch-commits] [mlir] [mlir][IR] Remove `isF...()` type API for low-precision FP types (PR #123326)
https://github.com/River707 approved this pull request. https://github.com/llvm/llvm-project/pull/123326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Remove `isF...()` type API for low-precision FP types (PR #123326)
https://github.com/kuhar approved this pull request. Nice! https://github.com/llvm/llvm-project/pull/123326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Disable VALU sinking and hoisting with WWM (PR #123124)
https://github.com/rampitec edited https://github.com/llvm/llvm-project/pull/123124 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [PassBuilder][CodeGen] Add callback style pass buider (PR #116913)
@@ -0,0 +1,950 @@ +//===- Construction of code generation pass pipelines -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +/// \file +/// +/// This file provides the implementation of the PassBuilder based on our +/// static pass registry as well as related functionality. +/// +//===--===// + +#include "llvm/CodeGen/CallBrPrepare.h" +#include "llvm/CodeGen/CodeGenPrepare.h" +#include "llvm/CodeGen/DwarfEHPrepare.h" +#include "llvm/CodeGen/ExpandLargeDivRem.h" +#include "llvm/CodeGen/ExpandLargeFpConvert.h" +#include "llvm/CodeGen/ExpandMemCmp.h" +#include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/CodeGen/FinalizeISel.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GlobalMergeFunctions.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/LocalStackSlotAllocation.h" +#include "llvm/CodeGen/LowerEmuTLS.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachinePassManager.h" +#include "llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/CodeGen/ReplaceWithVeclib.h" +#include "llvm/CodeGen/SafeStack.h" +#include "llvm/CodeGen/SelectOptimize.h" +#include "llvm/CodeGen/ShadowStackGCLowering.h" +#include "llvm/CodeGen/SjLjEHPrepare.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/UnreachableBlockElim.h" +#include "llvm/CodeGen/WasmEHPrepare.h" +#include "llvm/CodeGen/WinEHPrepare.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Passes/CodeGenPassBuilder.h" // Dummy passes only! 
+#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/ObjCARC.h" +#include "llvm/Transforms/Scalar/ConstantHoisting.h" +#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" +#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" +#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" +#include "llvm/Transforms/Utils/LowerGlobalDtors.h" +#include "llvm/Transforms/Utils/LowerInvoke.h" + +namespace llvm { +extern cl::opt FSRemappingFile; +} + +using namespace llvm; + +void PassBuilder::invokeCodeGenIREarlyEPCallbacks(ModulePassManager &MPM) { + for (auto &C : CodeGenIREarlyEPCallbacks) +C(MPM); +} + +void PassBuilder::invokeGCLoweringEPCallbacks(FunctionPassManager &FPM) { + for (auto &C : GCLoweringEPCallbacks) +C(FPM); +} + +void PassBuilder::invokeISelPrepareEPCallbacks(ModulePassManager &MPM) { + for (auto &C : ISelPrepareEPCallbacks) +C(MPM); +} + +void PassBuilder::invokeMachineSSAOptimizationEarlyEPCallbacks( +MachineFunctionPassManager &MFPM) { + for (auto &C : MachineSSAOptimizationEarlyEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokeMachineSSAOptimizationLastEPCallbacks( +MachineFunctionPassManager &MFPM) { + for (auto &C : MachineSSAOptimizationLastEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokePreRegAllocEPCallbacks( +MachineFunctionPassManager &MFPM) { + for (auto &C : PreRegAllocEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokePreRegBankSelectEPCallbacks( +MachineFunctionPassManager &MFPM) { + for (auto &C : PreRegBankSelectEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokePreGlobalInstructionSelectEPCallbacks( +MachineFunctionPassManager &MFPM) { + for (auto &C : PreGlobalInstructionSelectEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokePostGlobalInstructionSelectEPCallbacks( +MachineFunctionPassManager &MFPM) { + for (auto &C : PostGlobalInstructionSelectEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokeILPOptsEPCallbacks(MachineFunctionPassManager &MFPM) { + for (auto &C : ILPOptsEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokeMachineLateOptimizationEPCallbacks( +MachineFunctionPassManager &MFPM) { + for (auto &C : MachineLateOptimizationEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokeMIEmitEPCallbacks(MachineFunctionPassManager &MFPM) { + for (auto &C : MIEmitEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokePreEmitEPCallbacks(MachineFunctionPassManager &MFPM) { + for (auto &C : PreEmitEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokePostRegAllocEPCallbacks( +MachineFunctionPassManager &MFPM) { + for (auto &C : PostRegAllocEPCallbacks) +C(MFPM); +} + +void PassBuilder::invokePreSched2EPCallbacks(MachineFunctionPassManager &MFPM) { + for (auto
[llvm-branch-commits] [llvm] ba06cf8 - Revert "Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV in…"
Author: Sushant Gokhale Date: 2025-01-17T02:04:01-08:00 New Revision: ba06cf81896f7a5ea8d025c1b26af7ea4a47dc53 URL: https://github.com/llvm/llvm-project/commit/ba06cf81896f7a5ea8d025c1b26af7ea4a47dc53 DIFF: https://github.com/llvm/llvm-project/commit/ba06cf81896f7a5ea8d025c1b26af7ea4a47dc53.diff LOG: Revert "Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV in…" This reverts commit 606d0a7cdc0c551df754eb4494a2c16861b6a9b9. Added: llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll Modified: llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d0b2ded127ff73..b6acde9bdd1104 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -13,6 +13,7 @@ #include "InstCombineInternal.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" @@ -657,6 +658,94 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) { return nullptr; } +// If we have the following pattern, +// X = 1.0/sqrt(a) +// R1 = X * X +// R2 = a/sqrt(a) +// then this method collects all the instructions that match R1 and R2. +static bool getFSqrtDivOptPattern(Instruction *Div, + SmallPtrSetImpl &R1, + SmallPtrSetImpl &R2) { + Value *A; + if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A || + match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A) { +for (User *U : Div->users()) { + Instruction *I = cast(U); + if (match(I, m_FMul(m_Specific(Div), m_Specific(Div +R1.insert(I); +} + +CallInst *CI = cast(Div->getOperand(1)); +for (User *U : CI->users()) { + Instruction *I = cast(U); + if (match(I, m_FDiv(m_Specific(A), m_Sqrt(m_Specific(A) +R2.insert(I); +} + } + return !R1.empty() && !R2.empty(); +} + +// Check legality for transforming +// x = 1.0/sqrt(a) +// r1 = x * x; +// r2 = a/sqrt(a); +// +// TO +// +// r1 = 1/a +// r2 = sqrt(a) +// x = r1 * r2 +// This transform works only when 'a' is known positive. +static bool isFSqrtDivToFMulLegal(Instruction *X, + SmallPtrSetImpl &R1, + SmallPtrSetImpl &R2) { + // Check if the required pattern for the transformation exists. + if (!getFSqrtDivOptPattern(X, R1, R2)) +return false; + + BasicBlock *BBx = X->getParent(); + BasicBlock *BBr1 = (*R1.begin())->getParent(); + BasicBlock *BBr2 = (*R2.begin())->getParent(); + + CallInst *FSqrt = cast(X->getOperand(1)); + if (!FSqrt->hasAllowReassoc() || !FSqrt->hasNoNaNs() || + !FSqrt->hasNoSignedZeros() || !FSqrt->hasNoInfs()) +return false; + + // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed + // by recip fp as it is strictly meant to transform ops of type a/b to + // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag + // has been used(rather abused)in the past for algebraic rewrites. + if (!X->hasAllowReassoc() || !X->hasAllowReciprocal() || !X->hasNoInfs()) +return false; + + // Check the constraints on X, R1 and R2 combined. + // fdiv instruction and one of the multiplications must reside in the same + // block. If not, the optimized code may execute more ops than before and + // this may hamper the performance. + if (BBx != BBr1 && BBx != BBr2) +return false; + + // Check the constraints on instructions in R1. 
+ if (any_of(R1, [BBr1](Instruction *I) { +// When you have multiple instructions residing in R1 and R2 +// respectively, it's difficult to generate combinations of (R1,R2) and +// then check if we have the required pattern. So, for now, just be +// conservative. +return (I->getParent() != BBr1 || !I->hasAllowReassoc()); + })) +return false; + + // Check the constraints on instructions in R2. + return all_of(R2, [BBr2](Instruction *I) { +// When you have multiple instructions residing in R1 and R2 +// respectively, it's difficult to generate combination of (R1,R2) and +// then check if we have the required pattern. So, for now, just be +// conservative. +return (I->getParent() == BBr2 && I->hasAllowReassoc()); + }); +} + Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) { Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); @@ -1913,6 +2002,75 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I, return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I); } +// Change +// X = 1/sqrt(a) +// R1 = X * X +// R2 = a * X +// +// TO +// +// F
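To make the rewrite concrete: for positive `a`, `1/sqrt(a) * 1/sqrt(a)` equals `1/a` and `a/sqrt(a)` equals `sqrt(a)`, so computing `r1 = 1/a`, `r2 = sqrt(a)` and `x = r1 * r2` reproduces all three values with a single division and a single sqrt. A small numeric sanity check of that equivalence (illustrative C++ only; the actual transform lives in InstCombine and is guarded by the fast-math flags checked in the code above):

```
// Check the algebraic equivalence behind the FSQRT/FDIV transform for a > 0.
#include <cassert>
#include <cmath>

int main() {
  const double a = 42.0;
  // Original shape: x = 1/sqrt(a), r1 = x*x, r2 = a/sqrt(a).
  double x = 1.0 / std::sqrt(a);
  double r1 = x * x;
  double r2 = a / std::sqrt(a);
  // Rewritten shape: r1 = 1/a, r2 = sqrt(a), x = r1*r2.
  double nr1 = 1.0 / a;
  double nr2 = std::sqrt(a);
  double nx = nr1 * nr2;
  assert(std::fabs(r1 - nr1) < 1e-12);
  assert(std::fabs(r2 - nr2) < 1e-9);
  assert(std::fabs(x - nx) < 1e-12);
}
```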
[llvm-branch-commits] [llvm] [AMDGPU] Disable VALU sinking and hoisting with WWM (PR #123124)
@@ -2773,6 +2773,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_strict_wwm: Opcode = AMDGPU::STRICT_WWM; +CurDAG->getMachineFunction() +.getInfo() +->setInitWholeWave(); rovka wrote: I would not recommend using HasInitWholeWave for this. That has a very narrow meaning referring only to the use of the llvm.amdgcn.init.whole.wave intrinsic, not WWM in general. https://github.com/llvm/llvm-project/pull/123124 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Remove `isF...()` type API for low-precision FP types (PR #123326)
https://github.com/matthias-springer created https://github.com/llvm/llvm-project/pull/123326 Remove `type.isFloat4E2M1FN()` etc. Use `isa(type)` instead. For details, see: https://discourse.llvm.org/t/rethink-on-approach-to-low-precision-fp-types/82361/28 Depends on #123321. >From 55825a999595222141f79a812c72c57cebd598d8 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 17 Jan 2025 12:31:38 +0100 Subject: [PATCH] [mlir][IR] Remove `isF...()` type API for low-precision FP types --- mlir/include/mlir/IR/CommonTypeConstraints.td | 26 ++-- mlir/include/mlir/IR/Types.h | 11 - mlir/lib/CAPI/IR/BuiltinTypes.cpp | 40 +++ .../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 38 +- .../ArithToAMDGPU/ArithToAMDGPU.cpp | 4 +- .../Conversion/LLVMCommon/TypeConverter.cpp | 9 ++--- .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp| 8 ++-- mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 4 +- mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp| 6 +-- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 3 +- mlir/lib/IR/Types.cpp | 19 - 11 files changed, 73 insertions(+), 95 deletions(-) diff --git a/mlir/include/mlir/IR/CommonTypeConstraints.td b/mlir/include/mlir/IR/CommonTypeConstraints.td index 6f52195c1d7c92..e752cdfb47fbb1 100644 --- a/mlir/include/mlir/IR/CommonTypeConstraints.td +++ b/mlir/include/mlir/IR/CommonTypeConstraints.td @@ -329,31 +329,31 @@ def F64 : F<64>; def F80 : F<80>; def F128 : F<128>; -def BF16 : Type, "bfloat16 type">, +def BF16 : Type($_self)">, "bfloat16 type">, BuildableType<"$_builder.getType()">; -def TF32 : Type, "tf32 type">, +def TF32 : Type($_self)">, "tf32 type">, BuildableType<"$_builder.getType()">; -def F8E4M3FN : Type, "f8E4M3FN type">, +def F8E4M3FN : Type($_self)">, "f8E4M3FN type">, BuildableType<"$_builder.getType()">; -def F8E5M2 : Type, "f8E5M2 type">, +def F8E5M2 : Type($_self)">, "f8E5M2 type">, BuildableType<"$_builder.getType()">; -def F8E4M3 : Type, "f8E4M3 type">, +def F8E4M3 : Type($_self)">, "f8E4M3 type">, BuildableType<"$_builder.getType()">; -def F8E4M3FNUZ : Type, "f8E4M3FNUZ type">, +def F8E4M3FNUZ : Type($_self)">, "f8E4M3FNUZ type">, BuildableType<"$_builder.getType()">; -def F8E4M3B11FNUZ : Type, "f8E4M3B11FNUZ type">, +def F8E4M3B11FNUZ : Type($_self)">, "f8E4M3B11FNUZ type">, BuildableType<"$_builder.getType()">; -def F8E5M2FNUZ : Type, "f8E5M2FNUZ type">, +def F8E5M2FNUZ : Type($_self)">, "f8E5M2FNUZ type">, BuildableType<"$_builder.getType()">; -def F8E3M4 : Type, "f8E3M4 type">, +def F8E3M4 : Type($_self)">, "f8E3M4 type">, BuildableType<"$_builder.getType()">; -def F4E2M1FN : Type, "f4E2M1FN type">, +def F4E2M1FN : Type($_self)">, "f4E2M1FN type">, BuildableType<"$_builder.getType()">; -def F6E2M3FN : Type, "f6E2M3FN type">, +def F6E2M3FN : Type($_self)">, "f6E2M3FN type">, BuildableType<"$_builder.getType()">; -def F6E3M2FN : Type, "f6E3M2FN type">, +def F6E3M2FN : Type, "f6E3M2FN type">, BuildableType<"$_builder.getType()">; -def F8E8M0FNU : Type, "f8E8M0FNU type">, +def F8E8M0FNU : Type($_self)">, "f8E8M0FNU type">, BuildableType<"$_builder.getType()">; def AnyComplex : Type($_self)">, diff --git a/mlir/include/mlir/IR/Types.h b/mlir/include/mlir/IR/Types.h index acd0f894abbbe6..0e82ad2be907ab 100644 --- a/mlir/include/mlir/IR/Types.h +++ b/mlir/include/mlir/IR/Types.h @@ -125,17 +125,6 @@ class Type { // Convenience predicates. This is only for floating point types, // derived types should use isa/dyn_cast. 
bool isIndex() const; - bool isFloat4E2M1FN() const; - bool isFloat6E2M3FN() const; - bool isFloat6E3M2FN() const; - bool isFloat8E5M2() const; - bool isFloat8E4M3() const; - bool isFloat8E4M3FN() const; - bool isFloat8E5M2FNUZ() const; - bool isFloat8E4M3FNUZ() const; - bool isFloat8E4M3B11FNUZ() const; - bool isFloat8E3M4() const; - bool isFloat8E8M0FNU() const; bool isBF16() const; bool isF16() const; bool isTF32() const; diff --git a/mlir/lib/CAPI/IR/BuiltinTypes.cpp b/mlir/lib/CAPI/IR/BuiltinTypes.cpp index 250e4a6bbf8dfd..313d6830b41b2a 100644 --- a/mlir/lib/CAPI/IR/BuiltinTypes.cpp +++ b/mlir/lib/CAPI/IR/BuiltinTypes.cpp @@ -90,7 +90,7 @@ MlirTypeID mlirFloat4E2M1FNTypeGetTypeID() { } bool mlirTypeIsAFloat4E2M1FN(MlirType type) { - return unwrap(type).isFloat4E2M1FN(); + return llvm::isa(unwrap(type)); } MlirType mlirFloat4E2M1FNTypeGet(MlirContext ctx) { @@ -102,7 +102,7 @@ MlirTypeID mlirFloat6E2M3FNTypeGetTypeID() { } bool mlirTypeIsAFloat6E2M3FN(MlirType type) { - return unwrap(type).isFloat6E2M3FN(); + return llvm::isa(unwrap(type)); } MlirType mlirFloat6E2M3FNTypeGet(MlirContext ctx) { @@ -114,7 +114,7 @@ MlirTypeID mlirFloat6E3M2FNTyp
[llvm-branch-commits] [mlir] [mlir][IR] Remove `isF...()` type API for low-precision FP types (PR #123326)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matthias Springer (matthias-springer) Changes Remove `type.isFloat4E2M1FN()` etc. Use `isa(type)` instead. For details, see: https://discourse.llvm.org/t/rethink-on-approach-to-low-precision-fp-types/82361/28 Depends on #123321. --- Patch is 22.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123326.diff 11 Files Affected: - (modified) mlir/include/mlir/IR/CommonTypeConstraints.td (+13-13) - (modified) mlir/include/mlir/IR/Types.h (-11) - (modified) mlir/lib/CAPI/IR/BuiltinTypes.cpp (+24-16) - (modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+20-18) - (modified) mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp (+2-2) - (modified) mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp (+4-5) - (modified) mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp (+4-4) - (modified) mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp (+2-2) - (modified) mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp (+3-3) - (modified) mlir/lib/Dialect/Tosa/IR/TosaOps.cpp (+1-2) - (modified) mlir/lib/IR/Types.cpp (-19) ``diff diff --git a/mlir/include/mlir/IR/CommonTypeConstraints.td b/mlir/include/mlir/IR/CommonTypeConstraints.td index 6f52195c1d7c92..e752cdfb47fbb1 100644 --- a/mlir/include/mlir/IR/CommonTypeConstraints.td +++ b/mlir/include/mlir/IR/CommonTypeConstraints.td @@ -329,31 +329,31 @@ def F64 : F<64>; def F80 : F<80>; def F128 : F<128>; -def BF16 : Type, "bfloat16 type">, +def BF16 : Type($_self)">, "bfloat16 type">, BuildableType<"$_builder.getType()">; -def TF32 : Type, "tf32 type">, +def TF32 : Type($_self)">, "tf32 type">, BuildableType<"$_builder.getType()">; -def F8E4M3FN : Type, "f8E4M3FN type">, +def F8E4M3FN : Type($_self)">, "f8E4M3FN type">, BuildableType<"$_builder.getType()">; -def F8E5M2 : Type, "f8E5M2 type">, +def F8E5M2 : Type($_self)">, "f8E5M2 type">, BuildableType<"$_builder.getType()">; -def F8E4M3 : Type, "f8E4M3 type">, +def F8E4M3 : Type($_self)">, "f8E4M3 type">, BuildableType<"$_builder.getType()">; -def F8E4M3FNUZ : Type, "f8E4M3FNUZ type">, +def F8E4M3FNUZ : Type($_self)">, "f8E4M3FNUZ type">, BuildableType<"$_builder.getType()">; -def F8E4M3B11FNUZ : Type, "f8E4M3B11FNUZ type">, +def F8E4M3B11FNUZ : Type($_self)">, "f8E4M3B11FNUZ type">, BuildableType<"$_builder.getType()">; -def F8E5M2FNUZ : Type, "f8E5M2FNUZ type">, +def F8E5M2FNUZ : Type($_self)">, "f8E5M2FNUZ type">, BuildableType<"$_builder.getType()">; -def F8E3M4 : Type, "f8E3M4 type">, +def F8E3M4 : Type($_self)">, "f8E3M4 type">, BuildableType<"$_builder.getType()">; -def F4E2M1FN : Type, "f4E2M1FN type">, +def F4E2M1FN : Type($_self)">, "f4E2M1FN type">, BuildableType<"$_builder.getType()">; -def F6E2M3FN : Type, "f6E2M3FN type">, +def F6E2M3FN : Type($_self)">, "f6E2M3FN type">, BuildableType<"$_builder.getType()">; -def F6E3M2FN : Type, "f6E3M2FN type">, +def F6E3M2FN : Type, "f6E3M2FN type">, BuildableType<"$_builder.getType()">; -def F8E8M0FNU : Type, "f8E8M0FNU type">, +def F8E8M0FNU : Type($_self)">, "f8E8M0FNU type">, BuildableType<"$_builder.getType()">; def AnyComplex : Type($_self)">, diff --git a/mlir/include/mlir/IR/Types.h b/mlir/include/mlir/IR/Types.h index acd0f894abbbe6..0e82ad2be907ab 100644 --- a/mlir/include/mlir/IR/Types.h +++ b/mlir/include/mlir/IR/Types.h @@ -125,17 +125,6 @@ class Type { // Convenience predicates. This is only for floating point types, // derived types should use isa/dyn_cast. 
bool isIndex() const; - bool isFloat4E2M1FN() const; - bool isFloat6E2M3FN() const; - bool isFloat6E3M2FN() const; - bool isFloat8E5M2() const; - bool isFloat8E4M3() const; - bool isFloat8E4M3FN() const; - bool isFloat8E5M2FNUZ() const; - bool isFloat8E4M3FNUZ() const; - bool isFloat8E4M3B11FNUZ() const; - bool isFloat8E3M4() const; - bool isFloat8E8M0FNU() const; bool isBF16() const; bool isF16() const; bool isTF32() const; diff --git a/mlir/lib/CAPI/IR/BuiltinTypes.cpp b/mlir/lib/CAPI/IR/BuiltinTypes.cpp index 250e4a6bbf8dfd..313d6830b41b2a 100644 --- a/mlir/lib/CAPI/IR/BuiltinTypes.cpp +++ b/mlir/lib/CAPI/IR/BuiltinTypes.cpp @@ -90,7 +90,7 @@ MlirTypeID mlirFloat4E2M1FNTypeGetTypeID() { } bool mlirTypeIsAFloat4E2M1FN(MlirType type) { - return unwrap(type).isFloat4E2M1FN(); + return llvm::isa(unwrap(type)); } MlirType mlirFloat4E2M1FNTypeGet(MlirContext ctx) { @@ -102,7 +102,7 @@ MlirTypeID mlirFloat6E2M3FNTypeGetTypeID() { } bool mlirTypeIsAFloat6E2M3FN(MlirType type) { - return unwrap(type).isFloat6E2M3FN(); + return llvm::isa(unwrap(type)); } MlirType mlirFloat6E2M3FNTypeGet(MlirContext ctx) { @@ -114,7 +114,7 @@ MlirTypeID mlirFloat6E3M2FNTypeGetTypeID() { } bool mlirTypeIsAFloat6E3M2FN(
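The change replaces one `isFloat…()` member predicate per low-precision type with the generic `isa<…Type>(type)` query, so adding a new FP type no longer requires touching the `Type` facade. A self-contained stand-in for that pattern using `dynamic_cast` (the real code uses LLVM's `llvm::isa` casting infrastructure and the MLIR `Float4E2M1FNType` etc. classes, not the toy hierarchy below):

```
// Generic illustration of replacing per-type member predicates with an
// isa<>-style query. Stand-in classes only; not the MLIR type hierarchy.
#include <cassert>

struct Type { virtual ~Type() = default; };
struct Float4E2M1FNType : Type {};
struct Float6E2M3FNType : Type {};

template <typename To> bool isa(const Type &T) {
  return dynamic_cast<const To *>(&T) != nullptr;
}

// Call sites change from `t.isFloat4E2M1FN()` to `isa<Float4E2M1FNType>(t)`.
bool isNarrowFP(const Type &T) {
  return isa<Float4E2M1FNType>(T) || isa<Float6E2M3FNType>(T);
}

int main() {
  Float4E2M1FNType F4;
  assert(isNarrowFP(F4));
}
```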
[llvm-branch-commits] [flang] [Flang] Introduce FortranSupport (PR #122069)
https://github.com/jplehr approved this pull request. I did not find issues when running this through a couple of configurations I typically use. https://github.com/llvm/llvm-project/pull/122069 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits