[openmp] [clang] [llvm] [clang-tools-extra] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough created https://github.com/llvm/llvm-project/pull/76587 This pull request is the first part of an ongoing effort to extends PGO instrumentation to GPU device code. This PR makes the following changes: - Adds blank registration functions to device RTL - Gives PGO globals protected visibility when targeting a supported GPU - Handles any addrspace casts for PGO calls - Implements PGO global extraction in GPU plugins (currently only dumps info) These changes can be tested by supplying `-fprofile-instrument=clang` while targeting a GPU. >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 1/5] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 2/5] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.
[llvm] [openmp] [clang] [clang-tools-extra] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -428,13 +428,22 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } +bool isGPUProfTarget(const Module &M) { + const auto &triple = M.getTargetTriple(); + return triple.rfind("nvptx", 0) == 0 || triple.rfind("amdgcn", 0) == 0 || + triple.rfind("r600", 0) == 0; +} + EthanLuisMcDonough wrote: I did briefly consider turning the `isGPUProfTarget` function into a method of either the Module or Triple class, but I was worried that would be out of the scope of this PR. Do you think adding an `isGPU` method to the Triple class would make things cleaner? https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [openmp] [clang-tools-extra] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 1/6] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 2/6] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This
[clang-tools-extra] [openmp] [llvm] [clang] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 1/7] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 2/7] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This
[clang-tools-extra] [openmp] [llvm] [clang] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 1/8] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 2/8] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This
[llvm] [openmp] [libc] [libcxx] [compiler-rt] [lldb] [lld] [clang-tools-extra] [flang] [clang] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 1/8] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 2/8] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This
[openmp] [libcxx] [clang-tools-extra] [clang] [llvm] [libc] [flang] [lld] [lldb] [compiler-rt] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -58,6 +60,22 @@ class GlobalTy { void setPtr(void *P) { Ptr = P; } }; +typedef void *IntPtrT; +struct __llvm_profile_data { +#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +}; + +/// PGO profiling data extracted from a GPU device +struct GPUProfGlobals { + std::string names; + std::vector> counts; + std::vector<__llvm_profile_data> data; + Triple targetTriple; + EthanLuisMcDonough wrote: The basic idea is that the fields in this struct are going to be passed to compiler-rt to be made into a profraw file for this specific target. I don't know if `llvm::StringRef` would work for the names field. The names data is constant on the GPU device, but dynamic data still has to be allocated on the host in order to read the GPU value. I think it makes the most sense to make the struct own the name data. Do you think I should change all the vectors to `SmallVector` or just the innermost `Counts` vector? https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc] [lld] [openmp] [llvm] [flang] [libcxx] [lldb] [compiler-rt] [clang-tools-extra] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -58,6 +60,22 @@ class GlobalTy { void setPtr(void *P) { Ptr = P; } }; +typedef void *IntPtrT; EthanLuisMcDonough wrote: `IntPtrT` isn't defined in `profile/InstrProfData.inc`. There are multiple examples of this type being defined. before InstProfData.inc is included. I'm not entirely sure what its for, but I'm assuming its to account for different platforms with different integer sizes. https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[openmp] [clang] [lldb] [libc] [compiler-rt] [libcxx] [flang] [lld] [clang-tools-extra] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -163,3 +163,87 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device, return Plugin::success(); } + +bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image) { + GlobalTy global(getInstrProfNamesVarName().str(), 0); + if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) { +consumeError(std::move(Err)); +return false; + } + return true; +} + +Expected +GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image) { + GPUProfGlobals profdata; + auto ELFObj = getELFObjectFile(Image); + if (!ELFObj) +return ELFObj.takeError(); + profdata.targetTriple = ELFObj->makeTriple(); + // Iterate through elf symbols + for (auto &sym : ELFObj->symbols()) { +if (auto name = sym.getName()) { + // Check if given current global is a profiling global based + // on name + if (name->equals(getInstrProfNamesVarName())) { +// Read in profiled function names +std::vector chars(sym.getSize() / sizeof(char), ' '); EthanLuisMcDonough wrote: I'm allocating a vector so that I have memory to write the GPU value to. https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [openmp] [libc] [compiler-rt] [llvm] [clang] [lld] [flang] [libcxx] [lldb] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -163,3 +163,87 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device, return Plugin::success(); } + +bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image) { + GlobalTy global(getInstrProfNamesVarName().str(), 0); + if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) { +consumeError(std::move(Err)); +return false; + } + return true; +} + +Expected +GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image) { + GPUProfGlobals profdata; + auto ELFObj = getELFObjectFile(Image); + if (!ELFObj) +return ELFObj.takeError(); + profdata.targetTriple = ELFObj->makeTriple(); + // Iterate through elf symbols + for (auto &sym : ELFObj->symbols()) { +if (auto name = sym.getName()) { + // Check if given current global is a profiling global based + // on name + if (name->equals(getInstrProfNamesVarName())) { +// Read in profiled function names +std::vector chars(sym.getSize() / sizeof(char), ' '); +GlobalTy NamesGlobal(name->str(), sym.getSize(), chars.data()); EthanLuisMcDonough wrote: `__llvm_prf_nm` is emitted as an array. For example: ``` @__llvm_prf_nm = protected addrspace(1) constant [63 x i8] c"_=x\DA+I-.\D1K\B6\8A\8F\CF\CF-\88\CFOK\CB\C9OL\C9\CCK\8F7\B1\887M65J41\B5\88\CFM\CC\CC\8B\CF1\B4d,!Z\B1\B1\01X\B1!\00O\F6 \CF", section "__llvm_prf_names", align 1 ``` Eventually this data is going to be passed to a function in compiler-rt's profiling library so it can be used to generate a profiling file. https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc] [lld] [openmp] [llvm] [flang] [libcxx] [lldb] [compiler-rt] [clang-tools-extra] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -163,3 +163,87 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device, return Plugin::success(); } + +bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image) { + GlobalTy global(getInstrProfNamesVarName().str(), 0); + if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) { +consumeError(std::move(Err)); +return false; + } + return true; +} + +Expected +GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image) { + GPUProfGlobals profdata; + auto ELFObj = getELFObjectFile(Image); + if (!ELFObj) +return ELFObj.takeError(); + profdata.targetTriple = ELFObj->makeTriple(); + // Iterate through elf symbols + for (auto &sym : ELFObj->symbols()) { +if (auto name = sym.getName()) { + // Check if given current global is a profiling global based + // on name + if (name->equals(getInstrProfNamesVarName())) { +// Read in profiled function names +std::vector chars(sym.getSize() / sizeof(char), ' '); +GlobalTy NamesGlobal(name->str(), sym.getSize(), chars.data()); +if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal)) + return Err; +std::string names(chars.begin(), chars.end()); +profdata.names = std::move(names); + } else if (name->starts_with(getInstrProfCountersVarPrefix())) { EthanLuisMcDonough wrote: Those functions are defined in [InstrProf.h](https://github.com/llvm/llvm-project/blob/597086c60959dd5b3c032552e8b42dd1d053f233/llvm/include/llvm/ProfileData/InstrProf.h#L92-L96). https://github.com/llvm/llvm-project/blob/597086c60959dd5b3c032552e8b42dd1d053f233/llvm/include/llvm/ProfileData/InstrProf.h#L92-L96 They're used in the creation of PGO global names. https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[flang] [libc] [llvm] [clang] [lld] [lldb] [compiler-rt] [clang-tools-extra] [libcxx] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -58,6 +60,22 @@ class GlobalTy { void setPtr(void *P) { Ptr = P; } }; +typedef void *IntPtrT; +struct __llvm_profile_data { +#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +}; + +/// PGO profiling data extracted from a GPU device +struct GPUProfGlobals { + std::string names; + std::vector> counts; + std::vector<__llvm_profile_data> data; + Triple targetTriple; + EthanLuisMcDonough wrote: I can't seem to use `SmallVector` for `__llvm_profile_data` because many of its defined fields are const qualified and it needs an assignment operator definition. I'm able to use `std::remove_const`, but that feels kind of hack-y. Not sure if the best solution is to use `std::vector`, `std::remove_const`, or remove the const qualifiers in [InstrProfData.inc](https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/ProfileData/InstrProfData.inc). https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[lld] [libcxx] [clang-tools-extra] [libc] [flang] [compiler-rt] [lldb] [clang] [openmp] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 1/9] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 2/9] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This
[lldb] [libc] [compiler-rt] [flang] [libcxx] [clang] [openmp] [llvm] [lld] [clang-tools-extra] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -959,8 +959,12 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, llvm::PointerType::getUnqual(CGM.getLLVMContext())); EthanLuisMcDonough wrote: `FuncNameVar` has an addrspace of 1 on GPU devices. The `instrprof_increment` throws a type error if you pass in a pointer without addrspace 0. Is there a reason why one should use `PointerType::get(ctx, 0)` over `PointerType::getUnqual`? Is the latter bad practice/being deprecated? https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/13] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/13] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/18] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/18] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[libc] [openmp] [clang] [flang] [clang-tools-extra] [lldb] [libcxx] [lld] [compiler-rt] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/13] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00e..1d887d5cb581276 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab4..55ee15d068c67b7 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 000..68c7744cd60752f --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 000..799477f5e47d273 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/13] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b1647..edae6885b528ac7 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrsp
[clang] [libc] [libcxx] [flang] [openmp] [lld] [compiler-rt] [lldb] [clang-tools-extra] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/13] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00e..1d887d5cb581276 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab4..55ee15d068c67b7 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 000..68c7744cd60752f --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 000..799477f5e47d273 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/13] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b1647..edae6885b528ac7 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrsp
[libcxx] [clang-tools-extra] [clang] [compiler-rt] [openmp] [llvm] [flang] [lld] [libc] [lldb] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/10] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/10] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[llvm] [clang-tools-extra] [lldb] [libcxx] [openmp] [clang] [compiler-rt] [libc] [flang] [lld] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/11] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00e..1d887d5cb581276 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab4..55ee15d068c67b7 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 000..68c7744cd60752f --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 000..799477f5e47d273 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/11] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b1647..edae6885b528ac7 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrsp
[lld] [libc] [llvm] [lldb] [flang] [openmp] [clang] [compiler-rt] [clang-tools-extra] [libcxx] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/11] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/11] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] [flang] [libc] [openmp] [lldb] [llvm] [lld] [libcxx] [clang-tools-extra] [compiler-rt] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/12] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/12] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] ce3a1c5 - [flang] Handle unsupported warning flags
Author: Ethan Luis McDonough Date: 2023-02-21T16:14:19-06:00 New Revision: ce3a1c59e18027e7d18a2a0a99e4bc81ccc03491 URL: https://github.com/llvm/llvm-project/commit/ce3a1c59e18027e7d18a2a0a99e4bc81ccc03491 DIFF: https://github.com/llvm/llvm-project/commit/ce3a1c59e18027e7d18a2a0a99e4bc81ccc03491.diff LOG: [flang] Handle unsupported warning flags This PR makes flang emit a warning when the user passes an unsupported gfortran warning flag in as a CLI arg. This PR also checks each `-W` argument instead of just looking at the last one passed in. Reviewed By: awarzynski Differential Revision: https://reviews.llvm.org/D143301 Added: flang/test/Driver/w-arg-unsupported.f90 flang/test/Driver/werror-all.f90 flang/test/Driver/wextra-ok.f90 Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Flang.cpp flang/docs/FlangDriver.md flang/lib/Frontend/CompilerInvocation.cpp Removed: diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 35325de4a34a4..77fb1e00585a0 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -113,6 +113,9 @@ def warn_drv_unsupported_option_for_target : Warning< def warn_drv_unsupported_option_for_flang : Warning< "the argument '%0' is not supported for option '%1'. Mapping to '%1%2'">, InGroup; +def warn_drv_unsupported_diag_option_for_flang : Warning< + "The warning option '-%0' is not supported">, + InGroup; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a26af0af2c946..a25854daf6a1b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -235,6 +235,10 @@ def clang_ignored_f_Group : OptionGroup<"">, def clang_ignored_m_Group : OptionGroup<"">, Group, Flags<[Ignored]>; +// Unsupported flang groups +def flang_ignored_w_Group : OptionGroup<"">, + Group, Flags<[FlangOnlyOption, Ignored]>; + // Group for clang options in the process of deprecation. // Please include the version that deprecated the flag as comment to allow // easier garbage collection. @@ -4871,6 +4875,10 @@ multiclass BooleanFFlag { def fno_#NAME : Flag<["-"], "fno-"#name>; } +multiclass FlangIgnoredDiagOpt { + def unsupported_warning_w#NAME : Flag<["-", "--"], "W"#name>, Group; +} + defm : BooleanFFlag<"keep-inline-functions">, Group; def fprofile_dir : Joined<["-"], "fprofile-dir=">, Group; @@ -5030,6 +5038,36 @@ defm second_underscore : BooleanFFlag<"second-underscore">, Group, Group; defm whole_file : BooleanFFlag<"whole-file">, Group; +// -W options unsupported by the flang compiler +// If any of these options are passed into flang's compiler driver, +// a warning will be raised and the argument will be claimed +defm : FlangIgnoredDiagOpt<"extra">; +defm : FlangIgnoredDiagOpt<"aliasing">; +defm : FlangIgnoredDiagOpt<"ampersand">; +defm : FlangIgnoredDiagOpt<"array-bounds">; +defm : FlangIgnoredDiagOpt<"c-binding-type">; +defm : FlangIgnoredDiagOpt<"character-truncation">; +defm : FlangIgnoredDiagOpt<"conversion">; +defm : FlangIgnoredDiagOpt<"do-subscript">; +defm : FlangIgnoredDiagOpt<"function-elimination">; +defm : FlangIgnoredDiagOpt<"implicit-interface">; +defm : FlangIgnoredDiagOpt<"implicit-procedure">; +defm : FlangIgnoredDiagOpt<"intrinsic-shadow">; +defm : FlangIgnoredDiagOpt<"use-without-only">; +defm : FlangIgnoredDiagOpt<"intrinsics-std">; +defm : FlangIgnoredDiagOpt<"line-truncation">; +defm : FlangIgnoredDiagOpt<"no-align-commons">; +defm : FlangIgnoredDiagOpt<"no-overwrite-recursive">; +defm : FlangIgnoredDiagOpt<"no-tabs">; +defm : FlangIgnoredDiagOpt<"real-q-constant">; +defm : FlangIgnoredDiagOpt<"surprising">; +defm : FlangIgnoredDiagOpt<"underflow">; +defm : FlangIgnoredDiagOpt<"unused-parameter">; +defm : FlangIgnoredDiagOpt<"realloc-lhs">; +defm : FlangIgnoredDiagOpt<"realloc-lhs-all">; +defm : FlangIgnoredDiagOpt<"frontend-loop-interchange">; +defm : FlangIgnoredDiagOpt<"target-lifetime">; + // C++ SYCL options def fsycl : Flag<["-"], "fsycl">, Flags<[NoXarchOption, CoreOption]>, Group, HelpText<"Enables SYCL kernels compilation for device">; diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index cd3907c099481..b50d2c5c4f8d2 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -329,6 +329,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, A->render(Args, CmdArgs); } + // Remove any unsupported gfortran diagnostic options + for (const Arg *A : Args.filtered(options::OPT_flang_ignored_w_Group)) { +
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/14] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/14] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/14] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/14] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/15] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/15] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/15] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/15] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/16] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/16] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/17] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b00..1d887d5cb58127 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80ab..55ee15d068c67b 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 00..68c7744cd60752 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 00..799477f5e47d27 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/17] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b164..edae6885b528ac 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + //
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
@@ -2067,6 +2067,10 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy, Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy, bool OnlyIfReduced) { + // Skip cast if types are identical EthanLuisMcDonough wrote: `CastInst::castIsValid` returns false for addrspace casts if two pointers share the same addrspace. https://github.com/llvm/llvm-project/blob/2de269a641e4ffbb7a44e559c4c0a91bb66df823/llvm/lib/IR/Instructions.cpp#L3802-L3805 https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (PR #102691)
https://github.com/EthanLuisMcDonough created https://github.com/llvm/llvm-project/pull/102691 This pull request is a revised version of #76587. This pull request fixes some build issues that were present in the previous version of this change. > This pull request is the first part of an ongoing effort to extends PGO > instrumentation to GPU device code. This PR makes the following changes: > > - Adds blank registration functions to device RTL > - Gives PGO globals protected visibility when targeting a supported GPU > - Handles any addrspace casts for PGO calls > - Implements PGO global extraction in GPU plugins (currently only dumps info) > > These changes can be tested by supplying `-fprofile-instrument=clang` while > targeting a GPU. >From 24b1a99a1c014e1015fbba137430c5c6f3e414c5 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 28 Jun 2024 12:39:19 -0500 Subject: [PATCH 1/3] Changes from old gpuprof branch --- clang/lib/CodeGen/CodeGenPGO.cpp | 13 ++- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 + llvm/include/llvm/ProfileData/InstrProf.h | 4 + llvm/lib/ProfileData/InstrProf.cpp| 25 - .../Instrumentation/InstrProfiling.cpp| 44 +++-- .../Instrumentation/PGOInstrumentation.cpp| 24 +++-- offload/DeviceRTL/CMakeLists.txt | 2 + offload/DeviceRTL/include/Profiling.h | 21 offload/DeviceRTL/src/Profiling.cpp | 22 + .../common/include/GlobalHandler.h| 29 +- .../common/src/GlobalHandler.cpp | 96 +++ .../common/src/PluginInterface.cpp| 14 +++ offload/test/CMakeLists.txt | 6 ++ offload/test/lit.cfg | 3 + offload/test/lit.site.cfg.in | 2 +- offload/test/offloading/pgo1.c| 77 +++ 16 files changed, 358 insertions(+), 27 deletions(-) create mode 100644 offload/DeviceRTL/include/Profiling.h create mode 100644 offload/DeviceRTL/src/Profiling.cpp create mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index ea726b5708a4a..a628082851938 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1193,10 +1193,15 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), - Builder.getInt32(Counter), StepV}; + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *NormalizedFuncNameVarPtr = + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); + + llvm::Value *Args[] = { + NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; if (llvm::EnableSingleByteCoverage) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index fe09bb8177c28..51e97458825ac 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -502,6 +502,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 7fa6d44990a14..818a34bfddfb9 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -177,6 +177,10 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } +/// Determines whether module targets a GPU eligable for PGO +/// instrumentation +bool isGPUProfTarget(const Module &M); + /// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is /// for front-end (Clang, etc) instrumentation. /// Return the modified name for function \c F suitable to be diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index c7749f33d9af5..1b9a5249cbae5 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -432,13 +432,31 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } +bool is
[clang] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (PR #102691)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/102691 >From 24b1a99a1c014e1015fbba137430c5c6f3e414c5 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 28 Jun 2024 12:39:19 -0500 Subject: [PATCH 1/4] Changes from old gpuprof branch --- clang/lib/CodeGen/CodeGenPGO.cpp | 13 ++- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 + llvm/include/llvm/ProfileData/InstrProf.h | 4 + llvm/lib/ProfileData/InstrProf.cpp| 25 - .../Instrumentation/InstrProfiling.cpp| 44 +++-- .../Instrumentation/PGOInstrumentation.cpp| 24 +++-- offload/DeviceRTL/CMakeLists.txt | 2 + offload/DeviceRTL/include/Profiling.h | 21 offload/DeviceRTL/src/Profiling.cpp | 22 + .../common/include/GlobalHandler.h| 29 +- .../common/src/GlobalHandler.cpp | 96 +++ .../common/src/PluginInterface.cpp| 14 +++ offload/test/CMakeLists.txt | 6 ++ offload/test/lit.cfg | 3 + offload/test/lit.site.cfg.in | 2 +- offload/test/offloading/pgo1.c| 77 +++ 16 files changed, 358 insertions(+), 27 deletions(-) create mode 100644 offload/DeviceRTL/include/Profiling.h create mode 100644 offload/DeviceRTL/src/Profiling.cpp create mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index ea726b5708a4a1..a6280828519385 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1193,10 +1193,15 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), - Builder.getInt32(Counter), StepV}; + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *NormalizedFuncNameVarPtr = + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); + + llvm::Value *Args[] = { + NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; if (llvm::EnableSingleByteCoverage) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index fe09bb8177c28e..51e97458825ac5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -502,6 +502,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 7fa6d44990a146..818a34bfddfb97 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -177,6 +177,10 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } +/// Determines whether module targets a GPU eligable for PGO +/// instrumentation +bool isGPUProfTarget(const Module &M); + /// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is /// for front-end (Clang, etc) instrumentation. /// Return the modified name for function \c F suitable to be diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index c7749f33d9af55..1b9a5249cbae51 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -432,13 +432,31 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } +bool isGPUProfTarget(const Module &M) { + const auto &T = Triple(M.getTargetTriple()); + return T.isAMDGPU() || T.isNVPTX(); +} + +void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) { + // If the target is a GPU, make the symbol protected so it can + // be read from the host device + if (isGPUProfTarget(M)) +FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); + // Hide the symbol so that we correctly get a copy for each executable. + else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) +FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); +} + GlobalVariable *createPGOFuncNameVar(Module &M
[clang] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (PR #102691)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/102691 >From 24b1a99a1c014e1015fbba137430c5c6f3e414c5 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 28 Jun 2024 12:39:19 -0500 Subject: [PATCH 1/4] Changes from old gpuprof branch --- clang/lib/CodeGen/CodeGenPGO.cpp | 13 ++- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 + llvm/include/llvm/ProfileData/InstrProf.h | 4 + llvm/lib/ProfileData/InstrProf.cpp| 25 - .../Instrumentation/InstrProfiling.cpp| 44 +++-- .../Instrumentation/PGOInstrumentation.cpp| 24 +++-- offload/DeviceRTL/CMakeLists.txt | 2 + offload/DeviceRTL/include/Profiling.h | 21 offload/DeviceRTL/src/Profiling.cpp | 22 + .../common/include/GlobalHandler.h| 29 +- .../common/src/GlobalHandler.cpp | 96 +++ .../common/src/PluginInterface.cpp| 14 +++ offload/test/CMakeLists.txt | 6 ++ offload/test/lit.cfg | 3 + offload/test/lit.site.cfg.in | 2 +- offload/test/offloading/pgo1.c| 77 +++ 16 files changed, 358 insertions(+), 27 deletions(-) create mode 100644 offload/DeviceRTL/include/Profiling.h create mode 100644 offload/DeviceRTL/src/Profiling.cpp create mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index ea726b5708a4a1..a6280828519385 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1193,10 +1193,15 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), - Builder.getInt32(Counter), StepV}; + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *NormalizedFuncNameVarPtr = + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); + + llvm::Value *Args[] = { + NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; if (llvm::EnableSingleByteCoverage) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index fe09bb8177c28e..51e97458825ac5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -502,6 +502,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 7fa6d44990a146..818a34bfddfb97 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -177,6 +177,10 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } +/// Determines whether module targets a GPU eligable for PGO +/// instrumentation +bool isGPUProfTarget(const Module &M); + /// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is /// for front-end (Clang, etc) instrumentation. /// Return the modified name for function \c F suitable to be diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index c7749f33d9af55..1b9a5249cbae51 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -432,13 +432,31 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } +bool isGPUProfTarget(const Module &M) { + const auto &T = Triple(M.getTargetTriple()); + return T.isAMDGPU() || T.isNVPTX(); +} + +void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) { + // If the target is a GPU, make the symbol protected so it can + // be read from the host device + if (isGPUProfTarget(M)) +FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); + // Hide the symbol so that we correctly get a copy for each executable. + else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) +FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); +} + GlobalVariable *createPGOFuncNameVar(Module &M
[clang] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (PR #102691)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/102691 >From 24b1a99a1c014e1015fbba137430c5c6f3e414c5 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 28 Jun 2024 12:39:19 -0500 Subject: [PATCH 1/5] Changes from old gpuprof branch --- clang/lib/CodeGen/CodeGenPGO.cpp | 13 ++- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 + llvm/include/llvm/ProfileData/InstrProf.h | 4 + llvm/lib/ProfileData/InstrProf.cpp| 25 - .../Instrumentation/InstrProfiling.cpp| 44 +++-- .../Instrumentation/PGOInstrumentation.cpp| 24 +++-- offload/DeviceRTL/CMakeLists.txt | 2 + offload/DeviceRTL/include/Profiling.h | 21 offload/DeviceRTL/src/Profiling.cpp | 22 + .../common/include/GlobalHandler.h| 29 +- .../common/src/GlobalHandler.cpp | 96 +++ .../common/src/PluginInterface.cpp| 14 +++ offload/test/CMakeLists.txt | 6 ++ offload/test/lit.cfg | 3 + offload/test/lit.site.cfg.in | 2 +- offload/test/offloading/pgo1.c| 77 +++ 16 files changed, 358 insertions(+), 27 deletions(-) create mode 100644 offload/DeviceRTL/include/Profiling.h create mode 100644 offload/DeviceRTL/src/Profiling.cpp create mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index ea726b5708a4a1..a6280828519385 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1193,10 +1193,15 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), - Builder.getInt32(Counter), StepV}; + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *NormalizedFuncNameVarPtr = + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); + + llvm::Value *Args[] = { + NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; if (llvm::EnableSingleByteCoverage) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index fe09bb8177c28e..51e97458825ac5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -502,6 +502,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 7fa6d44990a146..818a34bfddfb97 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -177,6 +177,10 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } +/// Determines whether module targets a GPU eligable for PGO +/// instrumentation +bool isGPUProfTarget(const Module &M); + /// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is /// for front-end (Clang, etc) instrumentation. /// Return the modified name for function \c F suitable to be diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index c7749f33d9af55..1b9a5249cbae51 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -432,13 +432,31 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } +bool isGPUProfTarget(const Module &M) { + const auto &T = Triple(M.getTargetTriple()); + return T.isAMDGPU() || T.isNVPTX(); +} + +void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) { + // If the target is a GPU, make the symbol protected so it can + // be read from the host device + if (isGPUProfTarget(M)) +FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); + // Hide the symbol so that we correctly get a copy for each executable. + else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) +FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); +} + GlobalVariable *createPGOFuncNameVar(Module &M
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/93365 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/28] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/28] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
https://github.com/EthanLuisMcDonough ready_for_review https://github.com/llvm/llvm-project/pull/93365 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/93365 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/29] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/29] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/20] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/20] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/93365 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/29] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/29] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/21] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/21] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/93365 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/30] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/30] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (PR #102691)
@@ -985,7 +990,8 @@ void FunctionInstrumenter::instrument() { Intrinsic::getDeclaration(&M, PGOBlockCoverage ? Intrinsic::instrprof_cover : Intrinsic::instrprof_increment), -{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); +{NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters), + Builder.getInt32(I++)}); EthanLuisMcDonough wrote: Thanks for reaching out. `instrumentSelects` accepts a `GlobalVariable` as an argument, not a `Value`. The global variable addrspace cast handling is done inside `SelectInstVisitor::instrumentOneSelectInst`: https://github.com/llvm/llvm-project/blob/ee764a2603269001aae808c906d2ed05ddbd0471/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp#L1728-L1734 I haven't encountered any errors raised by this snippet, but please let me know if you've been having any issues with it on your end. https://github.com/llvm/llvm-project/pull/102691 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/18] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/18] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/18] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/18] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/18] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/18] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/20] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/20] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/20] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/20] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
https://github.com/EthanLuisMcDonough created https://github.com/llvm/llvm-project/pull/93365 This pull request is the second part of an ongoing effort to extends PGO instrumentation to GPU device code and depends on #76587. This PR makes the following changes: - Introduces `__llvm_write_custom_profile` to PGO compiler-rt library. This is an external function that can be used to write profiles with custom data to target-specific files. - Adds `__llvm_write_custom_profile` as weak symbol to libomptarget so that it can write the collected data to a profraw file. >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/27] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/27] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/93365 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/27] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/27] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (PR #102691)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/102691 >From 24b1a99a1c014e1015fbba137430c5c6f3e414c5 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 28 Jun 2024 12:39:19 -0500 Subject: [PATCH 1/6] Changes from old gpuprof branch --- clang/lib/CodeGen/CodeGenPGO.cpp | 13 ++- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 + llvm/include/llvm/ProfileData/InstrProf.h | 4 + llvm/lib/ProfileData/InstrProf.cpp| 25 - .../Instrumentation/InstrProfiling.cpp| 44 +++-- .../Instrumentation/PGOInstrumentation.cpp| 24 +++-- offload/DeviceRTL/CMakeLists.txt | 2 + offload/DeviceRTL/include/Profiling.h | 21 offload/DeviceRTL/src/Profiling.cpp | 22 + .../common/include/GlobalHandler.h| 29 +- .../common/src/GlobalHandler.cpp | 96 +++ .../common/src/PluginInterface.cpp| 14 +++ offload/test/CMakeLists.txt | 6 ++ offload/test/lit.cfg | 3 + offload/test/lit.site.cfg.in | 2 +- offload/test/offloading/pgo1.c| 77 +++ 16 files changed, 358 insertions(+), 27 deletions(-) create mode 100644 offload/DeviceRTL/include/Profiling.h create mode 100644 offload/DeviceRTL/src/Profiling.cpp create mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index ea726b5708a4a1..a6280828519385 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1193,10 +1193,15 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), - Builder.getInt32(Counter), StepV}; + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *NormalizedFuncNameVarPtr = + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); + + llvm::Value *Args[] = { + NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; if (llvm::EnableSingleByteCoverage) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index fe09bb8177c28e..51e97458825ac5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -502,6 +502,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 7fa6d44990a146..818a34bfddfb97 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -177,6 +177,10 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } +/// Determines whether module targets a GPU eligable for PGO +/// instrumentation +bool isGPUProfTarget(const Module &M); + /// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is /// for front-end (Clang, etc) instrumentation. /// Return the modified name for function \c F suitable to be diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index c7749f33d9af55..1b9a5249cbae51 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -432,13 +432,31 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } +bool isGPUProfTarget(const Module &M) { + const auto &T = Triple(M.getTargetTriple()); + return T.isAMDGPU() || T.isNVPTX(); +} + +void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) { + // If the target is a GPU, make the symbol protected so it can + // be read from the host device + if (isGPUProfTarget(M)) +FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); + // Hide the symbol so that we correctly get a copy for each executable. + else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) +FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); +} + GlobalVariable *createPGOFuncNameVar(Module &M
[clang] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (PR #102691)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/102691 >From 24b1a99a1c014e1015fbba137430c5c6f3e414c5 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 28 Jun 2024 12:39:19 -0500 Subject: [PATCH 1/6] Changes from old gpuprof branch --- clang/lib/CodeGen/CodeGenPGO.cpp | 13 ++- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 + llvm/include/llvm/ProfileData/InstrProf.h | 4 + llvm/lib/ProfileData/InstrProf.cpp| 25 - .../Instrumentation/InstrProfiling.cpp| 44 +++-- .../Instrumentation/PGOInstrumentation.cpp| 24 +++-- offload/DeviceRTL/CMakeLists.txt | 2 + offload/DeviceRTL/include/Profiling.h | 21 offload/DeviceRTL/src/Profiling.cpp | 22 + .../common/include/GlobalHandler.h| 29 +- .../common/src/GlobalHandler.cpp | 96 +++ .../common/src/PluginInterface.cpp| 14 +++ offload/test/CMakeLists.txt | 6 ++ offload/test/lit.cfg | 3 + offload/test/lit.site.cfg.in | 2 +- offload/test/offloading/pgo1.c| 77 +++ 16 files changed, 358 insertions(+), 27 deletions(-) create mode 100644 offload/DeviceRTL/include/Profiling.h create mode 100644 offload/DeviceRTL/src/Profiling.cpp create mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index ea726b5708a4a1..a6280828519385 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1193,10 +1193,15 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), - Builder.getInt32(Counter), StepV}; + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *NormalizedFuncNameVarPtr = + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); + + llvm::Value *Args[] = { + NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; if (llvm::EnableSingleByteCoverage) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index fe09bb8177c28e..51e97458825ac5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -502,6 +502,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 7fa6d44990a146..818a34bfddfb97 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -177,6 +177,10 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } +/// Determines whether module targets a GPU eligable for PGO +/// instrumentation +bool isGPUProfTarget(const Module &M); + /// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is /// for front-end (Clang, etc) instrumentation. /// Return the modified name for function \c F suitable to be diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index c7749f33d9af55..1b9a5249cbae51 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -432,13 +432,31 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } +bool isGPUProfTarget(const Module &M) { + const auto &T = Triple(M.getTargetTriple()); + return T.isAMDGPU() || T.isNVPTX(); +} + +void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) { + // If the target is a GPU, make the symbol protected so it can + // be read from the host device + if (isGPUProfTarget(M)) +FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); + // Hide the symbol so that we correctly get a copy for each executable. + else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) +FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); +} + GlobalVariable *createPGOFuncNameVar(Module &M
[clang] [llvm] [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (PR #102691)
https://github.com/EthanLuisMcDonough closed https://github.com/llvm/llvm-project/pull/102691 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Add GPU profiling flags to driver (PR #94268)
EthanLuisMcDonough wrote: @jhuber6 The clang format errors are mostly due to my local version of `clang-format` disagreeing with the buildbot's version. Its a bit annoying, but it shouldn't be too much of a problem given I plan on squashing and merging once this gets approved. I added new flags for GPU PGO specifically because I didn't want to modify the PGO flags' existing behavior. PGO has a significant runtime cost, so I figured it would be best for the end user experience to only enable PGO on the GPU when it was specifically requested. https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Add GPU profiling flags to driver (PR #94268)
EthanLuisMcDonough wrote: > Is this something that specifically requires its own flag? Or could we just > do `-Xarch_device -fprofile-generate`. Right now the `-fprofile-generate-gpu` and `-fprofile-instr-generate-gpu` flags make sure that the compiler-rt profiling library is included even if host profiling isn't enabled, but your suggestion seems quite nice and compact. I'm open to looking into this. Thoughts @jdoerfert? https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587 >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/23] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 + 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//=== Profiling.h - OpenMP interface -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// +//===--===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===--- Profiling.cpp C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/23] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 -- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is re
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
https://github.com/EthanLuisMcDonough edited https://github.com/llvm/llvm-project/pull/93365 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
https://github.com/EthanLuisMcDonough closed https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
EthanLuisMcDonough wrote: Which platform are you building this on? https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
EthanLuisMcDonough wrote: > I'm building on an x86 + AMD GPU. What fails is this command: > > ``` > [8/14] Performing build step for 'runtimes' > [1/4] Linking CXX shared library > /home/dobercea/upstream/llvm-project/build/lib/libomptarget.so.19.0git > FAILED: /home/dobercea/upstream/llvm-project/build/lib/libomptarget.so.19.0git > : && /home/dobercea/upstream/llvm-project/build/./bin/clang++ > --target=x86_64-unknown-linux-gnu -fPIC [] > ``` Could you share your CMake build command? I'd like to try and replicate this. https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 2c8b912 - Revert "[PGO][OpenMP] Instrumentation for GPU devices (#76587)"
Author: Ethan Luis McDonough Date: 2024-06-28T12:30:45-05:00 New Revision: 2c8b912f630f9ec647a4870b9c5ee922c2ec1298 URL: https://github.com/llvm/llvm-project/commit/2c8b912f630f9ec647a4870b9c5ee922c2ec1298 DIFF: https://github.com/llvm/llvm-project/commit/2c8b912f630f9ec647a4870b9c5ee922c2ec1298.diff LOG: Revert "[PGO][OpenMP] Instrumentation for GPU devices (#76587)" This reverts commit 5fd2af38e461445c583d7ffc2fe23858966eee76. It caused build issues and broke the buildbot. Added: Modified: clang/lib/CodeGen/CodeGenPGO.cpp llvm/include/llvm/Frontend/OpenMP/OMPKinds.def llvm/include/llvm/ProfileData/InstrProf.h llvm/lib/ProfileData/InstrProf.cpp llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp offload/DeviceRTL/CMakeLists.txt offload/plugins-nextgen/common/include/GlobalHandler.h offload/plugins-nextgen/common/src/GlobalHandler.cpp offload/plugins-nextgen/common/src/PluginInterface.cpp offload/test/CMakeLists.txt offload/test/lit.cfg offload/test/lit.site.cfg.in Removed: offload/DeviceRTL/include/Profiling.h offload/DeviceRTL/src/Profiling.cpp offload/test/offloading/pgo1.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index a628082851938..ea726b5708a4a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1193,15 +1193,10 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - // Make sure that pointer to global is passed in with zero addrspace - // This is relevant during GPU profiling - auto *NormalizedFuncNameVarPtr = - llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); - - llvm::Value *Args[] = { - NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; + llvm::Value *Args[] = {FuncNameVar, + Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), + Builder.getInt32(Counter), StepV}; if (llvm::EnableSingleByteCoverage) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 51e97458825ac..fe09bb8177c28 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -502,9 +502,6 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) -__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) -__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) - __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 818a34bfddfb9..7fa6d44990a14 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -177,10 +177,6 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } -/// Determines whether module targets a GPU eligable for PGO -/// instrumentation -bool isGPUProfTarget(const Module &M); - /// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is /// for front-end (Clang, etc) instrumentation. /// Return the modified name for function \c F suitable to be diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 1b9a5249cbae5..c7749f33d9af5 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -432,31 +432,13 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } -bool isGPUProfTarget(const Module &M) { - const auto &T = Triple(M.getTargetTriple()); - return T.isAMDGPU() || T.isNVPTX(); -} - -void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) { - // If the target is a GPU, make the symbol protected so it can - // be read from the host device - if (isGPUProfTarget(M)) -FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); - // Hide the symbol so that we correctly get a copy for each executable. - else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) -FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); -} - GlobalVariable *createPGOFuncNameVar(Module &M, GlobalValue::LinkageTypes Linkage, StringRef
[clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
EthanLuisMcDonough wrote: > Yes of course: > > ``` > cmake \ > -DCMAKE_BUILD_TYPE=Release \ > -DCMAKE_INSTALL_PREFIX=~/rocm/trunk_1.0 \ > -DLLVM_ENABLE_PROJECTS="clang;lld;llvm;clang-tools-extra;compiler-rt;flang" > \ > -DLLVM_LIT_ARGS="-vv --show-unsupported --show-xfail -j 32" \ > -DLLVM_TARGETS_TO_BUILD="X86;AMDGPU" \ > -DLLVM_ENABLE_ASSERTIONS=ON \ > -DLIBOMPTARGET_ENABLE_DEBUG=ON \ > -DPACKAGE_VENDOR="AMD" \ > -DCLANG_DEFAULT_LINKER=lld \ > -DLLVM_ENABLE_RUNTIMES="openmp;offload" \ > -DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=libc \ > -DLLVM_RUNTIME_TARGETS="default;amdgcn-amd-amdhsa" \ > -DBUILD_SHARED_LIBS=ON \ > ../llvm -GNinja > ``` Thanks for this. I'll revert this for now, it seems like the test also fails on the buildbot. https://github.com/llvm/llvm-project/pull/76587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Add GPU profiling flags to driver (PR #94268)
EthanLuisMcDonough wrote: > For some codes, I get the following error for a gfx906: > > ``` > LLVM ERROR: Relocation for CG Profile could not be created: unknown > relocation name > ``` > > I see it for OpenMC, but the following is a simpler example: > > ``` > $ cat test.c > #include > #include > __attribute__((noinline)) > double test(double x, int n) { > double res = 1; > for (int i = 0; i < n; ++i) > res *= x; > return res; > } > int main(int argc, char *argv[]) { > double x = atof(argv[1]); > unsigned n = atoi(argv[2]); > #pragma omp target map(tofrom:x) > x = test(x, n); > printf("%f\n", x); > return 0; > } > > $ clang -O2 -g -fopenmp --offload-arch=native test.c -o test \ > -fprofile-generate -fprofile-generate-gpu > > $ LLVM_PROFILE_FILE=test.profraw ./test 2 4 > 16.00 > > $ llvm-profdata merge -output=test.profdata *.profraw > > $ clang -O2 -g -fopenmp --offload-arch=native test.c -foffload-lto \ > -fprofile-use-gpu=test.profdata > ``` > > I can prevent the error by lowering the last -O2 to -O1 or by removing the > `__attribute__((noinline))`. Am I doing something wrong? Not sure if this is still relevant, but I think https://github.com/llvm/llvm-project/pull/114617 should fix this issue. The ASM builder was trying to create a reloc directive with the kind `BFD_RELOC_NONE`, which isn't defined in `getFixupKind` for AMDGPU. https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Add GPU profiling flags to driver (PR #94268)
EthanLuisMcDonough wrote: > > > For some codes, I get the following error for a gfx906: > > > ``` > > > LLVM ERROR: Relocation for CG Profile could not be created: unknown > > > relocation name > > > ``` > > > Not sure if this is still relevant, but I think #114617 should fix this > > issue. The ASM builder was trying to create a reloc directive with the kind > > `BFD_RELOC_NONE`, which isn't defined in `getFixupKind` for AMDGPU. > > Thanks, it does seem to fix it. > > If possible, would you please keep this PR in sync with PR #93365? That > should bring in the above fix and generally help me to review against my use > cases. Sure, I'll sync this branch with my changes in the other PR. https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [PGO][Offload] Fix profile function visibility (PR #127257)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/127257 >From 2f747025061b8be4b4294bf6351e653f02349105 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 14 Feb 2025 14:39:26 -0600 Subject: [PATCH] [PGO][Offload] Fix profile function visibility --- clang/lib/Driver/ToolChains/Darwin.cpp | 14 +- compiler-rt/lib/profile/InstrProfilingFile.c | 10 -- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index b26c5bf1a909e..75f126965e0ac 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1480,11 +1480,15 @@ void Darwin::addProfileRTLibs(const ArgList &Args, // If we have a symbol export directive and we're linking in the profile // runtime, automatically export symbols necessary to implement some of the // runtime's functionality. - if (hasExportSymbolDirective(Args) && ForGCOV) { -addExportedSymbol(CmdArgs, "___gcov_dump"); -addExportedSymbol(CmdArgs, "___gcov_reset"); -addExportedSymbol(CmdArgs, "_writeout_fn_list"); -addExportedSymbol(CmdArgs, "_reset_fn_list"); + if (hasExportSymbolDirective(Args)) { +if (ForGCOV) { + addExportedSymbol(CmdArgs, "___gcov_dump"); + addExportedSymbol(CmdArgs, "___gcov_reset"); + addExportedSymbol(CmdArgs, "_writeout_fn_list"); + addExportedSymbol(CmdArgs, "_reset_fn_list"); +} else { + addExportedSymbol(CmdArgs, "___llvm_write_custom_profile"); +} } // Align __llvm_prf_{cnts,bits,data} sections to the maximum expected page diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 503d159fd9817..e5eca7947cf9b 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -1362,12 +1362,10 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File, return 0; } -int __llvm_write_custom_profile(const char *Target, -const __llvm_profile_data *DataBegin, -const __llvm_profile_data *DataEnd, -const char *CountersBegin, -const char *CountersEnd, const char *NamesBegin, -const char *NamesEnd) { +COMPILER_RT_USED int __llvm_write_custom_profile( +const char *Target, const __llvm_profile_data *DataBegin, +const __llvm_profile_data *DataEnd, const char *CountersBegin, +const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) { int ReturnValue = 0, FilenameLength, TargetLength; char *FilenameBuf, *TargetFilename; const char *Filename; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [Darwin][PGO] Fix profile function visibility bug (PR #127257)
https://github.com/EthanLuisMcDonough edited https://github.com/llvm/llvm-project/pull/127257 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [Darwin][PGO] Fix profile function visibility bug (PR #127257)
https://github.com/EthanLuisMcDonough edited https://github.com/llvm/llvm-project/pull/127257 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [Clang][PGO] Fix profile function visibility bug (PR #127257)
https://github.com/EthanLuisMcDonough edited https://github.com/llvm/llvm-project/pull/127257 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [Darwin][PGO] Fix profile function visibility bug (PR #127257)
https://github.com/EthanLuisMcDonough ready_for_review https://github.com/llvm/llvm-project/pull/127257 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
EthanLuisMcDonough wrote: Hi @dtellenbach and @qiongsiwu. Thank you for letting me know about this. I'm currently investigating the issue and trying to reproduce the issue locally on darwin. I'm going to try and fix this as soon as possible. https://github.com/llvm/llvm-project/pull/93365 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [Clang][PGO] Fix profile function visibility bug (PR #127257)
https://github.com/EthanLuisMcDonough closed https://github.com/llvm/llvm-project/pull/127257 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Add GPU profiling flags to driver (PR #94268)
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \ #define INSTR_PROF_DATA_DEFINED #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) -INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, Version) EthanLuisMcDonough wrote: This ensures that a GPU profile is written with the version pulled from the GPU. Previously, if the host used LLVM-level instrumentation and the device has clang-level instrumentation, the GPU profile would use the host's format. This change ensures that is not the case. https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [PGO][Offload] Allow PGO flags to be used on GPU targets (PR #94268)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/94268 >From 3a2047c273d948d035b50eb486b772d5b3bdc401 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Tue, 18 Mar 2025 16:20:14 -0500 Subject: [PATCH 1/3] [PGO][Offload] Allow PGO flags to be used on GPU targets --- clang/lib/Driver/ToolChains/Clang.cpp | 6 +- clang/test/Driver/cuda-no-pgo-or-coverage.cu | 33 compiler-rt/include/profile/InstrProfData.inc | 2 +- compiler-rt/lib/profile/InstrProfiling.h | 3 +- .../lib/profile/InstrProfilingBuffer.c| 3 +- compiler-rt/lib/profile/InstrProfilingFile.c | 22 +++-- .../lib/profile/InstrProfilingInternal.h | 3 +- .../lib/profile/InstrProfilingWriter.c| 20 ++--- .../llvm/ProfileData/InstrProfData.inc| 2 +- .../Instrumentation/PGOInstrumentation.cpp| 5 +- .../llvm-profdata/binary-ids-padding.test | 2 +- ...alformed-not-space-for-another-header.test | 2 +- .../malformed-num-counters-zero.test | 2 +- .../malformed-ptr-to-counter-array.test | 2 +- .../common/include/GlobalHandler.h| 6 +- .../common/src/GlobalHandler.cpp | 18 +++- offload/test/offloading/gpupgo/pgo1.c | 84 +++ offload/test/offloading/gpupgo/pgo2.c | 76 + offload/test/offloading/pgo1.c| 66 --- 19 files changed, 220 insertions(+), 137 deletions(-) delete mode 100644 clang/test/Driver/cuda-no-pgo-or-coverage.cu create mode 100644 offload/test/offloading/gpupgo/pgo1.c create mode 100644 offload/test/offloading/gpupgo/pgo2.c delete mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1012128085c7a..e0f1206496486 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6387,11 +6387,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions, options::OPT_fno_convergent_functions); - // NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support - // for sampling, overhead of call arc collection is way too high and there's - // no way to collect the output. - if (!Triple.isNVPTX() && !Triple.isAMDGCN()) -addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); + addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); diff --git a/clang/test/Driver/cuda-no-pgo-or-coverage.cu b/clang/test/Driver/cuda-no-pgo-or-coverage.cu deleted file mode 100644 index b84587e1e182b..0 --- a/clang/test/Driver/cuda-no-pgo-or-coverage.cu +++ /dev/null @@ -1,33 +0,0 @@ -// Check that profiling/coverage arguments doen't get passed down to device-side -// compilation. -// -// -// XRUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// XRUN: -fprofile-generate %s 2>&1 | \ -// XRUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -fprofile-instr-generate %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -coverage %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,GCOV %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -ftest-coverage %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,GCOV %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -fprofile-instr-generate -fcoverage-mapping %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// -// CHECK-NOT: error: unsupported option '-fprofile -// CHECK-NOT: error: invalid argument -// CHECK-DAG: "-fcuda-is-device" -// CHECK-NOT: "-f{{[^"/]*coverage.*}}" -// CHECK-NOT: "-fprofile{{[^"]*}}" -// CHECK: "-triple" "x86_64-unknown-linux-gnu" -// PROF: "-fprofile{{.*}}" -// GCOV: "-coverage-notes-file= diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 2cdfea9a579a4..d51b58386f168 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \ #define INSTR_PROF_DATA_DEFINED #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) -INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, Version) INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) diff
[clang] [compiler-rt] [llvm] [PGO][Offload] Allow PGO flags to be used on GPU targets (PR #94268)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/94268 >From 3a2047c273d948d035b50eb486b772d5b3bdc401 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Tue, 18 Mar 2025 16:20:14 -0500 Subject: [PATCH 1/5] [PGO][Offload] Allow PGO flags to be used on GPU targets --- clang/lib/Driver/ToolChains/Clang.cpp | 6 +- clang/test/Driver/cuda-no-pgo-or-coverage.cu | 33 compiler-rt/include/profile/InstrProfData.inc | 2 +- compiler-rt/lib/profile/InstrProfiling.h | 3 +- .../lib/profile/InstrProfilingBuffer.c| 3 +- compiler-rt/lib/profile/InstrProfilingFile.c | 22 +++-- .../lib/profile/InstrProfilingInternal.h | 3 +- .../lib/profile/InstrProfilingWriter.c| 20 ++--- .../llvm/ProfileData/InstrProfData.inc| 2 +- .../Instrumentation/PGOInstrumentation.cpp| 5 +- .../llvm-profdata/binary-ids-padding.test | 2 +- ...alformed-not-space-for-another-header.test | 2 +- .../malformed-num-counters-zero.test | 2 +- .../malformed-ptr-to-counter-array.test | 2 +- .../common/include/GlobalHandler.h| 6 +- .../common/src/GlobalHandler.cpp | 18 +++- offload/test/offloading/gpupgo/pgo1.c | 84 +++ offload/test/offloading/gpupgo/pgo2.c | 76 + offload/test/offloading/pgo1.c| 66 --- 19 files changed, 220 insertions(+), 137 deletions(-) delete mode 100644 clang/test/Driver/cuda-no-pgo-or-coverage.cu create mode 100644 offload/test/offloading/gpupgo/pgo1.c create mode 100644 offload/test/offloading/gpupgo/pgo2.c delete mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1012128085c7a..e0f1206496486 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6387,11 +6387,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions, options::OPT_fno_convergent_functions); - // NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support - // for sampling, overhead of call arc collection is way too high and there's - // no way to collect the output. - if (!Triple.isNVPTX() && !Triple.isAMDGCN()) -addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); + addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); diff --git a/clang/test/Driver/cuda-no-pgo-or-coverage.cu b/clang/test/Driver/cuda-no-pgo-or-coverage.cu deleted file mode 100644 index b84587e1e182b..0 --- a/clang/test/Driver/cuda-no-pgo-or-coverage.cu +++ /dev/null @@ -1,33 +0,0 @@ -// Check that profiling/coverage arguments doen't get passed down to device-side -// compilation. -// -// -// XRUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// XRUN: -fprofile-generate %s 2>&1 | \ -// XRUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -fprofile-instr-generate %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -coverage %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,GCOV %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -ftest-coverage %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,GCOV %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -fprofile-instr-generate -fcoverage-mapping %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// -// CHECK-NOT: error: unsupported option '-fprofile -// CHECK-NOT: error: invalid argument -// CHECK-DAG: "-fcuda-is-device" -// CHECK-NOT: "-f{{[^"/]*coverage.*}}" -// CHECK-NOT: "-fprofile{{[^"]*}}" -// CHECK: "-triple" "x86_64-unknown-linux-gnu" -// PROF: "-fprofile{{.*}}" -// GCOV: "-coverage-notes-file= diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 2cdfea9a579a4..d51b58386f168 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \ #define INSTR_PROF_DATA_DEFINED #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) -INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, Version) INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) diff
[clang] [compiler-rt] [llvm] [PGO][Offload] Allow PGO flags to be used on GPU targets (PR #94268)
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \ #define INSTR_PROF_DATA_DEFINED #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) -INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, Version) EthanLuisMcDonough wrote: I fixed the version detection and made sure to set the `Version` property the custom value instead of changing the default value. https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [PGO][Offload] Allow PGO flags to be used on GPU targets (PR #94268)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/94268 >From 3a2047c273d948d035b50eb486b772d5b3bdc401 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Tue, 18 Mar 2025 16:20:14 -0500 Subject: [PATCH 1/4] [PGO][Offload] Allow PGO flags to be used on GPU targets --- clang/lib/Driver/ToolChains/Clang.cpp | 6 +- clang/test/Driver/cuda-no-pgo-or-coverage.cu | 33 compiler-rt/include/profile/InstrProfData.inc | 2 +- compiler-rt/lib/profile/InstrProfiling.h | 3 +- .../lib/profile/InstrProfilingBuffer.c| 3 +- compiler-rt/lib/profile/InstrProfilingFile.c | 22 +++-- .../lib/profile/InstrProfilingInternal.h | 3 +- .../lib/profile/InstrProfilingWriter.c| 20 ++--- .../llvm/ProfileData/InstrProfData.inc| 2 +- .../Instrumentation/PGOInstrumentation.cpp| 5 +- .../llvm-profdata/binary-ids-padding.test | 2 +- ...alformed-not-space-for-another-header.test | 2 +- .../malformed-num-counters-zero.test | 2 +- .../malformed-ptr-to-counter-array.test | 2 +- .../common/include/GlobalHandler.h| 6 +- .../common/src/GlobalHandler.cpp | 18 +++- offload/test/offloading/gpupgo/pgo1.c | 84 +++ offload/test/offloading/gpupgo/pgo2.c | 76 + offload/test/offloading/pgo1.c| 66 --- 19 files changed, 220 insertions(+), 137 deletions(-) delete mode 100644 clang/test/Driver/cuda-no-pgo-or-coverage.cu create mode 100644 offload/test/offloading/gpupgo/pgo1.c create mode 100644 offload/test/offloading/gpupgo/pgo2.c delete mode 100644 offload/test/offloading/pgo1.c diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1012128085c7a..e0f1206496486 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6387,11 +6387,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions, options::OPT_fno_convergent_functions); - // NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support - // for sampling, overhead of call arc collection is way too high and there's - // no way to collect the output. - if (!Triple.isNVPTX() && !Triple.isAMDGCN()) -addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); + addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); diff --git a/clang/test/Driver/cuda-no-pgo-or-coverage.cu b/clang/test/Driver/cuda-no-pgo-or-coverage.cu deleted file mode 100644 index b84587e1e182b..0 --- a/clang/test/Driver/cuda-no-pgo-or-coverage.cu +++ /dev/null @@ -1,33 +0,0 @@ -// Check that profiling/coverage arguments doen't get passed down to device-side -// compilation. -// -// -// XRUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// XRUN: -fprofile-generate %s 2>&1 | \ -// XRUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -fprofile-instr-generate %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -coverage %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,GCOV %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -ftest-coverage %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,GCOV %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -fprofile-instr-generate -fcoverage-mapping %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// -// CHECK-NOT: error: unsupported option '-fprofile -// CHECK-NOT: error: invalid argument -// CHECK-DAG: "-fcuda-is-device" -// CHECK-NOT: "-f{{[^"/]*coverage.*}}" -// CHECK-NOT: "-fprofile{{[^"]*}}" -// CHECK: "-triple" "x86_64-unknown-linux-gnu" -// PROF: "-fprofile{{.*}}" -// GCOV: "-coverage-notes-file= diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 2cdfea9a579a4..d51b58386f168 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \ #define INSTR_PROF_DATA_DEFINED #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) -INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, Version) INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) diff
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Add GPU profiling flags to driver (PR #94268)
https://github.com/EthanLuisMcDonough edited https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Allow PGO flags to be used on GPU targets (PR #94268)
https://github.com/EthanLuisMcDonough edited https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Profile profraw generation for GPU instrumentation #76587 (PR #93365)
EthanLuisMcDonough wrote: > @EthanLuisMcDonough I think your patch effectively introduces a dependency on > libc because `__llvm_write_custom_profile` has `__attribute__((used))` but > calls e.g. `atoi` through `setupIOBuffer`. > > In compiler-rt it's not safe to make that assumption because it potentially > breaks embedded platforms. IMO it's also bad practice to force used symbols > into a static archive. Would you please take another look and at least make > the functionality dependent on offloading or something similar? > > Thanks a lot! Thank you for bringing this to my attention. I'll make sure to look into a fix for this. https://github.com/llvm/llvm-project/pull/93365 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [PGO][Offload] Hide GPU entrypoint on Darwin (PR #132966)
https://github.com/EthanLuisMcDonough created https://github.com/llvm/llvm-project/pull/132966 This PR partially reverts 83e180c and instead opts to hide the GPU entry point on Darwin platforms. Marking `__llvm_write_custom_profile` as used was causing issues on embedded platforms. >From e418000ddb31f6df2aa96061bea6fcc56d7b09a9 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Tue, 25 Mar 2025 12:41:59 -0500 Subject: [PATCH] [PGO][Offload] Hide GPU entrypoint on Darwin --- clang/lib/Driver/ToolChains/Darwin.cpp | 14 +- compiler-rt/lib/profile/InstrProfilingFile.c | 12 +--- compiler-rt/lib/profile/InstrProfilingPort.h | 2 ++ 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index e67997314da36..4c88722730598 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1481,15 +1481,11 @@ void Darwin::addProfileRTLibs(const ArgList &Args, // If we have a symbol export directive and we're linking in the profile // runtime, automatically export symbols necessary to implement some of the // runtime's functionality. - if (hasExportSymbolDirective(Args)) { -if (ForGCOV) { - addExportedSymbol(CmdArgs, "___gcov_dump"); - addExportedSymbol(CmdArgs, "___gcov_reset"); - addExportedSymbol(CmdArgs, "_writeout_fn_list"); - addExportedSymbol(CmdArgs, "_reset_fn_list"); -} else { - addExportedSymbol(CmdArgs, "___llvm_write_custom_profile"); -} + if (hasExportSymbolDirective(Args) && ForGCOV) { +addExportedSymbol(CmdArgs, "___gcov_dump"); +addExportedSymbol(CmdArgs, "___gcov_reset"); +addExportedSymbol(CmdArgs, "_writeout_fn_list"); +addExportedSymbol(CmdArgs, "_reset_fn_list"); } // Align __llvm_prf_{cnts,bits,data} sections to the maximum expected page diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 19467429cf4c3..9f75ce5ef97d1 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -1273,13 +1273,11 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File, return 0; } -int __llvm_write_custom_profile(const char *Target, -const __llvm_profile_data *DataBegin, -const __llvm_profile_data *DataEnd, -const char *CountersBegin, -const char *CountersEnd, const char *NamesBegin, -const char *NamesEnd, -const uint64_t *VersionOverride) { +COMPILER_RT_GPU_ENTRYPOINT int __llvm_write_custom_profile( +const char *Target, const __llvm_profile_data *DataBegin, +const __llvm_profile_data *DataEnd, const char *CountersBegin, +const char *CountersEnd, const char *NamesBegin, const char *NamesEnd, +const uint64_t *VersionOverride) { int ReturnValue = 0, FilenameLength, TargetLength; char *FilenameBuf, *TargetFilename; const char *Filename; diff --git a/compiler-rt/lib/profile/InstrProfilingPort.h b/compiler-rt/lib/profile/InstrProfilingPort.h index 66d697885eaee..ed56beb6577a3 100644 --- a/compiler-rt/lib/profile/InstrProfilingPort.h +++ b/compiler-rt/lib/profile/InstrProfilingPort.h @@ -43,8 +43,10 @@ #if defined(__APPLE__) #define COMPILER_RT_SEG "__DATA," +#define COMPILER_RT_GPU_ENTRYPOINT COMPILER_RT_VISIBILITY #else #define COMPILER_RT_SEG "" +#define COMPILER_RT_GPU_ENTRYPOINT #endif #ifdef _MSC_VER ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [PGO][Offload] Hide GPU entrypoint on Darwin (PR #132966)
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/132966 >From e418000ddb31f6df2aa96061bea6fcc56d7b09a9 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Tue, 25 Mar 2025 12:41:59 -0500 Subject: [PATCH 1/2] [PGO][Offload] Hide GPU entrypoint on Darwin --- clang/lib/Driver/ToolChains/Darwin.cpp | 14 +- compiler-rt/lib/profile/InstrProfilingFile.c | 12 +--- compiler-rt/lib/profile/InstrProfilingPort.h | 2 ++ 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index e67997314da36..4c88722730598 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1481,15 +1481,11 @@ void Darwin::addProfileRTLibs(const ArgList &Args, // If we have a symbol export directive and we're linking in the profile // runtime, automatically export symbols necessary to implement some of the // runtime's functionality. - if (hasExportSymbolDirective(Args)) { -if (ForGCOV) { - addExportedSymbol(CmdArgs, "___gcov_dump"); - addExportedSymbol(CmdArgs, "___gcov_reset"); - addExportedSymbol(CmdArgs, "_writeout_fn_list"); - addExportedSymbol(CmdArgs, "_reset_fn_list"); -} else { - addExportedSymbol(CmdArgs, "___llvm_write_custom_profile"); -} + if (hasExportSymbolDirective(Args) && ForGCOV) { +addExportedSymbol(CmdArgs, "___gcov_dump"); +addExportedSymbol(CmdArgs, "___gcov_reset"); +addExportedSymbol(CmdArgs, "_writeout_fn_list"); +addExportedSymbol(CmdArgs, "_reset_fn_list"); } // Align __llvm_prf_{cnts,bits,data} sections to the maximum expected page diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 19467429cf4c3..9f75ce5ef97d1 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -1273,13 +1273,11 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File, return 0; } -int __llvm_write_custom_profile(const char *Target, -const __llvm_profile_data *DataBegin, -const __llvm_profile_data *DataEnd, -const char *CountersBegin, -const char *CountersEnd, const char *NamesBegin, -const char *NamesEnd, -const uint64_t *VersionOverride) { +COMPILER_RT_GPU_ENTRYPOINT int __llvm_write_custom_profile( +const char *Target, const __llvm_profile_data *DataBegin, +const __llvm_profile_data *DataEnd, const char *CountersBegin, +const char *CountersEnd, const char *NamesBegin, const char *NamesEnd, +const uint64_t *VersionOverride) { int ReturnValue = 0, FilenameLength, TargetLength; char *FilenameBuf, *TargetFilename; const char *Filename; diff --git a/compiler-rt/lib/profile/InstrProfilingPort.h b/compiler-rt/lib/profile/InstrProfilingPort.h index 66d697885eaee..ed56beb6577a3 100644 --- a/compiler-rt/lib/profile/InstrProfilingPort.h +++ b/compiler-rt/lib/profile/InstrProfilingPort.h @@ -43,8 +43,10 @@ #if defined(__APPLE__) #define COMPILER_RT_SEG "__DATA," +#define COMPILER_RT_GPU_ENTRYPOINT COMPILER_RT_VISIBILITY #else #define COMPILER_RT_SEG "" +#define COMPILER_RT_GPU_ENTRYPOINT #endif #ifdef _MSC_VER >From 64b85d93a48948a17244ad14e7f569997e2c16b6 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Tue, 25 Mar 2025 20:51:34 -0500 Subject: [PATCH 2/2] [PGO][Offload] Don't define entrypoint on Darwin --- compiler-rt/lib/profile/InstrProfilingFile.c | 14 +- compiler-rt/lib/profile/InstrProfilingPort.h | 1 - 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 9f75ce5ef97d1..d48a870647e86 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -1273,11 +1273,14 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File, return 0; } -COMPILER_RT_GPU_ENTRYPOINT int __llvm_write_custom_profile( -const char *Target, const __llvm_profile_data *DataBegin, -const __llvm_profile_data *DataEnd, const char *CountersBegin, -const char *CountersEnd, const char *NamesBegin, const char *NamesEnd, -const uint64_t *VersionOverride) { +#ifdef COMPILER_RT_GPU_ENTRYPOINT +int __llvm_write_custom_profile(const char *Target, +const __llvm_profile_data *DataBegin, +const __llvm_profile_data *DataEnd, +const char *CountersBegin, +const char *CountersEnd, const char *NamesBegin, +const char *NamesEnd, +const uint64_t *V
[clang] [compiler-rt] [PGO][Offload] Hide GPU entrypoint on Darwin (PR #132966)
@@ -1273,6 +1273,7 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File, return 0; } +#ifdef COMPILER_RT_GPU_ENTRYPOINT EthanLuisMcDonough wrote: My reasoning was that it would give us more configuration power if we had a macro that was defined only when we want an entry point. This would give us an easy way to disable the entry point on other platforms if needed. I don't expect that to happen, so I guess we can use `__APPLE__` for now. https://github.com/llvm/llvm-project/pull/132966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [PGO][Offload] Allow PGO flags to be used on GPU targets (PR #94268)
EthanLuisMcDonough wrote: Thank you all for making me aware of this. I'll look into fixing this as soon as possible. https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [PGO][Offload] Allow PGO flags to be used on GPU targets (PR #94268)
EthanLuisMcDonough wrote: This patch should disable PGO on nvidia targets: https://github.com/llvm/llvm-project/pull/133522. I'll merge it after I'm done running some tests https://github.com/llvm/llvm-project/pull/94268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PGO][Offload] Disable PGO on NVPTX (PR #133522)
https://github.com/EthanLuisMcDonough closed https://github.com/llvm/llvm-project/pull/133522 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [PGO][Offload] Hide GPU entrypoint on Darwin (PR #132966)
EthanLuisMcDonough wrote: > It worked before your patch right? Is this what it exposed before then? In 83e180c, I marked `__llvm_write_custom_profile` as used, which fixed the Darwin code stripping bug but broke the library on embedded systems. In c50d39f, I removed the used attribute, which breaks the Darwin code stripping test. This PR should fix both issues. https://github.com/llvm/llvm-project/pull/132966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PGO][Offload] Disable PGO on NVPTX (PR #133522)
https://github.com/EthanLuisMcDonough created https://github.com/llvm/llvm-project/pull/133522 None >From d21330ea2a3d2cd7ea035361e239bb3da16fbb9f Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Fri, 28 Mar 2025 16:17:44 -0500 Subject: [PATCH] [PGO][Offload] Disable PGO on NVPTX --- clang/lib/Driver/ToolChains/Clang.cpp | 4 ++- clang/test/Driver/cuda-no-pgo-or-coverage.cu | 33 +++ .../malformed-ptr-to-counter-array.test | 2 +- offload/test/offloading/gpupgo/pgo1.c | 2 +- offload/test/offloading/gpupgo/pgo2.c | 2 +- 5 files changed, 39 insertions(+), 4 deletions(-) create mode 100644 clang/test/Driver/cuda-no-pgo-or-coverage.cu diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 26fa234dd4e9b..5f45cf0865b9e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6397,7 +6397,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions, options::OPT_fno_convergent_functions); - addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); + // NVPTX doesn't support PGO or coverage + if (!Triple.isNVPTX()) +addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); diff --git a/clang/test/Driver/cuda-no-pgo-or-coverage.cu b/clang/test/Driver/cuda-no-pgo-or-coverage.cu new file mode 100644 index 0..b84587e1e182b --- /dev/null +++ b/clang/test/Driver/cuda-no-pgo-or-coverage.cu @@ -0,0 +1,33 @@ +// Check that profiling/coverage arguments doen't get passed down to device-side +// compilation. +// +// +// XRUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ +// XRUN: -fprofile-generate %s 2>&1 | \ +// XRUN: FileCheck --check-prefixes=CHECK,PROF %s +// +// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ +// RUN: -fprofile-instr-generate %s 2>&1 | \ +// RUN: FileCheck --check-prefixes=CHECK,PROF %s +// +// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ +// RUN: -coverage %s 2>&1 | \ +// RUN: FileCheck --check-prefixes=CHECK,GCOV %s +// +// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ +// RUN: -ftest-coverage %s 2>&1 | \ +// RUN: FileCheck --check-prefixes=CHECK,GCOV %s +// +// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ +// RUN: -fprofile-instr-generate -fcoverage-mapping %s 2>&1 | \ +// RUN: FileCheck --check-prefixes=CHECK,PROF %s +// +// +// CHECK-NOT: error: unsupported option '-fprofile +// CHECK-NOT: error: invalid argument +// CHECK-DAG: "-fcuda-is-device" +// CHECK-NOT: "-f{{[^"/]*coverage.*}}" +// CHECK-NOT: "-fprofile{{[^"]*}}" +// CHECK: "-triple" "x86_64-unknown-linux-gnu" +// PROF: "-fprofile{{.*}}" +// GCOV: "-coverage-notes-file= diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test index 49c5ae9b0931d..83cf76f68fb63 100644 --- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test +++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test @@ -1,7 +1,7 @@ // Header // // INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) -// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version) +// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) diff --git a/offload/test/offloading/gpupgo/pgo1.c b/offload/test/offloading/gpupgo/pgo1.c index c8011cbae83c0..af6d8e806cd37 100644 --- a/offload/test/offloading/gpupgo/pgo1.c +++ b/offload/test/offloading/gpupgo/pgo1.c @@ -14,7 +14,7 @@ // RUN: %target_triple.%basename_t.clang.profraw | \ // RUN: %fcheck-generic --check-prefix="CLANG-PGO" -// REQUIRES: gpu +// REQUIRES: amdgpu // REQUIRES: pgo int test1(int a) { return a / 2; } diff --git a/offload/test/offloading/gpupgo/pgo2.c b/offload/test/offloading/gpupgo/pgo2.c index b75b0beaffdec..b67a63bc8fc3d 100644 --- a/offload/test/offloading/gpupgo/pgo2.c +++ b/offload/test/offloading/gpupgo/pgo2.c @@ -48,7 +48,7 @@ // RUN: %target_triple.%basename_t.hfdi.profraw \ // RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE" -// REQUIRES: gpu +// REQUIRES: amdgpu // REQUIRES: pgo int main() { ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits