[PATCH] D132975: [clang][BOLT] Add clangbolt target (WIP)

2022-08-30 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added a reviewer: bolt.
Herald added subscribers: treapster, wenlei, mgorny.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This patch adds a `CLANG_BOLT_INSTRUMENT` option that applies BOLT instrumentation
to Clang, performs a bootstrap build with the instrumented Clang, merges the
resulting fdata files into a single profile file, and uses that profile to
perform BOLT optimization on the original Clang binary.

The intended use of this functionality is through the BOLT CMake cache file,
similar to the PGO 2-stage build:

  cmake /llvm -C /clang/cmake/caches/BOLT.cmake
  ninja clang-bolt


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake


Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,12 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -878,6 +878,57 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  find_program(LLVM_BOLT llvm-bolt)
+  find_program(MERGE_FDATA merge-fdata)
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/bin/clang.bolt.inst
+  )
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/bin/clang.bolt.inst
+DEPENDS clang
+COMMAND ${LLVM_BOLT} ${CMAKE_BINARY_DIR}/bin/clang -o
+  ${CMAKE_CURRENT_BINARY_DIR}/bin/clang.bolt.inst
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/bin/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+)
+  # Make a symlink from clang.bolt.inst to clang++.bolt.inst
+  add_clang_symlink(${CMAKE_CURRENT_BINARY_DIR}/bin/clang++.bolt.inst
+${CMAKE_CURRENT_BINARY_DIR}/bin/clang.bolt.inst)
+
+  # Configure and build Clang with instrumented Clang to collect the profile
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS clang
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+
-DCMAKE_CXX_COMPILER=${CMAKE_CURRENT_BINARY_DIR}/bin/clang++.bolt.inst
+
-DCMAKE_C_COMPILER=${CMAKE_CURRENT_BINARY_DIR}/bin/clang.bolt.inst
+STEP_TARGETS configure build install
+  )
+  # Merge profiles into one using merge-fdata
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/bin/prof.fdata
+COMMAND ${MERGE_FDATA} ${CMAKE_CURRENT_BINARY_DIR}/bin/prof.fdata.*
+  -o ${CMAKE_CURRENT_BINARY_DIR}/bin/prof.fdata
+  )
+  # Optimize original (pre-bolt) Clang using the collected profile
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/bin/clang.bolt
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/bin/prof.fdata clang
+COMMAND ${CMAKE_BINARY_DIR}/bin/llvm-bolt ${CMAKE_BINARY_DIR}/bin/clang
+-o ${CMAKE_BINARY_DIR}/bin/clang.bolt -fdata
+${CMAKE_CURRENT_BINARY_DIR}/bin/prof.fdata
+  )
+  add_custom_target(clang-bolt
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/bin/clang.bolt
+)
+endif()
+
 if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION)
   add_subdirectory(utils/ClangVisualizers)
 endif()


Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,12 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -878,

[PATCH] D132975: [clang][BOLT] Add clang-bolt target (WIP)

2022-08-30 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 456799.
Amir added a comment.

CMAKE_CURRENT_BINARY_DIR already contains bin/


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake


Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,12 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -878,6 +878,56 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  find_program(LLVM_BOLT llvm-bolt)
+  find_program(MERGE_FDATA merge-fdata)
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst
+  )
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst
+DEPENDS clang
+COMMAND ${LLVM_BOLT} ${CMAKE_BINARY_DIR}/clang -o
+  ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+)
+  # Make a symlink from clang.bolt.inst to clang++.bolt.inst
+  add_clang_symlink(${CMAKE_CURRENT_BINARY_DIR}/clang++.bolt.inst
+${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst)
+
+  # Configure and build Clang with instrumented Clang to collect the profile
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+
-DCMAKE_CXX_COMPILER=${CMAKE_CURRENT_BINARY_DIR}/clang++.bolt.inst
+-DCMAKE_C_COMPILER=${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst
+STEP_TARGETS configure build install
+  )
+  # Merge profiles into one using merge-fdata
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMAND ${MERGE_FDATA} ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata.*
+  -o ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  )
+  # Optimize original (pre-bolt) Clang using the collected profile
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata clang
+COMMAND ${LLVM_BOLT} ${CMAKE_BINARY_DIR}/clang
+-o ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt -data 
${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  )
+  add_custom_target(clang-bolt
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt
+)
+endif()
+
 if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION)
   add_subdirectory(utils/ClangVisualizers)
 endif()


Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,12 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -878,6 +878,56 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  find_program(LLVM_BOLT llvm-bolt)
+  find_program(MERGE_FDATA merge-fdata)
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst
+  )
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst
+DEPENDS clang
+COMMAND ${LLVM_BOLT} ${CMAKE_BINARY_DIR}/clang -o
+  ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt.inst
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CM

[PATCH] D132975: [clang][BOLT] Add clang-bolt target (WIP)

2022-08-31 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 457102.
Amir added a comment.

Succeeded instrumenting Clang with BOLT


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,14 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT_INSTRUMENT_PROJECTS "lld" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_TARGETS "lld" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -443,7 +443,7 @@
   "HAVE_CLANG_PLUGIN_SUPPORT" OFF)
 
 # If libstdc++ is statically linked, clang-repl needs to statically link libstdc++
-# itself, which is not possible in many platforms because of current limitations in 
+# itself, which is not possible in many platforms because of current limitations in
 # JIT stack. (more platforms need to be supported by JITLink)
 if(NOT LLVM_STATIC_LINK_CXX_STDLIB)
   set(HAVE_CLANG_REPL_SUPPORT ON)
@@ -878,6 +878,97 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  set(C_COMPILER "clang.bolt.inst")
+  set(CXX_COMPILER "clang++.bolt.inst")
+  set(C_COMPILER_PATH ${CMAKE_CURRENT_BINARY_DIR}/${C_COMPILER})
+  set(CXX_COMPILER_PATH ${CMAKE_CURRENT_BINARY_DIR}/${CXX_COMPILER})
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${C_COMPILER_PATH}
+  )
+  add_custom_command(OUTPUT ${C_COMPILER_PATH}
+DEPENDS clang llvm-bolt
+COMMAND llvm-bolt $ -o ${C_COMPILER_PATH}
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+VERBATIM
+  )
+
+  # Make a symlink from ${C_COMPILER} to ${CXX_COMPILER}
+  add_custom_target(clang++-instrumented
+DEPENDS ${CXX_COMPILER_PATH}
+  )
+  add_custom_command(OUTPUT ${CXX_COMPILER_PATH}
+DEPENDS clang-instrumented
+COMMAND ${CMAKE_COMMAND} -E create_symlink
+  ${C_COMPILER_PATH}
+  ${CXX_COMPILER_PATH}
+COMMENT "Creating a symlink from BOLT instrumented clang to clang++"
+VERBATIM
+  )
+
+  # Configure and build Clang with instrumented Clang to collect the profile
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  set(build_configuration "$")
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS clang-instrumented clang++-instrumented
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+-DCMAKE_C_COMPILER=${C_COMPILER_PATH}
+-DCMAKE_CXX_COMPILER=${CXX_COMPILER_PATH}
+-DCMAKE_ASM_COMPILER=${C_COMPILER_PATH}
+-DCMAKE_ASM_COMPILER_ID=Clang
+-DCMAKE_BUILD_TYPE=Release
+-DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
+BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+   --config ${build_configuration}
+   --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
+INSTALL_COMMAND ""
+STEP_TARGETS configure build
+USES_TERMINAL_CONFIGURE 1
+USES_TERMINAL_BUILD 1
+USES_TERMINAL_INSTALL 1
+LIST_SEPARATOR |
+  )
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(clang-bolt-profile
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  )
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+DEPENDS merge-fdata bolt-instrumentation-profile-build
+COMMAND merge-fdata ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata.*
+  -o ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Preparing BOLT profile"
+VERBATIM
+  )
+
+  # Optimize original (pre-bolt) Clang using the collected profile
+  add_custom_target(clang-bolt
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt
+  )
+  add_custom_command(

[PATCH] D132975: [clang][BOLT] Add clang-bolt target (WIP)

2022-08-31 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 457172.
Amir added a comment.

Successfully invoke the bootstrap/profiling build


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,14 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT_INSTRUMENT_PROJECTS "lld" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_TARGETS "lld" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -443,7 +443,7 @@
   "HAVE_CLANG_PLUGIN_SUPPORT" OFF)
 
 # If libstdc++ is statically linked, clang-repl needs to statically link libstdc++
-# itself, which is not possible in many platforms because of current limitations in 
+# itself, which is not possible in many platforms because of current limitations in
 # JIT stack. (more platforms need to be supported by JITLink)
 if(NOT LLVM_STATIC_LINK_CXX_STDLIB)
   set(HAVE_CLANG_REPL_SUPPORT ON)
@@ -878,6 +878,97 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  set(C_COMPILER "clang-bolt.inst")
+  set(CXX_COMPILER "clang++-bolt.inst")
+  set(C_COMPILER_PATH ${CMAKE_BINARY_DIR}/bin/${C_COMPILER})
+  set(CXX_COMPILER_PATH ${CMAKE_BINARY_DIR}/bin/${CXX_COMPILER})
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${C_COMPILER_PATH}
+  )
+  add_custom_command(OUTPUT ${C_COMPILER_PATH}
+DEPENDS clang llvm-bolt
+COMMAND llvm-bolt $ -o ${C_COMPILER_PATH}
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+VERBATIM
+  )
+
+  # Make a symlink from ${C_COMPILER} to ${CXX_COMPILER}
+  add_custom_target(clang++-instrumented
+DEPENDS ${CXX_COMPILER_PATH}
+  )
+  add_custom_command(OUTPUT ${CXX_COMPILER_PATH}
+DEPENDS clang-instrumented
+COMMAND ${CMAKE_COMMAND} -E create_symlink
+  ${C_COMPILER_PATH}
+  ${CXX_COMPILER_PATH}
+COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+VERBATIM
+  )
+
+  # Configure and build Clang with instrumented Clang to collect the profile
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  set(build_configuration "$")
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS clang++-instrumented
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+-DCMAKE_C_COMPILER=${C_COMPILER_PATH}
+-DCMAKE_CXX_COMPILER=${CXX_COMPILER_PATH}
+-DCMAKE_ASM_COMPILER=${C_COMPILER_PATH}
+-DCMAKE_ASM_COMPILER_ID=Clang
+-DCMAKE_BUILD_TYPE=Release
+-DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
+-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
+BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+   --config ${build_configuration}
+   --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
+INSTALL_COMMAND ""
+STEP_TARGETS configure build
+USES_TERMINAL_CONFIGURE 1
+USES_TERMINAL_BUILD 1
+USES_TERMINAL_INSTALL 1
+  )
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(clang-bolt-profile
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  )
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+DEPENDS merge-fdata bolt-instrumentation-profile-build
+COMMAND merge-fdata ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata.*
+  -o ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Preparing BOLT profile"
+VERBATIM
+  )
+
+  # Optimize original (pre-bolt) Clang using the collected profile
+  add_custom_target(clang-bolt
+DEPENDS ${CMAKE_BINARY_DIR}/bin/clang-bolt
+  )
+  ad

[PATCH] D132975: [clang][BOLT] Add clang-bolt target (WIP)

2022-09-01 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 457400.
Amir added a comment.

Succeeded in producing an optimized Clang. Switched the default profiling target
from `lld` to `count`, which provides sufficient Clang coverage: 5.3B executed
instructions (together with the configure-stage Clang invocations).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,14 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -443,7 +443,7 @@
   "HAVE_CLANG_PLUGIN_SUPPORT" OFF)
 
 # If libstdc++ is statically linked, clang-repl needs to statically link libstdc++
-# itself, which is not possible in many platforms because of current limitations in 
+# itself, which is not possible in many platforms because of current limitations in
 # JIT stack. (more platforms need to be supported by JITLink)
 if(NOT LLVM_STATIC_LINK_CXX_STDLIB)
   set(HAVE_CLANG_REPL_SUPPORT ON)
@@ -878,6 +878,99 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  set(C_COMPILER "clang-bolt.inst")
+  set(CXX_COMPILER "clang++-bolt.inst")
+  set(C_COMPILER_PATH ${CMAKE_BINARY_DIR}/bin/${C_COMPILER})
+  set(CXX_COMPILER_PATH ${CMAKE_BINARY_DIR}/bin/${CXX_COMPILER})
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${C_COMPILER_PATH}
+  )
+  add_custom_command(OUTPUT ${C_COMPILER_PATH}
+DEPENDS clang llvm-bolt
+COMMAND llvm-bolt $ -o ${C_COMPILER_PATH}
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+VERBATIM
+  )
+
+  # Make a symlink from ${C_COMPILER} to ${CXX_COMPILER}
+  add_custom_target(clang++-instrumented
+DEPENDS ${CXX_COMPILER_PATH}
+  )
+  add_custom_command(OUTPUT ${CXX_COMPILER_PATH}
+DEPENDS clang-instrumented
+COMMAND ${CMAKE_COMMAND} -E create_symlink
+  ${C_COMPILER_PATH}
+  ${CXX_COMPILER_PATH}
+COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+VERBATIM
+  )
+
+  # Configure and build Clang with instrumented Clang to collect the profile
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  set(build_configuration "$")
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS clang++-instrumented
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+-DCMAKE_C_COMPILER=${C_COMPILER_PATH}
+-DCMAKE_CXX_COMPILER=${CXX_COMPILER_PATH}
+-DCMAKE_ASM_COMPILER=${C_COMPILER_PATH}
+-DCMAKE_ASM_COMPILER_ID=Clang
+-DCMAKE_BUILD_TYPE=Release
+-DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
+-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
+BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+   --config ${build_configuration}
+   --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
+INSTALL_COMMAND ""
+STEP_TARGETS configure build
+USES_TERMINAL_CONFIGURE 1
+USES_TERMINAL_BUILD 1
+USES_TERMINAL_INSTALL 1
+  )
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(clang-bolt-profile
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  )
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+DEPENDS merge-fdata bolt-instrumentation-profile-build
+WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+COMMAND sh -c "$ prof.fdata.* -o prof.fdata"
+COMMENT "Preparing BOLT profile"
+VERBATIM
+  )
+
+  # Opti

[PATCH] D132975: [clang][BOLT] Add clang-bolt target

2022-09-01 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

Hi Petr, thank you for your comments!

In D132975#3763264, @phosek wrote:

> This was already on my list of build system features I'd like to implement 
> and I'm glad someone else is already looking into it, thank you! I have two 
> high level comments about your approach.
>
> The first one is related to the use of Clang build as the training data. I 
> think that Clang build is both unnecessarily heavyweight, but also not 
> particularly representative of typical workloads (most Clang users don't use 
> it to build Clang). Ideally, we would give vendors the flexibility to supply 
> their own training data. I'd prefer reusing the existing perf-training 
> setup to do so. In fact, I'd imagine most vendors would likely use the same 
> training data for both PGO and BOLT and that use case should be supported.

Agree that perf-training might be useful for vendors. I'll try to enable it in 
a follow-up diff.

Please note that the target for profile collection is not hardcoded to clang; 
it's configurable via CLANG_BOLT_INSTRUMENT_PROJECTS and 
CLANG_BOLT_INSTRUMENT_TARGETS. Right now it's the llvm/not tool (the smallest 
possible).

> The second one is related to applicability. I don't think this mechanism 
> should be limited only to Clang. Ideally, it should be possible to instrument 
> and optimize other tools in the toolchain distribution as well; LLD is likely 
> going to be the most common one after Clang.

I thought about it, and I think we can accommodate optimizing arbitrary targets 
by providing an interface to instrument specified target(s) via 
`-DBOLT_INSTRUMENT_TARGETS`. For each of the target binaries, CMake would 
create targets like `bolt-instrument-$TARGET` and `bolt-optimize-$TARGET`. 
For `bolt-instrument-$TARGET`, BOLT would instrument the target binary, placing 
instrumented binary next to the original one (e.g. `target`-bolt.inst). End 
users would use those instrumented binaries on representative workloads to 
collect the profile. For `bolt-optimize-$TARGET`, BOLT would post-process the 
profiles and create optimized binary (`target`-bolt).

I appreciate your suggestions. Do you think we can move incrementally from this 
diff towards more general uses in follow-up diffs?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132975: [clang][BOLT] Add clang-bolt target

2022-09-01 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 457467.
Amir added a comment.

Fix up paths


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,14 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -443,7 +443,7 @@
   "HAVE_CLANG_PLUGIN_SUPPORT" OFF)
 
 # If libstdc++ is statically linked, clang-repl needs to statically link libstdc++
-# itself, which is not possible in many platforms because of current limitations in 
+# itself, which is not possible in many platforms because of current limitations in
 # JIT stack. (more platforms need to be supported by JITLink)
 if(NOT LLVM_STATIC_LINK_CXX_STDLIB)
   set(HAVE_CLANG_REPL_SUPPORT ON)
@@ -878,6 +878,114 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
+  set(CLANGXX_PATH ${CLANG_PATH}++)
+  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+  set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
+  set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${CLANG_INSTRUMENTED}
+  )
+  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+DEPENDS clang llvm-bolt
+COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+VERBATIM
+  )
+
+  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
+  add_custom_target(clang++-instrumented
+DEPENDS ${CLANGXX_INSTRUMENTED}
+  )
+  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+DEPENDS clang-instrumented
+COMMAND ${CMAKE_COMMAND} -E create_symlink
+  ${CLANG_INSTRUMENTED}
+  ${CLANGXX_INSTRUMENTED}
+COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+VERBATIM
+  )
+
+  # Build specified targets with instrumented Clang to collect the profile
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  set(build_configuration "$")
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS clang++-instrumented
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+-DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER_ID=Clang
+-DCMAKE_BUILD_TYPE=Release
+-DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
+-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
+BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+   --config ${build_configuration}
+   --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
+INSTALL_COMMAND ""
+STEP_TARGETS configure build
+USES_TERMINAL_CONFIGURE 1
+USES_TERMINAL_BUILD 1
+USES_TERMINAL_INSTALL 1
+  )
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(clang-bolt-profile
+DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  )
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+DEPENDS merge-fdata bolt-instrumentation-profile-build
+WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+COMMAND sh -c "$ prof.fdata.* -o prof.fdata"
+COMMENT "Preparing BOLT profile"
+VERBATIM
+  )
+
+  # Optimize original (pre-bolt) Clang using the collected profile
+  add_cust

[PATCH] D132975: [clang][BOLT] Add clang-bolt target

2022-09-03 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

In D132975#3768391, @tschuett wrote:

> Will there be eventually a way to build a fully optimised clang/lld with 
> ThinLTO, PGO, and Bolt?

Short answer is likely yes.
For clang, I think this diff should be compatible with PGO, with the caveat that 
BOLT should be applied to the stage-2 clang built with PGO, which means that the 
`BOOTSTRAP_` options should be set carefully. It is certainly compatible with 
ThinLTO, which is completely orthogonal. 
For lld, I can envision a similar fully automated optimized build, but likely 
in a future separate diff.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133633: [CMake] Add ClangBootstrap configuration

2022-09-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added reviewers: phosek, MaskRay.
Herald added subscribers: StephenFan, mgorny.
Herald added a reviewer: alexander-shaposhnikov.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Factor out the common parts of Clang bootstrap configuration into a separate
CMake module.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D133633

Files:
  clang/CMakeLists.txt
  clang/cmake/modules/ClangBootstrap.cmake

Index: clang/cmake/modules/ClangBootstrap.cmake
===
--- /dev/null
+++ clang/cmake/modules/ClangBootstrap.cmake
@@ -0,0 +1,54 @@
+include(ExternalProject)
+
+# clang_Bootstrap_Add(name ...
+#   DEPENDS targets...
+# Targets that this project depends on
+#   TABLEGEN
+#   LINKER
+#   AR
+#   RANLIB
+#   OBJCOPY
+#   STRIP
+# Toolchain binaries
+#   CMAKE_ARGS arguments...
+# Optional cmake arguments to pass when configuring the project
+#   BUILD_TOOL_ARGS arguments...
+# Optional arguments to pass to the build tool
+macro(clang_Bootstrap_Add name)
+  cmake_parse_arguments(ARG "" "LINKER;AR;RANLIB;OBJCOPY;STRIP"
+"DEPENDS;TABLEGEN;CMAKE_ARGS;BUILD_TOOL_ARGS"
+${ARGN})
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-bins/)
+  # Build arguments for native tool used in CMake.
+  set(build_configuration "$<CONFIG>")
+
+  ExternalProject_Add(${name}
+DEPENDS ${ARG_DEPENDS}
+PREFIX ${name}
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+${ARG_TABLEGEN}
+${ARG_LINKER}
+${ARG_AR}
+${ARG_RANLIB}
+${ARG_OBJCOPY}
+${ARG_STRIP}
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+${ARG_CMAKE_ARGS}
+BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+   --config ${build_configuration}
+   ${ARG_BUILD_TOOL_ARGS}
+INSTALL_COMMAND ""
+STEP_TARGETS configure build
+USES_TERMINAL_CONFIGURE 1
+USES_TERMINAL_BUILD 1
+USES_TERMINAL_INSTALL 1
+LIST_SEPARATOR |
+  )
+endmacro()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -797,45 +797,29 @@
   endforeach()
 
   # Build arguments for native tool used in CMake.
-  set(build_configuration "$<CONFIG>")
   set(build_tool_args "${LLVM_EXTERNAL_PROJECT_BUILD_TOOL_ARGS}")
   if(NOT build_tool_args STREQUAL "")
 string(PREPEND build_tool_args "-- ")
 separate_arguments(build_tool_args UNIX_COMMAND "${build_tool_args}")
   endif()
 
-  ExternalProject_Add(${NEXT_CLANG_STAGE}
+  include(ClangBootstrap)
+  clang_Bootstrap_Add(${NEXT_CLANG_STAGE}
 DEPENDS clang-bootstrap-deps
-PREFIX ${NEXT_CLANG_STAGE}
-SOURCE_DIR ${CMAKE_SOURCE_DIR}
-STAMP_DIR ${STAMP_DIR}
-BINARY_DIR ${BINARY_DIR}
-EXCLUDE_FROM_ALL 1
+TABLEGEN ${${CLANG_STAGE}_TABLEGEN}
+LINKER   ${${CLANG_STAGE}_LINKER}
+AR   ${${CLANG_STAGE}_AR}
+RANLIB   ${${CLANG_STAGE}_RANLIB}
+OBJCOPY  ${${CLANG_STAGE}_OBJCOPY}
+STRIP    ${${CLANG_STAGE}_STRIP}
 CMAKE_ARGS
-# We shouldn't need to set this here, but INSTALL_DIR doesn't
-# seem to work, so instead I'm passing this through
--DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
 ${PASSTHROUGH_VARIABLES}
 ${CLANG_BOOTSTRAP_CMAKE_ARGS}
  -DCLANG_STAGE=${NEXT_CLANG_STAGE}
 ${COMPILER_OPTIONS}
-${${CLANG_STAGE}_TABLEGEN}
 ${LTO_LIBRARY} ${verbose} ${PGO_OPT}
-${${CLANG_STAGE}_LINKER}
-${${CLANG_STAGE}_AR}
-${${CLANG_STAGE}_RANLIB}
-${${CLANG_STAGE}_OBJCOPY}
-${${CLANG_STAGE}_STRIP}
-BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
-   --config ${build_configuration}
-   ${build_tool_args}
-INSTALL_COMMAND ""
-STEP_TARGETS configure build
-USES_TERMINAL_CONFIGURE 1
-USES_TERMINAL_BUILD 1
-USES_TERMINAL_INSTALL 1
-LIST_SEPARATOR |
-)
+BUILD_TOOL_ARGS ${build_tool_args}
+  )
 
   # exclude really-install from main target
   set_target_properties(${NEXT_CLANG_STAGE} PROPERTIES _EP_really-install_EXCLUDE_FROM_MAIN On)
@@ -916,36 +900,31 @@
   )
 
   # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/

[PATCH] D132975: [CMake] Add clang-bolt target

2022-09-12 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 459539.
Amir added a comment.

Address @phosek's comment about dependency on shell


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -38,7 +38,7 @@
 
 def merge(args):
   if len(args) != 3:
-print('Usage: %s clean <llvm-profdata> <output> <path>\n' % __file__ +
+print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
   '\tMerges all profraw files from path into output.')
 return 1
   cmd = [args[0], 'merge', '-o', args[1]]
@@ -46,6 +46,16 @@
   subprocess.check_call(cmd)
   return 0
 
+def merge_fdata(args):
+  if len(args) != 3:
+print('Usage: %s merge-fdata <merge-fdata> <output> <path>\n' % __file__ +
+  '\tMerges all fdata files from path into output.')
+return 1
+  cmd = [args[0], '-o', args[1]]
+  cmd.extend(findFilesWithExtension(args[2], "fdata"))
+  subprocess.check_call(cmd)
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -395,10 +405,12 @@
   return 0
 
 commands = {'clean' : clean,
-  'merge' : merge, 
+  'merge' : merge,
   'dtrace' : dtrace,
   'cc1' : cc1,
-  'gen-order-file' : genOrderFile}
+  'gen-order-file' : genOrderFile,
+  'merge-fdata' : merge_fdata,
+  }
 
 def main():
   f = commands[sys.argv[1]]
Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,14 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -443,7 +443,7 @@
   "HAVE_CLANG_PLUGIN_SUPPORT" OFF)
 
 # If libstdc++ is statically linked, clang-repl needs to statically link libstdc++
-# itself, which is not possible in many platforms because of current limitations in 
+# itself, which is not possible in many platforms because of current limitations in
 # JIT stack. (more platforms need to be supported by JITLink)
 if(NOT LLVM_STATIC_LINK_CXX_STDLIB)
   set(HAVE_CLANG_REPL_SUPPORT ON)
@@ -881,6 +881,117 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
+  set(CLANGXX_PATH ${CLANG_PATH}++)
+  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+  set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
+  set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${CLANG_INSTRUMENTED}
+  )
+  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+DEPENDS clang llvm-bolt
+COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+VERBATIM
+  )
+
+  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
+  add_custom_target(clang++-instrumented
+DEPENDS ${CLANGXX_INSTRUMENTED}
+  )
+  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+DEPENDS clang-instrumented
+COMMAND ${CMAKE_COMMAND} -E create_symlink
+  ${CLANG_INSTRUMENTED}
+  ${CLANGXX_INSTRUMENTED}
+COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+VERBATIM
+  )
+
+  # Build specified targets with instrumented Clang to collect the profile
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  set(build_configuration "$<CONFIG>")
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS clang++-instrumented
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead

[PATCH] D132975: [CMake] Add clang-bolt target

2022-09-12 Thread Amir Ayupov via Phabricator via cfe-commits
Amir marked an inline comment as not done.
Amir added inline comments.



Comment at: clang/CMakeLists.txt:930-937
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+-DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER_ID=Clang
+-DCMAKE_BUILD_TYPE=Release
+-DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}

MaskRay wrote:
> phosek wrote:
> > I don't think this is sufficient in the general case, we would need to pass 
> > additional variables like `CMAKE_AR` the same way we do for the existing 
> > bootstrap logic, see 
> > https://github.com/llvm/llvm-project/blob/dc549bf0013e11e8fcccba8a8d59c3a4bb052a3b/clang/CMakeLists.txt#L825.
> > 
> > For example, on Fuchsia builders we don't have any system-wide toolchain 
> > installation, instead we manually set all necessary `CMAKE_<TOOL>` 
> > variables for the first stage, so this call will fail for us because it 
> > won't be able to find tools like the archiver.
> > 
> > Since handling this properly would likely duplicate a lot of the existing 
> > logic from the existing bootstrap logic, I'm wondering if we should instead 
> > try to refactor the existing logic and break it up into macros/functions 
> > which could then be reused here as well.
> Supporting other cmake variables will be awesome. I use something like 
> `-DCMAKE_CXX_ARCHIVE_CREATE="$HOME/llvm/out/stable/bin/llvm-ar qcS --thin 
> <TARGET> <OBJECTS>" -DCMAKE_CXX_ARCHIVE_FINISH=:` to make my build smaller.
Addressed in D133633


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132975: [CMake] Add clang-bolt target

2022-09-12 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 459588.
Amir added a comment.

Add an ability to pass extra cmake flags


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -38,7 +38,7 @@
 
 def merge(args):
   if len(args) != 3:
-print('Usage: %s clean <llvm-profdata> <output> <path>\n' % __file__ +
+print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
   '\tMerges all profraw files from path into output.')
 return 1
   cmd = [args[0], 'merge', '-o', args[1]]
@@ -46,6 +46,16 @@
   subprocess.check_call(cmd)
   return 0
 
+def merge_fdata(args):
+  if len(args) != 3:
+print('Usage: %s merge-fdata <merge-fdata> <output> <path>\n' % __file__ +
+  '\tMerges all fdata files from path into output.')
+return 1
+  cmd = [args[0], '-o', args[1]]
+  cmd.extend(findFilesWithExtension(args[2], "fdata"))
+  subprocess.check_call(cmd)
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -395,10 +405,12 @@
   return 0
 
 commands = {'clean' : clean,
-  'merge' : merge, 
+  'merge' : merge,
   'dtrace' : dtrace,
   'cc1' : cc1,
-  'gen-order-file' : genOrderFile}
+  'gen-order-file' : genOrderFile,
+  'merge-fdata' : merge_fdata,
+  }
 
 def main():
   f = commands[sys.argv[1]]
Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,15 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -443,7 +443,7 @@
   "HAVE_CLANG_PLUGIN_SUPPORT" OFF)
 
 # If libstdc++ is statically linked, clang-repl needs to statically link libstdc++
-# itself, which is not possible in many platforms because of current limitations in 
+# itself, which is not possible in many platforms because of current limitations in
 # JIT stack. (more platforms need to be supported by JITLink)
 if(NOT LLVM_STATIC_LINK_CXX_STDLIB)
   set(HAVE_CLANG_REPL_SUPPORT ON)
@@ -881,6 +881,118 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
+  set(CLANGXX_PATH ${CLANG_PATH}++)
+  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+  set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
+  set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${CLANG_INSTRUMENTED}
+  )
+  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+DEPENDS clang llvm-bolt
+COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+VERBATIM
+  )
+
+  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
+  add_custom_target(clang++-instrumented
+DEPENDS ${CLANGXX_INSTRUMENTED}
+  )
+  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+DEPENDS clang-instrumented
+COMMAND ${CMAKE_COMMAND} -E create_symlink
+  ${CLANG_INSTRUMENTED}
+  ${CLANGXX_INSTRUMENTED}
+COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+VERBATIM
+  )
+
+  # Build specified targets with instrumented Clang to collect the profile
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  set(build_configuration "$<CONFIG>")
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS clang++-instrumented
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
+   

[PATCH] D133633: [CMake] Add ClangBootstrap configuration

2022-09-12 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 459589.
Amir added a comment.

rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133633/new/

https://reviews.llvm.org/D133633

Files:
  clang/CMakeLists.txt
  clang/cmake/modules/ClangBootstrap.cmake

Index: clang/cmake/modules/ClangBootstrap.cmake
===
--- /dev/null
+++ clang/cmake/modules/ClangBootstrap.cmake
@@ -0,0 +1,54 @@
+include(ExternalProject)
+
+# clang_Bootstrap_Add(name ...
+#   DEPENDS targets...
+# Targets that this project depends on
+#   TABLEGEN
+#   LINKER
+#   AR
+#   RANLIB
+#   OBJCOPY
+#   STRIP
+# Toolchain binaries
+#   CMAKE_ARGS arguments...
+# Optional cmake arguments to pass when configuring the project
+#   BUILD_TOOL_ARGS arguments...
+# Optional arguments to pass to the build tool
+macro(clang_Bootstrap_Add name)
+  cmake_parse_arguments(ARG "" "LINKER;AR;RANLIB;OBJCOPY;STRIP"
+"DEPENDS;TABLEGEN;CMAKE_ARGS;BUILD_TOOL_ARGS"
+${ARGN})
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-bins/)
+  # Build arguments for native tool used in CMake.
+  set(build_configuration "$<CONFIG>")
+
+  ExternalProject_Add(${name}
+DEPENDS ${ARG_DEPENDS}
+PREFIX ${name}
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+${ARG_TABLEGEN}
+${ARG_LINKER}
+${ARG_AR}
+${ARG_RANLIB}
+${ARG_OBJCOPY}
+${ARG_STRIP}
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+${ARG_CMAKE_ARGS}
+BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+   --config ${build_configuration}
+   ${ARG_BUILD_TOOL_ARGS}
+INSTALL_COMMAND ""
+STEP_TARGETS configure build
+USES_TERMINAL_CONFIGURE 1
+USES_TERMINAL_BUILD 1
+USES_TERMINAL_INSTALL 1
+LIST_SEPARATOR |
+  )
+endmacro()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -797,45 +797,29 @@
   endforeach()
 
   # Build arguments for native tool used in CMake.
-  set(build_configuration "$<CONFIG>")
   set(build_tool_args "${LLVM_EXTERNAL_PROJECT_BUILD_TOOL_ARGS}")
   if(NOT build_tool_args STREQUAL "")
 string(PREPEND build_tool_args "-- ")
 separate_arguments(build_tool_args UNIX_COMMAND "${build_tool_args}")
   endif()
 
-  ExternalProject_Add(${NEXT_CLANG_STAGE}
+  include(ClangBootstrap)
+  clang_Bootstrap_Add(${NEXT_CLANG_STAGE}
 DEPENDS clang-bootstrap-deps
-PREFIX ${NEXT_CLANG_STAGE}
-SOURCE_DIR ${CMAKE_SOURCE_DIR}
-STAMP_DIR ${STAMP_DIR}
-BINARY_DIR ${BINARY_DIR}
-EXCLUDE_FROM_ALL 1
+TABLEGEN ${${CLANG_STAGE}_TABLEGEN}
+LINKER   ${${CLANG_STAGE}_LINKER}
+AR   ${${CLANG_STAGE}_AR}
+RANLIB   ${${CLANG_STAGE}_RANLIB}
+OBJCOPY  ${${CLANG_STAGE}_OBJCOPY}
+STRIP    ${${CLANG_STAGE}_STRIP}
 CMAKE_ARGS
-# We shouldn't need to set this here, but INSTALL_DIR doesn't
-# seem to work, so instead I'm passing this through
--DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
 ${PASSTHROUGH_VARIABLES}
 ${CLANG_BOOTSTRAP_CMAKE_ARGS}
  -DCLANG_STAGE=${NEXT_CLANG_STAGE}
 ${COMPILER_OPTIONS}
-${${CLANG_STAGE}_TABLEGEN}
 ${LTO_LIBRARY} ${verbose} ${PGO_OPT}
-${${CLANG_STAGE}_LINKER}
-${${CLANG_STAGE}_AR}
-${${CLANG_STAGE}_RANLIB}
-${${CLANG_STAGE}_OBJCOPY}
-${${CLANG_STAGE}_STRIP}
-BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
-   --config ${build_configuration}
-   ${build_tool_args}
-INSTALL_COMMAND ""
-STEP_TARGETS configure build
-USES_TERMINAL_CONFIGURE 1
-USES_TERMINAL_BUILD 1
-USES_TERMINAL_INSTALL 1
-LIST_SEPARATOR |
-)
+BUILD_TOOL_ARGS ${build_tool_args}
+  )
 
   # exclude really-install from main target
   set_target_properties(${NEXT_CLANG_STAGE} PROPERTIES _EP_really-install_EXCLUDE_FROM_MAIN On)
@@ -916,37 +900,32 @@
   )
 
   # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
-  set(build_configuration "$<CONFIG>")
-  include(ExternalProject)
-  ExternalProject_Add(bolt-instrumentation-profile
+  include(ClangBootstrap)
+  set(COMPILER_OPTIONS
+-DCMAKE

[PATCH] D132975: [CMake] Add clang-bolt target

2022-09-12 Thread Amir Ayupov via Phabricator via cfe-commits
Amir marked an inline comment as done.
Amir added inline comments.



Comment at: clang/CMakeLists.txt:930-937
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+-DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER_ID=Clang
+-DCMAKE_BUILD_TYPE=Release
+-DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}

Amir wrote:
> MaskRay wrote:
> > phosek wrote:
> > > I don't think this is sufficient in the general case, we would need to 
> > > pass additional variables like `CMAKE_AR` the same way we do for the 
> > > existing bootstrap logic, see 
> > > https://github.com/llvm/llvm-project/blob/dc549bf0013e11e8fcccba8a8d59c3a4bb052a3b/clang/CMakeLists.txt#L825.
> > > 
> > > For example, on Fuchsia builders we don't have any system-wide toolchain 
> > > installation, instead we manually set all necessary `CMAKE_<TOOL>` 
> > > variables for the first stage, so this call will fail for us because it 
> > > won't be able to find tools like the archiver.
> > > 
> > > Since handling this properly would likely duplicate a lot of the existing 
> > > logic from the existing bootstrap logic, I'm wondering if we should 
> > > instead try to refactor the existing logic and break it up into 
> > > macros/functions which could then be reused here as well.
> > Supporting other cmake variables will be awesome. I use something like 
> > `-DCMAKE_CXX_ARCHIVE_CREATE="$HOME/llvm/out/stable/bin/llvm-ar qcS --thin 
> > <TARGET> <OBJECTS>" -DCMAKE_CXX_ARCHIVE_FINISH=:` to make my build smaller.
> Addressed in D133633
Done: -DCLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS is passed to the cmake step of 
bolt-instrumentation-profile target.
I tested it with 
```
-DCLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS='-DCMAKE_CXX_ARCHIVE_CREATE="/llvm-ar qcS --thin <TARGET> <OBJECTS>" -DCMAKE_CXX_ARCHIVE_FINISH=:'
```
and that appeared to work.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127800: [llvm-driver] Generate symlinks instead of executables for tools

2022-09-13 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added inline comments.



Comment at: llvm/cmake/modules/AddLLVM.cmake:1283
 macro(add_llvm_tool name)
+  cmake_parse_arguments(ARG "DEPENDS;GENERATE_DRIVER" "" "" ${ARGN})
   if( NOT LLVM_BUILD_TOOLS )

Sorry for the late question, but I don't see any use of ARG_DEPENDS - is it 
intentional, or is there an omission somewhere?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127800/new/

https://reviews.llvm.org/D127800

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D123100: [Support/Hash functions] Change the `final()` and `result()` of the hashing functions to return an array of bytes

2022-04-04 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added inline comments.



Comment at: bolt/lib/Core/DebugData.cpp:823
 Hasher.update(AbbrevData);
-StringRef Key = Hasher.final();
+auto Hash = Hasher.final();
+StringRef Key((const char *)Hash.data(), Hash.size());

I think it would be more in line with LLVM coding standards to expand `auto` in 
this case (and others) – see 
https://llvm.org/docs/CodingStandards.html#use-auto-type-deduction-to-make-code-more-readable.
 
My understanding is that `auto` is fine where the type is obvious from the 
context (is explicitly available on the same line e.g. with casts), is abstract 
(T::iterator types), or doesn't matter (e.g. lambdas).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123100/new/

https://reviews.llvm.org/D123100

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D123100: [Support/Hash functions] Change the `final()` and `result()` of the hashing functions to return an array of bytes

2022-04-05 Thread Amir Ayupov via Phabricator via cfe-commits
Amir accepted this revision.
Amir added a comment.

BOLT changes LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123100/new/

https://reviews.llvm.org/D123100

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124836: [AArch64] Add support for -fzero-call-used-regs

2022-05-19 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

Looks like this commit breaks the MSVC build: 
https://lab.llvm.org/buildbot/#/builders/222/builds/532


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124836/new/

https://reviews.llvm.org/D124836

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124836: [AArch64] Add support for -fzero-call-used-regs

2022-05-21 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

Hi @void,

The msvc build is still broken. 
https://lab.llvm.org/buildbot/#/builders/222/builds/532

In D124836#3528529 , @void wrote:

> In D124836#3528521 , @vvereschaka 
> wrote:
>
>> got it. Yes, looks like it fixed. The test got passed during the last build: 
>> https://lab.llvm.org/buildbot/#/builders/104/builds/7812
>> Thank you.
>
> I'm sorry for the failure. I thought I had reverted the offending change, but 
> didn't push it. :-/




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124836/new/

https://reviews.llvm.org/D124836

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D136023: [CMake] Disable BOLT instrumentation of Clang on instrumented build

2022-10-19 Thread Amir Ayupov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG076240fa0624: [CMake] Disable BOLT instrumentation of Clang 
on instrumented build (authored by Amir).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D136023/new/

https://reviews.llvm.org/D136023

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT-PGO.cmake


Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -0,0 +1,11 @@
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+
+set(CLANG_BOOTSTRAP_TARGETS
+  stage2-clang++-bolt
+  CACHE STRING "")
+set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
+  clang++-bolt
+  CACHE STRING "")
+
+set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -868,7 +868,7 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT)
+if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)


Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -0,0 +1,11 @@
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+
+set(CLANG_BOOTSTRAP_TARGETS
+  stage2-clang++-bolt
+  CACHE STRING "")
+set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
+  clang++-bolt
+  CACHE STRING "")
+
+set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -868,7 +868,7 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT)
+if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132975: [CMake] Add clang-bolt target

2022-09-23 Thread Amir Ayupov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3dab7fede201: [CMake] Add clang-bolt target (authored by 
Amir).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -38,7 +38,7 @@
 
 def merge(args):
   if len(args) != 3:
-print('Usage: %s clean <llvm-profdata> <output> <path>\n' % __file__ +
+print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
   '\tMerges all profraw files from path into output.')
 return 1
   cmd = [args[0], 'merge', '-o', args[1]]
@@ -46,6 +46,16 @@
   subprocess.check_call(cmd)
   return 0
 
+def merge_fdata(args):
+  if len(args) != 3:
+print('Usage: %s merge-fdata <merge-fdata> <output> <path>\n' % __file__ +
+  '\tMerges all fdata files from path into output.')
+return 1
+  cmd = [args[0], '-o', args[1]]
+  cmd.extend(findFilesWithExtension(args[2], "fdata"))
+  subprocess.check_call(cmd)
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -395,10 +405,12 @@
   return 0
 
 commands = {'clean' : clean,
-  'merge' : merge, 
+  'merge' : merge,
   'dtrace' : dtrace,
   'cc1' : cc1,
-  'gen-order-file' : genOrderFile}
+  'gen-order-file' : genOrderFile,
+  'merge-fdata' : merge_fdata,
+  }
 
 def main():
   f = commands[sys.argv[1]]
Index: clang/cmake/caches/BOLT.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,15 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -443,7 +443,7 @@
   "HAVE_CLANG_PLUGIN_SUPPORT" OFF)
 
 # If libstdc++ is statically linked, clang-repl needs to statically link libstdc++
-# itself, which is not possible in many platforms because of current limitations in 
+# itself, which is not possible in many platforms because of current limitations in
 # JIT stack. (more platforms need to be supported by JITLink)
 if(NOT LLVM_STATIC_LINK_CXX_STDLIB)
   set(HAVE_CLANG_REPL_SUPPORT ON)
@@ -881,6 +881,118 @@
   endforeach()
 endif()
 
+if (CLANG_BOLT_INSTRUMENT)
+  set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
+  set(CLANGXX_PATH ${CLANG_PATH}++)
+  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+  set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
+  set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
+
+  # Instrument clang with BOLT
+  add_custom_target(clang-instrumented
+DEPENDS ${CLANG_INSTRUMENTED}
+  )
+  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+DEPENDS clang llvm-bolt
+COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+  -instrument --instrumentation-file-append-pid
+  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+COMMENT "Instrumenting clang binary with BOLT"
+VERBATIM
+  )
+
+  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
+  add_custom_target(clang++-instrumented
+DEPENDS ${CLANGXX_INSTRUMENTED}
+  )
+  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+DEPENDS clang-instrumented
+COMMAND ${CMAKE_COMMAND} -E create_symlink
+  ${CLANG_INSTRUMENTED}
+  ${CLANGXX_INSTRUMENTED}
+COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+VERBATIM
+  )
+
+  # Build specified targets with instrumented Clang to collect the profile
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  set(build_configuration "$<CONFIG>")
+  include(ExternalProject)
+  ExternalProject_Add(bolt-instrumentation-profile
+DEPENDS clang++-instrumented
+PREFIX bolt-instrumentation-profile
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+$

[PATCH] D136023: [CMake] Disable BOLT instrumentation of Clang on instrumented build

2022-10-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added reviewers: phosek, nikic, beanz.
Herald added a subscriber: wenlei.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This enables multi-stage PGO build optimized by BOLT using BOLT.cmake cache.

The issue is that the `-DPGO_BUILD_CONFIGURATION` cache file is passed to both
the stage2-instrumented and stage2-optimized builds (so that they are
identical), but in the case of BOLT.cmake it doesn't make sense to
BOLT-instrument the PGO-instrumented binary (it's not going to be optimized).
Hence, turn off the `CLANG_BOLT_INSTRUMENT` code if `LLVM_BUILD_INSTRUMENTED`
is enabled.

The final workflow that enables multi-stage InstrPGO+ThinLTO+BOLT Clang build:

  cmake /llvm -GNinja -DCMAKE_BUILD_TYPE=Release \
    -DLLVM_ENABLE_PROJECTS="bolt;clang;lld" -DLLVM_ENABLE_LLD=ON \
    -DBOOTSTRAP_LLVM_ENABLE_LLD=ON -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
    -DPGO_INSTRUMENT_LTO=Thin -DCLANG_BOOTSTRAP_TARGETS="stage2-clang++-bolt" \
    -DBOOTSTRAP_CLANG_BOOTSTRAP_TARGETS=clang++-bolt \
    -DPGO_BUILD_CONFIGURATION=/path/to/llvm-project/clang/cmake/caches/BOLT.cmake \
    -C llvm-project/clang/cmake/caches/PGO.cmake
  ninja stage2-clang++-bolt


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D136023

Files:
  clang/CMakeLists.txt


Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -874,7 +874,7 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT)
+if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)


Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -874,7 +874,7 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT)
+if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D136023: [CMake] Disable BOLT instrumentation of Clang on instrumented build

2022-10-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 468030.
Amir added a comment.

Add BOLT-PGO cmake cache file


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D136023/new/

https://reviews.llvm.org/D136023

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT-PGO.cmake


Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -0,0 +1,11 @@
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+
+set(CLANG_BOOTSTRAP_TARGETS
+  stage2-clang++-bolt
+  CACHE STRING "")
+set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
+  clang++-bolt
+  CACHE STRING "")
+
+set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -874,7 +874,7 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT)
+if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)


Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -0,0 +1,11 @@
+set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
+
+set(CLANG_BOOTSTRAP_TARGETS
+  stage2-clang++-bolt
+  CACHE STRING "")
+set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
+  clang++-bolt
+  CACHE STRING "")
+
+set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -874,7 +874,7 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT)
+if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71507: [perf-training] Make training data location configurable

2022-10-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.
Herald added a subscriber: wenlei.
Herald added a project: All.

Hi @smeenai,

Sorry for asking on this diff, but... Do you have any pointers for training 
sets?

Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71507/new/

https://reviews.llvm.org/D71507

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132975: [CMake] Add clang-bolt target

2022-10-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

In D132975#3763264, @phosek wrote:

> This was already on my list of build system features I'd like to implement 
> and I'm glad someone else is already looking into it, thank you! I have two 
> high level comments about your approach.
>
> The first one is related to the use of Clang build as the training data. I 
> think that Clang build is both unnecessarily heavyweight, but also not 
> particularly representative of typical workloads (most Clang users don't use 
> it to build Clang). Ideally, we would give vendors the flexibility to supply 
> their own training data. I'd prefer reusing the existing perf-training 
> setup to do so. In fact, I'd imagine most vendors would likely use the same 
> training data for both PGO and BOLT and that use case should be supported.

Do you happen to know any existing perf-training sets? Or is there a simple way 
to create one?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132975: [CMake] Add clang-bolt target

2022-10-16 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

In D132975#3861334, @phosek wrote:

> In D132975#3860896, @Amir wrote:
>
>> In D132975#3763264, @phosek wrote:
>>
>>> This was already on my list of build system features I'd like to implement 
>>> and I'm glad someone else is already looking into it, thank you! I have two 
>>> high level comments about your approach.
>>>
>>> The first one is related to the use of Clang build as the training data. I 
>>> think that Clang build is both unnecessarily heavyweight, but also not 
>>> particularly representative of typical workloads (most Clang users don't 
>>> use it to build Clang). Ideally, we would give vendors the flexibility to 
>>> supply their own training data. I'd prefer reusing the existing 
>>> perf-training setup to do so. In fact, I'd imagine most vendors would
>>> likely use the same
>>> training data for both PGO and BOLT and that use case should be supported.
>>
>> Do you happen to know any existing perf-training sets? Or is there a simple 
>> way to create one?
>
> I'm working on a script for generating perf-training sets from Ninja-based 
> build systems, I can contribute it to LLVM if you think it'd be useful.

Yes, that would be super useful. BOLT should then also leverage that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132975/new/

https://reviews.llvm.org/D132975

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139496: [CMake] Add perf profiling for clang-bolt

2022-12-19 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 484102.
Amir added a comment.

Typo


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  llvm/docs/AdvancedBuilds.rst

Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -241,6 +241,62 @@
 
   $ ninja stage2-clang-bolt
 
+BOLT profile
+------------
+BOLT uses the profile collected by either Linux `perf` or via BOLT's own
+instrumentation. Both modes are supported by CMake automation, with
+instrumentation being the default (`-DCLANG_BOLT=INSTRUMENT`).
+
+It's strongly recommended to use `perf` if the host system supports it as it
+is a significantly faster and potentially more reliable method:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT=perf \
+  -C /clang/cmake/caches/BOLT.cmake
+
+If the host system supports profiling branch stacks (e.g. AMD or Intel LBR
+(Last Branch Record), Armv9-A BRBE (Branch Record Buffer Extension)), it can be
+enabled with `-DCLANG_BOLT=LBR` to further improve the profile quality:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT=LBR \
+  -C /clang/cmake/caches/BOLT.cmake
+
+The following matrix describes supported profiling methods. Note that Linux/ELF
+is the only supported platform.
+
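+============ ============ ====== ===========
+Architecture `-DCLANG_BOLT` value
+------------ -------------------------------
+             `Instrument` `perf` `LBR`
+============ ============ ====== ===========
+x86_64       Yes          Yes    Yes
+AArch64      No           Yes    No HW exist
+============ ============ ====== ===========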
+
+Profiling variables
+-------------------
+BOLT profile is collected from building one of the in-tree projects/targets with
+Clang as a workload. The following configuration options can be used to change
+the profiling build and profiling mechanism:
+
+**CLANG_BOLT**
+  Profiling mechanism to be used. Supported values: `Instrument` (default),
+  `perf` (requires OS support), `LBR` (requires hardware support).
+
+**CLANG_BOLT_PROJECTS**
+  Projects to enable in profiling build. Defaults to `llvm`.
+
+**CLANG_BOLT_TARGETS**
+  Targets to build in profiling build. Defaults to `count` in instrumentation
+  build and `FileCheck` in perf-build.
+
+**CLANG_BOLT_EXTRA_CMAKE_FLAGS**
+  Extra CMake flags to pass to profiling build at configuration time.
+
+
 3-Stage Non-Determinism
 =======================
 
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,17 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "Apply BOLT optimization to Clang. \
+  May be specified as Instrument or Perf or LBR to use a particular profiling \
+  mechanism.")
+
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+  set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
+else()
+  set(CLANG_BOLT_TARGETS "FileCheck" CACHE STRING "")
+endif()
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
-
-# Disable function splitting enabled by default in GCC8+
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
-endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,106 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${CMAKE_CURRENT_BINARY

[PATCH] D139496: [CMake] Add perf profiling for clang-bolt

2022-12-21 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 484727.
Amir added a comment.

Convert perf profile using perf2bolt (aggregate-only mode)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  llvm/docs/AdvancedBuilds.rst

Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -241,6 +241,62 @@
 
   $ ninja stage2-clang-bolt
 
+BOLT profile
+------------
+BOLT uses the profile collected by either Linux `perf` or via BOLT's own
+instrumentation. Both modes are supported by CMake automation, with
+instrumentation being the default (`-DCLANG_BOLT=INSTRUMENT`).
+
+It's strongly recommended to use `perf` if the host system supports it as it
+is a significantly faster and potentially more reliable method:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT=perf \
+  -C /clang/cmake/caches/BOLT.cmake
+
+If the host system supports profiling branch stacks (e.g. AMD or Intel LBR
+(Last Branch Record), Armv9-A BRBE (Branch Record Buffer Extension)), it can be
+enabled with `-DCLANG_BOLT=LBR` to further improve the profile quality:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT=LBR \
+  -C /clang/cmake/caches/BOLT.cmake
+
+The following matrix describes supported profiling methods. Note that Linux/ELF
+is the only supported platform.
+
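+============ ============ ====== ===========
+Architecture `-DCLANG_BOLT` value
+------------ -------------------------------
+             `Instrument` `perf` `LBR`
+============ ============ ====== ===========
+x86_64       Yes          Yes    Yes
+AArch64      No           Yes    No HW exist
+============ ============ ====== ===========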
+
+Profiling variables
+-------------------
+BOLT profile is collected from building one of the in-tree projects/targets with
+Clang as a workload. The following configuration options can be used to change
+the profiling build and profiling mechanism:
+
+**CLANG_BOLT**
+  Profiling mechanism to be used. Supported values: `Instrument` (default),
+  `perf` (requires OS support), `LBR` (requires hardware support).
+
+**CLANG_BOLT_PROJECTS**
+  Projects to enable in profiling build. Defaults to `llvm`.
+
+**CLANG_BOLT_TARGETS**
+  Targets to build in profiling build. Defaults to `count` in instrumentation
+  build and `FileCheck` in perf-build.
+
+**CLANG_BOLT_EXTRA_CMAKE_FLAGS**
+  Extra CMake flags to pass to profiling build at configuration time.
+
+
 3-Stage Non-Determinism
 =======================
 
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,17 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "Apply BOLT optimization to Clang. \
+  May be specified as Instrument or Perf or LBR to use a particular profiling \
+  mechanism.")
+
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+  set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
+else()
+  set(CLANG_BOLT_TARGETS "FileCheck" CACHE STRING "")
+endif()
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
-
-# Disable function splitting enabled by default in GCC8+
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
-endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,106 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pi

[PATCH] D140565: [Clang][CMake] Set up distribution target for Clang-BOLT

2022-12-22 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added a reviewer: phosek.
Herald added a subscriber: wenlei.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Provide a way to install usable BOLT-optimized Clang
(clang + resource headers) using
`ninja clang-bolt install-distribution` with BOLT.cmake cache file
or `ninja stage2-clang-bolt stage2-install-distribution`
with BOLT-PGO.cmake cache file.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D140565

Files:
  clang/cmake/caches/BOLT-PGO.cmake
  clang/cmake/caches/BOLT.cmake


Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -15,3 +15,10 @@
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# setup toolchain
+set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "")
+set(LLVM_DISTRIBUTION_COMPONENTS
+  clang
+  clang-resource-headers
+  CACHE STRING "")
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -2,10 +2,16 @@
 
 set(CLANG_BOOTSTRAP_TARGETS
   stage2-clang-bolt
+  stage2-distribution
+  stage2-install-distribution
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
   clang-bolt
+  distribution
+  install-distribution
   CACHE STRING "")
 
-set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
+set(PGO_BUILD_CONFIGURATION
+  ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
+  CACHE STRING "")
 include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)


Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -15,3 +15,10 @@
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# setup toolchain
+set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "")
+set(LLVM_DISTRIBUTION_COMPONENTS
+  clang
+  clang-resource-headers
+  CACHE STRING "")
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -2,10 +2,16 @@
 
 set(CLANG_BOOTSTRAP_TARGETS
   stage2-clang-bolt
+  stage2-distribution
+  stage2-install-distribution
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
   clang-bolt
+  distribution
+  install-distribution
   CACHE STRING "")
 
-set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
+set(PGO_BUILD_CONFIGURATION
+  ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
+  CACHE STRING "")
 include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108265: .clang-tidy: Push variable related readability-identifier-naming options down to projects

2022-12-22 Thread Amir Ayupov via Phabricator via cfe-commits
Amir accepted this revision.
Amir added a comment.
This revision is now accepted and ready to land.

LGTM for BOLT


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108265/new/

https://reviews.llvm.org/D108265

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141342: [perf-training] Check extension in findFilesWithExtension

2023-01-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added reviewers: phosek, beanz.
Herald added a subscriber: pengfei.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

`findFilesWithExtension` helper checks for `endswith(extension)` instead of
exactly matching the file extension. This causes it to match unrelated files,
for example, `.profdata` files while matching `.fdata` files:

http://157.230.108.44:8011/#/builders/56/builds/247

  Merging data from 
/worker/worker/bolt-x86_64-ubuntu-clang-bolt-gcc/build/tools/clang/prof.fdata.1124569.fdata...
  Merging data from 
/worker/worker/bolt-x86_64-ubuntu-clang-bolt-gcc/build/tools/clang/test/Frontend/Output/optimization-remark-with-hotness-new-pm.c.tmp.profdata...


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D141342

Files:
  clang/utils/perf-training/perf-helper.py


Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -23,7 +23,7 @@
   filenames = []
   for root, dirs, files in os.walk(path): 
 for filename in files:
-  if filename.endswith(extension):
+  if os.path.splitext(filename)[1] == extension:
 filenames.append(os.path.join(root, filename))
   return filenames
 


Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -23,7 +23,7 @@
   filenames = []
   for root, dirs, files in os.walk(path): 
 for filename in files:
-  if filename.endswith(extension):
+  if os.path.splitext(filename)[1] == extension:
 filenames.append(os.path.join(root, filename))
   return filenames
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141342: [perf-training] Check extension in findFilesWithExtension

2023-01-10 Thread Amir Ayupov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1fbbf92e4fda: [perf-training] Check extension in 
findFilesWithExtension (authored by Amir).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141342/new/

https://reviews.llvm.org/D141342

Files:
  clang/utils/perf-training/perf-helper.py


Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -23,7 +23,7 @@
   filenames = []
   for root, dirs, files in os.walk(path): 
 for filename in files:
-  if filename.endswith(extension):
+  if os.path.splitext(filename)[1] == extension:
 filenames.append(os.path.join(root, filename))
   return filenames
 


Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -23,7 +23,7 @@
   filenames = []
   for root, dirs, files in os.walk(path): 
 for filename in files:
-  if filename.endswith(extension):
+  if os.path.splitext(filename)[1] == extension:
 filenames.append(os.path.join(root, filename))
   return filenames
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D137338: Fix dupe word typos

2022-11-03 Thread Amir Ayupov via Phabricator via cfe-commits
Amir accepted this revision.
Amir added a comment.

BOLT changes LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137338/new/

https://reviews.llvm.org/D137338

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133633: [CMake] Add ClangBootstrap configuration

2022-11-14 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 475309.
Amir added a comment.

Move out toolchain tools back into CMAKE_ARGS


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133633/new/

https://reviews.llvm.org/D133633

Files:
  clang/CMakeLists.txt
  clang/cmake/modules/ClangBootstrap.cmake

Index: clang/cmake/modules/ClangBootstrap.cmake
===
--- /dev/null
+++ clang/cmake/modules/ClangBootstrap.cmake
@@ -0,0 +1,41 @@
+include(ExternalProject)
+
+# clang_Bootstrap_Add(name ...
+#   DEPENDS targets...
+# Targets that this project depends on
+#   CMAKE_ARGS arguments...
+# Optional cmake arguments to pass when configuring the project
+#   BUILD_TOOL_ARGS arguments...
+# Optional arguments to pass to the build tool
+macro(clang_Bootstrap_Add name)
+  cmake_parse_arguments(ARG "" "LINKER;AR;RANLIB;OBJCOPY;STRIP"
+"DEPENDS;TABLEGEN;CMAKE_ARGS;BUILD_TOOL_ARGS"
+${ARGN})
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-bins/)
+  # Build arguments for native tool used in CMake.
+  set(build_configuration "$<CONFIG>")
+
+  ExternalProject_Add(${name}
+DEPENDS ${ARG_DEPENDS}
+PREFIX ${name}
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+${ARG_CMAKE_ARGS}
+BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+   --config ${build_configuration}
+   ${ARG_BUILD_TOOL_ARGS}
+INSTALL_COMMAND ""
+STEP_TARGETS configure build
+USES_TERMINAL_CONFIGURE 1
+USES_TERMINAL_BUILD 1
+USES_TERMINAL_INSTALL 1
+LIST_SEPARATOR |
+  )
+endmacro()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -785,24 +785,16 @@
   endforeach()
 
   # Build arguments for native tool used in CMake.
-  set(build_configuration "$<CONFIG>")
   set(build_tool_args "${LLVM_EXTERNAL_PROJECT_BUILD_TOOL_ARGS}")
   if(NOT build_tool_args STREQUAL "")
 string(PREPEND build_tool_args "-- ")
 separate_arguments(build_tool_args UNIX_COMMAND "${build_tool_args}")
   endif()
 
-  ExternalProject_Add(${NEXT_CLANG_STAGE}
+  include(ClangBootstrap)
+  clang_Bootstrap_Add(${NEXT_CLANG_STAGE}
 DEPENDS clang-bootstrap-deps
-PREFIX ${NEXT_CLANG_STAGE}
-SOURCE_DIR ${CMAKE_SOURCE_DIR}
-STAMP_DIR ${STAMP_DIR}
-BINARY_DIR ${BINARY_DIR}
-EXCLUDE_FROM_ALL 1
 CMAKE_ARGS
-# We shouldn't need to set this here, but INSTALL_DIR doesn't
-# seem to work, so instead I'm passing this through
--DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
 ${PASSTHROUGH_VARIABLES}
 ${CLANG_BOOTSTRAP_CMAKE_ARGS}
  -DCLANG_STAGE=${NEXT_CLANG_STAGE}
@@ -814,16 +806,8 @@
 ${${CLANG_STAGE}_RANLIB}
 ${${CLANG_STAGE}_OBJCOPY}
 ${${CLANG_STAGE}_STRIP}
-BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
-   --config ${build_configuration}
-   ${build_tool_args}
-INSTALL_COMMAND ""
-STEP_TARGETS configure build
-USES_TERMINAL_CONFIGURE 1
-USES_TERMINAL_BUILD 1
-USES_TERMINAL_INSTALL 1
-LIST_SEPARATOR |
-)
+BUILD_TOOL_ARGS ${build_tool_args}
+  )
 
   # exclude really-install from main target
   set_target_properties(${NEXT_CLANG_STAGE} PROPERTIES _EP_really-install_EXCLUDE_FROM_MAIN On)
@@ -904,37 +888,23 @@
   )
 
   # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
-  set(build_configuration "$<CONFIG>")
-  include(ExternalProject)
-  ExternalProject_Add(bolt-instrumentation-profile
+  include(ClangBootstrap)
+  set(COMPILER_OPTIONS
+-DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER_ID=Clang
+  )
+  clang_Bootstrap_Add(bolt-instrumentation-profile
 DEPENDS clang++-instrumented
-PREFIX bolt-instrumentation-profile
-SOURCE_DIR ${CMAKE_SOURCE_DIR}
-STAMP_DIR ${STAMP_DIR}
-BINARY_DIR ${BINARY_DIR}
-EXCLUDE_FROM_ALL 1
 CMAKE_ARGS
 ${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
-# We shouldn't need to set this here, but INSTALL_DIR doesn't
-# seem to work, so instead I'm passing this through
--DCMAKE_I

[PATCH] D133633: [CMake] Add ClangBootstrap configuration

2022-11-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 475505.
Amir added a comment.

s/clang_Bootstrap_Add/clang_bootstrap_add


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133633/new/

https://reviews.llvm.org/D133633

Files:
  clang/CMakeLists.txt
  clang/cmake/modules/ClangBootstrap.cmake

Index: clang/cmake/modules/ClangBootstrap.cmake
===
--- /dev/null
+++ clang/cmake/modules/ClangBootstrap.cmake
@@ -0,0 +1,41 @@
+include(ExternalProject)
+
+# clang_bootstrap_add(name ...
+#   DEPENDS targets...
+# Targets that this project depends on
+#   CMAKE_ARGS arguments...
+# Optional cmake arguments to pass when configuring the project
+#   BUILD_TOOL_ARGS arguments...
+# Optional arguments to pass to the build tool
+macro(clang_bootstrap_add name)
+  cmake_parse_arguments(ARG "" "LINKER;AR;RANLIB;OBJCOPY;STRIP"
+"DEPENDS;TABLEGEN;CMAKE_ARGS;BUILD_TOOL_ARGS"
+${ARGN})
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-bins/)
+  # Build arguments for native tool used in CMake.
+  set(build_configuration "$<CONFIG>")
+
+  ExternalProject_Add(${name}
+DEPENDS ${ARG_DEPENDS}
+PREFIX ${name}
+SOURCE_DIR ${CMAKE_SOURCE_DIR}
+STAMP_DIR ${STAMP_DIR}
+BINARY_DIR ${BINARY_DIR}
+EXCLUDE_FROM_ALL 1
+CMAKE_ARGS
+# We shouldn't need to set this here, but INSTALL_DIR doesn't
+# seem to work, so instead I'm passing this through
+-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+${ARG_CMAKE_ARGS}
+BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+   --config ${build_configuration}
+   ${ARG_BUILD_TOOL_ARGS}
+INSTALL_COMMAND ""
+STEP_TARGETS configure build
+USES_TERMINAL_CONFIGURE 1
+USES_TERMINAL_BUILD 1
+USES_TERMINAL_INSTALL 1
+LIST_SEPARATOR |
+  )
+endmacro()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -785,24 +785,16 @@
   endforeach()
 
   # Build arguments for native tool used in CMake.
-  set(build_configuration "$<CONFIG>")
   set(build_tool_args "${LLVM_EXTERNAL_PROJECT_BUILD_TOOL_ARGS}")
   if(NOT build_tool_args STREQUAL "")
 string(PREPEND build_tool_args "-- ")
 separate_arguments(build_tool_args UNIX_COMMAND "${build_tool_args}")
   endif()
 
-  ExternalProject_Add(${NEXT_CLANG_STAGE}
+  include(ClangBootstrap)
+  clang_bootstrap_add(${NEXT_CLANG_STAGE}
 DEPENDS clang-bootstrap-deps
-PREFIX ${NEXT_CLANG_STAGE}
-SOURCE_DIR ${CMAKE_SOURCE_DIR}
-STAMP_DIR ${STAMP_DIR}
-BINARY_DIR ${BINARY_DIR}
-EXCLUDE_FROM_ALL 1
 CMAKE_ARGS
-# We shouldn't need to set this here, but INSTALL_DIR doesn't
-# seem to work, so instead I'm passing this through
--DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
 ${PASSTHROUGH_VARIABLES}
 ${CLANG_BOOTSTRAP_CMAKE_ARGS}
  -DCLANG_STAGE=${NEXT_CLANG_STAGE}
@@ -814,16 +806,8 @@
 ${${CLANG_STAGE}_RANLIB}
 ${${CLANG_STAGE}_OBJCOPY}
 ${${CLANG_STAGE}_STRIP}
-BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
-   --config ${build_configuration}
-   ${build_tool_args}
-INSTALL_COMMAND ""
-STEP_TARGETS configure build
-USES_TERMINAL_CONFIGURE 1
-USES_TERMINAL_BUILD 1
-USES_TERMINAL_INSTALL 1
-LIST_SEPARATOR |
-)
+BUILD_TOOL_ARGS ${build_tool_args}
+  )
 
   # exclude really-install from main target
   set_target_properties(${NEXT_CLANG_STAGE} PROPERTIES _EP_really-install_EXCLUDE_FROM_MAIN On)
@@ -904,37 +888,23 @@
   )
 
   # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
-  set(build_configuration "$<CONFIG>")
-  include(ExternalProject)
-  ExternalProject_Add(bolt-instrumentation-profile
+  include(ClangBootstrap)
+  set(COMPILER_OPTIONS
+-DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
+-DCMAKE_ASM_COMPILER_ID=Clang
+  )
+  clang_bootstrap_add(bolt-instrumentation-profile
 DEPENDS clang++-instrumented
-PREFIX bolt-instrumentation-profile
-SOURCE_DIR ${CMAKE_SOURCE_DIR}
-STAMP_DIR ${STAMP_DIR}
-BINARY_DIR ${BINARY_DIR}
-EXCLUDE_FROM_ALL 1
 CMAKE_ARGS
 ${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
-# We shouldn't need to set this here, but INSTALL_DIR doesn't
-# seem to work, so instead I'm passing this through
--DCMAKE_INSTA

[PATCH] D133633: [CMake] Add ClangBootstrap configuration

2022-11-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir marked an inline comment as done.
Amir added inline comments.



Comment at: clang/cmake/modules/ClangBootstrap.cmake:10
+# Optional arguments to pass to the build tool
+macro(clang_Bootstrap_Add name)
+  cmake_parse_arguments(ARG "" "LINKER;AR;RANLIB;OBJCOPY;STRIP"

phosek wrote:
> We usually use lowercase names.
Thanks! I was a bit confused by CMake's case use (e.g. `ExternalProject_Add`)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133633/new/

https://reviews.llvm.org/D133633

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139454: [CMake] Replace clang binary if using clang-bolt target

2023-01-24 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

Ping @phosek @MaskRay


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139454/new/

https://reviews.llvm.org/D139454

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139454: [CMake] Replace clang binary if using clang-bolt target

2023-01-26 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 492638.
Amir added a comment.

Use generator expression, put clang-bolt temp binary to CMAKE_CURRENT_BINARY_DIR


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139454/new/

https://reviews.llvm.org/D139454

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT-PGO.cmake
  llvm/docs/AdvancedBuilds.rst


Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -216,7 +216,7 @@
 
 .. code-block:: console
 
-  $ ninja clang++-bolt
+  $ ninja clang-bolt
 
 If you're seeing errors in the build process, try building with a recent
 version of Clang/LLVM by setting the CMAKE_C_COMPILER and
@@ -235,12 +235,11 @@
   -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
   -DPGO_INSTRUMENT_LTO=Thin
 
-Then, to build the final optimized binary, build the stage2-clang++-bolt
-target:
+Then, to build the final optimized binary, build the stage2-clang-bolt target:
 
 .. code-block:: console
 
-  $ ninja stage2-clang++-bolt
+  $ ninja stage2-clang-bolt
 
 3-Stage Non-Determinism
 ===
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -1,10 +1,10 @@
 set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
 
 set(CLANG_BOOTSTRAP_TARGETS
-  stage2-clang++-bolt
+  stage2-clang-bolt
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
-  clang++-bolt
+  clang-bolt
   CACHE STRING "")
 
 set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -874,8 +874,6 @@
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
   set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
-  set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
-  set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
   # Instrument clang with BOLT
   add_custom_target(clang-instrumented
@@ -953,6 +951,7 @@
   )
 
   # Optimize original (pre-bolt) Clang using the collected profile
+  set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
   add_custom_target(clang-bolt
 DEPENDS ${CLANG_OPTIMIZED}
   )
@@ -963,22 +962,10 @@
   -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
   -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
   -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>
 COMMENT "Optimizing Clang with BOLT"
 VERBATIM
   )
-
-  # Make a symlink from clang-bolt to clang++-bolt
-  add_custom_target(clang++-bolt
-DEPENDS ${CLANGXX_OPTIMIZED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_OPTIMIZED}
-DEPENDS clang-bolt
-COMMAND ${CMAKE_COMMAND} -E create_symlink
-  ${CLANG_OPTIMIZED}
-  ${CLANGXX_OPTIMIZED}
-COMMENT "Creating symlink from BOLT optimized clang to clang++"
-VERBATIM
-  )
 endif()
 
 if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION)


Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -216,7 +216,7 @@
 
 .. code-block:: console
 
-  $ ninja clang++-bolt
+  $ ninja clang-bolt
 
 If you're seeing errors in the build process, try building with a recent
 version of Clang/LLVM by setting the CMAKE_C_COMPILER and
@@ -235,12 +235,11 @@
   -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
   -DPGO_INSTRUMENT_LTO=Thin
 
-Then, to build the final optimized binary, build the stage2-clang++-bolt
-target:
+Then, to build the final optimized binary, build the stage2-clang-bolt target:
 
 .. code-block:: console
 
-  $ ninja stage2-clang++-bolt
+  $ ninja stage2-clang-bolt
 
 3-Stage Non-Determinism
 ===
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -1,10 +1,10 @@
 set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
 
 set(CLANG_BOOTSTRAP_TARGETS
-  stage2-clang++-bolt
+  stage2-clang-bolt
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
-  clang++-bolt
+  clang-bolt
   CACHE STRING "")
 
 set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -874,8 +874,6 @@
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
   set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
-  set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
-  set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
   # Instrument clang with BOLT
   

[PATCH] D139454: [CMake] Replace clang binary if using clang-bolt target

2023-01-28 Thread Amir Ayupov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe67f849001bb: [CMake] Replace clang binary if using 
clang-bolt target (authored by Amir).

Changed prior to commit:
  https://reviews.llvm.org/D139454?vs=492638&id=493025#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139454/new/

https://reviews.llvm.org/D139454

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT-PGO.cmake
  llvm/docs/AdvancedBuilds.rst


Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -224,7 +224,7 @@
 
 .. code-block:: console
 
-  $ ninja clang++-bolt
+  $ ninja clang-bolt
 
 If you're seeing errors in the build process, try building with a recent
 version of Clang/LLVM by setting the CMAKE_C_COMPILER and
@@ -243,12 +243,11 @@
   -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
   -DPGO_INSTRUMENT_LTO=Thin
 
-Then, to build the final optimized binary, build the stage2-clang++-bolt
-target:
+Then, to build the final optimized binary, build the stage2-clang-bolt target:
 
 .. code-block:: console
 
-  $ ninja stage2-clang++-bolt
+  $ ninja stage2-clang-bolt
 
 3-Stage Non-Determinism
 ===
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -2,11 +2,9 @@
 
 set(CLANG_BOOTSTRAP_TARGETS
   stage2-clang-bolt
-  stage2-clang++-bolt
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
   clang-bolt
-  clang++-bolt
   CACHE STRING "")
 
 set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -856,8 +856,6 @@
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
   set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
-  set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
-  set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
   # Instrument clang with BOLT
   add_custom_target(clang-instrumented
@@ -935,6 +933,7 @@
   )
 
   # Optimize original (pre-bolt) Clang using the collected profile
+  set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
   add_custom_target(clang-bolt
 DEPENDS ${CLANG_OPTIMIZED}
   )
@@ -945,22 +944,10 @@
   -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
   -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
   -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>
 COMMENT "Optimizing Clang with BOLT"
 VERBATIM
   )
-
-  # Make a symlink from clang-bolt to clang++-bolt
-  add_custom_target(clang++-bolt
-DEPENDS ${CLANGXX_OPTIMIZED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_OPTIMIZED}
-DEPENDS clang-bolt
-COMMAND ${CMAKE_COMMAND} -E create_symlink
-  ${CLANG_OPTIMIZED}
-  ${CLANGXX_OPTIMIZED}
-COMMENT "Creating symlink from BOLT optimized clang to clang++"
-VERBATIM
-  )
 endif()
 
 if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION)


Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -224,7 +224,7 @@
 
 .. code-block:: console
 
-  $ ninja clang++-bolt
+  $ ninja clang-bolt
 
 If you're seeing errors in the build process, try building with a recent
 version of Clang/LLVM by setting the CMAKE_C_COMPILER and
@@ -243,12 +243,11 @@
   -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
   -DPGO_INSTRUMENT_LTO=Thin
 
-Then, to build the final optimized binary, build the stage2-clang++-bolt
-target:
+Then, to build the final optimized binary, build the stage2-clang-bolt target:
 
 .. code-block:: console
 
-  $ ninja stage2-clang++-bolt
+  $ ninja stage2-clang-bolt
 
 3-Stage Non-Determinism
 ===
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -2,11 +2,9 @@
 
 set(CLANG_BOOTSTRAP_TARGETS
   stage2-clang-bolt
-  stage2-clang++-bolt
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
   clang-bolt
-  clang++-bolt
   CACHE STRING "")
 
 set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -856,8 +856,6 @@
   set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
   set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
-  set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
-  set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
   # Instrument clang with BOLT
   a

[PATCH] D155419: [Clang][CMake][WIP] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED

2023-07-17 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 541222.
Amir added a comment.

More plumbing


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155419/new/

https://reviews.llvm.org/D155419

Files:
  clang/CMakeLists.txt
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/lit.cfg
  clang/utils/perf-training/lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py
  llvm/CMakeLists.txt
  llvm/cmake/modules/HandleLLVMOptions.cmake

Index: llvm/cmake/modules/HandleLLVMOptions.cmake
===
--- llvm/cmake/modules/HandleLLVMOptions.cmake
+++ llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1071,7 +1071,7 @@
 option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off)
 mark_as_advanced(LLVM_ENABLE_IR_PGO)
 
-set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
+set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
 set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
 string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1104,6 +1104,15 @@
 CMAKE_EXE_LINKER_FLAGS
 CMAKE_SHARED_LINKER_FLAGS)
 endif()
+  elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
   else()
 append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
   CMAKE_CXX_FLAGS
@@ -1154,6 +1163,21 @@
   endif()
 endif()
 
+if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
+  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
+  else()
+message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
+  endif()
+endif()
+
 option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
 append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\" -fcoverage-mapping"
Index: llvm/CMakeLists.txt
===
--- llvm/CMakeLists.txt
+++ llvm/CMakeLists.txt
@@ -849,6 +849,9 @@
 set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
   "Profiling data file to use when compiling in order to improve runtime performance.")
 
+set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
+  "Sampling profiling data file to use when compiling in order to improve runtime performance.")
+
 if(LLVM_INCLUDE_TESTS)
   # Lit test suite requires at least python 3.6
   set(LLVM_MINIMUM_PYTHON_VERSION 3.6)
Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -69,10 +69,16 @@
 
 def perf(args):
 parser = argparse.ArgumentParser(
-prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+prog="perf-helper perf",
+description="perf wrapper for BOLT/CSSPGO profile collection"
 )
 parser.add_argument(
-"--lbr", required=False, action="store_true", help="Use perf with branch stacks"
+"--lbr", required=False, action="store_true",
+help="Use perf with branch stacks"
+)
+parser.add_argument(
+"--call-graph", required=False, action="store_true",
+help="Collect call graph"
 )
 parser.add_argument("cmd", nargs="*", help="")
 
@@ -97,6 +103,8 @@
 )
 if opts.lbr:
 perf_args += ["--branch-filter=any,u"]
+if opts.call_graph:
+perf_args += ["--call-graph=fp"]
 perf_args.extend(cmd)
 
 start_time = time.time()
@@ -132,6 +140,26 @@
 return 0
 
 
+def perf2prof(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2prof",
+description="perf to CSSPGO prof conversion wrapper",
+)
+parser.add_argument("profgen", help="Path to llvm-profgen binary")
+parser.add_argument("binary", help="Input binary")
+pa

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-07-19 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 542060.
Amir added a comment.

Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -67,6 +67,69 @@
 return 0
 
 
+def perf(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+)
+parser.add_argument(
+"--lbr", action="store_true", help="Use perf with branch stacks"
+)
+parser.add_argument("cmd", nargs="*", help="")
+
+# Use python's arg parser to handle all leading option arguments, but pass
+# everything else through to perf
+first_cmd = next(arg for arg in args if not arg.startswith("--"))
+last_arg_idx = args.index(first_cmd)
+
+opts = parser.parse_args(args[:last_arg_idx])
+cmd = args[last_arg_idx:]
+
+perf_args = [
+"perf",
+"record",
+"--event=cycles:u",
+"--freq=max",
+"--output=%d.perf.data" % os.getpid(),
+]
+if opts.lbr:
+perf_args += ["--branch-filter=any,u"]
+perf_args.extend(cmd)
+
+start_time = time.time()
+subprocess.check_call(perf_args)
+
+elapsed = time.time() - start_time
+print("... data collection took %.4fs" % elapsed)
+return 0
+
+
+def perf2bolt(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2bolt",
+description="perf2bolt conversion wrapper for perf.data files",
+)
+parser.add_argument("bolt", help="Path to llvm-bolt")
+parser.add_argument("path", help="Path containing perf.data files")
+parser.add_argument("binary", help="Input binary")
+parser.add_argument(
+"--lbr", action="store_true", help="Use LBR perf2bolt mode"
+)
+opts = parser.parse_args(args)
+
+p2b_args = [
+opts.bolt,
+opts.binary,
+"--aggregate-only",
+"--profile-format=yaml",
+]
+if not opts.lbr:
+p2b_args += ["-nl"]
+p2b_args += ["-p"]
+for filename in findFilesWithExtension(opts.path, "perf.data"):
+subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
+return 0
+
+
 def dtrace(args):
 parser = argparse.ArgumentParser(
 prog="perf-helper dtrace",
@@ -507,6 +570,8 @@
 "cc1": cc1,
 "gen-order-file": genOrderFile,
 "merge-fdata": merge_fdata,
+"perf": perf,
+"perf2bolt": perf2bolt,
 }
 
 
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,8 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
+config.clang_bolt_name = "@CLANG_INSTRUMENTED@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,52 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = "clang"
+perf_wrapper = ""
+if config.clang_bolt_mode.lower() == "instrument":
+clang_binary = config.clang_bolt_name
+else:  # perf or LBR
+perf_wrapper = "%s %s/perf-helper.py perf" % (
+config.python_exe,
+config.perf_helper_dir,
+)
+if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+perf_wrapper += " -- "
 
-config.name = 'Clang Perf Training'
-config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+config.clang = os.path.realpath(
+lit.util.which(clang_binary, config.clang_tools_dir)
+).replace("\\", "/")
+
+config.name = "Clang Perf Training"
+config.suffixes = [
+".c",
+".cc",
+".cpp",
+".m",
+".mm",
+".cu",
+".ll",
+".cl",
+".s",
+".S",
+".modulemap",
+".test",
+]
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-07-19 Thread Amir Ayupov via Phabricator via cfe-commits
Amir marked 7 inline comments as done.
Amir added inline comments.



Comment at: clang/utils/perf-training/perf-helper.py:75
+parser.add_argument(
+"--lbr", required=False, action="store_true", help="Use perf with branch stacks"
+)

phosek wrote:
> This could be omitted.
I think it's best to keep it as optional argument, not positional.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-07-19 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 542264.
Amir marked an inline comment as done.
Amir added a comment.

Fix instrumentation mode


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -67,6 +67,69 @@
 return 0
 
 
+def perf(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+)
+parser.add_argument(
+"--lbr", action="store_true", help="Use perf with branch stacks"
+)
+parser.add_argument("cmd", nargs="*", help="")
+
+# Use python's arg parser to handle all leading option arguments, but pass
+# everything else through to perf
+first_cmd = next(arg for arg in args if not arg.startswith("--"))
+last_arg_idx = args.index(first_cmd)
+
+opts = parser.parse_args(args[:last_arg_idx])
+cmd = args[last_arg_idx:]
+
+perf_args = [
+"perf",
+"record",
+"--event=cycles:u",
+"--freq=max",
+"--output=%d.perf.data" % os.getpid(),
+]
+if opts.lbr:
+perf_args += ["--branch-filter=any,u"]
+perf_args.extend(cmd)
+
+start_time = time.time()
+subprocess.check_call(perf_args)
+
+elapsed = time.time() - start_time
+print("... data collection took %.4fs" % elapsed)
+return 0
+
+
+def perf2bolt(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2bolt",
+description="perf2bolt conversion wrapper for perf.data files",
+)
+parser.add_argument("bolt", help="Path to llvm-bolt")
+parser.add_argument("path", help="Path containing perf.data files")
+parser.add_argument("binary", help="Input binary")
+parser.add_argument(
+"--lbr", action="store_true", help="Use LBR perf2bolt mode"
+)
+opts = parser.parse_args(args)
+
+p2b_args = [
+opts.bolt,
+opts.binary,
+"--aggregate-only",
+"--profile-format=yaml",
+]
+if not opts.lbr:
+p2b_args += ["-nl"]
+p2b_args += ["-p"]
+for filename in findFilesWithExtension(opts.path, "perf.data"):
+subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
+return 0
+
+
 def dtrace(args):
 parser = argparse.ArgumentParser(
 prog="perf-helper dtrace",
@@ -507,6 +570,8 @@
 "cc1": cc1,
 "gen-order-file": genOrderFile,
 "merge-fdata": merge_fdata,
+"perf": perf,
+"perf2bolt": perf2bolt,
 }
 
 
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,8 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
+config.clang_bolt_name = "@CLANG_BOLT_INSTRUMENTED@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,52 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = "clang"
+perf_wrapper = ""
+if config.clang_bolt_mode.lower() == "instrument":
+clang_binary = config.clang_bolt_name
+else:  # perf or LBR
+perf_wrapper = "%s %s/perf-helper.py perf" % (
+config.python_exe,
+config.perf_helper_dir,
+)
+if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+perf_wrapper += " -- "
 
-config.name = 'Clang Perf Training'
-config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+config.clang = os.path.realpath(
+lit.util.which(clang_binary, config.clang_tools_dir)
+).replace("\\", "/")
+
+config.name = "Clang Perf Training"
+config.suffixes = [
+".c",
+".cc",
+".cpp",
+".m",
+".mm",
+".cu",
+".ll",
+".cl",
+".s",
+".S",
+".modulemap",
+".test",
+]
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-07-19 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 542272.
Amir added a comment.

Set CLANG_BOLT_INSTRUMENTED in parent scope too.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -67,6 +67,69 @@
 return 0
 
 
+def perf(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+)
+parser.add_argument(
+"--lbr", action="store_true", help="Use perf with branch stacks"
+)
+parser.add_argument("cmd", nargs="*", help="")
+
+# Use python's arg parser to handle all leading option arguments, but pass
+# everything else through to perf
+first_cmd = next(arg for arg in args if not arg.startswith("--"))
+last_arg_idx = args.index(first_cmd)
+
+opts = parser.parse_args(args[:last_arg_idx])
+cmd = args[last_arg_idx:]
+
+perf_args = [
+"perf",
+"record",
+"--event=cycles:u",
+"--freq=max",
+"--output=%d.perf.data" % os.getpid(),
+]
+if opts.lbr:
+perf_args += ["--branch-filter=any,u"]
+perf_args.extend(cmd)
+
+start_time = time.time()
+subprocess.check_call(perf_args)
+
+elapsed = time.time() - start_time
+print("... data collection took %.4fs" % elapsed)
+return 0
+
+
+def perf2bolt(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2bolt",
+description="perf2bolt conversion wrapper for perf.data files",
+)
+parser.add_argument("bolt", help="Path to llvm-bolt")
+parser.add_argument("path", help="Path containing perf.data files")
+parser.add_argument("binary", help="Input binary")
+parser.add_argument(
+"--lbr", action="store_true", help="Use LBR perf2bolt mode"
+)
+opts = parser.parse_args(args)
+
+p2b_args = [
+opts.bolt,
+opts.binary,
+"--aggregate-only",
+"--profile-format=yaml",
+]
+if not opts.lbr:
+p2b_args += ["-nl"]
+p2b_args += ["-p"]
+for filename in findFilesWithExtension(opts.path, "perf.data"):
+subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
+return 0
+
+
 def dtrace(args):
 parser = argparse.ArgumentParser(
 prog="perf-helper dtrace",
@@ -507,6 +570,8 @@
 "cc1": cc1,
 "gen-order-file": genOrderFile,
 "merge-fdata": merge_fdata,
+"perf": perf,
+"perf2bolt": perf2bolt,
 }
 
 
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,8 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
+config.clang_bolt_name = "@CLANG_BOLT_INSTRUMENTED@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,52 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = "clang"
+perf_wrapper = ""
+if config.clang_bolt_mode.lower() == "instrument":
+clang_binary = config.clang_bolt_name
+else:  # perf or LBR
+perf_wrapper = "%s %s/perf-helper.py perf" % (
+config.python_exe,
+config.perf_helper_dir,
+)
+if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+perf_wrapper += " -- "
 
-config.name = 'Clang Perf Training'
-config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+config.clang = os.path.realpath(
+lit.util.which(clang_binary, config.clang_tools_dir)
+).replace("\\", "/")
+
+config.name = "Clang Perf Training"
+config.suffixes = [
+".c",
+".cc",
+".cpp",
+".m",
+".mm",
+".cu",
+".ll",
+".cl",
+".s",
+".S",
+".modulemap",
+".test",
+]
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-07-19 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 542280.
Amir added a comment.

Make the name of BOLT-instrumented Clang binary (CLANG_BOLT_INSTRUMENTED)
a user-settable cache variable


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -67,6 +67,69 @@
 return 0
 
 
+def perf(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+)
+parser.add_argument(
+"--lbr", action="store_true", help="Use perf with branch stacks"
+)
+parser.add_argument("cmd", nargs="*", help="")
+
+# Use python's arg parser to handle all leading option arguments, but pass
+# everything else through to perf
+first_cmd = next(arg for arg in args if not arg.startswith("--"))
+last_arg_idx = args.index(first_cmd)
+
+opts = parser.parse_args(args[:last_arg_idx])
+cmd = args[last_arg_idx:]
+
+perf_args = [
+"perf",
+"record",
+"--event=cycles:u",
+"--freq=max",
+"--output=%d.perf.data" % os.getpid(),
+]
+if opts.lbr:
+perf_args += ["--branch-filter=any,u"]
+perf_args.extend(cmd)
+
+start_time = time.time()
+subprocess.check_call(perf_args)
+
+elapsed = time.time() - start_time
+print("... data collection took %.4fs" % elapsed)
+return 0
+
+
+def perf2bolt(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2bolt",
+description="perf2bolt conversion wrapper for perf.data files",
+)
+parser.add_argument("bolt", help="Path to llvm-bolt")
+parser.add_argument("path", help="Path containing perf.data files")
+parser.add_argument("binary", help="Input binary")
+parser.add_argument(
+"--lbr", action="store_true", help="Use LBR perf2bolt mode"
+)
+opts = parser.parse_args(args)
+
+p2b_args = [
+opts.bolt,
+opts.binary,
+"--aggregate-only",
+"--profile-format=yaml",
+]
+if not opts.lbr:
+p2b_args += ["-nl"]
+p2b_args += ["-p"]
+for filename in findFilesWithExtension(opts.path, "perf.data"):
+subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
+return 0
+
+
 def dtrace(args):
 parser = argparse.ArgumentParser(
 prog="perf-helper dtrace",
@@ -507,6 +570,8 @@
 "cc1": cc1,
 "gen-order-file": genOrderFile,
 "merge-fdata": merge_fdata,
+"perf": perf,
+"perf2bolt": perf2bolt,
 }
 
 
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,8 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
+config.clang_bolt_name = "@CLANG_BOLT_INSTRUMENTED@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,52 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = "clang"
+perf_wrapper = ""
+if config.clang_bolt_mode.lower() == "instrument":
+clang_binary = config.clang_bolt_name
+else:  # perf or LBR
+perf_wrapper = "%s %s/perf-helper.py perf" % (
+config.python_exe,
+config.perf_helper_dir,
+)
+if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+perf_wrapper += " -- "
 
-config.name = 'Clang Perf Training'
-config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+config.clang = os.path.realpath(
+lit.util.which(clang_binary, config.clang_tools_dir)
+).replace("\\", "/")
+
+config.name = "Clang Perf Training"
+config.suffixes = [
+".c",
+".cc",
+".cpp",
+".m",
+".mm",
+".cu",
+".ll",
+".cl",
+".s",
+".S",
+".modulemap",
+".test",
+]
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%cla

[PATCH] D155419: [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED

2023-08-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 550532.
Amir added a comment.

Append to PGO_OPT


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155419/new/

https://reviews.llvm.org/D155419

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/CSSPGO.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/lit.cfg
  clang/utils/perf-training/lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py
  llvm/CMakeLists.txt
  llvm/cmake/modules/HandleLLVMOptions.cmake

Index: llvm/cmake/modules/HandleLLVMOptions.cmake
===
--- llvm/cmake/modules/HandleLLVMOptions.cmake
+++ llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1071,7 +1071,7 @@
 option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off)
 mark_as_advanced(LLVM_ENABLE_IR_PGO)
 
-set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
+set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
 set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
 string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1104,6 +1104,15 @@
 CMAKE_EXE_LINKER_FLAGS
 CMAKE_SHARED_LINKER_FLAGS)
 endif()
+  elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
   else()
 append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
   CMAKE_CXX_FLAGS
@@ -1154,6 +1163,21 @@
   endif()
 endif()
 
+if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
+  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
+  else()
+message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
+  endif()
+endif()
+
 option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
 append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\" -fcoverage-mapping"
Index: llvm/CMakeLists.txt
===
--- llvm/CMakeLists.txt
+++ llvm/CMakeLists.txt
@@ -849,6 +849,9 @@
 set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
   "Profiling data file to use when compiling in order to improve runtime performance.")
 
+set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
+  "Sampling profiling data file to use when compiling in order to improve runtime performance.")
+
 if(LLVM_INCLUDE_TESTS)
   # Lit test suite requires at least python 3.6
   set(LLVM_MINIMUM_PYTHON_VERSION 3.6)
Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -69,11 +69,15 @@
 
 def perf(args):
 parser = argparse.ArgumentParser(
-prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+prog="perf-helper perf",
+description="perf wrapper for BOLT/CSSPGO profile collection"
 )
 parser.add_argument(
 "--lbr", action="store_true", help="Use perf with branch stacks"
 )
+parser.add_argument(
+"--call-graph", action="store_true", help="Collect call graph"
+)
 parser.add_argument("cmd", nargs="*", help="")
 
 # Use python's arg parser to handle all leading option arguments, but pass
@@ -93,6 +97,8 @@
 ]
 if opts.lbr:
 perf_args += ["--branch-filter=any,u"]
+if opts.call_graph:
+perf_args += ["--call-graph=fp"]
 perf_args.extend(cmd)
 
 start_time = time.time()
@@ -130,6 +136,26 @@
 return 0
 
 
+def perf2prof(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2prof",
+description="perf to CSSPGO prof conversion wrapper",
+)
+parser.add_argument("profgen", help="Path to llvm-profgen binary")
+parser.add_argument("binary", help="Input binary")
+parser.add_argument("path

[PATCH] D155419: [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED

2023-08-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 550607.
Amir added a comment.

perf2prof


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155419/new/

https://reviews.llvm.org/D155419

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/CSSPGO.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/lit.cfg
  clang/utils/perf-training/lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py
  llvm/CMakeLists.txt
  llvm/cmake/modules/HandleLLVMOptions.cmake

Index: llvm/cmake/modules/HandleLLVMOptions.cmake
===
--- llvm/cmake/modules/HandleLLVMOptions.cmake
+++ llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1076,7 +1076,7 @@
 option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off)
 mark_as_advanced(LLVM_ENABLE_IR_PGO)
 
-set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
+set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
 set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
 string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1109,6 +1109,15 @@
 CMAKE_EXE_LINKER_FLAGS
 CMAKE_SHARED_LINKER_FLAGS)
 endif()
+  elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
   else()
 append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
   CMAKE_CXX_FLAGS
@@ -1159,6 +1168,21 @@
   endif()
 endif()
 
+if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
+  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
+  else()
+message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
+  endif()
+endif()
+
 option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
 option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF)
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
Index: llvm/CMakeLists.txt
===
--- llvm/CMakeLists.txt
+++ llvm/CMakeLists.txt
@@ -853,6 +853,9 @@
 set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
   "Profiling data file to use when compiling in order to improve runtime performance.")
 
+set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
+  "Sampling profiling data file to use when compiling in order to improve runtime performance.")
+
 if(LLVM_INCLUDE_TESTS)
   # Lit test suite requires at least python 3.6
   set(LLVM_MINIMUM_PYTHON_VERSION 3.6)
Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -69,11 +69,15 @@
 
 def perf(args):
 parser = argparse.ArgumentParser(
-prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+prog="perf-helper perf",
+description="perf wrapper for BOLT/CSSPGO profile collection"
 )
 parser.add_argument(
 "--lbr", action="store_true", help="Use perf with branch stacks"
 )
+parser.add_argument(
+"--call-graph", action="store_true", help="Collect call graph"
+)
 parser.add_argument("cmd", nargs="*", help="")
 
 # Use python's arg parser to handle all leading option arguments, but pass
@@ -93,6 +97,8 @@
 ]
 if opts.lbr:
 perf_args += ["--branch-filter=any,u"]
+if opts.call_graph:
+perf_args += ["--call-graph=fp"]
 perf_args.extend(cmd)
 
 start_time = time.time()
@@ -130,6 +136,26 @@
 return 0
 
 
+def perf2prof(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2prof",
+description="perf to CSSPGO prof conversion wrapper",
+)
+parser.add_argument("profgen", help="Path to llvm-profgen binary")
+parser.add_argument("binary", help="Input binary")
+parser.add_argument("path", help="Path containing perf.data f

[PATCH] D155419: [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED

2023-08-16 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 550996.
Amir added a comment.

Bugfixes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155419/new/

https://reviews.llvm.org/D155419

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/CSSPGO.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/lit.cfg
  clang/utils/perf-training/lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py
  llvm/CMakeLists.txt
  llvm/cmake/modules/HandleLLVMOptions.cmake

Index: llvm/cmake/modules/HandleLLVMOptions.cmake
===
--- llvm/cmake/modules/HandleLLVMOptions.cmake
+++ llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1076,7 +1076,7 @@
 option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off)
 mark_as_advanced(LLVM_ENABLE_IR_PGO)
 
-set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
+set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
 set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
 string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1109,6 +1109,15 @@
 CMAKE_EXE_LINKER_FLAGS
 CMAKE_SHARED_LINKER_FLAGS)
 endif()
+  elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
   else()
 append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
   CMAKE_CXX_FLAGS
@@ -1159,6 +1168,21 @@
   endif()
 endif()
 
+if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
+  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
+  else()
+message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
+  endif()
+endif()
+
 option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
 option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF)
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
Index: llvm/CMakeLists.txt
===
--- llvm/CMakeLists.txt
+++ llvm/CMakeLists.txt
@@ -853,6 +853,9 @@
 set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
   "Profiling data file to use when compiling in order to improve runtime performance.")
 
+set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
+  "Sampling profiling data file to use when compiling in order to improve runtime performance.")
+
 if(LLVM_INCLUDE_TESTS)
   # Lit test suite requires at least python 3.6
   set(LLVM_MINIMUM_PYTHON_VERSION 3.6)
Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -42,14 +42,20 @@
 
 
 def merge(args):
-if len(args) != 3:
-print(
-"Usage: %s merge <llvm-profdata> <output> <path>\n" % __file__
-+ "\tMerges all profraw files from path into output."
-)
-return 1
-cmd = [args[0], "merge", "-o", args[1]]
-cmd.extend(findFilesWithExtension(args[2], "profraw"))
+parser = argparse.ArgumentParser(
+prog="perf-helper merge",
+description="Merges all profraw files from path into output"
+)
+parser.add_argument("profdata", help="Path to llvm-profdata tool")
+parser.add_argument("output", help="Output filename")
+parser.add_argument("path", help="Folder containing input profraw files")
+parser.add_argument("--sample", action="store_true", help="Sample profile")
+opts = parser.parse_args(args)
+
+cmd = [opts.profdata, "merge", "-o", opts.output]
+if opts.sample:
+cmd += ["--sample"]
+cmd.extend(findFilesWithExtension(opts.path, "profraw"))
 subprocess.check_call(cmd)
 return 0
 
@@ -69,11 +75,19 @@
 
 def perf(args):
 parser = argparse.ArgumentParser(
-prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+prog="perf-helper perf",
+description="perf wrapper f

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-10-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir abandoned this revision.
Amir added a comment.

Migrated to https://github.com/llvm/llvm-project/pull/69133


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D156607: [Clang][CMake] Allow using passthrough BOLT in BOLT-PGO.cmake

2023-07-29 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added reviewers: bolt, phosek, beanz.
Herald added subscribers: wlei, ekilmer, wenlei.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Factor out the `bolt` project dependency from Clang-BOLT usage.
Allow using any of the following:

- user-specified BOLT: `LLVM_BOLT` and `MERGE_FDATA` variables,
- BOLT from the system path (found with CMake's `find_program`),
- BOLT built from the `bolt` project (previously the only supported way).

Customize the BOLT-PGO.cmake cache file to build BOLT in the first-stage build,
then pass it through to the final clang-bolt invocation and use it there (on top
of the PGO binary).
This avoids building a PGO'd BOLT (first instrumented and then optimized), cutting
the build time.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D156607

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT-PGO-stage2.cmake
  clang/cmake/caches/BOLT-PGO.cmake
  clang/utils/perf-training/CMakeLists.txt

Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -78,9 +78,21 @@
 COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
 COMMENT "Clearing old BOLT fdata")
 
-  # Merge profiles into one using merge-fdata
-  add_custom_target(clang-bolt-profile
-COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
-COMMENT "Merging BOLT fdata"
-DEPENDS merge-fdata generate-bolt-fdata)
+  add_custom_target(clang-bolt-profile-deps)
+  if ("bolt" IN_LIST LLVM_ENABLE_PROJECTS)
+add_dependencies(clang-bolt-profile-deps merge-fdata)
+set(MERGE_FDATA $<TARGET_FILE:merge-fdata>)
+  elseif (NOT MERGE_FDATA)
+find_program(MERGE_FDATA merge-fdata)
+  endif()
+
+  if (NOT MERGE_FDATA AND NOT "bolt" IN_LIST LLVM_ENABLE_PROJECTS)
+message(STATUS "To enable optimizing Clang with BOLT enable bolt project or set MERGE_FDATA")
+  else()
+# Merge profiles into one using merge-fdata
+add_custom_target(clang-bolt-profile
+  COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata ${MERGE_FDATA} ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+  COMMENT "Merging BOLT fdata"
+  DEPENDS generate-bolt-fdata)
+  endif()
 endif()
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -11,7 +11,9 @@
   install-distribution
   CACHE STRING "")
 
+set(BOOTSTRAP_LLVM_BOLT ON)
+
 set(PGO_BUILD_CONFIGURATION
-  ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
+  ${CMAKE_CURRENT_LIST_DIR}/BOLT-PGO-stage2.cmake
   CACHE STRING "")
 include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
Index: clang/cmake/caches/BOLT-PGO-stage2.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT-PGO-stage2.cmake
@@ -0,0 +1,13 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# setup toolchain
+set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "")
+set(LLVM_DISTRIBUTION_COMPONENTS
+  clang
+  clang-resource-headers
+  CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -711,6 +711,16 @@
 endif()
   endif()
 
+  if(BOOTSTRAP_LLVM_BOLT)
+add_dependencies(clang-bootstrap-deps llvm-bolt merge-fdata)
+set(LLVM_BOLT ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-bolt)
+set(MERGE_FDATA ${LLVM_RUNTIME_OUTPUT_INTDIR}/merge-fdata)
+list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH
+  LLVM_BOLT
+  MERGE_FDATA
+)
+  endif()
+
   if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
 add_dependencies(clang-bootstrap-deps llvm-profdata)
 set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata)
@@ -854,35 +864,47 @@
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
   set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${BOLT_FDATA}
-COMMENT "Instrumenting clang binary with BOLT"
-VERBATIM
-  )
+  add_custom_target(bolt-clang-instrumented-deps)
+  if ("bolt" IN_LIST LLVM_ENABLE_PROJECTS)
+add_dependencies(bolt-clang-instrument

[PATCH] D156607: [Clang][CMake] Allow using passthrough BOLT in BOLT-PGO.cmake

2023-07-29 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 545406.
Amir added a comment.

Add clang-bolt-profile-deps dependency


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D156607/new/

https://reviews.llvm.org/D156607

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT-PGO-stage2.cmake
  clang/cmake/caches/BOLT-PGO.cmake
  clang/utils/perf-training/CMakeLists.txt

Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -78,9 +78,21 @@
 COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
 COMMENT "Clearing old BOLT fdata")
 
-  # Merge profiles into one using merge-fdata
-  add_custom_target(clang-bolt-profile
-COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
-COMMENT "Merging BOLT fdata"
-DEPENDS merge-fdata generate-bolt-fdata)
+  add_custom_target(clang-bolt-profile-deps)
+  if ("bolt" IN_LIST LLVM_ENABLE_PROJECTS)
+add_dependencies(clang-bolt-profile-deps merge-fdata)
+set(MERGE_FDATA $<TARGET_FILE:merge-fdata>)
+  elseif (NOT MERGE_FDATA)
+find_program(MERGE_FDATA merge-fdata)
+  endif()
+
+  if (NOT MERGE_FDATA AND NOT "bolt" IN_LIST LLVM_ENABLE_PROJECTS)
+message(STATUS "To enable optimizing Clang with BOLT enable bolt project or set MERGE_FDATA")
+  else()
+# Merge profiles into one using merge-fdata
+add_custom_target(clang-bolt-profile
+  COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata ${MERGE_FDATA} ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+  COMMENT "Merging BOLT fdata"
+  DEPENDS clang-bolt-profile-deps generate-bolt-fdata)
+  endif()
 endif()
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -11,7 +11,9 @@
   install-distribution
   CACHE STRING "")
 
+set(BOOTSTRAP_LLVM_BOLT ON)
+
 set(PGO_BUILD_CONFIGURATION
-  ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
+  ${CMAKE_CURRENT_LIST_DIR}/BOLT-PGO-stage2.cmake
   CACHE STRING "")
 include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
Index: clang/cmake/caches/BOLT-PGO-stage2.cmake
===
--- /dev/null
+++ clang/cmake/caches/BOLT-PGO-stage2.cmake
@@ -0,0 +1,13 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# setup toolchain
+set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "")
+set(LLVM_DISTRIBUTION_COMPONENTS
+  clang
+  clang-resource-headers
+  CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -711,6 +711,16 @@
 endif()
   endif()
 
+  if(BOOTSTRAP_LLVM_BOLT)
+add_dependencies(clang-bootstrap-deps llvm-bolt merge-fdata)
+set(LLVM_BOLT ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-bolt)
+set(MERGE_FDATA ${LLVM_RUNTIME_OUTPUT_INTDIR}/merge-fdata)
+list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH
+  LLVM_BOLT
+  MERGE_FDATA
+)
+  endif()
+
   if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
 add_dependencies(clang-bootstrap-deps llvm-profdata)
 set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata)
@@ -854,35 +864,47 @@
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
   set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${BOLT_FDATA}
-COMMENT "Instrumenting clang binary with BOLT"
-VERBATIM
-  )
+  add_custom_target(bolt-clang-instrumented-deps)
+  if ("bolt" IN_LIST LLVM_ENABLE_PROJECTS)
+add_dependencies(bolt-clang-instrumented-deps llvm-bolt)
+set(LLVM_BOLT $<TARGET_FILE:llvm-bolt>)
+  elseif (NOT LLVM_BOLT)
+find_program(LLVM_BOLT llvm-bolt)
+  endif()
 
-  # Optimize original (pre-bolt) Clang using the collected profile
-  set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
-  add_custom_target(clang-bolt
-DEPENDS ${CLANG_OPTIMIZED}
-  )
-  add_custom_command(OUTPUT ${CLANG_OPTIMIZED}
-DEPENDS clang-bolt-profile
-COMMAND llvm-bolt ${CLANG_PATH}
-  -o ${CLANG_OPTIMIZED}
-  -data ${BOLT_FDATA}
-  -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
-  -sp

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-07-07 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 538271.
Amir added a comment.

Rebased and reformatted with `black` (D150761)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -67,6 +67,71 @@
 return 0
 
 
+def perf(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+)
+parser.add_argument(
+"--lbr", required=False, action="store_true", help="Use perf with branch stacks"
+)
+parser.add_argument("cmd", nargs="*", help="")
+
+# Use python's arg parser to handle all leading option arguments, but pass
+# everything else through to perf
+first_cmd = next(arg for arg in args if not arg.startswith("--"))
+last_arg_idx = args.index(first_cmd)
+
+opts = parser.parse_args(args[:last_arg_idx])
+# cmd = shlex.split(args[last_arg_idx:])
+cmd = args[last_arg_idx:]
+
+perf_args = []
+perf_args.extend(
+(
+"perf",
+"record",
+"--event=cycles:u",
+"--freq=max",
+"--output=%d.perf.data" % os.getpid(),
+)
+)
+if opts.lbr:
+perf_args += ["--branch-filter=any,u"]
+perf_args.extend(cmd)
+
+start_time = time.time()
+subprocess.check_call(perf_args)
+
+elapsed = time.time() - start_time
+print("... data collection took %.4fs" % elapsed)
+return 0
+
+
+def perf2bolt(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2bolt",
+description="perf2bolt conversion wrapper for perf.data files",
+)
+parser.add_argument("p2b_path", help="Path to llvm-bolt")
+parser.add_argument("path", help="Path containing perf.data files")
+parser.add_argument("binary", help="Input binary")
+parser.add_argument(
+"--nolbr", required=False, action="store_true", help="Use -nl perf2bolt mode"
+)
+opts = parser.parse_args(args)
+
+p2b_args = []
+p2b_args.extend(
+(opts.p2b_path, opts.binary, "--aggregate-only", "--profile-format=yaml")
+)
+if opts.nolbr:
+p2b_args += ["-nl"]
+p2b_args += ["-p"]
+for filename in findFilesWithExtension(opts.path, "perf.data"):
+subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
+return 0
+
+
 def dtrace(args):
 parser = argparse.ArgumentParser(
 prog="perf-helper dtrace",
@@ -507,6 +572,8 @@
 "cc1": cc1,
 "gen-order-file": genOrderFile,
 "merge-fdata": merge_fdata,
+"perf": perf,
+"perf2bolt": perf2bolt,
 }
 
 
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,52 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = "clang"
+perf_wrapper = ""
+if config.clang_bolt_mode.lower() == "instrument":
+clang_binary = "clang-bolt.inst"
+else:  # perf or LBR
+perf_wrapper = "%s %s/perf-helper.py perf" % (
+config.python_exe,
+config.perf_helper_dir,
+)
+if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+perf_wrapper += " -- "
 
-config.name = 'Clang Perf Training'
-config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+config.clang = os.path.realpath(
+lit.util.which(clang_binary, config.clang_tools_dir)
+).replace("\\", "/")
+
+config.name = "Clang Perf Training"
+config.suffixes = [
+".c",
+".cc",
+".cpp",
+".m",
+".mm",
+".cu",
+".ll",
+".cl",
+".s",
+".S",
+".modulemap",
+".test",
+]
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.S

[PATCH] D155419: [Clang][CMake][WIP] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED

2023-07-16 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Herald added subscribers: wlei, ekilmer, wenlei.
Herald added a project: All.
Amir requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155419

Files:
  clang/CMakeLists.txt
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/lit.cfg
  clang/utils/perf-training/lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py
  llvm/CMakeLists.txt
  llvm/cmake/modules/HandleLLVMOptions.cmake

Index: llvm/cmake/modules/HandleLLVMOptions.cmake
===
--- llvm/cmake/modules/HandleLLVMOptions.cmake
+++ llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1071,7 +1071,7 @@
 option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off)
 mark_as_advanced(LLVM_ENABLE_IR_PGO)
 
-set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
+set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
 set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
 string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1104,6 +1104,15 @@
 CMAKE_EXE_LINKER_FLAGS
 CMAKE_SHARED_LINKER_FLAGS)
 endif()
+  elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
   else()
 append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
   CMAKE_CXX_FLAGS
@@ -1154,6 +1163,21 @@
   endif()
 endif()
 
+if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
+  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+  CMAKE_CXX_FLAGS
+  CMAKE_C_FLAGS)
+if(NOT LINKER_IS_LLD_LINK)
+  append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+CMAKE_EXE_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS)
+endif()
+  else()
+message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
+  endif()
+endif()
+
 option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
 append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\" -fcoverage-mapping"
Index: llvm/CMakeLists.txt
===
--- llvm/CMakeLists.txt
+++ llvm/CMakeLists.txt
@@ -849,6 +849,9 @@
 set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
   "Profiling data file to use when compiling in order to improve runtime performance.")
 
+set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
+  "Sampling profiling data file to use when compiling in order to improve runtime performance.")
+
 if(LLVM_INCLUDE_TESTS)
   # Lit test suite requires at least python 3.6
   set(LLVM_MINIMUM_PYTHON_VERSION 3.6)
Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -69,10 +69,16 @@
 
 def perf(args):
 parser = argparse.ArgumentParser(
-prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+prog="perf-helper perf",
+description="perf wrapper for BOLT/CSSPGO profile collection"
 )
 parser.add_argument(
-"--lbr", required=False, action="store_true", help="Use perf with branch stacks"
+"--lbr", required=False, action="store_true",
+help="Use perf with branch stacks"
+)
+parser.add_argument(
+"--call-graph", required=False, action="store_true",
+help="Collect call graph"
 )
 parser.add_argument("cmd", nargs="*", help="")
 
@@ -97,6 +103,8 @@
 )
 if opts.lbr:
 perf_args += ["--branch-filter=any,u"]
+if opts.call_graph:
+perf_args += ["--call-graph=fp"]
 perf_args.extend(cmd)
 
 start_time = time.time()
@@ -132,6 +140,26 @@
 return 0
 
 
+def perf2prof(args):
+parser = argparse.ArgumentParser(
+prog="perf-helper perf2prof",
+description="perf to CSSPGO prof conversion wrapper",
+)
+parser.add_argument("profgen", help="Path t

[PATCH] D133633: [CMake] Add ClangBootstrap configuration

2022-11-26 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added inline comments.



Comment at: clang/cmake/modules/ClangBootstrap.cmake:11
+macro(clang_bootstrap_add name)
+  cmake_parse_arguments(ARG "" "LINKER;AR;RANLIB;OBJCOPY;STRIP"
+"DEPENDS;TABLEGEN;CMAKE_ARGS;BUILD_TOOL_ARGS"

thevinster wrote:
> Were you planning to also use the single arguments list such as `ARG_LINKER` 
> in the `CMAKE_ARGS`? Without it, I have to supply an override to 
> `CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS` so I can avoid using the gnu linker.
Yes, I added those in the first version of the diff and just forgot to remove 
them. But as Peter mentioned:
> I don't think we need a dedicated keyword for each tool, I'd just pass these 
> through CMAKE_ARGS.
I'm neutral about adding ARG_LINKER or setting it through EXTRA_CMAKE_FLAGS, 
but I think explicit overrides for each tool are a bit too verbose. 
Do you think having ARG_LINKER and passing the rest as EXTRA_CMAKE_FLAGS is a 
good tradeoff? cc @phosek 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133633/new/

https://reviews.llvm.org/D133633

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143553: [Clang][CMake] Use perf-training for Clang-BOLT

2023-02-27 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 500956.
Amir added a comment.

Rebase, reduce changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143553/new/

https://reviews.llvm.org/D143553

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in

Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,14 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.python_exe = "@Python3_EXECUTABLE@"
+config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,20 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
+config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -61,3 +61,26 @@
 COMMENT "Generating order file"
 DEPENDS generate-dtrace-logs)
 endif()
+
+if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+  configure_lit_site_cfg(
+${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
+)
+
+  add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
+EXCLUDE_FROM_CHECK_ALL
+DEPENDS clang-instrumented clear-bolt-fdata
+)
+
+  add_custom_target(clear-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
+COMMENT "Clearing old BOLT fdata")
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(clang-bolt-profile
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+COMMENT "Merging BOLT fdata"
+DEPENDS merge-fdata generate-bolt-fdata)
+endif()
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,6 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -861,9 +861,8 @@
 
 if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-  set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+  set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
 
   # Instrument clang with BOLT
   add_custom_target(clang-instrumented
@@ -873,73 +872,11 @@
 DEPENDS clang llvm-bolt
 COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
   -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  -

[PATCH] D143553: [Clang][CMake] Use perf-training for Clang-BOLT

2023-02-27 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

Ping @phosek


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143553/new/

https://reviews.llvm.org/D143553

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-03-11 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 504392.
Amir added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
   subprocess.check_call(cmd)
   return 0
 
+def perf(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf',
+description='perf wrapper for BOLT profile collection')
+  parser.add_argument('--lbr', required=False, action='store_true',
+help='Use perf with branch stacks')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to perf
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  #cmd = shlex.split(args[last_arg_idx:])
+  cmd = args[last_arg_idx:]
+
+  perf_args = []
+  perf_args.extend((
+'perf', 'record', '--event=cycles:u', '--freq=max',
+'--output=%d.perf.data' % os.getpid()))
+  if opts.lbr:
+perf_args += ['--branch-filter=any,u']
+  perf_args.extend(cmd)
+
+  start_time = time.time()
+  subprocess.check_call(perf_args)
+
+  elapsed = time.time() - start_time
+  print("... data collection took %.4fs" % elapsed)
+  return 0
+
+def perf2bolt(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+description='perf2bolt conversion wrapper for perf.data files')
+  parser.add_argument('p2b_path', help='Path to llvm-bolt')
+  parser.add_argument('path', help='Path containing perf.data files')
+  parser.add_argument('binary', help='Input binary')
+  parser.add_argument('--nolbr', required=False, action='store_true',
+help='Use -nl perf2bolt mode')
+  opts = parser.parse_args(args)
+
+  p2b_args = []
+  p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+'--profile-format=yaml'))
+  if opts.nolbr:
+p2b_args += ['-nl']
+  p2b_args += ['-p']
+  for filename in findFilesWithExtension(opts.path, 'perf.data'):
+subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
   'cc1' : cc1,
   'gen-order-file' : genOrderFile,
   'merge-fdata' : merge_fdata,
+  'perf' : perf,
+  'perf2bolt' : perf2bolt,
   }
 
 def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,25 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+  clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+  perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+  if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+  perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
 
 config.name = 'Clang Perf Training'
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
-config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s %s --driver-mode=g++ ' % (perf_wrapper, config.

[PATCH] D150752: [bolt] Use correct output paths and passthrough necessary options

2023-05-19 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

Please retitle as "[BOLT][CMake] ..."


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150752/new/

https://reviews.llvm.org/D150752

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D151595: [BOLT][CMake] Redo the build and install targets

2023-05-31 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

In D151595#4385506, @phosek wrote:

> @Amir does this change look good to you?

Looks good overall. Thanks for adding the AddBolt CMake module. I'm testing the 
build internally and will reply shortly.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151595/new/

https://reviews.llvm.org/D151595

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D151595: [BOLT][CMake] Redo the build and install targets

2023-05-31 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added inline comments.



Comment at: bolt/test/CMakeLists.txt:40
   llvm-bolt
-  llvm-boltdiff
   llvm-bolt-heatmap

We have a number of dependencies on llvm-boltdiff and perf2bolt, e.g. in 
internal and upstream binary tests: 
https://github.com/rafaelauler/bolt-tests/blob/main/CMakeLists.txt#L22. Can we 
keep these targets somehow?



Comment at: bolt/tools/heatmap/CMakeLists.txt:8
 
-add_bolt_tool(llvm-bolt-heatmap
+add_bolt_executable(llvm-bolt-heatmap
   heatmap.cpp

phosek wrote:
> smeenai wrote:
> > Why this change?
> This tool wasn't previously included in the `bolt` umbrella target so I 
> assume that it doesn't need an install target, but it'd be helpful for @Amir 
> to confirm.
It wasn't included by omission. Let's include it into the umbrella target.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151595/new/

https://reviews.llvm.org/D151595

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D151595: [BOLT][CMake] Redo the build and install targets

2023-05-31 Thread Amir Ayupov via Phabricator via cfe-commits
Amir accepted this revision.
Amir added a comment.

Thanks. Will update the dependencies, removing always-installed symlinks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151595/new/

https://reviews.llvm.org/D151595

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D151595: [BOLT][CMake] Redo the build and install targets

2023-06-01 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

Merged the PR. Please reland


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151595/new/

https://reviews.llvm.org/D151595

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140565: [Clang][CMake] Set up distribution target for Clang-BOLT

2023-02-07 Thread Amir Ayupov via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG42129deaa67b: [Clang][CMake] Set up distribution target for 
Clang-BOLT (authored by Amir).

Changed prior to commit:
  https://reviews.llvm.org/D140565?vs=484885&id=495651#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140565/new/

https://reviews.llvm.org/D140565

Files:
  clang/cmake/caches/BOLT-PGO.cmake
  clang/cmake/caches/BOLT.cmake


Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -8,6 +8,13 @@
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
 
+# setup toolchain
+set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "")
+set(LLVM_DISTRIBUTION_COMPONENTS
+  clang
+  clang-resource-headers
+  CACHE STRING "")
+
 # Disable function splitting enabled by default in GCC8+
 if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -2,10 +2,16 @@
 
 set(CLANG_BOOTSTRAP_TARGETS
   stage2-clang-bolt
+  stage2-distribution
+  stage2-install-distribution
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
   clang-bolt
+  distribution
+  install-distribution
   CACHE STRING "")
 
-set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
+set(PGO_BUILD_CONFIGURATION
+  ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
+  CACHE STRING "")
 include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)


Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -8,6 +8,13 @@
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
 
+# setup toolchain
+set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "")
+set(LLVM_DISTRIBUTION_COMPONENTS
+  clang
+  clang-resource-headers
+  CACHE STRING "")
+
 # Disable function splitting enabled by default in GCC8+
 if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -2,10 +2,16 @@
 
 set(CLANG_BOOTSTRAP_TARGETS
   stage2-clang-bolt
+  stage2-distribution
+  stage2-install-distribution
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
   clang-bolt
+  distribution
+  install-distribution
   CACHE STRING "")
 
-set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
+set(PGO_BUILD_CONFIGURATION
+  ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
+  CACHE STRING "")
 include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143553: [Clang][CMake] Add BOLT perf-training (WIP)

2023-02-07 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added a reviewer: phosek.
Herald added a subscriber: wenlei.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Remove the bootstrapped build for BOLT profile collection.
Adopt perf-training workflow, similar to pgo and order-file.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D143553

Files:
  clang/CMakeLists.txt
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in

Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,13 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.python_exe = "@Python3_EXECUTABLE@"
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,20 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -59,3 +59,30 @@
 COMMENT "Generating order file"
 DEPENDS generate-dtrace-logs)
 endif()
+
+if(CLANG_BOLT_INSTRUMENT)
+  configure_lit_site_cfg(
+${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
+)
+
+  add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
+EXCLUDE_FROM_CHECK_ALL
+DEPENDS clang-bolt-instrumented clear-bolt-fdata
+)
+
+  add_custom_target(clear-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
+COMMENT "Clearing old BOLT fdata")
+
+  if(NOT MERGE_FDATA)
+find_program(MERGE_FDATA merge-fdata)
+  endif()
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(merge-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata ${MERGE_FDATA} ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+COMMENT "Merging BOLT fdata"
+DEPENDS generate-bolt-fdata)
+endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -852,99 +852,28 @@
 endif()
 
 if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
-  set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-  set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+  set(CLANG_BOLT_INSTRUMENTED ${CMAKE_CURRENT_BINARY_DIR}/clang-bolt.inst)
 
   # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+  add_custom_target(clang-bolt-instrumented
 DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+COMMAND llvm-bolt $<TARGET_FILE:clang> -o ${CLANG_BOLT_INSTRUMENTED}
   -instrument --instrumentation-file-append-pid
   --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
 COMMENT "Instrumenting clang binary with BOLT"
 VERBATIM
   )
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_comman

[PATCH] D143553: [Clang][CMake] Add BOLT perf-training (WIP)

2023-02-08 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 495915.
Amir added a comment.

Fixes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143553/new/

https://reviews.llvm.org/D143553

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in

Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,14 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.python_exe = "@Python3_EXECUTABLE@"
+config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,21 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+sysroot_flags = ''
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -59,3 +59,27 @@
 COMMENT "Generating order file"
 DEPENDS generate-dtrace-logs)
 endif()
+
+if(CLANG_BOLT_INSTRUMENT)
+  configure_lit_site_cfg(
+${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
+)
+
+  add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
+EXCLUDE_FROM_CHECK_ALL
+DEPENDS clang-instrumented clear-bolt-fdata
+)
+
+  add_custom_target(clear-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
+COMMENT "Clearing old BOLT fdata")
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(merge-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py
+merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+COMMENT "Merging BOLT fdata"
+DEPENDS merge-fdata generate-bolt-fdata)
+endif()
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,6 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -853,95 +853,29 @@
 
 if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-  set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
   # Instrument clang with BOLT
   add_custom_target(clang-instrumented
 DEPENDS ${CLANG_INSTRUMENTED}
   )
+  set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
   add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
 DEPENDS clang llvm-bolt
 COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
   -instr

[PATCH] D139496: [CMake] Add perf profiling for clang-bolt

2023-02-08 Thread Amir Ayupov via Phabricator via cfe-commits
Amir abandoned this revision.
Amir added a comment.

Abandon in favor of D143553 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143553: [Clang][CMake] Add BOLT perf-training

2023-02-08 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 495941.
Amir added a comment.

Fixed BOLT-PGO build


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143553/new/

https://reviews.llvm.org/D143553

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in

Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,14 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.python_exe = "@Python3_EXECUTABLE@"
+config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,21 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+sysroot_flags = ''
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -59,3 +59,26 @@
 COMMENT "Generating order file"
 DEPENDS generate-dtrace-logs)
 endif()
+
+if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+  configure_lit_site_cfg(
+${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
+)
+
+  add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
+EXCLUDE_FROM_CHECK_ALL
+DEPENDS clang-instrumented clear-bolt-fdata
+)
+
+  add_custom_target(clear-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
+COMMENT "Clearing old BOLT fdata")
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(merge-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+COMMENT "Merging BOLT fdata"
+DEPENDS merge-fdata generate-bolt-fdata)
+endif()
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,6 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -853,95 +853,29 @@
 
 if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-  set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
   # Instrument clang with BOLT
   add_custom_target(clang-instrumented
 DEPENDS ${CLANG_INSTRUMENTED}
   )
+  set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
   add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
 DEPENDS clang llvm-bolt
 COMMAND llvm-bolt ${CLANG_PA

[PATCH] D143617: [Clang][CMake] Break out Instrument from CLANG_BOLT option

2023-02-08 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added a reviewer: bolt.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Split up and refactor CLANG_BOLT_INSTRUMENT into pieces that would work with
other profiling modes, namely perf no-LBR and perf with LBR.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt


Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -60,7 +60,7 @@
 DEPENDS generate-dtrace-logs)
 endif()
 
-if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if(CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   configure_lit_site_cfg(
 ${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
 ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
@@ -69,7 +69,7 @@
   add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
 ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
 EXCLUDE_FROM_CHECK_ALL
-DEPENDS clang-instrumented clear-bolt-fdata
+DEPENDS clang-bolt-training-deps clear-bolt-fdata
 )
 
   add_custom_target(clear-bolt-fdata
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,12 +1,6 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
-
-# Disable function splitting enabled by default in GCC8+
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
-endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -851,7 +851,12 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "Apply BOLT optimization to Clang. \
+  May be specified as Instrument or Perf or LBR to use a particular profiling \
+  mechanism.")
+string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+
+if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT" AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
 
@@ -868,8 +873,11 @@
 COMMENT "Instrumenting clang binary with BOLT"
 VERBATIM
   )
+  add_custom_target(clang-bolt-training-deps DEPENDS clang-instrumented)
+endif()
 
-  # Optimize original (pre-bolt) Clang using the collected profile
+if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
+  # Optimize original Clang using the collected profile
   set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
   add_custom_target(clang-bolt
 DEPENDS merge-bolt-fdata


Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -60,7 +60,7 @@
 DEPENDS generate-dtrace-logs)
 endif()
 
-if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if(CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   configure_lit_site_cfg(
 ${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
 ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
@@ -69,7 +69,7 @@
   add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
 ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
 EXCLUDE_FROM_CHECK_ALL
-DEPENDS clang-instrumented clear-bolt-fdata
+DEPENDS clang-bolt-training-deps clear-bolt-fdata
 )
 
   add_custom_target(clear-bolt-fdata
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,12 +1,6 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
-
-# Disable function splitting enabled by default in GCC8+
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
-endif()
Index: clang/CMakeLists.txt
===
--- clang/

[PATCH] D143617: [Clang][CMake] Break out Instrument from CLANG_BOLT option

2023-02-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 496249.
Amir added a comment.

Add perf and perf+LBR support


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
   subprocess.check_call(cmd)
   return 0
 
+def perf(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf',
+description='perf wrapper for BOLT profile collection')
+  parser.add_argument('--lbr', required=False, action='store_true',
+help='Use perf with branch stacks')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to perf
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  #cmd = shlex.split(args[last_arg_idx:])
+  cmd = args[last_arg_idx:]
+
+  perf_args = []
+  perf_args.extend((
+'perf', 'record', '--event=cycles:u', '--freq=max',
+'--output=%d.perf.data' % os.getpid()))
+  if opts.lbr:
+perf_args += ['--branch-filter=any,u']
+  perf_args.extend(cmd)
+
+  start_time = time.time()
+  subprocess.check_call(perf_args)
+
+  elapsed = time.time() - start_time
+  print("... data collection took %.4fs" % elapsed)
+  return 0
+
+def perf2bolt(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+description='perf2bolt conversion wrapper for perf.data files')
+  parser.add_argument('p2b_path', help='Path to llvm-bolt')
+  parser.add_argument('path', help='Path containing perf.data files')
+  parser.add_argument('binary', help='Input binary')
+  parser.add_argument('--nolbr', required=False, action='store_true',
+help='Use -nl perf2bolt mode')
+  opts = parser.parse_args(args)
+
+  p2b_args = []
+  p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+'--profile-format=yaml'))
+  if opts.nolbr:
+p2b_args += ['-nl']
+  p2b_args += ['-p']
+  for filename in findFilesWithExtension(opts.path, 'perf.data'):
+subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
   'cc1' : cc1,
   'gen-order-file' : genOrderFile,
   'merge-fdata' : merge_fdata,
+  'perf' : perf,
+  'perf2bolt' : perf2bolt,
   }
 
 def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,16 +6,25 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
-sysroot_flags = ''
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+  clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+  perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+  if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+  perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
 
 config.name = 'Clang Perf Training'
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-02-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 496253.
Amir added a comment.

Reduce changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
   subprocess.check_call(cmd)
   return 0
 
+def perf(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf',
+description='perf wrapper for BOLT profile collection')
+  parser.add_argument('--lbr', required=False, action='store_true',
+help='Use perf with branch stacks')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to perf
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  #cmd = shlex.split(args[last_arg_idx:])
+  cmd = args[last_arg_idx:]
+
+  perf_args = []
+  perf_args.extend((
+'perf', 'record', '--event=cycles:u', '--freq=max',
+'--output=%d.perf.data' % os.getpid()))
+  if opts.lbr:
+perf_args += ['--branch-filter=any,u']
+  perf_args.extend(cmd)
+
+  start_time = time.time()
+  subprocess.check_call(perf_args)
+
+  elapsed = time.time() - start_time
+  print("... data collection took %.4fs" % elapsed)
+  return 0
+
+def perf2bolt(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+description='perf2bolt conversion wrapper for perf.data files')
+  parser.add_argument('p2b_path', help='Path to llvm-bolt')
+  parser.add_argument('path', help='Path containing perf.data files')
+  parser.add_argument('binary', help='Input binary')
+  parser.add_argument('--nolbr', required=False, action='store_true',
+help='Use -nl perf2bolt mode')
+  opts = parser.parse_args(args)
+
+  p2b_args = []
+  p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+'--profile-format=yaml'))
+  if opts.nolbr:
+p2b_args += ['-nl']
+  p2b_args += ['-p']
+  for filename in findFilesWithExtension(opts.path, 'perf.data'):
+subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
   'cc1' : cc1,
   'gen-order-file' : genOrderFile,
   'merge-fdata' : merge_fdata,
+  'perf' : perf,
+  'perf2bolt' : perf2bolt,
   }
 
 def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,16 +6,25 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
-sysroot_flags = ''
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+  clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+  perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+  if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+  perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
 
 config.name = 'Clang Perf Training'
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
+config.s

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-02-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 496271.
Amir added a comment.

Reduce changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
   subprocess.check_call(cmd)
   return 0
 
+def perf(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf',
+description='perf wrapper for BOLT profile collection')
+  parser.add_argument('--lbr', required=False, action='store_true',
+help='Use perf with branch stacks')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to perf
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  #cmd = shlex.split(args[last_arg_idx:])
+  cmd = args[last_arg_idx:]
+
+  perf_args = []
+  perf_args.extend((
+'perf', 'record', '--event=cycles:u', '--freq=max',
+'--output=%d.perf.data' % os.getpid()))
+  if opts.lbr:
+perf_args += ['--branch-filter=any,u']
+  perf_args.extend(cmd)
+
+  start_time = time.time()
+  subprocess.check_call(perf_args)
+
+  elapsed = time.time() - start_time
+  print("... data collection took %.4fs" % elapsed)
+  return 0
+
+def perf2bolt(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+description='perf2bolt conversion wrapper for perf.data files')
+  parser.add_argument('p2b_path', help='Path to llvm-bolt')
+  parser.add_argument('path', help='Path containing perf.data files')
+  parser.add_argument('binary', help='Input binary')
+  parser.add_argument('--nolbr', required=False, action='store_true',
+help='Use -nl perf2bolt mode')
+  opts = parser.parse_args(args)
+
+  p2b_args = []
+  p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+'--profile-format=yaml'))
+  if opts.nolbr:
+p2b_args += ['-nl']
+  p2b_args += ['-p']
+  for filename in findFilesWithExtension(opts.path, 'perf.data'):
+subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
   'cc1' : cc1,
   'gen-order-file' : genOrderFile,
   'merge-fdata' : merge_fdata,
+  'perf' : perf,
+  'perf2bolt' : perf2bolt,
   }
 
 def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,16 +6,25 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
-sysroot_flags = ''
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+  clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+  perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+  if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+  perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
 
 config.name = 'Clang Perf Training'
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
+config.s

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-02-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 496272.
Amir added a comment.

Reduce changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
   subprocess.check_call(cmd)
   return 0
 
+def perf(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf',
+description='perf wrapper for BOLT profile collection')
+  parser.add_argument('--lbr', required=False, action='store_true',
+help='Use perf with branch stacks')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to perf
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  #cmd = shlex.split(args[last_arg_idx:])
+  cmd = args[last_arg_idx:]
+
+  perf_args = []
+  perf_args.extend((
+'perf', 'record', '--event=cycles:u', '--freq=max',
+'--output=%d.perf.data' % os.getpid()))
+  if opts.lbr:
+perf_args += ['--branch-filter=any,u']
+  perf_args.extend(cmd)
+
+  start_time = time.time()
+  subprocess.check_call(perf_args)
+
+  elapsed = time.time() - start_time
+  print("... data collection took %.4fs" % elapsed)
+  return 0
+
+def perf2bolt(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+description='perf2bolt conversion wrapper for perf.data files')
+  parser.add_argument('p2b_path', help='Path to llvm-bolt')
+  parser.add_argument('path', help='Path containing perf.data files')
+  parser.add_argument('binary', help='Input binary')
+  parser.add_argument('--nolbr', required=False, action='store_true',
+help='Use -nl perf2bolt mode')
+  opts = parser.parse_args(args)
+
+  p2b_args = []
+  p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+'--profile-format=yaml'))
+  if opts.nolbr:
+p2b_args += ['-nl']
+  p2b_args += ['-p']
+  for filename in findFilesWithExtension(opts.path, 'perf.data'):
+subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
   'cc1' : cc1,
   'gen-order-file' : genOrderFile,
   'merge-fdata' : merge_fdata,
+  'perf' : perf,
+  'perf2bolt' : perf2bolt,
   }
 
 def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,16 +6,25 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
-sysroot_flags = ''
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+  clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+  perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+  if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+  perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
 
 config.name = 'Clang Perf Training'
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
+config.s

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-02-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 496273.
Amir added a comment.

Reduce changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
   subprocess.check_call(cmd)
   return 0
 
+def perf(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf',
+description='perf wrapper for BOLT profile collection')
+  parser.add_argument('--lbr', required=False, action='store_true',
+help='Use perf with branch stacks')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to perf
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  #cmd = shlex.split(args[last_arg_idx:])
+  cmd = args[last_arg_idx:]
+
+  perf_args = []
+  perf_args.extend((
+'perf', 'record', '--event=cycles:u', '--freq=max',
+'--output=%d.perf.data' % os.getpid()))
+  if opts.lbr:
+perf_args += ['--branch-filter=any,u']
+  perf_args.extend(cmd)
+
+  start_time = time.time()
+  subprocess.check_call(perf_args)
+
+  elapsed = time.time() - start_time
+  print("... data collection took %.4fs" % elapsed)
+  return 0
+
+def perf2bolt(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+description='perf2bolt conversion wrapper for perf.data files')
+  parser.add_argument('p2b_path', help='Path to llvm-bolt')
+  parser.add_argument('path', help='Path containing perf.data files')
+  parser.add_argument('binary', help='Input binary')
+  parser.add_argument('--nolbr', required=False, action='store_true',
+help='Use -nl perf2bolt mode')
+  opts = parser.parse_args(args)
+
+  p2b_args = []
+  p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+'--profile-format=yaml'))
+  if opts.nolbr:
+p2b_args += ['-nl']
+  p2b_args += ['-p']
+  for filename in findFilesWithExtension(opts.path, 'perf.data'):
+subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
   'cc1' : cc1,
   'gen-order-file' : genOrderFile,
   'merge-fdata' : merge_fdata,
+  'perf' : perf,
+  'perf2bolt' : perf2bolt,
   }
 
 def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,16 +6,25 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
-sysroot_flags = ''
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+  clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+  perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+  if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+  perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
 
 config.name = 'Clang Perf Training'
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
+config.s

[PATCH] D143553: [Clang][CMake] Use perf-training for Clang-BOLT

2023-02-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 496274.
Amir edited the summary of this revision.
Amir added a comment.

Remove sysroot_flags


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143553/new/

https://reviews.llvm.org/D143553

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in

Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,14 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.python_exe = "@Python3_EXECUTABLE@"
+config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,20 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
+config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -59,3 +59,26 @@
 COMMENT "Generating order file"
 DEPENDS generate-dtrace-logs)
 endif()
+
+if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+  configure_lit_site_cfg(
+${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
+)
+
+  add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
+EXCLUDE_FROM_CHECK_ALL
+DEPENDS clang-instrumented clear-bolt-fdata
+)
+
+  add_custom_target(clear-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
+COMMENT "Clearing old BOLT fdata")
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(merge-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+COMMENT "Merging BOLT fdata"
+DEPENDS merge-fdata generate-bolt-fdata)
+endif()
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,6 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -853,95 +853,29 @@
 
 if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-  set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
   # Instrument clang with BOLT
   add_custom_target(clang-instrumented
 DEPENDS ${CLANG_INSTRUMENTED}
   )
+  set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
   add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
 DEPENDS clang llvm-bolt
 COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
   -instrument --

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-02-09 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 496276.
Amir added a comment.

Reduce changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
   subprocess.check_call(cmd)
   return 0
 
+def perf(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf',
+description='perf wrapper for BOLT profile collection')
+  parser.add_argument('--lbr', required=False, action='store_true',
+help='Use perf with branch stacks')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to perf
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  #cmd = shlex.split(args[last_arg_idx:])
+  cmd = args[last_arg_idx:]
+
+  perf_args = []
+  perf_args.extend((
+'perf', 'record', '--event=cycles:u', '--freq=max',
+'--output=%d.perf.data' % os.getpid()))
+  if opts.lbr:
+perf_args += ['--branch-filter=any,u']
+  perf_args.extend(cmd)
+
+  start_time = time.time()
+  subprocess.check_call(perf_args)
+
+  elapsed = time.time() - start_time
+  print("... data collection took %.4fs" % elapsed)
+  return 0
+
+def perf2bolt(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+description='perf2bolt conversion wrapper for perf.data files')
+  parser.add_argument('p2b_path', help='Path to llvm-bolt')
+  parser.add_argument('path', help='Path containing perf.data files')
+  parser.add_argument('binary', help='Input binary')
+  parser.add_argument('--nolbr', required=False, action='store_true',
+help='Use -nl perf2bolt mode')
+  opts = parser.parse_args(args)
+
+  p2b_args = []
+  p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+'--profile-format=yaml'))
+  if opts.nolbr:
+p2b_args += ['-nl']
+  p2b_args += ['-p']
+  for filename in findFilesWithExtension(opts.path, 'perf.data'):
+subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
   'cc1' : cc1,
   'gen-order-file' : genOrderFile,
   'merge-fdata' : merge_fdata,
+  'perf' : perf,
+  'perf2bolt' : perf2bolt,
   }
 
 def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,25 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+  clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+  perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+  if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+  perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
 
 config.name = 'Clang Perf Training'
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
-config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s %s --driver-mode=g++ ' % (perf_wrapper,

[PATCH] D143553: [Clang][CMake] Use perf-training for Clang-BOLT

2023-05-13 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

In D143553#4339755, @beanz wrote:

> LGTM.
>
> Sorry for the delays reviewing!

Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143553/new/

https://reviews.llvm.org/D143553

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143553: [Clang][CMake] Use perf-training for Clang-BOLT

2023-05-13 Thread Amir Ayupov via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG76b2915fdbbb: [Clang][CMake] Use perf-training for 
Clang-BOLT (authored by Amir).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143553/new/

https://reviews.llvm.org/D143553

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in

Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,14 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.python_exe = "@Python3_EXECUTABLE@"
+config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- /dev/null
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,20 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
+config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
Index: clang/utils/perf-training/CMakeLists.txt
===
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -61,3 +61,26 @@
 COMMENT "Generating order file"
 DEPENDS generate-dtrace-logs)
 endif()
+
+if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+  configure_lit_site_cfg(
+${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
+)
+
+  add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
+${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
+EXCLUDE_FROM_CHECK_ALL
+DEPENDS clang-instrumented clear-bolt-fdata
+)
+
+  add_custom_target(clear-bolt-fdata
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
+COMMENT "Clearing old BOLT fdata")
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(clang-bolt-profile
+COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+COMMENT "Merging BOLT fdata"
+DEPENDS merge-fdata generate-bolt-fdata)
+endif()
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,6 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -851,9 +851,8 @@
 
 if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-  set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+  set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
 
   # Instrument clang with BOLT
   add_custom_target(clang-instrumented
@@ -863,73 +862,11 @@
 DEPENDS clang llvm-bolt
 COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_IN

[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-05-13 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 521919.
Amir added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
   subprocess.check_call(cmd)
   return 0
 
+def perf(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf',
+description='perf wrapper for BOLT profile collection')
+  parser.add_argument('--lbr', required=False, action='store_true',
+help='Use perf with branch stacks')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to perf
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  #cmd = shlex.split(args[last_arg_idx:])
+  cmd = args[last_arg_idx:]
+
+  perf_args = []
+  perf_args.extend((
+'perf', 'record', '--event=cycles:u', '--freq=max',
+'--output=%d.perf.data' % os.getpid()))
+  if opts.lbr:
+perf_args += ['--branch-filter=any,u']
+  perf_args.extend(cmd)
+
+  start_time = time.time()
+  subprocess.check_call(perf_args)
+
+  elapsed = time.time() - start_time
+  print("... data collection took %.4fs" % elapsed)
+  return 0
+
+def perf2bolt(args):
+  parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+description='perf2bolt conversion wrapper for perf.data files')
+  parser.add_argument('p2b_path', help='Path to llvm-bolt')
+  parser.add_argument('path', help='Path containing perf.data files')
+  parser.add_argument('binary', help='Input binary')
+  parser.add_argument('--nolbr', required=False, action='store_true',
+help='Use -nl perf2bolt mode')
+  opts = parser.parse_args(args)
+
+  p2b_args = []
+  p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+'--profile-format=yaml'))
+  if opts.nolbr:
+p2b_args += ['-nl']
+  p2b_args += ['-p']
+  for filename in findFilesWithExtension(opts.path, 'perf.data'):
+subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+  return 0
+
 def dtrace(args):
   parser = argparse.ArgumentParser(prog='perf-helper dtrace',
 description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
   'cc1' : cc1,
   'gen-order-file' : genOrderFile,
   'merge-fdata' : merge_fdata,
+  'perf' : perf,
+  'perf2bolt' : perf2bolt,
   }
 
 def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,25 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+  clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+  perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+  if config.clang_bolt_mode.lower() == "lbr":
+perf_wrapper += " --lbr"
+  perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
 
 config.name = 'Clang Perf Training'
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
-config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s %s --driver-mode=g++ ' % (perf_wrapper, config.

[PATCH] D133633: [CMake] Add ClangBootstrap configuration

2023-05-16 Thread Amir Ayupov via Phabricator via cfe-commits
Amir abandoned this revision.
Amir added a comment.
Herald added a subscriber: ekilmer.

No longer needed for Clang-BOLT. @thevinster – feel free to commandeer if it 
fits your needs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133633/new/

https://reviews.llvm.org/D133633

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143617: [Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options

2023-06-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

Ping @beanz, @phosek


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139454: [CMake] Replace clang binary if using clang-bolt target

2022-12-06 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added reviewers: bolt, phosek, MaskRay.
Herald added subscribers: StephenFan, wenlei.
Herald added a project: All.
Amir requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Instead of creating an extra clang-bolt binary and clang++-bolt symlink,
replace the original clang binary with BOLT-optimized one.

This fixes the issue with installing optimized binary as `install-clang`
target now copies the new version.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D139454

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT-PGO.cmake
  llvm/docs/AdvancedBuilds.rst


Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -216,7 +216,7 @@
 
 .. code-block:: console
 
-  $ ninja clang++-bolt
+  $ ninja clang-bolt
 
 If you're seeing errors in the build process, try building with a recent
 version of Clang/LLVM by setting the CMAKE_C_COMPILER and
@@ -235,12 +235,11 @@
   -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
   -DPGO_INSTRUMENT_LTO=Thin
 
-Then, to build the final optimized binary, build the stage2-clang++-bolt
-target:
+Then, to build the final optimized binary, build the stage2-clang-bolt target:
 
 .. code-block:: console
 
-  $ ninja stage2-clang++-bolt
+  $ ninja stage2-clang-bolt
 
 3-Stage Non-Determinism
 ===
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -1,10 +1,10 @@
 set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
 
 set(CLANG_BOOTSTRAP_TARGETS
-  stage2-clang++-bolt
+  stage2-clang-bolt
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
-  clang++-bolt
+  clang-bolt
   CACHE STRING "")
 
 set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -963,22 +963,10 @@
   -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
   -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
   -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} ${CLANG_PATH}-${CLANG_VERSION_MAJOR}
 COMMENT "Optimizing Clang with BOLT"
 VERBATIM
   )
-
-  # Make a symlink from clang-bolt to clang++-bolt
-  add_custom_target(clang++-bolt
-DEPENDS ${CLANGXX_OPTIMIZED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_OPTIMIZED}
-DEPENDS clang-bolt
-COMMAND ${CMAKE_COMMAND} -E create_symlink
-  ${CLANG_OPTIMIZED}
-  ${CLANGXX_OPTIMIZED}
-COMMENT "Creating symlink from BOLT optimized clang to clang++"
-VERBATIM
-  )
 endif()
 
 if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION)


Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -216,7 +216,7 @@
 
 .. code-block:: console
 
-  $ ninja clang++-bolt
+  $ ninja clang-bolt
 
 If you're seeing errors in the build process, try building with a recent
 version of Clang/LLVM by setting the CMAKE_C_COMPILER and
@@ -235,12 +235,11 @@
   -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
   -DPGO_INSTRUMENT_LTO=Thin
 
-Then, to build the final optimized binary, build the stage2-clang++-bolt
-target:
+Then, to build the final optimized binary, build the stage2-clang-bolt target:
 
 .. code-block:: console
 
-  $ ninja stage2-clang++-bolt
+  $ ninja stage2-clang-bolt
 
 3-Stage Non-Determinism
 ===
Index: clang/cmake/caches/BOLT-PGO.cmake
===
--- clang/cmake/caches/BOLT-PGO.cmake
+++ clang/cmake/caches/BOLT-PGO.cmake
@@ -1,10 +1,10 @@
 set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
 
 set(CLANG_BOOTSTRAP_TARGETS
-  stage2-clang++-bolt
+  stage2-clang-bolt
   CACHE STRING "")
 set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
-  clang++-bolt
+  clang-bolt
   CACHE STRING "")
 
 set(PGO_BUILD_CONFIGURATION ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -963,22 +963,10 @@
   -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
   -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
   -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} ${CLANG_PATH}-${CLANG_VERSION_MAJOR}
 COMMENT "Optimizing Clang with BOLT"
 VERBATIM
   )
-
-  # Make a symlink from clang-bolt to clang++-bolt
-  add_custom_target(clang++-bolt
-DEPENDS ${CLANGXX_OPTIMIZED}
-  )
-  

[PATCH] D139496: [CMake] Use perf with LBR for clang-bolt (WIP)

2022-12-06 Thread Amir Ayupov via Phabricator via cfe-commits
Amir created this revision.
Amir added reviewers: bolt, phosek.
Herald added a project: All.
Amir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

`perf` provides a faster and easier way to collect BOLT
profile. Generalize CMake handling of applying BOLT to
Clang to allow using perf with or without LBR stacks
for profile collection.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,15 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT_PERF OFF CACHE BOOL "")
+set(CLANG_BOLT_PERF_LBR OFF CACHE BOOL "")
+
+if (CLANG_BOLT_PERF)
+  set(CLANG_BOLT_INSTRUMENT OFF CACHE BOOL "" FORCE)
+endif()
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,93 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT_INSTRUMENT OR CLANG_BOLT_PERF AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-COMMENT "Instrumenting clang binary with BOLT"
-VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
-DEPENDS clang-instrumented
-COMMAND ${CMAKE_COMMAND} -E create_symlink
-  ${CLANG_INSTRUMENTED}
-  ${CLANGXX_INSTRUMENTED}
-COMMENT "Creating symlink from BOLT instrumented clang to clang++"
-VERBATIM
-  )
+# Instrument clang with BOLT
+add_custom_target(clang-instrumented
+  DEPENDS ${CLANG_INSTRUMENTED}
+)
+add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+  DEPENDS clang llvm-bolt
+  COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+-instrument --instrumentation-file-append-pid
+--instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  COMMENT "Instrumenting clang binary with BOLT"
+  VERBATIM
+)
+
+# Make a symlink from clang-bolt.inst to clang++-bolt.inst
+add_custom_target(clang++-instrumented
+  DEPENDS ${CLANGXX_INSTRUMENTED}
+)
+add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+  DEPENDS clang-instrumented
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+${CLANG_INSTRUMENTED}
+${CLANGXX_INSTRUMENTED}
+  COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+  VERBATIM
+)
+  endif()
+
+  # Set variables for profile collection step
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_BOLT_CC ${CLANG_INSTRUMENTED})
+set(CLANG_BOLT_CXX ${CLANGXX_INSTRUMENTED})
+  else() # CLANG_BOLT_PERF
+set(CLANG_BOLT_CC ${CLANG_PATH})
+set(CLANG_BOLT_CXX ${CLANGXX_PATH})
+
+# Perf sampling:
+# - use maximum frequency to reduce training time
+# - use cycle events instead of branches - empirically found to produce better results
+# - if available, enable taken branch stack/LBR sampling (-j/--branch-filter)
+set(PERF_CMDLINE perf record --event=cycles:u --output=${CMAKE_CURRENT_BINARY_DIR}/perf.data --freq=max)
+if (CLANG_BOLT_PERF_LBR)
+  list(APPEND PERF_CMDLINE --branch-filter=any,u)
+endif()
+list(APPEND PERF_CMDLINE --)
+
+list(APPEND CLANG_BOLT_EXTRA_CMAKE_FLAGS
+  -DCMAKE_C_COMPILER_LAUNCHER=${PERF_CMDLINE}
+  -DCMAKE_CXX_COMPILER_LAUNCHER=${PE

[PATCH] D139496: [CMake] Use perf with LBR for clang-bolt (WIP)

2022-12-06 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 480739.
Amir added a comment.

Fix dependence between bolt-profile and clang (either instrumented or not)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,23 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT_PERF OFF CACHE BOOL "")
+set(CLANG_BOLT_PERF_LBR OFF CACHE BOOL "")
+
+if (CLANG_BOLT_PERF)
+  set(CLANG_BOLT_INSTRUMENT OFF CACHE BOOL "" FORCE)
+endif()
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
 
 # Disable function splitting enabled by default in GCC8+
 if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition"
+CACHE FORCE)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition"
+CACHE FORCE)
 endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,99 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT_INSTRUMENT OR CLANG_BOLT_PERF AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-COMMENT "Instrumenting clang binary with BOLT"
-VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
-DEPENDS clang-instrumented
-COMMAND ${CMAKE_COMMAND} -E create_symlink
-  ${CLANG_INSTRUMENTED}
-  ${CLANGXX_INSTRUMENTED}
-COMMENT "Creating symlink from BOLT instrumented clang to clang++"
-VERBATIM
-  )
+# Instrument clang with BOLT
+add_custom_target(clang-instrumented
+  DEPENDS ${CLANG_INSTRUMENTED}
+)
+add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+  DEPENDS clang llvm-bolt
+  COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+-instrument --instrumentation-file-append-pid
+--instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  COMMENT "Instrumenting clang binary with BOLT"
+  VERBATIM
+)
+
+# Make a symlink from clang-bolt.inst to clang++-bolt.inst
+add_custom_target(clang++-instrumented
+  DEPENDS ${CLANGXX_INSTRUMENTED}
+)
+add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+  DEPENDS clang-instrumented
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+${CLANG_INSTRUMENTED}
+${CLANGXX_INSTRUMENTED}
+  COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+  VERBATIM
+)
+  endif()
 
-  # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  # Set variables for profile collection step
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_BOLT_CC ${CLANG_INSTRUMENTED})
+set(CLANG_BOLT_CXX ${CLANGXX_INSTRUMENTED})
+  else() # CLANG_BOLT_PERF
+set(CLANG_BOLT_CC ${CLANG_PATH})
+set(CLANG_BOLT_CXX ${CLANGXX_PATH})
+
+# Perf sampling:
+# - use maximum frequency to reduce training time
+# - use cycle events instead of branches - empir

[PATCH] D139496: [CMake] Use perf with LBR for clang-bolt (WIP)

2022-12-06 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 480760.
Amir added a comment.

Fixed COMPILER_LAUNCHER, perf2bolt invocation


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,23 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT_PERF OFF CACHE BOOL "")
+set(CLANG_BOLT_PERF_LBR OFF CACHE BOOL "")
+
+if (CLANG_BOLT_PERF)
+  set(CLANG_BOLT_INSTRUMENT OFF CACHE BOOL "" FORCE)
+endif()
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
 
 # Disable function splitting enabled by default in GCC8+
 if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition"
+CACHE FORCE)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition"
+CACHE FORCE)
 endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,106 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT_INSTRUMENT OR CLANG_BOLT_PERF AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-COMMENT "Instrumenting clang binary with BOLT"
-VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
-DEPENDS clang-instrumented
-COMMAND ${CMAKE_COMMAND} -E create_symlink
-  ${CLANG_INSTRUMENTED}
-  ${CLANGXX_INSTRUMENTED}
-COMMENT "Creating symlink from BOLT instrumented clang to clang++"
-VERBATIM
-  )
+# Instrument clang with BOLT
+add_custom_target(clang-instrumented
+  DEPENDS ${CLANG_INSTRUMENTED}
+)
+add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+  DEPENDS clang llvm-bolt
+  COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+-instrument --instrumentation-file-append-pid
+--instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  COMMENT "Instrumenting clang binary with BOLT"
+  VERBATIM
+)
+
+# Make a symlink from clang-bolt.inst to clang++-bolt.inst
+add_custom_target(clang++-instrumented
+  DEPENDS ${CLANGXX_INSTRUMENTED}
+)
+add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+  DEPENDS clang-instrumented
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+${CLANG_INSTRUMENTED}
+${CLANGXX_INSTRUMENTED}
+  COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+  VERBATIM
+)
+  endif()
 
-  # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  # Set variables for profile collection step
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_BOLT_CC ${CLANG_INSTRUMENTED})
+set(CLANG_BOLT_CXX ${CLANGXX_INSTRUMENTED})
+  else() # CLANG_BOLT_PERF
+set(CLANG_BOLT_CC ${CLANG_PATH})
+set(CLANG_BOLT_CXX ${CLANGXX_PATH})
+
+# Perf sampling:
+# - use maximum frequency to reduce training time
+# - use cycle events instead of branches - empirically found to produce
+   

[PATCH] D139496: [CMake] Use perf with LBR for clang-bolt (WIP)

2022-12-07 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 480982.
Amir added a comment.

Avoid using perf2bolt, provide perf.data directly


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,23 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT_PERF OFF CACHE BOOL "")
+set(CLANG_BOLT_PERF_LBR OFF CACHE BOOL "")
+
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+if (CLANG_BOLT_PERF)
+  set(CLANG_BOLT_INSTRUMENT OFF CACHE BOOL "" FORCE)
+  set(CLANG_BOLT_TARGETS "FileCheck" CACHE STRING "")
+else()
+  set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
+endif()
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
 
 # Disable function splitting enabled by default in GCC8+
 if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_C_FLAGS -fno-reorder-blocks-and-partition CACHE STRING "")
+  set(CMAKE_CXX_FLAGS -fno-reorder-blocks-and-partition CACHE STRING "")
 endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,98 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT_INSTRUMENT OR CLANG_BOLT_PERF AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-COMMENT "Instrumenting clang binary with BOLT"
-VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
-DEPENDS clang-instrumented
-COMMAND ${CMAKE_COMMAND} -E create_symlink
-  ${CLANG_INSTRUMENTED}
-  ${CLANGXX_INSTRUMENTED}
-COMMENT "Creating symlink from BOLT instrumented clang to clang++"
-VERBATIM
-  )
+# Instrument clang with BOLT
+add_custom_target(clang-instrumented
+  DEPENDS ${CLANG_INSTRUMENTED}
+)
+add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+  DEPENDS clang llvm-bolt
+  COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+-instrument --instrumentation-file-append-pid
+--instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+${CLANG_INSTRUMENTED}
+${CLANGXX_INSTRUMENTED}
+  COMMENT "Instrumenting clang binary with BOLT"
+  VERBATIM
+)
+  endif()
 
-  # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  # Set variables for profile collection step
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_BOLT_CC ${CLANG_INSTRUMENTED})
+set(CLANG_BOLT_CXX ${CLANGXX_INSTRUMENTED})
+  else() # CLANG_BOLT_PERF
+set(CLANG_BOLT_CC ${CLANG_PATH})
+set(CLANG_BOLT_CXX ${CLANGXX_PATH})
+
+# Perf sampling:
+# - use maximum frequency to reduce training time
+# - use cycle events instead of branches - empirically found to produce
+#   better results
+# - if available, enable taken branch stack/LBR sampling
+#   (-j/--branch-filter)
+set(PERF_CMDLINE
+  perf record --event=cycles:u
+  --output=${CMAKE_CURRENT_BINARY_DIR}/prof.data
+  --freq=max
+)
+if (CLANG_BOLT_PERF_LBR)
+  list(APPEND PERF_CMDLINE --bra

[PATCH] D139496: [CMake] Use perf with LBR for clang-bolt (WIP)

2022-12-07 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 481078.
Amir added a comment.

Fix instrumentation and no-LBR modes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake

Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,17 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT_PERF OFF CACHE BOOL "")
+set(CLANG_BOLT_PERF_LBR OFF CACHE BOOL "")
+
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+if (CLANG_BOLT_PERF)
+  set(CLANG_BOLT_INSTRUMENT OFF CACHE BOOL "" FORCE)
+  set(CLANG_BOLT_TARGETS "FileCheck" CACHE STRING "")
+else()
+  set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
+endif()
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
-
-# Disable function splitting enabled by default in GCC8+
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
-endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,106 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT_INSTRUMENT OR CLANG_BOLT_PERF AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-COMMENT "Instrumenting clang binary with BOLT"
-VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
-DEPENDS clang-instrumented
-COMMAND ${CMAKE_COMMAND} -E create_symlink
-  ${CLANG_INSTRUMENTED}
-  ${CLANGXX_INSTRUMENTED}
-COMMENT "Creating symlink from BOLT instrumented clang to clang++"
-VERBATIM
-  )
+# Instrument clang with BOLT
+add_custom_target(clang-instrumented
+  DEPENDS ${CLANG_INSTRUMENTED}
+)
+add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+  DEPENDS clang llvm-bolt
+  COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+-instrument --instrumentation-file-append-pid
+--instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+${CLANG_INSTRUMENTED}
+${CLANGXX_INSTRUMENTED}
+  COMMENT "Instrumenting clang binary with BOLT"
+  VERBATIM
+)
+  endif()
+
+  # Set variables for profile collection step
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_BOLT_CC ${CLANG_INSTRUMENTED})
+set(CLANG_BOLT_CXX ${CLANGXX_INSTRUMENTED})
+  else() # CLANG_BOLT_PERF
+set(CLANG_BOLT_CC ${CLANG_PATH})
+set(CLANG_BOLT_CXX ${CLANGXX_PATH})
+
+# Perf sampling:
+# - use maximum frequency to reduce training time
+# - use cycle events instead of branches - empirically found to produce
+#   better results
+# - if available, enable taken branch stack/LBR sampling
+#   (-j/--branch-filter)
+set(PERF_CMDLINE
+  perf record --event=cycles:u
+  --output=${CMAKE_CURRENT_BINARY_DIR}/prof.data
+  --freq=max
+)
+if (CLANG_BOLT_PERF_LBR)
+  list(APPEND PERF_CMDLINE --branch-filter=any,u)
+endif()
+list(APPEND PERF_CMDLINE --)
+  endif()
+
+  # Build specified targets to collect the profile
+  add_custom_target(bolt-profile-deps)
+  if (CLANG_BOLT_INSTRUMENT)
+add_dependencies(bolt-profile-deps clang-instrumented)
+set(CLANG_BOLT_PROFILE ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata)
+  else()
+add_dependencies(bolt-profile-deps clang)
+   

[PATCH] D139496: [CMake] Use perf with LBR for clang-bolt (WIP)

2022-12-07 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 481138.
Amir added a comment.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Documentation


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  llvm/docs/AdvancedBuilds.rst

Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -241,6 +241,56 @@
 
   $ ninja stage2-clang-bolt
 
+BOLT profile
+------------
+BOLT uses the profile collected by either Linux `perf` or via BOLT's own
+instrumentation. Both modes are supported by CMake automation, with
+instrumentation being the default in `BOLT.cmake` and `BOLT-PGO.cmake`.
+
+It's strongly recommended to use `perf` if the host system supports it, as it
+is a significantly faster and potentially more reliable method:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT_PERF=ON \
+  -C /clang/cmake/caches/BOLT.cmake
+
+If the host system supports profiling branch stacks (e.g. AMD or Intel LBR
+(Last Branch Record), Armv9-A BRBE (Branch Record Buffer Extension)), it can be
+enabled with `-DCLANG_BOLT_PERF_LBR=ON` to further improve the profile quality:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT_PERF=ON -DCLANG_BOLT_PERF_LBR=ON \
+  -C /clang/cmake/caches/BOLT.cmake
+
+The following matrix describes supported profiling methods. Note that Linux/ELF
+is the only supported platform.
+
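+============ =============== ========== ===================
+Architecture Instrumentation Linux perf Linux perf with LBR
+============ =============== ========== ===================
+x86_64       Yes             Yes        Yes
+AArch64      No              Yes        Not tested
+============ =============== ========== ===================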
+
+Profiling targets
+-----------------
+The BOLT profile is collected by building one of the in-tree projects/targets with
+Clang as a workload. The following configuration options can be used to change
+the profiling build:
+
+**CLANG_BOLT_PROJECTS**
+  Projects to enable in profiling build. Defaults to `llvm`.
+
+**CLANG_BOLT_TARGETS**
+  Targets to build in profiling build. Defaults to `count` in instrumentation
+  build and `FileCheck` in perf-build.
+
+**CLANG_BOLT_EXTRA_CMAKE_FLAGS**
+  Extra CMake flags to pass to profiling build at configuration time.
+
+
 3-Stage Non-Determinism
 =======================
 
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,17 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT_PERF OFF CACHE BOOL "")
+set(CLANG_BOLT_PERF_LBR OFF CACHE BOOL "")
+
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+if (CLANG_BOLT_PERF)
+  set(CLANG_BOLT_INSTRUMENT OFF CACHE BOOL "" FORCE)
+  set(CLANG_BOLT_TARGETS "FileCheck" CACHE STRING "")
+else()
+  set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
+endif()
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
-
-# Disable function splitting enabled by default in GCC8+
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
-endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,106 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT_INSTRUMENT OR CLANG_BOLT_PERF AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrumentation-file-append-pid
-  --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-COMMENT "Instrumenting clang binary with BOLT"
-VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+set(CLANG_INSTRUMENTED ${CLANG_PA

[PATCH] D139496: [CMake] Add perf profiling for clang-bolt

2022-12-10 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

In D139496#3986435, @phosek wrote:

> Could we add the `perf` related logic to 
> https://github.com/llvm/llvm-project/blob/ba3d808feedaa7f31750d8bc02754e15b372c868/clang/utils/perf-training/perf-helper.py?
>  I think that's a better place since we eventually want to replace the use of 
> `ExternalProject_Add` with 
> https://github.com/llvm/llvm-project/tree/main/clang/utils/perf-training so 
> we should try to keep the amount of logic in CMake down to minimum.

Sure! I didn't realize perf-helper had dtrace functionality in place. Adding 
Linux perf functions would be logical.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139496: [CMake] Add perf profiling for clang-bolt

2022-12-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir updated this revision to Diff 483400.
Amir added a comment.

Generalize to -DCLANG_BOLT={Instrument,perf,LBR}, update documentation


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  llvm/docs/AdvancedBuilds.rst

Index: llvm/docs/AdvancedBuilds.rst
===
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -241,6 +241,62 @@
 
   $ ninja stage2-clang-bolt
 
+BOLT profile
+------------
+BOLT uses the profile collected by either Linux `perf` or via BOLT's own
+instrumentation. Both modes are supported by CMake automation, with
+instrumentation being the default (`-DCLANG_BOLT=INSTRUMENT`).
+
+It's strongly recommended to use `perf` if the host system supports it, as it
+is a significantly faster and potentially more reliable method:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT=perf \
+  -C /clang/cmake/caches/BOLT.cmake
+
+If the host system supports profiling branch stacks (e.g. AMD or Intel LBR
+(Last Branch Record), Armv9-A BRBE (Branch Record Buffer Extension)), it can be
+enabled with `-DCLANG_BOLT=LBR` to further improve the profile quality:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT=LBR \
+  -C /clang/cmake/caches/BOLT.cmake
+
+The following matrix describes supported profiling methods. Note that Linux/ELF
+is the only supported platform.
+
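+============ ===============================
+Architecture `-DCLANG_BOLT` value
+------------ ------------ ------ -----------
+             `Instrument` `perf` `LBR`
+============ ============ ====== ===========
+x86_64       Yes          Yes    Yes
+AArch64      No           Yes    No HW exist
+============ ============ ====== ===========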
+
+Profiling variables
+-------------------
+The BOLT profile is collected by building one of the in-tree projects/targets with
+Clang as a workload. The following configuration options can be used to change
+the profiling build and profiling mechanism:
+
+**CLANG_BOLT**
+  Profiling mechanism to be used. Supported values: `Instrument` (default),
+  `perf` (requires OS support), `LBR` (requires hardware support).
+
+**CLANG_BOLT_PROJECTS**
+  Projects to enable in profiling build. Defaults to `llvm`.
+
+**CLANG_BOLT_TARGETS**
+  Targets to build in profiling build. Defaults to `count` in instrumentation
+  build and `FileCheck` in perf-build.
+
+**CLANG_BOLT_EXTRA_CMAKE_FLAGS**
+  Extra CMake flags to pass to profiling build at configuration time.
+
+
 3-Stage Non-Determinism
 =======================
 
Index: clang/cmake/caches/BOLT.cmake
===
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,18 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "Apply BOLT optimization to Clang. \
+  May be specified as Instrument or Perf or LBR to use a particular profiling \
+  mechanism.")
+
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+  set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
+else()
+  set(CLANG_BOLT_TARGETS "FileCheck" CACHE STRING "")
+else()
+endif()
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
-
-# Disable function splitting enabled by default in GCC8+
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
-endif()
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,106 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-DEPENDS clang llvm-bolt
-COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-  -instrument --instrument

[PATCH] D139496: [CMake] Add perf profiling for clang-bolt

2022-12-15 Thread Amir Ayupov via Phabricator via cfe-commits
Amir added a comment.

@phosek – this diff adds support for AArch64 via Linux perf. I believe it makes 
sense to add this functionality first in an incremental fashion and refactor it 
later, moving parts into perf-training script. What do you think? (And thank 
you for reviewing this stuff!)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D119918: [CMake] Rename TARGET_TRIPLE to LLVM_TARGET_TRIPLE

2022-03-04 Thread Amir Ayupov via Phabricator via cfe-commits
Amir accepted this revision.
Amir added a comment.

LGTM from BOLT side


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119918/new/

https://reviews.llvm.org/D119918

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits