================ @@ -26,9 +30,23 @@ if(LLVM_BUILD_INSTRUMENTED) message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata") else() add_custom_target(generate-profdata - COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} + COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/ COMMENT "Merging profdata" DEPENDS generate-profraw) + if (CLANG_PGO_TRAINING_DATA_SOURCE_DIR) + llvm_ExternalProject_Add(generate-profraw-external ${CLANG_PGO_TRAINING_DATA_SOURCE_DIR} + USE_TOOLCHAIN EXLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw) + add_dependencies(generate-profdata generate-profraw-external) + else() + # Default to compiling a file from clang. This also builds all the + # dependencies needed to build this file, like TableGen. + set(generate_profraw_clang_sema tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/Sema.cpp.o) + llvm_ExternalProject_Add(generate-profraw-clang ${CMAKE_CURRENT_SOURCE_DIR}/../../../llvm + USE_TOOLCHAIN EXLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw + EXTRA_TARGETS generate_profraw_clang_sema ---------------- tstellar wrote:
> This is going to be really fragile. The paths of object file outputs are an
> implementation detail of CMake that has changed in the past and could change
> in the future; also, I think only the Makefile and Ninja generators actually
> export those as buildable targets.
> What about using the library target `clangSema` instead of the individual file?
> Beyond that, I also have a philosophical problem with the approach of using
> living clang sources as training data. I think this can result in
> unexplainable or difficult-to-diagnose differences in the efficacy of PGO,
> because as you change the compiler you're also innately changing the training
> data. A potentially better solution would be to have a preprocessed Sema.cpp
> from a recent LLVM release that we check in as a stable copy. This would be
> similar to how the GCC compile-time benchmark works.

The first thing I tried to do was add a preprocessed version of CommentSema.cpp, but the file was 10MB, and I thought that might be too big. The preprocessed Sema.cpp is 5MB, which is smaller but still pretty big. I would be OK with this approach if we think it's OK to add a big file like this to the repo.

I'm also not concerned about the efficacy of PGO, because building Sema.cpp is just the default. I really want there to be a good default experience for users, so that they can use the PGO cache files and things will 'just work'. This patch also adds a configuration option for advanced users who need something more consistent.

https://github.com/llvm/llvm-project/pull/77347
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits