https://github.com/tonykuttai updated https://github.com/llvm/llvm-project/pull/178184
>From 5179189c73ea949d5c764e06a6cc0bb9d911c0e0 Mon Sep 17 00:00:00 2001 From: Tony Varghese <[email protected]> Date: Mon, 2 Feb 2026 23:19:10 -0500 Subject: [PATCH] [PowerPC][AIX] Support #pragma comment copyright for AIX - Emit !aix.copyright.comment from Clang for the pragma. - Lower it in LLVM to a TU-local string + llvm.used + !implicit.ref. - Add module-import and backend relocation tests. --- .../clang/Basic/DiagnosticParseKinds.td | 4 + clang/include/clang/Basic/PragmaKinds.h | 3 +- clang/lib/AST/TextNodeDumper.cpp | 3 + clang/lib/CodeGen/CodeGenModule.cpp | 51 +++++- clang/lib/CodeGen/CodeGenModule.h | 11 ++ clang/lib/Parse/ParsePragma.cpp | 53 +++++-- .../pragma-comment-copyright-modules.cpp | 30 ++++ clang/test/CodeGen/PowerPC/pragma-comment.c | 42 +++++ clang/test/CodeGen/lto-newpm-pipeline.c | 2 + clang/test/Parser/pragma-comment.c | 11 ++ .../Transforms/Utils/LowerCommentStringPass.h | 24 +++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 7 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + .../Utils/LowerCommentStringPass.cpp | 148 ++++++++++++++++++ .../CodeGen/AArch64/print-pipeline-passes.ll | 2 +- .../CodeGen/Hexagon/print-pipeline-passes.ll | 2 +- llvm/test/Other/new-pm-defaults.ll | 1 + .../Other/new-pm-thinlto-postlink-defaults.ll | 1 + .../new-pm-thinlto-postlink-pgo-defaults.ll | 1 + ...-pm-thinlto-postlink-samplepgo-defaults.ll | 1 + .../lower-comment-string.ll | 59 +++++++ 23 files changed, 444 insertions(+), 15 deletions(-) create mode 100644 clang/test/CodeGen/PowerPC/pragma-comment-copyright-modules.cpp create mode 100644 clang/test/CodeGen/PowerPC/pragma-comment.c create mode 100644 clang/test/Parser/pragma-comment.c create mode 100644 llvm/include/llvm/Transforms/Utils/LowerCommentStringPass.h create mode 100644 llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp create mode 100644 llvm/test/Transforms/LowerCommentString/lower-comment-string.ll diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 19edc7f7a3b23..964251bba9c41 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1358,6 +1358,10 @@ def err_pragma_comment_unknown_kind : Error<"unknown kind of pragma comment">; // PS4 recognizes only #pragma comment(lib) def warn_pragma_comment_ignored : Warning<"'#pragma comment %0' ignored">, InGroup<IgnoredPragmas>; +def warn_pragma_comment_once + : Warning<"'#pragma comment %0' " + "ignored: it can be specified only once per translation unit">, + InGroup<IgnoredPragmas>; // - #pragma detect_mismatch def err_pragma_detect_mismatch_malformed : Error< "pragma detect_mismatch is malformed; it requires two comma-separated " diff --git a/clang/include/clang/Basic/PragmaKinds.h b/clang/include/clang/Basic/PragmaKinds.h index 42f049f7323d2..52ca58855d460 100644 --- a/clang/include/clang/Basic/PragmaKinds.h +++ b/clang/include/clang/Basic/PragmaKinds.h @@ -17,7 +17,8 @@ enum PragmaMSCommentKind { PCK_Lib, // #pragma comment(lib, ...) PCK_Compiler, // #pragma comment(compiler, ...) PCK_ExeStr, // #pragma comment(exestr, ...) - PCK_User // #pragma comment(user, ...) + PCK_User, // #pragma comment(user, ...) + PCK_Copyright // #pragma comment(copyright, ...) }; enum PragmaMSStructKind { diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 250ec8b666e05..51c97e6700102 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -2560,6 +2560,9 @@ void TextNodeDumper::VisitPragmaCommentDecl(const PragmaCommentDecl *D) { case PCK_User: OS << "user"; break; + case PCK_Copyright: + OS << "copyright"; + break; } StringRef Arg = D->getArg(); if (!Arg.empty()) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index daaa846bf42bc..eaa64b10e2368 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1632,6 +1632,8 @@ void CodeGenModule::Release() { EmitBackendOptionsMetadata(getCodeGenOpts()); + EmitLoadTimeComment(); + // If there is device offloading code embed it in the host now. EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags()); @@ -3541,6 +3543,39 @@ void CodeGenModule::AddDependentLib(StringRef Lib) { LinkerOptionsMetadata.push_back(llvm::MDNode::get(C, MDOpts)); } +/// Process copyright pragma and create LLVM metadata. +/// #pragma comment(copyright, "string") embeds copyright information into a +/// loadable program-data section of the object file for inclusion in the linked +/// module. +/// +/// Example: #pragma comment(copyright, "Copyright string") +/// +/// This should only be called once per translation unit. +void CodeGenModule::ProcessPragmaComment(PragmaMSCommentKind Kind, + StringRef Comment, + bool isFromASTFile) { + // Ensure we are only processing Copyright Pragmas + assert(Kind == PCK_Copyright && + "Unexpected pragma comment kind, ProcessPragmaComment should only be " + "called for PCK_Copyright"); + // Target Guard: Only AIX supports PCK_Copyright currently. + assert(getTriple().isOSAIX() && + "pragma comment copyright is supported only when targeting AIX"); + + // Deserialization Guard: Only process if copyright originated in this TU. + if (isFromASTFile) + return; + + // Only one copyright pragma allowed per translation unit + assert(!LoadTimeComment && + "Only one copyright pragma allowed per translation unit"); + + // Create llvm metadata with the comment string + auto &C = getLLVMContext(); + llvm::Metadata *Ops[] = {llvm::MDString::get(C, Comment)}; + LoadTimeComment = llvm::MDNode::get(C, Ops); +} + /// Add link options implied by the given module, including modules /// it depends on, using a postorder walk. static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, @@ -4061,6 +4096,16 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { return getContext().DeclMustBeEmitted(Global); } +void CodeGenModule::EmitLoadTimeComment() { + // Emit copyright metadata for #pragma comment(copyright, ...). + // The LoadTimeComment is set during pragma processing and contains the + // copyright string that should be embedded in the executable. + if (LoadTimeComment) { + auto *NMD = getModule().getOrInsertNamedMetadata("comment_string.loadtime"); + NMD->addOperand(LoadTimeComment); + } +} + bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // In OpenMP 5.0 variables and function may be marked as // device_type(host/nohost) and we should not emit them eagerly unless we sure @@ -7793,7 +7838,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { AppendLinkerOptions(PCD->getArg()); break; case PCK_Lib: - AddDependentLib(PCD->getArg()); + AddDependentLib(PCD->getArg()); + break; + case PCK_Copyright: + ProcessPragmaComment(PCD->getCommentKind(), PCD->getArg(), + PCD->isFromASTFile()); break; case PCK_Compiler: case PCK_ExeStr: diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 0a697c84b66a7..d859943ebfb78 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -592,6 +592,9 @@ class CodeGenModule : public CodeGenTypeCache { /// A vector of metadata strings for dependent libraries for ELF. SmallVector<llvm::MDNode *, 16> ELFDependentLibraries; + /// Metadata for copyright pragma comment (if present). + llvm::MDNode *LoadTimeComment = nullptr; + /// @name Cache for Objective-C runtime types /// @{ @@ -1495,6 +1498,9 @@ class CodeGenModule : public CodeGenTypeCache { /// Appends a dependent lib to the appropriate metadata value. void AddDependentLib(StringRef Lib); + /// Process pragma comment + void ProcessPragmaComment(PragmaMSCommentKind Kind, StringRef Comment, + bool isFromASTFile); llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD); @@ -2075,6 +2081,11 @@ class CodeGenModule : public CodeGenTypeCache { /// false, the definition can be emitted lazily if it's used. bool MustBeEmitted(const ValueDecl *D); + /// Emit load-time comment metadata for #pragma comment(copyright, ...). + /// This adds the copyright string to the module's named metadata, which will + /// be processed by the backend to include it in the generated executable. + void EmitLoadTimeComment(); + /// Determine whether the definition can be emitted eagerly, or should be /// delayed until the end of the translation unit. This is relevant for /// definitions whose linkage can change, e.g. implicit function instantions diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 734d095d74cdf..d87967042089a 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -237,6 +237,7 @@ struct PragmaCommentHandler : public PragmaHandler { private: Sema &Actions; + bool SeenCopyrightInTU = false; // TU-scoped }; struct PragmaDetectMismatchHandler : public PragmaHandler { @@ -480,7 +481,8 @@ void Parser::initializePragmaHandlers() { PP.AddPragmaHandler(OpenACCHandler.get()); if (getLangOpts().MicrosoftExt || - getTargetInfo().getTriple().isOSBinFormatELF()) { + getTargetInfo().getTriple().isOSBinFormatELF() || + getTargetInfo().getTriple().isOSAIX()) { MSCommentHandler = std::make_unique<PragmaCommentHandler>(Actions); PP.AddPragmaHandler(MSCommentHandler.get()); } @@ -607,7 +609,8 @@ void Parser::resetPragmaHandlers() { OpenACCHandler.reset(); if (getLangOpts().MicrosoftExt || - getTargetInfo().getTriple().isOSBinFormatELF()) { + getTargetInfo().getTriple().isOSBinFormatELF() || + getTargetInfo().getTriple().isOSAIX()) { PP.RemovePragmaHandler(MSCommentHandler.get()); MSCommentHandler.reset(); } @@ -3265,7 +3268,9 @@ void PragmaDetectMismatchHandler::HandlePragma(Preprocessor &PP, /// \code /// #pragma comment(linker, "foo") /// \endcode -/// 'linker' is one of five identifiers: compiler, exestr, lib, linker, user. +/// 'linker' is one of six identifiers: compiler, copyright, exestr, lib, +/// linker, user. +/// /// "foo" is a string, which is fully macro expanded, and permits string /// concatenation, embedded escape characters etc. See MSDN for more details. void PragmaCommentHandler::HandlePragma(Preprocessor &PP, @@ -3285,27 +3290,49 @@ void PragmaCommentHandler::HandlePragma(Preprocessor &PP, return; } - // Verify that this is one of the 5 explicitly listed options. + // Verify that this is one of the 6 explicitly listed options. IdentifierInfo *II = Tok.getIdentifierInfo(); PragmaMSCommentKind Kind = - llvm::StringSwitch<PragmaMSCommentKind>(II->getName()) - .Case("linker", PCK_Linker) - .Case("lib", PCK_Lib) - .Case("compiler", PCK_Compiler) - .Case("exestr", PCK_ExeStr) - .Case("user", PCK_User) - .Default(PCK_Unknown); + llvm::StringSwitch<PragmaMSCommentKind>(II->getName()) + .Case("linker", PCK_Linker) + .Case("lib", PCK_Lib) + .Case("compiler", PCK_Compiler) + .Case("exestr", PCK_ExeStr) + .Case("user", PCK_User) + .Case("copyright", PCK_Copyright) + .Default(PCK_Unknown); + if (Kind == PCK_Unknown) { PP.Diag(Tok.getLocation(), diag::err_pragma_comment_unknown_kind); return; } + // Handle AIX Target restrictions (all non-copyright kinds) + if (PP.getTargetInfo().getTriple().isOSAIX() && Kind != PCK_Copyright) { + // pragma comment kinds linker, lib, compiler, exestr and user are + // ignored when targeting AIX. + PP.Diag(Tok.getLocation(), diag::warn_pragma_comment_ignored) + << II->getName(); + return; + } + if (PP.getTargetInfo().getTriple().isOSBinFormatELF() && Kind != PCK_Lib) { PP.Diag(Tok.getLocation(), diag::warn_pragma_comment_ignored) << II->getName(); return; } + // Handle pragma comment copyright + if (Kind == PCK_Copyright) { + if (SeenCopyrightInTU) { + // On AIX, pragma comment copyright can each appear only once in a TU. + PP.Diag(Tok.getLocation(), diag::warn_pragma_comment_once) + << II->getName(); + return; + } + SeenCopyrightInTU = true; + } + // Read the optional string if present. PP.Lex(Tok); std::string ArgumentString; @@ -3332,6 +3359,10 @@ void PragmaCommentHandler::HandlePragma(Preprocessor &PP, return; } + // Skip further processing for well-formed copyright with an empty string + if (Kind == PCK_Copyright && ArgumentString.empty()) + return; + // If the pragma is lexically sound, notify any interested PPCallbacks. if (PP.getPPCallbacks()) PP.getPPCallbacks()->PragmaComment(CommentLoc, II, ArgumentString); diff --git a/clang/test/CodeGen/PowerPC/pragma-comment-copyright-modules.cpp b/clang/test/CodeGen/PowerPC/pragma-comment-copyright-modules.cpp new file mode 100644 index 0000000000000..b8641531177ac --- /dev/null +++ b/clang/test/CodeGen/PowerPC/pragma-comment-copyright-modules.cpp @@ -0,0 +1,30 @@ +// RUN: split-file %s %t + +// Build the module interface to a PCM +// RUN: %clang_cc1 -std=c++20 -triple powerpc-ibm-aix \ +// RUN: -emit-module-interface %t/copymod.cppm -o %t/copymod.pcm + +// Verify that module interface emits copyright global when compiled to IR + +// RUN: %clang_cc1 -std=c++20 -triple powerpc-ibm-aix -emit-llvm %t/copymod.cppm -o - \ +// RUN: | FileCheck %s --check-prefix=CHECK-MOD +// CHECK-MOD: @__loadtime_comment_str = internal unnamed_addr constant [10 x i8] c"module me\00", section "__loadtime_comment" +// CHECK-MOD: @llvm.used = appending global {{.*}} @__loadtime_comment_str + +// Compile an importing TU that uses the prebuilt module and verify that it +// does NOT re-emit the module's copyright global. + +// RUN: %clang_cc1 -std=c++20 -triple powerpc-ibm-aix \ +// RUN: -fprebuilt-module-path=%t -emit-llvm %t/importmod.cc -o - \ +// RUN: | FileCheck %s --check-prefix=CHECK-IMPORT +// CHECK-IMPORT-NOT: @__loadtime_comment_str +// CHECK-IMPORT-NOT: c"module me\00" + +//--- copymod.cppm +export module copymod; +#pragma comment(copyright, "module me") +export inline void f() {} + +//--- importmod.cc +import copymod; +void g() { f(); } diff --git a/clang/test/CodeGen/PowerPC/pragma-comment.c b/clang/test/CodeGen/PowerPC/pragma-comment.c new file mode 100644 index 0000000000000..a8eb71b558180 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/pragma-comment.c @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 %s -DTEST_EMPTY_COPYRIGHT -triple powerpc-ibm-aix -verify +// RUN: %clang_cc1 %s -DTEST_OTHER_COMMENT_KINDS -triple powerpc-ibm-aix -verify +// RUN: %clang_cc1 %s -triple powerpc-ibm-aix -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple powerpc-ibm-aix -verify + +// RUN: %clang_cc1 %s -DTEST_EMPTY_COPYRIGHT -triple powerpc64-ibm-aix -verify +// RUN: %clang_cc1 %s -DTEST_OTHER_COMMENT_KINDS -triple powerpc64-ibm-aix -verify +// RUN: %clang_cc1 %s -triple powerpc64-ibm-aix -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple powerpc64-ibm-aix -verify + +#ifdef TEST_EMPTY_COPYRIGHT + +// 1. Verify no diagnostics for empty copyright string +#pragma comment(copyright, "") // expected-no-diagnostics +int main() {return 0; } + +#elif defined(TEST_OTHER_COMMENT_KINDS) + +// 2. Verify warnings for lib/linker and silent ignore for others +#pragma comment(lib, "m") // expected-warning {{'#pragma comment lib' ignored}} +#pragma comment(linker, "foo") // expected-warning {{'#pragma comment linker' ignored}} + +// These are recognized but ignored in CodeGen +#pragma comment(compiler) // expected-warning {{'#pragma comment compiler' ignored}} +#pragma comment(exestr, "foo") // expected-warning {{'#pragma comment exestr' ignored}} +#pragma comment(user, "foo\abar\nbaz\tsomething") // expected-warning {{'#pragma comment user' ignored}} +int main() {return 0; } + +#else + +// 3. Default Path: Verify metadata generation and duplicate warning +#pragma comment(copyright, "@(#) Copyright") +#pragma comment(copyright, "Duplicate Copyright") // expected-warning {{'#pragma comment copyright' ignored: it can be specified only once per translation unit}} + +int main() { return 0; } +// Check that both metadata sections are present +// CHECK: !comment_string.loadtime = !{![[copyright:[0-9]+]]} + +// Check individual metadata content +// CHECK: ![[copyright]] = !{!"@(#) Copyright"} + +#endif diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c index ea9784a76f923..b9466b9558b06 100644 --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -27,6 +27,7 @@ // CHECK-FULL-O0: Running pass: VerifierPass // CHECK-FULL-O0-NEXT: Running analysis: VerifierAnalysis +// CHECK-FULL-O0-NEXT: Running pass: LowerCommentStringPass // CHECK-FULL-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-FULL-O0-NEXT: Running pass: EntryExitInstrumenterPass // CHECK-FULL-O0-NEXT: Running pass: AlwaysInlinerPass @@ -41,6 +42,7 @@ // CHECK-THIN-O0: Running pass: VerifierPass // CHECK-THIN-O0-NEXT: Running analysis: VerifierAnalysis +// CHECK-THIN-O0-NEXT: Running pass: LowerCommentStringPass // CHECK-THIN-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-THIN-O0-NEXT: Running pass: EntryExitInstrumenterPass // CHECK-THIN-O0-NEXT: Running pass: AlwaysInlinerPass diff --git a/clang/test/Parser/pragma-comment.c b/clang/test/Parser/pragma-comment.c new file mode 100644 index 0000000000000..c9618d44c0f2e --- /dev/null +++ b/clang/test/Parser/pragma-comment.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple systemz -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple powerpc64-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -DEXPOK -triple powerpc-ibm-aix -fsyntax-only -verify %s + +#ifdef EXPOK +#pragma comment(copyright,"copyright") // expected-no-diagnostics +#else +#pragma comment(copyright,"copyright") // expected-warning {{'#pragma comment copyright' ignored}} +#endif diff --git a/llvm/include/llvm/Transforms/Utils/LowerCommentStringPass.h b/llvm/include/llvm/Transforms/Utils/LowerCommentStringPass.h new file mode 100644 index 0000000000000..4c6109e1176d3 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/LowerCommentStringPass.h @@ -0,0 +1,24 @@ +//===-- LowerCommentStringPass.h - Lower Comment string metadata --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_LOWERCOMMENTSTRINGPASS_H +#define LLVM_TRANSFORMS_UTILS_LOWERCOMMENTSTRINGPASS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class LowerCommentStringPass : public PassInfoMixin<LowerCommentStringPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_LOWERCOMMENTSTRINGPASS_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index a23d64b491a79..9cc5aed7ab5b1 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -363,6 +363,7 @@ #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/LowerCommentStringPass.h" #include "llvm/Transforms/Utils/LowerGlobalDtors.h" #include "llvm/Transforms/Utils/LowerIFunc.h" #include "llvm/Transforms/Utils/LowerInvoke.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 5e4688f4dd7ef..7a35efc6a9339 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -142,6 +142,7 @@ #include "llvm/Transforms/Utils/ExtraPassManager.h" #include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" +#include "llvm/Transforms/Utils/LowerCommentStringPass.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MoveAutoInit.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" @@ -1480,6 +1481,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, const bool LTOPreLink = isLTOPreLink(LTOPhase); ModulePassManager MPM; + // Process copyright metadata early, before any optimizations + MPM.addPass(LowerCommentStringPass()); + // Run partial inlining pass to partially inline functions that have // large bodies. if (RunPartialInlining) @@ -2328,6 +2332,9 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, ModulePassManager MPM; + // Process copyright metadata at O0 before any other transformations + MPM.addPass(LowerCommentStringPass()); + // Perform pseudo probe instrumentation in O0 mode. This is for the // consistency between different build modes. For example, a LTO build can be // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index c92d93d7ae396..e5d180fcf9bfd 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -62,6 +62,7 @@ MODULE_PASS("check-debugify", NewPMCheckDebugifyPass()) MODULE_PASS("constmerge", ConstantMergePass()) MODULE_PASS("coro-cleanup", CoroCleanupPass()) MODULE_PASS("coro-early", CoroEarlyPass()) +MODULE_PASS("lower-comment-string", LowerCommentStringPass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("ctx-instr-gen", PGOInstrumentationGen(PGOInstrumentationType::CTXPROF)) diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index 2b5f5cf344e60..28f86c5e96010 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -53,6 +53,7 @@ add_llvm_component_library(LLVMTransformUtils LoopUtils.cpp LoopVersioning.cpp LowerAtomic.cpp + LowerCommentStringPass.cpp LowerGlobalDtors.cpp LowerIFunc.cpp LowerInvoke.cpp diff --git a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp new file mode 100644 index 0000000000000..6deef2f75e0a3 --- /dev/null +++ b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp @@ -0,0 +1,148 @@ +//===-- LowerCommentStringPass.cpp - Lower Comment string metadata -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// LowerCommentStringPass pass lowers module-level comment string metadata +// emitted by Clang: +// +// !comment_string.loadtime = !{!"Copyright ..."} +// +// into concrete, translation-unit–local globals. +// This Pass is enabled only for AIX. +// For each module (translation unit), the pass performs the following: +// +// 1. Creates a null-terminated, internal constant string global +// (`__loadtime_comment_str`) containing the copyright text in +// `__loadtime_comment` section. +// +// 2. Marks the string in `llvm.used` so it cannot be dropped by +// optimization or LTO. +// +// 3. Attaches `!implicit.ref` metadata referencing the string to every +// defined function in the module. The PowerPC AIX backend recognizes +// this metadata and emits a `.ref` directive from the function to the +// string, creating a concrete relocation that prevents the linker from +// discarding it (as long as the referencing symbol is kept). +// +// Input IR: +// !comment_string.loadtime = !{!"Copyright"} +// Output IR: +// @__loadtime_comment_str = internal constant [N x i8] c"Copyright\00", +// section "__loadtime_comment" +// @llvm.used = appending global [1 x ptr] [ptr @__loadtime_comment_str] +// +// define i32 @func() !implicit.ref !5 { ... } +// !5 = !{ptr @__loadtime_comment_str} +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerCommentStringPass.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#define DEBUG_TYPE "lower-comment-string" + +using namespace llvm; + +static cl::opt<bool> + DisableCopyrightMetadata("disable-lower-comment-string", cl::ReallyHidden, + cl::desc("Disable LowerCommentString pass."), + cl::init(false)); + +static bool isAIXTriple(const Module &M) { + return Triple(M.getTargetTriple()).isOSAIX(); +} + +PreservedAnalyses LowerCommentStringPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (DisableCopyrightMetadata || !isAIXTriple(M)) + return PreservedAnalyses::all(); + + LLVMContext &Ctx = M.getContext(); + + // Single-metadata: !comment_string.loadtime = !{!0} + // Each operand node is expected to have one MDString operand. + NamedMDNode *MD = M.getNamedMetadata("comment_string.loadtime"); + if (!MD || MD->getNumOperands() == 0) + return PreservedAnalyses::all(); + + // At this point we are guarateed that one TU contains a single copyright + // metadata entry. Create TU-local string global for that metadata entry. + MDNode *MdNode = MD->getOperand(0); + if (!MdNode || MdNode->getNumOperands() == 0) + return PreservedAnalyses::all(); + + auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0)); + if (!MdString) + return PreservedAnalyses::all(); + + StringRef Text = MdString->getString(); + if (Text.empty()) + return PreservedAnalyses::all(); + + // 1. Create a single NULL-terminated string global + Constant *StrInit = ConstantDataArray::getString(Ctx, Text, /*AddNull=*/true); + + // Internal, constant, TU-local--avoids duplicate symbol issues across TUs. + auto *StrGV = new GlobalVariable(M, StrInit->getType(), + /*isConstant=*/true, + GlobalValue::InternalLinkage, StrInit, + /*Name=*/"__loadtime_comment_str"); + // Set unnamed_addr to allow the linker to merge identical strings + StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + StrGV->setAlignment(Align(1)); + // Place in the "__loadtime_comment" section. + // The GV is constant, so we expect a read-only section. + StrGV->setSection("__loadtime_comment"); + + // 2. Add the string to llvm.used to prevent LLVM optimization/LTO passes from + // removing it. + appendToUsed(M, {StrGV}); + + // 3. Attach !implicit ref to every defined function + // Create a metadata node pointing to the copyright string: + // !N = !{ptr @__loadtime_comment_str} + Metadata *Ops[] = {ConstantAsMetadata::get(StrGV)}; + MDNode *ImplicitRefMD = MDNode::get(Ctx, Ops); + + // Lambda to attach implicit.ref metadata to a function. + auto AddImplicitRef = [&](Function &F) { + if (F.isDeclaration()) + return; + // Attach the implicit.ref metadata to the function + F.setMetadata("implicit.ref", ImplicitRefMD); + LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: " + << F.getName() << "\n"); + }; + + // Process all functions in the module + for (Function &F : M) + AddImplicitRef(F); + + // Cleanup the processed metadata. + MD->eraseFromParent(); + LLVM_DEBUG(dbgs() << "[copyright] created string and anchor for module\n"); + + return PreservedAnalyses::all(); +} diff --git a/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll b/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll index 86090324c770c..f2b2738b89416 100644 --- a/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll +++ b/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll @@ -2,7 +2,7 @@ ; RUN: opt -mtriple=aarch64 -S -passes='default<O2>' -print-pipeline-passes < %s | FileCheck %s ; CHECK: loop-idiom-vectorize -; O0: {{^}}function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),alloc-token,function(annotation-remarks),verify,print{{$}} +; O0: {{^}}lower-comment-string,function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),alloc-token,function(annotation-remarks),verify,print{{$}} define void @foo() { entry: diff --git a/llvm/test/CodeGen/Hexagon/print-pipeline-passes.ll b/llvm/test/CodeGen/Hexagon/print-pipeline-passes.ll index 0f80e9c60c441..50476ae020d68 100644 --- a/llvm/test/CodeGen/Hexagon/print-pipeline-passes.ll +++ b/llvm/test/CodeGen/Hexagon/print-pipeline-passes.ll @@ -3,7 +3,7 @@ ; CHECK: hexagon-loop-idiom ; CHECK: hexagon-vlcr -; O0: {{^}}function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),alloc-token,function(annotation-remarks),verify,print{{$}} +; O0: {{^}}lower-comment-string,function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),alloc-token,function(annotation-remarks),verify,print{{$}} define void @test_hexagon_passes() { entry: diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index f074b2fdd3ab8..979a7897b4c02 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -238,6 +238,7 @@ ; CHECK-O-NEXT: Running pass: CoroCleanupPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass +; CHECK-O-NEXT: Running pass: LowerCommentStringPass ; CHECK-DEFAULT-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-LTO-NOT: Running pass: EliminateAvailableExternallyPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index b0d08316de4f0..d03b15d276e0b 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -162,6 +162,7 @@ ; CHECK-O-NEXT: Running pass: CoroCleanupPass ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass +; CHECK-POSTLINK-O-NEXT: Running pass: LowerCommentStringPass ; CHECK-POSTLINK-O-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-POSTLINK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-POSTLINK-O-NEXT: Running pass: RecomputeGlobalsAAPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 6b3e82a752899..b5b0d55bf410c 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -146,6 +146,7 @@ ; CHECK-O-NEXT: Running pass: CoroCleanupPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass +; CHECK-O-NEXT: Running pass: LowerCommentStringPass ; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running pass: RecomputeGlobalsAAPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 88dc18f605ce2..57561975769d0 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -155,6 +155,7 @@ ; CHECK-O-NEXT: Running pass: CoroCleanupPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass +; CHECK-O-NEXT: Running pass: LowerCommentStringPass ; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running pass: RecomputeGlobalsAAPass diff --git a/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll b/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll new file mode 100644 index 0000000000000..ba7ad17f4c4b7 --- /dev/null +++ b/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll @@ -0,0 +1,59 @@ +; RUN: opt -passes=lower-comment-string -S %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0 + +; Verify that lower-comment-string is enabled by default on all opt pipelines. +; RUN: opt --O0 -S %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0 +; RUN: opt --O1 -S %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ON +; RUN: opt --O2 -S %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ON +; RUN: opt --O3 -S %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ON + +; Verify that LowerCommentStringPass lowers !loadtime.copyright.comment +; into concrete, translation-unit–local globals. +; +; For each module (translation unit), the pass performs the following: +; +; 1. Creates a null-terminated, internal constant string global +; (`__loadtime_comment_str`) containing the copyright text in +; `__loadtime_comment` section. +; +; 2. Marks the string in `llvm.used` so it cannot be dropped by +; optimization or LTO. +; +; 3. Attaches `!implicit.ref` metadata referencing the string to every +; defined function in the module. The PowerPC AIX backend recognizes +; this metadata and emits a `.ref` directive from the function to the +; string, creating a concrete relocation that prevents the linker from +; discarding it (as long as the referencing symbol is kept). + +target triple = "powerpc-ibm-aix" + +define void @f0() { +entry: + ret void +} +define i32 @main() { +entry: + ret i32 0 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"wchar_size", i32 2} + +!comment_string.loadtime = !{!1} +!1 = !{!"@(#) Copyright IBM 2025"} + + +; ---- Globals-------------------------------------------- +; CHECK: @__loadtime_comment_str = internal unnamed_addr constant [24 x i8] c"@(#) Copyright IBM 2025\00", section "__loadtime_comment", align 1 +; Preservation in llvm.used sets +; CHECK-NEXT: @llvm.used = appending global [1 x ptr] [ptr @__loadtime_comment_str], section "llvm.metadata" +; CHECK-NOT: ![[copyright:[0-9]+]] = !{!"@(#) Copyright IBM 2025"} + +; Function has an implicit ref MD pointing at the string: +; CHECK-O0: define void @f0() !implicit.ref ![[MD:[0-9]+]] +; CHECK-ON: define void @f0() local_unnamed_addr #0 !implicit.ref ![[MD:[0-9]+]] +; CHECK-O0: define i32 @main() !implicit.ref ![[MD]] +; CHECK-ON: define noundef i32 @main() local_unnamed_addr #0 !implicit.ref ![[MD]] + +; Verify metadata content +; CHECK-O0: ![[MD]] = !{ptr @__loadtime_comment_str} +; CHECK-ON: ![[MD]] = !{ptr @__loadtime_comment_str} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
