[llvm-branch-commits] [llvm] [IR] "modular-format" attribute for functions using format strings (PR #147429)

2025-07-08 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147429

>From 813226efec4aac6b8db595a19b0ebb9f3aa67d1d Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Wed, 2 Apr 2025 16:24:57 -0700
Subject: [PATCH] [IR] "modular-format" attribute for functions using format
 strings

A new InstCombine transform uses this attribute to rewrite calls to a
modular version of the implementation along with llvm.reloc.none
relocations against aspects of the implementation needed by the call.

This change only adds support for the 'float' aspect, but it also builds
the structure needed for others.

See issue #146159
---
 llvm/docs/LangRef.rst | 17 +
 .../InstCombine/InstCombineCalls.cpp  | 62 +++
 2 files changed, 79 insertions(+)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index f3aba8755b20c..cc8c0e471a196 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2620,6 +2620,23 @@ For example:
 This attribute indicates that outlining passes should not modify the
 function.
 
+``"modular-format"="<string_idx>,<first_arg_idx>,<modular_impl_fn>,<impl_name>,<aspects...>"``
+This attribute indicates that the implementation is modular on a particular
+format string argument ``<string_idx>``. When the argument for a given call is constant, the
+compiler may redirect the call to a modular implementation function
+instead.
+
+The compiler also emits relocations to report various aspects of the format
+string and arguments that were present. The compiler reports an aspect by
+issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges
+for code and data needed to support the aspect of the implementation to be
+brought into the link to satisfy weak references in the modular
+implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+
 Call Site Attributes
 --
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b6ed1dc4331d2..579e5769796c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -3915,6 +3916,63 @@ Instruction 
*InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
   return visitCallBase(CBI);
 }
 
+static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
+  if (!CI->hasFnAttr("modular-format"))
+return nullptr;
+
+  SmallVector Args(
+  llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
+  // TODO: Examine the format argument in Args[0].
+  // TODO: Error handling
+  unsigned FirstArgIdx;
+  if (!llvm::to_integer(Args[1], FirstArgIdx))
+return nullptr;
+  if (FirstArgIdx == 0)
+return nullptr;
+  --FirstArgIdx;
+  StringRef FnName = Args[2];
+  StringRef ImplName = Args[3];
+  DenseSet Aspects(llvm::from_range,
+  ArrayRef(Args).drop_front(4));
+  Module *M = CI->getModule();
+  Function *Callee = CI->getCalledFunction();
+  FunctionCallee ModularFn =
+  M->getOrInsertFunction(FnName, Callee->getFunctionType(),
+ Callee->getAttributes().removeFnAttribute(
+ M->getContext(), "modular-format"));
+  CallInst *New = cast(CI->clone());
+  New->setCalledFunction(ModularFn);
+  New->removeFnAttr("modular-format");
+  B.Insert(New);
+
+  const auto ReferenceAspect = [&](StringRef Aspect) {
+SmallString<20> Name = ImplName;
+Name += '_';
+Name += Aspect;
+Constant *Sym =
+M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext()));
+Function *RelocNoneFn =
+Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
+B.CreateCall(RelocNoneFn, {Sym});
+  };
+
+  if (Aspects.contains("float")) {
+Aspects.erase("float");
+if (llvm::any_of(
+llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
+ CI->arg_end()),
+[](Value *V) { return V->getType()->isFloatingPointTy(); }))
+  ReferenceAspect("float");
+  }
+
+  SmallVector UnknownAspects(Aspects.begin(), Aspects.end());
+  llvm::sort(UnknownAspects);
+  for (StringRef Request : UnknownAspects)
+ReferenceAspect(Request);
+
+  return New;
+}
+
 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
   if (!CI->getCalledFunction()) return nullptr;
 
@@ -3936,6 +3994,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst 
*CI) {
 ++NumSimplified;
 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
   }
+  if (Value *With = optimizeModularFormat(CI, Builder)) {
+++NumSimplified;
+return 

[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-07-08 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147431

>From 619dfb750f9d262328f630a4735c1fcd62056d48 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH] [clang] "modular_format" attribute for functions using format
 strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td | 11 +++
 clang/include/clang/Basic/AttrDocs.td | 25 +
 clang/lib/CodeGen/CGCall.cpp  | 12 
 clang/lib/Sema/SemaDeclAttr.cpp   | 27 +++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 27fea7dea0a5e..bed878a10424c 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5182,3 +5182,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+IdentifierArgument<"ModularImplFn">,
+StringArgument<"ImplName">,
+VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 43442f177ab7b..3c325ce2462cb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9427,3 +9427,28 @@ diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is an implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand an aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issuing a relocation for the symbol
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a06455d25b1ef..9e8929b5a56ae 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2569,6 +2569,18 @@ void CodeGenModule::ConstructAttributeList(StringRef 
Name,
 
 if (TargetDecl->hasAttr())
   FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+if (auto *ModularFormat = TargetDecl->getAttr()) {
+  // TODO: Error checking
+  FormatAttr *Format = TargetDecl->getAttr();
+  std::string FormatIdx = std::to_string(Format->getFormatIdx());
+  std::string FirstArg = std::to_string(Format->getFirstArg());
+  SmallVector Args = {
+  FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+  ModularFormat->getImplName()};
+  llvm::append_range(Args, ModularFormat->aspects());
+  FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+}
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index eba29e609cb05..b70ffd7c35f7b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6897,6 +6897,29 @@ static void handleVTablePointerAuthentication(Sema &S, 
Decl *D,
   CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+return;
+  SmallVector Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+StringRef Aspect;
+if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+  return;
+Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+  S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+  Aspects.

[llvm-branch-commits] [libc] [libc] Modular printf option (float only) (PR #147426)

2025-07-08 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147426

>From 028a89f8a92de3b7939d092332c975c4964e0e4b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Thu, 19 Dec 2024 11:57:27 -0800
Subject: [PATCH] [libc] Modular printf option (float only)

This adds LIBC_CONF_PRINTF_MODULAR, which causes floating point support
(later, others) to be weakly linked into the implementation.
__printf_modular becomes the main entry point of the implementation, and
printf itself wraps __printf_modular. printf also contains a
BFD_RELOC_NONE relocation to bring in the float aspect.

See issue #146159 for context.
---
 libc/config/config.json   |  4 ++
 libc/docs/configure.rst   |  1 +
 libc/src/stdio/generic/CMakeLists.txt |  7 ++-
 libc/src/stdio/generic/printf_modular.cpp | 40 +
 libc/src/stdio/printf.h   |  1 +
 libc/src/stdio/printf_core/CMakeLists.txt |  7 ++-
 .../stdio/printf_core/float_dec_converter.h   | 25 +++--
 .../printf_core/float_dec_converter_limited.h | 24 ++--
 .../stdio/printf_core/float_hex_converter.h   | 10 +++-
 libc/src/stdio/printf_core/float_impl.cpp | 41 ++
 libc/src/stdio/printf_core/parser.h   | 56 ++-
 libc/src/stdio/printf_core/printf_config.h|  7 +++
 libc/src/stdio/printf_core/printf_main.h  | 13 -
 .../src/stdio/printf_core/vfprintf_internal.h | 13 -
 14 files changed, 216 insertions(+), 33 deletions(-)
 create mode 100644 libc/src/stdio/generic/printf_modular.cpp
 create mode 100644 libc/src/stdio/printf_core/float_impl.cpp

diff --git a/libc/config/config.json b/libc/config/config.json
index d53b2936edb07..4278169cd5940 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -45,6 +45,10 @@
 "LIBC_CONF_PRINTF_RUNTIME_DISPATCH": {
   "value": true,
   "doc": "Use dynamic dispatch for the output mechanism to reduce code 
size."
+},
+"LIBC_CONF_PRINTF_MODULAR": {
+  "value": true,
+  "doc": "Split printf implementation into modules that can be lazily 
linked in."
 }
   },
   "scanf": {
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 109412225634f..1998c067dc77a 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -45,6 +45,7 @@ to learn about the defaults for your platform and target.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for 
faster and smaller but less accurate printf doubles.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative 
printf float implementation based on 320-bit floats
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large 
table for better printf long double performance.
+- ``LIBC_CONF_PRINTF_MODULAR``: Split printf implementation into modules 
that can be lazily linked in.
 - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the 
output mechanism to reduce code size.
 * **"pthread" options**
 - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins 
before blocking if a mutex is in contention (default to 100).
diff --git a/libc/src/stdio/generic/CMakeLists.txt 
b/libc/src/stdio/generic/CMakeLists.txt
index 6361822b61999..41b18bc7195ca 100644
--- a/libc/src/stdio/generic/CMakeLists.txt
+++ b/libc/src/stdio/generic/CMakeLists.txt
@@ -412,10 +412,15 @@ if(LLVM_LIBC_FULL_BUILD)
   )
 endif()
 
+set(printf_srcs printf.cpp)
+if (LIBC_CONF_PRINTF_MODULAR)
+  list(APPEND printf_srcs printf_modular.cpp)
+endif()
+
 add_generic_entrypoint_object(
   printf
   SRCS
-printf.cpp
+${printf_srcs}
   HDRS
 ../printf.h
   DEPENDS
diff --git a/libc/src/stdio/generic/printf_modular.cpp 
b/libc/src/stdio/generic/printf_modular.cpp
new file mode 100644
index 0..3a6a580002062
--- /dev/null
+++ b/libc/src/stdio/generic/printf_modular.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of 
printf_modular---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "src/stdio/printf.h"
+
+#include "src/__support/File/file.h"
+#include "src/__support/arg_list.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/printf_core/vfprintf_internal.h"
+
+#include "hdr/types/FILE.h"
+#include 
+
+#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT LIBC_NAMESPACE::stdout
+#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT ::stdout
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, __printf_modular,
+   (const char *__restrict format, ...)) {
+  va_list vlist;
+  va_start(vlist, format);
+  internal::ArgList args(vlist)

[llvm-branch-commits] [libc] [libc] Modular printf option (float only) (PR #147426)

2025-07-08 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147426

>From 028a89f8a92de3b7939d092332c975c4964e0e4b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Thu, 19 Dec 2024 11:57:27 -0800
Subject: [PATCH] [libc] Modular printf option (float only)

This adds LIBC_CONF_PRINTF_MODULAR, which causes floating point support
(later, others) to be weakly linked into the implementation.
__printf_modular becomes the main entry point of the implementation, and
printf itself wraps __printf_modular. printf also contains a
BFD_RELOC_NONE relocation to bring in the float aspect.

See issue #146159 for context.
---
 libc/config/config.json   |  4 ++
 libc/docs/configure.rst   |  1 +
 libc/src/stdio/generic/CMakeLists.txt |  7 ++-
 libc/src/stdio/generic/printf_modular.cpp | 40 +
 libc/src/stdio/printf.h   |  1 +
 libc/src/stdio/printf_core/CMakeLists.txt |  7 ++-
 .../stdio/printf_core/float_dec_converter.h   | 25 +++--
 .../printf_core/float_dec_converter_limited.h | 24 ++--
 .../stdio/printf_core/float_hex_converter.h   | 10 +++-
 libc/src/stdio/printf_core/float_impl.cpp | 41 ++
 libc/src/stdio/printf_core/parser.h   | 56 ++-
 libc/src/stdio/printf_core/printf_config.h|  7 +++
 libc/src/stdio/printf_core/printf_main.h  | 13 -
 .../src/stdio/printf_core/vfprintf_internal.h | 13 -
 14 files changed, 216 insertions(+), 33 deletions(-)
 create mode 100644 libc/src/stdio/generic/printf_modular.cpp
 create mode 100644 libc/src/stdio/printf_core/float_impl.cpp

diff --git a/libc/config/config.json b/libc/config/config.json
index d53b2936edb07..4278169cd5940 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -45,6 +45,10 @@
 "LIBC_CONF_PRINTF_RUNTIME_DISPATCH": {
   "value": true,
   "doc": "Use dynamic dispatch for the output mechanism to reduce code 
size."
+},
+"LIBC_CONF_PRINTF_MODULAR": {
+  "value": true,
+  "doc": "Split printf implementation into modules that can be lazily 
linked in."
 }
   },
   "scanf": {
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 109412225634f..1998c067dc77a 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -45,6 +45,7 @@ to learn about the defaults for your platform and target.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for 
faster and smaller but less accurate printf doubles.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative 
printf float implementation based on 320-bit floats
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large 
table for better printf long double performance.
+- ``LIBC_CONF_PRINTF_MODULAR``: Split printf implementation into modules 
that can be lazily linked in.
 - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the 
output mechanism to reduce code size.
 * **"pthread" options**
 - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins 
before blocking if a mutex is in contention (default to 100).
diff --git a/libc/src/stdio/generic/CMakeLists.txt 
b/libc/src/stdio/generic/CMakeLists.txt
index 6361822b61999..41b18bc7195ca 100644
--- a/libc/src/stdio/generic/CMakeLists.txt
+++ b/libc/src/stdio/generic/CMakeLists.txt
@@ -412,10 +412,15 @@ if(LLVM_LIBC_FULL_BUILD)
   )
 endif()
 
+set(printf_srcs printf.cpp)
+if (LIBC_CONF_PRINTF_MODULAR)
+  list(APPEND printf_srcs printf_modular.cpp)
+endif()
+
 add_generic_entrypoint_object(
   printf
   SRCS
-printf.cpp
+${printf_srcs}
   HDRS
 ../printf.h
   DEPENDS
diff --git a/libc/src/stdio/generic/printf_modular.cpp 
b/libc/src/stdio/generic/printf_modular.cpp
new file mode 100644
index 0..3a6a580002062
--- /dev/null
+++ b/libc/src/stdio/generic/printf_modular.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of 
printf_modular---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "src/stdio/printf.h"
+
+#include "src/__support/File/file.h"
+#include "src/__support/arg_list.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/printf_core/vfprintf_internal.h"
+
+#include "hdr/types/FILE.h"
+#include 
+
+#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT LIBC_NAMESPACE::stdout
+#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT ::stdout
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, __printf_modular,
+   (const char *__restrict format, ...)) {
+  va_list vlist;
+  va_start(vlist, format);
+  internal::ArgList args(vlist)

[llvm-branch-commits] [llvm] [IR] "modular-format" attribute for functions using format strings (PR #147429)

2025-07-08 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147429

>From 813226efec4aac6b8db595a19b0ebb9f3aa67d1d Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Wed, 2 Apr 2025 16:24:57 -0700
Subject: [PATCH] [IR] "modular-format" attribute for functions using format
 strings

A new InstCombine transform uses this attribute to rewrite calls to a
modular version of the implementation along with llvm.reloc.none
relocations against aspects of the implementation needed by the call.

This change only adds support for the 'float' aspect, but it also builds
the structure needed for others.

See issue #146159
---
 llvm/docs/LangRef.rst | 17 +
 .../InstCombine/InstCombineCalls.cpp  | 62 +++
 2 files changed, 79 insertions(+)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index f3aba8755b20c..cc8c0e471a196 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2620,6 +2620,23 @@ For example:
 This attribute indicates that outlining passes should not modify the
 function.
 
+``"modular-format"="<string_idx>,<first_arg_idx>,<modular_impl_fn>,<impl_name>,<aspects...>"``
+This attribute indicates that the implementation is modular on a particular
+format string argument ``<string_idx>``. When the argument for a given call is constant, the
+compiler may redirect the call to a modular implementation function
+instead.
+
+The compiler also emits relocations to report various aspects of the format
+string and arguments that were present. The compiler reports an aspect by
+issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges
+for code and data needed to support the aspect of the implementation to be
+brought into the link to satisfy weak references in the modular
+implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+
 Call Site Attributes
 --
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b6ed1dc4331d2..579e5769796c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -3915,6 +3916,63 @@ Instruction 
*InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
   return visitCallBase(CBI);
 }
 
+static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
+  if (!CI->hasFnAttr("modular-format"))
+return nullptr;
+
+  SmallVector Args(
+  llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
+  // TODO: Examine the format argument in Args[0].
+  // TODO: Error handling
+  unsigned FirstArgIdx;
+  if (!llvm::to_integer(Args[1], FirstArgIdx))
+return nullptr;
+  if (FirstArgIdx == 0)
+return nullptr;
+  --FirstArgIdx;
+  StringRef FnName = Args[2];
+  StringRef ImplName = Args[3];
+  DenseSet Aspects(llvm::from_range,
+  ArrayRef(Args).drop_front(4));
+  Module *M = CI->getModule();
+  Function *Callee = CI->getCalledFunction();
+  FunctionCallee ModularFn =
+  M->getOrInsertFunction(FnName, Callee->getFunctionType(),
+ Callee->getAttributes().removeFnAttribute(
+ M->getContext(), "modular-format"));
+  CallInst *New = cast(CI->clone());
+  New->setCalledFunction(ModularFn);
+  New->removeFnAttr("modular-format");
+  B.Insert(New);
+
+  const auto ReferenceAspect = [&](StringRef Aspect) {
+SmallString<20> Name = ImplName;
+Name += '_';
+Name += Aspect;
+Constant *Sym =
+M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext()));
+Function *RelocNoneFn =
+Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
+B.CreateCall(RelocNoneFn, {Sym});
+  };
+
+  if (Aspects.contains("float")) {
+Aspects.erase("float");
+if (llvm::any_of(
+llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
+ CI->arg_end()),
+[](Value *V) { return V->getType()->isFloatingPointTy(); }))
+  ReferenceAspect("float");
+  }
+
+  SmallVector UnknownAspects(Aspects.begin(), Aspects.end());
+  llvm::sort(UnknownAspects);
+  for (StringRef Request : UnknownAspects)
+ReferenceAspect(Request);
+
+  return New;
+}
+
 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
   if (!CI->getCalledFunction()) return nullptr;
 
@@ -3936,6 +3994,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst 
*CI) {
 ++NumSimplified;
 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
   }
+  if (Value *With = optimizeModularFormat(CI, Builder)) {
+++NumSimplified;
+return 

[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-07-08 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147431

>From 619dfb750f9d262328f630a4735c1fcd62056d48 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH] [clang] "modular_format" attribute for functions using format
 strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td | 11 +++
 clang/include/clang/Basic/AttrDocs.td | 25 +
 clang/lib/CodeGen/CGCall.cpp  | 12 
 clang/lib/Sema/SemaDeclAttr.cpp   | 27 +++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 27fea7dea0a5e..bed878a10424c 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5182,3 +5182,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+IdentifierArgument<"ModularImplFn">,
+StringArgument<"ImplName">,
+VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 43442f177ab7b..3c325ce2462cb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9427,3 +9427,28 @@ diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is an implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand an aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issuing a relocation for the symbol
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a06455d25b1ef..9e8929b5a56ae 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2569,6 +2569,18 @@ void CodeGenModule::ConstructAttributeList(StringRef 
Name,
 
 if (TargetDecl->hasAttr())
   FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+if (auto *ModularFormat = TargetDecl->getAttr()) {
+  // TODO: Error checking
+  FormatAttr *Format = TargetDecl->getAttr();
+  std::string FormatIdx = std::to_string(Format->getFormatIdx());
+  std::string FirstArg = std::to_string(Format->getFirstArg());
+  SmallVector Args = {
+  FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+  ModularFormat->getImplName()};
+  llvm::append_range(Args, ModularFormat->aspects());
+  FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+}
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index eba29e609cb05..b70ffd7c35f7b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6897,6 +6897,29 @@ static void handleVTablePointerAuthentication(Sema &S, 
Decl *D,
   CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+return;
+  SmallVector Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+StringRef Aspect;
+if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+  return;
+Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+  S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+  Aspects.

[llvm-branch-commits] [libc] [libc] Modular printf option (float only) (PR #147426)

2025-07-15 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147426

>From 68ff227b10bc8bd6c58526790e19787cbc554fb6 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Thu, 19 Dec 2024 11:57:27 -0800
Subject: [PATCH] [libc] Modular printf option (float only)

This adds LIBC_CONF_PRINTF_MODULAR, which causes floating point support
(later, others) to be weakly linked into the implementation.
__printf_modular becomes the main entry point of the implementation, and
printf itself wraps __printf_modular. printf also contains a
BFD_RELOC_NONE relocation to bring in the float aspect.

See issue #146159 for context.
---
 libc/config/config.json   |  4 ++
 libc/docs/configure.rst   |  1 +
 libc/src/stdio/generic/CMakeLists.txt |  7 ++-
 libc/src/stdio/generic/printf_modular.cpp | 40 +
 libc/src/stdio/printf.h   |  1 +
 libc/src/stdio/printf_core/CMakeLists.txt |  7 ++-
 .../stdio/printf_core/float_dec_converter.h   | 25 +++--
 .../printf_core/float_dec_converter_limited.h | 24 ++--
 .../stdio/printf_core/float_hex_converter.h   | 10 +++-
 libc/src/stdio/printf_core/float_impl.cpp | 41 ++
 libc/src/stdio/printf_core/parser.h   | 56 ++-
 libc/src/stdio/printf_core/printf_config.h|  7 +++
 libc/src/stdio/printf_core/printf_main.h  | 13 -
 .../src/stdio/printf_core/vfprintf_internal.h | 13 -
 14 files changed, 216 insertions(+), 33 deletions(-)
 create mode 100644 libc/src/stdio/generic/printf_modular.cpp
 create mode 100644 libc/src/stdio/printf_core/float_impl.cpp

diff --git a/libc/config/config.json b/libc/config/config.json
index d53b2936edb07..4278169cd5940 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -45,6 +45,10 @@
 "LIBC_CONF_PRINTF_RUNTIME_DISPATCH": {
   "value": true,
   "doc": "Use dynamic dispatch for the output mechanism to reduce code 
size."
+},
+"LIBC_CONF_PRINTF_MODULAR": {
+  "value": true,
+  "doc": "Split printf implementation into modules that can be lazily 
linked in."
 }
   },
   "scanf": {
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 109412225634f..1998c067dc77a 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -45,6 +45,7 @@ to learn about the defaults for your platform and target.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for 
faster and smaller but less accurate printf doubles.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative 
printf float implementation based on 320-bit floats
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large 
table for better printf long double performance.
+- ``LIBC_CONF_PRINTF_MODULAR``: Split printf implementation into modules 
that can be lazily linked in.
 - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the 
output mechanism to reduce code size.
 * **"pthread" options**
 - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins 
before blocking if a mutex is in contention (default to 100).
diff --git a/libc/src/stdio/generic/CMakeLists.txt 
b/libc/src/stdio/generic/CMakeLists.txt
index 6361822b61999..41b18bc7195ca 100644
--- a/libc/src/stdio/generic/CMakeLists.txt
+++ b/libc/src/stdio/generic/CMakeLists.txt
@@ -412,10 +412,15 @@ if(LLVM_LIBC_FULL_BUILD)
   )
 endif()
 
+set(printf_srcs printf.cpp)
+if (LIBC_CONF_PRINTF_MODULAR)
+  list(APPEND printf_srcs printf_modular.cpp)
+endif()
+
 add_generic_entrypoint_object(
   printf
   SRCS
-printf.cpp
+${printf_srcs}
   HDRS
 ../printf.h
   DEPENDS
diff --git a/libc/src/stdio/generic/printf_modular.cpp 
b/libc/src/stdio/generic/printf_modular.cpp
new file mode 100644
index 0..3a6a580002062
--- /dev/null
+++ b/libc/src/stdio/generic/printf_modular.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of 
printf_modular---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "src/stdio/printf.h"
+
+#include "src/__support/File/file.h"
+#include "src/__support/arg_list.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/printf_core/vfprintf_internal.h"
+
+#include "hdr/types/FILE.h"
+#include 
+
+#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT LIBC_NAMESPACE::stdout
+#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT ::stdout
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, __printf_modular,
+   (const char *__restrict format, ...)) {
+  va_list vlist;
+  va_start(vlist, format);
+  internal::ArgList args(vlist)

[llvm-branch-commits] [libc] [libc] Modular printf option (float only) (PR #147426)

2025-07-15 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147426

>From 68ff227b10bc8bd6c58526790e19787cbc554fb6 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Thu, 19 Dec 2024 11:57:27 -0800
Subject: [PATCH] [libc] Modular printf option (float only)

This adds LIBC_CONF_PRINTF_MODULAR, which causes floating point support
(later, others) to be weakly linked into the implementation.
__printf_modular becomes the main entry point of the implementation, and
printf itself wraps __printf_modular. printf also contains a
BFD_RELOC_NONE relocation to bring in the float aspect.

See issue #146159 for context.
---
 libc/config/config.json   |  4 ++
 libc/docs/configure.rst   |  1 +
 libc/src/stdio/generic/CMakeLists.txt |  7 ++-
 libc/src/stdio/generic/printf_modular.cpp | 40 +
 libc/src/stdio/printf.h   |  1 +
 libc/src/stdio/printf_core/CMakeLists.txt |  7 ++-
 .../stdio/printf_core/float_dec_converter.h   | 25 +++--
 .../printf_core/float_dec_converter_limited.h | 24 ++--
 .../stdio/printf_core/float_hex_converter.h   | 10 +++-
 libc/src/stdio/printf_core/float_impl.cpp | 41 ++
 libc/src/stdio/printf_core/parser.h   | 56 ++-
 libc/src/stdio/printf_core/printf_config.h|  7 +++
 libc/src/stdio/printf_core/printf_main.h  | 13 -
 .../src/stdio/printf_core/vfprintf_internal.h | 13 -
 14 files changed, 216 insertions(+), 33 deletions(-)
 create mode 100644 libc/src/stdio/generic/printf_modular.cpp
 create mode 100644 libc/src/stdio/printf_core/float_impl.cpp

diff --git a/libc/config/config.json b/libc/config/config.json
index d53b2936edb07..4278169cd5940 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -45,6 +45,10 @@
 "LIBC_CONF_PRINTF_RUNTIME_DISPATCH": {
   "value": true,
   "doc": "Use dynamic dispatch for the output mechanism to reduce code 
size."
+},
+"LIBC_CONF_PRINTF_MODULAR": {
+  "value": true,
+  "doc": "Split printf implementation into modules that can be lazily 
linked in."
 }
   },
   "scanf": {
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 109412225634f..1998c067dc77a 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -45,6 +45,7 @@ to learn about the defaults for your platform and target.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for 
faster and smaller but less accurate printf doubles.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative 
printf float implementation based on 320-bit floats
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large 
table for better printf long double performance.
+- ``LIBC_CONF_PRINTF_MODULAR``: Split printf implementation into modules 
that can be lazily linked in.
 - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the 
output mechanism to reduce code size.
 * **"pthread" options**
 - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins 
before blocking if a mutex is in contention (default to 100).
diff --git a/libc/src/stdio/generic/CMakeLists.txt 
b/libc/src/stdio/generic/CMakeLists.txt
index 6361822b61999..41b18bc7195ca 100644
--- a/libc/src/stdio/generic/CMakeLists.txt
+++ b/libc/src/stdio/generic/CMakeLists.txt
@@ -412,10 +412,15 @@ if(LLVM_LIBC_FULL_BUILD)
   )
 endif()
 
+set(printf_srcs printf.cpp)
+if (LIBC_CONF_PRINTF_MODULAR)
+  list(APPEND printf_srcs printf_modular.cpp)
+endif()
+
 add_generic_entrypoint_object(
   printf
   SRCS
-printf.cpp
+${printf_srcs}
   HDRS
 ../printf.h
   DEPENDS
diff --git a/libc/src/stdio/generic/printf_modular.cpp 
b/libc/src/stdio/generic/printf_modular.cpp
new file mode 100644
index 0..3a6a580002062
--- /dev/null
+++ b/libc/src/stdio/generic/printf_modular.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of 
printf_modular---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "src/stdio/printf.h"
+
+#include "src/__support/File/file.h"
+#include "src/__support/arg_list.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/printf_core/vfprintf_internal.h"
+
+#include "hdr/types/FILE.h"
+#include 
+
+#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT LIBC_NAMESPACE::stdout
+#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT ::stdout
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, __printf_modular,
+   (const char *__restrict format, ...)) {
+  va_list vlist;
+  va_start(vlist, format);
+  internal::ArgList args(vlist)

[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-07-15 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147431

>From e77a85615ee9d775b397cc877b16eed91e1ad2e1 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH 1/2] [clang] "modular_format" attribute for functions using
 format strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td | 11 +++
 clang/include/clang/Basic/AttrDocs.td | 25 +
 clang/lib/CodeGen/CGCall.cpp  | 12 
 clang/lib/Sema/SemaDeclAttr.cpp   | 27 +++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 27fea7dea0a5e..bed878a10424c 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5182,3 +5182,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+IdentifierArgument<"ModularImplFn">,
+StringArgument<"ImplName">,
+VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 43442f177ab7b..3c325ce2462cb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9427,3 +9427,28 @@ diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is an implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand an aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issuing a relocation for the symbol
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a06455d25b1ef..9e8929b5a56ae 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2569,6 +2569,18 @@ void CodeGenModule::ConstructAttributeList(StringRef 
Name,
 
 if (TargetDecl->hasAttr())
   FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+if (auto *ModularFormat = TargetDecl->getAttr()) {
+  // TODO: Error checking
+  FormatAttr *Format = TargetDecl->getAttr();
+  std::string FormatIdx = std::to_string(Format->getFormatIdx());
+  std::string FirstArg = std::to_string(Format->getFirstArg());
+  SmallVector Args = {
+  FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+  ModularFormat->getImplName()};
+  llvm::append_range(Args, ModularFormat->aspects());
+  FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+}
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index eba29e609cb05..b70ffd7c35f7b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6897,6 +6897,29 @@ static void handleVTablePointerAuthentication(Sema &S, 
Decl *D,
   CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+return;
+  SmallVector Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+StringRef Aspect;
+if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+  return;
+Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+  S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+  Aspe

[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-07-22 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath edited 
https://github.com/llvm/llvm-project/pull/147431
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-07-22 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath edited 
https://github.com/llvm/llvm-project/pull/147431
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-07-22 Thread Daniel Thornburgh via llvm-branch-commits


@@ -9427,3 +9427,37 @@ diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute (or standard library functions) to indicate that the
+implementation is modular on the format string argument. When the format string
+for a given call is constant, the compiler may redirect the call to the symbol
+given as the first argument to the attribute (the modular implementation
+function).
+
+The second argument is an implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand an aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issuing a relocation for the symbol
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implementation function.
+
+For example, say ``printf`` is annotated with
+``modular_format(__modular_printf, __printf, float)``. Then, a call to
+``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would
+become a call to ``__modular_printf`` with the same arguments, as would

mysterymath wrote:

> My concern is more about dispatching in ways the user may not anticipate and 
> getting observably different behavior. e.g., the user calls `printf("%I64d", 
> 0LL)` and they were getting the MSVC CRT `printf` call which supported that 
> modifier but now calls `__modular_printf` which doesn't know about the 
> modifier. What happens in that kind of situation?

Ah, if I understand what you're getting at, that can't happen; it's explicitly 
out of scope for the feature.

The `modular_format` attribute exists to advertise to a compiler that is 
compiling calls to a function that the implementation can be split by 
redirecting calls and emitting relocs to various symbols. The only plausible 
mechanism to do so would be a header file, and that means that the header would 
need to be provided by and intrinsically tied to a specific version of the 
implementation. Otherwise, it would be impossible to determine what aspects the 
implementation requires to be emitted to function correctly.

Accordingly, this feature would primarily be useful for cases where libc is 
statically linked in and paired with its own headers. (llvm-libc, various 
embedded libcs, etc.) I suppose it's technically possible to break out printf 
implementation parts into a family of individual dynamic libraries, but even 
then, any libc header set that required that the libc implementation be 
dynamically replaceable would not be able to include `modular_format`.

So, for implementations that use this feature, `printf` and `__modular_printf` 
would always be designed together. To avoid ever introducing two full `printf` 
implementations into the link, `printf` would be a thin wrapper around 
`__modular_printf` that also requests every possible aspect of the 
implementation. This would mean that the two could never diverge.


As an aside, this is my first time landing an RFC across so many components of 
LLVM. I wasn't sure how much detail to include in each change; my intuition was 
to try to provide links to the RFC instead. I don't want the above reasoning to 
get buried, and it gives me pause that it wasn't readily accessible. But I'm 
also not entirely sure where it should live going forward. Advice would be 
appreciated.


https://github.com/llvm/llvm-project/pull/147431
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-07-28 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147431

>From 92dad1410839afdc18ef2d92b3b9055d72f85188 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH 1/5] [clang] "modular_format" attribute for functions using
 format strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td | 11 +++
 clang/include/clang/Basic/AttrDocs.td | 25 +
 clang/lib/CodeGen/CGCall.cpp  | 12 
 clang/lib/Sema/SemaDeclAttr.cpp   | 27 +++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 27fea7dea0a5e..bed878a10424c 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5182,3 +5182,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+IdentifierArgument<"ModularImplFn">,
+StringArgument<"ImplName">,
+VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 43442f177ab7b..3c325ce2462cb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9427,3 +9427,28 @@ diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is an implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand an aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issuing a relocation for the symbol
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a06455d25b1ef..9e8929b5a56ae 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2569,6 +2569,18 @@ void CodeGenModule::ConstructAttributeList(StringRef 
Name,
 
 if (TargetDecl->hasAttr())
   FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+if (auto *ModularFormat = TargetDecl->getAttr()) {
+  // TODO: Error checking
+  FormatAttr *Format = TargetDecl->getAttr();
+  std::string FormatIdx = std::to_string(Format->getFormatIdx());
+  std::string FirstArg = std::to_string(Format->getFirstArg());
+  SmallVector Args = {
+  FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+  ModularFormat->getImplName()};
+  llvm::append_range(Args, ModularFormat->aspects());
+  FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+}
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index eba29e609cb05..b70ffd7c35f7b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6897,6 +6897,29 @@ static void handleVTablePointerAuthentication(Sema &S, 
Decl *D,
   CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+return;
+  SmallVector Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+StringRef Aspect;
+if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+  return;
+Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+  S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+  Aspe

[llvm-branch-commits] [libc] [libc] Modular printf option (float only) (PR #147426)

2025-07-28 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147426

>From 49be509b0506ac6e6032f5b8cfebf0c7e978cd1f Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Thu, 19 Dec 2024 11:57:27 -0800
Subject: [PATCH 1/5] [libc] Modular printf option (float only)

This adds LIBC_CONF_PRINTF_MODULAR, which causes floating point support
(later, others) to be weakly linked into the implementation.
__printf_modular becomes the main entry point of the implementation, and
printf itself wraps __printf_modular. printf also contains a
BFD_RELOC_NONE relocation to bring in the float aspect.

See issue #146159 for context.
---
 libc/config/config.json   |  4 ++
 libc/docs/configure.rst   |  1 +
 libc/src/stdio/generic/CMakeLists.txt |  7 ++-
 libc/src/stdio/generic/printf_modular.cpp | 40 +
 libc/src/stdio/printf.h   |  1 +
 libc/src/stdio/printf_core/CMakeLists.txt |  7 ++-
 .../stdio/printf_core/float_dec_converter.h   | 25 +++--
 .../printf_core/float_dec_converter_limited.h | 24 ++--
 .../stdio/printf_core/float_hex_converter.h   | 10 +++-
 libc/src/stdio/printf_core/float_impl.cpp | 41 ++
 libc/src/stdio/printf_core/parser.h   | 56 ++-
 libc/src/stdio/printf_core/printf_config.h|  7 +++
 libc/src/stdio/printf_core/printf_main.h  | 13 -
 .../src/stdio/printf_core/vfprintf_internal.h | 13 -
 14 files changed, 216 insertions(+), 33 deletions(-)
 create mode 100644 libc/src/stdio/generic/printf_modular.cpp
 create mode 100644 libc/src/stdio/printf_core/float_impl.cpp

diff --git a/libc/config/config.json b/libc/config/config.json
index d53b2936edb07..4278169cd5940 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -45,6 +45,10 @@
 "LIBC_CONF_PRINTF_RUNTIME_DISPATCH": {
   "value": true,
   "doc": "Use dynamic dispatch for the output mechanism to reduce code 
size."
+},
+"LIBC_CONF_PRINTF_MODULAR": {
+  "value": true,
+  "doc": "Split printf implementation into modules that can be lazily 
linked in."
 }
   },
   "scanf": {
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 109412225634f..1998c067dc77a 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -45,6 +45,7 @@ to learn about the defaults for your platform and target.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for 
faster and smaller but less accurate printf doubles.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative 
printf float implementation based on 320-bit floats
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large 
table for better printf long double performance.
+- ``LIBC_CONF_PRINTF_MODULAR``: Split printf implementation into modules 
that can be lazily linked in.
 - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the 
output mechanism to reduce code size.
 * **"pthread" options**
 - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins 
before blocking if a mutex is in contention (default to 100).
diff --git a/libc/src/stdio/generic/CMakeLists.txt 
b/libc/src/stdio/generic/CMakeLists.txt
index 6361822b61999..41b18bc7195ca 100644
--- a/libc/src/stdio/generic/CMakeLists.txt
+++ b/libc/src/stdio/generic/CMakeLists.txt
@@ -412,10 +412,15 @@ if(LLVM_LIBC_FULL_BUILD)
   )
 endif()
 
+set(printf_srcs printf.cpp)
+if (LIBC_CONF_PRINTF_MODULAR)
+  list(APPEND printf_srcs printf_modular.cpp)
+endif()
+
 add_generic_entrypoint_object(
   printf
   SRCS
-printf.cpp
+${printf_srcs}
   HDRS
 ../printf.h
   DEPENDS
diff --git a/libc/src/stdio/generic/printf_modular.cpp 
b/libc/src/stdio/generic/printf_modular.cpp
new file mode 100644
index 0..3a6a580002062
--- /dev/null
+++ b/libc/src/stdio/generic/printf_modular.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of 
printf_modular---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "src/stdio/printf.h"
+
+#include "src/__support/File/file.h"
+#include "src/__support/arg_list.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/printf_core/vfprintf_internal.h"
+
+#include "hdr/types/FILE.h"
+#include 
+
+#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT LIBC_NAMESPACE::stdout
+#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT ::stdout
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, __printf_modular,
+   (const char *__restrict format, ...)) {
+  va_list vlist;
+  va_start(vlist, format);
+  internal::ArgList args(vl

[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-07-28 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147431

>From 92dad1410839afdc18ef2d92b3b9055d72f85188 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH 1/5] [clang] "modular_format" attribute for functions using
 format strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td | 11 +++
 clang/include/clang/Basic/AttrDocs.td | 25 +
 clang/lib/CodeGen/CGCall.cpp  | 12 
 clang/lib/Sema/SemaDeclAttr.cpp   | 27 +++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 27fea7dea0a5e..bed878a10424c 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5182,3 +5182,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+IdentifierArgument<"ModularImplFn">,
+StringArgument<"ImplName">,
+VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 43442f177ab7b..3c325ce2462cb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9427,3 +9427,28 @@ diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is an implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand an aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issuing a relocation for the symbol
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a06455d25b1ef..9e8929b5a56ae 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2569,6 +2569,18 @@ void CodeGenModule::ConstructAttributeList(StringRef 
Name,
 
 if (TargetDecl->hasAttr())
   FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+if (auto *ModularFormat = TargetDecl->getAttr()) {
+  // TODO: Error checking
+  FormatAttr *Format = TargetDecl->getAttr();
+  std::string FormatIdx = std::to_string(Format->getFormatIdx());
+  std::string FirstArg = std::to_string(Format->getFirstArg());
+  SmallVector Args = {
+  FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+  ModularFormat->getImplName()};
+  llvm::append_range(Args, ModularFormat->aspects());
+  FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+}
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index eba29e609cb05..b70ffd7c35f7b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6897,6 +6897,29 @@ static void handleVTablePointerAuthentication(Sema &S, 
Decl *D,
   CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+return;
+  SmallVector Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+StringRef Aspect;
+if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+  return;
+Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+  S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+  Aspe

[llvm-branch-commits] [llvm] [IR] "modular-format" attribute for functions using format strings (PR #147429)

2025-07-28 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147429

>From 4f58c112defb97bda3ed5685f61f6fdc9dda7507 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Wed, 2 Apr 2025 16:24:57 -0700
Subject: [PATCH 1/6] [IR] "modular-format" attribute for functions using
 format strings

A new InstCombine transform uses this attribute to rewrite calls to a
modular version of the implementation along with llvm.reloc.none
relocations against aspects of the implementation needed by the call.

This change only adds support for the 'float' aspect, but it also builds
the structure needed for others.

See issue #146159
---
 llvm/docs/LangRef.rst | 17 +
 .../InstCombine/InstCombineCalls.cpp  | 62 +++
 2 files changed, 79 insertions(+)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b8d987317f5e7..60769f6643e7c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2620,6 +2620,23 @@ For example:
 This attribute indicates that outlining passes should not modify the
 function.
 
+``"modular-format"="<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"``
+This attribute indicates that the implementation is modular on a particular
+format string argument. When the argument for a given call is constant, the
+compiler may redirect the call to a modular implementation function
+instead.
+
+The compiler also emits relocations to report various aspects of the format
+string and arguments that were present. The compiler reports an aspect by
+issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges
+for code and data needed to support the aspect of the implementation to be
+brought into the link to satisfy weak references in the modular
+implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+
 Call Site Attributes
 --
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b6ed1dc4331d2..579e5769796c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -3915,6 +3916,63 @@ Instruction 
*InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
   return visitCallBase(CBI);
 }
 
+static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
+  if (!CI->hasFnAttr("modular-format"))
+return nullptr;
+
+  SmallVector Args(
+  llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
+  // TODO: Examine the format argument in Args[0].
+  // TODO: Error handling
+  unsigned FirstArgIdx;
+  if (!llvm::to_integer(Args[1], FirstArgIdx))
+return nullptr;
+  if (FirstArgIdx == 0)
+return nullptr;
+  --FirstArgIdx;
+  StringRef FnName = Args[2];
+  StringRef ImplName = Args[3];
+  DenseSet Aspects(llvm::from_range,
+  ArrayRef(Args).drop_front(4));
+  Module *M = CI->getModule();
+  Function *Callee = CI->getCalledFunction();
+  FunctionCallee ModularFn =
+  M->getOrInsertFunction(FnName, Callee->getFunctionType(),
+ Callee->getAttributes().removeFnAttribute(
+ M->getContext(), "modular-format"));
+  CallInst *New = cast(CI->clone());
+  New->setCalledFunction(ModularFn);
+  New->removeFnAttr("modular-format");
+  B.Insert(New);
+
+  const auto ReferenceAspect = [&](StringRef Aspect) {
+SmallString<20> Name = ImplName;
+Name += '_';
+Name += Aspect;
+Constant *Sym =
+M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext()));
+Function *RelocNoneFn =
+Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
+B.CreateCall(RelocNoneFn, {Sym});
+  };
+
+  if (Aspects.contains("float")) {
+Aspects.erase("float");
+if (llvm::any_of(
+llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
+ CI->arg_end()),
+[](Value *V) { return V->getType()->isFloatingPointTy(); }))
+  ReferenceAspect("float");
+  }
+
+  SmallVector UnknownAspects(Aspects.begin(), Aspects.end());
+  llvm::sort(UnknownAspects);
+  for (StringRef Request : UnknownAspects)
+ReferenceAspect(Request);
+
+  return New;
+}
+
 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
   if (!CI->getCalledFunction()) return nullptr;
 
@@ -3936,6 +3994,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst 
*CI) {
 ++NumSimplified;
 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
   }
+  if (Value *With = optimizeModularFormat(CI, Builder)) {
+++NumSimplified;
+ret

[llvm-branch-commits] [libc] [libc] Modular printf option (float only) (PR #147426)

2025-07-28 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147426

>From 49be509b0506ac6e6032f5b8cfebf0c7e978cd1f Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Thu, 19 Dec 2024 11:57:27 -0800
Subject: [PATCH 1/5] [libc] Modular printf option (float only)

This adds LIBC_CONF_PRINTF_MODULAR, which causes floating point support
(later, others) to be weakly linked into the implementation.
__printf_modular becomes the main entry point of the implementation, and
printf itself wraps __printf_modular. printf also contains a
BFD_RELOC_NONE relocation to bring in the float aspect.

See issue #146159 for context.
---
 libc/config/config.json   |  4 ++
 libc/docs/configure.rst   |  1 +
 libc/src/stdio/generic/CMakeLists.txt |  7 ++-
 libc/src/stdio/generic/printf_modular.cpp | 40 +
 libc/src/stdio/printf.h   |  1 +
 libc/src/stdio/printf_core/CMakeLists.txt |  7 ++-
 .../stdio/printf_core/float_dec_converter.h   | 25 +++--
 .../printf_core/float_dec_converter_limited.h | 24 ++--
 .../stdio/printf_core/float_hex_converter.h   | 10 +++-
 libc/src/stdio/printf_core/float_impl.cpp | 41 ++
 libc/src/stdio/printf_core/parser.h   | 56 ++-
 libc/src/stdio/printf_core/printf_config.h|  7 +++
 libc/src/stdio/printf_core/printf_main.h  | 13 -
 .../src/stdio/printf_core/vfprintf_internal.h | 13 -
 14 files changed, 216 insertions(+), 33 deletions(-)
 create mode 100644 libc/src/stdio/generic/printf_modular.cpp
 create mode 100644 libc/src/stdio/printf_core/float_impl.cpp

diff --git a/libc/config/config.json b/libc/config/config.json
index d53b2936edb07..4278169cd5940 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -45,6 +45,10 @@
 "LIBC_CONF_PRINTF_RUNTIME_DISPATCH": {
   "value": true,
   "doc": "Use dynamic dispatch for the output mechanism to reduce code 
size."
+},
+"LIBC_CONF_PRINTF_MODULAR": {
+  "value": true,
+  "doc": "Split printf implementation into modules that can be lazily 
linked in."
 }
   },
   "scanf": {
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 109412225634f..1998c067dc77a 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -45,6 +45,7 @@ to learn about the defaults for your platform and target.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for 
faster and smaller but less accurate printf doubles.
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative 
printf float implementation based on 320-bit floats
 - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large 
table for better printf long double performance.
+- ``LIBC_CONF_PRINTF_MODULAR``: Split printf implementation into modules 
that can be lazily linked in.
 - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the 
output mechanism to reduce code size.
 * **"pthread" options**
 - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins 
before blocking if a mutex is in contention (default to 100).
diff --git a/libc/src/stdio/generic/CMakeLists.txt 
b/libc/src/stdio/generic/CMakeLists.txt
index 6361822b61999..41b18bc7195ca 100644
--- a/libc/src/stdio/generic/CMakeLists.txt
+++ b/libc/src/stdio/generic/CMakeLists.txt
@@ -412,10 +412,15 @@ if(LLVM_LIBC_FULL_BUILD)
   )
 endif()
 
+set(printf_srcs printf.cpp)
+if (LIBC_CONF_PRINTF_MODULAR)
+  list(APPEND printf_srcs printf_modular.cpp)
+endif()
+
 add_generic_entrypoint_object(
   printf
   SRCS
-printf.cpp
+${printf_srcs}
   HDRS
 ../printf.h
   DEPENDS
diff --git a/libc/src/stdio/generic/printf_modular.cpp 
b/libc/src/stdio/generic/printf_modular.cpp
new file mode 100644
index 0..3a6a580002062
--- /dev/null
+++ b/libc/src/stdio/generic/printf_modular.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of 
printf_modular---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "src/stdio/printf.h"
+
+#include "src/__support/File/file.h"
+#include "src/__support/arg_list.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/printf_core/vfprintf_internal.h"
+
+#include "hdr/types/FILE.h"
+#include 
+
+#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT LIBC_NAMESPACE::stdout
+#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#define PRINTF_STDOUT ::stdout
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, __printf_modular,
+   (const char *__restrict format, ...)) {
+  va_list vlist;
+  va_start(vlist, format);
+  internal::ArgList args(vl

[llvm-branch-commits] [llvm] [IR] "modular-format" attribute for functions using format strings (PR #147429)

2025-10-17 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147429

>From bb3b826b652c7bed83aa6c06d4af095dcb772592 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Wed, 2 Apr 2025 16:24:57 -0700
Subject: [PATCH 01/10] [IR] "modular-format" attribute for functions using
 format strings

A new InstCombine transform uses this attribute to rewrite calls to a
modular version of the implementation along with llvm.reloc.none
relocations against aspects of the implementation needed by the call.

This change only adds support for the 'float' aspect, but it also builds
the structure needed for others.

See issue #146159
---
 llvm/docs/LangRef.rst | 17 +
 .../InstCombine/InstCombineCalls.cpp  | 62 +++
 2 files changed, 79 insertions(+)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index fa0b580ee77cb..582f3ae110719 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2742,6 +2742,23 @@ For example:
 This attribute indicates that outlining passes should not modify the
 function.
 
+``"modular-format"="<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"``
+This attribute indicates that the implementation is modular on a particular
+format string argument. When the argument for a given call is constant, the
+compiler may redirect the call to a modular implementation function
+instead.
+
+The compiler also emits relocations to report various aspects of the format
+string and arguments that were present. The compiler reports an aspect by
+issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges
+for code and data needed to support the aspect of the implementation to be
+brought into the link to satisfy weak references in the modular
+implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+
 Call Site Attributes
 --
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e1e24a99d0474..452561570db48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -4071,6 +4072,63 @@ Instruction 
*InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
   return visitCallBase(CBI);
 }
 
+static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
+  if (!CI->hasFnAttr("modular-format"))
+return nullptr;
+
+  SmallVector Args(
+  llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
+  // TODO: Examine the format argument in Args[0].
+  // TODO: Error handling
+  unsigned FirstArgIdx;
+  if (!llvm::to_integer(Args[1], FirstArgIdx))
+return nullptr;
+  if (FirstArgIdx == 0)
+return nullptr;
+  --FirstArgIdx;
+  StringRef FnName = Args[2];
+  StringRef ImplName = Args[3];
+  DenseSet Aspects(llvm::from_range,
+  ArrayRef(Args).drop_front(4));
+  Module *M = CI->getModule();
+  Function *Callee = CI->getCalledFunction();
+  FunctionCallee ModularFn =
+  M->getOrInsertFunction(FnName, Callee->getFunctionType(),
+ Callee->getAttributes().removeFnAttribute(
+ M->getContext(), "modular-format"));
+  CallInst *New = cast(CI->clone());
+  New->setCalledFunction(ModularFn);
+  New->removeFnAttr("modular-format");
+  B.Insert(New);
+
+  const auto ReferenceAspect = [&](StringRef Aspect) {
+SmallString<20> Name = ImplName;
+Name += '_';
+Name += Aspect;
+Constant *Sym =
+M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext()));
+Function *RelocNoneFn =
+Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
+B.CreateCall(RelocNoneFn, {Sym});
+  };
+
+  if (Aspects.contains("float")) {
+Aspects.erase("float");
+if (llvm::any_of(
+llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
+ CI->arg_end()),
+[](Value *V) { return V->getType()->isFloatingPointTy(); }))
+  ReferenceAspect("float");
+  }
+
+  SmallVector UnknownAspects(Aspects.begin(), Aspects.end());
+  llvm::sort(UnknownAspects);
+  for (StringRef Request : UnknownAspects)
+ReferenceAspect(Request);
+
+  return New;
+}
+
 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
   if (!CI->getCalledFunction()) return nullptr;
 
@@ -4092,6 +4150,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst 
*CI) {
 ++NumSimplified;
 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
   }
+  if (Value *With = optimizeModularFormat(CI, Builder)) {
+++NumSimplified;
+r

[llvm-branch-commits] [llvm] [IR] "modular-format" attribute for functions using format strings (PR #147429)

2025-10-17 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147429

>From bb3b826b652c7bed83aa6c06d4af095dcb772592 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Wed, 2 Apr 2025 16:24:57 -0700
Subject: [PATCH 01/10] [IR] "modular-format" attribute for functions using
 format strings

A new InstCombine transform uses this attribute to rewrite calls to a
modular version of the implementation along with llvm.reloc.none
relocations against aspects of the implementation needed by the call.

This change only adds support for the 'float' aspect, but it also builds
the structure needed for others.

See issue #146159
---
 llvm/docs/LangRef.rst | 17 +
 .../InstCombine/InstCombineCalls.cpp  | 62 +++
 2 files changed, 79 insertions(+)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index fa0b580ee77cb..582f3ae110719 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2742,6 +2742,23 @@ For example:
 This attribute indicates that outlining passes should not modify the
 function.
 
+``"modular-format"="<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"``
+This attribute indicates that the implementation is modular on a particular
+format string argument. When the argument for a given call is constant, the
+compiler may redirect the call to a modular implementation function
+instead.
+
+The compiler also emits relocations to report various aspects of the format
+string and arguments that were present. The compiler reports an aspect by
+issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges
+for code and data needed to support the aspect of the implementation to be
+brought into the link to satisfy weak references in the modular
+implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+
 Call Site Attributes
 --
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e1e24a99d0474..452561570db48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -4071,6 +4072,63 @@ Instruction 
*InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
   return visitCallBase(CBI);
 }
 
+static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
+  if (!CI->hasFnAttr("modular-format"))
+return nullptr;
+
+  SmallVector Args(
+  llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
+  // TODO: Examine the format argument in Args[0].
+  // TODO: Error handling
+  unsigned FirstArgIdx;
+  if (!llvm::to_integer(Args[1], FirstArgIdx))
+return nullptr;
+  if (FirstArgIdx == 0)
+return nullptr;
+  --FirstArgIdx;
+  StringRef FnName = Args[2];
+  StringRef ImplName = Args[3];
+  DenseSet Aspects(llvm::from_range,
+  ArrayRef(Args).drop_front(4));
+  Module *M = CI->getModule();
+  Function *Callee = CI->getCalledFunction();
+  FunctionCallee ModularFn =
+  M->getOrInsertFunction(FnName, Callee->getFunctionType(),
+ Callee->getAttributes().removeFnAttribute(
+ M->getContext(), "modular-format"));
+  CallInst *New = cast(CI->clone());
+  New->setCalledFunction(ModularFn);
+  New->removeFnAttr("modular-format");
+  B.Insert(New);
+
+  const auto ReferenceAspect = [&](StringRef Aspect) {
+SmallString<20> Name = ImplName;
+Name += '_';
+Name += Aspect;
+Constant *Sym =
+M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext()));
+Function *RelocNoneFn =
+Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
+B.CreateCall(RelocNoneFn, {Sym});
+  };
+
+  if (Aspects.contains("float")) {
+Aspects.erase("float");
+if (llvm::any_of(
+llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
+ CI->arg_end()),
+[](Value *V) { return V->getType()->isFloatingPointTy(); }))
+  ReferenceAspect("float");
+  }
+
+  SmallVector UnknownAspects(Aspects.begin(), Aspects.end());
+  llvm::sort(UnknownAspects);
+  for (StringRef Request : UnknownAspects)
+ReferenceAspect(Request);
+
+  return New;
+}
+
 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
   if (!CI->getCalledFunction()) return nullptr;
 
@@ -4092,6 +4150,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst 
*CI) {
 ++NumSimplified;
 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
   }
+  if (Value *With = optimizeModularFormat(CI, Builder)) {
+++NumSimplified;
+r

[llvm-branch-commits] [llvm] [IR] "modular-format" attribute for functions using format strings (PR #147429)

2025-10-17 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147429

>From bb3b826b652c7bed83aa6c06d4af095dcb772592 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Wed, 2 Apr 2025 16:24:57 -0700
Subject: [PATCH 01/10] [IR] "modular-format" attribute for functions using
 format strings

A new InstCombine transform uses this attribute to rewrite calls to a
modular version of the implementation along with llvm.reloc.none
relocations against aspects of the implementation needed by the call.

This change only adds support for the 'float' aspect, but it also builds
the structure needed for others.

See issue #146159
---
 llvm/docs/LangRef.rst | 17 +
 .../InstCombine/InstCombineCalls.cpp  | 62 +++
 2 files changed, 79 insertions(+)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index fa0b580ee77cb..582f3ae110719 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2742,6 +2742,23 @@ For example:
 This attribute indicates that outlining passes should not modify the
 function.
 
+``"modular-format"="<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"``
+This attribute indicates that the implementation is modular on a particular
+format string argument. When the argument for a given call is constant, the
+compiler may redirect the call to a modular implementation function
+instead.
+
+The compiler also emits relocations to report various aspects of the format
+string and arguments that were present. The compiler reports an aspect by
+issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges
+for code and data needed to support the aspect of the implementation to be
+brought into the link to satisfy weak references in the modular
+implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+
 Call Site Attributes
 --
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e1e24a99d0474..452561570db48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -4071,6 +4072,63 @@ Instruction 
*InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
   return visitCallBase(CBI);
 }
 
+static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
+  if (!CI->hasFnAttr("modular-format"))
+return nullptr;
+
+  SmallVector Args(
+  llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
+  // TODO: Examine the format argument in Args[0].
+  // TODO: Error handling
+  unsigned FirstArgIdx;
+  if (!llvm::to_integer(Args[1], FirstArgIdx))
+return nullptr;
+  if (FirstArgIdx == 0)
+return nullptr;
+  --FirstArgIdx;
+  StringRef FnName = Args[2];
+  StringRef ImplName = Args[3];
+  DenseSet Aspects(llvm::from_range,
+  ArrayRef(Args).drop_front(4));
+  Module *M = CI->getModule();
+  Function *Callee = CI->getCalledFunction();
+  FunctionCallee ModularFn =
+  M->getOrInsertFunction(FnName, Callee->getFunctionType(),
+ Callee->getAttributes().removeFnAttribute(
+ M->getContext(), "modular-format"));
+  CallInst *New = cast(CI->clone());
+  New->setCalledFunction(ModularFn);
+  New->removeFnAttr("modular-format");
+  B.Insert(New);
+
+  const auto ReferenceAspect = [&](StringRef Aspect) {
+SmallString<20> Name = ImplName;
+Name += '_';
+Name += Aspect;
+Constant *Sym =
+M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext()));
+Function *RelocNoneFn =
+Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
+B.CreateCall(RelocNoneFn, {Sym});
+  };
+
+  if (Aspects.contains("float")) {
+Aspects.erase("float");
+if (llvm::any_of(
+llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
+ CI->arg_end()),
+[](Value *V) { return V->getType()->isFloatingPointTy(); }))
+  ReferenceAspect("float");
+  }
+
+  SmallVector UnknownAspects(Aspects.begin(), Aspects.end());
+  llvm::sort(UnknownAspects);
+  for (StringRef Request : UnknownAspects)
+ReferenceAspect(Request);
+
+  return New;
+}
+
 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
   if (!CI->getCalledFunction()) return nullptr;
 
@@ -4092,6 +4150,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst 
*CI) {
 ++NumSimplified;
 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
   }
+  if (Value *With = optimizeModularFormat(CI, Builder)) {
+++NumSimplified;
+r

[llvm-branch-commits] [llvm] [IR] "modular-format" attribute for functions using format strings (PR #147429)

2025-11-03 Thread Daniel Thornburgh via llvm-branch-commits

mysterymath wrote:

Coming back from the US dev meeting, I wanted to ping this one again. Is there 
anything left for me to do before this is ready to land?

https://github.com/llvm/llvm-project/pull/147429
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-11-03 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147431

>From a9ac2282d609b7aaca4f7d733960301602e1637b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH 1/8] [clang] "modular_format" attribute for functions using
 format strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td | 11 +++
 clang/include/clang/Basic/AttrDocs.td | 25 +
 clang/lib/CodeGen/CGCall.cpp  | 12 
 clang/lib/Sema/SemaDeclAttr.cpp   | 27 +++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 22e60aa9fe312..69f5bf5bba461 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5290,3 +5290,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+IdentifierArgument<"ModularImplFn">,
+StringArgument<"ImplName">,
+VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index e0bbda083b5cf..ebf1a45dbbb50 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9635,3 +9635,28 @@ silence diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is an implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand an aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issuing a relocation for the symbol
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 741fa44713ac8..67765f7fab28b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2557,6 +2557,18 @@ void CodeGenModule::ConstructAttributeList(StringRef 
Name,
 
 if (TargetDecl->hasAttr())
   FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+if (auto *ModularFormat = TargetDecl->getAttr()) {
+  // TODO: Error checking
+  FormatAttr *Format = TargetDecl->getAttr();
+  std::string FormatIdx = std::to_string(Format->getFormatIdx());
+  std::string FirstArg = std::to_string(Format->getFirstArg());
+  SmallVector Args = {
+  FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+  ModularFormat->getImplName()};
+  llvm::append_range(Args, ModularFormat->aspects());
+  FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+}
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index e6f8748db7644..8fcfb38661a8f 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6783,6 +6783,29 @@ static void handleVTablePointerAuthentication(Sema &S, 
Decl *D,
   CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+return;
+  SmallVector Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+StringRef Aspect;
+if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+  return;
+Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+  S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+  

[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

2025-11-04 Thread Daniel Thornburgh via llvm-branch-commits

https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147431

>From 3ac748150a5c7caf8fed4d7c488770722d505068 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH 01/12] [clang] "modular_format" attribute for functions using
 format strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td | 11 +++
 clang/include/clang/Basic/AttrDocs.td | 25 +
 clang/lib/CodeGen/CGCall.cpp  | 12 
 clang/lib/Sema/SemaDeclAttr.cpp   | 27 +++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 749f531ec9ab1..8605032df2eee 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5309,3 +5309,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+IdentifierArgument<"ModularImplFn">,
+StringArgument<"ImplName">,
+VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 2fdd041c1b46e..7baee073b5cfd 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9674,3 +9674,28 @@ silence diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is an implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand an aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issuing a relocation for the symbol
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implementation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 465f3f4e670c2..cfff662757c78 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2557,6 +2557,18 @@ void CodeGenModule::ConstructAttributeList(StringRef 
Name,
 
 if (TargetDecl->hasAttr())
   FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+if (auto *ModularFormat = TargetDecl->getAttr()) {
+  // TODO: Error checking
+  FormatAttr *Format = TargetDecl->getAttr();
+  std::string FormatIdx = std::to_string(Format->getFormatIdx());
+  std::string FirstArg = std::to_string(Format->getFirstArg());
+  SmallVector Args = {
+  FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+  ModularFormat->getImplName()};
+  llvm::append_range(Args, ModularFormat->aspects());
+  FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+}
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index a9e7b44ac9d73..484e4ad921835 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6911,6 +6911,29 @@ static void handleVTablePointerAuthentication(Sema &S, 
Decl *D,
   CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+return;
+  SmallVector Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+StringRef Aspect;
+if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+  return;
+Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+  S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+