https://github.com/nikic updated 
https://github.com/llvm/llvm-project/pull/134396

From 194ce2dfa10eebc3c0d45f944a09473bd88412d9 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npo...@redhat.com>
Date: Fri, 4 Apr 2025 16:57:30 +0200
Subject: [PATCH 1/2] [Clang] Always verify LLVM IR inputs

We get a lot of issues that basically boil down to "I passed
malformed LLVM IR to clang and it crashed". Clang does not perform
IR verification by default in (non-assertion-enabled) release
builds, and that's sensible for IR that Clang itself produces,
which is expected to always be valid. However, if people pass in
their own handwritten IR, we should report if it is malformed,
instead of crashing. We should also report it in a way that does
not produce a crash trace and ask for a bug report, as currently
happens in assertions-enabled builds.

I've only added the verification for textual IR inputs. I don't
want to force verification for bitcode inputs, as that would
affect typical LTO scenarios, and such inputs usually come from
Clang itself.
---
 .../clang/Basic/DiagnosticFrontendKinds.td        |  2 ++
 clang/lib/CodeGen/CodeGenAction.cpp               | 15 ++++++++++++++-
 clang/test/CodeGen/invalid_llvm_ir.ll             | 10 ++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGen/invalid_llvm_ir.ll

diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 5f64b1cbfac87..6c72775197823 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -379,6 +379,8 @@ def err_ast_action_on_llvm_ir : Error<
   "cannot apply AST actions to LLVM IR file '%0'">,
   DefaultFatal;
 
+def err_invalid_llvm_ir : Error<"invalid LLVM IR input: %0">;
+
 def err_os_unsupport_riscv_fmv : Error<
   "function multiversioning is currently only supported on Linux">;
 
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 4321efd49af36..09d76a161079a 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -39,6 +39,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LLVMRemarkStreamer.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/LTO/LTOBackend.h"
 #include "llvm/Linker/Linker.h"
@@ -1048,8 +1049,20 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) {
 
   // Handle textual IR and bitcode file with one single module.
   llvm::SMDiagnostic Err;
-  if (std::unique_ptr<llvm::Module> M = parseIR(MBRef, Err, *VMContext))
+  if (std::unique_ptr<llvm::Module> M = parseIR(MBRef, Err, *VMContext)) {
+    // For textual LLVM IR files, always verify the input and report the error
+    // in a way that does not ask people to report an issue for it.
+    if (!llvm::isBitcode((const unsigned char *)MBRef.getBufferStart(),
+                         (const unsigned char *)MBRef.getBufferEnd())) {
+      std::string VerifierErr;
+      raw_string_ostream VerifierErrStream(VerifierErr);
+      if (llvm::verifyModule(*M, &VerifierErrStream)) {
+        CI.getDiagnostics().Report(diag::err_invalid_llvm_ir) << VerifierErr;
+        return {};
+      }
+    }
     return M;
+  }
 
   // If MBRef is a bitcode with multiple modules (e.g., -fsplit-lto-unit
   // output), place the extra modules (actually only one, a regular LTO module)
diff --git a/clang/test/CodeGen/invalid_llvm_ir.ll b/clang/test/CodeGen/invalid_llvm_ir.ll
new file mode 100644
index 0000000000000..573fe6e351f3e
--- /dev/null
+++ b/clang/test/CodeGen/invalid_llvm_ir.ll
@@ -0,0 +1,10 @@
+; RUN: not %clang %s 2>&1 | FileCheck %s
+
+; CHECK: error: invalid LLVM IR input: PHINode should have one entry for each predecessor of its parent basic block!
+; CHECK-NEXT: %phi = phi i32 [ 0, %entry ]
+
+define void @test() {
+entry:
+  %phi = phi i32 [ 0, %entry ]
+  ret void
+}

From 1d6a46d823396f5b6a5feda7081e7549053b2822 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita....@gmail.com>
Date: Fri, 4 Apr 2025 20:32:25 +0200
Subject: [PATCH 2/2] also verify bitcode

---
 clang/lib/CodeGen/CodeGenAction.cpp   | 17 +++++++----------
 clang/test/CodeGen/invalid_llvm_ir.ll |  2 ++
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 09d76a161079a..1f5eb427b566f 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -1050,16 +1050,13 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) {
   // Handle textual IR and bitcode file with one single module.
   llvm::SMDiagnostic Err;
   if (std::unique_ptr<llvm::Module> M = parseIR(MBRef, Err, *VMContext)) {
-    // For textual LLVM IR files, always verify the input and report the error
-    // in a way that does not ask people to report an issue for it.
-    if (!llvm::isBitcode((const unsigned char *)MBRef.getBufferStart(),
-                         (const unsigned char *)MBRef.getBufferEnd())) {
-      std::string VerifierErr;
-      raw_string_ostream VerifierErrStream(VerifierErr);
-      if (llvm::verifyModule(*M, &VerifierErrStream)) {
-        CI.getDiagnostics().Report(diag::err_invalid_llvm_ir) << VerifierErr;
-        return {};
-      }
+    // For LLVM IR files, always verify the input and report the error in a way
+    // that does not ask people to report an issue for it.
+    std::string VerifierErr;
+    raw_string_ostream VerifierErrStream(VerifierErr);
+    if (llvm::verifyModule(*M, &VerifierErrStream)) {
+      CI.getDiagnostics().Report(diag::err_invalid_llvm_ir) << VerifierErr;
+      return {};
     }
     return M;
   }
diff --git a/clang/test/CodeGen/invalid_llvm_ir.ll b/clang/test/CodeGen/invalid_llvm_ir.ll
index 573fe6e351f3e..97a6802bc105e 100644
--- a/clang/test/CodeGen/invalid_llvm_ir.ll
+++ b/clang/test/CodeGen/invalid_llvm_ir.ll
@@ -1,4 +1,6 @@
 ; RUN: not %clang %s 2>&1 | FileCheck %s
+; RUN: llvm-as -disable-verify < %s > %t.bc
+; RUN: not %clang %t.bc 2>&1 | FileCheck %s
 
 ; CHECK: error: invalid LLVM IR input: PHINode should have one entry for each predecessor of its parent basic block!
 ; CHECK-NEXT: %phi = phi i32 [ 0, %entry ]

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to