junaire created this revision.
junaire added reviewers: aaron.ballman, pmor13.
Herald added a project: All.
junaire requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Previously we only have an extension that warn void pointer deferencing
in C++, but for C we did nothing.

C2x 6.5.3.2p4 says The unary * operator denotes indirection. If it points
to an object, the result is an lvalue designating the object. However, there
is no way to form an lvalue designating an object of an incomplete type as
6.3.2.1p1 says "an lvalue is an expression (with an object type other than
void)", so the behavior is undefined.

Fixes https://github.com/llvm/llvm-project/issues/53631
Signed-off-by: Jun Zhang <j...@junz.org>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D134461

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Parse/Parser.h
  clang/include/clang/Sema/Sema.h
  clang/lib/Parse/ParseExpr.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/test/Analysis/misc-ps.m
  clang/test/C/drs/dr1xx.c
  clang/test/Sema/asm.c
  clang/test/Sema/builtins-arm.c
  clang/test/Sema/conditional-expr.c
  clang/test/Sema/expr-address-of.c
  clang/test/Sema/i-c-e.c
  clang/test/Sema/warn-deference-void-ptr.c

Index: clang/test/Sema/warn-deference-void-ptr.c
===================================================================
--- /dev/null
+++ clang/test/Sema/warn-deference-void-ptr.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -Wno-unused-value -fsyntax-only -verify %s
+
+void foo(void* p) {
+  *p; // expected-warning {{deference a void pointer has undefined behavior}}
+  &*p;
+  &(*p); // expected-warning {{deference a void pointer has undefined behavior}}
+}
Index: clang/test/Sema/i-c-e.c
===================================================================
--- clang/test/Sema/i-c-e.c
+++ clang/test/Sema/i-c-e.c
@@ -3,7 +3,7 @@
 #include <stdint.h>
 #include <limits.h>
 
-int a(void) {int p; *(1 ? &p : (void*)(0 && (a(),1))) = 10;} // expected-error {{incomplete type 'void' is not assignable}}
+int a(void) {int p; *(1 ? &p : (void*)(0 && (a(),1))) = 10;} // expected-error {{incomplete type 'void' is not assignable}} expected-warning {{deference a void pointer has undefined behavior}}
 
 // rdar://6091492 - ?: with __builtin_constant_p as the operand is an i-c-e.
 int expr;
Index: clang/test/Sema/expr-address-of.c
===================================================================
--- clang/test/Sema/expr-address-of.c
+++ clang/test/Sema/expr-address-of.c
@@ -105,7 +105,7 @@
   int* dummy2 = &(t2.a); // expected-error {{address of bit-field requested}}
   int* dummy3 = &(t2.b); // expected-error {{address of bit-field requested}}
 
-  void* t3 = &(*(void*)0);
+  void* t3 = &*(void*)0;
 }
 
 void f8(void) {
Index: clang/test/Sema/conditional-expr.c
===================================================================
--- clang/test/Sema/conditional-expr.c
+++ clang/test/Sema/conditional-expr.c
@@ -2,11 +2,11 @@
 void foo(void) {
   *(0 ? (double *)0 : (void *)0) = 0;
   // FIXME: GCC doesn't consider the following two statements to be errors.
-  *(0 ? (double *)0 : (void *)(int *)0) = 0; // expected-error {{incomplete type 'void' is not assignable}}
-  *(0 ? (double *)0 : (void *)(double *)0) = 0; // expected-error {{incomplete type 'void' is not assignable}}
-  *(0 ? (double *)0 : (int *)(void *)0) = 0; // expected-error {{incomplete type 'void' is not assignable}} expected-warning {{pointer type mismatch ('double *' and 'int *')}}
+  *(0 ? (double *)0 : (void *)(int *)0) = 0; // expected-error {{incomplete type 'void' is not assignable}} expected-warning {{deference a void pointer has undefined behavior}}
+  *(0 ? (double *)0 : (void *)(double *)0) = 0; // expected-error {{incomplete type 'void' is not assignable}} expected-warning {{deference a void pointer has undefined behavior}}
+  *(0 ? (double *)0 : (int *)(void *)0) = 0; // expected-error {{incomplete type 'void' is not assignable}} expected-warning {{pointer type mismatch ('double *' and 'int *')}} expected-warning {{deference a void pointer has undefined behavior}}
   *(0 ? (double *)0 : (double *)(void *)0) = 0;
-  *((void *) 0) = 0; // expected-error {{incomplete type 'void' is not assignable}}
+  *((void *) 0) = 0; // expected-error {{incomplete type 'void' is not assignable}} expected-warning {{deference a void pointer has undefined behavior}}
   double *dp;
   int *ip;
   void *vp;
Index: clang/test/Sema/builtins-arm.c
===================================================================
--- clang/test/Sema/builtins-arm.c
+++ clang/test/Sema/builtins-arm.c
@@ -18,7 +18,7 @@
 void test1(void) {
   __builtin_va_list ptr;
   ptr.__ap = "x";
-  *(ptr.__ap) = '0'; // expected-error {{incomplete type 'void' is not assignable}}
+  *(ptr.__ap) = '0'; // expected-error {{incomplete type 'void' is not assignable}} expected-warning {{deference a void pointer has undefined behavior}}
 }
 #else
 // va_list on ARM apcs-gnu is void*.
@@ -30,7 +30,7 @@
 
 void test2(void) {
   __builtin_va_list ptr = "x";
-  *ptr = '0'; // expected-error {{incomplete type 'void' is not assignable}}
+  *ptr = '0'; // expected-error {{incomplete type 'void' is not assignable}} expected-warning {{deference a void pointer has undefined behavior}}
 }
 #endif
 
Index: clang/test/Sema/asm.c
===================================================================
--- clang/test/Sema/asm.c
+++ clang/test/Sema/asm.c
@@ -50,8 +50,8 @@
 // <rdar://problem/6156893>
 void test4(const volatile void *addr)
 {
-    asm ("nop" : : "r"(*addr)); // expected-error {{invalid type 'const volatile void' in asm input for constraint 'r'}}
-    asm ("nop" : : "m"(*addr));
+    asm ("nop" : : "r"(*addr)); // expected-error {{invalid type 'const volatile void' in asm input for constraint 'r'}} expected-warning {{deference a void pointer has undefined behavior}}
+    asm ("nop" : : "m"(*addr)); // expected-warning {{deference a void pointer has undefined behavior}}
 
     asm ("nop" : : "r"(test4(addr))); // expected-error {{invalid type 'void' in asm input for constraint 'r'}}
     asm ("nop" : : "m"(test4(addr))); // expected-error {{invalid lvalue in asm input for constraint 'm'}}
Index: clang/test/C/drs/dr1xx.c
===================================================================
--- clang/test/C/drs/dr1xx.c
+++ clang/test/C/drs/dr1xx.c
@@ -139,8 +139,8 @@
   /* The behavior changed between C89 and C99. */
   (void)&*p; /* c89only-warning {{ISO C forbids taking the address of an expression of type 'void'}} */
   /* The behavior of all three of these is undefined. */
-  (void)*p;
-  (void)(i ? *p : *p);
+  (void)*p; /* expected-warning {{deference a void pointer has undefined behavior}} */
+  (void)(i ? *p : *p); /* expected-warning {{deference a void pointer has undefined behavior}} */
   (void)(*p, *p); /* expected-warning {{left operand of comma operator has no effect}} */
 }
 
Index: clang/test/Analysis/misc-ps.m
===================================================================
--- clang/test/Analysis/misc-ps.m
+++ clang/test/Analysis/misc-ps.m
@@ -133,7 +133,7 @@
   void* q;
   
   if (!flag) {
-    if (sizeof(*q) == 1)
+    if (sizeof(*q) == 1) // expected-warning {{deference a void pointer has undefined behavior}}
       return;
     // Infeasibe.
     *p = 1; // no-warning
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -14493,7 +14493,8 @@
 
 /// CheckIndirectionOperand - Type check unary indirection (prefix '*').
 static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK,
-                                        SourceLocation OpLoc) {
+                                        SourceLocation OpLoc,
+                                        bool IsAfterAmp = false) {
   if (Op->isTypeDependent())
     return S.Context.DependentTy;
 
@@ -14530,18 +14531,18 @@
     return QualType();
   }
 
-  // Note that per both C89 and C99, indirection is always legal, even if Result
-  // is an incomplete type or void.  It would be possible to warn about
-  // dereferencing a void pointer, but it's completely well-defined, and such a
-  // warning is unlikely to catch any mistakes. In C++, indirection is not valid
-  // for pointers to 'void' but is fine for any other pointer type:
-  //
-  // C++ [expr.unary.op]p1:
-  //   [...] the expression to which [the unary * operator] is applied shall
-  //   be a pointer to an object type, or a pointer to a function type
-  if (S.getLangOpts().CPlusPlus && Result->isVoidType())
-    S.Diag(OpLoc, diag::ext_typecheck_indirection_through_void_pointer)
-      << OpTy << Op->getSourceRange();
+  if (Result->isVoidType()) {
+    unsigned Kind = 0;
+    // C++ [expr.unary.op]p1:
+    //   [...] the expression to which [the unary * operator] is applied shall
+    //   be a pointer to an object type, or a pointer to a function type
+    if (S.getLangOpts().CPlusPlus)
+      Kind = diag::ext_typecheck_indirection_through_void_pointer;
+    else if (S.getLangOpts().C99 && !IsAfterAmp)
+      Kind = diag::warn_deference_void_pointer;
+
+    Kind &&S.Diag(OpLoc, Kind) << OpTy << Op->getSourceRange();
+  }
 
   // Dereferences are usually l-values...
   VK = VK_LValue;
@@ -15530,8 +15531,8 @@
 }
 
 ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
-                                      UnaryOperatorKind Opc,
-                                      Expr *InputExpr) {
+                                      UnaryOperatorKind Opc, Expr *InputExpr,
+                                      bool IsAfterAmp) {
   ExprResult Input = InputExpr;
   ExprValueKind VK = VK_PRValue;
   ExprObjectKind OK = OK_Ordinary;
@@ -15581,7 +15582,8 @@
   case UO_Deref: {
     Input = DefaultFunctionArrayLvalueConversion(Input.get());
     if (Input.isInvalid()) return ExprError();
-    resultType = CheckIndirectionOperand(*this, Input.get(), VK, OpLoc);
+    resultType =
+        CheckIndirectionOperand(*this, Input.get(), VK, OpLoc, IsAfterAmp);
     break;
   }
   case UO_Plus:
@@ -15801,7 +15803,8 @@
 }
 
 ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc,
-                              UnaryOperatorKind Opc, Expr *Input) {
+                              UnaryOperatorKind Opc, Expr *Input,
+                              bool IsAfterAmp) {
   // First things first: handle placeholders so that the
   // overloaded-operator check considers the right type.
   if (const BuiltinType *pty = Input->getType()->getAsPlaceholderType()) {
@@ -15840,13 +15843,14 @@
     return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input);
   }
 
-  return CreateBuiltinUnaryOp(OpLoc, Opc, Input);
+  return CreateBuiltinUnaryOp(OpLoc, Opc, Input, IsAfterAmp);
 }
 
 // Unary Operators.  'Tok' is the token for the operator.
-ExprResult Sema::ActOnUnaryOp(Scope *S, SourceLocation OpLoc,
-                              tok::TokenKind Op, Expr *Input) {
-  return BuildUnaryOp(S, OpLoc, ConvertTokenKindToUnaryOpcode(Op), Input);
+ExprResult Sema::ActOnUnaryOp(Scope *S, SourceLocation OpLoc, tok::TokenKind Op,
+                              Expr *Input, bool IsAfterAmp) {
+  return BuildUnaryOp(S, OpLoc, ConvertTokenKindToUnaryOpcode(Op), Input,
+                      IsAfterAmp);
 }
 
 /// ActOnAddrLabel - Parse the GNU address of label extension: "&&foo".
Index: clang/lib/Parse/ParseExpr.cpp
===================================================================
--- clang/lib/Parse/ParseExpr.cpp
+++ clang/lib/Parse/ParseExpr.cpp
@@ -672,18 +672,14 @@
 /// \p isAddressOfOperand exists because an id-expression that is the
 /// operand of address-of gets special treatment due to member pointers.
 ///
-ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
-                                       bool isAddressOfOperand,
-                                       TypeCastState isTypeCast,
-                                       bool isVectorLiteral,
-                                       bool *NotPrimaryExpression) {
+ExprResult
+Parser::ParseCastExpression(CastParseKind ParseKind, bool isAddressOfOperand,
+                            TypeCastState isTypeCast, bool isVectorLiteral,
+                            bool *NotPrimaryExpression, bool IsAfterAmp) {
   bool NotCastExpr;
-  ExprResult Res = ParseCastExpression(ParseKind,
-                                       isAddressOfOperand,
-                                       NotCastExpr,
-                                       isTypeCast,
-                                       isVectorLiteral,
-                                       NotPrimaryExpression);
+  ExprResult Res = ParseCastExpression(ParseKind, isAddressOfOperand,
+                                       NotCastExpr, isTypeCast, isVectorLiteral,
+                                       NotPrimaryExpression, IsAfterAmp);
   if (NotCastExpr)
     Diag(Tok, diag::err_expected_expression);
   return Res;
@@ -910,12 +906,11 @@
 ///                   '__is_rvalue_expr'
 /// \endverbatim
 ///
-ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
-                                       bool isAddressOfOperand,
-                                       bool &NotCastExpr,
-                                       TypeCastState isTypeCast,
-                                       bool isVectorLiteral,
-                                       bool *NotPrimaryExpression) {
+ExprResult
+Parser::ParseCastExpression(CastParseKind ParseKind, bool isAddressOfOperand,
+                            bool &NotCastExpr, TypeCastState isTypeCast,
+                            bool isVectorLiteral, bool *NotPrimaryExpression,
+                            bool IsAfterAmp) {
   ExprResult Res;
   tok::TokenKind SavedKind = Tok.getKind();
   auto SavedType = PreferredType;
@@ -1360,7 +1355,12 @@
     // Special treatment because of member pointers
     SourceLocation SavedLoc = ConsumeToken();
     PreferredType.enterUnary(Actions, Tok.getLocation(), tok::amp, SavedLoc);
-    Res = ParseCastExpression(AnyCastExpr, true);
+
+    Res = ParseCastExpression(AnyCastExpr, /*isAddressOfOperand=*/true,
+                              /*isTypeCast=*/NotTypeCast,
+                              /*isVectorLitera=*/false,
+                              /*NotPrimaryExpression=*/nullptr,
+                              /*IsAfterAmp=*/true);
     if (!Res.isInvalid()) {
       Expr *Arg = Res.get();
       Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Arg);
@@ -1385,7 +1385,8 @@
     Res = ParseCastExpression(AnyCastExpr);
     if (!Res.isInvalid()) {
       Expr *Arg = Res.get();
-      Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Arg);
+      Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Arg,
+                                 IsAfterAmp);
       if (Res.isInvalid())
         Res = Actions.CreateRecoveryExpr(SavedLoc, Arg->getEndLoc(), Arg);
     }
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -5604,11 +5604,11 @@
 
   // Binary/Unary Operators.  'Tok' is the token for the operator.
   ExprResult CreateBuiltinUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
-                                  Expr *InputExpr);
-  ExprResult BuildUnaryOp(Scope *S, SourceLocation OpLoc,
-                          UnaryOperatorKind Opc, Expr *Input);
-  ExprResult ActOnUnaryOp(Scope *S, SourceLocation OpLoc,
-                          tok::TokenKind Op, Expr *Input);
+                                  Expr *InputExpr, bool IsAfterAmp = false);
+  ExprResult BuildUnaryOp(Scope *S, SourceLocation OpLoc, UnaryOperatorKind Opc,
+                          Expr *Input, bool IsAfterAmp = false);
+  ExprResult ActOnUnaryOp(Scope *S, SourceLocation OpLoc, tok::TokenKind Op,
+                          Expr *Input, bool IsAfterAmp = false);
 
   bool isQualifiedMemberAccess(Expr *E);
   QualType CheckAddressOfOperand(ExprResult &Operand, SourceLocation OpLoc);
Index: clang/include/clang/Parse/Parser.h
===================================================================
--- clang/include/clang/Parse/Parser.h
+++ clang/include/clang/Parse/Parser.h
@@ -1783,17 +1783,19 @@
     UnaryExprOnly,
     PrimaryExprOnly
   };
+
   ExprResult ParseCastExpression(CastParseKind ParseKind,
-                                 bool isAddressOfOperand,
-                                 bool &NotCastExpr,
+                                 bool isAddressOfOperand, bool &NotCastExpr,
                                  TypeCastState isTypeCast,
                                  bool isVectorLiteral = false,
-                                 bool *NotPrimaryExpression = nullptr);
+                                 bool *NotPrimaryExpression = nullptr,
+                                 bool IsAfterAmp = false);
   ExprResult ParseCastExpression(CastParseKind ParseKind,
                                  bool isAddressOfOperand = false,
                                  TypeCastState isTypeCast = NotTypeCast,
                                  bool isVectorLiteral = false,
-                                 bool *NotPrimaryExpression = nullptr);
+                                 bool *NotPrimaryExpression = nullptr,
+                                 bool IsAfterAmp = false);
 
   /// Returns true if the next token cannot start an expression.
   bool isNotExpressionStart();
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6921,6 +6921,9 @@
 def ext_typecheck_indirection_through_void_pointer : ExtWarn<
   "ISO C++ does not allow indirection on operand of type %0">,
   InGroup<DiagGroup<"void-ptr-dereference">>;
+def warn_deference_void_pointer : Warning<
+  "deference a void pointer has undefined behavior">, InGroup<
+  DiagGroup<"deference-void-pointer">>;
 def warn_indirection_through_null : Warning<
   "indirection of non-volatile null pointer will be deleted, not trap">,
   InGroup<NullDereference>;
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -210,6 +210,8 @@
   ``LL`` suffix.
 - Clang now correctly diagnoses index that refers past the last possible element
   of FAM-like arrays.
+- Clang now correctly diagnoses defercencing a void pointer as undefined behavior
+  in C mode. This fixes `Issue 53631 <https://github.com/llvm/llvm-project/issues/53631>`_
 
 Non-comprehensive list of changes in this release
 -------------------------------------------------
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to