llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clangir Author: Andy Kaylor (andykaylor) <details> <summary>Changes</summary> This adds standard-comforming handling for calls to functions that were declared in C source in the no prototype form. --- Patch is 22.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150553.diff 11 Files Affected: - (modified) clang/include/clang/CIR/Dialect/IR/CIROps.td (+5-7) - (modified) clang/include/clang/CIR/MissingFeatures.h (+8-5) - (modified) clang/lib/CIR/CodeGen/CIRGenCall.cpp (+9) - (modified) clang/lib/CIR/CodeGen/CIRGenCall.h (+5) - (modified) clang/lib/CIR/CodeGen/CIRGenExpr.cpp (+41-1) - (modified) clang/lib/CIR/CodeGen/CIRGenModule.cpp (+85-1) - (modified) clang/lib/CIR/CodeGen/CIRGenModule.h (+3) - (modified) clang/lib/CIR/Dialect/IR/CIRDialect.cpp (+7) - (modified) clang/test/CIR/CodeGen/call.c (+13-13) - (added) clang/test/CIR/CodeGen/no-prototype.c (+84) - (modified) clang/test/CIR/IR/func.cir (+8) ``````````diff diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 32bb9009aeec9..76f278e7a9408 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -1946,6 +1946,10 @@ def CIR_FuncOp : CIR_Op<"func", [ The function linkage information is specified by `linkage`, as defined by `GlobalLinkageKind` attribute. + The `no_proto` keyword is used to identify functions that were declared + without a prototype and, consequently, may contain calls with invalid + arguments and undefined behavior. + Example: ```mlir @@ -1964,6 +1968,7 @@ def CIR_FuncOp : CIR_Op<"func", [ let arguments = (ins SymbolNameAttr:$sym_name, CIR_VisibilityAttr:$global_visibility, TypeAttrOf<CIR_FuncType>:$function_type, + UnitAttr:$no_proto, UnitAttr:$dso_local, DefaultValuedAttr<CIR_GlobalLinkageKind, "cir::GlobalLinkageKind::ExternalLinkage">:$linkage, @@ -2005,13 +2010,6 @@ def CIR_FuncOp : CIR_Op<"func", [ return getFunctionType().getReturnTypes(); } - // TODO(cir): this should be an operand attribute, but for now we just hard- - // wire this as a function. Will later add a $no_proto argument to this op. - bool getNoProto() { - assert(!cir::MissingFeatures::opFuncNoProto()); - return false; - } - //===------------------------------------------------------------------===// // SymbolOpInterface Methods //===------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index e1a5c3d9ca337..3e2dc6bc91297 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -73,14 +73,16 @@ struct MissingFeatures { // FuncOp handling static bool opFuncOpenCLKernelMetadata() { return false; } static bool opFuncAstDeclAttr() { return false; } + static bool opFuncAttributesForDefinition() { return false; } static bool opFuncCallingConv() { return false; } - static bool opFuncExtraAttrs() { return false; } - static bool opFuncNoProto() { return false; } static bool opFuncCPUAndFeaturesAttributes() { return false; } - static bool opFuncSection() { return false; } - static bool opFuncMultipleReturnVals() { return false; } - static bool opFuncAttributesForDefinition() { return false; } + static bool opFuncExceptions() { return false; } + static bool opFuncExtraAttrs() { return false; } static bool opFuncMaybeHandleStaticInExternC() { return false; } + static bool opFuncMultipleReturnVals() { return false; } + static bool opFuncOperandBundles() { return false; } + static bool opFuncParameterAttributes() { return false; } + static bool opFuncSection() { return false; } static bool setLLVMFunctionFEnvAttributes() { return false; } static bool setFunctionAttributes() { return false; } @@ -109,6 +111,7 @@ struct MissingFeatures { static bool opCallCIRGenFuncInfoExtParamInfo() { return false; } static bool opCallLandingPad() { return false; } static bool opCallContinueBlock() { return false; } + static bool opCallChain() { return false; } // CXXNewExpr static bool exprNewNullCheck() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 938d1436a76ea..4d21ad51986c8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -582,6 +582,15 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo, cir::FuncOp directFuncOp; if (auto fnOp = dyn_cast<cir::FuncOp>(calleePtr)) { directFuncOp = fnOp; + } else if (auto getGlobalOp = dyn_cast<cir::GetGlobalOp>(calleePtr)) { + // FIXME(cir): This peephole optimization avoids indirect calls for + // builtins. This should be fixed in the builtin declaration instead by + // not emitting an unecessary get_global in the first place. + // However, this is also used for no-prototype functions. + mlir::Operation *globalOp = cgm.getGlobalValue(getGlobalOp.getName()); + assert(globalOp && "undefined global function"); + directFuncOp = llvm::dyn_cast<cir::FuncOp>(globalOp); + assert(directFuncOp && "operation is not a function"); } else { [[maybe_unused]] mlir::ValueTypeRange<mlir::ResultRange> resultTypes = calleePtr->getResultTypes(); diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.h b/clang/lib/CIR/CodeGen/CIRGenCall.h index bd113293fdafd..a78956b4df887 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.h +++ b/clang/lib/CIR/CodeGen/CIRGenCall.h @@ -116,6 +116,11 @@ class CIRGenCallee { assert(isOrdinary()); return reinterpret_cast<mlir::Operation *>(kindOrFunctionPtr); } + + void setFunctionPointer(mlir::Operation *functionPtr) { + assert(isOrdinary()); + kindOrFunctionPtr = SpecialKind(reinterpret_cast<uintptr_t>(functionPtr)); + } }; /// Type for representing both the decl and type of parameters to a function. diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 7ff5f26be21b4..908d1690a7d99 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -1269,7 +1269,7 @@ RValue CIRGenFunction::getUndefRValue(QualType ty) { } RValue CIRGenFunction::emitCall(clang::QualType calleeTy, - const CIRGenCallee &callee, + const CIRGenCallee &origCallee, const clang::CallExpr *e, ReturnValueSlot returnValue) { // Get the actual function type. The callee type will always be a pointer to @@ -1280,6 +1280,8 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy, calleeTy = getContext().getCanonicalType(calleeTy); auto pointeeTy = cast<PointerType>(calleeTy)->getPointeeType(); + CIRGenCallee callee = origCallee; + if (getLangOpts().CPlusPlus) assert(!cir::MissingFeatures::sanitizers()); @@ -1296,6 +1298,44 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy, const CIRGenFunctionInfo &funcInfo = cgm.getTypes().arrangeFreeFunctionCall(args, fnType); + // C99 6.5.2.2p6: + // If the expression that denotes the called function has a type that does + // not include a prototype, [the default argument promotions are performed]. + // If the number of arguments does not equal the number of parameters, the + // behavior is undefined. If the function is defined with a type that + // includes a prototype, and either the prototype ends with an ellipsis (, + // ...) or the types of the arguments after promotion are not compatible + // with the types of the parameters, the behavior is undefined. If the + // function is defined with a type that does not include a prototype, and + // the types of the arguments after promotion are not compatible with those + // of the parameters after promotion, the behavior is undefined [except in + // some trivial cases]. + // That is, in the general case, we should assume that a call through an + // unprototyped function type works like a *non-variadic* call. The way we + // make this work is to cast to the exxact type fo the promoted arguments. + if (isa<FunctionNoProtoType>(fnType)) { + assert(!cir::MissingFeatures::opCallChain()); + assert(!cir::MissingFeatures::addressSpace()); + cir::FuncType calleeTy = getTypes().getFunctionType(funcInfo); + // get non-variadic function type + calleeTy = cir::FuncType::get(calleeTy.getInputs(), + calleeTy.getReturnType(), false); + auto calleePtrTy = cir::PointerType::get(calleeTy); + + mlir::Operation *fn = callee.getFunctionPointer(); + mlir::Value addr; + if (auto funcOp = llvm::dyn_cast<cir::FuncOp>(fn)) { + addr = builder.create<cir::GetGlobalOp>( + getLoc(e->getSourceRange()), + cir::PointerType::get(funcOp.getFunctionType()), funcOp.getSymName()); + } else { + addr = fn->getResult(0); + } + + fn = builder.createBitcast(addr, calleePtrTy).getDefiningOp(); + callee.setFunctionPointer(fn); + } + assert(!cir::MissingFeatures::opCallNoPrototypeFunc()); assert(!cir::MissingFeatures::opCallFnInfoOpts()); assert(!cir::MissingFeatures::hip()); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 350270518156e..f432f0ed536b1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1103,6 +1103,60 @@ cir::GlobalLinkageKind CIRGenModule::getCIRLinkageForDeclarator( return cir::GlobalLinkageKind::ExternalLinkage; } +/// This function is called when we implement a function with no prototype, e.g. +/// "int foo() {}". If there are existing call uses of the old function in the +/// module, this adjusts them to call the new function directly. +/// +/// This is not just a cleanup: the always_inline pass requires direct calls to +/// functions to be able to inline them. If there is a bitcast in the way, it +/// won't inline them. Instcombine normally deletes these calls, but it isn't +/// run at -O0. +void CIRGenModule::replaceUsesOfNonProtoTypeWithRealFunction( + mlir::Operation *old, cir::FuncOp newFn) { + // If we're redefining a global as a function, don't transform it. + auto oldFn = mlir::dyn_cast<cir::FuncOp>(old); + if (!oldFn) + return; + + // TODO(cir): this RAUW ignores the features below. + assert(!cir::MissingFeatures::opFuncExceptions()); + assert(!cir::MissingFeatures::opFuncParameterAttributes()); + assert(!cir::MissingFeatures::opFuncOperandBundles()); + if (oldFn->getAttrs().size() <= 1) + errorNYI(old->getLoc(), + "replaceUsesOfNonProtoTypeWithRealFunction: Attribute forwarding"); + + // Mark new function as originated from a no-proto declaration. + newFn.setNoProtoAttr(oldFn.getNoProtoAttr()); + + // Iterate through all calls of the no-proto function. + std::optional<mlir::SymbolTable::UseRange> symUses = + oldFn.getSymbolUses(oldFn->getParentOp()); + for (const mlir::SymbolTable::SymbolUse &use : symUses.value()) { + mlir::OpBuilder::InsertionGuard guard(builder); + + if (auto noProtoCallOp = mlir::dyn_cast<cir::CallOp>(use.getUser())) { + builder.setInsertionPoint(noProtoCallOp); + + // Patch call type with the real function type. + cir::CallOp realCallOp = builder.createCallOp( + noProtoCallOp.getLoc(), newFn, noProtoCallOp.getOperands()); + + // Replace old no proto call with fixed call. + noProtoCallOp.replaceAllUsesWith(realCallOp); + noProtoCallOp.erase(); + } else if (auto getGlobalOp = + mlir::dyn_cast<cir::GetGlobalOp>(use.getUser())) { + // Replace type + getGlobalOp.getAddr().setType( + cir::PointerType::get(newFn.getFunctionType())); + } else { + errorNYI(use.getUser()->getLoc(), + "replaceUsesOfNonProtoTypeWithRealFunction: unexpected use"); + } + } +} + cir::GlobalLinkageKind CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) { assert(!isConstant && "constant variables NYI"); @@ -1729,6 +1783,34 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction( invalidLoc ? theModule->getLoc() : getLoc(funcDecl->getSourceRange()), mangledName, mlir::cast<cir::FuncType>(funcType), funcDecl); + // If we already created a function with the same mangled name (but different + // type) before, take its name and add it to the list of functions to be + // replaced with F at the end of CodeGen. + // + // This happens if there is a prototype for a function (e.g. "int f()") and + // then a definition of a different type (e.g. "int f(int x)"). + if (entry) { + + // Fetch a generic symbol-defining operation and its uses. + auto symbolOp = mlir::cast<mlir::SymbolOpInterface>(entry); + + // TODO(cir): When can this symbol be something other than a function? + if (!isa<cir::FuncOp>(entry)) + errorNYI(d->getSourceRange(), "getOrCreateCIRFunction: non-FuncOp"); + + // This might be an implementation of a function without a prototype, in + // which case, try to do special replacement of calls which match the new + // prototype. The really key thing here is that we also potentially drop + // arguments from the call site so as to make a direct call, which makes the + // inliner happier and suppresses a number of optimizer warnings (!) about + // dropping arguments. + if (symbolOp.getSymbolUses(symbolOp->getParentOp())) + replaceUsesOfNonProtoTypeWithRealFunction(entry, funcOp); + + // Obliterate no-proto declaration. + entry->erase(); + } + if (d) setFunctionAttributes(gd, funcOp, /*isIncompleteFunction=*/false, isThunk); @@ -1805,7 +1887,9 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, func = builder.create<cir::FuncOp>(loc, name, funcType); assert(!cir::MissingFeatures::opFuncAstDeclAttr()); - assert(!cir::MissingFeatures::opFuncNoProto()); + + if (funcDecl && !funcDecl->hasPrototype()) + func.setNoProtoAttr(builder.getUnitAttr()); assert(func.isDeclaration() && "expected empty body"); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 16922b115027e..b9340a855afd9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -308,6 +308,9 @@ class CIRGenModule : public CIRGenTypeCache { static void setInitializer(cir::GlobalOp &op, mlir::Attribute value); + void replaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *old, + cir::FuncOp newFn); + cir::FuncOp getOrCreateCIRFunction(llvm::StringRef mangledName, mlir::Type funcType, clang::GlobalDecl gd, bool forVTable, diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index cd77166622fac..69a4ee413d295 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1464,10 +1464,14 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { llvm::SMLoc loc = parser.getCurrentLocation(); mlir::Builder &builder = parser.getBuilder(); + mlir::StringAttr noProtoNameAttr = getNoProtoAttrName(state.name); mlir::StringAttr visNameAttr = getSymVisibilityAttrName(state.name); mlir::StringAttr visibilityNameAttr = getGlobalVisibilityAttrName(state.name); mlir::StringAttr dsoLocalNameAttr = getDsoLocalAttrName(state.name); + if (parser.parseOptionalKeyword(noProtoNameAttr).succeeded()) + state.addAttribute(noProtoNameAttr, parser.getBuilder().getUnitAttr()); + // Default to external linkage if no keyword is provided. state.addAttribute(getLinkageAttrNameString(), GlobalLinkageKindAttr::get( @@ -1572,6 +1576,9 @@ mlir::Region *cir::FuncOp::getCallableRegion() { } void cir::FuncOp::print(OpAsmPrinter &p) { + if (getNoProto()) + p << " no_proto"; + if (getComdat()) p << " comdat"; diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c index 83a66fca638c2..9d516c6d831d8 100644 --- a/clang/test/CIR/CodeGen/call.c +++ b/clang/test/CIR/CodeGen/call.c @@ -11,7 +11,7 @@ struct S { }; void f1(struct S); -void f2() { +void f2(void) { struct S s; f1(s); } @@ -28,8 +28,8 @@ void f2() { // OGCG: %[[S:.+]] = load i64, ptr %{{.+}}, align 4 // OGCG-NEXT: call void @f1(i64 %[[S]]) -struct S f3(); -void f4() { +struct S f3(void); +void f4(void) { struct S s = f3(); } @@ -38,11 +38,11 @@ void f4() { // CIR-NEXT: cir.store align(4) %[[S]], %{{.+}} : !rec_S, !cir.ptr<!rec_S> // LLVM-LABEL: define{{.*}} void @f4() { -// LLVM: %[[S:.+]] = call %struct.S (...) @f3() +// LLVM: %[[S:.+]] = call %struct.S @f3() // LLVM-NEXT: store %struct.S %[[S]], ptr %{{.+}}, align 4 // OGCG-LABEL: define{{.*}} void @f4() #0 { -// OGCG: %[[S:.+]] = call i64 (...) @f3() +// OGCG: %[[S:.+]] = call i64 @f3() // OGCG-NEXT: store i64 %[[S]], ptr %{{.+}}, align 4 struct Big { @@ -50,9 +50,9 @@ struct Big { }; void f5(struct Big); -struct Big f6(); +struct Big f6(void); -void f7() { +void f7(void) { struct Big b; f5(b); } @@ -69,7 +69,7 @@ void f7() { // OGCG: %[[B:.+]] = alloca %struct.Big, align 8 // OGCG-NEXT: call void @f5(ptr noundef byval(%struct.Big) align 8 %[[B]]) -void f8() { +void f8(void) { struct Big b = f6(); } @@ -78,14 +78,14 @@ void f8() { // CIR: cir.store align(4) %[[B]], %{{.+}} : !rec_Big, !cir.ptr<!rec_Big> // LLVM-LABEL: define{{.*}} void @f8() { -// LLVM: %[[B:.+]] = call %struct.Big (...) @f6() +// LLVM: %[[B:.+]] = call %struct.Big @f6() // LLVM-NEXT: store %struct.Big %[[B]], ptr %{{.+}}, align 4 // OGCG-LABEL: define{{.*}} void @f8() #0 { // OGCG: %[[B:.+]] = alloca %struct.Big, align 4 -// OGCG-NEXT: call void (ptr, ...) @f6(ptr dead_on_unwind writable sret(%struct.Big) align 4 %[[B]]) +// OGCG-NEXT: call void @f6(ptr dead_on_unwind writable sret(%struct.Big) align 4 %[[B]]) -void f9() { +void f9(void) { f1(f3()); } @@ -98,14 +98,14 @@ void f9() { // LLVM-LABEL: define{{.*}} void @f9() { // LLVM: %[[SLOT:.+]] = alloca %struct.S, i64 1, align 4 -// LLVM-NEXT: %[[RET:.+]] = call %struct.S (...) @f3() +// LLVM-NEXT: %[[RET:.+]] = call %struct.S @f3() // LLVM-NEXT: store %struct.S %[[RET]], ptr %[[SLOT]], align 4 // LLVM-NEXT: %[[ARG:.+]] = load %struct.S, ptr %[[SLOT]], align 4 // LLVM-NEXT: call void @f1(%struct.S %[[ARG]]) // OGCG-LABEL: define{{.*}} void @f9() #0 { // OGCG: %[[SLOT:.+]] = alloca %struct.S, align 4 -// OGCG-NEXT: %[[RET:.+]] = call i64 (...) @f3() +// OGCG-NEXT: %[[RET:.+]] = call i64 @f3() // OGCG-NEXT: store i64 %[[RET]], ptr %[[SLOT]], align 4 // OGCG-NEXT: %[[ARG:.+]] = load i64, ptr %[[SLOT]], align 4 // OGCG-NEXT: call void @f1(i64 %[[ARG]]) diff --git a/clang/test/CIR/CodeGen/no-prototype.c b/clang/test/CIR/CodeGen/no-prototype.c new file mode 100644 index 0000000000000..4be6a94c12129 --- /dev/null +++ b/clang/test/CIR/CodeGen/no-prototype.c @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +//===----------------------------------------------------------------------===// +// DEFINED BEHAVIOUR +//===----------------------------------------------------------------------===// + +// No-proto definition followed by a correct call. +int noProto0(x) int x; { return x; } +// CHECK: cir.func no_proto dso_local @noProto0(%arg0: !s32i {{.+}}) -> !s32i +int test0(int x) { + // CHECK: cir.func dso_local @test0 + return noProto0(x); // We know the definition. Should be a direct call. + // CHECK: %{{.+}} = cir.call @noProto0(%{{.+}}) +} + +// Declaration without prototype followed by its definition, then a correct call. +// +// Prototyped definition overrides no-proto declaration before any call is made, +// only allowing calls with proper arguments. This is the only case where the +// definition is not marked as no-proto. +int noProto1(); +int noProto1(int x) { return x; } +// CHECK: cir.func dso_local @noProto1(%arg0: !s32i {{.+}}) -> !s32i +int test1(int x) { + // CHECK: cir.func dso_local @test1 + return noProto1(x); + // CHECK: %{{.+}} = cir.call @noProto1(%{{[0-9]+}}) : (!s32i) -> !s32i +} + +// Declaration without prototype followed by a correct call, then its definition. +// +// Call to no-proto is made before definition, so a variadic call that takes anything +// is created. Later, when the definition is found, no-proto is replaced. +int noProto2(); +int test2(int x) { + return noProto2(x); + // CHECK: [[GGO:%.*]] = cir.get_global @noProto2 : !cir.ptr<!cir.func<(!s32i) -> !s32i>> + // CHECK: {{.*}} = cir.call [[GGO]](%{{[0-9]+}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i +} +int noProto2(int x) { return x; } +// CHECK: cir.func no_proto dso_local @noProto2(%arg0: !s32i {{.+}}) -> !s32i + +// No-proto declarat... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/150553 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits