gchatelet updated this revision to Diff 198436.
gchatelet added a comment.
Herald added a subscriber: jdoerfert.
- Add documentation.
- Fix permissive HasNoRuntimeAttribute
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D61634/new/
https://reviews.llvm.org/D61634
Files:
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/lib/CodeGen/CGCall.cpp
clang/lib/Sema/SemaDeclAttr.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/IR/IRBuilder.cpp
llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
llvm/test/CodeGen/X86/memcpy.ll
Index: llvm/test/CodeGen/X86/memcpy.ll
===================================================================
--- llvm/test/CodeGen/X86/memcpy.ll
+++ llvm/test/CodeGen/X86/memcpy.ll
@@ -7,7 +7,7 @@
; Variable memcpy's should lower to calls.
-define i8* @test1(i8* %a, i8* %b, i64 %n) nounwind {
+define void @test1(i8* %a, i8* %b, i64 %n) nounwind {
; LINUX-LABEL: test1:
; LINUX: # %bb.0: # %entry
; LINUX-NEXT: jmp memcpy # TAILCALL
@@ -17,11 +17,11 @@
; DARWIN-NEXT: jmp _memcpy ## TAILCALL
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0 )
- ret i8* %a
+ ret void
}
; Variable memcpy's should lower to calls.
-define i8* @test2(i64* %a, i64* %b, i64 %n) nounwind {
+define void @test2(i64* %a, i64* %b, i64 %n) nounwind {
; LINUX-LABEL: test2:
; LINUX: # %bb.0: # %entry
; LINUX-NEXT: jmp memcpy # TAILCALL
@@ -33,7 +33,25 @@
%tmp14 = bitcast i64* %a to i8*
%tmp25 = bitcast i64* %b to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp14, i8* align 8 %tmp25, i64 %n, i1 0 )
- ret i8* %tmp14
+ ret void
+}
+
+; Variable length memcpy's with disabled runtime should lower to repmovsb.
+define void @memcpy_no_runtime(i8* %a, i8* %b, i64 %n) nounwind {
+; LINUX-LABEL: memcpy_no_runtime:
+; LINUX: # %bb.0: # %entry
+; LINUX-NEXT: movq %rdx, %rcx
+; LINUX-NEXT: rep;movsb (%rsi), %es:(%rdi)
+; LINUX-NEXT: retq
+;
+; DARWIN-LABEL: memcpy_no_runtime:
+; DARWIN: ## %bb.0: ## %entry
+; DARWIN-NEXT: movq %rdx, %rcx
+; DARWIN-NEXT: rep;movsb (%rsi), %es:(%rdi)
+; DARWIN-NEXT: retq
+entry:
+; The call below has a variable size and carries the call-site attribute that
+; lists memcpy, so the checks above expect an inline copy rather than a call.
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0 ) "no_runtime_for" = "memcpy"
+ ret void
}
; Large constant memcpy's should lower to a call when optimizing for size.
Index: llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -314,5 +314,9 @@
Size.getValueType(), Align, isVolatile,
AlwaysInline, DstPtrInfo, SrcPtrInfo);
+ // Handle runtime (non-constant) sizes with 'rep movsb' when AlwaysInline is
+ // set, i.e. when the copy must not be lowered to a library call.
+ if (AlwaysInline)
+ return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
return SDValue();
}
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -96,6 +96,17 @@
return II;
}
+/// Copy the enclosing function's "no_runtime_for" attribute onto the memory
+/// intrinsic call \p CI when the attribute lists \p FunctionName.
+///
+/// The attribute value is a comma-separated list of function names. Entries are
+/// compared for exact equality, so an entry that merely contains
+/// \p FunctionName as a substring (e.g. "wmemcpy" for "memcpy") does not match.
+static void ForwardNoRuntimeAttribute(const Function *F,
+                                      StringRef FunctionName,
+                                      CallInst *CI) {
+  if (!F->hasFnAttribute("no_runtime_for"))
+    return;
+  const Attribute A = F->getFnAttribute("no_runtime_for");
+  // Walk the list with StringRef::split: each step peels off the text before
+  // the next ',' and keeps the remainder for the following iteration.
+  StringRef Remaining = A.getValueAsString();
+  while (!Remaining.empty()) {
+    const std::pair<StringRef, StringRef> Parts = Remaining.split(',');
+    if (Parts.first == FunctionName) {
+      // Forward the whole attribute, not just the matching entry.
+      CI->addAttribute(AttributeList::FunctionIndex, A);
+      return;
+    }
+    Remaining = Parts.second;
+  }
+}
+
CallInst *IRBuilderBase::
CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
@@ -103,7 +114,8 @@
Ptr = getCastedInt8PtrValue(Ptr);
Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
Type *Tys[] = { Ptr->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
+ Function *F = BB->getParent();
+ Module *M = F->getParent();
Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -121,6 +133,8 @@
if (NoAliasTag)
CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+ ForwardNoRuntimeAttribute(F, "memset", CI);
+
return CI;
}
@@ -165,7 +179,8 @@
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
+ Function *F = BB->getParent();
+ Module *M = F->getParent();
Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -190,6 +205,8 @@
if (NoAliasTag)
CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+ ForwardNoRuntimeAttribute(F, "memcpy", CI);
+
return CI;
}
@@ -245,7 +262,8 @@
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
+ Function *F = BB->getParent();
+ Module *M = F->getParent();
Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -266,6 +284,8 @@
if (NoAliasTag)
CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+ ForwardNoRuntimeAttribute(F, "memmove", CI);
+
return CI;
}
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5547,6 +5547,16 @@
}
}
+/// Return true if call \p I carries a "no_runtime_for" function attribute whose
+/// comma-separated value has an entry exactly equal to \p FunctionName.
+static bool HasNoRuntimeAttribute(const CallInst &I, StringRef FunctionName) {
+  if (I.hasFnAttr("no_runtime_for")) {
+    const StringRef NameList =
+        I.getAttribute(AttributeList::FunctionIndex, "no_runtime_for")
+            .getValueAsString();
+    SmallVector<StringRef, 4> Names;
+    NameList.split(Names, ",");
+    return is_contained(Names, FunctionName);
+  }
+  return false;
+}
+
/// Lower the call to the specified intrinsic function. If we want to emit this
/// as a call to a named external function, return the name. Otherwise, lower it
/// and return null.
@@ -5622,8 +5632,9 @@
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
+ bool isAlwaysInline = HasNoRuntimeAttribute(I, "memcpy");
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
- false, isTC,
+ isAlwaysInline, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -1104,6 +1104,30 @@
cast<NamedDecl>(D), AL.getAttributeSpellingListIndex()));
}
+/// Handle the 'no_runtime_for' attribute: collect every string-literal argument
+/// as a function name and attach a NoRuntimeForAttr to \p D.
+static void handleNoRuntimeFor(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // Only one instance of the attribute is accepted per declaration.
+  // FIXME: this reuses the "once per parameter" diagnostic; confirm the wording
+  // is appropriate for an attribute whose subject is a function.
+  if (D->hasAttr<NoRuntimeForAttr>()) {
+    S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
+    return;
+  }
+
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
+    return;
+
+  const unsigned NumArgs = AL.getNumArgs();
+  SmallVector<StringRef, 4> FunctionNames;
+  FunctionNames.reserve(NumArgs);
+  for (unsigned I = 0; I != NumArgs; ++I) {
+    StringRef FunctionName;
+    SourceLocation LiteralLoc;
+    // Stop without adding the attribute if the argument check fails.
+    if (!S.checkStringLiteralArgumentAttr(AL, I, FunctionName, &LiteralLoc))
+      return;
+    // TODO: validate that FunctionName names a supported runtime function;
+    // every string is currently accepted as-is.
+    FunctionNames.push_back(FunctionName);
+  }
+
+  D->addAttr(::new (S.Context) NoRuntimeForAttr(
+      AL.getRange(), S.Context, FunctionNames.data(), FunctionNames.size(),
+      AL.getAttributeSpellingListIndex()));
+}
+
static void handlePassObjectSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (D->hasAttr<PassObjectSizeAttr>()) {
S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
@@ -6734,6 +6758,9 @@
case ParsedAttr::AT_DiagnoseIf:
handleDiagnoseIfAttr(S, D, AL);
break;
+ case ParsedAttr::AT_NoRuntimeFor:
+ handleNoRuntimeFor(S, D, AL);
+ break;
case ParsedAttr::AT_ExtVectorType:
handleExtVectorTypeAttr(S, D, AL);
break;
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1846,6 +1846,16 @@
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+ if (const auto *Attr = TargetDecl->getAttr<NoRuntimeForAttr>()) {
+ llvm::SmallVector<StringRef, 4> FunctionNames(Attr->functionNames_begin(),
+ Attr->functionNames_end());
+ llvm::sort(FunctionNames);
+ FunctionNames.erase(
+ std::unique(FunctionNames.begin(), FunctionNames.end()),
+ FunctionNames.end());
+ FuncAttrs.addAttribute("no_runtime_for",
+ llvm::join(FunctionNames, ","));
+ }
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -3738,7 +3738,7 @@
def WebAssemblyImportModuleDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
-Clang supports the ``__attribute__((import_module(<module_name>)))``
+Clang supports the ``__attribute__((import_module(<module_name>)))``
attribute for the WebAssembly target. This attribute may be attached to a
function declaration, where it modifies how the symbol is to be imported
within the WebAssembly linking environment.
@@ -3755,7 +3755,7 @@
def WebAssemblyImportNameDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
-Clang supports the ``__attribute__((import_name(<name>)))``
+Clang supports the ``__attribute__((import_name(<name>)))``
attribute for the WebAssembly target. This attribute may be attached to a
function declaration, where it modifies how the symbol is to be imported
within the WebAssembly linking environment.
@@ -3926,7 +3926,7 @@
(`start_routine`) is called zero or more times by the `pthread_create` function,
and that the fourth parameter (`arg`) is passed along. Note that the callback
behavior of `pthread_create` is automatically recognized by Clang. In addition,
-the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for
+the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for
`#pragma omp target teams` and `#pragma omp parallel`, respectively, are also
automatically recognized as broker functions. Further functions might be added
in the future.
@@ -4105,3 +4105,30 @@
``__attribute__((malloc))``.
}];
}
+
+def NoRuntimeForDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``no_runtime_for`` attribute prevents the compiler from synthesizing calls
+to the runtime functions named in its arguments while compiling the annotated
+function.
+
+It is useful when implementing the runtime functions themselves, such as
+``memcpy`` or the Objective-C runtime: the compiler could otherwise recognize
+the implementation and replace it with a call to the very function being
+defined, resulting in a chicken-and-egg problem.
+
+.. code-block:: c++
+
+  extern "C" void *memcpy(char *dst, const char *src, size_t count)
+      __attribute__((no_runtime_for("memcpy"))) {
+    char *const result = dst;  // memcpy returns the original destination.
+  #pragma clang loop vectorize(enable) interleave(enable) unroll(disable)
+    for (; count >= 4; count -= 4, dst += 4, src += 4)
+      __builtin_memcpy(dst, src, 4);
+    switch (count) {
+    case 1: __builtin_memcpy(dst, src, 1); break;
+    case 2: __builtin_memcpy(dst, src, 2); break;
+    case 3: __builtin_memcpy(dst, src, 3); break;
+    default: break;
+    }
+    return result;
+  }
+  }];
+}
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -3237,3 +3237,10 @@
let Subjects = SubjectList<[NonParmVar, Function, Block, ObjCMethod]>;
let Documentation = [ObjCExternallyRetainedDocs];
}
+
+// Attribute spelled "no_runtime_for". Its subject is a function and it takes a
+// variadic list of string arguments, stored as FunctionNames.
+def NoRuntimeFor : InheritableAttr {
+ let Spellings = [Clang<"no_runtime_for">];
+ let Args = [VariadicStringArgument<"FunctionNames">];
+ let Subjects = SubjectList<[Function]>;
+ let Documentation = [NoRuntimeForDocs];
+}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits