gchatelet updated this revision to Diff 203386.
gchatelet added a comment.
- Add documentation.
- Fix permissive HasNoRuntimeAttribute
- Mark interleave as disabled in the documentation.
- Use no-builtin instead of no-runtime-for.
- Adding an llvm.memcpy.inline intrinsic.
- Adding __builtin_memcpy_inline clang builtin.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D61634/new/
https://reviews.llvm.org/D61634
Files:
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/include/clang/Basic/Builtins.def
clang/lib/CodeGen/CGBuilder.h
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CGCall.cpp
clang/lib/Sema/SemaDeclAttr.cpp
llvm/docs/LangRef.rst
llvm/include/llvm/IR/IRBuilder.h
llvm/include/llvm/IR/IntrinsicInst.h
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/IR/IRBuilder.cpp
llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
llvm/test/CodeGen/X86/memcpy-inline.ll
llvm/test/CodeGen/X86/memcpy.ll
Index: llvm/test/CodeGen/X86/memcpy.ll
===================================================================
--- llvm/test/CodeGen/X86/memcpy.ll
+++ llvm/test/CodeGen/X86/memcpy.ll
@@ -7,7 +7,7 @@
; Variable memcpy's should lower to calls.
-define i8* @test1(i8* %a, i8* %b, i64 %n) nounwind {
+define void @test1(i8* %a, i8* %b, i64 %n) nounwind {
; LINUX-LABEL: test1:
; LINUX: # %bb.0: # %entry
; LINUX-NEXT: jmp memcpy # TAILCALL
@@ -17,11 +17,11 @@
; DARWIN-NEXT: jmp _memcpy ## TAILCALL
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0 )
- ret i8* %a
+ ret void
}
; Variable memcpy's should lower to calls.
-define i8* @test2(i64* %a, i64* %b, i64 %n) nounwind {
+define void @test2(i64* %a, i64* %b, i64 %n) nounwind {
; LINUX-LABEL: test2:
; LINUX: # %bb.0: # %entry
; LINUX-NEXT: jmp memcpy # TAILCALL
@@ -33,7 +33,25 @@
%tmp14 = bitcast i64* %a to i8*
%tmp25 = bitcast i64* %b to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp14, i8* align 8 %tmp25, i64 %n, i1 0 )
- ret i8* %tmp14
+ ret void
+}
+
+; A variable-length memcpy whose call site carries the "no-builtin-memcpy"
+; attribute must not be turned into a call to memcpy; the checks below expect
+; it to be lowered to an inline `rep;movsb` instead.
+define void @memcpy_no_runtime(i8* %a, i8* %b, i64 %n) nounwind {
+; LINUX-LABEL: memcpy_no_runtime:
+; LINUX: # %bb.0: # %entry
+; LINUX-NEXT: movq %rdx, %rcx
+; LINUX-NEXT: rep;movsb (%rsi), %es:(%rdi)
+; LINUX-NEXT: retq
+;
+; DARWIN-LABEL: memcpy_no_runtime:
+; DARWIN: ## %bb.0: ## %entry
+; DARWIN-NEXT: movq %rdx, %rcx
+; DARWIN-NEXT: rep;movsb (%rsi), %es:(%rdi)
+; DARWIN-NEXT: retq
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0 ) "no-builtin-memcpy"
+ ret void
}
; Large constant memcpy's should lower to a call when optimizing for size.
Index: llvm/test/CodeGen/X86/memcpy-inline.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/memcpy-inline.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s
+
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+; A constant 8-byte llvm.memcpy.inline must be expanded in place: the checks
+; expect a single 64-bit load followed by a single 64-bit store, with no call
+; to memcpy.
+define void @test1(i8* %a, i8* %b) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rsi), %rax
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
+ tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0 )
+ ret void
+}
Index: llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -314,5 +314,9 @@
Size.getValueType(), Align, isVolatile,
AlwaysInline, DstPtrInfo, SrcPtrInfo);
+ /// Handle runtime sizes through repmovsb when we AlwaysInline.
+ if (AlwaysInline)
+ return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
return SDValue();
}
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -96,6 +96,14 @@
return II;
}
+/// Copy the function attribute named \p Attribute from \p F onto the call
+/// \p CI, attaching it at the call's function index. Does nothing when \p F
+/// does not carry that attribute.
+static void ForwardAttribute(const Function *F, StringRef Attribute,
+                             CallInst *CI) {
+  // Nothing to forward if the enclosing function was not tagged.
+  if (!F->hasFnAttribute(Attribute))
+    return;
+
+  // `auto` is required here: the parameter name shadows the Attribute type.
+  const auto Forwarded = F->getFnAttribute(Attribute);
+  CI->addAttribute(AttributeList::FunctionIndex, Forwarded);
+}
+
CallInst *IRBuilderBase::
CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
@@ -103,7 +111,8 @@
Ptr = getCastedInt8PtrValue(Ptr);
Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
Type *Tys[] = { Ptr->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
+ Function *F = BB->getParent();
+ Module *M = F->getParent();
Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -121,6 +130,8 @@
if (NoAliasTag)
CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+ ForwardAttribute(F, "no-builtin-memset", CI);
+
return CI;
}
@@ -165,7 +176,8 @@
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
+ Function *F = BB->getParent();
+ Module *M = F->getParent();
Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -190,6 +202,36 @@
if (NoAliasTag)
CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+ ForwardAttribute(F, "no-builtin-memcpy", CI);
+
+ return CI;
+}
+
+/// Emit a call to the llvm.memcpy.inline intrinsic copying \p Size from
+/// \p Src to \p Dst.
+///
+/// Both pointers are first cast to i8*. \p DstAlign and \p SrcAlign must each
+/// be 0 or a power of two; a non-zero value is recorded as the alignment of
+/// the corresponding pointer operand, while 0 leaves it unset. The volatile
+/// operand of the intrinsic is always false.
+CallInst *IRBuilderBase::CreateMemCpyInline(Value *Dst, unsigned DstAlign,
+                                            Value *Src, unsigned SrcAlign,
+                                            Value *Size) {
+  assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) &&
+         "Must be 0 or a power of 2");
+  assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) &&
+         "Must be 0 or a power of 2");
+
+  // Cast destination first, then source, so any inserted casts keep the
+  // same relative order in the block.
+  Value *DstI8 = getCastedInt8PtrValue(Dst);
+  Value *SrcI8 = getCastedInt8PtrValue(Src);
+  Value *NotVolatile = getInt1(false);
+
+  // The intrinsic is overloaded on both pointer types and the length type.
+  Type *OverloadTys[] = {DstI8->getType(), SrcI8->getType(), Size->getType()};
+  Module *ParentModule = BB->getParent()->getParent();
+  Function *Intr = Intrinsic::getDeclaration(
+      ParentModule, Intrinsic::memcpy_inline, OverloadTys);
+
+  Value *CallArgs[] = {DstI8, SrcI8, Size, NotVolatile};
+  CallInst *CI = createCallHelper(Intr, CallArgs, this);
+
+  // Alignment is only attached when the caller supplied one.
+  auto *Copy = cast<MemCpyInlineInst>(CI);
+  if (DstAlign != 0)
+    Copy->setDestAlignment(DstAlign);
+  if (SrcAlign != 0)
+    Copy->setSourceAlignment(SrcAlign);
+
return CI;
}
@@ -245,7 +287,8 @@
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
+ Function *F = BB->getParent();
+ Module *M = F->getParent();
Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -266,6 +309,8 @@
if (NoAliasTag)
CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+ ForwardAttribute(F, "no-builtin-memmove", CI);
+
return CI;
}
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5615,8 +5615,11 @@
case Intrinsic::longjmp:
lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
return;
+ case Intrinsic::memcpy_inline:
case Intrinsic::memcpy: {
- const auto &MCI = cast<MemCpyInst>(I);
+ const auto &MCI = cast<MemTransferInst>(I);
+ assert((isa<MemCpyInlineInst>(I) || isa<MemCpyInst>(I)) &&
+ "must be a memcpy");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -5628,8 +5631,10 @@
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
+ bool isAlwaysInline =
+ isa<MemCpyInlineInst>(I) || I.hasFnAttr("no-builtin-memcpy");
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
- false, isTC,
+ isAlwaysInline, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -483,6 +483,13 @@
llvm_i32_ty],
[]>;
+// llvm.memcpy.inline: memcpy semantics, guaranteed to be expanded inline.
+// Produces no result. Operands, in order: destination pointer, source
+// pointer, length (any integer type) and the i1 volatile flag. Operands 2 and
+// 3 are marked ImmArg, so the length and the flag have to be immediates.
+def int_memcpy_inline
+ : Intrinsic<[],
+ [ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ],
+ [ IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>,
+ ImmArg<2>, ImmArg<3> ]>;
+
//===------------------- Standard C Library Intrinsics --------------------===//
//
Index: llvm/include/llvm/IR/IntrinsicInst.h
===================================================================
--- llvm/include/llvm/IR/IntrinsicInst.h
+++ llvm/include/llvm/IR/IntrinsicInst.h
@@ -578,6 +578,7 @@
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset:
return true;
@@ -606,8 +607,14 @@
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::memcpy ||
- I->getIntrinsicID() == Intrinsic::memmove;
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
+ case Intrinsic::memmove:
+ return true;
+ default:
+ return false;
+ }
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@@ -626,6 +633,18 @@
}
};
+  /// Typed view over a call to the llvm.memcpy.inline intrinsic.
+  class MemCpyInlineInst : public MemTransferInst {
+  public:
+    // Type-inquiry hooks consumed by isa<>, cast<> and dyn_cast<>.
+
+    /// An intrinsic call is a MemCpyInlineInst exactly when its intrinsic ID
+    /// is memcpy_inline.
+    static bool classof(const IntrinsicInst *I) {
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::memcpy_inline:
+        return true;
+      default:
+        return false;
+      }
+    }
+
+    /// A generic value qualifies only if it is an intrinsic call that the
+    /// overload above accepts.
+    static bool classof(const Value *V) {
+      if (const auto *II = dyn_cast<IntrinsicInst>(V))
+        return classof(II);
+      return false;
+    }
+  };
+
/// This class wraps the llvm.memmove intrinsic.
class MemMoveInst : public MemTransferInst {
public:
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -460,6 +460,9 @@
MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr);
+ /// Create and insert a call to the llvm.memcpy.inline intrinsic that copies
+ /// \p Size from \p Src to \p Dst.
+ ///
+ /// \p DstAlign and \p SrcAlign must each be 0 or a power of two; a value of
+ /// 0 leaves the corresponding alignment unset. The call is always emitted
+ /// as non-volatile.
+ CallInst *CreateMemCpyInline(Value *Dst, unsigned DstAlign, Value *Src,
+ unsigned SrcAlign, Value *Size);
+
/// Create and insert an element unordered-atomic memcpy between the
/// specified pointers.
///
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -11347,6 +11347,27 @@
other operations necessary to locate the TLS area. Not all targets support
this intrinsic.
+'``llvm.memcpy.inline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
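+ declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+ i32 <len>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memcpy.inline``' intrinsic copies a block of memory from the
+source location to the destination location, like '``llvm.memcpy``', but is
+guaranteed to be expanded inline rather than lowered to a call to an external
+``memcpy`` function.
+
+Semantics:
+""""""""""
+
+The '``llvm.memcpy.inline``' intrinsic copies ``<len>`` bytes from ``<src>``
+to ``<dest>`` and returns no value. Both ``<len>`` and ``<isvolatile>`` must
+be immediate (constant) arguments. The intrinsic is overloaded on the two
+pointer types and on the integer type of ``<len>``.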
+
Standard C Library Intrinsics
-----------------------------
@@ -15154,7 +15175,7 @@
<t_vector>` of floating point values. This argument must be larger in size
than the result.
-The second and third arguments specify the rounding mode and exception
+The second and third arguments specify the rounding mode and exception
behavior as described above.
Semantics:
@@ -15178,7 +15199,7 @@
Overview:
"""""""""
-The '``llvm.experimental.constrained.fpext``' intrinsic extends a
+The '``llvm.experimental.constrained.fpext``' intrinsic extends a
floating-point ``value`` to a larger floating-point value.
Arguments:
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -1104,6 +1104,30 @@
cast<NamedDecl>(D), AL.getAttributeSpellingListIndex()));
}
+/// Process a no_builtin attribute on \p D.
+///
+/// The attribute is rejected with a diagnostic when \p D already carries a
+/// NoBuiltinAttr, when it has no arguments, or when any argument fails
+/// checkStringLiteralArgumentAttr. Otherwise a NoBuiltinAttr holding every
+/// argument string, in source order, is attached to \p D.
+static void handleNoBuiltin(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // Only a single no_builtin attribute is accepted per declaration.
+  // NOTE(review): the diagnostic used here is worded for parameters, while
+  // this attribute's subject is a function -- confirm this is intended.
+  const bool AlreadyPresent = D->hasAttr<NoBuiltinAttr>();
+  if (AlreadyPresent) {
+    S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
+    return;
+  }
+
+  const bool HasArguments = checkAttributeAtLeastNumArgs(S, AL, 1);
+  if (!HasArguments)
+    return;
+
+  std::vector<StringRef> Names;
+  const unsigned NumArgs = AL.getNumArgs();
+  for (unsigned ArgIdx = 0; ArgIdx != NumArgs; ++ArgIdx) {
+    StringRef Name;
+    SourceLocation NameLoc;
+    const bool IsStringLiteral =
+        S.checkStringLiteralArgumentAttr(AL, ArgIdx, Name, &NameLoc);
+    // Bail out on the first bad argument; nothing is attached in that case.
+    if (!IsStringLiteral)
+      return;
+    // TODO: validate the function name; every string is currently accepted.
+    Names.push_back(Name);
+  }
+
+  auto *NewAttr = ::new (S.Context)
+      NoBuiltinAttr(AL.getRange(), S.Context, Names.data(), Names.size(),
+                    AL.getAttributeSpellingListIndex());
+  D->addAttr(NewAttr);
+}
+
static void handlePassObjectSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (D->hasAttr<PassObjectSizeAttr>()) {
S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
@@ -6746,6 +6770,9 @@
case ParsedAttr::AT_DiagnoseIf:
handleDiagnoseIfAttr(S, D, AL);
break;
+ case ParsedAttr::AT_NoBuiltin:
+ handleNoBuiltin(S, D, AL);
+ break;
case ParsedAttr::AT_ExtVectorType:
handleExtVectorTypeAttr(S, D, AL);
break;
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1846,6 +1846,15 @@
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+ if (const auto *Attr = TargetDecl->getAttr<NoBuiltinAttr>()) {
+ // TODO: check that function names are valid for the TargetLibraryInfo.
+ for(const auto& FunctionName : Attr->functionNames()){
+ SmallString<32> AttributeName;
+ AttributeName += "no-builtin-";
+ AttributeName += FunctionName;
+ FuncAttrs.addAttribute(AttributeName);
+ }
+ }
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -2362,7 +2362,18 @@
Builder.CreateMemCpy(Dest, Src, SizeVal, false);
return RValue::get(Dest.getPointer());
}
-
+ case Builtin::BI__builtin_memcpy_inline: {
+ Address Dest = EmitPointerWithAlignment(E->getArg(0));
+ Address Src = EmitPointerWithAlignment(E->getArg(1));
+ uint64_t Size =
+ E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
+ EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
+ E->getArg(0)->getExprLoc(), FD, 0);
+ EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
+ E->getArg(1)->getExprLoc(), FD, 1);
+ Builder.CreateMemCpyInline(Dest, Src, Size);
+ return RValue::get(nullptr);
+ }
case Builtin::BI__builtin_char_memchr:
BuiltinID = Builtin::BI__builtin_memchr;
break;
Index: clang/lib/CodeGen/CGBuilder.h
===================================================================
--- clang/lib/CodeGen/CGBuilder.h
+++ clang/lib/CodeGen/CGBuilder.h
@@ -284,6 +284,14 @@
Size, IsVolatile);
}
+  using CGBuilderBaseTy::CreateMemCpyInline;
+  /// Address-based convenience overload: unpacks the pointer and alignment
+  /// quantity of \p Dest and \p Src, wraps \p Size in an i64 constant, and
+  /// forwards everything to the base builder's CreateMemCpyInline.
+  llvm::CallInst *CreateMemCpyInline(Address Dest, Address Src,
+                                     uint64_t Size) {
+    auto *DestPtr = Dest.getPointer();
+    auto *SrcPtr = Src.getPointer();
+    const auto DestAlign = Dest.getAlignment().getQuantity();
+    const auto SrcAlign = Src.getAlignment().getQuantity();
+    return CreateMemCpyInline(DestPtr, DestAlign, SrcPtr, SrcAlign,
+                              getInt64(Size));
+  }
+
using CGBuilderBaseTy::CreateMemMove;
llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size,
bool IsVolatile = false) {
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -504,6 +504,7 @@
BUILTIN(__builtin_vsnprintf, "ic*zcC*a", "nFP:2:")
BUILTIN(__builtin_thread_pointer, "v*", "nc")
BUILTIN(__builtin_launder, "v*v*", "nt")
+BUILTIN(__builtin_memcpy_inline, "vv*vC*z", "n")
LANGBUILTIN(__builtin_is_constant_evaluated, "b", "n", CXX_LANG)
// GCC exception builtins
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -3757,7 +3757,7 @@
def WebAssemblyImportModuleDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
-Clang supports the ``__attribute__((import_module(<module_name>)))``
+Clang supports the ``__attribute__((import_module(<module_name>)))``
attribute for the WebAssembly target. This attribute may be attached to a
function declaration, where it modifies how the symbol is to be imported
within the WebAssembly linking environment.
@@ -3774,7 +3774,7 @@
def WebAssemblyImportNameDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
-Clang supports the ``__attribute__((import_name(<name>)))``
+Clang supports the ``__attribute__((import_name(<name>)))``
attribute for the WebAssembly target. This attribute may be attached to a
function declaration, where it modifies how the symbol is to be imported
within the WebAssembly linking environment.
@@ -3978,7 +3978,7 @@
(`start_routine`) is called zero or more times by the `pthread_create` function,
and that the fourth parameter (`arg`) is passed along. Note that the callback
behavior of `pthread_create` is automatically recognized by Clang. In addition,
-the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for
+the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for
`#pragma omp target teams` and `#pragma omp parallel`, respectively, are also
automatically recognized as broker functions. Further functions might be added
in the future.
@@ -4157,3 +4157,9 @@
``__attribute__((malloc))``.
}];
}
+
+// User-facing documentation for the no_builtin function attribute.
+def NoBuiltinDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``no_builtin`` attribute applies to functions and takes one or more string
+literals, each naming a function:
+
+.. code-block:: c
+
+  __attribute__((no_builtin("memcpy")))
+  void copy(char *dst, const char *src, size_t n);
+
+For every name listed, Clang attaches a ``"no-builtin-<name>"`` function
+attribute to the function in the generated LLVM IR. For example, with
+``no_builtin("memcpy")`` the memory copies emitted for the body of the function
+are expanded inline instead of being lowered to a call to the ``memcpy``
+library function.
+
+The attribute requires at least one argument and may appear at most once on a
+declaration.
+  }];
+}
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -304,9 +304,6 @@
def ObjCNonFragileRuntime : LangOpt<"ObjCNonFragileRuntime",
"LangOpts.ObjCRuntime.allowsClassStubs()">;
-// Language option for CMSE extensions
-def Cmse : LangOpt<"Cmse">;
-
// Defines targets for target-specific attributes. Empty lists are unchecked.
class TargetSpec {
// Specifies Architectures for which the target applies, based off the
@@ -3252,3 +3249,10 @@
let Subjects = SubjectList<[NonParmVar, Function, Block, ObjCMethod]>;
let Documentation = [ObjCExternallyRetainedDocs];
}
+
+// no_builtin("name", ...): inheritable attribute, restricted to functions,
+// that carries a variadic list of function names given as string arguments.
+def NoBuiltin : InheritableAttr {
+ let Spellings = [Clang<"no_builtin">];
+ let Args = [VariadicStringArgument<"FunctionNames">];
+ let Subjects = SubjectList<[Function]>;
+ let Documentation = [NoBuiltinDocs];
+}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits