gchatelet updated this revision to Diff 200998.
gchatelet added a comment.

- Use no-builtin instead of no-runtime-for.
- Use one attribute per runtime function to make merging easier.
The patch is still WIP and needs more work.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61634/new/

https://reviews.llvm.org/D61634

Files:
  clang/include/clang/Basic/Attr.td
  clang/include/clang/Basic/AttrDocs.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/IR/IRBuilder.cpp
  llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
  llvm/test/CodeGen/X86/memcpy.ll

Index: llvm/test/CodeGen/X86/memcpy.ll
===================================================================
--- llvm/test/CodeGen/X86/memcpy.ll
+++ llvm/test/CodeGen/X86/memcpy.ll
@@ -7,7 +7,7 @@
 
 
 ; Variable memcpy's should lower to calls.
-define i8* @test1(i8* %a, i8* %b, i64 %n) nounwind {
+define void @test1(i8* %a, i8* %b, i64 %n) nounwind {
 ; LINUX-LABEL: test1:
 ; LINUX:       # %bb.0: # %entry
 ; LINUX-NEXT:    jmp memcpy # TAILCALL
@@ -17,11 +17,11 @@
 ; DARWIN-NEXT:    jmp _memcpy ## TAILCALL
 entry:
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0 )
-	ret i8* %a
+  ret void
 }
 
 ; Variable memcpy's should lower to calls.
-define i8* @test2(i64* %a, i64* %b, i64 %n) nounwind {
+define void @test2(i64* %a, i64* %b, i64 %n) nounwind {
 ; LINUX-LABEL: test2:
 ; LINUX:       # %bb.0: # %entry
 ; LINUX-NEXT:    jmp memcpy # TAILCALL
@@ -33,7 +33,25 @@
 	%tmp14 = bitcast i64* %a to i8*
 	%tmp25 = bitcast i64* %b to i8*
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp14, i8* align 8 %tmp25, i64 %n, i1 0 )
-	ret i8* %tmp14
+  ret void
+}
+
+; Variable length memcpy's with disabled runtime should lower to repmovsb.
+define void @memcpy_no_runtime(i8* %a, i8* %b, i64 %n) nounwind {
+; LINUX-LABEL: memcpy_no_runtime:
+; LINUX:       # %bb.0: # %entry
+; LINUX-NEXT:    movq %rdx, %rcx
+; LINUX-NEXT:    rep;movsb (%rsi), %es:(%rdi)
+; LINUX-NEXT:    retq
+;
+; DARWIN-LABEL: memcpy_no_runtime:
+; DARWIN:       ## %bb.0: ## %entry
+; DARWIN-NEXT:    movq %rdx, %rcx
+; DARWIN-NEXT:    rep;movsb (%rsi), %es:(%rdi)
+; DARWIN-NEXT:    retq
+entry:
+	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0 ) "no-builtin-memcpy"
+  ret void
 }
 
 ; Large constant memcpy's should lower to a call when optimizing for size.
Index: llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -314,5 +314,9 @@
                                   Size.getValueType(), Align, isVolatile,
                                   AlwaysInline, DstPtrInfo, SrcPtrInfo);
 
+  /// Handle runtime sizes through repmovsb when we AlwaysInline.
+  if (AlwaysInline)
+    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
   return SDValue();
 }
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -96,6 +96,14 @@
   return II;
 }
 
+static void ForwardAttribute(const Function *F, StringRef Attribute,
+                             CallInst *CI) {
+  if (F->hasFnAttribute(Attribute)) {
+    CI->addAttribute(AttributeList::FunctionIndex,
+                     F->getFnAttribute(Attribute));
+  }
+}
+
 CallInst *IRBuilderBase::
 CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
              bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
@@ -103,7 +111,8 @@
   Ptr = getCastedInt8PtrValue(Ptr);
   Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
   Type *Tys[] = { Ptr->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -121,6 +130,8 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardAttribute(F, "no-builtin-memset", CI);
+
   return CI;
 }
 
@@ -165,7 +176,8 @@
 
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -190,6 +202,8 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardAttribute(F, "no-builtin-memcpy", CI);
+
   return CI;
 }
 
@@ -245,7 +259,8 @@
 
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -266,6 +281,8 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardAttribute(F, "no-builtin-memmove", CI);
+
   return CI;
 }
 
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5630,8 +5630,9 @@
     bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
     // FIXME: Support passing different dest/src alignments to the memcpy DAG
     // node.
+    bool isAlwaysInline = I.hasFnAttr("no-builtin-memcpy");
     SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
-                               false, isTC,
+                               isAlwaysInline, isTC,
                                MachinePointerInfo(I.getArgOperand(0)),
                                MachinePointerInfo(I.getArgOperand(1)));
     updateDAGForMaybeTailCall(MC);
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -1104,6 +1104,30 @@
       cast<NamedDecl>(D), AL.getAttributeSpellingListIndex()));
 }
 
+static void handleNoBuiltin(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (D->hasAttr<NoBuiltinAttr>()) {
+    S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
+    return;
+  }
+
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
+    return;
+
+  std::vector<StringRef> FunctionNames;
+  for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
+    StringRef FunctionName;
+    SourceLocation LiteralLoc;
+    if (!S.checkStringLiteralArgumentAttr(AL, I, FunctionName, &LiteralLoc))
+      return;
+    // Check valid function name.
+    FunctionNames.push_back(FunctionName);
+  }
+
+  D->addAttr(::new (S.Context) NoBuiltinAttr(
+      AL.getRange(), S.Context, FunctionNames.data(), FunctionNames.size(),
+      AL.getAttributeSpellingListIndex()));
+}
+
 static void handlePassObjectSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (D->hasAttr<PassObjectSizeAttr>()) {
     S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
@@ -6746,6 +6770,9 @@
   case ParsedAttr::AT_DiagnoseIf:
     handleDiagnoseIfAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_NoBuiltin:
+    handleNoBuiltin(S, D, AL);
+    break;
   case ParsedAttr::AT_ExtVectorType:
     handleExtVectorTypeAttr(S, D, AL);
     break;
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1846,6 +1846,15 @@
       FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
     if (TargetDecl->hasAttr<ConvergentAttr>())
       FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+    if (const auto *Attr = TargetDecl->getAttr<NoBuiltinAttr>()) {
+      // TODO: check that function names are valid for the TargetLibraryInfo.
+      for(const auto& FunctionName : Attr->functionNames()){
+        SmallString<32> AttributeName;
+        AttributeName += "no-builtin-";
+        AttributeName += FunctionName;
+        FuncAttrs.addAttribute(AttributeName);
+      }
+    }
 
     if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
       AddAttributesFromFunctionProtoType(
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -3738,7 +3738,7 @@
 def WebAssemblyImportModuleDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-Clang supports the ``__attribute__((import_module(<module_name>)))`` 
+Clang supports the ``__attribute__((import_module(<module_name>)))``
 attribute for the WebAssembly target. This attribute may be attached to a
 function declaration, where it modifies how the symbol is to be imported
 within the WebAssembly linking environment.
@@ -3755,7 +3755,7 @@
 def WebAssemblyImportNameDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-Clang supports the ``__attribute__((import_name(<name>)))`` 
+Clang supports the ``__attribute__((import_name(<name>)))``
 attribute for the WebAssembly target. This attribute may be attached to a
 function declaration, where it modifies how the symbol is to be imported
 within the WebAssembly linking environment.
@@ -3959,7 +3959,7 @@
 (`start_routine`) is called zero or more times by the `pthread_create` function,
 and that the fourth parameter (`arg`) is passed along. Note that the callback
 behavior of `pthread_create` is automatically recognized by Clang. In addition,
-the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for 
+the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for
 `#pragma omp target teams` and `#pragma omp parallel`, respectively, are also
 automatically recognized as broker functions. Further functions might be added
 in the future.
@@ -4138,3 +4138,9 @@
 ``__attribute__((malloc))``.
 }];
 }
+
+def NoBuiltinDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+  }];
+}
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -299,9 +299,6 @@
 def BlocksSupported : LangOpt<"Blocks">;
 def ObjCAutoRefCount : LangOpt<"ObjCAutoRefCount">;
 
-// Language option for CMSE extensions
-def Cmse : LangOpt<"Cmse">;
-
 // Defines targets for target-specific attributes. Empty lists are unchecked.
 class TargetSpec {
   // Specifies Architectures for which the target applies, based off the
@@ -3240,3 +3237,10 @@
   let Subjects = SubjectList<[NonParmVar, Function, Block, ObjCMethod]>;
   let Documentation = [ObjCExternallyRetainedDocs];
 }
+
+def NoBuiltin : InheritableAttr {
+  let Spellings = [Clang<"no_builtin">];
+  let Args = [VariadicStringArgument<"FunctionNames">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [NoBuiltinDocs];
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to