[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-05-21 Thread Daniil Fukalov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC332848: [AMDGPU] fixes for lds f32 builtins (authored by 
dfukalov, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D43281?vs=142412&id=147801#toc

Repository:
  rC Clang

https://reviews.llvm.org/D43281

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl
  test/SemaOpenCL/builtins-amdgcn-error.cl

Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -10088,6 +10088,49 @@
 CI->setConvergent();
 return CI;
   }
+  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
+llvm::SmallVector Args;
+for (unsigned I = 0; I != 5; ++I)
+  Args.push_back(EmitScalarExpr(E->getArg(I)));
+const llvm::Type *PtrTy = Args[0]->getType();
+// check pointer parameter
+if (!PtrTy->isPointerTy() ||
+E->getArg(0)
+->getType()
+->getPointeeType()
+.getQualifiers()
+.getAddressSpace() != LangAS::opencl_local ||
+!PtrTy->getPointerElementType()->isFloatTy()) {
+   CGM.Error(E->getArg(0)->getLocStart(),
+"parameter should have type \"local float*\"");
+  return nullptr;
+}
+// check float parameter
+if (!Args[1]->getType()->isFloatTy()) {
+  CGM.Error(E->getArg(1)->getLocStart(),
+"parameter should have type \"float\"");
+  return nullptr;
+}
+
+Intrinsic::ID ID;
+switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+  ID = Intrinsic::amdgcn_ds_fadd;
+  break;
+case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+  ID = Intrinsic::amdgcn_ds_fmin;
+  break;
+case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
+  ID = Intrinsic::amdgcn_ds_fmax;
+  break;
+default:
+  llvm_unreachable("Unknown BuiltinID");
+}
+Value *F = CGM.getIntrinsic(ID);
+return Builder.CreateCall(F, Args);
+  }
 
   // amdgcn workitem
   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,9 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
-BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
-BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
-BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_faddf, "ff*fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fminf, "ff*fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*fIiIiIb", "n")
 
 //===--===//
 // VI+ only builtins.
Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -91,18 +91,18 @@
 
 // CHECK-LABEL: @test_ds_fadd
 // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fadd(__attribute__((address_space(3))) float *out, float src) {
-  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+void test_ds_faddf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmin
 // CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmin(__attribute__((address_space(3))) float *out, float src) {
-  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+void test_ds_fminf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmax
 // CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmax(__attribute__((address_space(3))) float *out, float src) {
-  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+void test_ds_fmaxf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
 }
Index: test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- test/SemaOpenCL/builtins-amdgcn-error.cl
+++ test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -102,3 +102,20 @@
   *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
 }
 
+void test_ds_faddf(local float *out, float src, int a) {
+  *out = __builtin_amdgcn_ds_faddf(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' mus

[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-02-14 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov created this revision.
dfukalov added reviewers: b-sumner, arsenm.
dfukalov added a project: AMDGPU.
Herald added subscribers: cfe-commits, t-tye, tpr, dstuttard, yaxunl, nhaehnle, 
wdng, kzhuravl.

1. removed addrspace 3 specifications from builtins description strings since 
it's not target addrspaces
2. added custom processing for these builtins


Repository:
  rC Clang

https://reviews.llvm.org/D43281

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl


Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -91,18 +91,18 @@
 
 // CHECK-LABEL: @test_ds_fadd
 // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float 
%src, i32 0, i32 0, i1 false)
-void test_ds_fadd(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fadd(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmin
 // CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float 
%src, i32 0, i32 0, i1 false)
-void test_ds_fmin(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fmin(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmax
 // CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float 
%src, i32 0, i32 0, i1 false)
-void test_ds_fmax(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fmax(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
 }
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9860,6 +9860,29 @@
 CI->setConvergent();
 return CI;
   }
+  case AMDGPU::BI__builtin_amdgcn_ds_fadd:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmin:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmax: {
+llvm::SmallVector Args;
+for (unsigned I = 0; I != 5; ++I)
+  Args.push_back(EmitScalarExpr(E->getArg(I)));
+Intrinsic::ID ID;
+switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_ds_fadd:
+  ID = Intrinsic::amdgcn_ds_fadd;
+  break;
+case AMDGPU::BI__builtin_amdgcn_ds_fmin:
+  ID = Intrinsic::amdgcn_ds_fmin;
+  break;
+case AMDGPU::BI__builtin_amdgcn_ds_fmax:
+  ID = Intrinsic::amdgcn_ds_fmax;
+  break;
+default:
+  llvm_unreachable("Unknown BuiltinID");
+}
+Value *F = CGM.getIntrinsic(ID);
+return Builder.CreateCall(F, Args);
+  }
 
   // amdgcn workitem
   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,9 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
-BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
-BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
-BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*fiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*fiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*fiIiIb", "n")
 
 
//===--===//
 // VI+ only builtins.


Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -91,18 +91,18 @@
 
 // CHECK-LABEL: @test_ds_fadd
 // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fadd(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fadd(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmin
 // CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmin(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fmin(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmax
 // CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmax(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fmax(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
 }
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9860,6 +9860,29 @@
  

[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-02-15 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov updated this revision to Diff 134503.
dfukalov edited the summary of this revision.
dfukalov added a comment.

diff updated as requested by reviewer


https://reviews.llvm.org/D43281

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl
  test/SemaOpenCL/builtins-amdgcn-error.cl

Index: test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- test/SemaOpenCL/builtins-amdgcn-error.cl
+++ test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -102,3 +102,20 @@
   *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
 }
 
+void test_ds_fadd(local float *out, float src, int a) {
+  *out = __builtin_amdgcn_ds_fadd(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_fadd' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fadd' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fadd' must be a constant integer}}
+}
+
+void test_ds_fmin(local float *out, float src, int a) {
+  *out = __builtin_amdgcn_ds_fmin(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmin' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmin' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmin' must be a constant integer}}
+}
+
+void test_ds_fmax(local float *out, float src, int a) {
+  *out = __builtin_amdgcn_ds_fmax(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmax' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmax' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmax' must be a constant integer}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -91,18 +91,18 @@
 
 // CHECK-LABEL: @test_ds_fadd
 // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fadd(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fadd(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmin
 // CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmin(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fmin(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmax
 // CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmax(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fmax(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
 }
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9860,6 +9860,49 @@
 CI->setConvergent();
 return CI;
   }
+  case AMDGPU::BI__builtin_amdgcn_ds_fadd:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmin:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmax: {
+llvm::SmallVector Args;
+for (unsigned I = 0; I != 5; ++I)
+  Args.push_back(EmitScalarExpr(E->getArg(I)));
+const llvm::Type *PtrTy = Args[0]->getType();
+// check pointer parameter
+if (!PtrTy->isPointerTy() ||
+LangAS::opencl_local != E->getArg(0)
+->getType()
+->getPointeeType()
+.getQualifiers()
+.getAddressSpace() ||
+!PtrTy->getPointerElementType()->isFloatTy()) {
+  CGM.Error(E->getArg(0)->getLocStart(),
+"parameter should have type \"local float*\"");
+  return nullptr;
+}
+// check float parameter
+if (!Args[1]->getType()->isFloatTy()) {
+  CGM.Error(E->getArg(1)->getLocStart(),
+"parameter should have type \"float\"");
+  return nullptr;
+}
+
+Intrinsic::ID ID;
+switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_ds_fadd:
+  ID = Intrinsic::amdgcn_ds_fadd;
+  break;
+case AMDGPU::BI__builtin_amdgcn_ds_fmin:
+  ID = Intrinsic::amdgcn_ds_fmin;
+  break;
+case AMDGPU::BI__builtin_amdgcn_ds_fmax

[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-02-15 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov marked an inline comment as done.
dfukalov added inline comments.



Comment at: lib/CodeGen/CGBuiltin.cpp:9866
+  case AMDGPU::BI__builtin_amdgcn_ds_fmax: {
+llvm::SmallVector Args;
+for (unsigned I = 0; I != 5; ++I)

b-sumner wrote:
> Can the pointer argument address space be checked here?
Added code to check the pointer and float parameters,
though this part is not active at the moment since the builtins are not 
processed as custom.
I'm going to switch to the custom path by removing the attribute of the intrinsics in llvm 
trunk in the next step.


https://reviews.llvm.org/D43281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-02-24 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov marked an inline comment as done.
dfukalov added a comment.

ping...


https://reviews.llvm.org/D43281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-02-26 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added a comment.

In https://reviews.llvm.org/D43281#1018657, @arsenm wrote:

> Can’t you just change the description to be the LangAS value? I also thought 
> these happened to be the same already


Am I right that you mean to change the semantics of the addrspace number in a 
description string for all targets?

At the moment it's finally checked in `ASTContext::getAddrSpaceQualType` that 
`LangAS` is equal to the addrspace returned by `QualType`.
And for addrspace "2" specified in description, this `QualType` is 
"__attribute__((address_space(3))) float". And returns "11" since it's target 
addrspace, defined in `LangAS` after `FirstTargetAddressSpace`.
So `ASTContext::getAddrSpaceQualType` goes through check 
`CanT.getAddressSpace() == AddressSpace` and then hits assertions that the 
parameter already has addrspace specified.


https://reviews.llvm.org/D43281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-03-01 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added a comment.

The problem is that if I set addrspace "2" in the description string, 
CanT.getAddressSpace() returns the target addrspace value "11" (shifted in the 
enum), which is then compared with the input LangAS addrspace ("2", "opencl_local" in our 
case).
So I cannot put a number in a description string that will be equal to the LangAS 
addrspace "opencl_local".

Moreover, this change is preparation to move to custom processing of these 
builtins. Then I'm going to remove link (GCCBuiltin in IntrinsicsAMDGPU.td) 
from the llvm intrinsics definitions. Then I'll be able to switch on custom 
processing in cfe.


https://reviews.llvm.org/D43281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-03-02 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov updated this revision to Diff 136752.
dfukalov edited the summary of this revision.
dfukalov set the repository for this revision to rC Clang.
dfukalov added a comment.

addrspace specifications are kept in descriptions strings


Repository:
  rC Clang

https://reviews.llvm.org/D43281

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  lib/CodeGen/CGBuiltin.cpp
  test/SemaOpenCL/builtins-amdgcn-error.cl


Index: test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- test/SemaOpenCL/builtins-amdgcn-error.cl
+++ test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -102,3 +102,20 @@
   *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument 
to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
 }
 
+void test_ds_fadd(__attribute__((address_space(3))) float *out, float src, int 
a) {
+  *out = __builtin_amdgcn_ds_fadd(out, src, a, 0, false); // expected-error 
{{argument to '__builtin_amdgcn_ds_fadd' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, a, false); // expected-error 
{{argument to '__builtin_amdgcn_ds_fadd' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, a); // expected-error 
{{argument to '__builtin_amdgcn_ds_fadd' must be a constant integer}}
+}
+
+void test_ds_fmin(__attribute__((address_space(3))) float *out, float src, int 
a) {
+  *out = __builtin_amdgcn_ds_fmin(out, src, a, 0, false); // expected-error 
{{argument to '__builtin_amdgcn_ds_fmin' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, a, false); // expected-error 
{{argument to '__builtin_amdgcn_ds_fmin' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, a); // expected-error 
{{argument to '__builtin_amdgcn_ds_fmin' must be a constant integer}}
+}
+
+void test_ds_fmax(__attribute__((address_space(3))) float *out, float src, int 
a) {
+  *out = __builtin_amdgcn_ds_fmax(out, src, a, 0, false); // expected-error 
{{argument to '__builtin_amdgcn_ds_fmax' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, a, false); // expected-error 
{{argument to '__builtin_amdgcn_ds_fmax' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, a); // expected-error 
{{argument to '__builtin_amdgcn_ds_fmax' must be a constant integer}}
+}
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9860,6 +9860,49 @@
 CI->setConvergent();
 return CI;
   }
+  case AMDGPU::BI__builtin_amdgcn_ds_fadd:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmin:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmax: {
+llvm::SmallVector Args;
+for (unsigned I = 0; I != 5; ++I)
+  Args.push_back(EmitScalarExpr(E->getArg(I)));
+const llvm::Type *PtrTy = Args[0]->getType();
+// check pointer parameter
+if (!PtrTy->isPointerTy() ||
+LangAS::opencl_local != E->getArg(0)
+->getType()
+->getPointeeType()
+.getQualifiers()
+.getAddressSpace() ||
+!PtrTy->getPointerElementType()->isFloatTy()) {
+  CGM.Error(E->getArg(0)->getLocStart(),
+"parameter should have type \"local float*\"");
+  return nullptr;
+}
+// check float parameter
+if (!Args[1]->getType()->isFloatTy()) {
+  CGM.Error(E->getArg(1)->getLocStart(),
+"parameter should have type \"float\"");
+  return nullptr;
+}
+
+Intrinsic::ID ID;
+switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_ds_fadd:
+  ID = Intrinsic::amdgcn_ds_fadd;
+  break;
+case AMDGPU::BI__builtin_amdgcn_ds_fmin:
+  ID = Intrinsic::amdgcn_ds_fmin;
+  break;
+case AMDGPU::BI__builtin_amdgcn_ds_fmax:
+  ID = Intrinsic::amdgcn_ds_fmax;
+  break;
+default:
+  llvm_unreachable("Unknown BuiltinID");
+}
+Value *F = CGM.getIntrinsic(ID);
+return Builder.CreateCall(F, Args);
+  }
 
   // amdgcn workitem
   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,9 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
-BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
-BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
-BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fIiIiIb", "n")
 
 
//===--===/

[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-03-09 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added a comment.

ping...


Repository:
  rC Clang

https://reviews.llvm.org/D43281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-03-19 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added a comment.

> My real question was what happens if you put 11 in the description string?

in this case CanT.getAddressSpace() returns target addrspace value "20" (also 
shifted in the enum by 9==LangAS::FirstTargetAddressSpace)

So again ASTContext::getAddrSpaceQualType decides that these addrspaces are not 
equal and then asserts.

The main reason for this change is to allow usage of the builtins with __local 
described pointers. Such pointers have LangAS AddressSpace in the 
ASTContext::getAddrSpaceQualType() equal to "2".
But we have no way to obtain this value from CanT.getAddressSpace() since it 
returns target addrspace and it is >= 10.

So I'm going to switch to custom processing of these builtins and the patch is 
preparation to be able to remove their links in llvm repo.


Repository:
  rC Clang

https://reviews.llvm.org/D43281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-04-13 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov updated this revision to Diff 142412.
dfukalov edited the summary of this revision.

Repository:
  rC Clang

https://reviews.llvm.org/D43281

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl
  test/SemaOpenCL/builtins-amdgcn-error.cl

Index: test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- test/SemaOpenCL/builtins-amdgcn-error.cl
+++ test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -102,3 +102,20 @@
   *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
 }
 
+void test_ds_faddf(local float *out, float src, int a) {
+  *out = __builtin_amdgcn_ds_faddf(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_faddf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}
+}
+
+void test_ds_fminf(local float *out, float src, int a) {
+  *out = __builtin_amdgcn_ds_fminf(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_fminf' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fminf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fminf' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fminf' must be a constant integer}}
+}
+
+void test_ds_fmaxf(local float *out, float src, int a) {
+  *out = __builtin_amdgcn_ds_fmaxf(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
+  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -91,18 +91,18 @@
 
 // CHECK-LABEL: @test_ds_fadd
 // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fadd(__attribute__((address_space(3))) float *out, float src) {
-  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+void test_ds_faddf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmin
 // CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmin(__attribute__((address_space(3))) float *out, float src) {
-  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+void test_ds_fminf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmax
 // CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmax(__attribute__((address_space(3))) float *out, float src) {
-  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+void test_ds_fmaxf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
 }
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9897,6 +9897,49 @@
 CI->setConvergent();
 return CI;
   }
+  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
+llvm::SmallVector Args;
+for (unsigned I = 0; I != 5; ++I)
+  Args.push_back(EmitScalarExpr(E->getArg(I)));
+const llvm::Type *PtrTy = Args[0]->getType();
+// check pointer parameter
+if (!PtrTy->isPointerTy() ||
+LangAS::opencl_local != E->getArg(0)
+->getType()
+->getPointeeType()
+.getQualifiers()
+.getAddressSpace() ||
+!PtrTy->getPointerElementType()->isFloatTy()) {
+  CGM.Error(E->getArg(0)->getLocStart(),
+"parameter should have type \"local float*\"");
+  return nullptr;
+}
+// check float parameter
+if (!Args[1]->getType()->isFloatTy()) {
+  CGM.Error(E->getArg(1)->getLocStart(),
+"parameter should have type \"float\"");
+  return nullptr;
+}
+
+Intrinsic::ID ID;
+switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+  ID = Intrinsic::amdgcn_

[PATCH] D82650: [HIP] Set default FP_CONTRACT to ON

2020-06-26 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov created this revision.
dfukalov added reviewers: yaxunl, b-sumner, rampitec.
Herald added subscribers: cfe-commits, Anastasia.
Herald added a project: clang.

With the default FP_CONTRACT mode set to FAST, for
`func(float a, float b, float c) { return a + b * c; }` the FE generates a pair of
`fmul contract` + `fadd contract` instructions that are fused into an fma operation in the BE.

But OpenCL fuses these in the FE. This approach seems more effective since it avoids the
possibility that these instructions are not fused in the BE. The default setting can be
overridden with `#pragma STDC FP_CONTRACT` by a programmer.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82650

Files:
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CodeGenHIP/fp-contract.hip
  clang/test/CodeGenHIP/lit.local.cfg


Index: clang/test/CodeGenHIP/lit.local.cfg
===
--- /dev/null
+++ clang/test/CodeGenHIP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.cpp', '.hip']
Index: clang/test/CodeGenHIP/fp-contract.hip
===
--- /dev/null
+++ clang/test/CodeGenHIP/fp-contract.hip
@@ -0,0 +1,36 @@
+// By default we should fuse multiply/add into llvm.fmuladd instruction.
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm 
-fcuda-is-device -o - %s | FileCheck -check-prefixes ENABLED,ALL %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm 
-ffp-contract=on -fcuda-is-device -o - %s | FileCheck -check-prefixes 
ENABLED,ALL %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm 
-ffp-contract=fast -fcuda-is-device -o - %s | FileCheck -check-prefixes 
FAST,ALL %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm 
-ffp-contract=off -fcuda-is-device -o - %s | FileCheck -check-prefixes 
DISABLED,ALL %s
+
+#define __device__ __attribute__((device))
+
+// ALL-LABEL: func
+// ENABLED: call float @llvm.fmuladd.f32
+// FAST: fmul contract float
+// FAST-NEXT: fadd contract float
+// DISABLED: fmul float
+// DISABLED-NEXT: fadd float
+__device__ float func(float a, float b, float c) { return a + b * c; }
+
+// ALL-LABEL: func_on
+// ALL: call float @llvm.fmuladd.f32
+#pragma STDC FP_CONTRACT ON
+__device__ float func_on(float a, float b, float c) { return a + b * c; }
+
+// ALL-LABEL: func_off
+// ALL: fmul float
+// ALL-NEXT: fadd float
+#pragma STDC FP_CONTRACT OFF
+__device__ float func_off(float a, float b, float c) { return a + b * c; }
+
+// ALL-LABEL: func_def
+// ENABLED: call float @llvm.fmuladd.f32
+// FAST: fmul contract float
+// FAST-NEXT: fadd contract float
+// DISABLED: fmul float
+// DISABLED-NEXT: fadd float
+#pragma STDC FP_CONTRACT DEFAULT
+__device__ float func_def(float a, float b, float c) { return a + b * c; }
Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2374,6 +2374,10 @@
 // Set default FP_CONTRACT to FAST.
 Opts.setDefaultFPContractMode(LangOptions::FPM_Fast);
 
+  // Set default FP_CONTRACT to ON like for OpenCL.
+  if (Opts.HIP)
+Opts.setDefaultFPContractMode(LangOptions::FPM_On);
+
   Opts.RenderScript = IK.getLanguage() == Language::RenderScript;
   if (Opts.RenderScript) {
 Opts.NativeHalfType = 1;


Index: clang/test/CodeGenHIP/lit.local.cfg
===
--- /dev/null
+++ clang/test/CodeGenHIP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.cpp', '.hip']
Index: clang/test/CodeGenHIP/fp-contract.hip
===
--- /dev/null
+++ clang/test/CodeGenHIP/fp-contract.hip
@@ -0,0 +1,36 @@
+// By default we should fuse multiply/add into llvm.fmuladd instruction.
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck -check-prefixes ENABLED,ALL %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=on -fcuda-is-device -o - %s | FileCheck -check-prefixes ENABLED,ALL %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=fast -fcuda-is-device -o - %s | FileCheck -check-prefixes FAST,ALL %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=off -fcuda-is-device -o - %s | FileCheck -check-prefixes DISABLED,ALL %s
+
+#define __device__ __attribute__((device))
+
+// ALL-LABEL: func
+// ENABLED: call float @llvm.fmuladd.f32
+// FAST: fmul contract float
+// FAST-NEXT: fadd contract float
+// DISABLED: fmul float
+// DISABLED-NEXT: fadd float
+__device__ float func(float a, float b, float c) { return a + b * c; }
+
+// ALL-LABEL: func_on
+// ALL: call float @llvm.fmuladd.f32
+#pragma STDC FP_CONTRACT ON
+__device__ float func_on(float a, float b, float c) { return a + b * c; }
+
+// 

[PATCH] D82650: [HIP] Set default FP_CONTRACT to ON

2020-06-26 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov planned changes to this revision.
dfukalov added a comment.

In D82650#2116849, @yaxunl wrote:

> I think the point of -ffp-contract=fast is that it allows emitting fma across 
> statements if there are such opportunities whereas -ffp-contract=on only 
> allows fma in one statement
>
> https://llvm.org/docs/CompileCudaWithLLVM.html#flags-that-control-numerical-code


Yes, you're right. I was confused by the current FE behavior and by a number
of different commits, reverts and re-commits.

Currently the FE checks `allowFPContractWithinStatement()`, and it returns
false for FAST mode. As I understand it, FAST mode should be more aggressive
and include this case.
I'll re-work the change.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82650/new/

https://reviews.llvm.org/D82650



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82650: [HIP] Set default FP_CONTRACT to ON

2020-06-29 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov abandoned this revision.
dfukalov added a comment.

I was completely wrong


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82650/new/

https://reviews.llvm.org/D82650



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82764: [NFC] Fixed ignored .hip test.

2020-06-29 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov created this revision.
dfukalov added a reviewer: hliao.
Herald added subscribers: cfe-commits, yaxunl.
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82764

Files:
  clang/test/CodeGenHIP/lit.local.cfg


Index: clang/test/CodeGenHIP/lit.local.cfg
===
--- /dev/null
+++ clang/test/CodeGenHIP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.cpp', '.hip']


Index: clang/test/CodeGenHIP/lit.local.cfg
===
--- /dev/null
+++ clang/test/CodeGenHIP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.cpp', '.hip']
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82764: [NFC] Fixed ignored .hip test.

2020-06-29 Thread Daniil Fukalov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG8cc722ffc749: [NFC] Fixed ignored .hip test. (authored by 
dfukalov).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82764/new/

https://reviews.llvm.org/D82764

Files:
  clang/test/CodeGenHIP/lit.local.cfg


Index: clang/test/CodeGenHIP/lit.local.cfg
===
--- /dev/null
+++ clang/test/CodeGenHIP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.cpp', '.hip']


Index: clang/test/CodeGenHIP/lit.local.cfg
===
--- /dev/null
+++ clang/test/CodeGenHIP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.cpp', '.hip']
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92852: [NFC] Reduce include files dependency and AA header cleanup (part 2).

2020-12-08 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov created this revision.
dfukalov added a reviewer: RKSimon.
Herald added subscribers: kerbowa, asbirlea, jfb, steven_wu, george.burgess.iv, 
zzheng, hiraditya, eraman, nhaehnle, jvesely, arsenm, MatzeB.
dfukalov requested review of this revision.
Herald added projects: clang, LLVM.
Herald added a subscriber: cfe-commits.

Continuing work started in https://reviews.llvm.org/D92489:

1. Removed a bunch of includes from "AliasAnalysis.h" and "LoopPassManager.h".
2. Minor `const` modifiers unifications.
3. Using `AAQueryInfo::IsCapturedCacheT`.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D92852

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/examples/Bye/Bye.cpp
  llvm/include/llvm/Analysis/AliasAnalysis.h
  llvm/include/llvm/Analysis/BasicAliasAnalysis.h
  llvm/include/llvm/Analysis/MemorySSA.h
  llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
  llvm/lib/Analysis/AliasAnalysis.cpp
  llvm/lib/Analysis/CaptureTracking.cpp
  llvm/lib/Analysis/MemDepPrinter.cpp
  llvm/lib/Analysis/ScopedNoAliasAA.cpp
  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
  llvm/lib/CodeGen/LiveIntervals.cpp
  llvm/lib/LTO/Caching.cpp
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
  llvm/lib/Transforms/IPO/FunctionAttrs.cpp
  llvm/lib/Transforms/IPO/HotColdSplitting.cpp
  llvm/lib/Transforms/IPO/Inliner.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
  llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
  llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
  llvm/lib/Transforms/Scalar/Float2Int.cpp
  llvm/lib/Transforms/Scalar/LoopDistribute.cpp
  llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
  llvm/lib/Transforms/Scalar/LoopPassManager.cpp
  llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
  llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
  llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
  llvm/lib/Transforms/Utils/LoopVersioning.cpp
  llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
  llvm/tools/opt/NewPMDriver.cpp

Index: llvm/tools/opt/NewPMDriver.cpp
===
--- llvm/tools/opt/NewPMDriver.cpp
+++ llvm/tools/opt/NewPMDriver.cpp
@@ -16,6 +16,7 @@
 #include "PassPrinters.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
Index: llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
===
--- llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
+++ llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
@@ -10,6 +10,7 @@
 //
 //===--===//
 
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/CodeGen/CommandFlags.h"
@@ -18,6 +19,7 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
Index: llvm/lib/Transforms/Utils/LoopVersioning.cpp
===
--- llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -14,7 +14,6 @@
 
 #include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemorySSA.h"
Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
===
--- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,7 +22,6 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/BasicBlock.h"
Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
===
--- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -12,7 +12,6 @@
 
 #include "llvm/Transforms/Utils/LoopRotationUtils.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CodeMetrics.h"
Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
=

[PATCH] D92852: [NFC] Reduce include files dependency and AA header cleanup (part 2).

2020-12-08 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added inline comments.



Comment at: llvm/include/llvm/Analysis/AliasAnalysis.h:66
 class Value;
+class TargetLibraryInfo;
 

Just realized the order is wrong; I will fix it in the updated patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92852/new/

https://reviews.llvm.org/D92852

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92852: [NFC] Reduce include files dependency and AA header cleanup (part 2).

2020-12-08 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added inline comments.



Comment at: llvm/include/llvm/Analysis/AliasAnalysis.h:800
 case Instruction::Invoke:
-  return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+  return getModRefInfo((const CallBase *)I, Loc, AAQIP);
 case Instruction::CatchPad:

RKSimon wrote:
> Is this necessary? It doesn't seem to match the pattern used for all the 
> other Instruction types.
Actually there is no `getModRefInfo(CallInst *` or `getModRefInfo(InvokeInst
*` overload, only `getModRefInfo(CallBase *`.
Both calls relied on implicit casts from `CallInst` and `InvokeInst` to their
base `CallBase`, and this was masked by the included Instructions.h.

My thought was that we use explicit casts here, so I decided to refine these calls.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92852/new/

https://reviews.llvm.org/D92852

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92852: [NFC] Reduce include files dependency and AA header cleanup (part 2).

2020-12-09 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov updated this revision to Diff 310506.
dfukalov added a comment.

Splitting change as requested.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92852/new/

https://reviews.llvm.org/D92852

Files:
  llvm/include/llvm/Analysis/AliasAnalysis.h
  llvm/lib/Analysis/AliasAnalysis.cpp
  llvm/lib/Analysis/CaptureTracking.cpp

Index: llvm/lib/Analysis/CaptureTracking.cpp
===
--- llvm/lib/Analysis/CaptureTracking.cpp
+++ llvm/lib/Analysis/CaptureTracking.cpp
@@ -413,8 +413,8 @@
 }
 
 bool llvm::isNonEscapingLocalObject(
-const Value *V, AAQueryInfo::IsCapturedCacheT *IsCapturedCache) {
-  AAQueryInfo::IsCapturedCacheT::iterator CacheIt;
+const Value *V, SmallDenseMap *IsCapturedCache) {
+  SmallDenseMap::iterator CacheIt;
   if (IsCapturedCache) {
 bool Inserted;
 std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
Index: llvm/lib/Analysis/AliasAnalysis.cpp
===
--- llvm/lib/Analysis/AliasAnalysis.cpp
+++ llvm/lib/Analysis/AliasAnalysis.cpp
@@ -166,13 +166,12 @@
   return Result;
 }
 
-ModRefInfo AAResults::getModRefInfo(const Instruction *I,
-const CallBase *Call2) {
+ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
   AAQueryInfo AAQIP;
   return getModRefInfo(I, Call2, AAQIP);
 }
 
-ModRefInfo AAResults::getModRefInfo(const Instruction *I, const CallBase *Call2,
+ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2,
 AAQueryInfo &AAQI) {
   // We may have two calls.
   if (const auto *Call1 = dyn_cast<CallBase>(I)) {
@@ -648,6 +647,43 @@
   return ModRefInfo::ModRef;
 }
 
+ModRefInfo AAResults::getModRefInfo(const Instruction *I,
+const Optional<MemoryLocation> &OptLoc,
+AAQueryInfo &AAQIP) {
+  if (OptLoc == None) {
+if (const auto *Call = dyn_cast<CallBase>(I)) {
+  return createModRefInfo(getModRefBehavior(Call));
+}
+  }
+
+  const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
+
+  switch (I->getOpcode()) {
+  case Instruction::VAArg:
+return getModRefInfo((const VAArgInst *)I, Loc, AAQIP);
+  case Instruction::Load:
+return getModRefInfo((const LoadInst *)I, Loc, AAQIP);
+  case Instruction::Store:
+return getModRefInfo((const StoreInst *)I, Loc, AAQIP);
+  case Instruction::Fence:
+return getModRefInfo((const FenceInst *)I, Loc, AAQIP);
+  case Instruction::AtomicCmpXchg:
+return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP);
+  case Instruction::AtomicRMW:
+return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
+  case Instruction::Call:
+return getModRefInfo((const CallInst *)I, Loc, AAQIP);
+  case Instruction::Invoke:
+return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+  case Instruction::CatchPad:
+return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
+  case Instruction::CatchRet:
+return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
+  default:
+return ModRefInfo::NoModRef;
+  }
+}
+
 /// Return information about whether a particular call site modifies
 /// or reads the specified memory location \p MemLoc before instruction \p I
 /// in a BasicBlock.
@@ -657,7 +693,7 @@
 /// with a smarter AA in place, this test is just wasting compile time.
 ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
  const MemoryLocation &MemLoc,
- const DominatorTree *DT) {
+ DominatorTree *DT) {
   if (!DT)
 return ModRefInfo::ModRef;
 
Index: llvm/include/llvm/Analysis/AliasAnalysis.h
===
--- llvm/include/llvm/Analysis/AliasAnalysis.h
+++ llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -62,8 +62,8 @@
 class Function;
 class InvokeInst;
 class PreservedAnalyses;
-class Value;
 class TargetLibraryInfo;
+class Value;
 
 /// The possible results of an alias query.
 ///
@@ -691,7 +691,7 @@
 
   /// Return information about whether a call and an instruction may refer to
   /// the same memory locations.
-  ModRefInfo getModRefInfo(const Instruction *I, const CallBase *Call);
+  ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call);
 
   /// Return information about whether two call sites may refer to the same set
   /// of memory locations. See the AA documentation for details:
@@ -704,12 +704,11 @@
   /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being
   /// set.
   ModRefInfo callCapturesBefore(const Instruction *I,
-const MemoryLocation &MemLoc,
-const DominatorTree *DT);
+const MemoryLocation &MemLoc, DominatorTree *DT);
 
   /// A 

[PATCH] D92852: [NFC] Reduce include files dependency and AA header cleanup (part 2).

2020-12-09 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov updated this revision to Diff 310509.
dfukalov edited the summary of this revision.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92852/new/

https://reviews.llvm.org/D92852

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/examples/Bye/Bye.cpp
  llvm/include/llvm/Analysis/AliasAnalysis.h
  llvm/include/llvm/Analysis/BasicAliasAnalysis.h
  llvm/include/llvm/Analysis/MemorySSA.h
  llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
  llvm/lib/Analysis/AliasAnalysis.cpp
  llvm/lib/Analysis/MemDepPrinter.cpp
  llvm/lib/Analysis/ScopedNoAliasAA.cpp
  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
  llvm/lib/CodeGen/LiveIntervals.cpp
  llvm/lib/LTO/Caching.cpp
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
  llvm/lib/Transforms/IPO/FunctionAttrs.cpp
  llvm/lib/Transforms/IPO/HotColdSplitting.cpp
  llvm/lib/Transforms/IPO/Inliner.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
  llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
  llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
  llvm/lib/Transforms/Scalar/Float2Int.cpp
  llvm/lib/Transforms/Scalar/LoopDistribute.cpp
  llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
  llvm/lib/Transforms/Scalar/LoopPassManager.cpp
  llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
  llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
  llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
  llvm/lib/Transforms/Utils/LoopVersioning.cpp
  llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
  llvm/tools/opt/NewPMDriver.cpp

Index: llvm/tools/opt/NewPMDriver.cpp
===
--- llvm/tools/opt/NewPMDriver.cpp
+++ llvm/tools/opt/NewPMDriver.cpp
@@ -16,6 +16,7 @@
 #include "PassPrinters.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
Index: llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
===
--- llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
+++ llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
@@ -10,6 +10,7 @@
 //
 //===--===//
 
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/CodeGen/CommandFlags.h"
@@ -18,6 +19,7 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
Index: llvm/lib/Transforms/Utils/LoopVersioning.cpp
===
--- llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -14,7 +14,6 @@
 
 #include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemorySSA.h"
Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
===
--- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,7 +22,6 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/BasicBlock.h"
Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
===
--- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -12,7 +12,6 @@
 
 #include "llvm/Transforms/Utils/LoopRotationUtils.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CodeMetrics.h"
Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
===
--- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -27,16 +27,17 @@
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/

[PATCH] D92852: [NFC] Reduce include files dependency and AA header cleanup (part 2).

2020-12-09 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added inline comments.



Comment at: llvm/lib/Analysis/AliasAnalysis.cpp:685
+  }
+}
+

RKSimon wrote:
> This should probably be pulled out too
It uses `dyn_cast<CallBase>(I)`, so clang-tidy reports //incomplete type
'llvm::CallBase' named in nested name specifier// if the function definition is
in the header. This is a result of removing the Instructions.h include from
AliasAnalysis.h.

Actually there is one more clang-tidy report of the same kind, on `isa` in
MemorySSA.h, which includes AliasAnalysis.h (and so got Instructions.h
through it). I thought to fix it the same way - by moving the function with
`isa` to MemorySSA.cpp.

Or should I keep the Instructions.h include in AliasAnalysis.h?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92852/new/

https://reviews.llvm.org/D92852

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92852: [NFC] Reduce include files dependency and AA header cleanup (part 2).

2020-12-16 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added a comment.

Ping...


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92852/new/

https://reviews.llvm.org/D92852

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92852: [NFC] Reduce include files dependency and AA header cleanup (part 2).

2020-12-17 Thread Daniil Fukalov via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG9ed8e0caab9b: [NFC] Reduce include files dependency and AA 
header cleanup (part 2). (authored by dfukalov).

Changed prior to commit:
  https://reviews.llvm.org/D92852?vs=310509&id=312422#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92852/new/

https://reviews.llvm.org/D92852

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  lld/MachO/Driver.cpp
  llvm/examples/Bye/Bye.cpp
  llvm/include/llvm/Analysis/AliasAnalysis.h
  llvm/include/llvm/Analysis/BasicAliasAnalysis.h
  llvm/include/llvm/Analysis/MemorySSA.h
  llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
  llvm/lib/Analysis/AliasAnalysis.cpp
  llvm/lib/Analysis/MemDepPrinter.cpp
  llvm/lib/Analysis/MemorySSA.cpp
  llvm/lib/Analysis/ScopedNoAliasAA.cpp
  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
  llvm/lib/CodeGen/LiveIntervals.cpp
  llvm/lib/LTO/Caching.cpp
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
  llvm/lib/Transforms/IPO/FunctionAttrs.cpp
  llvm/lib/Transforms/IPO/HotColdSplitting.cpp
  llvm/lib/Transforms/IPO/Inliner.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
  llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
  llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
  llvm/lib/Transforms/Scalar/Float2Int.cpp
  llvm/lib/Transforms/Scalar/LoopDistribute.cpp
  llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
  llvm/lib/Transforms/Scalar/LoopPassManager.cpp
  llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
  llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
  llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
  llvm/lib/Transforms/Utils/LoopVersioning.cpp
  llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
  llvm/tools/opt/NewPMDriver.cpp

Index: llvm/tools/opt/NewPMDriver.cpp
===
--- llvm/tools/opt/NewPMDriver.cpp
+++ llvm/tools/opt/NewPMDriver.cpp
@@ -16,6 +16,7 @@
 #include "PassPrinters.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
Index: llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
===
--- llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
+++ llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
@@ -10,6 +10,7 @@
 //
 //===--===//
 
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/CodeGen/CommandFlags.h"
@@ -18,6 +19,7 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
Index: llvm/lib/Transforms/Utils/LoopVersioning.cpp
===
--- llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -14,7 +14,6 @@
 
 #include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemorySSA.h"
Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
===
--- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,7 +22,6 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/BasicBlock.h"
Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
===
--- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -12,7 +12,6 @@
 
 #include "llvm/Transforms/Utils/LoopRotationUtils.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CodeMetrics.h"
Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
===
--- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ llvm/lib/Trans

[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-03-27 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added a comment.

ping...


Repository:
  rC Clang

https://reviews.llvm.org/D43281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D43281: [AMDGPU] fixes for lds f32 builtins

2018-04-03 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added a comment.

ping...


Repository:
  rC Clang

https://reviews.llvm.org/D43281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42578: [AMDGPU] Add ds_fadd builtin function

2018-01-26 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov created this revision.
dfukalov added reviewers: arsenm, b-sumner.
dfukalov added a project: AMDGPU.
Herald added subscribers: cfe-commits, t-tye, tpr, dstuttard, yaxunl, nhaehnle, 
wdng, kzhuravl.

Repository:
  rC Clang

https://reviews.llvm.org/D42578

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl


Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,9 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,7 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "nc")
 
 //===--===//
 // VI+ only builtins.


Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,9 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,7 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "nc")
 
 //===--===//
 // VI+ only builtins.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42578: [AMDGPU] Add ds_fadd, ds_fmin, ds_fmax builtins functions

2018-01-26 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov updated this revision to Diff 131593.
dfukalov retitled this revision from "[AMDGPU] Add ds_fadd builtin function" to 
"[AMDGPU] Add ds_fadd, ds_fmin, ds_fmax builtins functions".
dfukalov added a comment.

Sorry, missed them


Repository:
  rC Clang

https://reviews.llvm.org/D42578

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl


Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,23 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "nc")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "nc")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "nc")
 
 //===--===//
 // VI+ only builtins.


Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,23 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "nc")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "nc")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "nc")
 
 //===--===//
 // VI+ only builtins.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42578: [AMDGPU] Add ds_fadd, ds_fmin, ds_fmax builtins functions

2018-01-29 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov updated this revision to Diff 131820.
dfukalov added a comment.

fixed builtins descriptions


Repository:
  rC Clang

https://reviews.llvm.org/D42578

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl


Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,23 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
 
 
//===--===//
 // VI+ only builtins.


Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,23 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}
Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
 
 //===--===//
 // VI+ only builtins.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42578: [AMDGPU] Add ds_fadd, ds_fmin, ds_fmax builtins functions

2018-01-31 Thread Daniil Fukalov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL323890: [AMDGPU] Add ds_fadd, ds_fmin, ds_fmax builtins 
functions (authored by dfukalov, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D42578?vs=131820&id=132191#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D42578

Files:
  cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
  cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl


Index: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,23 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}
Index: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
===
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
 
 
//===--===//
 // VI+ only builtins.


Index: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,23 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}
Index: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
===
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
 
 //===--===//
 // VI+ only builtins.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42578: [AMDGPU] Add ds_fadd, ds_fmin, ds_fmax builtins functions

2018-01-31 Thread Daniil Fukalov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC323890: [AMDGPU] Add ds_fadd, ds_fmin, ds_fmax builtins 
functions (authored by dfukalov, committed by ).

Repository:
  rL LLVM

https://reviews.llvm.org/D42578

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl


Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
 
 
//===--===//
 // VI+ only builtins.
Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,23 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}


Index: include/clang/Basic/BuiltinsAMDGPU.def
===
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
 
 //===--===//
 // VI+ only builtins.
Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,23 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(local float *out, float src)
+{
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115283: [AMDGPU] Set "amdgpu_hostcall" module flag if an AMDGPU function has calls to device lib functions that use hostcalls.

2021-12-07 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added a comment.

Needs a test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115283/new/

https://reviews.llvm.org/D115283

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115283: [AMDGPU] Set "amdgpu_hostcall" module flag if an AMDGPU function has calls to device lib functions that use hostcalls.

2021-12-08 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added inline comments.



Comment at: clang/test/CodeGenHIP/amdgpu_hostcall.cpp:2-6
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -DFN_HOSTCALL \
+// RUN:   -o - %s | FileCheck --enable-var-scope %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -DFN_PRINTF \
+// RUN:   -o - %s | FileCheck --enable-var-scope %s

Am I right that we don't actually need two runs here? The test could be
executed with a single run, with the `#ifdefs` removed and, possibly, the
`CHECK:` lines duplicated.
I would suggest using the llvm/utils/update_cc_test_checks.py script for such
tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115283/new/

https://reviews.llvm.org/D115283

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115283: [AMDGPU] Set "amdgpu_hostcall" module flag if an AMDGPU function has calls to device lib functions that use hostcalls.

2021-12-09 Thread Daniil Fukalov via Phabricator via cfe-commits
dfukalov added inline comments.



Comment at: clang/test/CodeGenHIP/amdgpu_hostcall.cpp:2-6
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -DFN_HOSTCALL \
+// RUN:   -o - %s | FileCheck --enable-var-scope %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -DFN_PRINTF \
+// RUN:   -o - %s | FileCheck --enable-var-scope %s

kpyzhov wrote:
> dfukalov wrote:
> > Am I right we don't actually need two runs here, the test may be executed 
> > with one run, removed `#ifdefs` and, possible, multiplied `CHECK:` lines?
> > I would suggest to use the llvm/utils/update_cc_test_checks.py script in 
> > such tests.
> Well, it may be executed with one run, but in that case we won't be able to 
> catch an error if one of the functions is broken, because the 2nd one will 
> set the module flag.
> Why do you think I should use the script for this test?
Oh, I see, that indeed should be run as two separate checks.

Regarding the script: it generates CHECK-NEXT sequences, so we can be sure
that the substring "amdgpu_hostcall" is not matched from any other place. Of
course, you can also make the test stronger with hand-written `-NEXT` checks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115283/new/

https://reviews.llvm.org/D115283

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits