[clang] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-19 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 created 
https://github.com/llvm/llvm-project/pull/69567

Currently __builtin_read_exec_hi lowers to llvm.read_register, this patch 
lowers it to use amdgcn_ballot.

>From 340e633da9e3ab10efc0c0d430b9546cd2f19cfe Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp | 27 +++--
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl |  4 ++-
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index db9f354fa8386d3..d60826f293f0c46 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind {
 
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
-  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
-  return Call;
+  llvm::Value *Call;
+  Function *F;
+  
+  if (isExecHi) {
+F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
+Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+Value *Rt2 = Builder.CreateLShr(Call, C1);
+Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty);
+return Rt2;
+  } else {
+F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+return Call;
+  }
 }
 
 // Generates the IR for the read/write special register builtin,
@@ -17837,10 +17849,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8938642e3b19f8c..0bc9a54682d3e31 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
 // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
 
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-19 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/69567

>From 19582446dbabffb0b25f9fa8b8b62a06ce6a4c66 Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp | 27 +++--
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl |  4 ++-
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index db9f354fa8386d3..4c167fccb619288 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind {
 
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
-  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
-  return Call;
+  llvm::Value *Call;
+  Function *F;
+
+  if (isExecHi) {
+F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
+Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+Value *Rt2 = Builder.CreateLShr(Call, C1);
+Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty);
+return Rt2;
+  } else {
+F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+return Call;
+  }
 }
 
 // Generates the IR for the read/write special register builtin,
@@ -17837,10 +17849,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8938642e3b19f8c..0bc9a54682d3e31 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
 // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
 
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-19 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 edited 
https://github.com/llvm/llvm-project/pull/69567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-19 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 edited 
https://github.com/llvm/llvm-project/pull/69567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-24 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/69567

>From 536e4b5912f976b295bc2b507e4181c8a65f0d12 Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 20 +++-
 .../CodeGenOpenCL/builtins-amdgcn-wave32.cl   | 24 +++
 .../CodeGenOpenCL/builtins-amdgcn-wave64.cl   | 23 ++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   |  4 +++-
 4 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..b5318ed41b10c53 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7997,13 +7997,20 @@ enum SpecialRegisterAccessKind {
 
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+  if (isExecHi) {
+Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+Value *Rt2 = Builder.CreateLShr(Call, C1);
+Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
+return Rt2;
+  }
+
   return Call;
 }
 
@@ -17857,10 +17864,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a4d14cf1f6cf0bd..43553131f63c549 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave32_target_attr(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
 __attribute__((target("wavefrontsize32")))
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, 
int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global uint* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 32
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
index 563c9a2a240c1dc..53f34c6a44ae7dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave64_target_attr(
 // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
 __attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int 
a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ba

[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-24 Thread Rana Pratap Reddy via cfe-commits


@@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind {
 
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
-  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
-  return Call;
+  llvm::Value *Call;
+  Function *F;
+
+  if (isExecHi) {
+F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
+Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+Value *Rt2 = Builder.CreateLShr(Call, C1);
+Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty);

ranapratap55 wrote:

Updated in the latest patch.

https://github.com/llvm/llvm-project/pull/69567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-24 Thread Rana Pratap Reddy via cfe-commits


@@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind {
 
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
-  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
-  return Call;
+  llvm::Value *Call;
+  Function *F;
+
+  if (isExecHi) {
+F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
+Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+Value *Rt2 = Builder.CreateLShr(Call, C1);
+Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty);
+return Rt2;
+  } else {
+F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+Call = Builder.CreateCall(F, {Builder.getInt1(true)});

ranapratap55 wrote:

Updated.

https://github.com/llvm/llvm-project/pull/69567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-24 Thread Rana Pratap Reddy via cfe-commits


@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
 // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
 
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)

ranapratap55 wrote:

moved to respected wave32.cl and wave64.cl tests.

https://github.com/llvm/llvm-project/pull/69567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-24 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/69567

>From c2ed656fa149a9cb60fb43eb34f4b20186166b34 Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 22 -
 .../CodeGenOpenCL/builtins-amdgcn-wave32.cl   | 24 +++
 .../CodeGenOpenCL/builtins-amdgcn-wave64.cl   | 23 ++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   |  4 +++-
 4 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..02ed6b0c6e56673 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7995,15 +7995,24 @@ enum SpecialRegisterAccessKind {
   Write,
 };
 
+// Generated the IR for __builtin_read_exec_*.
+// Lowers the builtin to amdgcn_ballot intrinsic.
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+  if (isExecHi) {
+Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+Value *Rt2 = Builder.CreateLShr(Call, C1);
+Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
+return Rt2;
+  }
+
   return Call;
 }
 
@@ -17857,10 +17866,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a4d14cf1f6cf0bd..43553131f63c549 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave32_target_attr(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
 __attribute__((target("wavefrontsize32")))
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, 
int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global uint* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 32
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
index 563c9a2a240c1dc..53f34c6a44ae7dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave64_target_attr(
 // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
 __attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int 
a, int b)
   *out

[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-24 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/69567

>From 65c806a4c1703214fa7eaebecfdaa200b6d0d205 Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 22 -
 .../CodeGenOpenCL/builtins-amdgcn-wave32.cl   | 24 +++
 .../CodeGenOpenCL/builtins-amdgcn-wave64.cl   | 23 ++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   |  4 +++-
 4 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..63d02660dfc6b56 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7995,15 +7995,24 @@ enum SpecialRegisterAccessKind {
   Write,
 };
 
+// Generates the IR for __builtin_read_exec_*.
+// Lowers the builtin to amdgcn_ballot intrinsic.
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+  if (isExecHi) {
+Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+Value *Rt2 = Builder.CreateLShr(Call, C1);
+Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
+return Rt2;
+  }
+
   return Call;
 }
 
@@ -17857,10 +17866,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a4d14cf1f6cf0bd..43553131f63c549 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave32_target_attr(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
 __attribute__((target("wavefrontsize32")))
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, 
int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global uint* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 32
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
index 563c9a2a240c1dc..53f34c6a44ae7dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave64_target_attr(
 // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
 __attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int 
a, int b)
   *out

[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-25 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/69567

>From 8a5dfe62b7ce84d49f6684563f04122b75fc35ef Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 21 +++-
 .../CodeGenOpenCL/builtins-amdgcn-wave32.cl   | 24 +++
 .../CodeGenOpenCL/builtins-amdgcn-wave64.cl   | 23 ++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   |  4 +++-
 4 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..85be8bdd00516cb 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind {
   Write,
 };
 
+// Generates the IR for __builtin_read_exec_*.
+// Lowers the builtin to amdgcn_ballot intrinsic.
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+  if (isExecHi) {
+Value *Rt2 = Builder.CreateLShr(Call, 32);
+Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
+return Rt2;
+  }
+
   return Call;
 }
 
@@ -17857,10 +17865,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a4d14cf1f6cf0bd..43553131f63c549 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave32_target_attr(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
 __attribute__((target("wavefrontsize32")))
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, 
int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global uint* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 32
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
index 563c9a2a240c1dc..53f34c6a44ae7dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave64_target_attr(
 // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
 __attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int 
a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK-LA

[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-25 Thread Rana Pratap Reddy via cfe-commits


@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, 
int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global uint* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)

ranapratap55 wrote:

Yes, this is generated codegen on wave32.

https://github.com/llvm/llvm-project/pull/69567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-25 Thread Rana Pratap Reddy via cfe-commits


@@ -7995,15 +7995,24 @@ enum SpecialRegisterAccessKind {
   Write,
 };
 
+// Generates the IR for __builtin_read_exec_*.
+// Lowers the builtin to amdgcn_ballot intrinsic.
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+  if (isExecHi) {
+Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+Value *Rt2 = Builder.CreateLShr(Call, C1);

ranapratap55 wrote:

Removed ConstantInt::get.

https://github.com/llvm/llvm-project/pull/69567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-25 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/69567

>From ebc8ebdebf707f94723b1fe0ee1a1b6afe5dff49 Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 21 +++-
 .../CodeGenOpenCL/builtins-amdgcn-wave32.cl   | 24 +++
 .../CodeGenOpenCL/builtins-amdgcn-wave64.cl   | 23 ++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   |  4 +++-
 4 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..85be8bdd00516cb 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind {
   Write,
 };
 
+// Generates the IR for __builtin_read_exec_*.
+// Lowers the builtin to amdgcn_ballot intrinsic.
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+  if (isExecHi) {
+Value *Rt2 = Builder.CreateLShr(Call, 32);
+Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
+return Rt2;
+  }
+
   return Call;
 }
 
@@ -17857,10 +17865,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a4d14cf1f6cf0bd..43553131f63c549 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave32_target_attr(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
 __attribute__((target("wavefrontsize32")))
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, 
int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global uint* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 32
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
index 563c9a2a240c1dc..53f34c6a44ae7dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave64_target_attr(
 // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
 __attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int 
a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK-LA

[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-25 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/69567

>From bd6e1449bd098d8348cf1402ad875e79cbc274b5 Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 21 +++-
 .../CodeGenOpenCL/builtins-amdgcn-wave32.cl   | 24 +++
 .../CodeGenOpenCL/builtins-amdgcn-wave64.cl   | 23 ++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   |  4 +++-
 4 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..85be8bdd00516cb 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind {
   Write,
 };
 
+// Generates the IR for __builtin_read_exec_*.
+// Lowers the builtin to amdgcn_ballot intrinsic.
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   llvm::Type *RegisterType,
-  llvm::Type *ValueType) {
+  llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+  if (isExecHi) {
+Value *Rt2 = Builder.CreateLShr(Call, 32);
+Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
+return Rt2;
+  }
+
   return Call;
 }
 
@@ -17857,10 +17865,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a4d14cf1f6cf0bd..43553131f63c549 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave32_target_attr(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
 __attribute__((target("wavefrontsize32")))
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, 
int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global uint* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 32
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
index 563c9a2a240c1dc..53f34c6a44ae7dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) 
#[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave64_target_attr(
 // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
 __attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int 
a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK-LA

[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

2023-10-25 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 closed 
https://github.com/llvm/llvm-project/pull/69567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-05-16 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 edited 
https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/140210

>From d1571dd53f157c9d4180c51e709d9bed0ba00136 Mon Sep 17 00:00:00 2001
From: ranapratap55 
Date: Fri, 16 May 2025 12:50:09 +0530
Subject: [PATCH 1/2] [WIP][AMDGPU] Support for type inferring image load/store
 builtins for AMDGPU

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |  5 +++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   | 24 ++
 .../test/CodeGen/builtins-image-load-2d-f32.c | 31 +++
 3 files changed, 60 insertions(+)
 create mode 100644 clang/test/CodeGen/builtins-image-load-2d-f32.c

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 39fef9e4601f8..67045809fa726 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "IUi", 
"nc", "bitop3-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", 
"f32-to-f16bf16-cvt-sr-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", 
"f32-to-f16bf16-cvt-sr-insts")
 
+//===--===//
+// Image builtins
+//===--===//
+BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp 
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index ad012d98635ff..15f5cd89beaa9 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 
 return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
+llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
+llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
+
+llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);
+llvm::Value *arg0 = EmitScalarExpr(E->getArg(0));
+llvm::Value *arg1 = EmitScalarExpr(E->getArg(1));
+llvm::Value *arg2 = EmitScalarExpr(E->getArg(2));
+llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0);
+llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0);
+
+SmallVector ArgTys;
+ArgTys.push_back(imm0);
+ArgTys.push_back(arg0);
+ArgTys.push_back(arg1);
+ArgTys.push_back(arg2);
+ArgTys.push_back(imm1);
+ArgTys.push_back(imm2);
+
+llvm::CallInst *Call =
+Builder.CreateIntrinsic(RetTy, Intrinsic::amdgcn_image_load_2d, 
ArgTys);
+
+return Call;
+  }
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c 
b/clang/test/CodeGen/builtins-image-load-2d-f32.c
new file mode 100644
index 0..78dab461c1f38
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | 
FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> 
noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:[[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, 
addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
+// CHECK-NEXT:[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[F32_ADDR]] to ptr
+// CHECK-NEXT:[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I32_ADDR]] to ptr
+// CHECK-NEXT:[[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VECI32_ADDR]] to ptr
+// CHECK-NEXT:store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], 
align 32
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], 
align 32
+// CHECK-NEXT:[[TMP3:%.*]] = call float 
@llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [

[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | 
FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));

ranapratap55 wrote:

Updated the name.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | 
FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable

ranapratap55 wrote:

removed

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 
 return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
+llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
+llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
+
+llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);
+llvm::Value *arg0 = EmitScalarExpr(E->getArg(0));
+llvm::Value *arg1 = EmitScalarExpr(E->getArg(1));
+llvm::Value *arg2 = EmitScalarExpr(E->getArg(2));
+llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0);
+llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0);
+
+SmallVector ArgTys;
+ArgTys.push_back(imm0);
+ArgTys.push_back(arg0);
+ArgTys.push_back(arg1);
+ArgTys.push_back(arg2);
+ArgTys.push_back(imm1);
+ArgTys.push_back(imm2);

ranapratap55 wrote:

Done.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | 
FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> 
noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:[[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, 
addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
+// CHECK-NEXT:[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[F32_ADDR]] to ptr
+// CHECK-NEXT:[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I32_ADDR]] to ptr
+// CHECK-NEXT:[[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VECI32_ADDR]] to ptr
+// CHECK-NEXT:store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], 
align 32
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], 
align 32
+// CHECK-NEXT:[[TMP3:%.*]] = call float 
@llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 
x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT:ret float [[TMP3]]
+//
+float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+
+  return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);

ranapratap55 wrote:

Added more signature for image load

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | 
FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> 
noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:[[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, 
addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
+// CHECK-NEXT:[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[F32_ADDR]] to ptr
+// CHECK-NEXT:[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I32_ADDR]] to ptr
+// CHECK-NEXT:[[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VECI32_ADDR]] to ptr
+// CHECK-NEXT:store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], 
align 32
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], 
align 32
+// CHECK-NEXT:[[TMP3:%.*]] = call float 
@llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 
x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT:ret float [[TMP3]]
+//
+float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+
+  return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+}

ranapratap55 wrote:

Added sema tests for non-imm arguments

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 
 return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
+llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
+llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
+
+llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);
+llvm::Value *arg0 = EmitScalarExpr(E->getArg(0));
+llvm::Value *arg1 = EmitScalarExpr(E->getArg(1));
+llvm::Value *arg2 = EmitScalarExpr(E->getArg(2));
+llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0);
+llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0);

ranapratap55 wrote:

updated.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "IUi", 
"nc", "bitop3-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", 
"f32-to-f16bf16-cvt-sr-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", 
"f32-to-f16bf16-cvt-sr-insts")
 
+//===--===//
+// Image builtins
+//===--===//
+BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")

ranapratap55 wrote:

Updated

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "IUi", 
"nc", "bitop3-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", 
"f32-to-f16bf16-cvt-sr-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", 
"f32-to-f16bf16-cvt-sr-insts")
 
+//===--===//
+// Image builtins
+//===--===//
+BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")

ranapratap55 wrote:

updated.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 
 return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
+llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
+llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
+
+llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);
+llvm::Value *arg0 = EmitScalarExpr(E->getArg(0));
+llvm::Value *arg1 = EmitScalarExpr(E->getArg(1));
+llvm::Value *arg2 = EmitScalarExpr(E->getArg(2));
+llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0);
+llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0);

ranapratap55 wrote:

updated.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 
 return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
+llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
+llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
+
+llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);

ranapratap55 wrote:

Done.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-17 Thread Rana Pratap Reddy via cfe-commits


@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | 
FileCheck %s

ranapratap55 wrote:

done.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-22 Thread Rana Pratap Reddy via cfe-commits

ranapratap55 wrote:

@arsenm ping.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-26 Thread Rana Pratap Reddy via cfe-commits


@@ -128,6 +160,16 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned 
BuiltinID,
   return false;
 }
 
+bool SemaAMDGPU::checkImageImmArgFunctionCall(CallExpr *TheCall,
+  unsigned ArgCount) {
+  llvm::APSInt Result;
+  if (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) &&

ranapratap55 wrote:

updated.

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-26 Thread Rana Pratap Reddy via cfe-commits


@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | 
FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> 
noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:[[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, 
addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
+// CHECK-NEXT:[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[F32_ADDR]] to ptr
+// CHECK-NEXT:[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I32_ADDR]] to ptr
+// CHECK-NEXT:[[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VECI32_ADDR]] to ptr
+// CHECK-NEXT:store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], 
align 32
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], 
align 32
+// CHECK-NEXT:[[TMP3:%.*]] = call float 
@llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 
x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT:ret float [[TMP3]]
+//
+float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+
+  return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+}

ranapratap55 wrote:

> Need sema tests that verify the builtins are rejected on targets without 
> images

only f16(v4f16) requires gfx8+ targets, all other types are supported from 
gfx6+. Where do I find which targets do not support images?

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-26 Thread Rana Pratap Reddy via cfe-commits


@@ -83,6 +83,38 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
 return checkMovDPPFunctionCall(TheCall, 6, 2);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+unsigned ArgCount = TheCall->getNumArgs() - 1;

ranapratap55 wrote:

updated

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-26 Thread Rana Pratap Reddy via cfe-commits


@@ -683,6 +683,206 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 
 return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+llvm::Type *RetTy = nullptr;
+switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+  RetTy = llvm::Type::getFloatTy(Builder.getContext());
+  break;
+case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+  RetTy =
+  FixedVectorType::get(llvm::Type::getFloatTy(Builder.getContext()), 
4);
+  break;
+case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+  RetTy =
+  FixedVectorType::get(llvm::Type::getHalfTy(Builder.getContext()), 4);
+  break;
+}
+
+llvm::Value *Dmask = EmitScalarExpr(E->getArg(0));
+llvm::Value *S = EmitScalarExpr(E->getArg(1));
+llvm::Value *T = EmitScalarExpr(E->getArg(2));
+llvm::Value *Slice;
+llvm::Value *Mip;
+llvm::Value *Rsrc;
+llvm::Value *Tfe;
+llvm::Value *Cpol;
+
+SmallVector ArgTys;
+
+Intrinsic::ID IID;
+llvm::CallInst *Call;
+
+switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: {
+  Rsrc = EmitScalarExpr(E->getArg(2));
+  Tfe = EmitScalarExpr(E->getArg(3));
+  Cpol = EmitScalarExpr(E->getArg(4));
+
+  ArgTys = {Dmask, S, Rsrc, Tfe, Cpol};
+  IID = Intrinsic::amdgcn_image_load_1d;
+  Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+  break;
+}
+case AMDGPU::BI_

[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)

2025-06-26 Thread Rana Pratap Reddy via cfe-commits


@@ -683,6 +683,206 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 
 return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+llvm::Type *RetTy = nullptr;
+switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+  RetTy = llvm::Type::getFloatTy(Builder.getContext());
+  break;
+case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+  RetTy =
+  FixedVectorType::get(llvm::Type::getFloatTy(Builder.getContext()), 
4);
+  break;
+case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+  RetTy =
+  FixedVectorType::get(llvm::Type::getHalfTy(Builder.getContext()), 4);
+  break;
+}
+
+llvm::Value *Dmask = EmitScalarExpr(E->getArg(0));
+llvm::Value *S = EmitScalarExpr(E->getArg(1));
+llvm::Value *T = EmitScalarExpr(E->getArg(2));
+llvm::Value *Slice;

ranapratap55 wrote:

updated to `nullptr`

https://github.com/llvm/llvm-project/pull/140210
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits