[clang] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 created https://github.com/llvm/llvm-project/pull/69567 Currently __builtin_read_exec_hi lowers to llvm.read_register, this patch lowers it to use amdgcn_ballot. >From 340e633da9e3ab10efc0c0d430b9546cd2f19cfe Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 27 +++-- clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 ++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index db9f354fa8386d3..d60826f293f0c46 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind { static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); - llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); - return Call; + llvm::Value *Call; + Function *F; + + if (isExecHi) { +F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); +Call = Builder.CreateCall(F, {Builder.getInt1(true)}); +Value *C1 = llvm::ConstantInt::get(ValueType, 32); +Value *Rt2 = Builder.CreateLShr(Call, C1); +Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty); +return Rt2; + } else { +F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); +Call = Builder.CreateCall(F, {Builder.getInt1(true)}); +return Call; + } } // Generates the IR for the read/write special register builtin, @@ -17837,10 +17849,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } 
case AMDGPU::BI__builtin_amdgcn_read_exec: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { -return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } +return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 8938642e3b19f8c..0bc9a54682d3e31 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) { // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] // CHECK-LABEL: @test_read_exec_hi( -// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 void test_read_exec_hi(global uint* out) { *out = __builtin_amdgcn_read_exec_hi(); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/69567 >From 19582446dbabffb0b25f9fa8b8b62a06ce6a4c66 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 27 +++-- clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 ++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index db9f354fa8386d3..4c167fccb619288 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind { static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); - llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); - return Call; + llvm::Value *Call; + Function *F; + + if (isExecHi) { +F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); +Call = Builder.CreateCall(F, {Builder.getInt1(true)}); +Value *C1 = llvm::ConstantInt::get(ValueType, 32); +Value *Rt2 = Builder.CreateLShr(Call, C1); +Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty); +return Rt2; + } else { +F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); +Call = Builder.CreateCall(F, {Builder.getInt1(true)}); +return Call; + } } // Generates the IR for the read/write special register builtin, @@ -17837,10 +17849,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, 
false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { -return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } +return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 8938642e3b19f8c..0bc9a54682d3e31 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) { // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] // CHECK-LABEL: @test_read_exec_hi( -// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 void test_read_exec_hi(global uint* out) { *out = __builtin_amdgcn_read_exec_hi(); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 edited https://github.com/llvm/llvm-project/pull/69567 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 edited https://github.com/llvm/llvm-project/pull/69567 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/69567 >From 536e4b5912f976b295bc2b507e4181c8a65f0d12 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 20 +++- .../CodeGenOpenCL/builtins-amdgcn-wave32.cl | 24 +++ .../CodeGenOpenCL/builtins-amdgcn-wave64.cl | 23 ++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 +++- 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..b5318ed41b10c53 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7997,13 +7997,20 @@ enum SpecialRegisterAccessKind { static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { +Value *C1 = llvm::ConstantInt::get(ValueType, 32); +Value *Rt2 = Builder.CreateLShr(Call, C1); +Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); +return Rt2; + } + return Call; } @@ -17857,10 +17864,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { -return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } +return 
EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index a4d14cf1f6cf0bd..43553131f63c549 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave32_target_attr( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) __attribute__((target("wavefrontsize32"))) @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 32 #error Wrong wavesize detected #endif diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 
563c9a2a240c1dc..53f34c6a44ae7dc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave64_target_attr( // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) __attribute__((target("wavefrontsize64"))) @@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ba
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
@@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind { static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); - llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); - return Call; + llvm::Value *Call; + Function *F; + + if (isExecHi) { +F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); +Call = Builder.CreateCall(F, {Builder.getInt1(true)}); +Value *C1 = llvm::ConstantInt::get(ValueType, 32); +Value *Rt2 = Builder.CreateLShr(Call, C1); +Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty); ranapratap55 wrote: Updated in the latest patch. https://github.com/llvm/llvm-project/pull/69567 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
@@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind { static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); - llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); - return Call; + llvm::Value *Call; + Function *F; + + if (isExecHi) { +F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); +Call = Builder.CreateCall(F, {Builder.getInt1(true)}); +Value *C1 = llvm::ConstantInt::get(ValueType, 32); +Value *Rt2 = Builder.CreateLShr(Call, C1); +Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty); +return Rt2; + } else { +F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); +Call = Builder.CreateCall(F, {Builder.getInt1(true)}); ranapratap55 wrote: Updated. https://github.com/llvm/llvm-project/pull/69567 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) { // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] // CHECK-LABEL: @test_read_exec_hi( -// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) ranapratap55 wrote: moved to respective wave32.cl and wave64.cl tests. https://github.com/llvm/llvm-project/pull/69567 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/69567 >From c2ed656fa149a9cb60fb43eb34f4b20186166b34 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 22 - .../CodeGenOpenCL/builtins-amdgcn-wave32.cl | 24 +++ .../CodeGenOpenCL/builtins-amdgcn-wave64.cl | 23 ++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 +++- 4 files changed, 66 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..02ed6b0c6e56673 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7995,15 +7995,24 @@ enum SpecialRegisterAccessKind { Write, }; +// Generated the IR for __builtin_read_exec_*. +// Lowers the builtin to amdgcn_ballot intrinsic. static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { +Value *C1 = llvm::ConstantInt::get(ValueType, 32); +Value *Rt2 = Builder.CreateLShr(Call, C1); +Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); +return Rt2; + } + return Call; } @@ -17857,10 +17866,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case 
AMDGPU::BI__builtin_amdgcn_read_exec_hi: { -return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } +return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index a4d14cf1f6cf0bd..43553131f63c549 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave32_target_attr( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) __attribute__((target("wavefrontsize32"))) @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 32 #error Wrong wavesize detected #endif diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 563c9a2a240c1dc..53f34c6a44ae7dc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave64_target_attr( // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) __attribute__((target("wavefrontsize64"))) @@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b) *out
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/69567 >From 65c806a4c1703214fa7eaebecfdaa200b6d0d205 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 22 - .../CodeGenOpenCL/builtins-amdgcn-wave32.cl | 24 +++ .../CodeGenOpenCL/builtins-amdgcn-wave64.cl | 23 ++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 +++- 4 files changed, 66 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..63d02660dfc6b56 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7995,15 +7995,24 @@ enum SpecialRegisterAccessKind { Write, }; +// Generates the IR for __builtin_read_exec_*. +// Lowers the builtin to amdgcn_ballot intrinsic. static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { +Value *C1 = llvm::ConstantInt::get(ValueType, 32); +Value *Rt2 = Builder.CreateLShr(Call, C1); +Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); +return Rt2; + } + return Call; } @@ -17857,10 +17866,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case 
AMDGPU::BI__builtin_amdgcn_read_exec_hi: { -return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } +return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index a4d14cf1f6cf0bd..43553131f63c549 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave32_target_attr( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) __attribute__((target("wavefrontsize32"))) @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 32 #error Wrong wavesize detected #endif diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 563c9a2a240c1dc..53f34c6a44ae7dc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave64_target_attr( // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) __attribute__((target("wavefrontsize64"))) @@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b) *out
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/69567 >From 8a5dfe62b7ce84d49f6684563f04122b75fc35ef Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 21 +++- .../CodeGenOpenCL/builtins-amdgcn-wave32.cl | 24 +++ .../CodeGenOpenCL/builtins-amdgcn-wave64.cl | 23 ++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 +++- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..85be8bdd00516cb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind { Write, }; +// Generates the IR for __builtin_read_exec_*. +// Lowers the builtin to amdgcn_ballot intrinsic. static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { +Value *Rt2 = Builder.CreateLShr(Call, 32); +Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); +return Rt2; + } + return Call; } @@ -17857,10 +17865,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { -return 
EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } +return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index a4d14cf1f6cf0bd..43553131f63c549 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave32_target_attr( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) __attribute__((target("wavefrontsize32"))) @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 32 #error Wrong wavesize detected #endif diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 563c9a2a240c1dc..53f34c6a44ae7dc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave64_target_attr( // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) __attribute__((target("wavefrontsize64"))) @@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK-LA
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) ranapratap55 wrote: Yes, this is generated codegen on wave32. https://github.com/llvm/llvm-project/pull/69567 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
@@ -7995,15 +7995,24 @@ enum SpecialRegisterAccessKind { Write, }; +// Generates the IR for __builtin_read_exec_*. +// Lowers the builtin to amdgcn_ballot intrinsic. static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { +Value *C1 = llvm::ConstantInt::get(ValueType, 32); +Value *Rt2 = Builder.CreateLShr(Call, C1); ranapratap55 wrote: Removed ConstantInt::get. https://github.com/llvm/llvm-project/pull/69567 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/69567 >From ebc8ebdebf707f94723b1fe0ee1a1b6afe5dff49 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 21 +++- .../CodeGenOpenCL/builtins-amdgcn-wave32.cl | 24 +++ .../CodeGenOpenCL/builtins-amdgcn-wave64.cl | 23 ++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 +++- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..85be8bdd00516cb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind { Write, }; +// Generates the IR for __builtin_read_exec_*. +// Lowers the builtin to amdgcn_ballot intrinsic. static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { +Value *Rt2 = Builder.CreateLShr(Call, 32); +Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); +return Rt2; + } + return Call; } @@ -17857,10 +17865,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { -return 
EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } +return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index a4d14cf1f6cf0bd..43553131f63c549 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave32_target_attr( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) __attribute__((target("wavefrontsize32"))) @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 32 #error Wrong wavesize detected #endif diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 563c9a2a240c1dc..53f34c6a44ae7dc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave64_target_attr( // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) __attribute__((target("wavefrontsize64"))) @@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK-LA
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/69567 >From bd6e1449bd098d8348cf1402ad875e79cbc274b5 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 21 +++- .../CodeGenOpenCL/builtins-amdgcn-wave32.cl | 24 +++ .../CodeGenOpenCL/builtins-amdgcn-wave64.cl | 23 ++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 +++- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..85be8bdd00516cb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind { Write, }; +// Generates the IR for __builtin_read_exec_*. +// Lowers the builtin to amdgcn_ballot intrinsic. static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { +Value *Rt2 = Builder.CreateLShr(Call, 32); +Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); +return Rt2; + } + return Call; } @@ -17857,10 +17865,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { -return 
EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } +return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: +return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index a4d14cf1f6cf0bd..43553131f63c549 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave32_target_attr( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) __attribute__((target("wavefrontsize32"))) @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 32 #error Wrong wavesize detected #endif diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 563c9a2a240c1dc..53f34c6a44ae7dc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave64_target_attr( // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) __attribute__((target("wavefrontsize64"))) @@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK-LA
[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)
https://github.com/ranapratap55 closed https://github.com/llvm/llvm-project/pull/69567 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
https://github.com/ranapratap55 edited https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/140210 >From d1571dd53f157c9d4180c51e709d9bed0ba00136 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Fri, 16 May 2025 12:50:09 +0530 Subject: [PATCH 1/2] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 5 +++ clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 24 ++ .../test/CodeGen/builtins-image-load-2d-f32.c | 31 +++ 3 files changed, 60 insertions(+) create mode 100644 clang/test/CodeGen/builtins-image-load-2d-f32.c diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 39fef9e4601f8..67045809fa726 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "IUi", "nc", "bitop3-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") +//===--===// +// Image builtins +//===--===// +BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index ad012d98635ff..15f5cd89beaa9 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: { +llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext()); +llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u); + +llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1); +llvm::Value *arg0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *arg1 = 
EmitScalarExpr(E->getArg(1)); +llvm::Value *arg2 = EmitScalarExpr(E->getArg(2)); +llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0); +llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0); + +SmallVector ArgTys; +ArgTys.push_back(imm0); +ArgTys.push_back(arg0); +ArgTys.push_back(arg1); +ArgTys.push_back(arg2); +ArgTys.push_back(imm1); +ArgTys.push_back(imm2); + +llvm::CallInst *Call = +Builder.CreateIntrinsic(RetTy, Intrinsic::amdgcn_image_load_2d, ArgTys); + +return Call; + } case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8); diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c new file mode 100644 index 0..78dab461c1f38 --- /dev/null +++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable + +typedef int v8i __attribute__((ext_vector_type(8))); + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:[[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:[[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr 
+// CHECK-NEXT:[[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr +// CHECK-NEXT:store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:[[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT:[[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable + +typedef int v8i __attribute__((ext_vector_type(8))); ranapratap55 wrote: Updated the name. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable ranapratap55 wrote: removed https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: { +llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext()); +llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u); + +llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1); +llvm::Value *arg0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *arg1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *arg2 = EmitScalarExpr(E->getArg(2)); +llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0); +llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0); + +SmallVector ArgTys; +ArgTys.push_back(imm0); +ArgTys.push_back(arg0); +ArgTys.push_back(arg1); +ArgTys.push_back(arg2); +ArgTys.push_back(imm1); +ArgTys.push_back(imm2); ranapratap55 wrote: Done. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable + +typedef int v8i __attribute__((ext_vector_type(8))); + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:[[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:[[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:[[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr +// CHECK-NEXT:store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:[[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT:[[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0) +// CHECK-NEXT:ret float [[TMP3]] +// +float test_builtin_image_load_2d(float f32, int i32, v8i veci32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32); ranapratap55 wrote: Added more signature for 
image load https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable + +typedef int v8i __attribute__((ext_vector_type(8))); + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:[[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:[[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:[[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr +// CHECK-NEXT:store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:[[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT:[[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0) +// CHECK-NEXT:ret float [[TMP3]] +// +float test_builtin_image_load_2d(float f32, int i32, v8i veci32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32); +} ranapratap55 wrote: Added sema tests for 
non-imm arguments https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: { +llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext()); +llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u); + +llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1); +llvm::Value *arg0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *arg1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *arg2 = EmitScalarExpr(E->getArg(2)); +llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0); +llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0); ranapratap55 wrote: updated. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "IUi", "nc", "bitop3-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") +//===--===// +// Image builtins +//===--===// +BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n") ranapratap55 wrote: Updated https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "IUi", "nc", "bitop3-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") +//===--===// +// Image builtins +//===--===// +BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n") ranapratap55 wrote: updated. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: { +llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext()); +llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u); + +llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1); +llvm::Value *arg0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *arg1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *arg2 = EmitScalarExpr(E->getArg(2)); +llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0); +llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0); ranapratap55 wrote: updated. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: { +llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext()); +llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u); + +llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1); ranapratap55 wrote: Done. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s ranapratap55 wrote: done. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
ranapratap55 wrote: @arsenm ping. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -128,6 +160,16 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, return false; } +bool SemaAMDGPU::checkImageImmArgFunctionCall(CallExpr *TheCall, + unsigned ArgCount) { + llvm::APSInt Result; + if (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) && ranapratap55 wrote: updated. https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable + +typedef int v8i __attribute__((ext_vector_type(8))); + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:[[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:[[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:[[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr +// CHECK-NEXT:store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:[[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT:[[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0) +// CHECK-NEXT:ret float [[TMP3]] +// +float test_builtin_image_load_2d(float f32, int i32, v8i veci32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32); +} ranapratap55 wrote: > Need sema tests that 
verify the builtins are rejected on targets without > images Only f16 (v4f16) requires gfx8+ targets; all other types are supported from gfx6+. Where do I find which targets do not support images? https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -83,6 +83,38 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_update_dpp: { return checkMovDPPFunctionCall(TheCall, 6, 2); } + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { +unsigned ArgCount = TheCall->getNumArgs() - 1; 
ranapratap55 wrote: updated https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -683,6 +683,206 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { +llvm::Type *RetTy = nullptr; +switch (BuiltinID) { +case 
AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + RetTy = llvm::Type::getFloatTy(Builder.getContext()); + break; +case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + RetTy = + FixedVectorType::get(llvm::Type::getFloatTy(Builder.getContext()), 4); + break; +case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: + RetTy = + FixedVectorType::get(llvm::Type::getHalfTy(Builder.getContext()), 4); + break; +} + +llvm::Value *Dmask = 
EmitScalarExpr(E->getArg(0)); +llvm::Value *S = EmitScalarExpr(E->getArg(1)); +llvm::Value *T = EmitScalarExpr(E->getArg(2)); +llvm::Value *Slice; +llvm::Value *Mip; +llvm::Value *Rsrc; +llvm::Value *Tfe; +llvm::Value *Cpol; + +SmallVector ArgTys; + +Intrinsic::ID IID; +llvm::CallInst *Call; + +switch (BuiltinID) { +case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: { + Rsrc = EmitScalarExpr(E->getArg(2)); + Tfe = EmitScalarExpr(E->getArg(3)); + Cpol = EmitScalarExpr(E->getArg(4)); + + ArgTys = {Dmask, S, Rsrc, Tfe, Cpol}; + IID = Intrinsic::amdgcn_image_load_1d; + Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys); + break; +} +case AMDGPU::BI_
[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
@@ -683,6 +683,206 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { +llvm::Type *RetTy = nullptr; +switch (BuiltinID) { +case 
AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + RetTy = llvm::Type::getFloatTy(Builder.getContext()); + break; +case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + RetTy = + FixedVectorType::get(llvm::Type::getFloatTy(Builder.getContext()), 4); + break; +case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: +case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: + RetTy = + FixedVectorType::get(llvm::Type::getHalfTy(Builder.getContext()), 4); + break; +} + +llvm::Value *Dmask = 
EmitScalarExpr(E->getArg(0)); +llvm::Value *S = EmitScalarExpr(E->getArg(1)); +llvm::Value *T = EmitScalarExpr(E->getArg(2)); +llvm::Value *Slice; ranapratap55 wrote: updated to `nullptr` https://github.com/llvm/llvm-project/pull/140210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits