llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: Aaditya (easyonaadit)

<details>
<summary>Changes</summary>

For #119822

---

Patch is 39.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120063.diff

4 Files Affected:

- (added) clang/test/CodeGenHIP/dynamic-alloca.cpp (+532)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll (+10)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll (+85)
- (modified) llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll (+40-2)


``````````diff
diff --git a/clang/test/CodeGenHIP/dynamic-alloca.cpp b/clang/test/CodeGenHIP/dynamic-alloca.cpp
new file mode 100644
index 00000000000000..4bbc6b2e69917f
--- /dev/null
+++ b/clang/test/CodeGenHIP/dynamic-alloca.cpp
@@ -0,0 +1,532 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+
+#define __global__ __attribute__((global))
+#define __device__ __attribute__((device))
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z34kernel_function_builtin_alloca_immv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 40, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// CHECK-NEXT: store ptr [[TMP1]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_imm(){
+    volatile int *alloca = static_cast<volatile int*>(__builtin_alloca(10*sizeof(int)));
+    static_cast<volatile int*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z50kernel_function_non_entry_block_builtin_alloca_immPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: [[ALLOCA2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA2]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 40, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: br label %[[IF_END:.*]]
+// CHECK: [[IF_ELSE]]:
+// CHECK-NEXT: [[TMP5:%.*]] = alloca i8, i64 80, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(5) [[TMP5]] to ptr
+// CHECK-NEXT: store ptr [[TMP6]], ptr [[ALLOCA2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ALLOCA2_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0
+// CHECK-NEXT: store volatile i32 20, ptr [[ARRAYIDX3]], align 4
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_non_entry_block_builtin_alloca_imm(int* a){
+    if(*a < 10){
+        volatile void *alloca = __builtin_alloca(10*sizeof(int));
+        static_cast<volatile int*>(alloca)[0] = 10;
+    }
+    else {
+        volatile void *alloca = __builtin_alloca(20*sizeof(int));
+        static_cast<volatile int*>(alloca)[0] = 20;
+    }
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z30kernel_function_builtin_allocaPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca(int* a){
+    volatile void *alloca = __builtin_alloca((*a)*sizeof(int));
+    static_cast<volatile int*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z44kernel_function_builtin_alloca_uninitializedPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile float 1.000000e+01, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_uninitialized(int* a){
+    volatile void *alloca = __builtin_alloca_uninitialized((*a)*sizeof(float));
+    static_cast<volatile float*>(alloca)[0] = 10.0;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z44kernel_function_builtin_alloca_default_alignPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 8
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i64 10, ptr [[ARRAYIDX]], align 8
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_default_align(int* a){
+    volatile void *alloca = __builtin_alloca_with_align((*a)*sizeof(long), 64);
+    static_cast<volatile long*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z48kernel_function_builtin_alloca_non_default_alignPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 32, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_non_default_align(int* a){
+    volatile void *alloca = __builtin_alloca_with_align((*a)*sizeof(unsigned), 256);
+    static_cast<volatile unsigned*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z62kernel_function_builtin_alloca_non_default_align_uninitializedPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 32, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_non_default_align_uninitialized(int* a){
+    volatile void *alloca = __builtin_alloca_with_align_uninitialized((*a)*sizeof(unsigned), 256);
+    static_cast<volatile unsigned*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z35kernel_function_variable_size_arrayPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[SAVED_STACK:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// CHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[SAVED_STACK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SAVED_STACK]] to ptr
+// CHECK-NEXT: [[__VLA_EXPR0_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__VLA_EXPR0]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(5) @llvm.stacksave.p5()
+// CHECK-NEXT: store ptr addrspace(5) [[TMP3]], ptr [[SAVED_STACK_ASCAST]], align 4
+// CHECK-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP2]], align 4, addrspace(5)
+// CHECK-NEXT: [[VLA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA]] to ptr
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA_ASCAST]], i64 2
+// CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(5), ptr [[SAVED_STACK_ASCAST]], align 4
+// CHECK-NEXT: call void @llvm.stackrestore.p5(ptr addrspace(5) [[TMP4]])
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_variable_size_array(int* a){
+    int arr[*a];
+    arr[2] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z51kernel_function_non_entry_block_static_sized_allocaPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: [[ALLOCA2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA2]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP3]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(5) [[TMP4]] to ptr
+// CHECK-NEXT: store ptr [[TMP5]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: br label %[[IF_END:.*]]
+// CHECK: [[IF_ELSE]]:
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK-NEXT: [[MUL3:%.*]] = mul nsw i32 2, [[TMP8]]
+// CHECK-NEXT: [[CONV4:%.*]] = sext i32 [[MUL3]] to i64
+// CHECK-NEXT: [[MUL5:%.*]] = mul i64 [[CONV4]], 4
+// CHECK-NEXT: [[TMP9:%.*]] = alloca i8, i64 [[MUL5]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(5) [[TMP9]] to ptr
+// CHECK-NEXT: store ptr [[TMP10]], ptr [[ALLOCA2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ALLOCA2_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0
+// CHECK-NEXT: store volatile i32 20, ptr [[ARRAYIDX6]], align 4
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_non_entry_block_static_sized_alloca(int* a){
+    if(*a < 10){
+        volatile void *alloca = __builtin_alloca((*a)*sizeof(int));
+        static_cast<volatile int*>(alloca)[0] = 10;
+    }
+    else {
+        volatile void *alloca = __builtin_alloca(2*(*a)*sizeof(int));
+        static_cast<volatile int*>(alloca)[0] = 20;
+    }
+}
+
+// CHECK-LABEL: define dso_local void @_Z50device_function_non_entry_block_builtin_alloca_immv(
+// CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 10, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// CHECK-NEXT: store ptr [[TMP1]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
+// CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__device__ void device_function_non_entry_block_builtin_alloca_imm(){
+    int *alloca = static_cast<int *>(__builtin_alloca(10));
+    alloca[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local void @_Z30device_function_builtin_allocaPi(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:... [truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/120063