[clang] [AMDGPU] Stop coercing structs with FP and int fields to integer arrays (PR #185083)

via cfe-commits Sun, 15 Mar 2026 09:31:10 -0700

https://github.com/addmisol updated 
https://github.com/llvm/llvm-project/pull/185083


>From c5ffb2e73bcf69513f94d8e7b89e8372d0d280b2 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Fri, 6 Mar 2026 23:56:34 +0530
Subject: [PATCH 01/18] Create amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 
clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c 
b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 00000000000000..2399630ff797b2
--- /dev/null
+++ b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float 
%x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 
%x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float 
%x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, 
i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 
%x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 
%x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 68c200f848058ab22b3d25ce810f1639eac50556 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Fri, 6 Mar 2026 23:57:11 +0530
Subject: [PATCH 02/18] Delete
 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 -------------------
 1 file changed, 71 deletions(-)
 delete mode 100644 
clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c 
b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
deleted file mode 100644
index 2399630ff797b2..00000000000000
--- a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
-
-typedef struct fp_int_pair {
-    float f;
-    int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float 
%x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
-    return x;
-}
-
-typedef struct int_fp_pair {
-    int i;
-    float f;
-} int_fp_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 
%x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
-    return x;
-}
-
-typedef struct two_floats {
-    float a;
-    float b;
-} two_floats;
-
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float 
%x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
-    return x;
-}
-
-typedef struct two_ints {
-    int a;
-    int b;
-} two_ints;
-
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, 
i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
-    return x;
-}
-
-// Structs <= 32 bits should still be coerced to i32 for return value
-typedef struct small_struct {
-    short a;
-    short b;
-} small_struct;
-
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 
%x.coerce1)
-small_struct return_small_struct(small_struct x) {
-    return x;
-}
-
-// Structs <= 16 bits should still be coerced to i16 for return value
-typedef struct tiny_struct {
-    char a;
-    char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 
%x.coerce1)
-tiny_struct return_tiny_struct(tiny_struct x) {
-    return x;
-}

>From 3c5401a8e20cdac719d6817e198cc330dc0e4e80 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Fri, 6 Mar 2026 23:58:43 +0530
Subject: [PATCH 03/18] fix for clang abi lowering

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c 
b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 00000000000000..2399630ff797b2
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float 
%x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 
%x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float 
%x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, 
i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 
%x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 
%x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 6cd1099ec2e06c33fd5d7092206e778a1e8ba58a Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Sat, 7 Mar 2026 00:00:29 +0530
Subject: [PATCH 04/18] Update amdgcn-openmp-device-math-complex.c

---
 clang/test/Headers/amdgcn-openmp-device-math-complex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c 
b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
index b347cf4716df29..34c05e2974a64e 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -30,8 +30,8 @@ void test_complex_f32(float _Complex a) {
 // CHECK-LABEL: define {{.*}}test_complex_f32
 #pragma omp target
   {
-    // CHECK: call [2 x i32] @__divsc3
-    // CHECK: call [2 x i32] @__mulsc3
+    // CHECK: call { float, float } @__divsc3
+    // CHECK: call { float, float } @__mulsc3
     (void)(a * (a / a));
   }
 }

>From a67bcdb1baecf786c7714a07d05306b614634ce5 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Sat, 7 Mar 2026 00:11:07 +0530
Subject: [PATCH 05/18] Update amdgpu-abi-struct-coerce.cl

---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl    | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl 
b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb25..a13f8e8bbe1199 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
 // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
 void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 
%arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 
%arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
 void func_struct_char_x8(struct_char_x8 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 
%arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
 void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 
%arg.coerce1, i8 %arg.coerce2)
 void func_struct_char_x3(struct_char_x3 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 
%arg.coerce1)
 void func_struct_char_x2(struct_char_x2 arg) { }
 
 // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,10 @@ struct_padding_arg func_struct_padding_ret()
   return s;
 }
 
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
+// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
+// CHECK: ret %struct.struct_char_x8 zeroinitializer
+ struct_char_x8 func_struct_char_x8_ret()
+ {
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };
@@ -525,5 +527,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, 
char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) 
{ }
 
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef 
%arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef 
%arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef 
%arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef 
%arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 
captures(none) %{{.}}, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, 
struct_char_x8 arg4, int arg5) { }

>From c299160a68b48335ff616aa586098403a9bb81b3 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Sat, 7 Mar 2026 00:13:09 +0530
Subject: [PATCH 06/18] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp 
b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4ac7f42289d6d7..f3c4b5ad0837b7 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -163,11 +163,6 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType 
RetTy) const {
       if (Size <= 32)
         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
 
-      if (Size <= 64) {
-        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
-        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
-      }
-
       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
         return ABIArgInfo::getDirect();
     }
@@ -246,23 +241,6 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType 
Ty, bool Variadic,
         RD && RD->hasFlexibleArrayMember())
       return DefaultABIInfo::classifyArgumentType(Ty);
 
-    // Pack aggregates <= 8 bytes into single VGPR or pair.
-    uint64_t Size = getContext().getTypeSize(Ty);
-    if (Size <= 64) {
-      unsigned NumRegs = (Size + 31) / 32;
-      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
-
-      if (Size <= 16)
-        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
-
-      if (Size <= 32)
-        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
-
-      // XXX: Should this be i64 instead, and should the limit increase?
-      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
-    }
-
     if (NumRegsLeft > 0) {
       uint64_t NumRegs = numRegsForType(Ty);
       if (NumRegsLeft >= NumRegs) {

>From 3c87855bcfb0874e8abad1f3735350bb56e369c7 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Sat, 7 Mar 2026 00:31:52 +0530
Subject: [PATCH 07/18] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl 
b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index a13f8e8bbe1199..fb5ba69c86c6d2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -365,8 +365,6 @@ struct_padding_arg func_struct_padding_ret()
 
 // CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
 // CHECK: ret %struct.struct_char_x8 zeroinitializer
- struct_char_x8 func_struct_char_x8_ret()
- {
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };

>From cafbf0012a50ab060420db2f7833b8a6ef2dd299 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Sat, 7 Mar 2026 01:35:56 +0530
Subject: [PATCH 08/18] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl 
b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index fb5ba69c86c6d2..3e4506b88aac6a 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, 
char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) 
{ }
 
-// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef 
%arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef 
%arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 
captures(none) %{{.}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef 
%arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef 
%arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 
captures(none) %{{.*}}, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, 
struct_char_x8 arg4, int arg5) { }

>From 2188c6fba42dd483d670bd22b75b533f5f27067c Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Sat, 7 Mar 2026 01:37:44 +0530
Subject: [PATCH 09/18] Update amdgpu-abi-struct-coerce.cl


>From 457f683653b6b0ed8165fad5b955c6bbda34670b Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Sat, 7 Mar 2026 01:43:50 +0530
Subject: [PATCH 10/18] Update amdgpu-variadic-call.c

---
 clang/test/CodeGen/amdgpu-variadic-call.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-variadic-call.c 
b/clang/test/CodeGen/amdgpu-variadic-call.c
index 17eda215211a2a..22402118d862fe 100644
--- a/clang/test/CodeGen/amdgpu-variadic-call.c
+++ b/clang/test/CodeGen/amdgpu-variadic-call.c
@@ -217,10 +217,9 @@ typedef union
 } union_f32_i32;
 
 // CHECK-LABEL: define {{[^@]+}}@one_pair_union_f32_i32
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 
[[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], float 
[[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V0_COERCE]] to float
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue 
[[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue 
[[UNION_UNION_F32_I32:%.*]] poison, float [[V0_COERCE]], 0
 // CHECK-NEXT:    tail call void (...) @sink_0([[UNION_UNION_F32_I32]] 
[[DOTFCA_0_INSERT]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], 
[[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef 
[[F1]], i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) 
#[[ATTR2]]
@@ -273,13 +272,12 @@ void multiple_one(int f0, double f1, int v0, double v1)
 }
 
 // CHECK-LABEL: define {{[^@]+}}@multiple_two
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double 
[[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], i32 
[[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double 
[[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], float 
[[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V2_COERCE]] to float
 // CHECK-NEXT:    [[CONV:%.*]] = fpext float [[V1]] to double
 // CHECK-NEXT:    [[DOTFCA_0_INSERT16:%.*]] = insertvalue 
[[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
 // CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] 
[[DOTFCA_0_INSERT16]], double [[V0_COERCE1]], 1
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue 
[[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue 
[[UNION_UNION_F32_I32:%.*]] poison, float [[V2_COERCE]], 0
 // CHECK-NEXT:    tail call void (...) @sink_0([[STRUCT_PAIR_F64]] 
[[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] 
[[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], 
[[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], 
[[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef 
[[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double 
noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef 
[[V3]]) #[[ATTR2]]

>From 3da0a3310411fd65310faea9d8d364d961ea02e7 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sun, 15 Mar 2026 21:00:33 +0530
Subject: [PATCH 11/18] Update amdgpu-abi-struct-coerce.cl

---
 .../test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl 
b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 3e4506b88aac6a..06d3cdb01deb25 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
 // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
 void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 
%arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 
%arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
+// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
 void func_struct_char_x8(struct_char_x8 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 
%arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
+// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
 void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 
%arg.coerce1, i8 %arg.coerce2)
+// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
 void func_struct_char_x3(struct_char_x3 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 
%arg.coerce1)
+// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
 void func_struct_char_x2(struct_char_x2 arg) { }
 
 // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,8 @@ struct_padding_arg func_struct_padding_ret()
   return s;
 }
 
-// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
-// CHECK: ret %struct.struct_char_x8 zeroinitializer
+// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, 
char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) 
{ }
 
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef 
%arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef 
%arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 
captures(none) %{{.*}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef 
%arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef 
%arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, 
struct_char_x8 arg4, int arg5) { }

>From d67e84dbfbc0a1d2f0f80e5c3008942107058829 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sun, 15 Mar 2026 21:01:14 +0530
Subject: [PATCH 12/18] Update amdgpu-abi-struct-coerce.c

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 55 ++++++++++++++++---
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c 
b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 2399630ff797b2..f827978a8cd183 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,8 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
 
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
+// Check that structs containing floats or full-sized integers (i32, i64) are
+// NOT coerced to integer arrays. They should preserve their original types.
+// However, structs containing only sub-32-bit integer types (char, short)
+// should still be packed into i32 registers.
+
+// === Structs with floats - should NOT be coerced to integers ===
 
 typedef struct fp_int_pair {
     float f;
@@ -37,6 +41,8 @@ two_floats return_two_floats(two_floats x) {
     return x;
 }
 
+// === Structs with full-sized integers - should NOT be coerced ===
+
 typedef struct two_ints {
     int a;
     int b;
@@ -48,24 +54,59 @@ two_ints return_two_ints(two_ints x) {
     return x;
 }
 
-// Structs <= 32 bits should still be coerced to i32 for return value
+// === Structs with only sub-32-bit integers - SHOULD be coerced ===
+
+// Structs of small integers <= 32 bits should be coerced to i32
 typedef struct small_struct {
     short a;
     short b;
 } small_struct;
 
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 
%x.coerce1)
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
 small_struct return_small_struct(small_struct x) {
     return x;
 }
 
-// Structs <= 16 bits should still be coerced to i16 for return value
+// Structs of small integers <= 16 bits should be coerced to i16
 typedef struct tiny_struct {
     char a;
     char b;
 } tiny_struct;
 
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 
%x.coerce1)
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
 tiny_struct return_tiny_struct(tiny_struct x) {
     return x;
 }
+
+// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
+typedef struct eight_chars {
+    char a, b, c, d, e, f, g, h;
+} eight_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
+eight_chars return_eight_chars(eight_chars x) {
+    return x;
+}
+
+// Struct of 4 chars (32 bits) should be coerced to i32
+typedef struct four_chars {
+    char a, b, c, d;
+} four_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
+four_chars return_four_chars(four_chars x) {
+    return x;
+}
+
+// === Mixed tests - floats prevent coercion even with small integers ===
+
+typedef struct char_and_float {
+    char c;
+    float f;
+} char_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 
%x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.char_and_float
+char_and_float return_char_and_float(char_and_float x) {
+    return x;
+}

>From e28dc49ff83911534b561ecf23a96a4b3446eecf Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sun, 15 Mar 2026 21:01:59 +0530
Subject: [PATCH 13/18] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 95 ++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp 
b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index f3c4b5ad0837b7..9e0ca7b77ecdd3 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -77,6 +77,54 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
   return Members * NumRegs <= MaxNumRegsForArgsRet;
 }
 
+/// Check if all fields in an aggregate type contain only sub-32-bit integer
+/// types. Such aggregates should be packed into i32 registers rather than
+/// passed as individual elements. Aggregates containing floats or full-sized
+/// integer types (i32, i64) should preserve their original types.
+static bool containsOnlyPackableIntegerTypes(const RecordDecl *RD,
+                                             const ASTContext &Context) {
+  for (const FieldDecl *Field : RD->fields()) {
+    QualType FieldTy = Field->getType();
+
+    // Recursively check nested structs
+    if (const auto *NestedRD = FieldTy->getAsRecordDecl()) {
+      if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+        return false;
+      continue;
+    }
+
+    // Arrays - check the element type
+    if (const auto *AT = Context.getAsConstantArrayType(FieldTy)) {
+      QualType EltTy = AT->getElementType();
+      if (const auto *NestedRD = EltTy->getAsRecordDecl()) {
+        if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+          return false;
+        continue;
+      }
+      // For non-struct array elements, check if they're packable integers
+      if (!EltTy->isIntegerType())
+        return false;
+      uint64_t EltSize = Context.getTypeSize(EltTy);
+      if (EltSize >= 32)
+        return false;
+      continue;
+    }
+
+    // Floating point types should not be packed into integers
+    if (FieldTy->isFloatingType())
+      return false;
+
+    // Only integer types that are smaller than 32 bits should be packed
+    if (!FieldTy->isIntegerType())
+      return false;
+
+    uint64_t FieldSize = Context.getTypeSize(FieldTy);
+    if (FieldSize >= 32)
+      return false;
+  }
+  return true;
+}
+
 /// Estimate number of registers the type will use when passed in registers.
 uint64_t AMDGPUABIInfo::numRegsForType(QualType Ty) const {
   uint64_t NumRegs = 0;
@@ -155,13 +203,26 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType 
RetTy) const {
           RD && RD->hasFlexibleArrayMember())
         return DefaultABIInfo::classifyReturnType(RetTy);
 
-      // Pack aggregates <= 4 bytes into single VGPR or pair.
+      // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+      // contain sub-32-bit integer types. Aggregates with floats or full-sized
+      // integers should preserve their original types.
       uint64_t Size = getContext().getTypeSize(RetTy);
-      if (Size <= 16)
-        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+      if (Size <= 64) {
+        const auto *RD = RetTy->getAsRecordDecl();
+        bool ShouldPackToInt =
+            RD && containsOnlyPackableIntegerTypes(RD, getContext());
 
-      if (Size <= 32)
-        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+        if (ShouldPackToInt) {
+          if (Size <= 16)
+            return 
ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+          if (Size <= 32)
+            return 
ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+          llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+          return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+        }
+      }
 
       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
         return ABIArgInfo::getDirect();
@@ -241,6 +302,30 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType 
Ty, bool Variadic,
         RD && RD->hasFlexibleArrayMember())
       return DefaultABIInfo::classifyArgumentType(Ty);
 
+    // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+    // contain sub-32-bit integer types. Aggregates with floats or full-sized
+    // integers (i32, i64) should preserve their original types.
+    uint64_t Size = getContext().getTypeSize(Ty);
+    if (Size <= 64) {
+      const auto *RD = Ty->getAsRecordDecl();
+      bool ShouldPackToInt =
+          RD && containsOnlyPackableIntegerTypes(RD, getContext());
+
+      if (ShouldPackToInt) {
+        unsigned NumRegs = (Size + 31) / 32;
+        NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+        if (Size <= 16)
+          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+        if (Size <= 32)
+          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+      }
+    }
+
     if (NumRegsLeft > 0) {
       uint64_t NumRegs = numRegsForType(Ty);
       if (NumRegsLeft >= NumRegs) {

>From fd6274476d41f42bf696f557cf2378140720d2c8 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sun, 15 Mar 2026 21:15:11 +0530
Subject: [PATCH 14/18] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp 
b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 829500383a34a0..4918bdcd8111b5 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -214,10 +214,12 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType 
RetTy) const {
 
         if (ShouldPackToInt) {
           if (Size <= 16)
-            return 
ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+            return ABIArgInfo::getDirect(
+                llvm::Type::getInt16Ty(getVMContext()));
 
           if (Size <= 32)
-            return 
ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+            return ABIArgInfo::getDirect(
+                llvm::Type::getInt32Ty(getVMContext()));
 
           llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
           return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
@@ -316,10 +318,12 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType 
Ty, bool Variadic,
         NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
 
         if (Size <= 16)
-          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+          return ABIArgInfo::getDirect(
+              llvm::Type::getInt16Ty(getVMContext()));
 
         if (Size <= 32)
-          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+          return ABIArgInfo::getDirect(
+              llvm::Type::getInt32Ty(getVMContext()));
 
         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));

>From f25324bb2304449aa95d79a620b910b11869ae2a Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sun, 15 Mar 2026 21:28:52 +0530
Subject: [PATCH 15/18] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp 
b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4918bdcd8111b5..06b066de590557 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -318,12 +318,10 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType 
Ty, bool Variadic,
         NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
 
         if (Size <= 16)
-          return ABIArgInfo::getDirect(
-              llvm::Type::getInt16Ty(getVMContext()));
+          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
 
         if (Size <= 32)
-          return ABIArgInfo::getDirect(
-              llvm::Type::getInt32Ty(getVMContext()));
+          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
 
         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));

>From 804a5a538e79e4e19d952d0d5a00269431fceb54 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sun, 15 Mar 2026 21:53:13 +0530
Subject: [PATCH 16/18] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl 
b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb25..e9cdb7f5da32a9 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -431,8 +431,8 @@ struct_char_arr32 func_ret_struct_char_arr32()
   return s;
 }
 
-// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr 
#[[ATTR1:[0-9]+]] {
-// CHECK: ret i32 0
+// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() 
local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK: ret %union.transparent_u zeroinitializer
 transparent_u func_transparent_union_ret()
 {
   transparent_u u = { 0 };

>From 928aa4ed1558e7e2d52461df83d0f80004d317e0 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sun, 15 Mar 2026 21:58:43 +0530
Subject: [PATCH 17/18] Update amdgpu-abi-struct-coerce.cl

---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 785 +++++++-----------
 1 file changed, 319 insertions(+), 466 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl 
b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index e9cdb7f5da32a9..7857d01f431c81 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -1,529 +1,382 @@
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | 
FileCheck %s
-// RUN: %clang_cc1 -triple r600-unknown-unknown -emit-llvm -o - %s | FileCheck 
%s
-
-typedef __attribute__(( ext_vector_type(2) )) char char2;
-typedef __attribute__(( ext_vector_type(3) )) char char3;
-typedef __attribute__(( ext_vector_type(4) )) char char4;
-
-typedef __attribute__(( ext_vector_type(2) )) short short2;
-typedef __attribute__(( ext_vector_type(3) )) short short3;
-typedef __attribute__(( ext_vector_type(4) )) short short4;
-
-typedef __attribute__(( ext_vector_type(2) )) int int2;
-typedef __attribute__(( ext_vector_type(3) )) int int3;
-typedef __attribute__(( ext_vector_type(4) )) int int4;
-typedef __attribute__(( ext_vector_type(16) )) int int16;
-typedef __attribute__(( ext_vector_type(32) )) int int32;
-
-// CHECK: %struct.empty_struct = type {}
-typedef struct empty_struct
-{
-} empty_struct;
-
-// CHECK-NOT: %struct.single_element_struct_arg
-typedef struct single_element_struct_arg
-{
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Test AMDGPU ABI struct coercion behavior:
+// - Structs containing ONLY sub-32-bit integers (char, short) should be 
packed into i32 registers
+// - Structs containing floats or full-sized integers (i32, i64) should 
preserve their original types
+//
+// This tests the fix for the issue where structs like {float, int} were 
incorrectly
+// coerced to [2 x i32], losing float type information.
+
+// ============================================================================
+// SECTION 1: Structs with floats - should NOT be coerced to integers
+// ============================================================================
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float 
%x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
     int i;
-} single_element_struct_arg_t;
-
-// CHECK-NOT: %struct.nested_single_element_struct_arg
-typedef struct nested_single_element_struct_arg
-{
-  single_element_struct_arg_t i;
-} nested_single_element_struct_arg_t;
-
-// CHECK: %struct.struct_arg = type { i32, float, i32 }
-typedef struct struct_arg
-{
-    int i1;
     float f;
-    int i2;
-} struct_arg_t;
-
-// CHECK: %struct.struct_padding_arg = type { i8, i64 }
-typedef struct struct_padding_arg
-{
-  char i1;
-  long f;
-} struct_padding_arg;
-
-// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], 
[3 x float], i32 }
-typedef struct struct_of_arrays_arg
-{
-    int i1[2];
-    float f1;
-    int i2[4];
-    float f2[3];
-    int i3;
-} struct_of_arrays_arg_t;
-
-// CHECK: %struct.struct_of_structs_arg = type { i32, float, 
%struct.struct_arg, i32 }
-typedef struct struct_of_structs_arg
-{
-    int i1;
-    float f1;
-    struct_arg_t s1;
-    int i2;
-} struct_of_structs_arg_t;
-
-typedef union
-{
-  int b1;
-  float b2;
-} transparent_u __attribute__((__transparent_union__));
-
-// CHECK: %struct.single_array_element_struct_arg = type { [4 x i32] }
-typedef struct single_array_element_struct_arg
-{
-    int i[4];
-} single_array_element_struct_arg_t;
-
-// CHECK: %struct.single_struct_element_struct_arg = type { %struct.inner }
-// CHECK: %struct.inner = type { i32, i64 }
-typedef struct single_struct_element_struct_arg
-{
-  struct inner {
-    int a;
-    long b;
-  } s;
-} single_struct_element_struct_arg_t;
-
-// CHECK: %struct.different_size_type_pair
-typedef struct different_size_type_pair {
-  long l;
-  int i;
-} different_size_type_pair;
-
-// CHECK: %struct.flexible_array = type { i32, [0 x i32] }
-typedef struct flexible_array
-{
-  int i;
-  int flexible[];
-} flexible_array;
-
-// CHECK: %struct.struct_arr16 = type { [16 x i32] }
-typedef struct struct_arr16
-{
-    int arr[16];
-} struct_arr16;
-
-// CHECK: %struct.struct_arr32 = type { [32 x i32] }
-typedef struct struct_arr32
-{
-    int arr[32];
-} struct_arr32;
-
-// CHECK: %struct.struct_arr33 = type { [33 x i32] }
-typedef struct struct_arr33
-{
-    int arr[33];
-} struct_arr33;
-
-// CHECK: %struct.struct_char_arr32 = type { [32 x i8] }
-typedef struct struct_char_arr32
-{
-  char arr[32];
-} struct_char_arr32;
-
-// CHECK-NOT: %struct.struct_char_x8
-typedef struct struct_char_x8 {
-  char x, y, z, w;
-  char a, b, c, d;
-} struct_char_x8;
-
-// CHECK-NOT: %struct.struct_char_x4
-typedef struct struct_char_x4 {
-  char x, y, z, w;
-} struct_char_x4;
-
-// CHECK-NOT: %struct.struct_char_x3
-typedef struct struct_char_x3 {
-  char x, y, z;
-} struct_char_x3;
-
-// CHECK-NOT: %struct.struct_char_x2
-typedef struct struct_char_x2 {
-  char x, y;
-} struct_char_x2;
-
-// CHECK-NOT: %struct.struct_char_x1
-typedef struct struct_char_x1 {
-  char x;
-} struct_char_x1;
-
-// 4 registers from fields, 5 if padding included.
-// CHECK: %struct.nested = type { i8, i64 }
-// CHECK: %struct.num_regs_nested_struct = type { i32, %struct.nested }
-typedef struct num_regs_nested_struct {
-  int x;
-  struct nested {
-    char z;
-    long y;
-  } inner;
-} num_regs_nested_struct;
-
-// CHECK: %struct.double_nested = type { %struct.inner_inner }
-// CHECK: %struct.inner_inner = type { i8, i32, i8 }
-// CHECK: %struct.double_nested_struct = type { i32, %struct.double_nested, 
i16 }
-typedef struct double_nested_struct {
-  int x;
-  struct double_nested {
-    struct inner_inner {
-      char y;
-      int q;
-      char z;
-    } inner_inner;
-  } inner;
-
-  short w;
-} double_nested_struct;
-
-// This is a large struct, but uses fewer registers than the limit.
-// CHECK: %struct.large_struct_padding = type { i8, i32, i8, i32, i8, i8, i16, 
i16, [3 x i8], i64, i32, i8, i32, i16, i8 }
-typedef struct large_struct_padding {
-  char e0;
-  int e1;
-  char e2;
-  int e3;
-  char e4;
-  char e5;
-  short e6;
-  short e7;
-  char e8[3];
-  long e9;
-  int e10;
-  char e11;
-  int e12;
-  short e13;
-  char e14;
-} large_struct_padding;
-
-// The number of registers computed should be 6, not 8.
-typedef struct int3_pair {
-       int3 dx;
-       int3 dy;
-} int3_pair;
-
-// CHECK: %struct.struct_4regs = type { i32, i32, i32, i32 }
-typedef struct struct_4regs
-{
-  int x;
-  int y;
-  int z;
-  int w;
-} struct_4regs;
-
-// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone 
byref(%struct.empty_struct) align 1 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg()
-__kernel void kernel_empty_struct_arg(empty_struct s) { }
-
-// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce)
-__kernel void kernel_single_element_struct_arg(single_element_struct_arg_t 
arg1) { }
-
-// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce)
-__kernel void 
kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t 
arg1) { }
-
-// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly 
byref(%struct.struct_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, 
float %arg1.coerce1, i32 %arg1.coerce2)
-__kernel void kernel_struct_arg(struct_arg_t arg1) { }
-
-// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly 
byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 
%arg1.coerce0, i64 %arg1.coerce1)
-__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { }
-
-// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef 
readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x 
i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] 
%arg1.coerce3, i32 %arg1.coerce4)
-__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
-
-// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly 
byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 
%arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 
%arg1.coerce3)
-__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
-
-// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
-__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
-
-// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) 
noundef readonly byref(%struct.single_array_element_struct_arg) align 4 
captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 
x i32] %arg1.coerce)
-__kernel void 
kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) 
{ }
-
-// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) 
noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 
captures(none) {{%.+}})
-// CHECK: void 
@__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner 
%arg1.coerce)
-__kernel void 
kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t 
arg1) { }
-
-// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef 
readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 
%arg1.coerce0, i32 %arg1.coerce1)
-__kernel void kernel_different_size_type_pair_arg(different_size_type_pair 
arg1) { }
-
-// CHECK: define{{.*}} void @func_f32_arg(float noundef %arg)
-void func_f32_arg(float arg) { }
-
-// CHECK: define{{.*}} void @func_v2i16_arg(<2 x i16> noundef %arg)
-void func_v2i16_arg(short2 arg) { }
-
-// CHECK: define{{.*}} void @func_v3i32_arg(<3 x i32> noundef %arg)
-void func_v3i32_arg(int3 arg) { }
-
-// CHECK: define{{.*}} void @func_v4i32_arg(<4 x i32> noundef %arg)
-void func_v4i32_arg(int4 arg) { }
-
-// CHECK: define{{.*}} void @func_v16i32_arg(<16 x i32> noundef %arg)
-void func_v16i32_arg(int16 arg) { }
-
-// CHECK: define{{.*}} void @func_v32i32_arg(<32 x i32> noundef %arg)
-void func_v32i32_arg(int32 arg) { }
+} int_fp_pair;
 
-// CHECK: define{{.*}} void @func_empty_struct_arg()
-void func_empty_struct_arg(empty_struct empty) { }
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 
%x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
 
-// CHECK: void @func_single_element_struct_arg(i32 %arg1.coerce)
-void func_single_element_struct_arg(single_element_struct_arg_t arg1) { }
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
 
-// CHECK: void @func_nested_single_element_struct_arg(i32 %arg1.coerce)
-void func_nested_single_element_struct_arg(nested_single_element_struct_arg_t 
arg1) { }
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float 
%x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
 
-// CHECK: void @func_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 
%arg1.coerce2)
-void func_struct_arg(struct_arg_t arg1) { }
+// Double precision floats
+typedef struct double_struct {
+    double d;
+} double_struct;
 
-// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
-void func_struct_padding_arg(struct_padding_arg arg1) { }
+// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce)
+double_struct return_double_struct(double_struct x) {
+    return x;
+}
 
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
-void func_struct_char_x8(struct_char_x8 arg) { }
+// ============================================================================
+// SECTION 2: Structs with full-sized integers - should NOT be coerced
+// ============================================================================
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
-void func_struct_char_x4(struct_char_x4 arg) { }
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
-void func_struct_char_x3(struct_char_x3 arg) { }
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, 
i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
-void func_struct_char_x2(struct_char_x2 arg) { }
+typedef struct single_int {
+    int a;
+} single_int;
 
-// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
-void func_struct_char_x1(struct_char_x1 arg) { }
+// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce)
+single_int return_single_int(single_int x) {
+    return x;
+}
 
-// CHECK: void @func_transparent_union_arg(i32 %u.coerce)
-void func_transparent_union_arg(transparent_u u) { }
+typedef struct int64_struct {
+    long long a;
+} int64_struct;
 
-// CHECK: void @func_single_array_element_struct_arg([4 x i32] %arg1.coerce)
-void func_single_array_element_struct_arg(single_array_element_struct_arg_t 
arg1) { }
+// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce)
+int64_struct return_int64_struct(int64_struct x) {
+    return x;
+}
 
-// CHECK: void @func_single_struct_element_struct_arg(%struct.inner 
%arg1.coerce)
-void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t 
arg1) { }
+// ============================================================================
+// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced
+// ============================================================================
 
-// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 
%arg1.coerce1)
-void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
+// Structs of small integers <= 32 bits should be coerced to i32
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
 
-// CHECK: void @func_flexible_array_arg(ptr addrspace(5) noundef readnone 
byval(%struct.flexible_array) align 4 captures(none) %arg)
-void func_flexible_array_arg(flexible_array arg) { }
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
 
-// CHECK: define{{.*}} float @func_f32_ret()
-float func_f32_ret()
-{
-  return 0.0f;
+// Structs of small integers <= 16 bits should be coerced to i16
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_empty_struct_ret()
-empty_struct func_empty_struct_ret()
-{
-  empty_struct s = {};
-  return s;
+// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
+typedef struct eight_chars {
+    char a, b, c, d, e, f, g, h;
+} eight_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
+eight_chars return_eight_chars(eight_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i32 @single_element_struct_ret()
-// CHECK: ret i32 0
-single_element_struct_arg_t single_element_struct_ret()
-{
-  single_element_struct_arg_t s = { 0 };
-  return s;
+// Struct of 4 chars (32 bits) should be coerced to i32
+typedef struct four_chars {
+    char a, b, c, d;
+} four_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
+four_chars return_four_chars(four_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i32 @nested_single_element_struct_ret()
-// CHECK: ret i32 0
-nested_single_element_struct_arg_t nested_single_element_struct_ret()
-{
-  nested_single_element_struct_arg_t s = { 0 };
-  return s;
+// Struct of 4 shorts (64 bits) should be coerced to [2 x i32]
+typedef struct four_shorts {
+    short a, b, c, d;
+} four_shorts;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce)
+four_shorts return_four_shorts(four_shorts x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.struct_arg @func_struct_ret()
-// CHECK: ret %struct.struct_arg zeroinitializer
-struct_arg_t func_struct_ret()
-{
-  struct_arg_t s = { 0 };
-  return s;
+// ============================================================================
+// SECTION 4: Mixed types - floats prevent coercion even with small integers
+// ============================================================================
+
+typedef struct char_and_float {
+    char c;
+    float f;
+} char_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 
%x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.char_and_float
+char_and_float return_char_and_float(char_and_float x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.struct_padding_arg @func_struct_padding_ret()
-// CHECK: ret %struct.struct_padding_arg zeroinitializer
-struct_padding_arg func_struct_padding_ret()
-{
-  struct_padding_arg s = { 0 };
-  return s;
+typedef struct short_and_float {
+    short s;
+    float f;
+} short_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.short_and_float 
@return_short_and_float(i16 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.short_and_float
+short_and_float return_short_and_float(short_and_float x) {
+    return x;
 }
 
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
-struct_char_x8 func_struct_char_x8_ret()
-{
-  struct_char_x8 s = { 0 };
-  return s;
+// Small int + full-sized int should NOT be coerced
+typedef struct char_and_int {
+    char c;
+    int i;
+} char_and_int;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 
%x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_and_int
+char_and_int return_char_and_int(char_and_int x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i32 @func_struct_char_x4_ret()
-// CHECK: ret i32 0
-struct_char_x4 func_struct_char_x4_ret()
-{
-  struct_char_x4 s = { 0 };
-  return s;
+// ============================================================================
+// SECTION 5: Exotic/Complex aggregates (per reviewer request)
+// ============================================================================
+
+// --- Nested structs ---
+
+typedef struct inner_chars {
+    char a, b;
+} inner_chars;
+
+typedef struct outer_with_inner_chars {
+    inner_chars inner;
+    char c, d;
+} outer_with_inner_chars;
+
+// All chars, 32 bits total - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce)
+outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i32 @func_struct_char_x3_ret()
-// CHECK: ret i32 0
-struct_char_x3 func_struct_char_x3_ret()
-{
-  struct_char_x3 s = { 0 };
-  return s;
+typedef struct inner_with_float {
+    char c;
+    float f;
+} inner_with_float;
+
+typedef struct outer_with_float_inner {
+    inner_with_float inner;
+} outer_with_float_inner;
+
+// Nested struct contains float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner 
@return_nested_with_float(%struct.inner_with_float %x.coerce)
+// CHECK: ret %struct.outer_with_float_inner
+outer_with_float_inner return_nested_with_float(outer_with_float_inner x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i16 @func_struct_char_x2_ret()
-struct_char_x2 func_struct_char_x2_ret()
-{
-  struct_char_x2 s = { 0 };
-  return s;
+// --- Arrays within structs ---
+
+typedef struct char_array_struct {
+    char arr[4];
+} char_array_struct;
+
+// Array of 4 chars = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce)
+char_array_struct return_char_array(char_array_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i8 @func_struct_char_x1_ret()
-// CHECK: ret i8 0
-struct_char_x1 func_struct_char_x1_ret()
-{
-  struct_char_x1 s = { 0 };
-  return s;
+typedef struct short_array_struct {
+    short arr[2];
+} short_array_struct;
+
+// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce)
+short_array_struct return_short_array(short_array_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.struct_arr16 @func_ret_struct_arr16()
-// CHECK: ret %struct.struct_arr16 zeroinitializer
-struct_arr16 func_ret_struct_arr16()
-{
-  struct_arr16 s = { 0 };
-  return s;
+typedef struct int_array_struct {
+    int arr[2];
+} int_array_struct;
+
+// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x 
i32] %x.coerce)
+// CHECK: ret %struct.int_array_struct
+int_array_struct return_int_array(int_array_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) 
dead_on_unwind noalias writable writeonly sret(%struct.struct_arr32) align 4 
captures(none) initializes((0, 128)) %agg.result)
-struct_arr32 func_ret_struct_arr32()
-{
-  struct_arr32 s = { 0 };
-  return s;
+typedef struct float_array_struct {
+    float arr[2];
+} float_array_struct;
+
+// Array of 2 floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 
x float] %x.coerce)
+// CHECK: ret %struct.float_array_struct
+float_array_struct return_float_array(float_array_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) 
dead_on_unwind noalias writable writeonly sret(%struct.struct_arr33) align 4 
captures(none) initializes((0, 132)) %agg.result)
-struct_arr33 func_ret_struct_arr33()
-{
-  struct_arr33 s = { 0 };
-  return s;
+// --- Complex combinations ---
+
+typedef struct mixed_nested {
+    struct {
+        char a;
+        char b;
+    } inner;
+    short s;
+} mixed_nested;
+
+// All small integers (nested anonymous struct + short) = 32 bits - should be 
coerced
+// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce)
+mixed_nested return_mixed_nested(mixed_nested x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.struct_char_arr32 @func_ret_struct_char_arr32()
-struct_char_arr32 func_ret_struct_char_arr32()
-{
-  struct_char_arr32 s = { 0 };
-  return s;
+typedef struct deeply_nested_chars {
+    struct {
+        struct {
+            char a, b;
+        } level2;
+        char c, d;
+    } level1;
+} deeply_nested_chars;
+
+// Deeply nested, but all chars = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce)
+deeply_nested_chars return_deeply_nested(deeply_nested_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() 
local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// CHECK: ret %union.transparent_u zeroinitializer
-transparent_u func_transparent_union_ret()
-{
-  transparent_u u = { 0 };
-  return u;
+typedef struct deeply_nested_with_float {
+    struct {
+        struct {
+            char a;
+            float f;  // Float buried deep
+        } level2;
+    } level1;
+} deeply_nested_with_float;
+
+// Float buried in nested struct - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float 
@return_deeply_nested_float
+// CHECK: ret %struct.deeply_nested_with_float
+deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float 
x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.different_size_type_pair 
@func_different_size_type_pair_ret()
-different_size_type_pair func_different_size_type_pair_ret()
-{
-  different_size_type_pair s = { 0 };
-  return s;
+// --- Edge cases ---
+
+// Single char
+typedef struct single_char {
+    char c;
+} single_char;
+
+// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce)
+single_char return_single_char(single_char x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) 
dead_on_unwind noalias writable writeonly sret(%struct.flexible_array) align 4 
captures(none) initializes((0, 4)) %agg.result)
-flexible_array func_flexible_array_ret()
-{
-  flexible_array s = { 0 };
-  return s;
+// Three chars (24 bits, rounds up to 32)
+typedef struct three_chars {
+    char a, b, c;
+} three_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce)
+three_chars return_three_chars(three_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x 
i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, 
float %s.coerce1, i32 %s.coerce2)
-void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t 
s) { }
+// Five chars (40 bits, rounds up to 64)
+typedef struct five_chars {
+    char a, b, c, d, e;
+} five_chars;
 
-// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x 
i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef 
%arg4, ptr addrspace(5) noundef readnone byref(%struct.struct_arg) align 4 
captures(none) %{{.*}})
-void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, 
struct_arg_t s) { }
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce)
+five_chars return_five_chars(five_chars x) {
+    return x;
+}
 
-// XXX - Why don't the inner structs flatten?
-// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> 
noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested 
%arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr 
addrspace(5) noundef readnone byref(%struct.num_regs_nested_struct) align 8 
captures(none) %{{.*}})
-void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, 
num_regs_nested_struct arg2, num_regs_nested_struct arg3, 
num_regs_nested_struct arg4) { }
+// --- Union tests ---
 
-// CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef 
%arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested 
%arg2.coerce1, i16 %arg2.coerce2)
-void func_double_nested_struct_arg(int4 arg0, int arg1, double_nested_struct 
arg2) { }
+typedef union char_int_union {
+    char c;
+    int i;
+} char_int_union;
 
-// CHECK: define{{.*}} %struct.double_nested_struct 
@func_double_nested_struct_ret(<4 x i32> noundef %arg0, i32 noundef %arg1)
-double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
-  double_nested_struct s = { 0 };
-  return s;
+// Union with int - preserves union type
+// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 
%x.coerce)
+char_int_union return_char_int_union(char_int_union x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 
%arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 
%arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] 
%arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 
%arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
-void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
+typedef union float_int_union {
+    float f;
+    int i;
+} float_int_union;
 
-// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr 
addrspace(1) noundef writeonly captures(none) initializes((0, 56)) %out, ptr 
addrspace(5) noundef readonly byref(%struct.large_struct_padding) align 8 
captures(none) %{{.*}})
-void func_large_struct_padding_arg_store(global large_struct_padding* out, 
large_struct_padding arg) {
-  *out = arg;
+// Union with float - preserves union type
+// CHECK-LABEL: define{{.*}} %union.float_int_union 
@return_float_int_union(float %x.coerce)
+float_int_union return_float_int_union(float_int_union x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @v3i32_reg_count(<3 x i32> noundef %arg1, <3 x 
i32> noundef %arg2, <3 x i32> noundef %arg3, <3 x i32> noundef %arg4, i32 
%arg5.coerce0, float %arg5.coerce1, i32 %arg5.coerce2)
-void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t 
arg5) { }
-
-// Function signature from blender, nothing should be passed byval. The v3i32
-// should not count as 4 passed registers.
-// CHECK: define{{.*}} void @v3i32_pair_reg_count(ptr addrspace(5) noundef 
readnone captures(none) %arg0, <3 x i32> %arg1.coerce0, <3 x i32> 
%arg1.coerce1, <3 x i32> noundef %arg2, <3 x i32> %arg3.coerce0, <3 x i32> 
%arg3.coerce1, <3 x i32> noundef %arg4, float noundef %arg5)
-void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, 
int3_pair arg3, int3 arg4, float arg5) { }
-
-// Each short4 should fit pack into 2 registers.
-// CHECK: define{{.*}} void @v4i16_reg_count(<4 x i16> noundef %arg0, <4 x 
i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> 
noundef %arg4, <4 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, 
i32 %arg6.coerce2, i32 %arg6.coerce3)
-void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
-                     short4 arg4, short4 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v4i16_pair_reg_count_over(<4 x i16> noundef 
%arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef 
%arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, <4 x i16> noundef 
%arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 
captures(none) %{{.*}})
-void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 
arg3,
-                               short4 arg4, short4 arg5, short4 arg6, 
struct_4regs arg7) { }
-
-// CHECK: define{{.*}} void @v3i16_reg_count(<3 x i16> noundef %arg0, <3 x 
i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> 
noundef %arg4, <3 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, 
i32 %arg6.coerce2, i32 %arg6.coerce3)
-void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
-                     short3 arg4, short3 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v3i16_reg_count_over(<3 x i16> noundef %arg0, <3 
x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x 
i16> noundef %arg4, <3 x i16> noundef %arg5, <3 x i16> noundef %arg6, ptr 
addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 
captures(none) %{{.*}})
-void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
-                          short3 arg4, short3 arg5, short3 arg6, struct_4regs 
arg7) { }
-
-// CHECK: define{{.*}} void @v2i16_reg_count(<2 x i16> noundef %arg0, <2 x 
i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> 
noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> 
noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> 
noundef %arg10, <2 x i16> noundef %arg11, i32 %arg13.coerce0, i32 
%arg13.coerce1, i32 %arg13.coerce2, i32 %arg13.coerce3)
-void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
-                     short2 arg4, short2 arg5, short2 arg6, short2 arg7,
-                     short2 arg8, short2 arg9, short2 arg10, short2 arg11,
-                     struct_4regs arg13) { }
-
-// CHECK: define{{.*}} void @v2i16_reg_count_over(<2 x i16> noundef %arg0, <2 
x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x 
i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> 
noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> 
noundef %arg10, <2 x i16> noundef %arg11, <2 x i16> noundef %arg12, ptr 
addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 
captures(none) %{{.*}})
-void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
-                          short2 arg4, short2 arg5, short2 arg6, short2 arg7,
-                          short2 arg8, short2 arg9, short2 arg10, short2 arg11,
-                          short2 arg12, struct_4regs arg13) { }
-
-// CHECK: define{{.*}} void @v2i8_reg_count(<2 x i8> noundef %arg0, <2 x i8> 
noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef 
%arg4, <2 x i8> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 
%arg6.coerce2, i32 %arg6.coerce3)
-void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
-                    char2 arg4, char2 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v2i8_reg_count_over(<2 x i8> noundef %arg0, <2 x 
i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> 
noundef %arg4, <2 x i8> noundef %arg5, i32 noundef %arg6, ptr addrspace(5) 
noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
-                         char2 arg4, char2 arg5, int arg6, struct_4regs arg7) 
{ }
-
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef 
%arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef 
%arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
-void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, 
struct_char_x8 arg4, int arg5) { }
+// --- Padding scenarios ---
+
+typedef struct char_with_padding {
+    char c;
+    // 3 bytes padding
+    int i;
+} char_with_padding;
+
+// Has int, should NOT be coerced even though small + padding
+// CHECK-LABEL: define{{.*}} %struct.char_with_padding 
@return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_with_padding
+char_with_padding return_char_with_padding(char_with_padding x) {
+    return x;
+}

>From 8657523a6b425b05207956fc43db0025cd13fa51 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sun, 15 Mar 2026 22:00:44 +0530
Subject: [PATCH 18/18] Update amdgpu-abi-struct-coerce.cl

---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 785 +++++++++++-------
 1 file changed, 466 insertions(+), 319 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl 
b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 7857d01f431c81..e9cdb7f5da32a9 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -1,382 +1,529 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Test AMDGPU ABI struct coercion behavior:
-// - Structs containing ONLY sub-32-bit integers (char, short) should be 
packed into i32 registers
-// - Structs containing floats or full-sized integers (i32, i64) should 
preserve their original types
-//
-// This tests the fix for the issue where structs like {float, int} were 
incorrectly
-// coerced to [2 x i32], losing float type information.
-
-// ============================================================================
-// SECTION 1: Structs with floats - should NOT be coerced to integers
-// ============================================================================
-
-typedef struct fp_int_pair {
-    float f;
-    int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float 
%x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
-    return x;
-}
-
-typedef struct int_fp_pair {
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -triple r600-unknown-unknown -emit-llvm -o - %s | FileCheck 
%s
+
+typedef __attribute__(( ext_vector_type(2) )) char char2;
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+
+typedef __attribute__(( ext_vector_type(2) )) short short2;
+typedef __attribute__(( ext_vector_type(3) )) short short3;
+typedef __attribute__(( ext_vector_type(4) )) short short4;
+
+typedef __attribute__(( ext_vector_type(2) )) int int2;
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+typedef __attribute__(( ext_vector_type(4) )) int int4;
+typedef __attribute__(( ext_vector_type(16) )) int int16;
+typedef __attribute__(( ext_vector_type(32) )) int int32;
+
+// CHECK: %struct.empty_struct = type {}
+typedef struct empty_struct
+{
+} empty_struct;
+
+// CHECK-NOT: %struct.single_element_struct_arg
+typedef struct single_element_struct_arg
+{
     int i;
+} single_element_struct_arg_t;
+
+// CHECK-NOT: %struct.nested_single_element_struct_arg
+typedef struct nested_single_element_struct_arg
+{
+  single_element_struct_arg_t i;
+} nested_single_element_struct_arg_t;
+
+// CHECK: %struct.struct_arg = type { i32, float, i32 }
+typedef struct struct_arg
+{
+    int i1;
     float f;
-} int_fp_pair;
+    int i2;
+} struct_arg_t;
+
+// CHECK: %struct.struct_padding_arg = type { i8, i64 }
+typedef struct struct_padding_arg
+{
+  char i1;
+  long f;
+} struct_padding_arg;
+
+// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], 
[3 x float], i32 }
+typedef struct struct_of_arrays_arg
+{
+    int i1[2];
+    float f1;
+    int i2[4];
+    float f2[3];
+    int i3;
+} struct_of_arrays_arg_t;
+
+// CHECK: %struct.struct_of_structs_arg = type { i32, float, 
%struct.struct_arg, i32 }
+typedef struct struct_of_structs_arg
+{
+    int i1;
+    float f1;
+    struct_arg_t s1;
+    int i2;
+} struct_of_structs_arg_t;
+
+typedef union
+{
+  int b1;
+  float b2;
+} transparent_u __attribute__((__transparent_union__));
+
+// CHECK: %struct.single_array_element_struct_arg = type { [4 x i32] }
+typedef struct single_array_element_struct_arg
+{
+    int i[4];
+} single_array_element_struct_arg_t;
+
+// CHECK: %struct.single_struct_element_struct_arg = type { %struct.inner }
+// CHECK: %struct.inner = type { i32, i64 }
+typedef struct single_struct_element_struct_arg
+{
+  struct inner {
+    int a;
+    long b;
+  } s;
+} single_struct_element_struct_arg_t;
+
+// CHECK: %struct.different_size_type_pair
+typedef struct different_size_type_pair {
+  long l;
+  int i;
+} different_size_type_pair;
+
+// CHECK: %struct.flexible_array = type { i32, [0 x i32] }
+typedef struct flexible_array
+{
+  int i;
+  int flexible[];
+} flexible_array;
+
+// CHECK: %struct.struct_arr16 = type { [16 x i32] }
+typedef struct struct_arr16
+{
+    int arr[16];
+} struct_arr16;
+
+// CHECK: %struct.struct_arr32 = type { [32 x i32] }
+typedef struct struct_arr32
+{
+    int arr[32];
+} struct_arr32;
+
+// CHECK: %struct.struct_arr33 = type { [33 x i32] }
+typedef struct struct_arr33
+{
+    int arr[33];
+} struct_arr33;
+
+// CHECK: %struct.struct_char_arr32 = type { [32 x i8] }
+typedef struct struct_char_arr32
+{
+  char arr[32];
+} struct_char_arr32;
+
+// CHECK-NOT: %struct.struct_char_x8
+typedef struct struct_char_x8 {
+  char x, y, z, w;
+  char a, b, c, d;
+} struct_char_x8;
+
+// CHECK-NOT: %struct.struct_char_x4
+typedef struct struct_char_x4 {
+  char x, y, z, w;
+} struct_char_x4;
+
+// CHECK-NOT: %struct.struct_char_x3
+typedef struct struct_char_x3 {
+  char x, y, z;
+} struct_char_x3;
+
+// CHECK-NOT: %struct.struct_char_x2
+typedef struct struct_char_x2 {
+  char x, y;
+} struct_char_x2;
+
+// CHECK-NOT: %struct.struct_char_x1
+typedef struct struct_char_x1 {
+  char x;
+} struct_char_x1;
+
+// 4 registers from fields, 5 if padding included.
+// CHECK: %struct.nested = type { i8, i64 }
+// CHECK: %struct.num_regs_nested_struct = type { i32, %struct.nested }
+typedef struct num_regs_nested_struct {
+  int x;
+  struct nested {
+    char z;
+    long y;
+  } inner;
+} num_regs_nested_struct;
+
+// CHECK: %struct.double_nested = type { %struct.inner_inner }
+// CHECK: %struct.inner_inner = type { i8, i32, i8 }
+// CHECK: %struct.double_nested_struct = type { i32, %struct.double_nested, 
i16 }
+typedef struct double_nested_struct {
+  int x;
+  struct double_nested {
+    struct inner_inner {
+      char y;
+      int q;
+      char z;
+    } inner_inner;
+  } inner;
+
+  short w;
+} double_nested_struct;
+
+// This is a large struct, but uses fewer registers than the limit.
+// CHECK: %struct.large_struct_padding = type { i8, i32, i8, i32, i8, i8, i16, 
i16, [3 x i8], i64, i32, i8, i32, i16, i8 }
+typedef struct large_struct_padding {
+  char e0;
+  int e1;
+  char e2;
+  int e3;
+  char e4;
+  char e5;
+  short e6;
+  short e7;
+  char e8[3];
+  long e9;
+  int e10;
+  char e11;
+  int e12;
+  short e13;
+  char e14;
+} large_struct_padding;
+
+// The number of registers computed should be 6, not 8.
+typedef struct int3_pair {
+       int3 dx;
+       int3 dy;
+} int3_pair;
+
+// CHECK: %struct.struct_4regs = type { i32, i32, i32, i32 }
+typedef struct struct_4regs
+{
+  int x;
+  int y;
+  int z;
+  int w;
+} struct_4regs;
+
+// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone 
byref(%struct.empty_struct) align 1 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg()
+__kernel void kernel_empty_struct_arg(empty_struct s) { }
+
+// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce)
+__kernel void kernel_single_element_struct_arg(single_element_struct_arg_t 
arg1) { }
+
+// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce)
+__kernel void 
kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t 
arg1) { }
+
+// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly 
byref(%struct.struct_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, 
float %arg1.coerce1, i32 %arg1.coerce2)
+__kernel void kernel_struct_arg(struct_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly 
byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 
%arg1.coerce0, i64 %arg1.coerce1)
+__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { }
+
+// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef 
readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x 
i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] 
%arg1.coerce3, i32 %arg1.coerce4)
+__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly 
byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 
%arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 
%arg1.coerce3)
+__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
+
+// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
+__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
+
+// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) 
noundef readonly byref(%struct.single_array_element_struct_arg) align 4 
captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 
x i32] %arg1.coerce)
+__kernel void 
kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) 
{ }
+
+// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) 
noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 
captures(none) {{%.+}})
+// CHECK: void 
@__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner 
%arg1.coerce)
+__kernel void 
kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t 
arg1) { }
+
+// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef 
readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 
%arg1.coerce0, i32 %arg1.coerce1)
+__kernel void kernel_different_size_type_pair_arg(different_size_type_pair 
arg1) { }
+
+// CHECK: define{{.*}} void @func_f32_arg(float noundef %arg)
+void func_f32_arg(float arg) { }
+
+// CHECK: define{{.*}} void @func_v2i16_arg(<2 x i16> noundef %arg)
+void func_v2i16_arg(short2 arg) { }
+
+// CHECK: define{{.*}} void @func_v3i32_arg(<3 x i32> noundef %arg)
+void func_v3i32_arg(int3 arg) { }
+
+// CHECK: define{{.*}} void @func_v4i32_arg(<4 x i32> noundef %arg)
+void func_v4i32_arg(int4 arg) { }
+
+// CHECK: define{{.*}} void @func_v16i32_arg(<16 x i32> noundef %arg)
+void func_v16i32_arg(int16 arg) { }
+
+// CHECK: define{{.*}} void @func_v32i32_arg(<32 x i32> noundef %arg)
+void func_v32i32_arg(int32 arg) { }
 
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 
%x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
-    return x;
-}
+// CHECK: define{{.*}} void @func_empty_struct_arg()
+void func_empty_struct_arg(empty_struct empty) { }
 
-typedef struct two_floats {
-    float a;
-    float b;
-} two_floats;
+// CHECK: void @func_single_element_struct_arg(i32 %arg1.coerce)
+void func_single_element_struct_arg(single_element_struct_arg_t arg1) { }
 
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float 
%x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
-    return x;
-}
+// CHECK: void @func_nested_single_element_struct_arg(i32 %arg1.coerce)
+void func_nested_single_element_struct_arg(nested_single_element_struct_arg_t 
arg1) { }
 
-// Double precision floats
-typedef struct double_struct {
-    double d;
-} double_struct;
+// CHECK: void @func_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 
%arg1.coerce2)
+void func_struct_arg(struct_arg_t arg1) { }
 
-// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce)
-double_struct return_double_struct(double_struct x) {
-    return x;
-}
+// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
+void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// ============================================================================
-// SECTION 2: Structs with full-sized integers - should NOT be coerced
-// ============================================================================
+// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+void func_struct_char_x8(struct_char_x8 arg) { }
 
-typedef struct two_ints {
-    int a;
-    int b;
-} two_ints;
+// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, 
i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
-    return x;
-}
+// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+void func_struct_char_x3(struct_char_x3 arg) { }
 
-typedef struct single_int {
-    int a;
-} single_int;
+// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+void func_struct_char_x2(struct_char_x2 arg) { }
 
-// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce)
-single_int return_single_int(single_int x) {
-    return x;
-}
+// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
+void func_struct_char_x1(struct_char_x1 arg) { }
 
-typedef struct int64_struct {
-    long long a;
-} int64_struct;
+// CHECK: void @func_transparent_union_arg(i32 %u.coerce)
+void func_transparent_union_arg(transparent_u u) { }
 
-// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce)
-int64_struct return_int64_struct(int64_struct x) {
-    return x;
-}
+// CHECK: void @func_single_array_element_struct_arg([4 x i32] %arg1.coerce)
+void func_single_array_element_struct_arg(single_array_element_struct_arg_t 
arg1) { }
 
-// ============================================================================
-// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced
-// ============================================================================
+// CHECK: void @func_single_struct_element_struct_arg(%struct.inner 
%arg1.coerce)
+void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t 
arg1) { }
 
-// Structs of small integers <= 32 bits should be coerced to i32
-typedef struct small_struct {
-    short a;
-    short b;
-} small_struct;
+// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 
%arg1.coerce1)
+void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
-small_struct return_small_struct(small_struct x) {
-    return x;
-}
+// CHECK: void @func_flexible_array_arg(ptr addrspace(5) noundef readnone 
byval(%struct.flexible_array) align 4 captures(none) %arg)
+void func_flexible_array_arg(flexible_array arg) { }
 
-// Structs of small integers <= 16 bits should be coerced to i16
-typedef struct tiny_struct {
-    char a;
-    char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
-tiny_struct return_tiny_struct(tiny_struct x) {
-    return x;
+// CHECK: define{{.*}} float @func_f32_ret()
+float func_f32_ret()
+{
+  return 0.0f;
 }
 
-// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
-typedef struct eight_chars {
-    char a, b, c, d, e, f, g, h;
-} eight_chars;
-
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
-eight_chars return_eight_chars(eight_chars x) {
-    return x;
+// CHECK: define{{.*}} void @func_empty_struct_ret()
+empty_struct func_empty_struct_ret()
+{
+  empty_struct s = {};
+  return s;
 }
 
-// Struct of 4 chars (32 bits) should be coerced to i32
-typedef struct four_chars {
-    char a, b, c, d;
-} four_chars;
-
-// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
-four_chars return_four_chars(four_chars x) {
-    return x;
+// CHECK: define{{.*}} i32 @single_element_struct_ret()
+// CHECK: ret i32 0
+single_element_struct_arg_t single_element_struct_ret()
+{
+  single_element_struct_arg_t s = { 0 };
+  return s;
 }
 
-// Struct of 4 shorts (64 bits) should be coerced to [2 x i32]
-typedef struct four_shorts {
-    short a, b, c, d;
-} four_shorts;
-
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce)
-four_shorts return_four_shorts(four_shorts x) {
-    return x;
+// CHECK: define{{.*}} i32 @nested_single_element_struct_ret()
+// CHECK: ret i32 0
+nested_single_element_struct_arg_t nested_single_element_struct_ret()
+{
+  nested_single_element_struct_arg_t s = { 0 };
+  return s;
 }
 
-// ============================================================================
-// SECTION 4: Mixed types - floats prevent coercion even with small integers
-// ============================================================================
-
-typedef struct char_and_float {
-    char c;
-    float f;
-} char_and_float;
-
-// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 
%x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.char_and_float
-char_and_float return_char_and_float(char_and_float x) {
-    return x;
+// CHECK: define{{.*}} %struct.struct_arg @func_struct_ret()
+// CHECK: ret %struct.struct_arg zeroinitializer
+struct_arg_t func_struct_ret()
+{
+  struct_arg_t s = { 0 };
+  return s;
 }
 
-typedef struct short_and_float {
-    short s;
-    float f;
-} short_and_float;
-
-// CHECK-LABEL: define{{.*}} %struct.short_and_float 
@return_short_and_float(i16 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.short_and_float
-short_and_float return_short_and_float(short_and_float x) {
-    return x;
+// CHECK: define{{.*}} %struct.struct_padding_arg @func_struct_padding_ret()
+// CHECK: ret %struct.struct_padding_arg zeroinitializer
+struct_padding_arg func_struct_padding_ret()
+{
+  struct_padding_arg s = { 0 };
+  return s;
 }
 
-// Small int + full-sized int should NOT be coerced
-typedef struct char_and_int {
-    char c;
-    int i;
-} char_and_int;
-
-// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 
%x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.char_and_int
-char_and_int return_char_and_int(char_and_int x) {
-    return x;
+// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
+struct_char_x8 func_struct_char_x8_ret()
+{
+  struct_char_x8 s = { 0 };
+  return s;
 }
 
-// ============================================================================
-// SECTION 5: Exotic/Complex aggregates (per reviewer request)
-// ============================================================================
-
-// --- Nested structs ---
-
-typedef struct inner_chars {
-    char a, b;
-} inner_chars;
-
-typedef struct outer_with_inner_chars {
-    inner_chars inner;
-    char c, d;
-} outer_with_inner_chars;
-
-// All chars, 32 bits total - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce)
-outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) {
-    return x;
+// CHECK: define{{.*}} i32 @func_struct_char_x4_ret()
+// CHECK: ret i32 0
+struct_char_x4 func_struct_char_x4_ret()
+{
+  struct_char_x4 s = { 0 };
+  return s;
 }
 
-typedef struct inner_with_float {
-    char c;
-    float f;
-} inner_with_float;
-
-typedef struct outer_with_float_inner {
-    inner_with_float inner;
-} outer_with_float_inner;
-
-// Nested struct contains float - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner 
@return_nested_with_float(%struct.inner_with_float %x.coerce)
-// CHECK: ret %struct.outer_with_float_inner
-outer_with_float_inner return_nested_with_float(outer_with_float_inner x) {
-    return x;
+// CHECK: define{{.*}} i32 @func_struct_char_x3_ret()
+// CHECK: ret i32 0
+struct_char_x3 func_struct_char_x3_ret()
+{
+  struct_char_x3 s = { 0 };
+  return s;
 }
 
-// --- Arrays within structs ---
-
-typedef struct char_array_struct {
-    char arr[4];
-} char_array_struct;
-
-// Array of 4 chars = 32 bits, all small ints - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce)
-char_array_struct return_char_array(char_array_struct x) {
-    return x;
+// CHECK: define{{.*}} i16 @func_struct_char_x2_ret()
+struct_char_x2 func_struct_char_x2_ret()
+{
+  struct_char_x2 s = { 0 };
+  return s;
 }
 
-typedef struct short_array_struct {
-    short arr[2];
-} short_array_struct;
-
-// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce)
-short_array_struct return_short_array(short_array_struct x) {
-    return x;
+// CHECK: define{{.*}} i8 @func_struct_char_x1_ret()
+// CHECK: ret i8 0
+struct_char_x1 func_struct_char_x1_ret()
+{
+  struct_char_x1 s = { 0 };
+  return s;
 }
 
-typedef struct int_array_struct {
-    int arr[2];
-} int_array_struct;
-
-// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x 
i32] %x.coerce)
-// CHECK: ret %struct.int_array_struct
-int_array_struct return_int_array(int_array_struct x) {
-    return x;
+// CHECK: define{{.*}} %struct.struct_arr16 @func_ret_struct_arr16()
+// CHECK: ret %struct.struct_arr16 zeroinitializer
+struct_arr16 func_ret_struct_arr16()
+{
+  struct_arr16 s = { 0 };
+  return s;
 }
 
-typedef struct float_array_struct {
-    float arr[2];
-} float_array_struct;
-
-// Array of 2 floats - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 
x float] %x.coerce)
-// CHECK: ret %struct.float_array_struct
-float_array_struct return_float_array(float_array_struct x) {
-    return x;
+// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) 
dead_on_unwind noalias writable writeonly sret(%struct.struct_arr32) align 4 
captures(none) initializes((0, 128)) %agg.result)
+struct_arr32 func_ret_struct_arr32()
+{
+  struct_arr32 s = { 0 };
+  return s;
 }
 
-// --- Complex combinations ---
-
-typedef struct mixed_nested {
-    struct {
-        char a;
-        char b;
-    } inner;
-    short s;
-} mixed_nested;
-
-// All small integers (nested anonymous struct + short) = 32 bits - should be 
coerced
-// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce)
-mixed_nested return_mixed_nested(mixed_nested x) {
-    return x;
+// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) 
dead_on_unwind noalias writable writeonly sret(%struct.struct_arr33) align 4 
captures(none) initializes((0, 132)) %agg.result)
+struct_arr33 func_ret_struct_arr33()
+{
+  struct_arr33 s = { 0 };
+  return s;
 }
 
-typedef struct deeply_nested_chars {
-    struct {
-        struct {
-            char a, b;
-        } level2;
-        char c, d;
-    } level1;
-} deeply_nested_chars;
-
-// Deeply nested, but all chars = 32 bits - should be coerced
-// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce)
-deeply_nested_chars return_deeply_nested(deeply_nested_chars x) {
-    return x;
+// CHECK: define{{.*}} %struct.struct_char_arr32 @func_ret_struct_char_arr32()
+struct_char_arr32 func_ret_struct_char_arr32()
+{
+  struct_char_arr32 s = { 0 };
+  return s;
 }
 
-typedef struct deeply_nested_with_float {
-    struct {
-        struct {
-            char a;
-            float f;  // Float buried deep
-        } level2;
-    } level1;
-} deeply_nested_with_float;
-
-// Float buried in nested struct - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float 
@return_deeply_nested_float
-// CHECK: ret %struct.deeply_nested_with_float
-deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float 
x) {
-    return x;
+// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() 
local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK: ret %union.transparent_u zeroinitializer
+transparent_u func_transparent_union_ret()
+{
+  transparent_u u = { 0 };
+  return u;
 }
 
-// --- Edge cases ---
-
-// Single char
-typedef struct single_char {
-    char c;
-} single_char;
-
-// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce)
-single_char return_single_char(single_char x) {
-    return x;
+// CHECK: define{{.*}} %struct.different_size_type_pair 
@func_different_size_type_pair_ret()
+different_size_type_pair func_different_size_type_pair_ret()
+{
+  different_size_type_pair s = { 0 };
+  return s;
 }
 
-// Three chars (24 bits, rounds up to 32)
-typedef struct three_chars {
-    char a, b, c;
-} three_chars;
-
-// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce)
-three_chars return_three_chars(three_chars x) {
-    return x;
+// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) 
dead_on_unwind noalias writable writeonly sret(%struct.flexible_array) align 4 
captures(none) initializes((0, 4)) %agg.result)
+flexible_array func_flexible_array_ret()
+{
+  flexible_array s = { 0 };
+  return s;
 }
 
-// Five chars (40 bits, rounds up to 64)
-typedef struct five_chars {
-    char a, b, c, d, e;
-} five_chars;
+// CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x 
i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, 
float %s.coerce1, i32 %s.coerce2)
+void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t 
s) { }
 
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce)
-five_chars return_five_chars(five_chars x) {
-    return x;
-}
+// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x 
i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef 
%arg4, ptr addrspace(5) noundef readnone byref(%struct.struct_arg) align 4 
captures(none) %{{.*}})
+void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, 
struct_arg_t s) { }
 
-// --- Union tests ---
+// XXX - Why don't the inner structs flatten?
+// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> 
noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested 
%arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr 
addrspace(5) noundef readnone byref(%struct.num_regs_nested_struct) align 8 
captures(none) %{{.*}})
+void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, 
num_regs_nested_struct arg2, num_regs_nested_struct arg3, 
num_regs_nested_struct arg4) { }
 
-typedef union char_int_union {
-    char c;
-    int i;
-} char_int_union;
+// CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef 
%arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested 
%arg2.coerce1, i16 %arg2.coerce2)
+void func_double_nested_struct_arg(int4 arg0, int arg1, double_nested_struct 
arg2) { }
 
-// Union with int - preserves union type
-// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 
%x.coerce)
-char_int_union return_char_int_union(char_int_union x) {
-    return x;
+// CHECK: define{{.*}} %struct.double_nested_struct 
@func_double_nested_struct_ret(<4 x i32> noundef %arg0, i32 noundef %arg1)
+double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
+  double_nested_struct s = { 0 };
+  return s;
 }
 
-typedef union float_int_union {
-    float f;
-    int i;
-} float_int_union;
+// CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 
%arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 
%arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] 
%arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 
%arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
+void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// Union with float - preserves union type
-// CHECK-LABEL: define{{.*}} %union.float_int_union 
@return_float_int_union(float %x.coerce)
-float_int_union return_float_int_union(float_int_union x) {
-    return x;
+// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr 
addrspace(1) noundef writeonly captures(none) initializes((0, 56)) %out, ptr 
addrspace(5) noundef readonly byref(%struct.large_struct_padding) align 8 
captures(none) %{{.*}})
+void func_large_struct_padding_arg_store(global large_struct_padding* out, 
large_struct_padding arg) {
+  *out = arg;
 }
 
-// --- Padding scenarios ---
-
-typedef struct char_with_padding {
-    char c;
-    // 3 bytes padding
-    int i;
-} char_with_padding;
-
-// Has int, should NOT be coerced even though small + padding
-// CHECK-LABEL: define{{.*}} %struct.char_with_padding 
@return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.char_with_padding
-char_with_padding return_char_with_padding(char_with_padding x) {
-    return x;
-}
+// CHECK: define{{.*}} void @v3i32_reg_count(<3 x i32> noundef %arg1, <3 x 
i32> noundef %arg2, <3 x i32> noundef %arg3, <3 x i32> noundef %arg4, i32 
%arg5.coerce0, float %arg5.coerce1, i32 %arg5.coerce2)
+void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t 
arg5) { }
+
+// Function signature from blender, nothing should be passed byval. The v3i32
+// should not count as 4 passed registers.
+// CHECK: define{{.*}} void @v3i32_pair_reg_count(ptr addrspace(5) noundef 
readnone captures(none) %arg0, <3 x i32> %arg1.coerce0, <3 x i32> 
%arg1.coerce1, <3 x i32> noundef %arg2, <3 x i32> %arg3.coerce0, <3 x i32> 
%arg3.coerce1, <3 x i32> noundef %arg4, float noundef %arg5)
+void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, 
int3_pair arg3, int3 arg4, float arg5) { }
+
+// Each short4 should fit pack into 2 registers.
+// CHECK: define{{.*}} void @v4i16_reg_count(<4 x i16> noundef %arg0, <4 x 
i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> 
noundef %arg4, <4 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, 
i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
+                     short4 arg4, short4 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v4i16_pair_reg_count_over(<4 x i16> noundef 
%arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef 
%arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, <4 x i16> noundef 
%arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 
captures(none) %{{.*}})
+void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 
arg3,
+                               short4 arg4, short4 arg5, short4 arg6, 
struct_4regs arg7) { }
+
+// CHECK: define{{.*}} void @v3i16_reg_count(<3 x i16> noundef %arg0, <3 x 
i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> 
noundef %arg4, <3 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, 
i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
+                     short3 arg4, short3 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v3i16_reg_count_over(<3 x i16> noundef %arg0, <3 
x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x 
i16> noundef %arg4, <3 x i16> noundef %arg5, <3 x i16> noundef %arg6, ptr 
addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 
captures(none) %{{.*}})
+void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
+                          short3 arg4, short3 arg5, short3 arg6, struct_4regs 
arg7) { }
+
+// CHECK: define{{.*}} void @v2i16_reg_count(<2 x i16> noundef %arg0, <2 x 
i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> 
noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> 
noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> 
noundef %arg10, <2 x i16> noundef %arg11, i32 %arg13.coerce0, i32 
%arg13.coerce1, i32 %arg13.coerce2, i32 %arg13.coerce3)
+void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
+                     short2 arg4, short2 arg5, short2 arg6, short2 arg7,
+                     short2 arg8, short2 arg9, short2 arg10, short2 arg11,
+                     struct_4regs arg13) { }
+
+// CHECK: define{{.*}} void @v2i16_reg_count_over(<2 x i16> noundef %arg0, <2 
x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x 
i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> 
noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> 
noundef %arg10, <2 x i16> noundef %arg11, <2 x i16> noundef %arg12, ptr 
addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 
captures(none) %{{.*}})
+void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
+                          short2 arg4, short2 arg5, short2 arg6, short2 arg7,
+                          short2 arg8, short2 arg9, short2 arg10, short2 arg11,
+                          short2 arg12, struct_4regs arg13) { }
+
+// CHECK: define{{.*}} void @v2i8_reg_count(<2 x i8> noundef %arg0, <2 x i8> 
noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef 
%arg4, <2 x i8> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 
%arg6.coerce2, i32 %arg6.coerce3)
+void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
+                    char2 arg4, char2 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v2i8_reg_count_over(<2 x i8> noundef %arg0, <2 x 
i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> 
noundef %arg4, <2 x i8> noundef %arg5, i32 noundef %arg6, ptr addrspace(5) 
noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
+                         char2 arg4, char2 arg5, int arg6, struct_4regs arg7) 
{ }
+
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef 
%arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef 
%arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, 
struct_char_x8 arg4, int arg5) { }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AMDGPU] Stop coercing structs with FP and int fields to integer arrays (PR #185083)

Reply via email to