https://github.com/CoTinker updated https://github.com/llvm/llvm-project/pull/90389
>From db18c52f8fd5f397324505bcb2e07343794e5dce Mon Sep 17 00:00:00 2001 From: Longsheng Mou <moulongsh...@huawei.com> Date: Sun, 28 Apr 2024 17:14:29 +0800 Subject: [PATCH] [X86_64] Fix empty field error in vaarg of C++. Such struct types: ``` struct { struct{} a; long long b; }; stuct { struct{} a; double b; }; ``` For such structures, Lo is NoClass and Hi is Integer/SSE. And when this structure argument is passed, the high part is passed at offset 8 in memory. So we should do special handling for these types in EmitVAArg. --- clang/lib/CodeGen/Targets/X86.cpp | 27 +++++++---- clang/test/CodeGenCXX/x86_64-vaarg.cpp | 64 +++++++++++++++++++++++--- 2 files changed, 77 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 94cf0d86f9bed7..eff54d12ce5496 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -3122,9 +3122,24 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); RegAddr = Tmp.withElementType(LTy); - } else if (neededInt) { - RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset), - LTy, CharUnits::fromQuantity(8)); + } else if (neededInt || neededSSE == 1) { + llvm::Value *Offset = neededInt ? gp_offset : fp_offset; + uint64_t Alignment = neededInt ? 8 : 16; + Address Tmp = CGF.CreateMemTemp(Ty); + if (AI.isDirect() && AI.getDirectOffset() == 8) { + llvm::StructType *ST = cast<llvm::StructType>(LTy); + Tmp = Tmp.withElementType(ST); + llvm::Type *TyHi = AI.getCoerceToType(); + llvm::Value *Addr = + CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, Offset); + llvm::Value *V = CGF.Builder.CreateAlignedLoad( + TyHi, Addr, + CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi))); + CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); + RegAddr = Tmp.withElementType(LTy); + } else + RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, Offset), + LTy, CharUnits::fromQuantity(Alignment)); // Copy to a temporary if necessary to ensure the appropriate alignment. auto TInfo = getContext().getTypeInfoInChars(Ty); @@ -3133,15 +3148,11 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // Copy into a temporary if the type is more aligned than the // register save area. - if (TyAlign.getQuantity() > 8) { - Address Tmp = CGF.CreateMemTemp(Ty); + if (neededInt && TyAlign.getQuantity() > 8) { CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); RegAddr = Tmp; } - } else if (neededSSE == 1) { - RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset), - LTy, CharUnits::fromQuantity(16)); } else { assert(neededSSE == 2 && "Invalid number of needed registers!"); // SSE registers are spaced 16 bytes apart in the register save diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp b/clang/test/CodeGenCXX/x86_64-vaarg.cpp index 985a0cc41a1410..dc6d9f070cf51f 100644 --- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp +++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp @@ -32,6 +32,7 @@ typedef struct { // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 // CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S1]], align 8 // CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 // CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) @@ -44,8 +45,11 @@ typedef struct { // CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] -// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP]], i32 0, i32 1 +// CHECK-NEXT: store double [[TMP2]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4 // CHECK-NEXT: br label [[VAARG_END:%.*]] // CHECK: vaarg.in_mem: // CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 @@ -54,14 +58,62 @@ typedef struct { // CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 // CHECK-NEXT: br label [[VAARG_END]] // CHECK: vaarg.end: -// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP1]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 -// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 -// CHECK-NEXT: ret double [[TMP4]] +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 +// CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK-NEXT: ret double [[TMP6]] // s1 f(int z, ...) { __builtin_va_list list; __builtin_va_start(list, z); return __builtin_va_arg(list, s1); } + +typedef struct { + struct{} a; + long long b; +} s2; + +// CHECK-LABEL: @_Z2f2iz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S2:%.*]], align 8 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S2]], align 8 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0 +// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40 +// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP]], i32 0, i32 1 +// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 +// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK-NEXT: ret i64 [[TMP6]] +// +s2 f2(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, s2); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits