github-actions[bot] wrote: <!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning: <details> <summary> You can test this locally with the following command: </summary> ``````````bash git-clang-format --diff HEAD~1 HEAD --extensions cpp -- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp clang/lib/Sema/SemaAMDGPU.cpp clang/lib/Sema/SemaChecking.cpp `````````` </details> <details> <summary> View the diff from clang-format here. </summary> ``````````diff diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 8b7e419a1..fca6fbbf5 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -165,20 +165,22 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) { // Assumptions: // - Return type equals source type (frontend/Sema should enforce). // - Semantics are on the object representation (raw bits), including padding. -// - For payloads > 32 bits, split into 32-bit words, permute each with the same index, +// - For payloads > 32 bits, split into 32-bit words, permute each with the same +// index, // and reassemble. -// - First-class scalar/vector values whose total size is a multiple of 32 bits use a -// register-only path by bitcasting to <N x i32>. Aggregates or odd sizes use a -// memory-backed path. -// - ≤ 32-bit scalars (char/short/int/float/half) follow a fast i32 path for performance. -llvm::Value * -emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, - const clang::CallExpr *Call) { - auto &B = CGF.Builder; +// - First-class scalar/vector values whose total size is a multiple of 32 bits +// use a +// register-only path by bitcasting to <N x i32>. Aggregates or odd sizes use +// a memory-backed path. +// - ≤ 32-bit scalars (char/short/int/float/half) follow a fast i32 path for +// performance. 
+llvm::Value *emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, + const clang::CallExpr *Call) { + auto &B = CGF.Builder; auto &CGM = CGF.CGM; const llvm::DataLayout &DL = CGM.getDataLayout(); - llvm::Type *I8 = B.getInt8Ty(); + llvm::Type *I8 = B.getInt8Ty(); llvm::Type *I32 = B.getInt32Ty(); llvm::Type *I64 = B.getInt64Ty(); @@ -194,24 +196,29 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, // - Integers: zext/trunc to i32. // - Pointers: ptrtoint to intptr, then zext/trunc to i32. // - Other first-class: bitcast to intN then zext/trunc to i32. - auto toI32Index = [&](llvm::Value *IdxVal, clang::QualType IdxQT) -> llvm::Value * { + auto toI32Index = [&](llvm::Value *IdxVal, + clang::QualType IdxQT) -> llvm::Value * { (void)IdxQT; // signedness not relevant for index llvm::Type *Ty = IdxVal->getType(); if (Ty->isIntegerTy()) return B.CreateZExtOrTrunc(IdxVal, I32); if (Ty->isPointerTy()) { unsigned PtrBits = DL.getPointerSizeInBits(Ty->getPointerAddressSpace()); - return B.CreateZExtOrTrunc(B.CreatePtrToInt(IdxVal, B.getIntNTy(PtrBits)), I32); + return B.CreateZExtOrTrunc(B.CreatePtrToInt(IdxVal, B.getIntNTy(PtrBits)), + I32); } unsigned Bits = getBitWidth(Ty); return B.CreateZExtOrTrunc(B.CreateBitCast(IdxVal, B.getIntNTy(Bits)), I32); }; // Coerces an arbitrary ≤ 32-bit scalar payload to i32. - // - Integers: extend to i32 honoring signedness if narrower; zext/trunc otherwise. + // - Integers: extend to i32 honoring signedness if narrower; zext/trunc + // otherwise. // - Pointers: ptrtoint to intptr, then zext/trunc to i32. - // - Other first-class scalars (e.g., float, half): bitcast to intN then zext/trunc to i32. - auto coercePayloadToI32 = [&](llvm::Value *Val, clang::QualType SrcQT) -> llvm::Value * { + // - Other first-class scalars (e.g., float, half): bitcast to intN then + // zext/trunc to i32. 
+ auto coercePayloadToI32 = [&](llvm::Value *Val, + clang::QualType SrcQT) -> llvm::Value * { llvm::Type *Ty = Val->getType(); if (Ty->isIntegerTy()) { unsigned BW = Ty->getIntegerBitWidth(); @@ -224,20 +231,22 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, } if (Ty->isPointerTy()) { unsigned PtrBits = DL.getPointerSizeInBits(Ty->getPointerAddressSpace()); - return B.CreateZExtOrTrunc(B.CreatePtrToInt(Val, B.getIntNTy(PtrBits)), I32); + return B.CreateZExtOrTrunc(B.CreatePtrToInt(Val, B.getIntNTy(PtrBits)), + I32); } unsigned Bits = getBitWidth(Ty); return B.CreateZExtOrTrunc(B.CreateBitCast(Val, B.getIntNTy(Bits)), I32); }; // Converts an i32 result back to an arbitrary ≤ 32-bit destination type. - // - Integer ≤ 32 bits: zext/sext/trunc appropriately using source signedness for narrow types. + // - Integer ≤ 32 bits: zext/sext/trunc appropriately using source signedness + // for narrow types. // - Pointer ≤ 32 bits: zext/trunc to pointer width and inttoptr. // - Other first-class types: // - If 32 bits: bitcast i32 to destination type. - // - If narrower than 32 bits (e.g., half = 16): first trunc i32 to iN, then bitcast iN to DstTy. - auto coerceFromI32ToType = [&](llvm::Value *I32Val, - llvm::Type *DstTy, + // - If narrower than 32 bits (e.g., half = 16): first trunc i32 to iN, then + // bitcast iN to DstTy. + auto coerceFromI32ToType = [&](llvm::Value *I32Val, llvm::Type *DstTy, clang::QualType SrcQT) -> llvm::Value * { if (DstTy->isIntegerTy()) { unsigned DW = DstTy->getIntegerBitWidth(); @@ -265,12 +274,14 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, if (BW < 32) Tr = B.CreateTrunc(I32Val, IntBW); else if (BW > 32) - Tr = B.CreateZExt(I32Val, IntBW); // should not happen in the fast 32-bit path + Tr = B.CreateZExt(I32Val, + IntBW); // should not happen in the fast 32-bit path return B.CreateBitCast(Tr, DstTy); }; // Returns {wordCount, tailBytes} for a payload size in bits. 
- auto wordCountAndTail = [&](unsigned totalBits) -> std::pair<unsigned, unsigned> { + auto wordCountAndTail = + [&](unsigned totalBits) -> std::pair<unsigned, unsigned> { unsigned bytes = totalBits / 8; return {bytes / 4, bytes % 4}; }; @@ -297,14 +308,16 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, if (totalBits <= 32) { llvm::Value *SrcI32 = coercePayloadToI32(SrcVal, SrcQT); llvm::SmallVector<llvm::Value *, 2> ArgsA{IndexI32, SrcI32}; - llvm::Value *ResI32 = B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsA); - llvm::Value *Res = coerceFromI32ToType(ResI32, RetTy, SrcQT); + llvm::Value *ResI32 = + B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsA); + llvm::Value *Res = coerceFromI32ToType(ResI32, RetTy, SrcQT); return Res; } } - // Fast path B: First-class scalar/vector whose total size is a multiple of 32 bits. - // Bitcast to <N x i32>, permute each lane, bitcast back. Register-only; no memory. + // Fast path B: First-class scalar/vector whose total size is a multiple of 32 + // bits. Bitcast to <N x i32>, permute each lane, bitcast back. Register-only; + // no memory. 
if (!IsAggregate) { llvm::Value *SrcVal = CGF.EmitScalarExpr(Call->getArg(1)); unsigned totalBits = getBitWidth(SrcVal->getType()); @@ -315,7 +328,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, // Handle pointers by going through intptr first llvm::Value *AsIntN = SrcVal; if (SrcVal->getType()->isPointerTy()) { - unsigned PW = DL.getPointerSizeInBits(SrcVal->getType()->getPointerAddressSpace()); + unsigned PW = DL.getPointerSizeInBits( + SrcVal->getType()->getPointerAddressSpace()); AsIntN = B.CreatePtrToInt(SrcVal, B.getIntNTy(PW)); } @@ -324,7 +338,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, llvm::Value *ResVec = llvm::UndefValue::get(I32VecTy); for (unsigned i = 0; i < words; ++i) { llvm::Value *Lane = B.CreateExtractElement(AsI32Vec, c32(i)); - llvm::Value *Perm = B.CreateCall(Bperm->getFunctionType(), Bperm, {IndexI32, Lane}); + llvm::Value *Perm = + B.CreateCall(Bperm->getFunctionType(), Bperm, {IndexI32, Lane}); ResVec = B.CreateInsertElement(ResVec, Perm, c32(i)); } @@ -339,16 +354,20 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, // General aggregate/odd-size path: // - Works for structs/arrays/complex and any total size. - // - Materialize source to a temp, process 4-byte words (unaligned loads/stores), - // handle tail bytes by packing/unpacking into an i32, and return loaded Value*. + // - Materialize source to a temp, process 4-byte words (unaligned + // loads/stores), + // handle tail bytes by packing/unpacking into an i32, and return loaded + // Value*. 
auto emitAggregatePath = [&]() -> llvm::Value * { clang::QualType SrcQTLocal = Call->getArg(1)->getType(); llvm::Type *SrcTy = CGF.ConvertType(SrcQTLocal); - clang::CodeGen::Address SrcAddr = CGF.CreateMemTemp(SrcQTLocal, "dsbperm.src"); - clang::CodeGen::Address DstAddr = CGF.CreateMemTemp(RetQT, "dsbperm.dst"); + clang::CodeGen::Address SrcAddr = + CGF.CreateMemTemp(SrcQTLocal, "dsbperm.src"); + clang::CodeGen::Address DstAddr = CGF.CreateMemTemp(RetQT, "dsbperm.dst"); - CGF.EmitAnyExprToMem(Call->getArg(1), SrcAddr, SrcQTLocal.getQualifiers(), /*IsInit*/true); + CGF.EmitAnyExprToMem(Call->getArg(1), SrcAddr, SrcQTLocal.getQualifiers(), + /*IsInit*/ true); // i8 views of the buffers (as Address). clang::CodeGen::Address SrcI8Addr = SrcAddr.withElementType(I8); @@ -357,8 +376,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, auto CU = [&](uint64_t N) { return clang::CharUnits::fromQuantity(N); }; uint64_t sizeBytes = DL.getTypeAllocSize(SrcTy); - uint64_t words = sizeBytes / 4; - uint64_t tail = sizeBytes % 4; + uint64_t words = sizeBytes / 4; + uint64_t tail = sizeBytes % 4; for (uint64_t i = 0; i < words; ++i) { uint64_t off = i * 4; @@ -377,7 +396,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, auto *Ld = B.CreateLoad(SrcWordI32Addr); llvm::SmallVector<llvm::Value *, 2> ArgsWord{IndexI32, Ld}; - llvm::Value *Perm = B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsWord); + llvm::Value *Perm = + B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsWord); (void)B.CreateStore(Perm, DstWordI32Addr); } @@ -398,7 +418,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, } llvm::SmallVector<llvm::Value *, 2> ArgsTail{IndexI32, Pack}; - llvm::Value *Perm = B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsTail); + llvm::Value *Perm = + B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsTail); for (uint64_t b = 0; b < tail; ++b) { llvm::Value *Byte = B.CreateTrunc(B.CreateLShr(Perm, c32(8 * b)), I8); @@ -408,10 +429,12 @@ 
emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, } } - // Load the final result from the destination temporary and return it as a Value*. + // Load the final result from the destination temporary and return it as a + // Value*. llvm::Value *Res = B.CreateLoad(DstAddr); - // For aggregates (struct/array/union), ensure determinism by freezing the value. - // freeze turns any undef/poison in padding into a fixed but arbitrary value. + // For aggregates (struct/array/union), ensure determinism by freezing the + // value. freeze turns any undef/poison in padding into a fixed but + // arbitrary value. if (Res->getType()->isAggregateType()) Res = B.CreateFreeze(Res); return Res; @@ -420,8 +443,6 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF, return emitAggregatePath(); } - - } // namespace // Generates the IR for __builtin_read_exec_*. `````````` </details> https://github.com/llvm/llvm-project/pull/153501 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits