llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mc @llvm/pr-subscribers-clang Author: None (github-actions[bot]) <details> <summary>Changes</summary> resolves llvm/llvm-project#<!-- -->79277 --- Patch is 116.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79340.diff 34 Files Affected: - (modified) clang/test/CodeGenOpenCL/amdgpu-features.cl (+2-2) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl (+18-17) - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+1) - (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+29-2) - (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+26) - (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+10) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+5-2) - (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+11) - (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+3) - (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+88-5) - (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+49-4) - (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+20-9) - (modified) llvm/lib/TargetParser/TargetParser.cpp (+1) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll (+142) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir (+197) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll (+405-61) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1.s (+45) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s (+12) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s (+12) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+36) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+108) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+48) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s (+138) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s (+12) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s (+12) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1.txt (+36) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt (+12) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt (+12) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+36) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+108) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+48) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt (+36) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt (+12) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt (+12) ``````````diff diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 1ba2b129f6895ae..9c8ca0bb96f6125 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -100,8 +100,8 @@ // GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl index 56d757012a5e780..4e3a56b4201bb6d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl @@ -1,59 +1,60 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -emit-llvm -o - %s | FileCheck %s typedef float v2f __attribute__((ext_vector_type(2))); -// CHECK-GFX940-LABEL: @test_cvt_f32_bf8 -// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0) +// CHECK-LABEL: @test_cvt_f32_bf8 +// CHECK: call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0) void test_cvt_f32_bf8(global int* out, int a) { *out = __builtin_amdgcn_cvt_f32_bf8(a, 0); } -// CHECK-GFX940-LABEL: @test_cvt_f32_fp8 -// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1) +// CHECK-LABEL: @test_cvt_f32_fp8 +// CHECK: call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1) void test_cvt_f32_fp8(global int* out, int a) { *out = __builtin_amdgcn_cvt_f32_fp8(a, 1); } -// CHECK-GFX940-LABEL: @test_cvt_pk_f32_bf8 -// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false) +// CHECK-LABEL: @test_cvt_pk_f32_bf8 +// CHECK: call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false) void test_cvt_pk_f32_bf8(global v2f* out, int a) { *out = __builtin_amdgcn_cvt_pk_f32_bf8(a, false); } -// CHECK-GFX940-LABEL: @test_cvt_pk_f32_fp8 -// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true) +// CHECK-LABEL: @test_cvt_pk_f32_fp8 +// CHECK: call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true) void test_cvt_pk_f32_fp8(global v2f* out, int a) { *out = __builtin_amdgcn_cvt_pk_f32_fp8(a, true); } -// CHECK-GFX940-LABEL: @test_cvt_pk_bf8_f32 -// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %a, float %b, i32 %old, i1 false) +// CHECK-LABEL: @test_cvt_pk_bf8_f32 +// CHECK: call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %a, float %b, i32 %old, i1 false) void test_cvt_pk_bf8_f32(global int* out, int old, float a, float b) { *out = __builtin_amdgcn_cvt_pk_bf8_f32(a, b, old, false); } -// CHECK-GFX940-LABEL: @test_cvt_pk_fp8_f32 -// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %a, float %b, i32 %old, i1 true) +// CHECK-LABEL: @test_cvt_pk_fp8_f32 +// CHECK: call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %a, float %b, i32 %old, i1 true) void test_cvt_pk_fp8_f32(global int* out, int old, float a, float b) { *out = __builtin_amdgcn_cvt_pk_fp8_f32(a, b, old, true); } -// CHECK-GFX940-LABEL: @test_cvt_sr_bf8_f32 -// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %a, i32 %b, i32 %old, i32 2) +// CHECK-LABEL: @test_cvt_sr_bf8_f32 +// CHECK: call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %a, i32 %b, i32 %old, i32 2) void test_cvt_sr_bf8_f32(global int* out, int old, float a, int b) { *out = __builtin_amdgcn_cvt_sr_bf8_f32(a, b, old, 2); } -// CHECK-GFX940-LABEL: @test_cvt_sr_fp8_f32 -// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %a, i32 %b, i32 %old, i32 3) +// CHECK-LABEL: @test_cvt_sr_fp8_f32 +// CHECK: call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %a, i32 %b, i32 %old, i32 3) void test_cvt_sr_fp8_f32(global int* out, int old, float a, int b) { *out = __builtin_amdgcn_cvt_sr_fp8_f32(a, b, old, 3); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index cb29d5d94759812..250e3e350c02e9b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1506,6 +1506,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeatureExtendedImageInsts, + FeatureFP8ConversionInsts, FeaturePackedTID, FeatureVcmpxPermlaneHazard, FeatureSALUFloatInsts, diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 489cf85693edb2c..6b1d04b04066fe7 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3500,6 +3500,9 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { return !isInlineConstant(Inst, OpIdx); } else if (MO.isReg()) { auto Reg = MO.getReg(); + if (!Reg) { + return false; + } const MCRegisterInfo *TRI = getContext().getRegisterInfo(); auto PReg = mc2PseudoReg(Reg); return isSGPR(PReg, TRI) && PReg != SGPR_NULL; @@ -8303,12 +8306,20 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || - Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { + Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || + Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) { Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods Inst.addOperand(Inst.getOperand(0)); } - if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { + // Adding vdst_in operand is already covered for these DPP instructions in + // cvtVOP3DPP. + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && + !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) { assert(!IsPacked); Inst.addOperand(Inst.getOperand(0)); } @@ -8770,6 +8781,22 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, } } + int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); + if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) { + Inst.addOperand(Inst.getOperand(0)); + } + + bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12; + if (IsVOP3CvtSrDpp) { + if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { + Inst.addOperand(MCOperand::createImm(0)); + Inst.addOperand(MCOperand::createReg(0)); + } + } + auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO); if (TiedTo != -1) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 86096b0d80b4246..2c6aa2ee348a1f2 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -712,6 +712,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, AMDGPU::OpName::src2_modifiers); } + if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp || + MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) { + // Insert dummy unused src2_modifiers. + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src2_modifiers); + } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) && !AMDGPU::hasGDS(STI)) { insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds); @@ -942,6 +949,7 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const { // first add optional MI operands to check FI DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); + if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) { convertVOP3PDPPInst(MI); } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) || @@ -951,6 +959,15 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { if (isMacDPP(MI)) convertMacDPPInst(MI); + int VDstInIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); + if (VDstInIdx != -1) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); + + if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || + MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2); + unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { @@ -977,6 +994,15 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { if (isMacDPP(MI)) convertMacDPPInst(MI); + int VDstInIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); + if (VDstInIdx != -1) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); + + if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || + MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2); + unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index e73e53aa270f911..9e64e3fd79576db 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -1305,6 +1305,16 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Opc = MI->getOpcode(); + if (isCvt_F32_Fp8_Bf8_e64(Opc)) { + auto SrcMod = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); + unsigned Mod = MI->getOperand(SrcMod).getImm(); + unsigned Index0 = !!(Mod & SISrcMods::OP_SEL_0); + unsigned Index1 = !!(Mod & SISrcMods::OP_SEL_1); + if (Index0 || Index1) + O << " op_sel:[" << Index0 << ',' << Index1 << ']'; + return; + } if (isPermlane16(Opc)) { auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index a6820544f4b4d22..4ab840a4c848523 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1684,8 +1684,9 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, !if(HasOMod, (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod0:$clamp, omod0:$omod), - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - clampmod0:$clamp)) + !if (HasClamp, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod0:$clamp), + (ins Src0Mod:$src0_modifiers, Src0RC:$src0))) /* else */, // VOP1 without modifiers !if (HasClamp, @@ -2279,6 +2280,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { field bit IsSingle = 0; field bit IsWMMA = 0; + field bit IsFP8 = 0; + field bit HasDst = !ne(DstVT.Value, untyped.Value); field bit HasDst32 = HasDst; field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 0bf9452d822e970..106fdb19f27895b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -529,6 +529,17 @@ bool isPermlane16(unsigned Opc) { Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12; } +bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) { + return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 || + Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 || + Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 || + Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12; +} + bool isGenericAtomic(unsigned Opc) { return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN || Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX || diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index d3f55c79201747f..11b0bc5c81711e0 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -535,6 +535,9 @@ bool isPermlane16(unsigned Opc); LLVM_READNONE bool isGenericAtomic(unsigned Opc); +LLVM_READNONE +bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc); + namespace VOPD { enum Component : unsigned { diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 95a1d86963473a8..ef652fce65482c4 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -571,6 +571,7 @@ let SubtargetPredicate = isGFX9Only in { } // End SubtargetPredicate = isGFX9Only class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> { + let HasExtDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 1; let HasExt = 1; @@ -599,6 +600,7 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index, (inst_sdwa 0, $src, 0, 0, index) >; +let SubtargetPredicate = isGFX9Only in { let OtherPredicates = [HasCvtFP8VOP1Bug] in { def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>; @@ -617,6 +619,7 @@ foreach Index = [1, 2, 3] in { def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>; def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>; } +} // End SubtargetPredicate = isGFX9Only class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< @@ -626,11 +629,77 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, (inst_e32 $src)) >; -foreach Index = [0, -1] in { - def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, - V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; - def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, - V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; +let SubtargetPredicate = isGFX9Only in { + foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, + V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, + V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; + } +} + + +// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions. +def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> { + let HasOpSel = 1; + let HasExtVOP3DPP = 0; +} + +def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]> { + let HasOpSel = 1; + let HasExtDPP = 1; + let HasExtVOP3DPP = 1; + let IsFP8 = 1; + let HasClamp = 0; + let HasOMod = 0; + let HasModifiers = 1; + let Src1VOP3DPP = Src1RC64; +} + +let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>; + defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>; + defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>; + defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>; +} + +class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index, + VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat< + (f32 (node i32:$src, index)), + !if (index, + (inst_e64 !if(index{0}, + !if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1), + SRCMODS.OP_SEL_0), + !if(index{1}, SRCMODS.OP_SEL_1, 0)), + $src, 0), + (inst_e32 $src)) +>; + +let SubtargetPredicate = isGFX12Plus in { + foreach Index = [0, 1, 2, 3] in { + def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index, + V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>; + def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_bf8, Index, + V_CVT_F32_BF8_e32, V_CVT_F32_BF8_OP_SEL_e64>; + } +} + +class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index, + VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat< + (v2f32 (node i32:$src, index)), + !if (index, + (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE), + (inst_e32 $src)) +>; + +let SubtargetPredicate = isGFX12Plus in { + foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index, + V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>; + def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index, + V_CVT_... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/79340 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits