llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Mirko Brkušanin (mbrkusanin) <details> <summary>Changes</summary> --- Patch is 49.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180516.diff 15 Files Affected: - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+2-1) - (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+5-5) - (modified) llvm/lib/TargetParser/TargetParser.cpp (+1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll (+126-2) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p.s (+122) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_dpp16.s (+25) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_dpp16_err.s (+24) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_dpp8.s (+25) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_dpp8_err.s (+27) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_err.s (+133) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3p.txt (+122) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3p_dpp16.txt (+25) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3p_dpp8.txt (+25) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3p_err.txt (+144) ``````````diff diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl index 2eb8b6d5f1069..576fb475f883d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl @@ -1,5 +1,6 @@ // REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1170 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s typedef unsigned int uint; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 9d723c86031f2..723b064afd64e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1885,7 +1885,8 @@ def FeatureISAVersion11_7_0 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureSALUFloatInsts, FeatureDPPSrc1SGPR, - FeatureFP8ConversionInsts])>; + FeatureFP8ConversionInsts, + FeatureDot11Insts])>; def FeatureISAVersion12 : FeatureSet< [FeatureGFX12, diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 256dd0bb027ad..063546e1a5bc2 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -693,7 +693,6 @@ def VOP3P_DOTF8_Profile : VOP3P_Profile<VOPProfile <[f32, i32, i32, f32]>, multiclass VOP3PDOTF8Inst <string OpName, SDPatternOperator intrinsic_node> { defm NAME : VOP3PInst<OpName, VOP3P_DOTF8_Profile, null_frag, 1>; - let SubtargetPredicate = isGFX12Plus in def : GCNPat <(intrinsic_node i32:$src0, i32:$src1, (VOP3Mods f32:$src2, i32:$src2_modifiers)), (!cast<Instruction>(NAME) i32:$src0, i32:$src1, @@ -2493,6 +2492,11 @@ multiclass VOP3P_Realtriple<GFXGen Gen, bits<8> op, string backing_ps_name = NAM multiclass VOP3P_Realtriple_gfx11_gfx12<bits<8> op> : VOP3P_Realtriple<GFX11Gen, op>, VOP3P_Realtriple<GFX12Gen, op>; +defm V_DOT4_F32_FP8_BF8 : VOP3P_Realtriple_gfx11_gfx12<0x24>; +defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple_gfx11_gfx12<0x25>; +defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple_gfx11_gfx12<0x26>; +defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple_gfx11_gfx12<0x27>; + //===----------------------------------------------------------------------===// // GFX12 //===----------------------------------------------------------------------===// @@ -2546,10 +2550,6 @@ def : AMDGPUMnemonicAlias<"v_fma_mix_f32_f16", "v_fma_mix_f32">; defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>; defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>; -defm V_DOT4_F32_FP8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x24>; -defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>; -defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>; -defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>; //===----------------------------------------------------------------------===// // GFX11 diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index d317ca4e1194a..fc5d7519bdffe 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -519,6 +519,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, break; case GK_GFX1170: // TODO-GFX1170: Update features map for gfx1170 + Features["dot11-insts"] = true; Features["fp8-conversion-insts"] = true; [[fallthrough]]; case GK_GFX1153: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll index 796f6b84b1d14..5891456364fa7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll @@ -1,8 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GCN,GFX1170 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GCN,GFX1170 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s define float @test_amdgcn_dot4_f32_fp8_bf8(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -18,6 +26,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_bf8_fabs(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -34,6 +48,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_bf8_fneg(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -50,6 +70,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -67,6 +93,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -84,6 +116,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_fp8(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -99,6 +137,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_fp8_fabs(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -115,6 +159,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_fp8_fneg(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -131,6 +181,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -148,6 +204,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -165,6 +227,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_fp8(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -180,6 +248,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_fp8_fabs(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -196,6 +270,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_fp8_fneg(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -212,6 +292,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -229,6 +315,12 @@ entry: } define float @test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -246,6 +338,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_bf8(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -261,6 +359,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_bf8_fabs(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -277,6 +381,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_bf8_fneg(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -293,6 +403,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -310,6 +426,12 @@ entry: } define float @test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs(i32 %a, i32 %b, float %c) { +; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -333,3 +455,5 @@ declare float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %c) declare float @llvm.fabs.f32(float %a) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} diff --git a/llvm/test/MC/AMDGPU/gfx1170_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx1170_asm_vop3p.s new file mode 100644 index 0000000000000..eb2ad1c3faa15 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1170_asm_vop3p.s @@ -0,0 +1,122 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1170 %s | FileCheck --check-prefix=GFX1170 %s + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 +// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] +// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x9c] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] +// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x24,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_fp8_bf8 v0, s0, v2, v3 +// GFX1170: v_dot4_f32_fp8_bf8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x00,0x04,0x0e,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, s0, v3 +// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x01,0x0c,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, v2, s0 +// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x02,0x18] + +v_dot4_f32_fp8_bf8 v0, 1.0, v2, v3 +// GFX1170: v_dot4_f32_fp8_bf8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0xf2,0x04,0x0e,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, 1.0, v3 +// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0xe5,0x0d,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, v2, 1.0 +// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0xca,0x1b] + +v_dot4_f32_fp8_bf8 v0, v1, v2, 1 +// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x06,0x1a] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 +// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] +// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x9c] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] +// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x25,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_bf8_fp8 v0, s0, v2, v3 +// GFX1170: v_dot4_f32_bf8_fp8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x00,0x04,0x0e,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, s0, v3 +// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x01,0x0c,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, v2, s0 +// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x02,0x18] + +v_dot4_f32_bf8_fp8 v0, 1.0, v2, v3 +// GFX1170: v_dot4_f32_bf8_fp8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0xf2,0x04,0x0e,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, 1.0, v3 +// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0xe5,0x0d,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, v2, 1.0 +// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0xca,0x1b] + +v_dot4_f32_bf8_fp8 v0, v1, v2, 1 +// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x06,0x1a] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 +// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] +// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x9c] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] +// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x26,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_fp8_fp8 v0, s0, v2, v3 +// GFX1170: v_dot4_f32_fp8_fp8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x00,0x04,0x0e,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, s0, v3 +// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x01,0x0c,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, v2, s0 +// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x02,0x18] + +v_dot4_f32_fp8_fp8 v0, 1.0, v2, v3 +// GFX1170: v_dot4_f32_fp8_fp8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0xf2,0x04,0x0e,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3 +// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0xe5,0x0d,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, v2, 1.0 +// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0xca,0x1b] + +v_dot4_f32_fp8_fp8 v0, v1, v2, 1 +// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x06,0x1a] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 +// GFX1170: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] +// GFX1170: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x9c] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] +// GFX1170: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x27,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_bf8_bf8 v0, s0, v2, v3 +// GFX1170: v_dot4_f32_bf8_bf8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x00,0x04,0x0e,0x1c] + +v_dot4_f32_bf8_bf8 v0, v1, s0, v3 +// GFX1170: v_dot4_f32_bf8_bf8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x01,0x0c,0x1c] + +v_dot4_f32_bf8_b... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/180516 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
