arsenm requested changes to this revision. arsenm added inline comments. This revision now requires changes to proceed.
================ Comment at: clang/lib/Basic/Targets/AMDGPU.h:119 + bool hasBFloat16Type() const override { return isAMDGCN(getTriple()); } + const char *getBFloat16Mangling() const override { return "u6__bf16"; }; + ---------------- Don't understand this mangling. What is u6? ================ Comment at: clang/test/CodeGenCUDA/amdgpu-bf16.cu:30 + *out = bf16; +} + ---------------- should also test a load ================ Comment at: clang/test/SemaCUDA/amdgpu-bf16.cu:43 + *out = bf16; +} + ---------------- check casts to different int and float types? Is construction of bf16 vectors allowed? ================ Comment at: llvm/lib/Target/AMDGPU/SIISelLowering.cpp:4819-4831 + // When we don't have 16 bit instructions, bf16 is illegal and gets + // softened to i16 for storage, with float being used for arithmetic. + // + // After softening, some i16 -> fp32 bf16_to_fp operations can be left over. + // Lower those to (f32 (fp_extend (f16 (bitconvert x)))) + if (!Op->getValueType(0).isFloatingPoint() || + Op->getOperand(0).getValueType() != MVT::i16) ---------------- The generic legalizer should have handled this? ================ Comment at: llvm/lib/Target/AMDGPU/SIISelLowering.cpp:5185-5198 + // When we don't have 16 bit instructions, bf16 is illegal and gets + // softened to i16 for storage, with float being used for arithmetic. + // + // After softening, fp_to_bf16 can be emitted, but with a i16 VT instead. 
+ // Of course those won't work, so we handle them here by lowering them + // to (i16 (bitconvert (f32 (fptrunc x)))) + if (N->getValueType(0) != MVT::i16) ---------------- Ditto ================ Comment at: llvm/test/CodeGen/AMDGPU/bf16-ops.ll:1-4 +; RUN: not llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs +; RUN: not llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs +; RUN: not llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs +; RUN: not llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs ---------------- Drop -verify-machineinstrs ================ Comment at: llvm/test/CodeGen/AMDGPU/bf16-ops.ll:22 + +define void @test_add(bfloat %a, bfloat %b, bfloat addrspace(1)* %out) { + %result = fadd bfloat %a, %b ---------------- Use opaque pointers ================ Comment at: llvm/test/CodeGen/AMDGPU/bf16.ll:432 +} + +define void @test_arg_store_v4bf16(<4 x bfloat> %in, <4 x bfloat> addrspace(1)* %out) { ---------------- Missing v3 test ================ Comment at: llvm/test/CodeGen/AMDGPU/bf16.ll:899 +} + +define bfloat @test_alloca_load_store_ret(bfloat %in) { ---------------- Also test returning vectors ================ Comment at: llvm/test/CodeGen/AMDGPU/bf16.ll:953 + store volatile bfloat %in, bfloat addrspace(5)* %in.addr, align 2 + %0 = load volatile bfloat, bfloat addrspace(5)* %in.addr, align 2 + ret bfloat %0 ---------------- Don't use anonymous values. Also use opaque pointers ================ Comment at: llvm/test/CodeGen/AMDGPU/bf16.ll:956 +} + ---------------- Should also test call arguments, call returns, arguments passed byval, sret, implicit sret, and arguments passed in the overflow stack area. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D139398/new/ https://reviews.llvm.org/D139398 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits