https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/184666
>From 3c066fbad224d15f6753a29f35ca292804332da4 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Wed, 4 Mar 2026 14:12:42 -0500 Subject: [PATCH 1/2] [PowerPC] Implement Deeply Compressed Weights Builtins Add support for the following deeply compressed weights builtins for ISA Future. - vec_uncompresshn(vector unsigned char, vector unsigned char) - vec_uncompressln(vector unsigned char, vector unsigned char) - vec_uncompresshb(vector unsigned char, vector unsigned char) - vec_uncompresslb(vector unsigned char, vector unsigned char) - vec_uncompresshh(vector unsigned char, vector unsigned char) - vec_uncompresslh(vector unsigned char, vector unsigned char) - vec_unpack_hsn_to_byte(vector unsigned char) - vec_unpack_lsn_to_byte(vector unsigned char) - vec_unpack_int4_to_bf16(vector unsigned char, uint2) - vec_unpack_int8_to_bf16(vector unsigned char, uint1) - vec_unpack_int4_to_fp32(vector unsigned char, uint3) - vec_unpack_int8_to_fp32(vector unsigned char, uint2) --- clang/include/clang/Basic/BuiltinsPPC.def | 26 ++ clang/lib/Basic/Targets/PPC.cpp | 4 + clang/lib/Basic/Targets/PPC.h | 1 + clang/lib/Headers/altivec.h | 58 +++++ clang/lib/Sema/SemaPPC.cpp | 8 + .../builtins-ppc-deeply-compressed-weights.c | 194 ++++++++++++++ ...tins-ppc-deeply-compressed-weights-error.c | 54 ++++ llvm/include/llvm/IR/IntrinsicsPowerPC.td | 30 +++ llvm/lib/Target/PowerPC/PPC.td | 6 + llvm/lib/Target/PowerPC/PPCInstrFuture.td | 48 +++- .../PowerPC/deeply-compressed-weights.ll | 244 ++++++++++++++++++ 11 files changed, 661 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c create mode 100644 clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c create mode 100644 llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 75d7d92c4f9d4..3b1062a184175 100644 --- 
a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1158,6 +1158,32 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, "vW1024*W256V", UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3", "mma,isa-future-instructions") +// Deeply Compressed Weights built-ins. +TARGET_BUILTIN(__builtin_altivec_vucmprhn, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprln, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprhb, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprlb, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprhh, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprlh, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint4tobf16, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint8tobf16, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint4tofp32, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint8tofp32, "V16UcV16UcIi", "", + "isa-future-instructions") + // FIXME: Obviously incomplete. 
#undef BUILTIN diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index a37a68ad91724..ccb6c7ba60b37 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -59,6 +59,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasP9Vector = true; } else if (Feature == "+power10-vector") { HasP10Vector = true; + } else if (Feature == "+isa-future-instructions") { + HasFutureVector = true; } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; } else if (Feature == "+spe" || Feature == "+efpu2") { @@ -434,6 +436,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__POWER10_VECTOR__"); if (HasPCRelativeMemops) Builder.defineMacro("__PCREL__"); + if (HasFutureVector) + Builder.defineMacro("__FUTURE_VECTOR__"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 664c9e15d8d18..0c71b8c3adfb0 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -69,6 +69,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool HasFrsqrte = false; bool HasFrsqrtes = false; bool HasP10Vector = false; + bool HasFutureVector = false; bool HasPCRelativeMemops = false; bool HasQuadwordAtomics = false; bool UseLongCalls = false; diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 71d8d3c0c0771..2ce982bea5cf2 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -19314,6 +19314,64 @@ vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) { #endif /* __SIZEOF_INT128__ */ #endif /* __POWER10_VECTOR__ */ +#ifdef __FUTURE_VECTOR__ + +/* vec_uncompress* - Deeply Compressed Weights builtins */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshn(vector unsigned char __a, vector unsigned char __b) { + 
return __builtin_altivec_vucmprhn(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompressln(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprln(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshb(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprhb(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresslb(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprlb(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshh(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprhh(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresslh(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprlh(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_unpack_hsn_to_byte(vector unsigned char __a) { + return __builtin_altivec_vupkhsntob(__a); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_unpack_lsn_to_byte(vector unsigned char __a) { + return __builtin_altivec_vupklsntob(__a); +} + +#define vec_unpack_int4_to_bf16(__a, __imm) \ + __builtin_altivec_vupkint4tobf16((__a), (__imm)) + +#define vec_unpack_int8_to_bf16(__a, __imm) \ + __builtin_altivec_vupkint8tobf16((__a), (__imm)) + +#define vec_unpack_int4_to_fp32(__a, __imm) \ + __builtin_altivec_vupkint4tofp32((__a), (__imm)) + +#define vec_unpack_int8_to_fp32(__a, __imm) \ + __builtin_altivec_vupkint8tofp32((__a), (__imm)) + +#endif /* __FUTURE_VECTOR__ */ + #ifdef __POWER8_VECTOR__ #define __bcdadd(__a, __b, __ps) __builtin_ppc_bcdadd((__a), (__b), (__ps)) #define __bcdsub(__a, __b, __ps) __builtin_ppc_bcdsub((__a), (__b), (__ps)) diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 7f7f2f9638129..4013fd35011a9 100644 --- a/clang/lib/Sema/SemaPPC.cpp 
+++ b/clang/lib/Sema/SemaPPC.cpp @@ -224,6 +224,14 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7); case PPC::BI__builtin_vsx_xxpermx: return SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 7); + case PPC::BI__builtin_altivec_vupkint4tobf16: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3); + case PPC::BI__builtin_altivec_vupkint8tobf16: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1); + case PPC::BI__builtin_altivec_vupkint4tofp32: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 7); + case PPC::BI__builtin_altivec_vupkint8tofp32: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3); case PPC::BI__builtin_ppc_tw: case PPC::BI__builtin_ppc_tdw: return SemaRef.BuiltinConstantArgRange(TheCall, 2, 1, 31); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c new file mode 100644 index 0000000000000..3b4eb0faa27c2 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c @@ -0,0 +1,194 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +// AI Assisted. 
+ +#include <altivec.h> + +vector unsigned char vuca, vucb; + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshn( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshn(void) { + return vec_uncompresshn(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompressln( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompressln(void) { + return vec_uncompressln(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> 
@test_vec_uncompresshb( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshb(void) { + return vec_uncompresshb(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslb( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresslb(void) { + return vec_uncompresslb(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshh( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = 
alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshh(void) { + return vec_uncompresshh(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslh( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresslh(void) { + return vec_uncompresslh(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_hsn_to_byte( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: store <16 
x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +vector unsigned char test_vec_unpack_hsn_to_byte(void) { + return vec_unpack_hsn_to_byte(vuca); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_lsn_to_byte( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +vector unsigned char test_vec_unpack_lsn_to_byte(void) { + return vec_unpack_lsn_to_byte(vuca); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int4_to_bf16( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> [[TMP0]], i32 2) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int4_to_bf16(void) { + return vec_unpack_int4_to_bf16(vuca, 2); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_bf16( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int8_to_bf16(void) { + return vec_unpack_int8_to_bf16(vuca, 1); +} + +// CHECK-LABEL: define dso_local <16 x i8> 
@test_vec_unpack_int4_to_fp32( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> [[TMP0]], i32 5) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int4_to_fp32(void) { + return vec_unpack_int4_to_fp32(vuca, 5); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_fp32( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> [[TMP0]], i32 3) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int8_to_fp32(void) { + return vec_unpack_int8_to_fp32(vuca, 3); +} diff --git a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c new file mode 100644 index 0000000000000..5092b15731c81 --- /dev/null +++ b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c @@ -0,0 +1,54 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -verify %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -verify %s + +// AI Assisted. 
+ +#include <altivec.h> + +vector unsigned char vuca, vucb; +vector signed int vsia; + +void test_invalid_params(void) { + vector unsigned char res; + + // Test invalid parameter types + res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} [email protected]:* {{passing argument to parameter '__a' here}} + res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} [email protected]:* {{passing argument to parameter '__b' here}} + res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} [email protected]:* {{passing argument to parameter '__a' here}} +} + +void test_invalid_immediates(void) { + vector unsigned char res; + + // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid range: 0-3) + res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} + + // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid range: 0-1) + res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} + + // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid range: 0-7) + res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 
is outside the valid range [0, 7]}} + + // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid range: 0-3) + res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} +} + +void test_non_constant_immediates(void) { + vector unsigned char res; + unsigned int imm = 1; + + // Test non-constant immediate values + res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tobf16' must be a constant integer}} + res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tobf16' must be a constant integer}} + res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tofp32' must be a constant integer}} + res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tofp32' must be a constant integer}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index ec33af88c72d9..fa0a7393658a8 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1362,6 +1362,36 @@ def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">; def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">; def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">; def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">; +// Deeply Compressed Weights Intrinsics. 
+def int_ppc_altivec_vucmprhn : PowerPC_Vec_BBB_Intrinsic<"vucmprhn">; +def int_ppc_altivec_vucmprln : PowerPC_Vec_BBB_Intrinsic<"vucmprln">; +def int_ppc_altivec_vucmprhb : PowerPC_Vec_BBB_Intrinsic<"vucmprhb">; +def int_ppc_altivec_vucmprlb : PowerPC_Vec_BBB_Intrinsic<"vucmprlb">; +def int_ppc_altivec_vucmprhh : PowerPC_Vec_BBB_Intrinsic<"vucmprhh">; +def int_ppc_altivec_vucmprlh : PowerPC_Vec_BBB_Intrinsic<"vucmprlh">; +def int_ppc_altivec_vupkhsntob : + PowerPC_Vec_Intrinsic<"vupkhsntob", [llvm_v16i8_ty], + [llvm_v16i8_ty], [IntrNoMem]>; +def int_ppc_altivec_vupklsntob : + PowerPC_Vec_Intrinsic<"vupklsntob", [llvm_v16i8_ty], + [llvm_v16i8_ty], [IntrNoMem]>; +def int_ppc_altivec_vupkint4tobf16 : + PowerPC_Vec_Intrinsic<"vupkint4tobf16", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint8tobf16 : + PowerPC_Vec_Intrinsic<"vupkint8tobf16", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint4tofp32 : + PowerPC_Vec_Intrinsic<"vupkint4tofp32", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint8tofp32 : + PowerPC_Vec_Intrinsic<"vupkint8tofp32", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Definitions. 
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index c0abbf6f50804..b4f1c422a7f27 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -292,6 +292,10 @@ def FeatureP10Vector : SubtargetFeature<"power10-vector", "HasP10Vector", "true", "Enable POWER10 vector instructions", [FeatureISA3_1, FeatureP9Vector]>; +def FeatureFutureVector : SubtargetFeature<"future-vector", "HasFutureVector", + "true", + "Enable Future vector instructions", + [FeatureISAFuture, FeatureP10Vector]>; // A separate feature for this even though it is equivalent to P9Vector // because this is a feature of the implementation rather than the architecture // and may go away with future CPU's. @@ -400,6 +404,7 @@ def HasP9Altivec : Predicate<"Subtarget->hasP9Altivec()">; def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">; def NoP10Vector : Predicate<"!Subtarget->hasP10Vector()">; def HasP10Vector : Predicate<"Subtarget->hasP10Vector()">; +def HasFutureVector : Predicate<"Subtarget->hasFutureVector()">; // Predicates used to differenciate between different ISAs. def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">; @@ -554,6 +559,7 @@ def ProcessorFeatures { // For future CPU we assume that all of the existing features from Power11 // still exist with the exception of those we know are Power11 specific. 
list<SubtargetFeature> FutureAdditionalFeatures = [DirectivePwrFuture, + FeatureFutureVector, FeatureISAFuture]; list<SubtargetFeature> FutureSpecificFeatures = []; list<SubtargetFeature> FutureInheritableFeatures = diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index 717454f78e2a4..855f58d8205ba 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -431,38 +431,62 @@ let Predicates = [HasVSX, IsISAFuture] in { } def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB), - "vupkhsntob $VRT, $VRB", []>; + "vupkhsntob $VRT, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkhsntob v16i8:$VRB))]>; def VUPKLSNTOB : VXForm_VRTB5<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB), - "vupklsntob $VRT, $VRB", []>; + "vupklsntob $VRT, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vupklsntob v16i8:$VRB))]>; def VUPKINT4TOBF16 : VXForm_VRTB5_UIM2<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM), - "vupkint4tobf16 $VRT, $VRB, $UIM", []>; + "vupkint4tobf16 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint4tobf16 v16i8:$VRB, timm:$UIM))]>; def VUPKINT8TOBF16 : VXForm_VRTB5_UIM1<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB, u1imm:$UIM), - "vupkint8tobf16 $VRT, $VRB, $UIM", []>; + "vupkint8tobf16 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint8tobf16 v16i8:$VRB, timm:$UIM))]>; def VUPKINT8TOFP32 : VXForm_VRTB5_UIM2<387, 3, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM), - "vupkint8tofp32 $VRT, $VRB, $UIM", []>; + "vupkint8tofp32 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint8tofp32 v16i8:$VRB, timm:$UIM))]>; def VUPKINT4TOFP32 : VXForm_VRTB5_UIM3<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u3imm:$UIM), - "vupkint4tofp32 $VRT, $VRB, $UIM", []>; + "vupkint4tofp32 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint4tofp32 v16i8:$VRB, timm:$UIM))]>; def VUCMPRHN : VXForm_VRTAB5<3, (outs vrrc:$VRT), (ins vrrc:$VRA, 
vrrc:$VRB), - "vucmprhn $VRT, $VRA, $VRB", []>; + "vucmprhn $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhn v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLN : VXForm_VRTAB5<67, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprln $VRT, $VRA, $VRB", []>; + "vucmprln $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprln v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRHB : VXForm_VRTAB5<131, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprhb $VRT, $VRA, $VRB", []>; + "vucmprhb $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhb v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLB : VXForm_VRTAB5<195, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprlb $VRT, $VRA, $VRB", []>; + "vucmprlb $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprlb v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRHH : VXForm_VRTAB5<259, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprhh $VRT, $VRA, $VRB", []>; + "vucmprhh $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhh v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLH : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprlh $VRT, $VRA, $VRB", []>; + "vucmprlh $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprlh v16i8:$VRA, v16i8:$VRB))]>; def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvrlw $XT, $XA, $XB", diff --git a/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll new file mode 100644 index 0000000000000..85f84ade7c3c1 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s 
--check-prefix=CHECK-BE + +; AI Assisted. + +declare <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8>, i32) + +define <16 x i8> @test_vucmprhn(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhn: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhn 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhn: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhn 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprln(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprln: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprln 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprln: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprln 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprhb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhb: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhb 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhb: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhb 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> %a, <16 
x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprlb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprlb: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprlb 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprlb: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprlb 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprhh(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhh: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhh 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhh: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhh 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprlh(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprlh: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprlh 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprlh: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprlh 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkhsntob(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkhsntob: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkhsntob 2, 2 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkhsntob: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkhsntob 2, 2 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> %a) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupklsntob(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupklsntob: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupklsntob 2, 2 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupklsntob: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupklsntob 2, 2 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> %a) + ret <16 x i8> %res +} + +define <16 x i8> 
@test_vupkint4tobf16_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tobf16_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tobf16 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tobf16_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tobf16 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tobf16_3(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tobf16_3: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tobf16 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tobf16_3: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tobf16 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 3) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tobf16_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tobf16_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tobf16 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tobf16_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tobf16 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tobf16_1(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tobf16_1: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tobf16 2, 2, 1 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tobf16_1: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tobf16 2, 2, 1 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 1) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tofp32_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tofp32_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tofp32 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tofp32_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tofp32 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> 
@llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tofp32_7(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tofp32_7: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tofp32 2, 2, 7 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tofp32_7: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tofp32 2, 2, 7 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 7) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tofp32_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tofp32_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tofp32 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tofp32_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tofp32 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tofp32_3(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tofp32_3: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tofp32 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tofp32_3: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tofp32 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 3) + ret <16 x i8> %res +} >From 29895eab8f22ebafd9fdd2378787b5ff211862e0 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Wed, 4 Mar 2026 22:58:00 +0000 Subject: [PATCH 2/2] fix format --- clang/lib/Headers/altivec.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 2ce982bea5cf2..85323a55c6377 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -19358,16 +19358,16 @@ vec_unpack_lsn_to_byte(vector unsigned char __a) { return __builtin_altivec_vupklsntob(__a); } -#define vec_unpack_int4_to_bf16(__a, __imm) \ +#define vec_unpack_int4_to_bf16(__a, __imm) \ 
__builtin_altivec_vupkint4tobf16((__a), (__imm)) -#define vec_unpack_int8_to_bf16(__a, __imm) \ +#define vec_unpack_int8_to_bf16(__a, __imm) \ __builtin_altivec_vupkint8tobf16((__a), (__imm)) -#define vec_unpack_int4_to_fp32(__a, __imm) \ +#define vec_unpack_int4_to_fp32(__a, __imm) \ __builtin_altivec_vupkint4tofp32((__a), (__imm)) -#define vec_unpack_int8_to_fp32(__a, __imm) \ +#define vec_unpack_int8_to_fp32(__a, __imm) \ __builtin_altivec_vupkint8tofp32((__a), (__imm)) #endif /* __FUTURE_VECTOR__ */ _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
