https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/184666
>From 3c066fbad224d15f6753a29f35ca292804332da4 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Wed, 4 Mar 2026 14:12:42 -0500 Subject: [PATCH 1/2] [PowerPC] Implement Deeply Compressed Weights Builtins Add support for the following deeply compressed weights builtins for ISA Future. - vec_uncompresshn(vector unsigned char, vector unsigned char) - vec_uncompressln(vector unsigned char, vector unsigned char) - vec_uncompresshb(vector unsigned char, vector unsigned char) - vec_uncompresslb(vector unsigned char, vector unsigned char) - vec_uncompresshh(vector unsigned char, vector unsigned char) - vec_uncompresslh(vector unsigned char, vector unsigned char) - vec_unpack_hsn_to_byte(vector unsigned char) - vec_unpack_lsn_to_byte(vector unsigned char) - vec_unpack_int4_to_bf16(vector unsigned char, uint2) - vec_unpack_int8_to_bf16(vector unsigned char, uint1) - vec_unpack_int4_to_fp32(vector unsigned char, uint3) - vec_unpack_int8_to_fp32(vector unsigned char, uint2) --- clang/include/clang/Basic/BuiltinsPPC.def | 26 ++ clang/lib/Basic/Targets/PPC.cpp | 4 + clang/lib/Basic/Targets/PPC.h | 1 + clang/lib/Headers/altivec.h | 58 +++++ clang/lib/Sema/SemaPPC.cpp | 8 + .../builtins-ppc-deeply-compressed-weights.c | 194 ++++++++++++++ ...tins-ppc-deeply-compressed-weights-error.c | 54 ++++ llvm/include/llvm/IR/IntrinsicsPowerPC.td | 30 +++ llvm/lib/Target/PowerPC/PPC.td | 6 + llvm/lib/Target/PowerPC/PPCInstrFuture.td | 48 +++- .../PowerPC/deeply-compressed-weights.ll | 244 ++++++++++++++++++ 11 files changed, 661 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c create mode 100644 clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c create mode 100644 llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 75d7d92c4f9d4..3b1062a184175 100644 --- 
a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1158,6 +1158,32 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, "vW1024*W256V", UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3", "mma,isa-future-instructions") +// Deeply Compressed Weights built-ins. +TARGET_BUILTIN(__builtin_altivec_vucmprhn, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprln, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprhb, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprlb, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprhh, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprlh, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint4tobf16, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint8tobf16, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint4tofp32, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint8tofp32, "V16UcV16UcIi", "", + "isa-future-instructions") + // FIXME: Obviously incomplete. 
#undef BUILTIN diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index a37a68ad91724..ccb6c7ba60b37 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -59,6 +59,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasP9Vector = true; } else if (Feature == "+power10-vector") { HasP10Vector = true; + } else if (Feature == "+isa-future-instructions") { + HasFutureVector = true; } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; } else if (Feature == "+spe" || Feature == "+efpu2") { @@ -434,6 +436,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__POWER10_VECTOR__"); if (HasPCRelativeMemops) Builder.defineMacro("__PCREL__"); + if (HasFutureVector) + Builder.defineMacro("__FUTURE_VECTOR__"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 664c9e15d8d18..0c71b8c3adfb0 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -69,6 +69,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool HasFrsqrte = false; bool HasFrsqrtes = false; bool HasP10Vector = false; + bool HasFutureVector = false; bool HasPCRelativeMemops = false; bool HasQuadwordAtomics = false; bool UseLongCalls = false; diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 71d8d3c0c0771..2ce982bea5cf2 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -19314,6 +19314,64 @@ vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) { #endif /* __SIZEOF_INT128__ */ #endif /* __POWER10_VECTOR__ */ +#ifdef __FUTURE_VECTOR__ + +/* vec_uncompress* - Deeply Compressed Weights builtins */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshn(vector unsigned char __a, vector unsigned char __b) { + 
return __builtin_altivec_vucmprhn(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompressln(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprln(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshb(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprhb(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresslb(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprlb(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshh(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprhh(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresslh(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprlh(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_unpack_hsn_to_byte(vector unsigned char __a) { + return __builtin_altivec_vupkhsntob(__a); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_unpack_lsn_to_byte(vector unsigned char __a) { + return __builtin_altivec_vupklsntob(__a); +} + +#define vec_unpack_int4_to_bf16(__a, __imm) \ + __builtin_altivec_vupkint4tobf16((__a), (__imm)) + +#define vec_unpack_int8_to_bf16(__a, __imm) \ + __builtin_altivec_vupkint8tobf16((__a), (__imm)) + +#define vec_unpack_int4_to_fp32(__a, __imm) \ + __builtin_altivec_vupkint4tofp32((__a), (__imm)) + +#define vec_unpack_int8_to_fp32(__a, __imm) \ + __builtin_altivec_vupkint8tofp32((__a), (__imm)) + +#endif /* __FUTURE_VECTOR__ */ + #ifdef __POWER8_VECTOR__ #define __bcdadd(__a, __b, __ps) __builtin_ppc_bcdadd((__a), (__b), (__ps)) #define __bcdsub(__a, __b, __ps) __builtin_ppc_bcdsub((__a), (__b), (__ps)) diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 7f7f2f9638129..4013fd35011a9 100644 --- a/clang/lib/Sema/SemaPPC.cpp 
+++ b/clang/lib/Sema/SemaPPC.cpp @@ -224,6 +224,14 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7); case PPC::BI__builtin_vsx_xxpermx: return SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 7); + case PPC::BI__builtin_altivec_vupkint4tobf16: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3); + case PPC::BI__builtin_altivec_vupkint8tobf16: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1); + case PPC::BI__builtin_altivec_vupkint4tofp32: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 7); + case PPC::BI__builtin_altivec_vupkint8tofp32: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3); case PPC::BI__builtin_ppc_tw: case PPC::BI__builtin_ppc_tdw: return SemaRef.BuiltinConstantArgRange(TheCall, 2, 1, 31); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c new file mode 100644 index 0000000000000..3b4eb0faa27c2 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c @@ -0,0 +1,194 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +// AI Assisted. 
+ +#include <altivec.h> + +vector unsigned char vuca, vucb; + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshn( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshn(void) { + return vec_uncompresshn(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompressln( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompressln(void) { + return vec_uncompressln(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> 
@test_vec_uncompresshb( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshb(void) { + return vec_uncompresshb(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslb( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresslb(void) { + return vec_uncompresslb(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshh( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = 
alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshh(void) { + return vec_uncompresshh(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslh( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresslh(void) { + return vec_uncompresslh(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_hsn_to_byte( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: store <16 
x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +vector unsigned char test_vec_unpack_hsn_to_byte(void) { + return vec_unpack_hsn_to_byte(vuca); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_lsn_to_byte( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +vector unsigned char test_vec_unpack_lsn_to_byte(void) { + return vec_unpack_lsn_to_byte(vuca); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int4_to_bf16( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> [[TMP0]], i32 2) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int4_to_bf16(void) { + return vec_unpack_int4_to_bf16(vuca, 2); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_bf16( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int8_to_bf16(void) { + return vec_unpack_int8_to_bf16(vuca, 1); +} + +// CHECK-LABEL: define dso_local <16 x i8> 
@test_vec_unpack_int4_to_fp32( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> [[TMP0]], i32 5) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int4_to_fp32(void) { + return vec_unpack_int4_to_fp32(vuca, 5); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_fp32( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> [[TMP0]], i32 3) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int8_to_fp32(void) { + return vec_unpack_int8_to_fp32(vuca, 3); +} diff --git a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c new file mode 100644 index 0000000000000..5092b15731c81 --- /dev/null +++ b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c @@ -0,0 +1,54 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -verify %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -verify %s + +// AI Assisted. 
+ +#include <altivec.h> + +vector unsigned char vuca, vucb; +vector signed int vsia; + +void test_invalid_params(void) { + vector unsigned char res; + + // Test invalid parameter types + res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} [email protected]:* {{passing argument to parameter '__a' here}} + res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} [email protected]:* {{passing argument to parameter '__b' here}} + res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} [email protected]:* {{passing argument to parameter '__a' here}} +} + +void test_invalid_immediates(void) { + vector unsigned char res; + + // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid range: 0-3) + res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} + + // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid range: 0-1) + res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} + + // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid range: 0-7) + res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 
is outside the valid range [0, 7]}} + + // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid range: 0-3) + res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} +} + +void test_non_constant_immediates(void) { + vector unsigned char res; + unsigned int imm = 1; + + // Test non-constant immediate values + res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tobf16' must be a constant integer}} + res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tobf16' must be a constant integer}} + res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tofp32' must be a constant integer}} + res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tofp32' must be a constant integer}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index ec33af88c72d9..fa0a7393658a8 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1362,6 +1362,36 @@ def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">; def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">; def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">; def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">; +// Deeply Compressed Weights Intrinsics. 
+def int_ppc_altivec_vucmprhn : PowerPC_Vec_BBB_Intrinsic<"vucmprhn">; +def int_ppc_altivec_vucmprln : PowerPC_Vec_BBB_Intrinsic<"vucmprln">; +def int_ppc_altivec_vucmprhb : PowerPC_Vec_BBB_Intrinsic<"vucmprhb">; +def int_ppc_altivec_vucmprlb : PowerPC_Vec_BBB_Intrinsic<"vucmprlb">; +def int_ppc_altivec_vucmprhh : PowerPC_Vec_BBB_Intrinsic<"vucmprhh">; +def int_ppc_altivec_vucmprlh : PowerPC_Vec_BBB_Intrinsic<"vucmprlh">; +def int_ppc_altivec_vupkhsntob : + PowerPC_Vec_Intrinsic<"vupkhsntob", [llvm_v16i8_ty], + [llvm_v16i8_ty], [IntrNoMem]>; +def int_ppc_altivec_vupklsntob : + PowerPC_Vec_Intrinsic<"vupklsntob", [llvm_v16i8_ty], + [llvm_v16i8_ty], [IntrNoMem]>; +def int_ppc_altivec_vupkint4tobf16 : + PowerPC_Vec_Intrinsic<"vupkint4tobf16", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint8tobf16 : + PowerPC_Vec_Intrinsic<"vupkint8tobf16", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint4tofp32 : + PowerPC_Vec_Intrinsic<"vupkint4tofp32", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint8tofp32 : + PowerPC_Vec_Intrinsic<"vupkint8tofp32", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Definitions. 
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index c0abbf6f50804..b4f1c422a7f27 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -292,6 +292,10 @@ def FeatureP10Vector : SubtargetFeature<"power10-vector", "HasP10Vector", "true", "Enable POWER10 vector instructions", [FeatureISA3_1, FeatureP9Vector]>; +def FeatureFutureVector : SubtargetFeature<"future-vector", "HasFutureVector", + "true", + "Enable Future vector instructions", + [FeatureISAFuture, FeatureP10Vector]>; // A separate feature for this even though it is equivalent to P9Vector // because this is a feature of the implementation rather than the architecture // and may go away with future CPU's. @@ -400,6 +404,7 @@ def HasP9Altivec : Predicate<"Subtarget->hasP9Altivec()">; def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">; def NoP10Vector : Predicate<"!Subtarget->hasP10Vector()">; def HasP10Vector : Predicate<"Subtarget->hasP10Vector()">; +def HasFutureVector : Predicate<"Subtarget->hasFutureVector()">; // Predicates used to differenciate between different ISAs. def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">; @@ -554,6 +559,7 @@ def ProcessorFeatures { // For future CPU we assume that all of the existing features from Power11 // still exist with the exception of those we know are Power11 specific. 
list<SubtargetFeature> FutureAdditionalFeatures = [DirectivePwrFuture, + FeatureFutureVector, FeatureISAFuture]; list<SubtargetFeature> FutureSpecificFeatures = []; list<SubtargetFeature> FutureInheritableFeatures = diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index 717454f78e2a4..855f58d8205ba 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -431,38 +431,62 @@ let Predicates = [HasVSX, IsISAFuture] in { } def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB), - "vupkhsntob $VRT, $VRB", []>; + "vupkhsntob $VRT, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkhsntob v16i8:$VRB))]>; def VUPKLSNTOB : VXForm_VRTB5<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB), - "vupklsntob $VRT, $VRB", []>; + "vupklsntob $VRT, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vupklsntob v16i8:$VRB))]>; def VUPKINT4TOBF16 : VXForm_VRTB5_UIM2<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM), - "vupkint4tobf16 $VRT, $VRB, $UIM", []>; + "vupkint4tobf16 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint4tobf16 v16i8:$VRB, timm:$UIM))]>; def VUPKINT8TOBF16 : VXForm_VRTB5_UIM1<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB, u1imm:$UIM), - "vupkint8tobf16 $VRT, $VRB, $UIM", []>; + "vupkint8tobf16 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint8tobf16 v16i8:$VRB, timm:$UIM))]>; def VUPKINT8TOFP32 : VXForm_VRTB5_UIM2<387, 3, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM), - "vupkint8tofp32 $VRT, $VRB, $UIM", []>; + "vupkint8tofp32 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint8tofp32 v16i8:$VRB, timm:$UIM))]>; def VUPKINT4TOFP32 : VXForm_VRTB5_UIM3<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u3imm:$UIM), - "vupkint4tofp32 $VRT, $VRB, $UIM", []>; + "vupkint4tofp32 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint4tofp32 v16i8:$VRB, timm:$UIM))]>; def VUCMPRHN : VXForm_VRTAB5<3, (outs vrrc:$VRT), (ins vrrc:$VRA, 
vrrc:$VRB), - "vucmprhn $VRT, $VRA, $VRB", []>; + "vucmprhn $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhn v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLN : VXForm_VRTAB5<67, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprln $VRT, $VRA, $VRB", []>; + "vucmprln $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprln v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRHB : VXForm_VRTAB5<131, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprhb $VRT, $VRA, $VRB", []>; + "vucmprhb $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhb v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLB : VXForm_VRTAB5<195, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprlb $VRT, $VRA, $VRB", []>; + "vucmprlb $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprlb v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRHH : VXForm_VRTAB5<259, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprhh $VRT, $VRA, $VRB", []>; + "vucmprhh $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhh v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLH : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprlh $VRT, $VRA, $VRB", []>; + "vucmprlh $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprlh v16i8:$VRA, v16i8:$VRB))]>; def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvrlw $XT, $XA, $XB", diff --git a/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll new file mode 100644 index 0000000000000..85f84ade7c3c1 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s 
--check-prefix=CHECK-BE + +; AI Assisted. + +declare <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8>, i32) + +define <16 x i8> @test_vucmprhn(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhn: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhn 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhn: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhn 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprln(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprln: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprln 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprln: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprln 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprhb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhb: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhb 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhb: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhb 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> %a, <16 
x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprlb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprlb: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprlb 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprlb: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprlb 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprhh(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhh: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhh 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhh: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhh 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprlh(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprlh: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprlh 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprlh: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprlh 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkhsntob(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkhsntob: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkhsntob 2, 2 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkhsntob: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkhsntob 2, 2 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> %a) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupklsntob(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupklsntob: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupklsntob 2, 2 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupklsntob: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupklsntob 2, 2 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> %a) + ret <16 x i8> %res +} + +define <16 x i8> 
@test_vupkint4tobf16_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tobf16_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tobf16 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tobf16_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tobf16 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tobf16_3(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tobf16_3: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tobf16 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tobf16_3: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tobf16 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 3) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tobf16_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tobf16_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tobf16 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tobf16_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tobf16 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tobf16_1(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tobf16_1: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tobf16 2, 2, 1 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tobf16_1: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tobf16 2, 2, 1 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 1) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tofp32_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tofp32_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tofp32 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tofp32_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tofp32 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> 
@llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tofp32_7(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tofp32_7: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tofp32 2, 2, 7 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tofp32_7: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tofp32 2, 2, 7 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 7) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tofp32_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tofp32_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tofp32 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tofp32_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tofp32 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tofp32_3(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tofp32_3: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tofp32 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tofp32_3: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tofp32 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 3) + ret <16 x i8> %res +} >From 29895eab8f22ebafd9fdd2378787b5ff211862e0 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Wed, 4 Mar 2026 22:58:00 +0000 Subject: [PATCH 2/2] fix format --- clang/lib/Headers/altivec.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 2ce982bea5cf2..85323a55c6377 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -19358,16 +19358,16 @@ vec_unpack_lsn_to_byte(vector unsigned char __a) { return __builtin_altivec_vupklsntob(__a); } -#define vec_unpack_int4_to_bf16(__a, __imm) \ +#define vec_unpack_int4_to_bf16(__a, __imm) \ 
__builtin_altivec_vupkint4tobf16((__a), (__imm)) -#define vec_unpack_int8_to_bf16(__a, __imm) \ +#define vec_unpack_int8_to_bf16(__a, __imm) \ __builtin_altivec_vupkint8tobf16((__a), (__imm)) -#define vec_unpack_int4_to_fp32(__a, __imm) \ +#define vec_unpack_int4_to_fp32(__a, __imm) \ __builtin_altivec_vupkint4tofp32((__a), (__imm)) -#define vec_unpack_int8_to_fp32(__a, __imm) \ +#define vec_unpack_int8_to_fp32(__a, __imm) \ __builtin_altivec_vupkint8tofp32((__a), (__imm)) #endif /* __FUTURE_VECTOR__ */ _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
