[clang] [llvm] [PowerPC] Implement Deeply Compressed Weights Builtins (PR #184666)

Thu Apr 16 07:05:19 PDT 2026

https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/184666

>From bebc75cceba843f66ee4d0094d1d6ac0e1340f73 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 4 Mar 2026 14:12:42 -0500
Subject: [PATCH 1/3] [PowerPC] Implement Deeply Compressed Weights Builtins

Add support for the following deeply compressed weights builtins for ISA Future.
- vec_uncompresshn(vector unsigned char, vector unsigned char)
- vec_uncompressln(vector unsigned char, vector unsigned char)
- vec_uncompresshb(vector unsigned char, vector unsigned char)
- vec_uncompresslb(vector unsigned char, vector unsigned char)
- vec_uncompresshh(vector unsigned char, vector unsigned char)
- vec_uncompresslh(vector unsigned char, vector unsigned char)
- vec_unpack_hsn_to_byte(vector unsigned char)
- vec_unpack_lsn_to_byte(vector unsigned char)
- vec_unpack_int4_to_bf16(vector unsigned char, uint2)
- vec_unpack_int8_to_bf16(vector unsigned char, uint1)
- vec_unpack_int4_to_fp32(vector unsigned char, uint3)
- vec_unpack_int8_to_fp32(vector unsigned char, uint2)
---
 clang/include/clang/Basic/BuiltinsPPC.def     |  26 ++
 clang/lib/Headers/altivec.h                   |  58 +++++
 clang/lib/Sema/SemaPPC.cpp                    |   8 +
 .../builtins-ppc-deeply-compressed-weights.c  | 194 ++++++++++++++
 ...tins-ppc-deeply-compressed-weights-error.c |  54 ++++
 llvm/include/llvm/IR/IntrinsicsPowerPC.td     |  30 +++
 llvm/lib/Target/PowerPC/PPCInstrFuture.td     |  48 +++-
 .../PowerPC/deeply-compressed-weights.ll      | 244 ++++++++++++++++++
 8 files changed, 650 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
 create mode 100644 clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
 create mode 100644 llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 8ad7efe5e814d..2825e6c677c82 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1185,6 +1185,32 @@ UNALIASED_CUSTOM_BUILTIN(xsrebase3t1uqm, "VVV", false, "future-vector")
 UNALIASED_CUSTOM_BUILTIN(xsrebase3t2uqm, "VVV", false, "future-vector")
 UNALIASED_CUSTOM_BUILTIN(xsrebase3t3uqm, "VVV", false, "future-vector")
 
+// Deeply Compressed Weights built-ins.
+TARGET_BUILTIN(__builtin_altivec_vucmprhn, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprln, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprhb, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprlb, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprhh, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprlh, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkint4tobf16, "V16UcV16UcIi", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkint8tobf16, "V16UcV16UcIi", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkint4tofp32, "V16UcV16UcIi", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkint8tofp32, "V16UcV16UcIi", "",
+               "isa-future-instructions")
+
 // FIXME: Obviously incomplete.
 
 #undef BUILTIN
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 1c778ea0a829f..c62dad5293a63 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -19314,6 +19314,64 @@ vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) {
 #endif /* __SIZEOF_INT128__ */
 #endif /* __POWER10_VECTOR__ */
 
+#ifdef __FUTURE_VECTOR__
+
+/* vec_uncompress* and vec_unpack_* - Deeply Compressed Weights builtins */
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresshn(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprhn(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompressln(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprln(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresshb(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprhb(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresslb(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprlb(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresshh(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprhh(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresslh(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprlh(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_unpack_hsn_to_byte(vector unsigned char __a) {
+  return __builtin_altivec_vupkhsntob(__a);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_unpack_lsn_to_byte(vector unsigned char __a) {
+  return __builtin_altivec_vupklsntob(__a);
+}
+
+#define vec_unpack_int4_to_bf16(__a, __imm)                                    \
+  __builtin_altivec_vupkint4tobf16((__a), (__imm))
+
+#define vec_unpack_int8_to_bf16(__a, __imm)                                    \
+  __builtin_altivec_vupkint8tobf16((__a), (__imm))
+
+#define vec_unpack_int4_to_fp32(__a, __imm)                                    \
+  __builtin_altivec_vupkint4tofp32((__a), (__imm))
+
+#define vec_unpack_int8_to_fp32(__a, __imm)                                    \
+  __builtin_altivec_vupkint8tofp32((__a), (__imm))
+
+#endif /* __FUTURE_VECTOR__ */
+
 #ifdef __POWER8_VECTOR__
 #define __bcdadd(__a, __b, __ps) __builtin_ppc_bcdadd((__a), (__b), (__ps))
 #define __bcdsub(__a, __b, __ps) __builtin_ppc_bcdsub((__a), (__b), (__ps))
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 6a06dbf12c8dc..8a594fc86dea6 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -249,6 +249,14 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI,
     return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7);
   case PPC::BI__builtin_vsx_xxpermx:
     return SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 7);
+  case PPC::BI__builtin_altivec_vupkint4tobf16:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3);
+  case PPC::BI__builtin_altivec_vupkint8tobf16:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1);
+  case PPC::BI__builtin_altivec_vupkint4tofp32:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 7);
+  case PPC::BI__builtin_altivec_vupkint8tofp32:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3);
   case PPC::BI__builtin_ppc_tw:
   case PPC::BI__builtin_ppc_tdw:
     return SemaRef.BuiltinConstantArgRange(TheCall, 2, 1, 31);
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
new file mode 100644
index 0000000000000..3b4eb0faa27c2
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -triple powerpc64-unknown-unknown \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -triple powerpc64le-unknown-unknown \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+
+// AI Assisted.
+
+#include <altivec.h>
+
+vector unsigned char vuca, vucb;
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshn(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresshn(void) {
+  return vec_uncompresshn(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompressln(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompressln(void) {
+  return vec_uncompressln(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshb(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresshb(void) {
+  return vec_uncompresshb(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslb(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresslb(void) {
+  return vec_uncompresslb(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshh(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresshh(void) {
+  return vec_uncompresshh(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslh(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresslh(void) {
+  return vec_uncompresslh(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_hsn_to_byte(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+vector unsigned char test_vec_unpack_hsn_to_byte(void) {
+  return vec_unpack_hsn_to_byte(vuca);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_lsn_to_byte(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+vector unsigned char test_vec_unpack_lsn_to_byte(void) {
+  return vec_unpack_lsn_to_byte(vuca);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int4_to_bf16(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> [[TMP0]], i32 2)
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+vector unsigned char test_vec_unpack_int4_to_bf16(void) {
+  return vec_unpack_int4_to_bf16(vuca, 2);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_bf16(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+vector unsigned char test_vec_unpack_int8_to_bf16(void) {
+  return vec_unpack_int8_to_bf16(vuca, 1);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int4_to_fp32(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> [[TMP0]], i32 5)
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+vector unsigned char test_vec_unpack_int4_to_fp32(void) {
+  return vec_unpack_int4_to_fp32(vuca, 5);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_fp32(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> [[TMP0]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+vector unsigned char test_vec_unpack_int8_to_fp32(void) {
+  return vec_unpack_int8_to_fp32(vuca, 3);
+}
diff --git a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
new file mode 100644
index 0000000000000..5092b15731c81
--- /dev/null
+++ b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
@@ -0,0 +1,54 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \
+// RUN:   -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -verify %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \
+// RUN:   -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -verify %s
+
+// AI Assisted.
+
+#include <altivec.h>
+
+vector unsigned char vuca, vucb;
+vector signed int vsia;
+
+void test_invalid_params(void) {
+  vector unsigned char res;
+
+  // Test invalid parameter types
+  res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note at altivec.h:* {{passing argument to parameter '__a' here}}
+  res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note at altivec.h:* {{passing argument to parameter '__b' here}}
+  res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note at altivec.h:* {{passing argument to parameter '__a' here}}
+}
+
+void test_invalid_immediates(void) {
+  vector unsigned char res;
+
+  // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid range: 0-3)
+  res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+
+  // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid range: 0-1)
+  res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+
+  // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid range: 0-7)
+  res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+
+  // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid range: 0-3)
+  res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+}
+
+void test_non_constant_immediates(void) {
+  vector unsigned char res;
+  unsigned int imm = 1;
+
+  // Test non-constant immediate values
+  res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tobf16' must be a constant integer}}
+  res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tobf16' must be a constant integer}}
+  res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tofp32' must be a constant integer}}
+  res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tofp32' must be a constant integer}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 0af95ab0bb78a..9fd3c9fc4e791 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1362,6 +1362,36 @@ def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
 def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
 def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
 def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
+// Deeply Compressed Weights Intrinsics.
+def int_ppc_altivec_vucmprhn : PowerPC_Vec_BBB_Intrinsic<"vucmprhn">;
+def int_ppc_altivec_vucmprln : PowerPC_Vec_BBB_Intrinsic<"vucmprln">;
+def int_ppc_altivec_vucmprhb : PowerPC_Vec_BBB_Intrinsic<"vucmprhb">;
+def int_ppc_altivec_vucmprlb : PowerPC_Vec_BBB_Intrinsic<"vucmprlb">;
+def int_ppc_altivec_vucmprhh : PowerPC_Vec_BBB_Intrinsic<"vucmprhh">;
+def int_ppc_altivec_vucmprlh : PowerPC_Vec_BBB_Intrinsic<"vucmprlh">;
+def int_ppc_altivec_vupkhsntob :
+    PowerPC_Vec_Intrinsic<"vupkhsntob", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_altivec_vupklsntob :
+    PowerPC_Vec_Intrinsic<"vupklsntob", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_altivec_vupkint4tobf16 :
+    PowerPC_Vec_Intrinsic<"vupkint4tobf16", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_ppc_altivec_vupkint8tobf16 :
+    PowerPC_Vec_Intrinsic<"vupkint8tobf16", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_ppc_altivec_vupkint4tofp32 :
+    PowerPC_Vec_Intrinsic<"vupkint4tofp32", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_ppc_altivec_vupkint8tofp32 :
+    PowerPC_Vec_Intrinsic<"vupkint8tofp32", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
 
 //===----------------------------------------------------------------------===//
 // PowerPC VSX Intrinsic Definitions.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index bd90d7b86c95a..6ff5b57502f69 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -431,38 +431,62 @@ let Predicates = [HasFutureVector] in {
   }
 
   def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB),
-                                "vupkhsntob $VRT, $VRB", []>;
+                                "vupkhsntob $VRT, $VRB",
+                                [(set v16i8:$VRT,
+                                  (int_ppc_altivec_vupkhsntob v16i8:$VRB))]>;
   def VUPKLSNTOB : VXForm_VRTB5<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB),
-                                "vupklsntob $VRT, $VRB", []>;
+                                "vupklsntob $VRT, $VRB",
+                                [(set v16i8:$VRT,
+                                  (int_ppc_altivec_vupklsntob v16i8:$VRB))]>;
   def VUPKINT4TOBF16
       : VXForm_VRTB5_UIM2<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM),
-                          "vupkint4tobf16 $VRT, $VRB, $UIM", []>;
+                          "vupkint4tobf16 $VRT, $VRB, $UIM",
+                          [(set v16i8:$VRT,
+                            (int_ppc_altivec_vupkint4tobf16 v16i8:$VRB, timm:$UIM))]>;
   def VUPKINT8TOBF16
       : VXForm_VRTB5_UIM1<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB, u1imm:$UIM),
-                          "vupkint8tobf16 $VRT, $VRB, $UIM", []>;
+                          "vupkint8tobf16 $VRT, $VRB, $UIM",
+                          [(set v16i8:$VRT,
+                            (int_ppc_altivec_vupkint8tobf16 v16i8:$VRB, timm:$UIM))]>;
   def VUPKINT8TOFP32
       : VXForm_VRTB5_UIM2<387, 3, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM),
-                          "vupkint8tofp32 $VRT, $VRB, $UIM", []>;
+                          "vupkint8tofp32 $VRT, $VRB, $UIM",
+                          [(set v16i8:$VRT,
+                            (int_ppc_altivec_vupkint8tofp32 v16i8:$VRB, timm:$UIM))]>;
   def VUPKINT4TOFP32
       : VXForm_VRTB5_UIM3<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u3imm:$UIM),
-                          "vupkint4tofp32 $VRT, $VRB, $UIM", []>;
+                          "vupkint4tofp32 $VRT, $VRB, $UIM",
+                          [(set v16i8:$VRT,
+                            (int_ppc_altivec_vupkint4tofp32 v16i8:$VRB, timm:$UIM))]>;
 
   def VUCMPRHN : VXForm_VRTAB5<3, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                               "vucmprhn $VRT, $VRA, $VRB", []>;
+                               "vucmprhn $VRT, $VRA, $VRB",
+                               [(set v16i8:$VRT,
+                                 (int_ppc_altivec_vucmprhn v16i8:$VRA, v16i8:$VRB))]>;
   def VUCMPRLN : VXForm_VRTAB5<67, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                               "vucmprln $VRT, $VRA, $VRB", []>;
+                               "vucmprln $VRT, $VRA, $VRB",
+                               [(set v16i8:$VRT,
+                                 (int_ppc_altivec_vucmprln v16i8:$VRA, v16i8:$VRB))]>;
   def VUCMPRHB
       : VXForm_VRTAB5<131, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                      "vucmprhb $VRT, $VRA, $VRB", []>;
+                      "vucmprhb $VRT, $VRA, $VRB",
+                      [(set v16i8:$VRT,
+                        (int_ppc_altivec_vucmprhb v16i8:$VRA, v16i8:$VRB))]>;
   def VUCMPRLB
       : VXForm_VRTAB5<195, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                      "vucmprlb $VRT, $VRA, $VRB", []>;
+                      "vucmprlb $VRT, $VRA, $VRB",
+                      [(set v16i8:$VRT,
+                        (int_ppc_altivec_vucmprlb v16i8:$VRA, v16i8:$VRB))]>;
   def VUCMPRHH
       : VXForm_VRTAB5<259, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                      "vucmprhh $VRT, $VRA, $VRB", []>;
+                      "vucmprhh $VRT, $VRA, $VRB",
+                      [(set v16i8:$VRT,
+                        (int_ppc_altivec_vucmprhh v16i8:$VRA, v16i8:$VRB))]>;
   def VUCMPRLH
       : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                      "vucmprlh $VRT, $VRA, $VRB", []>;
+                      "vucmprlh $VRT, $VRA, $VRB",
+                      [(set v16i8:$VRT,
+                        (int_ppc_altivec_vucmprlh v16i8:$VRA, v16i8:$VRB))]>;
 
   def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                             "xvrlw $XT, $XA, $XB",
diff --git a/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll
new file mode 100644
index 0000000000000..85f84ade7c3c1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; AI Assisted.
+
+declare <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8>, i32)
+declare <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8>, i32)
+declare <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8>, i32)
+declare <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8>, i32)
+
+define <16 x i8> @test_vucmprhn(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprhn:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprhn 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprhn:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprhn 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprln(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprln:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprln 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprln:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprln 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprhb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprhb:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprhb 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprhb:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprhb 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprlb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprlb:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprlb 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprlb:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprlb 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprhh(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprhh:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprhh 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprhh:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprhh 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprlh(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprlh:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprlh 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprlh:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprlh 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkhsntob(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkhsntob:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkhsntob 2, 2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkhsntob:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkhsntob 2, 2
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> %a)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupklsntob(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupklsntob:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupklsntob 2, 2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupklsntob:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupklsntob 2, 2
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> %a)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint4tobf16_0(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint4tobf16_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint4tobf16 2, 2, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint4tobf16_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint4tobf16 2, 2, 0
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 0)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint4tobf16_3(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint4tobf16_3:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint4tobf16 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint4tobf16_3:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint4tobf16 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 3)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint8tobf16_0(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint8tobf16_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint8tobf16 2, 2, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint8tobf16_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint8tobf16 2, 2, 0
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 0)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint8tobf16_1(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint8tobf16_1:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint8tobf16 2, 2, 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint8tobf16_1:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint8tobf16 2, 2, 1
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint4tofp32_0(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint4tofp32_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint4tofp32 2, 2, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint4tofp32_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint4tofp32 2, 2, 0
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 0)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint4tofp32_7(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint4tofp32_7:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint4tofp32 2, 2, 7
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint4tofp32_7:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint4tofp32 2, 2, 7
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 7)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint8tofp32_0(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint8tofp32_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint8tofp32 2, 2, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint8tofp32_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint8tofp32 2, 2, 0
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 0)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint8tofp32_3(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint8tofp32_3:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint8tofp32 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint8tofp32_3:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint8tofp32 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 3)
+  ret <16 x i8> %res
+}

>From b382e3b1aa450adcaacb7d74e9e7a51fa54cdd63 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 15 Apr 2026 18:44:21 +0000
Subject: [PATCH 2/3] update pattern to use specific timm nodes

---
 llvm/lib/Target/PowerPC/PPCInstrFuture.td | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 6ff5b57502f69..e2705c899a89e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -442,22 +442,22 @@ let Predicates = [HasFutureVector] in {
       : VXForm_VRTB5_UIM2<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM),
                           "vupkint4tobf16 $VRT, $VRB, $UIM",
                           [(set v16i8:$VRT,
-                            (int_ppc_altivec_vupkint4tobf16 v16i8:$VRB, timm:$UIM))]>;
+                            (int_ppc_altivec_vupkint4tobf16 v16i8:$VRB, u2imm_timm:$UIM))]>;
   def VUPKINT8TOBF16
       : VXForm_VRTB5_UIM1<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB, u1imm:$UIM),
                           "vupkint8tobf16 $VRT, $VRB, $UIM",
                           [(set v16i8:$VRT,
-                            (int_ppc_altivec_vupkint8tobf16 v16i8:$VRB, timm:$UIM))]>;
+                            (int_ppc_altivec_vupkint8tobf16 v16i8:$VRB, u1imm_timm:$UIM))]>;
   def VUPKINT8TOFP32
       : VXForm_VRTB5_UIM2<387, 3, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM),
                           "vupkint8tofp32 $VRT, $VRB, $UIM",
                           [(set v16i8:$VRT,
-                            (int_ppc_altivec_vupkint8tofp32 v16i8:$VRB, timm:$UIM))]>;
+                            (int_ppc_altivec_vupkint8tofp32 v16i8:$VRB, u2imm_timm:$UIM))]>;
   def VUPKINT4TOFP32
       : VXForm_VRTB5_UIM3<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u3imm:$UIM),
                           "vupkint4tofp32 $VRT, $VRB, $UIM",
                           [(set v16i8:$VRT,
-                            (int_ppc_altivec_vupkint4tofp32 v16i8:$VRB, timm:$UIM))]>;
+                            (int_ppc_altivec_vupkint4tofp32 v16i8:$VRB, u3imm_timm:$UIM))]>;
 
   def VUCMPRHN : VXForm_VRTAB5<3, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
                                "vucmprhn $VRT, $VRA, $VRB",

>From 5106040f4473a1ae9fb74d1d0d4d00d918a2712b Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 16 Apr 2026 10:04:24 -0400
Subject: [PATCH 3/3] rebase and add new required feature bits

---
 .../PowerPC/builtins-ppc-deeply-compressed-weights.c | 12 ++++++------
 .../builtins-ppc-deeply-compressed-weights-error.c   |  8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
index 3b4eb0faa27c2..3db773cfab7ac 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
 // REQUIRES: powerpc-registered-target
-// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -triple powerpc64-unknown-unknown \
-// RUN:   -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -triple powerpc64le-unknown-unknown \
-// RUN:   -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +future-vector \
+// RUN:   -target-feature +vsx -target-feature +isa-future-instructions \
+// RUN:   -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +future-vector \
+// RUN:   -target-feature +vsx -target-feature +isa-future-instructions \
+// RUN:   -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 
 // AI Assisted.
 
diff --git a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
index 5092b15731c81..8d94a14849b89 100644
--- a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
+++ b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
@@ -1,10 +1,10 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \
-// RUN:   -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -verify %s
+// RUN:   -flax-vector-conversions=none -target-feature +future-vector \
+// RUN:   -target-feature +vsx -target-feature +isa-future-instructions -verify %s
 // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \
-// RUN:   -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -verify %s
+// RUN:   -flax-vector-conversions=none -target-feature +future-vector \
+// RUN:   -target-feature +vsx -target-feature +isa-future-instructions -verify %s
 
 // AI Assissted.