[clang] [llvm] [PowerPC] Add builtins for Post Quantum Cryptography Acceleration (PR #184717)
Lei Huang via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 26 06:56:53 PDT 2026
https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/184717
>From ac7e2921294007c2786f1ee4c4faa3075e533379 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 4 Mar 2026 20:08:17 -0500
Subject: [PATCH 1/4] [PowerPC] Add builtins for Post Quantum Cryptography
Acceleration
This patch implements Post Quantum Cryptography (PQC) Acceleration
builtins for PowerPC's future ISA by ensuring that vector operations
(vec_add, vec_sub, vec_mul, vec_mulh) correctly map to VSX instructions
(xvadduwm, xvadduhm, xvsubuwm, xvsubuhm, xvmuluwm, xvmuluhm, xvmulhsw,
xvmulhsh, xvmulhuw, xvmulhuh) when targeting mcpu=future.
Also adds missing vec_mulh builtins:
* vector signed short vec_mulh(vector signed short, vector signed short)
* vector unsigned short vec_mulh(vector unsigned short, vector unsigned short)
Assisted by AI.
---
clang/include/clang/Basic/BuiltinsPPC.def | 2 +
clang/lib/Basic/Targets/PPC.cpp | 4 +
clang/lib/Basic/Targets/PPC.h | 1 +
clang/lib/Headers/altivec.h | 12 +++
.../builtins-ppc-post-quantum-crypto.c | 98 +++++++++++++++++
llvm/include/llvm/IR/IntrinsicsPowerPC.td | 2 +
llvm/lib/Target/PowerPC/PPCInstrFuture.td | 29 +++++
.../CodeGen/PowerPC/post-quantum-crypto.ll | 101 ++++++++++++++++++
8 files changed, 249 insertions(+)
create mode 100644 clang/test/CodeGen/PowerPC/builtins-ppc-post-quantum-crypto.c
create mode 100644 llvm/test/CodeGen/PowerPC/post-quantum-crypto.ll
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index c0c92c0b73793..c20e1acb8f407 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -654,6 +654,8 @@ TARGET_BUILTIN(__builtin_altivec_vmulhsd, "V2LLiV2LLiV2LLi", "",
"power10-vector")
TARGET_BUILTIN(__builtin_altivec_vmulhud, "V2ULLiV2ULLiV2ULLi", "",
"power10-vector")
+TARGET_BUILTIN(__builtin_altivec_vmulhsh, "V8SsV8SsV8Ss", "", "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vmulhuh, "V8UsV8UsV8Us", "", "isa-future-instructions")
// P10 Vector Expand with Mask built-ins.
TARGET_BUILTIN(__builtin_altivec_vexpandbm, "V16UcV16Uc", "", "power10-vector")
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 30ea714fbb6f8..90e2050e4d1d4 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -59,6 +59,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasP9Vector = true;
} else if (Feature == "+power10-vector") {
HasP10Vector = true;
+ } else if (Feature == "+isa-future-instructions") {
+ HasFutureVector = true;
} else if (Feature == "+pcrelative-memops") {
HasPCRelativeMemops = true;
} else if (Feature == "+spe" || Feature == "+efpu2") {
@@ -434,6 +436,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__POWER10_VECTOR__");
if (HasPCRelativeMemops)
Builder.defineMacro("__PCREL__");
+ if (HasFutureVector)
+ Builder.defineMacro("__FUTURE_VECTOR__");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 6f90ff1f5d57c..a9f49aa3aebe1 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -69,6 +69,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool HasFrsqrte = false;
bool HasFrsqrtes = false;
bool HasP10Vector = false;
+ bool HasFutureVector = false;
bool HasPCRelativeMemops = false;
bool HasQuadwordAtomics = false;
bool UseLongCalls = false;
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 1c778ea0a829f..8994d1b233798 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -6404,6 +6404,18 @@ vec_mulh(vector unsigned long long __a, vector unsigned long long __b) {
}
#endif
+#ifdef __FUTURE_VECTOR__
+static __inline__ vector signed short
+ __ATTRS_o_ai vec_mulh(vector signed short __a, vector signed short __b) {
+ return __builtin_altivec_vmulhsh(__a, __b);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_mulh(vector unsigned short __a, vector unsigned short __b) {
+ return __builtin_altivec_vmulhuh(__a, __b);
+}
+#endif
+
/* vec_mulo */
static __inline__ vector short __ATTRS_o_ai vec_mulo(vector signed char __a,
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-post-quantum-crypto.c b/clang/test/CodeGen/PowerPC/builtins-ppc-post-quantum-crypto.c
new file mode 100644
index 0000000000000..59ee466259c0e
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-post-quantum-crypto.c
@@ -0,0 +1,98 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
+// RUN: -target-feature +isa-future-instructions -target-cpu future \
+// RUN: -triple powerpc64-unknown-unknown -emit-llvm %s \
+// RUN: -o - | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
+// RUN: -target-feature +isa-future-instructions -target-cpu future \
+// RUN: -triple powerpc64le-unknown-unknown -emit-llvm %s \
+// RUN: -o - | FileCheck %s
+
+// AI Generated.
+
+#include <altivec.h>
+
+vector unsigned int vuia, vuib;
+vector unsigned short vusa, vusb;
+vector signed int vsia, vsib;
+vector signed short vssa, vssb;
+
+// Test vec_add for unsigned int
+vector unsigned int test_vec_add_ui(void) {
+ // CHECK-LABEL: @test_vec_add_ui
+ // CHECK: add <4 x i32>
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_add(vuia, vuib);
+}
+
+// Test vec_add for unsigned short
+vector unsigned short test_vec_add_uh(void) {
+ // CHECK-LABEL: @test_vec_add_uh
+ // CHECK: add <8 x i16>
+ // CHECK-NEXT: ret <8 x i16>
+ return vec_add(vusa, vusb);
+}
+
+// Test vec_sub for unsigned int
+vector unsigned int test_vec_sub_ui(void) {
+ // CHECK-LABEL: @test_vec_sub_ui
+ // CHECK: sub <4 x i32>
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_sub(vuia, vuib);
+}
+
+// Test vec_sub for unsigned short
+vector unsigned short test_vec_sub_uh(void) {
+ // CHECK-LABEL: @test_vec_sub_uh
+ // CHECK: sub <8 x i16>
+ // CHECK-NEXT: ret <8 x i16>
+ return vec_sub(vusa, vusb);
+}
+
+// Test vec_mul for unsigned int
+vector unsigned int test_vec_mul_ui(void) {
+ // CHECK-LABEL: @test_vec_mul_ui
+ // CHECK: mul <4 x i32>
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_mul(vuia, vuib);
+}
+
+// Test vec_mul for unsigned short
+vector unsigned short test_vec_mul_uh(void) {
+ // CHECK-LABEL: @test_vec_mul_uh
+ // CHECK: mul <8 x i16>
+ // CHECK-NEXT: ret <8 x i16>
+ return vec_mul(vusa, vusb);
+}
+
+// Test vec_mulh for signed int
+vector signed int test_vec_mulh_si(void) {
+ // CHECK-LABEL: @test_vec_mulh_si
+ // CHECK: call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}})
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_mulh(vsia, vsib);
+}
+
+// Test vec_mulh for unsigned int
+vector unsigned int test_vec_mulh_ui(void) {
+ // CHECK-LABEL: @test_vec_mulh_ui
+ // CHECK: call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}})
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_mulh(vuia, vuib);
+}
+
+// Test vec_mulh for signed short
+vector signed short test_vec_mulh_ss(void) {
+ // CHECK-LABEL: @test_vec_mulh_ss
+ // CHECK: call <8 x i16> @llvm.ppc.altivec.vmulhsh(<8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}})
+ // CHECK-NEXT: ret <8 x i16>
+ return vec_mulh(vssa, vssb);
+}
+
+// Test vec_mulh for unsigned short
+vector unsigned short test_vec_mulh_uh(void) {
+ // CHECK-LABEL: @test_vec_mulh_uh
+ // CHECK: call <8 x i16> @llvm.ppc.altivec.vmulhuh(<8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}})
+ // CHECK-NEXT: ret <8 x i16>
+ return vec_mulh(vusa, vusb);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index bd8fb9e9a564d..8ebb2d7f45528 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1362,6 +1362,8 @@ def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
+def int_ppc_altivec_vmulhsh : PowerPC_Vec_HHH_Intrinsic<"vmulhsh">;
+def int_ppc_altivec_vmulhuh : PowerPC_Vec_HHH_Intrinsic<"vmulhuh">;
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Definitions.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 0cd63a88cb96b..8eb7f30ff5f08 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -610,6 +610,35 @@ let Predicates = [HasFutureVector, PairedVectorMemops] in {
let Predicates = [HasFutureVector] in {
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
v4i32:$vB))>;
+
+ // Post Quantum Cryptography Acceleration patterns
+ // Use AddedComplexity to prefer these patterns over AltiVec patterns
+ let AddedComplexity = 400 in {
+ // Vector add
+ def : Pat<(v4i32 (add v4i32:$XA, v4i32:$XB)), (v4i32 (XVADDUWM $XA, $XB))>;
+ def : Pat<(v8i16 (add v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVADDUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+
+ // Vector subtract
+ def : Pat<(v4i32 (sub v4i32:$XA, v4i32:$XB)), (v4i32 (XVSUBUWM $XA, $XB))>;
+ def : Pat<(v8i16 (sub v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVSUBUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+
+ // Vector multiply
+ def : Pat<(v4i32 (mul v4i32:$XA, v4i32:$XB)), (v4i32 (XVMULUWM $XA, $XB))>;
+ def : Pat<(v8i16 (mul v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVMULUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+
+ // Vector multiply high intrinsics
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$XA, v4i32:$XB)),
+ (v4i32 (XVMULHSW $XA, $XB))>;
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$XA, v4i32:$XB)),
+ (v4i32 (XVMULHUW $XA, $XB))>;
+ def : Pat<(v8i16 (int_ppc_altivec_vmulhsh v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVMULHSH RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+ def : Pat<(v8i16 (int_ppc_altivec_vmulhuh v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVMULHUH RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+ }
}
//---------------------------- Instruction aliases ---------------------------//
diff --git a/llvm/test/CodeGen/PowerPC/post-quantum-crypto.ll b/llvm/test/CodeGen/PowerPC/post-quantum-crypto.ll
new file mode 100644
index 0000000000000..92ad707c40879
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/post-quantum-crypto.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future < %s | FileCheck %s
+
+; Test Post Quantum Cryptography Acceleration instructions
+
+define <4 x i32> @test_xvadduwm(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvadduwm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvadduwm 34, 34, 35
+; CHECK-NEXT: blr
+ %res = add <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @test_xvadduhm(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvadduhm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvadduhm 34, 34, 35
+; CHECK-NEXT: blr
+ %res = add <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_xvsubuwm(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvsubuwm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsubuwm 34, 34, 35
+; CHECK-NEXT: blr
+ %res = sub <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @test_xvsubuhm(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvsubuhm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsubuhm 34, 34, 35
+; CHECK-NEXT: blr
+ %res = sub <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_xvmuluwm(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvmuluwm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmuluwm 34, 34, 35
+; CHECK-NEXT: blr
+ %res = mul <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @test_xvmuluhm(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvmuluhm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmuluhm 34, 34, 35
+; CHECK-NEXT: blr
+ %res = mul <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
+define <4 x i32> @test_xvmulhsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvmulhsw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmulhsw 34, 34, 35
+; CHECK-NEXT: blr
+ %res = call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
+define <4 x i32> @test_xvmulhuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvmulhuw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmulhuw 34, 34, 35
+; CHECK-NEXT: blr
+ %res = call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+declare <8 x i16> @llvm.ppc.altivec.vmulhsh(<8 x i16>, <8 x i16>)
+define <8 x i16> @test_xvmulhsh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvmulhsh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmulhsh 34, 34, 35
+; CHECK-NEXT: blr
+ %res = call <8 x i16> @llvm.ppc.altivec.vmulhsh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.ppc.altivec.vmulhuh(<8 x i16>, <8 x i16>)
+define <8 x i16> @test_xvmulhuh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvmulhuh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmulhuh 34, 34, 35
+; CHECK-NEXT: blr
+ %res = call <8 x i16> @llvm.ppc.altivec.vmulhuh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
>From 4142e14738a3840142734641e7ce828cf315a70e Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 5 Mar 2026 10:48:40 -0500
Subject: [PATCH 2/4] add pattern to deal with negate vsx vector
---
llvm/lib/Target/PowerPC/PPCInstrFuture.td | 55 +++++++++++------------
1 file changed, 27 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 8eb7f30ff5f08..f28cbc3a743f4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -610,35 +610,34 @@ let Predicates = [HasFutureVector, PairedVectorMemops] in {
let Predicates = [HasFutureVector] in {
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
v4i32:$vB))>;
+}
- // Post Quantum Cryptography Acceleration patterns
- // Use AddedComplexity to prefer these patterns over AltiVec patterns
- let AddedComplexity = 400 in {
- // Vector add
- def : Pat<(v4i32 (add v4i32:$XA, v4i32:$XB)), (v4i32 (XVADDUWM $XA, $XB))>;
- def : Pat<(v8i16 (add v8i16:$XA, v8i16:$XB)),
- (COPY_TO_REGCLASS (XVADDUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
-
- // Vector subtract
- def : Pat<(v4i32 (sub v4i32:$XA, v4i32:$XB)), (v4i32 (XVSUBUWM $XA, $XB))>;
- def : Pat<(v8i16 (sub v8i16:$XA, v8i16:$XB)),
- (COPY_TO_REGCLASS (XVSUBUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
-
- // Vector multiply
- def : Pat<(v4i32 (mul v4i32:$XA, v4i32:$XB)), (v4i32 (XVMULUWM $XA, $XB))>;
- def : Pat<(v8i16 (mul v8i16:$XA, v8i16:$XB)),
- (COPY_TO_REGCLASS (XVMULUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
-
- // Vector multiply high intrinsics
- def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$XA, v4i32:$XB)),
- (v4i32 (XVMULHSW $XA, $XB))>;
- def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$XA, v4i32:$XB)),
- (v4i32 (XVMULHUW $XA, $XB))>;
- def : Pat<(v8i16 (int_ppc_altivec_vmulhsh v8i16:$XA, v8i16:$XB)),
- (COPY_TO_REGCLASS (XVMULHSH RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
- def : Pat<(v8i16 (int_ppc_altivec_vmulhuh v8i16:$XA, v8i16:$XB)),
- (COPY_TO_REGCLASS (XVMULHUH RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
- }
+// Post Quantum Cryptography Acceleration patterns
+// Use AddedComplexity to prefer these patterns over AltiVec patterns
+let Predicates = [HasVSX, IsISAFuture], AddedComplexity = 400 in {
+ // Vector add
+ def : Pat<(v4i32 (add v4i32:$XA, v4i32:$XB)), (v4i32 (XVADDUWM $XA, $XB))>;
+ def : Pat<(v8i16 (add v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVADDUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+ // Vector subtract
+ // Don't have a VSX negate instruction so use VNEGW instead.
+ def : Pat<(v4i32 (sub (v4i32 immAllZerosV), v4i32:$vB)), (v4i32 (VNEGW $vB))>;
+ def : Pat<(v4i32 (sub v4i32:$XA, v4i32:$XB)), (v4i32 (XVSUBUWM $XA, $XB))>;
+ def : Pat<(v8i16 (sub v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVSUBUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+ // Vector multiply
+ def : Pat<(v4i32 (mul v4i32:$XA, v4i32:$XB)), (v4i32 (XVMULUWM $XA, $XB))>;
+ def : Pat<(v8i16 (mul v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVMULUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+ // Vector multiply high intrinsics
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$XA, v4i32:$XB)),
+ (v4i32 (XVMULHSW $XA, $XB))>;
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$XA, v4i32:$XB)),
+ (v4i32 (XVMULHUW $XA, $XB))>;
+ def : Pat<(v8i16 (int_ppc_altivec_vmulhsh v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVMULHSH RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+ def : Pat<(v8i16 (int_ppc_altivec_vmulhuh v8i16:$XA, v8i16:$XB)),
+ (COPY_TO_REGCLASS (XVMULHUH RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
}
//---------------------------- Instruction aliases ---------------------------//
>From b977943a6328c59251947bd4375fc31ede2df56e Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 5 Mar 2026 11:04:30 -0500
Subject: [PATCH 3/4] update predicate add sema testing and change feature bits
to future-vector
---
clang/include/clang/Basic/BuiltinsPPC.def | 4 +-
clang/lib/Basic/Targets/PPC.cpp | 2 +-
.../builtins-ppc-post-quantum-crypto.c | 68 -------------------
.../PowerPC/builtins-ppc-altivec-mulh-error.c | 26 +++++++
llvm/lib/Target/PowerPC/PPCInstrFuture.td | 2 +-
5 files changed, 30 insertions(+), 72 deletions(-)
create mode 100644 clang/test/Sema/PowerPC/builtins-ppc-altivec-mulh-error.c
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index c20e1acb8f407..82f6102a56b4f 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -654,8 +654,8 @@ TARGET_BUILTIN(__builtin_altivec_vmulhsd, "V2LLiV2LLiV2LLi", "",
"power10-vector")
TARGET_BUILTIN(__builtin_altivec_vmulhud, "V2ULLiV2ULLiV2ULLi", "",
"power10-vector")
-TARGET_BUILTIN(__builtin_altivec_vmulhsh, "V8SsV8SsV8Ss", "", "isa-future-instructions")
-TARGET_BUILTIN(__builtin_altivec_vmulhuh, "V8UsV8UsV8Us", "", "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vmulhsh, "V8SsV8SsV8Ss", "", "future-vector")
+TARGET_BUILTIN(__builtin_altivec_vmulhuh, "V8UsV8UsV8Us", "", "future-vector")
// P10 Vector Expand with Mask built-ins.
TARGET_BUILTIN(__builtin_altivec_vexpandbm, "V16UcV16Uc", "", "power10-vector")
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 90e2050e4d1d4..c9a41df806aff 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -59,7 +59,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasP9Vector = true;
} else if (Feature == "+power10-vector") {
HasP10Vector = true;
- } else if (Feature == "+isa-future-instructions") {
+ } else if (Feature == "+future-vector") {
HasFutureVector = true;
} else if (Feature == "+pcrelative-memops") {
HasPCRelativeMemops = true;
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-post-quantum-crypto.c b/clang/test/CodeGen/PowerPC/builtins-ppc-post-quantum-crypto.c
index 59ee466259c0e..ca36d67e0d32e 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-post-quantum-crypto.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-post-quantum-crypto.c
@@ -8,79 +8,11 @@
// RUN: -triple powerpc64le-unknown-unknown -emit-llvm %s \
// RUN: -o - | FileCheck %s
-// AI Generated.
-
#include <altivec.h>
-vector unsigned int vuia, vuib;
vector unsigned short vusa, vusb;
-vector signed int vsia, vsib;
vector signed short vssa, vssb;
-// Test vec_add for unsigned int
-vector unsigned int test_vec_add_ui(void) {
- // CHECK-LABEL: @test_vec_add_ui
- // CHECK: add <4 x i32>
- // CHECK-NEXT: ret <4 x i32>
- return vec_add(vuia, vuib);
-}
-
-// Test vec_add for unsigned short
-vector unsigned short test_vec_add_uh(void) {
- // CHECK-LABEL: @test_vec_add_uh
- // CHECK: add <8 x i16>
- // CHECK-NEXT: ret <8 x i16>
- return vec_add(vusa, vusb);
-}
-
-// Test vec_sub for unsigned int
-vector unsigned int test_vec_sub_ui(void) {
- // CHECK-LABEL: @test_vec_sub_ui
- // CHECK: sub <4 x i32>
- // CHECK-NEXT: ret <4 x i32>
- return vec_sub(vuia, vuib);
-}
-
-// Test vec_sub for unsigned short
-vector unsigned short test_vec_sub_uh(void) {
- // CHECK-LABEL: @test_vec_sub_uh
- // CHECK: sub <8 x i16>
- // CHECK-NEXT: ret <8 x i16>
- return vec_sub(vusa, vusb);
-}
-
-// Test vec_mul for unsigned int
-vector unsigned int test_vec_mul_ui(void) {
- // CHECK-LABEL: @test_vec_mul_ui
- // CHECK: mul <4 x i32>
- // CHECK-NEXT: ret <4 x i32>
- return vec_mul(vuia, vuib);
-}
-
-// Test vec_mul for unsigned short
-vector unsigned short test_vec_mul_uh(void) {
- // CHECK-LABEL: @test_vec_mul_uh
- // CHECK: mul <8 x i16>
- // CHECK-NEXT: ret <8 x i16>
- return vec_mul(vusa, vusb);
-}
-
-// Test vec_mulh for signed int
-vector signed int test_vec_mulh_si(void) {
- // CHECK-LABEL: @test_vec_mulh_si
- // CHECK: call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}})
- // CHECK-NEXT: ret <4 x i32>
- return vec_mulh(vsia, vsib);
-}
-
-// Test vec_mulh for unsigned int
-vector unsigned int test_vec_mulh_ui(void) {
- // CHECK-LABEL: @test_vec_mulh_ui
- // CHECK: call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}})
- // CHECK-NEXT: ret <4 x i32>
- return vec_mulh(vuia, vuib);
-}
-
// Test vec_mulh for signed short
vector signed short test_vec_mulh_ss(void) {
// CHECK-LABEL: @test_vec_mulh_ss
diff --git a/clang/test/Sema/PowerPC/builtins-ppc-altivec-mulh-error.c b/clang/test/Sema/PowerPC/builtins-ppc-altivec-mulh-error.c
new file mode 100644
index 0000000000000..2399078eba949
--- /dev/null
+++ b/clang/test/Sema/PowerPC/builtins-ppc-altivec-mulh-error.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu \
+// RUN: -target-cpu pwr10 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu \
+// RUN: -target-cpu future -target-feature -future-vector \
+// RUN: -fsyntax-only -verify %s
+
+#include <altivec.h>
+
+void test_altivec_mulh_builtins(void) {
+ vector signed short vss_a, vss_b;
+ vector unsigned short vus_a, vus_b;
+
+ // Test __builtin_altivec_vmulhsh - requires future-vector
+ vss_a = __builtin_altivec_vmulhsh(vss_a, vss_b); // expected-error {{'__builtin_altivec_vmulhsh' needs target feature future-vector}}
+
+ // Test __builtin_altivec_vmulhuh - requires future-vector
+ vus_a = __builtin_altivec_vmulhuh(vus_a, vus_b); // expected-error {{'__builtin_altivec_vmulhuh' needs target feature future-vector}}
+
+ // Test vec_mulh for signed short - no overload available for short types without __FUTURE_VECTOR__
+ vss_a = vec_mulh(vss_a, vss_b); // expected-error {{call to 'vec_mulh' is ambiguous}}
+ // expected-note@* 4 {{candidate function}}
+
+ // Test vec_mulh for unsigned short - no overload available for short types without __FUTURE_VECTOR__
+ vus_a = vec_mulh(vus_a, vus_b); // expected-error {{call to 'vec_mulh' is ambiguous}}
+ // expected-note@* 4 {{candidate function}}
+}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index f28cbc3a743f4..77d9057c253d1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -614,7 +614,7 @@ let Predicates = [HasFutureVector] in {
// Post Quantum Cryptography Acceleration patterns
// Use AddedComplexity to prefer these patterns over AltiVec patterns
-let Predicates = [HasVSX, IsISAFuture], AddedComplexity = 400 in {
+let Predicates = [HasVSX, HasFutureVector], AddedComplexity = 400 in {
// Vector add
def : Pat<(v4i32 (add v4i32:$XA, v4i32:$XB)), (v4i32 (XVADDUWM $XA, $XB))>;
def : Pat<(v8i16 (add v8i16:$XA, v8i16:$XB)),
>From be4d54995c38e39a6b7c939216dfcdbe9af9de0a Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 26 Mar 2026 09:55:41 -0400
Subject: [PATCH 4/4] update predicate
---
llvm/lib/Target/PowerPC/PPCInstrFuture.td | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 77d9057c253d1..d17ad2dbc7d5f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -612,9 +612,9 @@ let Predicates = [HasFutureVector] in {
v4i32:$vB))>;
}
-// Post Quantum Cryptography Acceleration patterns
-// Use AddedComplexity to prefer these patterns over AltiVec patterns
-let Predicates = [HasVSX, HasFutureVector], AddedComplexity = 400 in {
+// Post Quantum Cryptography Acceleration patterns.
+// Use AddedComplexity to prefer these patterns over AltiVec patterns.
+let Predicates = [HasFutureVector], AddedComplexity = 400 in {
// Vector add
def : Pat<(v4i32 (add v4i32:$XA, v4i32:$XB)), (v4i32 (XVADDUWM $XA, $XB))>;
def : Pat<(v8i16 (add v8i16:$XA, v8i16:$XB)),
More information about the cfe-commits
mailing list