[clang] 2253d75 - [PowerPC] Add builtins for Post Quantum Cryptography Acceleration (#184717)

Fri Apr 17 10:01:44 PDT 2026

Author: Lei Huang
Date: 2026-04-17T13:01:38-04:00
New Revision: 2253d7575a52918b37012e20d2fa5d6758bc8017

URL: https://github.com/llvm/llvm-project/commit/2253d7575a52918b37012e20d2fa5d6758bc8017
DIFF: https://github.com/llvm/llvm-project/commit/2253d7575a52918b37012e20d2fa5d6758bc8017.diff

LOG: [PowerPC] Add builtins for Post Quantum Cryptography Acceleration (#184717)

This patch adds Post Quantum Cryptography (PQC) Acceleration support for
PowerPC's future ISA. The vector operations vec_add, vec_sub, vec_mul and
vec_mulh now map to the VSX instructions xvadduwm, xvadduhm, xvsubuwm,
xvsubuhm, xvmuluwm, xvmuluhm, xvmulhsw, xvmulhsh, xvmulhuw and xvmulhuh
when targeting -mcpu=future.

Implement new vec_mulh overloads:
* vector signed short vec_mulh(vector signed short, vector signed short)
* vector unsigned short vec_mulh(vector unsigned short,
  vector unsigned short)

Assisted by AI.

Added: 
    clang/test/CodeGen/PowerPC/builtins-post-quantum-crypto.c
    clang/test/Sema/PowerPC/builtins-post-quantum-crypto-error.c
    llvm/test/CodeGen/PowerPC/post-quantum-crypto.ll

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/lib/Headers/altivec.h
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCInstrFuture.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 2825e6c677c82..187e2fdd3d985 100644

--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -654,6 +654,8 @@ TARGET_BUILTIN(__builtin_altivec_vmulhsd, "V2LLiV2LLiV2LLi", "",
                "power10-vector")
 TARGET_BUILTIN(__builtin_altivec_vmulhud, "V2ULLiV2ULLiV2ULLi", "",
                "power10-vector")
+TARGET_BUILTIN(__builtin_altivec_vmulhsh, "V8SsV8SsV8Ss", "", "future-vector")
+TARGET_BUILTIN(__builtin_altivec_vmulhuh, "V8UsV8UsV8Us", "", "future-vector")
 
 // P10 Vector Expand with Mask built-ins.
 TARGET_BUILTIN(__builtin_altivec_vexpandbm, "V16UcV16Uc", "", "power10-vector")

diff  --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index c62dad5293a63..2df3f46823732 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -6404,6 +6404,18 @@ vec_mulh(vector unsigned long long __a, vector unsigned long long __b) {
 }
 #endif
 
+#ifdef __FUTURE_VECTOR__
+static __inline__ vector signed short
+    __ATTRS_o_ai vec_mulh(vector signed short __a, vector signed short __b) {
+  return __builtin_altivec_vmulhsh(__a, __b);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_mulh(vector unsigned short __a, vector unsigned short __b) {
+  return __builtin_altivec_vmulhuh(__a, __b);
+}
+#endif
+
 /* vec_mulo */
 
 static __inline__ vector short __ATTRS_o_ai vec_mulo(vector signed char __a,

diff  --git a/clang/test/CodeGen/PowerPC/builtins-post-quantum-crypto.c b/clang/test/CodeGen/PowerPC/builtins-post-quantum-crypto.c
new file mode 100644
index 0000000000000..c2c9453d726f4
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-post-quantum-crypto.c
@@ -0,0 +1,26 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu future \
+// RUN:   -flax-vector-conversions=none -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN:   -flax-vector-conversions=none -emit-llvm %s -o - | FileCheck %s
+
+#include <altivec.h>
+
+vector unsigned short vusa, vusb;
+vector signed short vssa, vssb;
+
+// Test vec_mulh for signed short
+vector signed short test_vec_mulh_ss(void) {
+  // CHECK-LABEL: @test_vec_mulh_ss
+  // CHECK: call <8 x i16> @llvm.ppc.altivec.vmulhsh(<8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}})
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_mulh(vssa, vssb);
+}
+
+// Test vec_mulh for unsigned short
+vector unsigned short test_vec_mulh_uh(void) {
+  // CHECK-LABEL: @test_vec_mulh_uh
+  // CHECK: call <8 x i16> @llvm.ppc.altivec.vmulhuh(<8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}})
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_mulh(vusa, vusb);
+}

diff  --git a/clang/test/Sema/PowerPC/builtins-post-quantum-crypto-error.c b/clang/test/Sema/PowerPC/builtins-post-quantum-crypto-error.c
new file mode 100644
index 0000000000000..3a09ef5591fb9
--- /dev/null
+++ b/clang/test/Sema/PowerPC/builtins-post-quantum-crypto-error.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu pwr10 \
+// RUN:   -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu future \
+// RUN:   -target-feature -future-vector -fsyntax-only -verify %s
+
+#include <altivec.h>
+
+void test_mulh_builtins(void) {
+  vector signed short vss_a, vss_b;
+  vector unsigned short vus_a, vus_b;
+
+  // Test __builtin_altivec_vmulhsh - requires future-vector
+  vss_a = __builtin_altivec_vmulhsh(vss_a, vss_b); // expected-error {{'__builtin_altivec_vmulhsh' needs target feature future-vector}}
+
+  // Test __builtin_altivec_vmulhuh - requires future-vector
+  vus_a = __builtin_altivec_vmulhuh(vus_a, vus_b); // expected-error {{'__builtin_altivec_vmulhuh' needs target feature future-vector}}
+
+  // Test vec_mulh for signed short - without __FUTURE_VECTOR__ there is no short overload, so the call is ambiguous among the existing overloads
+  vss_a = vec_mulh(vss_a, vss_b); // expected-error {{call to 'vec_mulh' is ambiguous}}
+  // expected-note@* 4 {{candidate function}}
+
+  // Test vec_mulh for unsigned short - without __FUTURE_VECTOR__ there is no short overload, so the call is ambiguous among the existing overloads
+  vus_a = vec_mulh(vus_a, vus_b); // expected-error {{call to 'vec_mulh' is ambiguous}}
+  // expected-note@* 4 {{candidate function}}
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 9fd3c9fc4e791..392c47ee7e456 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1362,6 +1362,7 @@ def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
 def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
 def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
 def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
+
 // Deeply Compressed Weights Intrinsics.
 def int_ppc_altivec_vucmprhn : PowerPC_Vec_BBB_Intrinsic<"vucmprhn">;
 def int_ppc_altivec_vucmprln : PowerPC_Vec_BBB_Intrinsic<"vucmprln">;
@@ -1392,6 +1393,9 @@ def int_ppc_altivec_vupkint8tofp32 :
                           [llvm_v16i8_ty, llvm_i32_ty],
                           [IntrNoMem, ImmArg<ArgIndex<1>>]>;
 
+// Post Quantum Cryptography Acceleration.
+def int_ppc_altivec_vmulhsh : PowerPC_Vec_HHH_Intrinsic<"vmulhsh">;
+def int_ppc_altivec_vmulhuh : PowerPC_Vec_HHH_Intrinsic<"vmulhuh">;
 
 //===----------------------------------------------------------------------===//
 // PowerPC VSX Intrinsic Definitions.

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index e2705c899a89e..8d89056e6afaf 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -682,6 +682,34 @@ let Predicates = [HasFutureVector] in {
             (v16i8 (COPY_TO_REGCLASS (XSREBASE3T3UQM RCCp.AToVSRC, RCCp.BToVSRC), VSRC))>;
 }
 
+// Post Quantum Cryptography Acceleration patterns.
+// Use AddedComplexity to prefer these patterns over AltiVec patterns.
+let Predicates = [HasFutureVector], AddedComplexity = 400 in {
+  // Vector add
+  def : Pat<(v4i32 (add v4i32:$XA, v4i32:$XB)), (v4i32 (XVADDUWM $XA, $XB))>;
+  def : Pat<(v8i16 (add v8i16:$XA, v8i16:$XB)),
+            (COPY_TO_REGCLASS (XVADDUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+  // Vector subtract
+  // There is no VSX negate instruction, so use VNEGW for (0 - x).
+  def : Pat<(v4i32 (sub (v4i32 immAllZerosV), v4i32:$vB)), (v4i32 (VNEGW $vB))>;
+  def : Pat<(v4i32 (sub v4i32:$XA, v4i32:$XB)), (v4i32 (XVSUBUWM $XA, $XB))>;
+  def : Pat<(v8i16 (sub v8i16:$XA, v8i16:$XB)),
+            (COPY_TO_REGCLASS (XVSUBUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+  // Vector multiply
+  def : Pat<(v4i32 (mul v4i32:$XA, v4i32:$XB)), (v4i32 (XVMULUWM $XA, $XB))>;
+  def : Pat<(v8i16 (mul v8i16:$XA, v8i16:$XB)),
+            (COPY_TO_REGCLASS (XVMULUHM RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+  // Vector multiply high intrinsics
+  def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$XA, v4i32:$XB)),
+            (v4i32 (XVMULHSW $XA, $XB))>;
+  def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$XA, v4i32:$XB)),
+            (v4i32 (XVMULHUW $XA, $XB))>;
+  def : Pat<(v8i16 (int_ppc_altivec_vmulhsh v8i16:$XA, v8i16:$XB)),
+            (COPY_TO_REGCLASS (XVMULHSH RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+  def : Pat<(v8i16 (int_ppc_altivec_vmulhuh v8i16:$XA, v8i16:$XB)),
+            (COPY_TO_REGCLASS (XVMULHUH RCCp.AToVSRC, RCCp.BToVSRC), VSRC)>;
+}
+
 //---------------------------- Instruction aliases ---------------------------//
 // Predicate combinations available:
 // [HasVSX, IsISAFuture]

diff  --git a/llvm/test/CodeGen/PowerPC/post-quantum-crypto.ll b/llvm/test/CodeGen/PowerPC/post-quantum-crypto.ll
new file mode 100644
index 0000000000000..92ad707c40879
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/post-quantum-crypto.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=future < %s | FileCheck %s
+
+; Test Post Quantum Cryptography Acceleration instructions
+
+define <4 x i32> @test_xvadduwm(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvadduwm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvadduwm 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = add <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @test_xvadduhm(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvadduhm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvadduhm 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = add <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_xvsubuwm(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvsubuwm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvsubuwm 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = sub <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @test_xvsubuhm(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvsubuhm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvsubuhm 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = sub <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_xvmuluwm(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvmuluwm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmuluwm 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = mul <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @test_xvmuluhm(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvmuluhm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmuluhm 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = mul <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
+define <4 x i32> @test_xvmulhsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvmulhsw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmulhsw 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
+define <4 x i32> @test_xvmulhuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xvmulhuw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmulhuw 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+declare <8 x i16> @llvm.ppc.altivec.vmulhsh(<8 x i16>, <8 x i16>)
+define <8 x i16> @test_xvmulhsh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvmulhsh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmulhsh 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = call <8 x i16> @llvm.ppc.altivec.vmulhsh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.ppc.altivec.vmulhuh(<8 x i16>, <8 x i16>)
+define <8 x i16> @test_xvmulhuh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xvmulhuh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmulhuh 34, 34, 35
+; CHECK-NEXT:    blr
+  %res = call <8 x i16> @llvm.ppc.altivec.vmulhuh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}