[clang] [llvm] [PowerPC] Add AES Builtins (PR #186895)

Tue Mar 17 08:31:19 PDT 2026

https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/186895

>From acdabf6fcbdc90e26f5ba915da00582d89f55784 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Tue, 3 Mar 2026 15:45:59 -0500
Subject: [PATCH 1/5] Implement builtins for aes encrypt pair

---
 clang/include/clang/Basic/BuiltinsPPC.def     | 10 +++
 clang/lib/CodeGen/TargetBuiltins/PPC.cpp      | 17 ++++
 .../PowerPC/builtins-ppc-aes-encrypt-paired.c | 71 ++++++++++++++++
 .../builtins-ppc-aes-encrypt-paired-error.c   | 54 ++++++++++++
 llvm/include/llvm/IR/IntrinsicsPowerPC.td     |  6 ++
 llvm/lib/Target/PowerPC/PPC.td                |  4 -
 llvm/lib/Target/PowerPC/PPCInstrFuture.td     |  6 +-
 .../builtins-ppc-aes-encrypt-paired.ll        | 83 +++++++++++++++++++
 8 files changed, 246 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.c
 create mode 100644 clang/test/Sema/builtins-ppc-aes-encrypt-paired-error.c
 create mode 100644 llvm/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.ll

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index c0c92c0b73793..670ae55318bbe 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1162,6 +1162,16 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, "vW1024*W256V",
 UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3",
                              "mma,isa-future-instructions")
 
+// AES Encrypt Paired builtins
+UNALIASED_CUSTOM_BUILTIN(aes_encrypt_paired, "W256W256W256i2", false,
+                         "isa-future-instructions")
+CUSTOM_BUILTIN(aes128_encrypt_paired, aes_encrypt_paired, "W256W256W256", false,
+               "isa-future-instructions")
+CUSTOM_BUILTIN(aes192_encrypt_paired, aes_encrypt_paired, "W256W256W256", false,
+               "isa-future-instructions")
+CUSTOM_BUILTIN(aes256_encrypt_paired, aes_encrypt_paired, "W256W256W256", false,
+               "isa-future-instructions")
+
 // FIXME: Obviously incomplete.
 
 #undef BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 01926878085e0..2fb4c1c1ca366 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -1145,6 +1145,23 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       llvm::Function *F = CGM.getIntrinsic(ID);
       return Builder.CreateCall(F, Ops, "");
     }
+    // Handle AES encrypt paired builtins - they return a value directly
+    // For variant builtins, add the appropriate immediate value
+    if (BuiltinID == PPC::BI__builtin_aes_encrypt_paired ||
+        BuiltinID == PPC::BI__builtin_aes128_encrypt_paired ||
+        BuiltinID == PPC::BI__builtin_aes192_encrypt_paired ||
+        BuiltinID == PPC::BI__builtin_aes256_encrypt_paired) {
+      if (BuiltinID == PPC::BI__builtin_aes128_encrypt_paired) {
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 0));
+      } else if (BuiltinID == PPC::BI__builtin_aes192_encrypt_paired) {
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 1));
+      } else if (BuiltinID == PPC::BI__builtin_aes256_encrypt_paired) {
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 2));
+      }
+      // For base builtin, Ops already has all 3 arguments
+      llvm::Function *F = CGM.getIntrinsic(ID);
+      return Builder.CreateCall(F, Ops, "");
+    }
     SmallVector<Value*, 4> CallOps;
     if (Accumulate) {
       Address Addr = EmitPointerWithAlignment(E->getArg(0));
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.c b/clang/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.c
new file mode 100644
index 0000000000000..7c691335ed436
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.c
@@ -0,0 +1,71 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu future \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @test_aes_encrypt_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 0)
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair vp2 = *((__vector_pair *)vpp2);
+  __vector_pair res = __builtin_aes_encrypt_paired(vp1, vp2, 0);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes128_encrypt_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 0)
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes128_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair vp2 = *((__vector_pair *)vpp2);
+  __vector_pair res = __builtin_aes128_encrypt_paired(vp1, vp2);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes192_encrypt_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 1)
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes192_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair vp2 = *((__vector_pair *)vpp2);
+  __vector_pair res = __builtin_aes192_encrypt_paired(vp1, vp2);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes256_encrypt_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 2)
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes256_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair vp2 = *((__vector_pair *)vpp2);
+  __vector_pair res = __builtin_aes256_encrypt_paired(vp1, vp2);
+  *((__vector_pair *)resp) = res;
+}
+
+// Made with AI
diff --git a/clang/test/Sema/builtins-ppc-aes-encrypt-paired-error.c b/clang/test/Sema/builtins-ppc-aes-encrypt-paired-error.c
new file mode 100644
index 0000000000000..d2e736a34d6d3
--- /dev/null
+++ b/clang/test/Sema/builtins-ppc-aes-encrypt-paired-error.c
@@ -0,0 +1,54 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN:   -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu future \
+// RUN:   -fsyntax-only -verify %s
+
+void test_aes_encrypt_paired_invalid_imm(void) {
+  __vector_pair vp1, vp2;
+
+  // Test invalid immediate values (valid range is 0-2)
+  __vector_pair res1 = __builtin_aes_encrypt_paired(vp1, vp2, 3);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __vector_pair res2 = __builtin_aes_encrypt_paired(vp1, vp2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __vector_pair res3 = __builtin_aes_encrypt_paired(vp1, vp2, 10); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_aes_encrypt_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  int i = 0;
+  
+  // Test type mismatches
+  __vector_pair res1 = __builtin_aes_encrypt_paired(vc, vp, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes_encrypt_paired(vp, vc, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res3 = __builtin_aes_encrypt_paired(vp, vp, vc);   // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type 'int'}}
+}
+
+void test_aes128_encrypt_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  
+  // Test type mismatches for aes128 variant
+  __vector_pair res1 = __builtin_aes128_encrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes128_encrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
+void test_aes192_encrypt_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  
+  // Test type mismatches for aes192 variant
+  __vector_pair res1 = __builtin_aes192_encrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes192_encrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
+void test_aes256_encrypt_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  
+  // Test type mismatches for aes256 variant
+  __vector_pair res1 = __builtin_aes256_encrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes256_encrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
+// Made with Bob
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index bd8fb9e9a564d..dd5aeac27dc8c 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1721,6 +1721,12 @@ let TargetPrefix = "ppc" in {
       DefaultAttrsIntrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
                             [llvm_v256i1_ty], [IntrNoMem]>;
 
+  // AES Encrypt Paired Instructions
+  def int_ppc_aes_encrypt_paired :
+      DefaultAttrsIntrinsic<[llvm_v256i1_ty],
+                            [llvm_v256i1_ty, llvm_v256i1_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
   def int_ppc_mma_assemble_acc :
       DefaultAttrsIntrinsic<[llvm_v512i1_ty],
                             [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 7b4bae60f7e74..d15eada402607 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -277,10 +277,6 @@ def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1",
                                      "true",
                                      "Enable instructions in ISA 3.1.",
                                      [FeatureISA3_0]>;
-def FeatureISAFuture : SubtargetFeature<"isa-future-instructions",
-                                        "IsISAFuture", "true",
-                                        "Enable instructions for Future ISA.",
-                                        [FeatureISA3_1]>;
 def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
                                         "Enable POWER9 Altivec instructions",
                                         [FeatureISA3_0, FeatureP8Altivec]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 0cd63a88cb96b..09ffd1e16cdc9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -472,7 +472,11 @@ let Predicates = [HasFutureVector] in {
   // AES Acceleration Instructions
   def XXAESENCP : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp),
                                     (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M),
-                                    "xxaesencp $XTp, $XAp, $XBp, $M", []>;
+                                    "xxaesencp $XTp, $XAp, $XBp, $M",
+                                    [(set v256i1:$XTp,
+                                      (int_ppc_aes_encrypt_paired v256i1:$XAp,
+                                                                   v256i1:$XBp,
+                                                                   i32:$M))]>;
   def XXAESDECP : XX3Form_XTABp5_M2<202, (outs vsrprc:$XTp),
                                     (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M),
                                     "xxaesdecp $XTp, $XAp, $XBp, $M", []>;
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.ll
new file mode 100644
index 0000000000000..9f932678ebcdd
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; Generated by AI.
+
+declare <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1>, <256 x i1>, i32)
+
+define void @test_aes_encrypt_paired_imm0(ptr %ptr, ptr %vpp1, ptr %vpp2) {
+; CHECK-LABEL: test_aes_encrypt_paired_imm0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-NEXT:    xxaes128encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_encrypt_paired_imm0:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-BE-NEXT:    xxaes128encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %vp2 = load <256 x i1>, ptr %vpp2, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> %vp1, <256 x i1> %vp2, i32 0)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}
+
+define void @test_aes_encrypt_paired_imm1(ptr %ptr, ptr %vpp1, ptr %vpp2) {
+; CHECK-LABEL: test_aes_encrypt_paired_imm1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-NEXT:    xxaes192encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_encrypt_paired_imm1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-BE-NEXT:    xxaes192encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %vp2 = load <256 x i1>, ptr %vpp2, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> %vp1, <256 x i1> %vp2, i32 1)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}
+
+define void @test_aes_encrypt_paired_imm2(ptr %ptr, ptr %vpp1, ptr %vpp2) {
+; CHECK-LABEL: test_aes_encrypt_paired_imm2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-NEXT:    xxaes256encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_encrypt_paired_imm2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-BE-NEXT:    xxaes256encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %vp2 = load <256 x i1>, ptr %vpp2, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> %vp1, <256 x i1> %vp2, i32 2)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}

>From dec2a4a19f844e738720c131966af96249ff3933 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Tue, 3 Mar 2026 13:27:46 -0500
Subject: [PATCH 2/5] Implement builtins for aes decrypt pair

---
 clang/include/clang/Basic/BuiltinsPPC.def     | 10 +++
 clang/lib/CodeGen/TargetBuiltins/PPC.cpp      | 23 +++++-
 ...ired.c => builtins-ppc-aes-acceleration.c} | 65 ++++++++++++++++
 ... => builtins-ppc-aes-acceleration-error.c} | 48 ++++++++++++
 llvm/include/llvm/IR/IntrinsicsPowerPC.td     |  8 +-
 llvm/lib/Target/PowerPC/PPCInstrFuture.td     |  6 +-
 ...ed.ll => builtins-ppc-aes-acceleration.ll} | 75 +++++++++++++++++++
 7 files changed, 229 insertions(+), 6 deletions(-)
 rename clang/test/CodeGen/PowerPC/{builtins-ppc-aes-encrypt-paired.c => builtins-ppc-aes-acceleration.c} (52%)
 rename clang/test/Sema/{builtins-ppc-aes-encrypt-paired-error.c => builtins-ppc-aes-acceleration-error.c} (52%)
 rename llvm/test/CodeGen/PowerPC/{builtins-ppc-aes-encrypt-paired.ll => builtins-ppc-aes-acceleration.ll} (53%)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 670ae55318bbe..5461189ec772b 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1171,6 +1171,16 @@ CUSTOM_BUILTIN(aes192_encrypt_paired, aes_encrypt_paired, "W256W256W256", false,
                "isa-future-instructions")
 CUSTOM_BUILTIN(aes256_encrypt_paired, aes_encrypt_paired, "W256W256W256", false,
                "isa-future-instructions")
+// AES Decrypt Paired builtins
+UNALIASED_CUSTOM_BUILTIN(aes_decrypt_paired, "W256W256W256i2", false,
+                         "isa-future-instructions")
+CUSTOM_BUILTIN(aes128_decrypt_paired, aes_decrypt_paired, "W256W256W256", false,
+               "isa-future-instructions")
+CUSTOM_BUILTIN(aes192_decrypt_paired, aes_decrypt_paired, "W256W256W256", false,
+               "isa-future-instructions")
+CUSTOM_BUILTIN(aes256_decrypt_paired, aes_decrypt_paired, "W256W256W256", false,
+               "isa-future-instructions")
+
 
 // FIXME: Obviously incomplete.
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 2fb4c1c1ca366..00d38dc0e8cd1 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -1145,8 +1145,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       llvm::Function *F = CGM.getIntrinsic(ID);
       return Builder.CreateCall(F, Ops, "");
     }
-    // Handle AES encrypt paired builtins - they return a value directly
-    // For variant builtins, add the appropriate immediate value
+    // Handle AES encrypt paired builtins - they return a value directly.
+    // For variant builtins, add the appropriate immediate value.
     if (BuiltinID == PPC::BI__builtin_aes_encrypt_paired ||
         BuiltinID == PPC::BI__builtin_aes128_encrypt_paired ||
         BuiltinID == PPC::BI__builtin_aes192_encrypt_paired ||
@@ -1158,7 +1158,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       } else if (BuiltinID == PPC::BI__builtin_aes256_encrypt_paired) {
         Ops.push_back(llvm::ConstantInt::get(Int32Ty, 2));
       }
-      // For base builtin, Ops already has all 3 arguments
+      // For base builtin, Ops already has all 3 arguments.
+      llvm::Function *F = CGM.getIntrinsic(ID);
+      return Builder.CreateCall(F, Ops, "");
+    }
+    // Handle AES decrypt paired builtins - they return a value directly.
+    // For variant builtins, add the appropriate immediate value.
+    if (BuiltinID == PPC::BI__builtin_aes_decrypt_paired ||
+        BuiltinID == PPC::BI__builtin_aes128_decrypt_paired ||
+        BuiltinID == PPC::BI__builtin_aes192_decrypt_paired ||
+        BuiltinID == PPC::BI__builtin_aes256_decrypt_paired) {
+      if (BuiltinID == PPC::BI__builtin_aes128_decrypt_paired) {
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 0));
+      } else if (BuiltinID == PPC::BI__builtin_aes192_decrypt_paired) {
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 1));
+      } else if (BuiltinID == PPC::BI__builtin_aes256_decrypt_paired) {
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 2));
+      }
+      // For base builtin, Ops already has all 3 arguments.
       llvm::Function *F = CGM.getIntrinsic(ID);
       return Builder.CreateCall(F, Ops, "");
     }
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.c b/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
similarity index 52%
rename from clang/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
index 7c691335ed436..c7701b1d85a62 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
@@ -68,4 +68,69 @@ void test_aes256_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsign
   *((__vector_pair *)resp) = res;
 }
 
+// CHECK-LABEL: define dso_local void @test_aes_decrypt_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 0)
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair vp2 = *((__vector_pair *)vpp2);
+  __vector_pair res = __builtin_aes_decrypt_paired(vp1, vp2, 0);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes128_decrypt_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 0)
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes128_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair vp2 = *((__vector_pair *)vpp2);
+  __vector_pair res = __builtin_aes128_decrypt_paired(vp1, vp2);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes192_decrypt_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 1)
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes192_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair vp2 = *((__vector_pair *)vpp2);
+  __vector_pair res = __builtin_aes192_decrypt_paired(vp1, vp2);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes256_decrypt_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 2)
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes256_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair vp2 = *((__vector_pair *)vpp2);
+  __vector_pair res = __builtin_aes256_decrypt_paired(vp1, vp2);
+  *((__vector_pair *)resp) = res;
+}
+
+
 // Made with AI
diff --git a/clang/test/Sema/builtins-ppc-aes-encrypt-paired-error.c b/clang/test/Sema/builtins-ppc-aes-acceleration-error.c
similarity index 52%
rename from clang/test/Sema/builtins-ppc-aes-encrypt-paired-error.c
rename to clang/test/Sema/builtins-ppc-aes-acceleration-error.c
index d2e736a34d6d3..7114311ea93de 100644
--- a/clang/test/Sema/builtins-ppc-aes-encrypt-paired-error.c
+++ b/clang/test/Sema/builtins-ppc-aes-acceleration-error.c
@@ -51,4 +51,52 @@ void test_aes256_encrypt_paired_type_mismatch(void) {
   __vector_pair res2 = __builtin_aes256_encrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
 }
 
+void test_aes_decrypt_paired_invalid_imm(void) {
+  __vector_pair vp1, vp2;
+
+  // Test invalid immediate values (valid range is 0-2)
+  __vector_pair res1 = __builtin_aes_decrypt_paired(vp1, vp2, 3);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __vector_pair res2 = __builtin_aes_decrypt_paired(vp1, vp2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __vector_pair res3 = __builtin_aes_decrypt_paired(vp1, vp2, 10); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_aes_decrypt_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  int i = 0;
+  
+  // Test type mismatches
+  __vector_pair res1 = __builtin_aes_decrypt_paired(vc, vp, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes_decrypt_paired(vp, vc, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res3 = __builtin_aes_decrypt_paired(vp, vp, vc);   // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type 'int'}}
+}
+
+void test_aes128_decrypt_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  
+  // Test type mismatches for aes128 variant
+  __vector_pair res1 = __builtin_aes128_decrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes128_decrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
+void test_aes192_decrypt_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  
+  // Test type mismatches for aes192 variant
+  __vector_pair res1 = __builtin_aes192_decrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes192_decrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
+void test_aes256_decrypt_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  
+  // Test type mismatches for aes256 variant
+  __vector_pair res1 = __builtin_aes256_decrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes256_decrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
+
 // Made with Bob
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index dd5aeac27dc8c..dfc2e7803a9c0 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1721,12 +1721,16 @@ let TargetPrefix = "ppc" in {
       DefaultAttrsIntrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
                             [llvm_v256i1_ty], [IntrNoMem]>;
 
-  // AES Encrypt Paired Instructions
+  // AES Encrypt Paired Instructions.
   def int_ppc_aes_encrypt_paired :
       DefaultAttrsIntrinsic<[llvm_v256i1_ty],
                             [llvm_v256i1_ty, llvm_v256i1_ty, llvm_i32_ty],
                             [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-
+  // AES Decrypt Paired Instructions.
+  def int_ppc_aes_decrypt_paired :
+      DefaultAttrsIntrinsic<[llvm_v256i1_ty],
+                            [llvm_v256i1_ty, llvm_v256i1_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
   def int_ppc_mma_assemble_acc :
       DefaultAttrsIntrinsic<[llvm_v512i1_ty],
                             [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 09ffd1e16cdc9..2c365c2a2d716 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -479,7 +479,11 @@ let Predicates = [HasFutureVector] in {
                                                                    i32:$M))]>;
   def XXAESDECP : XX3Form_XTABp5_M2<202, (outs vsrprc:$XTp),
                                     (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M),
-                                    "xxaesdecp $XTp, $XAp, $XBp, $M", []>;
+                                    "xxaesdecp $XTp, $XAp, $XBp, $M",
+                                    [(set v256i1:$XTp,
+                                      (int_ppc_aes_decrypt_paired v256i1:$XAp,
+                                                                   v256i1:$XBp,
+                                                                   i32:$M))]>;
   def XXAESGENLKP : XX3Form_XTBp5_M2<420, (outs vsrprc:$XTp),
                                      (ins vsrprc:$XBp, u2imm:$M),
                                      "xxaesgenlkp $XTp, $XBp, $M", []>;
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
similarity index 53%
rename from llvm/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.ll
rename to llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
index 9f932678ebcdd..33c8d9f38b47b 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-encrypt-paired.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
@@ -8,6 +8,8 @@
 
 ; Generated by AI.
 
+declare <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1>, <256 x i1>, i32)
+
 declare <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1>, <256 x i1>, i32)
 
 define void @test_aes_encrypt_paired_imm0(ptr %ptr, ptr %vpp1, ptr %vpp2) {
@@ -81,3 +83,76 @@ entry:
   store <256 x i1> %0, ptr %ptr, align 32
   ret void
 }
+
+
+define void @test_aes_decrypt_paired_imm0(ptr %ptr, ptr %vpp1, ptr %vpp2) {
+; CHECK-LABEL: test_aes_decrypt_paired_imm0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-NEXT:    xxaes128decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_decrypt_paired_imm0:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-BE-NEXT:    xxaes128decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %vp2 = load <256 x i1>, ptr %vpp2, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> %vp1, <256 x i1> %vp2, i32 0)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}
+
+define void @test_aes_decrypt_paired_imm1(ptr %ptr, ptr %vpp1, ptr %vpp2) {
+; CHECK-LABEL: test_aes_decrypt_paired_imm1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-NEXT:    xxaes192decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_decrypt_paired_imm1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-BE-NEXT:    xxaes192decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %vp2 = load <256 x i1>, ptr %vpp2, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> %vp1, <256 x i1> %vp2, i32 1)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}
+
+define void @test_aes_decrypt_paired_imm2(ptr %ptr, ptr %vpp1, ptr %vpp2) {
+; CHECK-LABEL: test_aes_decrypt_paired_imm2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-NEXT:    xxaes256decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_decrypt_paired_imm2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
+; CHECK-BE-NEXT:    xxaes256decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %vp2 = load <256 x i1>, ptr %vpp2, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> %vp1, <256 x i1> %vp2, i32 2)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}

>From b8d544be13b0c816bff229ce61f16509fdcc386a Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Tue, 3 Mar 2026 18:27:59 -0500
Subject: [PATCH 3/5] Implement builtins for aes genlastkey paired

---
 clang/include/clang/Basic/BuiltinsPPC.def     | 10 +++
 clang/lib/CodeGen/TargetBuiltins/PPC.cpp      | 28 +++++---
 .../PowerPC/builtins-ppc-aes-acceleration.c   | 56 ++++++++++++++++
 .../builtins-ppc-aes-acceleration-error.c     | 43 ++++++++++++
 llvm/include/llvm/IR/IntrinsicsPowerPC.td     |  6 ++
 llvm/lib/Target/PowerPC/PPCInstrFuture.td     |  5 +-
 .../PowerPC/builtins-ppc-aes-acceleration.ll  | 66 +++++++++++++++++++
 7 files changed, 205 insertions(+), 9 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 5461189ec772b..d5e4ef1f65a5c 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1180,6 +1180,16 @@ CUSTOM_BUILTIN(aes192_decrypt_paired, aes_decrypt_paired, "W256W256W256", false,
                "isa-future-instructions")
 CUSTOM_BUILTIN(aes256_decrypt_paired, aes_decrypt_paired, "W256W256W256", false,
                "isa-future-instructions")
+// AES Generate Last Key Paired builtins
+UNALIASED_CUSTOM_BUILTIN(aes_genlastkey_paired, "W256W256i2", false,
+                         "isa-future-instructions")
+CUSTOM_BUILTIN(aes128_genlastkey_paired, aes_genlastkey_paired, "W256W256", false,
+               "isa-future-instructions")
+CUSTOM_BUILTIN(aes192_genlastkey_paired, aes_genlastkey_paired, "W256W256", false,
+               "isa-future-instructions")
+CUSTOM_BUILTIN(aes256_genlastkey_paired, aes_genlastkey_paired, "W256W256", false,
+               "isa-future-instructions")
+
 
 
 // FIXME: Obviously incomplete.
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 00d38dc0e8cd1..47d98db3e5d58 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -1151,13 +1151,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
         BuiltinID == PPC::BI__builtin_aes128_encrypt_paired ||
         BuiltinID == PPC::BI__builtin_aes192_encrypt_paired ||
         BuiltinID == PPC::BI__builtin_aes256_encrypt_paired) {
-      if (BuiltinID == PPC::BI__builtin_aes128_encrypt_paired) {
+      if (BuiltinID == PPC::BI__builtin_aes128_encrypt_paired)
         Ops.push_back(llvm::ConstantInt::get(Int32Ty, 0));
-      } else if (BuiltinID == PPC::BI__builtin_aes192_encrypt_paired) {
+      else if (BuiltinID == PPC::BI__builtin_aes192_encrypt_paired)
         Ops.push_back(llvm::ConstantInt::get(Int32Ty, 1));
-      } else if (BuiltinID == PPC::BI__builtin_aes256_encrypt_paired) {
+      else if (BuiltinID == PPC::BI__builtin_aes256_encrypt_paired)
         Ops.push_back(llvm::ConstantInt::get(Int32Ty, 2));
-      }
       // For base builtin, Ops already has all 3 arguments.
       llvm::Function *F = CGM.getIntrinsic(ID);
       return Builder.CreateCall(F, Ops, "");
@@ -1168,17 +1167,30 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
         BuiltinID == PPC::BI__builtin_aes128_decrypt_paired ||
         BuiltinID == PPC::BI__builtin_aes192_decrypt_paired ||
         BuiltinID == PPC::BI__builtin_aes256_decrypt_paired) {
-      if (BuiltinID == PPC::BI__builtin_aes128_decrypt_paired) {
+      if (BuiltinID == PPC::BI__builtin_aes128_decrypt_paired)
         Ops.push_back(llvm::ConstantInt::get(Int32Ty, 0));
-      } else if (BuiltinID == PPC::BI__builtin_aes192_decrypt_paired) {
+      else if (BuiltinID == PPC::BI__builtin_aes192_decrypt_paired)
         Ops.push_back(llvm::ConstantInt::get(Int32Ty, 1));
-      } else if (BuiltinID == PPC::BI__builtin_aes256_decrypt_paired) {
+      else if (BuiltinID == PPC::BI__builtin_aes256_decrypt_paired)
         Ops.push_back(llvm::ConstantInt::get(Int32Ty, 2));
-      }
       // For base builtin, Ops already has all 3 arguments.
       llvm::Function *F = CGM.getIntrinsic(ID);
       return Builder.CreateCall(F, Ops, "");
     }
+    if (BuiltinID == PPC::BI__builtin_aes_genlastkey_paired ||
+        BuiltinID == PPC::BI__builtin_aes128_genlastkey_paired ||
+        BuiltinID == PPC::BI__builtin_aes192_genlastkey_paired ||
+        BuiltinID == PPC::BI__builtin_aes256_genlastkey_paired) {
+      if (BuiltinID == PPC::BI__builtin_aes128_genlastkey_paired)
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 0));
+      else if (BuiltinID == PPC::BI__builtin_aes192_genlastkey_paired)
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 1));
+      else if (BuiltinID == PPC::BI__builtin_aes256_genlastkey_paired)
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 2));
+      // For base builtin, Ops already has both arguments.
+      llvm::Function *F = CGM.getIntrinsic(ID);
+      return Builder.CreateCall(F, Ops, "");
+    }
     SmallVector<Value*, 4> CallOps;
     if (Accumulate) {
       Address Addr = EmitPointerWithAlignment(E->getArg(0));
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c b/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
index c7701b1d85a62..f548b4c8fea49 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
@@ -131,6 +131,62 @@ void test_aes256_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsign
   __vector_pair res = __builtin_aes256_decrypt_paired(vp1, vp2);
   *((__vector_pair *)resp) = res;
 }
+// CHECK-LABEL: define dso_local void @test_aes_genlastkey_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> [[TMP0]], i32 0)
+// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair res = __builtin_aes_genlastkey_paired(vp1, 0);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes128_genlastkey_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> [[TMP0]], i32 0)
+// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes128_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair res = __builtin_aes128_genlastkey_paired(vp1);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes192_genlastkey_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes192_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair res = __builtin_aes192_genlastkey_paired(vp1);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_aes256_genlastkey_paired(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> [[TMP0]], i32 2)
+// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_aes256_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
+  __vector_pair vp1 = *((__vector_pair *)vpp1);
+  __vector_pair res = __builtin_aes256_genlastkey_paired(vp1);
+  *((__vector_pair *)resp) = res;
+}
+
 
 
 // Made with AI
diff --git a/clang/test/Sema/builtins-ppc-aes-acceleration-error.c b/clang/test/Sema/builtins-ppc-aes-acceleration-error.c
index 7114311ea93de..8529d30ec3cf3 100644
--- a/clang/test/Sema/builtins-ppc-aes-acceleration-error.c
+++ b/clang/test/Sema/builtins-ppc-aes-acceleration-error.c
@@ -97,6 +97,49 @@ void test_aes256_decrypt_paired_type_mismatch(void) {
   __vector_pair res1 = __builtin_aes256_decrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes256_decrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
 }
+void test_aes_genlastkey_paired_invalid_imm(void) {
+  __vector_pair vp1;
+
+  // Test invalid immediate values (valid range is 0-2)
+  __vector_pair res1 = __builtin_aes_genlastkey_paired(vp1, 3);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __vector_pair res2 = __builtin_aes_genlastkey_paired(vp1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __vector_pair res3 = __builtin_aes_genlastkey_paired(vp1, 10); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_aes_genlastkey_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+  int i = 0;
+
+  // Test type mismatches
+  __vector_pair res1 = __builtin_aes_genlastkey_paired(vc, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+  __vector_pair res2 = __builtin_aes_genlastkey_paired(vp, vc);   // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type 'int'}}
+}
+
+void test_aes128_genlastkey_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+
+  // Test type mismatches for aes128 variant
+  __vector_pair res1 = __builtin_aes128_genlastkey_paired(vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
+void test_aes192_genlastkey_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+
+  // Test type mismatches for aes192 variant
+  __vector_pair res1 = __builtin_aes192_genlastkey_paired(vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
+void test_aes256_genlastkey_paired_type_mismatch(void) {
+  __vector_pair vp;
+  vector unsigned char vc;
+
+  // Test type mismatches for aes256 variant
+  __vector_pair res1 = __builtin_aes256_genlastkey_paired(vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
+}
+
 
 
 // Made with Bob
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index dfc2e7803a9c0..75b636e584b1e 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1731,6 +1731,12 @@ let TargetPrefix = "ppc" in {
       DefaultAttrsIntrinsic<[llvm_v256i1_ty],
                             [llvm_v256i1_ty, llvm_v256i1_ty, llvm_i32_ty],
                             [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  // AES Generate Last Key Paired Instructions.
+  def int_ppc_aes_genlastkey_paired :
+      DefaultAttrsIntrinsic<[llvm_v256i1_ty],
+                            [llvm_v256i1_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
   def int_ppc_mma_assemble_acc :
       DefaultAttrsIntrinsic<[llvm_v512i1_ty],
                             [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 2c365c2a2d716..0435947869391 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -486,7 +486,10 @@ let Predicates = [HasFutureVector] in {
                                                                    i32:$M))]>;
   def XXAESGENLKP : XX3Form_XTBp5_M2<420, (outs vsrprc:$XTp),
                                      (ins vsrprc:$XBp, u2imm:$M),
-                                     "xxaesgenlkp $XTp, $XBp, $M", []>;
+                                     "xxaesgenlkp $XTp, $XBp, $M",
+                                     [(set v256i1:$XTp,
+                                       (int_ppc_aes_genlastkey_paired v256i1:$XBp,
+                                                                       i32:$M))]>;
   def XXGFMUL128 : XX3Form_XTAB6_P1<26, (outs vsrc:$XT),
                                     (ins vsrc:$XA, vsrc:$XB, u1imm:$P),
                                     "xxgfmul128 $XT, $XA, $XB, $P", []>;
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
index 33c8d9f38b47b..ce45008aac7ed 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
@@ -8,6 +8,8 @@
 
 ; Generated by AI.
 
+declare <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1>, i32)
+
 declare <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1>, <256 x i1>, i32)
 
 declare <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1>, <256 x i1>, i32)
@@ -156,3 +158,67 @@ entry:
   store <256 x i1> %0, ptr %ptr, align 32
   ret void
 }
+
+
+define void @test_aes_genlastkey_paired_imm0(ptr %ptr, ptr %vpp1) {
+; CHECK-LABEL: test_aes_genlastkey_paired_imm0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    xxaes128genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_genlastkey_paired_imm0:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    xxaes128genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> %vp1, i32 0)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}
+
+define void @test_aes_genlastkey_paired_imm1(ptr %ptr, ptr %vpp1) {
+; CHECK-LABEL: test_aes_genlastkey_paired_imm1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    xxaes192genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_genlastkey_paired_imm1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    xxaes192genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> %vp1, i32 1)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}
+
+define void @test_aes_genlastkey_paired_imm2(ptr %ptr, ptr %vpp1) {
+; CHECK-LABEL: test_aes_genlastkey_paired_imm2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-NEXT:    xxaes256genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_aes_genlastkey_paired_imm2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
+; CHECK-BE-NEXT:    xxaes256genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
+; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vp1 = load <256 x i1>, ptr %vpp1, align 32
+  %0 = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> %vp1, i32 2)
+  store <256 x i1> %0, ptr %ptr, align 32
+  ret void
+}

>From 1d85bcaa020f8f4ac092f1b2bfaac0eac1049a18 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Tue, 3 Mar 2026 23:53:44 -0500
Subject: [PATCH 4/5] Implement builtins for aes Galois Field Multiplication

---
 clang/include/clang/Basic/BuiltinsPPC.def     |   9 +-
 clang/lib/CodeGen/TargetBuiltins/PPC.cpp      |  11 ++
 .../PowerPC/builtins-ppc-aes-acceleration.c   | 151 ++++++++--------
 .../builtins-ppc-aes-acceleration-error.c     |  54 +++++-
 llvm/include/llvm/IR/IntrinsicsPowerPC.td     |   5 +
 llvm/lib/Target/PowerPC/PPCInstrFuture.td     |  40 ++---
 .../PowerPC/builtins-ppc-aes-acceleration.ll  | 162 ++++++------------
 7 files changed, 226 insertions(+), 206 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index d5e4ef1f65a5c..592e070b5a6ab 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1189,8 +1189,13 @@ CUSTOM_BUILTIN(aes192_genlastkey_paired, aes_genlastkey_paired, "W256W256", fals
                "isa-future-instructions")
 CUSTOM_BUILTIN(aes256_genlastkey_paired, aes_genlastkey_paired, "W256W256", false,
                "isa-future-instructions")
-
-
+// Galois Field Multiplication builtins
+UNALIASED_CUSTOM_BUILTIN(galois_field_mult, "VVVi1", false,
+                         "isa-future-instructions")
+CUSTOM_BUILTIN(galois_field_mult_gcm, galois_field_mult, "VVV", false,
+               "isa-future-instructions")
+CUSTOM_BUILTIN(galois_field_mult_xts, galois_field_mult, "VVV", false,
+               "isa-future-instructions")
 
 // FIXME: Obviously incomplete.
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 47d98db3e5d58..763259eb40f88 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -1191,6 +1191,17 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       llvm::Function *F = CGM.getIntrinsic(ID);
       return Builder.CreateCall(F, Ops, "");
     }
+    if (BuiltinID == PPC::BI__builtin_galois_field_mult ||
+        BuiltinID == PPC::BI__builtin_galois_field_mult_gcm ||
+        BuiltinID == PPC::BI__builtin_galois_field_mult_xts) {
+      if (BuiltinID == PPC::BI__builtin_galois_field_mult_gcm)
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 0));
+      else if (BuiltinID == PPC::BI__builtin_galois_field_mult_xts)
+        Ops.push_back(llvm::ConstantInt::get(Int32Ty, 1));
+      // For base builtin, Ops already has all 3 arguments.
+      llvm::Function *F = CGM.getIntrinsic(ID);
+      return Builder.CreateCall(F, Ops, "");
+    }
     SmallVector<Value*, 4> CallOps;
     if (Accumulate) {
       Address Addr = EmitPointerWithAlignment(E->getArg(0));
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c b/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
index f548b4c8fea49..d6e5bec7bb923 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.c
@@ -4,13 +4,14 @@
 // RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu future \
 // RUN:   -emit-llvm %s -o - | FileCheck %s
 
-// CHECK-LABEL: define dso_local void @test_aes_encrypt_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
-// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// Made with AI
+
+// CHECK-LABEL: @test_aes_encrypt_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 0)
-// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
@@ -20,13 +21,12 @@ void test_aes_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes128_encrypt_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
-// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-LABEL: @test_aes128_encrypt_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 0)
-// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes128_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
@@ -36,13 +36,12 @@ void test_aes128_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsign
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes192_encrypt_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
-// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-LABEL: @test_aes192_encrypt_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 1)
-// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes192_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
@@ -52,13 +51,12 @@ void test_aes192_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsign
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes256_encrypt_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
-// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-LABEL: @test_aes256_encrypt_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 2)
-// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes256_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
@@ -68,13 +66,12 @@ void test_aes256_encrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsign
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes_decrypt_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
-// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-LABEL: @test_aes_decrypt_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 0)
-// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
@@ -84,13 +81,12 @@ void test_aes_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes128_decrypt_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
-// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-LABEL: @test_aes128_decrypt_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 0)
-// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes128_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
@@ -100,13 +96,12 @@ void test_aes128_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsign
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes192_decrypt_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
-// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-LABEL: @test_aes192_decrypt_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 1)
-// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes192_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
@@ -116,13 +111,12 @@ void test_aes192_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsign
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes256_decrypt_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef readonly captures(none) [[VPP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
-// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2]], align 32
+// CHECK-LABEL: @test_aes256_decrypt_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP2:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1> [[TMP0]], <256 x i1> [[TMP1]], i32 2)
-// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP2]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes256_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsigned char *resp) {
@@ -131,12 +125,11 @@ void test_aes256_decrypt_paired(unsigned char *vpp1, unsigned char *vpp2, unsign
   __vector_pair res = __builtin_aes256_decrypt_paired(vp1, vp2);
   *((__vector_pair *)resp) = res;
 }
-// CHECK-LABEL: define dso_local void @test_aes_genlastkey_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-LABEL: @test_aes_genlastkey_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> [[TMP0]], i32 0)
-// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
@@ -145,12 +138,11 @@ void test_aes_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes128_genlastkey_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-LABEL: @test_aes128_genlastkey_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> [[TMP0]], i32 0)
-// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes128_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
@@ -159,12 +151,11 @@ void test_aes128_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes192_genlastkey_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-LABEL: @test_aes192_genlastkey_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> [[TMP0]], i32 1)
-// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes192_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
@@ -173,12 +164,11 @@ void test_aes192_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: define dso_local void @test_aes256_genlastkey_paired(
-// CHECK-SAME: ptr noundef readonly captures(none) [[VPP1:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1]], align 32
+// CHECK-LABEL: @test_aes256_genlastkey_paired(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP1:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> [[TMP0]], i32 2)
-// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP]], align 32
+// CHECK-NEXT:    store <256 x i1> [[TMP1]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 void test_aes256_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
@@ -187,6 +177,29 @@ void test_aes256_genlastkey_paired(unsigned char *vpp1, unsigned char *resp) {
   *((__vector_pair *)resp) = res;
 }
 
+// CHECK-LABEL: @test_galois_field_mult(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.galois.field.mult(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+vector unsigned char test_galois_field_mult(vector unsigned char a, vector unsigned char b) {
+  return __builtin_galois_field_mult(a, b, 0);
+}
 
+// CHECK-LABEL: @test_galois_field_mult_gcm(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.galois.field.mult(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+vector unsigned char test_galois_field_mult_gcm(vector unsigned char a, vector unsigned char b) {
+  return __builtin_galois_field_mult_gcm(a, b);
+}
 
-// Made with AI
+// CHECK-LABEL: @test_galois_field_mult_xts(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.galois.field.mult(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+vector unsigned char test_galois_field_mult_xts(vector unsigned char a, vector unsigned char b) {
+  return __builtin_galois_field_mult_xts(a, b);
+}
diff --git a/clang/test/Sema/builtins-ppc-aes-acceleration-error.c b/clang/test/Sema/builtins-ppc-aes-acceleration-error.c
index 8529d30ec3cf3..5c590825e3558 100644
--- a/clang/test/Sema/builtins-ppc-aes-acceleration-error.c
+++ b/clang/test/Sema/builtins-ppc-aes-acceleration-error.c
@@ -4,6 +4,8 @@
 // RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu future \
 // RUN:   -fsyntax-only -verify %s
 
+// Made with AI
+
 void test_aes_encrypt_paired_invalid_imm(void) {
   __vector_pair vp1, vp2;
 
@@ -17,7 +19,7 @@ void test_aes_encrypt_paired_type_mismatch(void) {
   __vector_pair vp;
   vector unsigned char vc;
   int i = 0;
-  
+
   // Test type mismatches
   __vector_pair res1 = __builtin_aes_encrypt_paired(vc, vp, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes_encrypt_paired(vp, vc, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
@@ -27,7 +29,7 @@ void test_aes_encrypt_paired_type_mismatch(void) {
 void test_aes128_encrypt_paired_type_mismatch(void) {
   __vector_pair vp;
   vector unsigned char vc;
-  
+
   // Test type mismatches for aes128 variant
   __vector_pair res1 = __builtin_aes128_encrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes128_encrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
@@ -36,7 +38,7 @@ void test_aes128_encrypt_paired_type_mismatch(void) {
 void test_aes192_encrypt_paired_type_mismatch(void) {
   __vector_pair vp;
   vector unsigned char vc;
-  
+
   // Test type mismatches for aes192 variant
   __vector_pair res1 = __builtin_aes192_encrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes192_encrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
@@ -45,7 +47,7 @@ void test_aes192_encrypt_paired_type_mismatch(void) {
 void test_aes256_encrypt_paired_type_mismatch(void) {
   __vector_pair vp;
   vector unsigned char vc;
-  
+
   // Test type mismatches for aes256 variant
   __vector_pair res1 = __builtin_aes256_encrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes256_encrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
@@ -64,7 +66,7 @@ void test_aes_decrypt_paired_type_mismatch(void) {
   __vector_pair vp;
   vector unsigned char vc;
   int i = 0;
-  
+
   // Test type mismatches
   __vector_pair res1 = __builtin_aes_decrypt_paired(vc, vp, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes_decrypt_paired(vp, vc, 0);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
@@ -74,7 +76,7 @@ void test_aes_decrypt_paired_type_mismatch(void) {
 void test_aes128_decrypt_paired_type_mismatch(void) {
   __vector_pair vp;
   vector unsigned char vc;
-  
+
   // Test type mismatches for aes128 variant
   __vector_pair res1 = __builtin_aes128_decrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes128_decrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
@@ -83,7 +85,7 @@ void test_aes128_decrypt_paired_type_mismatch(void) {
 void test_aes192_decrypt_paired_type_mismatch(void) {
   __vector_pair vp;
   vector unsigned char vc;
-  
+
   // Test type mismatches for aes192 variant
   __vector_pair res1 = __builtin_aes192_decrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes192_decrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
@@ -92,7 +94,7 @@ void test_aes192_decrypt_paired_type_mismatch(void) {
 void test_aes256_decrypt_paired_type_mismatch(void) {
   __vector_pair vp;
   vector unsigned char vc;
-  
+
   // Test type mismatches for aes256 variant
   __vector_pair res1 = __builtin_aes256_decrypt_paired(vc, vp);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
   __vector_pair res2 = __builtin_aes256_decrypt_paired(vp, vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
@@ -140,6 +142,40 @@ void test_aes256_genlastkey_paired_type_mismatch(void) {
   __vector_pair res1 = __builtin_aes256_genlastkey_paired(vc);    // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type '__vector_pair'}}
 }
 
+void test_galois_field_mult_invalid_imm(void) {
+  vector unsigned char a, b;
+
+  // Test invalid immediate values (valid range is 0-1)
+  vector unsigned char res1 = __builtin_galois_field_mult(a, b, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vector unsigned char res2 = __builtin_galois_field_mult(a, b, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vector unsigned char res3 = __builtin_galois_field_mult(a, b, 10); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_galois_field_mult_type_mismatch(void) {
+  vector unsigned char vc;
+  __vector_pair vp;
+  int i = 0;
+
+  // Test type mismatches
+  vector unsigned char res1 = __builtin_galois_field_mult(vp, vc, 0);    // expected-error {{passing '__vector_pair' to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+  vector unsigned char res2 = __builtin_galois_field_mult(vc, vp, 0);    // expected-error {{passing '__vector_pair' to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+  vector unsigned char res3 = __builtin_galois_field_mult(vc, vc, vc);   // expected-error {{passing '__vector unsigned char' (vector of 16 'unsigned char' values) to parameter of incompatible type 'int'}}
+}
+
+void test_galois_field_mult_gcm_type_mismatch(void) {
+  vector unsigned char vc;
+  __vector_pair vp;
+
+  // Test type mismatches for gcm variant
+  vector unsigned char res1 = __builtin_galois_field_mult_gcm(vp, vc);    // expected-error {{passing '__vector_pair' to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+  vector unsigned char res2 = __builtin_galois_field_mult_gcm(vc, vp);    // expected-error {{passing '__vector_pair' to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+}
 
+void test_galois_field_mult_xts_type_mismatch(void) {
+  vector unsigned char vc;
+  __vector_pair vp;
 
-// Made with Bob
+  // Test type mismatches for xts variant
+  vector unsigned char res1 = __builtin_galois_field_mult_xts(vp, vc);    // expected-error {{passing '__vector_pair' to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+  vector unsigned char res2 = __builtin_galois_field_mult_xts(vc, vp);    // expected-error {{passing '__vector_pair' to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 75b636e584b1e..9eebed764e488 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1736,6 +1736,11 @@ let TargetPrefix = "ppc" in {
       DefaultAttrsIntrinsic<[llvm_v256i1_ty],
                             [llvm_v256i1_ty, llvm_i32_ty],
                             [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  // Galois Field Multiplication Instructions.
+  def int_ppc_galois_field_mult :
+      DefaultAttrsIntrinsic<[llvm_v16i8_ty],
+                            [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 
   def int_ppc_mma_assemble_acc :
       DefaultAttrsIntrinsic<[llvm_v512i1_ty],
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 0435947869391..7e31f929317b4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -470,26 +470,23 @@ let Predicates = [HasFutureVector] in {
                                                  v4i32:$XB))]>;
 
   // AES Acceleration Instructions
-  def XXAESENCP : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp),
-                                    (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M),
-                                    "xxaesencp $XTp, $XAp, $XBp, $M",
-                                    [(set v256i1:$XTp,
-                                      (int_ppc_aes_encrypt_paired v256i1:$XAp,
-                                                                   v256i1:$XBp,
-                                                                   i32:$M))]>;
-  def XXAESDECP : XX3Form_XTABp5_M2<202, (outs vsrprc:$XTp),
-                                    (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M),
-                                    "xxaesdecp $XTp, $XAp, $XBp, $M",
-                                    [(set v256i1:$XTp,
-                                      (int_ppc_aes_decrypt_paired v256i1:$XAp,
-                                                                   v256i1:$XBp,
-                                                                   i32:$M))]>;
-  def XXAESGENLKP : XX3Form_XTBp5_M2<420, (outs vsrprc:$XTp),
-                                     (ins vsrprc:$XBp, u2imm:$M),
-                                     "xxaesgenlkp $XTp, $XBp, $M",
-                                     [(set v256i1:$XTp,
-                                       (int_ppc_aes_genlastkey_paired v256i1:$XBp,
-                                                                       i32:$M))]>;
+  def XXAESENCP
+      : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp),
+                          (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M),
+                          "xxaesencp $XTp, $XAp, $XBp, $M",
+                          [(set v256i1:$XTp,
+                                (int_ppc_aes_encrypt_paired v256i1:$XAp, v256i1:$XBp, u2imm_timm:$M))]>;
+  def XXAESDECP
+      : XX3Form_XTABp5_M2<202, (outs vsrprc:$XTp),
+                          (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M),
+                          "xxaesdecp $XTp, $XAp, $XBp, $M",
+                          [(set v256i1:$XTp,
+                                (int_ppc_aes_decrypt_paired v256i1:$XAp, v256i1:$XBp, u2imm_timm:$M))]>;
+  def XXAESGENLKP
+      : XX3Form_XTBp5_M2<420, (outs vsrprc:$XTp), (ins vsrprc:$XBp, u2imm:$M),
+                         "xxaesgenlkp $XTp, $XBp, $M",
+                         [(set v256i1:$XTp,
+                               (int_ppc_aes_genlastkey_paired v256i1:$XBp, u2imm_timm:$M))]>;
   def XXGFMUL128 : XX3Form_XTAB6_P1<26, (outs vsrc:$XT),
                                     (ins vsrc:$XA, vsrc:$XB, u1imm:$P),
                                     "xxgfmul128 $XT, $XA, $XB, $P", []>;
@@ -608,6 +605,9 @@ def : Pat<(int_ppc_vsx_stxvprl v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRL $XTp,
 def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
                                                                      $RA, $RB)>;
 
+def: Pat<(v16i8 (int_ppc_galois_field_mult v16i8:$XA, v16i8:$XB, u1imm_timm:$IMM)),
+         (COPY_TO_REGCLASS (XXGFMUL128 RCCp.AToVSRC, RCCp.BToVSRC, $IMM), VSRC)>;
+
 // Regular load/store patterns for v256i1 (for ISA Future)
 let Predicates = [HasFutureVector, PairedVectorMemops] in {
   def : Pat<(v256i1 (load iaddrX16:$src)), (LXVP iaddrX16:$src)>;
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
index ce45008aac7ed..18864bb5592fc 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-aes-acceleration.ll
@@ -2,34 +2,25 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN:   < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
 ; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN:   < %s | FileCheck %s
 
 ; Generated by AI.
 
 declare <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1>, i32)
-
+declare <16 x i8> @llvm.ppc.galois.field.mult(<16 x i8>, <16 x i8>, i32)
 declare <256 x i1> @llvm.ppc.aes.decrypt.paired(<256 x i1>, <256 x i1>, i32)
-
 declare <256 x i1> @llvm.ppc.aes.encrypt.paired(<256 x i1>, <256 x i1>, i32)
 
 define void @test_aes_encrypt_paired_imm0(ptr %ptr, ptr %vpp1, ptr %vpp2) {
 ; CHECK-LABEL: test_aes_encrypt_paired_imm0:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-NEXT:    xxaes128encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    lxvp vsp36, 0(r5)
+; CHECK-NEXT:    xxaes128encp vsp34, vsp34, vsp36
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_encrypt_paired_imm0:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-BE-NEXT:    xxaes128encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %vp2 = load <256 x i1>, ptr %vpp2, align 32
@@ -41,19 +32,11 @@ entry:
 define void @test_aes_encrypt_paired_imm1(ptr %ptr, ptr %vpp1, ptr %vpp2) {
 ; CHECK-LABEL: test_aes_encrypt_paired_imm1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-NEXT:    xxaes192encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    lxvp vsp36, 0(r5)
+; CHECK-NEXT:    xxaes192encp vsp34, vsp34, vsp36
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_encrypt_paired_imm1:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-BE-NEXT:    xxaes192encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %vp2 = load <256 x i1>, ptr %vpp2, align 32
@@ -65,19 +48,11 @@ entry:
 define void @test_aes_encrypt_paired_imm2(ptr %ptr, ptr %vpp1, ptr %vpp2) {
 ; CHECK-LABEL: test_aes_encrypt_paired_imm2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-NEXT:    xxaes256encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    lxvp vsp36, 0(r5)
+; CHECK-NEXT:    xxaes256encp vsp34, vsp34, vsp36
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_encrypt_paired_imm2:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-BE-NEXT:    xxaes256encp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %vp2 = load <256 x i1>, ptr %vpp2, align 32
@@ -90,19 +65,11 @@ entry:
 define void @test_aes_decrypt_paired_imm0(ptr %ptr, ptr %vpp1, ptr %vpp2) {
 ; CHECK-LABEL: test_aes_decrypt_paired_imm0:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-NEXT:    xxaes128decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    lxvp vsp36, 0(r5)
+; CHECK-NEXT:    xxaes128decp vsp34, vsp34, vsp36
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_decrypt_paired_imm0:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-BE-NEXT:    xxaes128decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %vp2 = load <256 x i1>, ptr %vpp2, align 32
@@ -114,19 +81,11 @@ entry:
 define void @test_aes_decrypt_paired_imm1(ptr %ptr, ptr %vpp1, ptr %vpp2) {
 ; CHECK-LABEL: test_aes_decrypt_paired_imm1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-NEXT:    xxaes192decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    lxvp vsp36, 0(r5)
+; CHECK-NEXT:    xxaes192decp vsp34, vsp34, vsp36
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_decrypt_paired_imm1:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-BE-NEXT:    xxaes192decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %vp2 = load <256 x i1>, ptr %vpp2, align 32
@@ -138,19 +97,11 @@ entry:
 define void @test_aes_decrypt_paired_imm2(ptr %ptr, ptr %vpp1, ptr %vpp2) {
 ; CHECK-LABEL: test_aes_decrypt_paired_imm2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-NEXT:    xxaes256decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    lxvp vsp36, 0(r5)
+; CHECK-NEXT:    xxaes256decp vsp34, vsp34, vsp36
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_decrypt_paired_imm2:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r5)
-; CHECK-BE-NEXT:    xxaes256decp vsp{{[0-9]+}}, vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %vp2 = load <256 x i1>, ptr %vpp2, align 32
@@ -163,17 +114,10 @@ entry:
 define void @test_aes_genlastkey_paired_imm0(ptr %ptr, ptr %vpp1) {
 ; CHECK-LABEL: test_aes_genlastkey_paired_imm0:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    xxaes128genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    xxaes128genlkp vsp34, vsp34
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_genlastkey_paired_imm0:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    xxaes128genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %0 = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> %vp1, i32 0)
@@ -184,17 +128,10 @@ entry:
 define void @test_aes_genlastkey_paired_imm1(ptr %ptr, ptr %vpp1) {
 ; CHECK-LABEL: test_aes_genlastkey_paired_imm1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    xxaes192genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    xxaes192genlkp vsp34, vsp34
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_genlastkey_paired_imm1:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    xxaes192genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %0 = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> %vp1, i32 1)
@@ -205,20 +142,33 @@ entry:
 define void @test_aes_genlastkey_paired_imm2(ptr %ptr, ptr %vpp1) {
 ; CHECK-LABEL: test_aes_genlastkey_paired_imm2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-NEXT:    xxaes256genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    xxaes256genlkp vsp34, vsp34
+; CHECK-NEXT:    stxvp vsp34, 0(r3)
 ; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_aes_genlastkey_paired_imm2:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp{{[0-9]+}}, 0(r4)
-; CHECK-BE-NEXT:    xxaes256genlkp vsp{{[0-9]+}}, vsp{{[0-9]+}}
-; CHECK-BE-NEXT:    stxvp vsp{{[0-9]+}}, 0(r3)
-; CHECK-BE-NEXT:    blr
 entry:
   %vp1 = load <256 x i1>, ptr %vpp1, align 32
   %0 = tail call <256 x i1> @llvm.ppc.aes.genlastkey.paired(<256 x i1> %vp1, i32 2)
   store <256 x i1> %0, ptr %ptr, align 32
   ret void
 }
+
+define <16 x i8> @test_galois_field_mult_imm0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_galois_field_mult_imm0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxgfmul128gcm v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.galois.field.mult(<16 x i8> %a, <16 x i8> %b, i32 0)
+  ret <16 x i8> %0
+}
+
+define <16 x i8> @test_galois_field_mult_imm1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_galois_field_mult_imm1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxgfmul128xts v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.galois.field.mult(<16 x i8> %a, <16 x i8> %b, i32 1)
+  ret <16 x i8> %0
+}

>From ef64cdeeaa6d8b4a843d6e6d579dd829658947a9 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Mon, 16 Mar 2026 17:35:55 -0400
Subject: [PATCH 5/5] fix merge conflict issue

---
 llvm/lib/Target/PowerPC/PPC.td | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index d15eada402607..7b4bae60f7e74 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -277,6 +277,10 @@ def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1",
                                      "true",
                                      "Enable instructions in ISA 3.1.",
                                      [FeatureISA3_0]>;
+def FeatureISAFuture : SubtargetFeature<"isa-future-instructions",
+                                        "IsISAFuture", "true",
+                                        "Enable instructions for Future ISA.",
+                                        [FeatureISA3_1]>;
 def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
                                         "Enable POWER9 Altivec instructions",
                                         [FeatureISA3_0, FeatureP8Altivec]>;