[llvm] [AArch64][ISel] Add clmul to pmullb/t lowering (PR #180568)
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 11 06:23:16 PST 2026
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/180568
From b3d1230edf159e726fef58d623bf6b0bc933e66c Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Mon, 9 Feb 2026 17:24:31 +0000
Subject: [PATCH 1/5] [AArch64][ISel] Add i64 clmul to pmullb/t lowering
---
.../Target/AArch64/AArch64ISelLowering.cpp | 5 +-
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 11 +
llvm/test/CodeGen/AArch64/clmul-scalable.ll | 740 +-----------------
3 files changed, 48 insertions(+), 708 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index add3a92343943..4d81ea1b7ddcf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2002,7 +2002,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv8i16, Legal);
setPartialReduceMLAAction(MLAOps, MVT::nxv8i16, MVT::nxv16i8, Legal);
- setOperationAction(ISD::CLMUL, MVT::nxv16i8, Legal);
+ setOperationAction(ISD::CLMUL, {MVT::nxv16i8, MVT::nxv4i32}, Legal);
}
// Handle floating-point partial reduction
@@ -2015,6 +2015,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
+ if (Subtarget->hasSVE2AES())
+ setOperationAction(ISD::CLMUL, MVT::nxv2i64, Legal);
+
// Handle non-aliasing elements mask
if (Subtarget->hasSVE2() ||
(Subtarget->hasSME() && Subtarget->isStreaming())) {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 61db07493260c..d7f402bf03a30 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4254,6 +4254,17 @@ let Predicates = [HasSVEAES, HasNonStreamingSVE_or_SSVE_AES] in {
defm PMULLT_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11011, "pmullt", int_aarch64_sve_pmullt_pair>;
}
+let Predicates = [HasSVEAES, HasSVE2_or_SME] in {
+ def : Pat<(nxv2i64 (clmul nxv2i64:$Rn, nxv2i64:$Rm)),
+ (ZIP1_ZZZ_D (UZP1_ZZZ_D (PMULLB_ZZZ_Q $Rn, $Rm), (PMULLT_ZZZ_Q $Rn, $Rm)),
+ (UZP1_ZZZ_D (PMULLT_ZZZ_Q $Rn, $Rm), (PMULLT_ZZZ_Q $Rn, $Rm)))>;
+}
+let Predicates = [HasSVE2_or_SME] in {
+ def : Pat<(nxv4i32 (clmul nxv4i32:$Rn, nxv4i32:$Rm)),
+ (ZIP1_ZZZ_S (UZP1_ZZZ_S (PMULLB_ZZZ_D $Rn, $Rm), (PMULLT_ZZZ_D $Rn, $Rm)),
+ (UZP1_ZZZ_S (PMULLT_ZZZ_D $Rn, $Rm), (PMULLT_ZZZ_D $Rn, $Rm)))>;
+}
+
let Predicates = [HasSVESM4] in {
// SVE2 crypto constructive binary operations
defm SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32, int_aarch64_sve_sm4ekey, nxv4i32>;
diff --git a/llvm/test/CodeGen/AArch64/clmul-scalable.ll b/llvm/test/CodeGen/AArch64/clmul-scalable.ll
index 58dbba833f67b..a6c1bfae6e1b9 100644
--- a/llvm/test/CodeGen/AArch64/clmul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/clmul-scalable.ll
@@ -377,234 +377,20 @@ define <vscale x 4 x i32> @clmul_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i3
;
; CHECK-SVE2-LABEL: clmul_nxv4i32:
; CHECK-SVE2: // %bb.0:
-; CHECK-SVE2-NEXT: mov z2.d, z1.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z2.s, z2.s, #0x2
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x1
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x8
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x4
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x20
-; CHECK-SVE2-NEXT: mul z2.s, z0.s, z2.s
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x10
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x80
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x40
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x200
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x100
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x800
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x400
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x2000
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x1000
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x8000
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x4000
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x20000
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x10000
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x80000
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x40000
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x200000
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x100000
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x800000
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x400000
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x2000000
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x1000000
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x8000000
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x4000000
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x20000000
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x10000000
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: and z1.s, z1.s, #0x40000000
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x80000000
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: mul z0.s, z0.s, z1.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z3.d
-; CHECK-SVE2-NEXT: mov z0.d, z2.d
+; CHECK-SVE2-NEXT: pmullt z2.d, z0.s, z1.s
+; CHECK-SVE2-NEXT: pmullb z0.d, z0.s, z1.s
+; CHECK-SVE2-NEXT: uzp1 z1.s, z2.s, z2.s
+; CHECK-SVE2-NEXT: uzp1 z0.s, z0.s, z2.s
+; CHECK-SVE2-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-SVE2-NEXT: ret
;
; CHECK-SVE2-AES-LABEL: clmul_nxv4i32:
; CHECK-SVE2-AES: // %bb.0:
-; CHECK-SVE2-AES-NEXT: mov z2.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z2.s, z2.s, #0x2
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x1
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x8
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x4
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x20
-; CHECK-SVE2-AES-NEXT: mul z2.s, z0.s, z2.s
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x10
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x80
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x40
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x200
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x100
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x800
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x400
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x2000
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x1000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x8000
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x4000
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x20000
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x10000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x80000
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x40000
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x200000
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x100000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x800000
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x400000
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x2000000
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x1000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x8000000
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x4000000
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x20000000
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x10000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z1.s, z1.s, #0x40000000
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x80000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: mul z0.s, z0.s, z1.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z0.d, z2.d
+; CHECK-SVE2-AES-NEXT: pmullt z2.d, z0.s, z1.s
+; CHECK-SVE2-AES-NEXT: pmullb z0.d, z0.s, z1.s
+; CHECK-SVE2-AES-NEXT: uzp1 z1.s, z2.s, z2.s
+; CHECK-SVE2-AES-NEXT: uzp1 z0.s, z0.s, z2.s
+; CHECK-SVE2-AES-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-SVE2-AES-NEXT: ret
%a = call <vscale x 4 x i32> @llvm.clmul.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
ret <vscale x 4 x i32> %a
@@ -1100,230 +886,11 @@ define <vscale x 2 x i64> @clmul_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i6
;
; CHECK-SVE2-AES-LABEL: clmul_nxv2i64:
; CHECK-SVE2-AES: // %bb.0:
-; CHECK-SVE2-AES-NEXT: mov z2.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z2.d, z2.d, #0x2
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x1
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x8
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x4
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x20
-; CHECK-SVE2-AES-NEXT: mul z2.d, z0.d, z2.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x10
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x80
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x40
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x200
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x100
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x800
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x400
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x2000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x1000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x8000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x4000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x20000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x10000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x80000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x40000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x200000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x100000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x800000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x400000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x2000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x1000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x8000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x4000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x20000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x10000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x80000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x40000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x200000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x100000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x800000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x400000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x2000000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x1000000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x8000000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x4000000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x20000000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x10000000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x80000000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x40000000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x200000000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x100000000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x800000000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x400000000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x2000000000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x1000000000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x8000000000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x4000000000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x20000000000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x10000000000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x80000000000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x40000000000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x200000000000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x100000000000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x800000000000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x400000000000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x2000000000000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x1000000000000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z1.d, z1.d, #0x4000000000000000
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x8000000000000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: mul z0.d, z0.d, z1.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z0.d, z2.d
+; CHECK-SVE2-AES-NEXT: pmullt z2.q, z0.d, z1.d
+; CHECK-SVE2-AES-NEXT: pmullb z0.q, z0.d, z1.d
+; CHECK-SVE2-AES-NEXT: uzp1 z1.d, z2.d, z2.d
+; CHECK-SVE2-AES-NEXT: uzp1 z0.d, z0.d, z2.d
+; CHECK-SVE2-AES-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-SVE2-AES-NEXT: ret
%a = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
ret <vscale x 2 x i64> %a
@@ -1597,142 +1164,24 @@ define <vscale x 4 x i32> @clmul_nxv4i32_zext(<vscale x 4 x i16> %x, <vscale x 4
;
; CHECK-SVE2-LABEL: clmul_nxv4i32_zext:
; CHECK-SVE2: // %bb.0:
-; CHECK-SVE2-NEXT: mov z2.d, z1.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
; CHECK-SVE2-NEXT: and z0.s, z0.s, #0xffff
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z2.s, z2.s, #0x2
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x1
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x8
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x4
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x20
-; CHECK-SVE2-NEXT: mul z2.s, z0.s, z2.s
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x10
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x80
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x40
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x200
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x100
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: and z4.s, z4.s, #0x800
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x400
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-NEXT: and z3.s, z3.s, #0x2000
-; CHECK-SVE2-NEXT: and z6.s, z6.s, #0x1000
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-NEXT: and z1.s, z1.s, #0x4000
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-NEXT: mul z4.s, z0.s, z6.s
-; CHECK-SVE2-NEXT: and z5.s, z5.s, #0x8000
-; CHECK-SVE2-NEXT: mul z1.s, z0.s, z1.s
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
-; CHECK-SVE2-NEXT: mul z3.s, z0.s, z5.s
-; CHECK-SVE2-NEXT: mul z0.s, z0.s, #0
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z1.d, z3.d
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-NEXT: mov z0.d, z2.d
+; CHECK-SVE2-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SVE2-NEXT: pmullt z2.d, z0.s, z1.s
+; CHECK-SVE2-NEXT: pmullb z0.d, z0.s, z1.s
+; CHECK-SVE2-NEXT: uzp1 z1.s, z2.s, z2.s
+; CHECK-SVE2-NEXT: uzp1 z0.s, z0.s, z2.s
+; CHECK-SVE2-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-SVE2-NEXT: ret
;
; CHECK-SVE2-AES-LABEL: clmul_nxv4i32_zext:
; CHECK-SVE2-AES: // %bb.0:
-; CHECK-SVE2-AES-NEXT: mov z2.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
; CHECK-SVE2-AES-NEXT: and z0.s, z0.s, #0xffff
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z2.s, z2.s, #0x2
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x1
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x8
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x4
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x20
-; CHECK-SVE2-AES-NEXT: mul z2.s, z0.s, z2.s
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x10
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x80
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x40
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x200
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x100
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z6.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: and z4.s, z4.s, #0x800
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x400
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z4.s
-; CHECK-SVE2-AES-NEXT: mul z5.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.s, z3.s, #0x2000
-; CHECK-SVE2-AES-NEXT: and z6.s, z6.s, #0x1000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z1.s, z1.s, #0x4000
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z3.s
-; CHECK-SVE2-AES-NEXT: mul z4.s, z0.s, z6.s
-; CHECK-SVE2-AES-NEXT: and z5.s, z5.s, #0x8000
-; CHECK-SVE2-AES-NEXT: mul z1.s, z0.s, z1.s
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z3.s, z0.s, z5.s
-; CHECK-SVE2-AES-NEXT: mul z0.s, z0.s, #0
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z1.d, z3.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: mov z0.d, z2.d
+; CHECK-SVE2-AES-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SVE2-AES-NEXT: pmullt z2.d, z0.s, z1.s
+; CHECK-SVE2-AES-NEXT: pmullb z0.d, z0.s, z1.s
+; CHECK-SVE2-AES-NEXT: uzp1 z1.s, z2.s, z2.s
+; CHECK-SVE2-AES-NEXT: uzp1 z0.s, z0.s, z2.s
+; CHECK-SVE2-AES-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-SVE2-AES-NEXT: ret
%zextx = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
%zexty = zext <vscale x 4 x i16> %y to <vscale x 4 x i32>
@@ -2096,136 +1545,13 @@ define <vscale x 2 x i64> @clmul_nxv2i64_zext(<vscale x 2 x i32> %x, <vscale x 2
;
; CHECK-SVE2-AES-LABEL: clmul_nxv2i64_zext:
; CHECK-SVE2-AES: // %bb.0:
-; CHECK-SVE2-AES-NEXT: mov z2.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
; CHECK-SVE2-AES-NEXT: and z0.d, z0.d, #0xffffffff
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z2.d, z2.d, #0x2
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x1
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x8
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x4
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x20
-; CHECK-SVE2-AES-NEXT: mul z2.d, z0.d, z2.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x10
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x80
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x40
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x200
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x100
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x800
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x400
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x2000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x1000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x8000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x4000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x20000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x10000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x80000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x40000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x200000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x100000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x800000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x400000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x2000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x1000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z4.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z6.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z4.d, z4.d, #0x8000000
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x4000000
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z4.d
-; CHECK-SVE2-AES-NEXT: mul z5.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
-; CHECK-SVE2-AES-NEXT: mov z3.d, z1.d
-; CHECK-SVE2-AES-NEXT: mov z6.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z3.d, z3.d, #0x20000000
-; CHECK-SVE2-AES-NEXT: and z6.d, z6.d, #0x10000000
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
-; CHECK-SVE2-AES-NEXT: mov z5.d, z1.d
-; CHECK-SVE2-AES-NEXT: and z1.d, z1.d, #0x40000000
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z4.d, z0.d, z6.d
-; CHECK-SVE2-AES-NEXT: and z5.d, z5.d, #0x80000000
-; CHECK-SVE2-AES-NEXT: mul z1.d, z0.d, z1.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
-; CHECK-SVE2-AES-NEXT: mul z3.d, z0.d, z5.d
-; CHECK-SVE2-AES-NEXT: mul z0.d, z0.d, #0
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z1.d, z3.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
-; CHECK-SVE2-AES-NEXT: mov z0.d, z2.d
+; CHECK-SVE2-AES-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-SVE2-AES-NEXT: pmullt z2.q, z0.d, z1.d
+; CHECK-SVE2-AES-NEXT: pmullb z0.q, z0.d, z1.d
+; CHECK-SVE2-AES-NEXT: uzp1 z1.d, z2.d, z2.d
+; CHECK-SVE2-AES-NEXT: uzp1 z0.d, z0.d, z2.d
+; CHECK-SVE2-AES-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-SVE2-AES-NEXT: ret
%zextx = zext <vscale x 2 x i32> %x to <vscale x 2 x i64>
%zexty = zext <vscale x 2 x i32> %y to <vscale x 2 x i64>
>From 4de7e03e1994ab89f611556cd450a8d6f8d5427f Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Tue, 10 Feb 2026 14:26:26 +0000
Subject: [PATCH 2/5] Use TRN1
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 6 ++---
llvm/test/CodeGen/AArch64/clmul-scalable.ll | 24 +++++--------------
2 files changed, 8 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d7f402bf03a30..87e419b6d3667 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4256,13 +4256,11 @@ let Predicates = [HasSVEAES, HasNonStreamingSVE_or_SSVE_AES] in {
let Predicates = [HasSVEAES, HasSVE2_or_SME] in {
def : Pat<(nxv2i64 (clmul nxv2i64:$Rn, nxv2i64:$Rm)),
- (ZIP1_ZZZ_D (UZP1_ZZZ_D (PMULLB_ZZZ_Q $Rn, $Rm), (PMULLT_ZZZ_Q $Rn, $Rm)),
- (UZP1_ZZZ_D (PMULLT_ZZZ_Q $Rn, $Rm), (PMULLT_ZZZ_Q $Rn, $Rm)))>;
+ (TRN1_ZZZ_D (PMULLB_ZZZ_Q $Rn, $Rm), (PMULLT_ZZZ_Q $Rn, $Rm))>;
}
let Predicates = [HasSVE2_or_SME] in {
def : Pat<(nxv4i32 (clmul nxv4i32:$Rn, nxv4i32:$Rm)),
- (ZIP1_ZZZ_S (UZP1_ZZZ_S (PMULLB_ZZZ_D $Rn, $Rm), (PMULLT_ZZZ_D $Rn, $Rm)),
- (UZP1_ZZZ_S (PMULLT_ZZZ_D $Rn, $Rm), (PMULLT_ZZZ_D $Rn, $Rm)))>;
+ (TRN1_ZZZ_S (PMULLB_ZZZ_D $Rn, $Rm), (PMULLT_ZZZ_D $Rn, $Rm))>;
}
let Predicates = [HasSVESM4] in {
diff --git a/llvm/test/CodeGen/AArch64/clmul-scalable.ll b/llvm/test/CodeGen/AArch64/clmul-scalable.ll
index a6c1bfae6e1b9..b974340ce6cd0 100644
--- a/llvm/test/CodeGen/AArch64/clmul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/clmul-scalable.ll
@@ -379,18 +379,14 @@ define <vscale x 4 x i32> @clmul_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: pmullt z2.d, z0.s, z1.s
; CHECK-SVE2-NEXT: pmullb z0.d, z0.s, z1.s
-; CHECK-SVE2-NEXT: uzp1 z1.s, z2.s, z2.s
-; CHECK-SVE2-NEXT: uzp1 z0.s, z0.s, z2.s
-; CHECK-SVE2-NEXT: zip1 z0.s, z0.s, z1.s
+; CHECK-SVE2-NEXT: trn1 z0.s, z0.s, z2.s
; CHECK-SVE2-NEXT: ret
;
; CHECK-SVE2-AES-LABEL: clmul_nxv4i32:
; CHECK-SVE2-AES: // %bb.0:
; CHECK-SVE2-AES-NEXT: pmullt z2.d, z0.s, z1.s
; CHECK-SVE2-AES-NEXT: pmullb z0.d, z0.s, z1.s
-; CHECK-SVE2-AES-NEXT: uzp1 z1.s, z2.s, z2.s
-; CHECK-SVE2-AES-NEXT: uzp1 z0.s, z0.s, z2.s
-; CHECK-SVE2-AES-NEXT: zip1 z0.s, z0.s, z1.s
+; CHECK-SVE2-AES-NEXT: trn1 z0.s, z0.s, z2.s
; CHECK-SVE2-AES-NEXT: ret
%a = call <vscale x 4 x i32> @llvm.clmul.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
ret <vscale x 4 x i32> %a
@@ -888,9 +884,7 @@ define <vscale x 2 x i64> @clmul_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-SVE2-AES: // %bb.0:
; CHECK-SVE2-AES-NEXT: pmullt z2.q, z0.d, z1.d
; CHECK-SVE2-AES-NEXT: pmullb z0.q, z0.d, z1.d
-; CHECK-SVE2-AES-NEXT: uzp1 z1.d, z2.d, z2.d
-; CHECK-SVE2-AES-NEXT: uzp1 z0.d, z0.d, z2.d
-; CHECK-SVE2-AES-NEXT: zip1 z0.d, z0.d, z1.d
+; CHECK-SVE2-AES-NEXT: trn1 z0.d, z0.d, z2.d
; CHECK-SVE2-AES-NEXT: ret
%a = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
ret <vscale x 2 x i64> %a
@@ -1168,9 +1162,7 @@ define <vscale x 4 x i32> @clmul_nxv4i32_zext(<vscale x 4 x i16> %x, <vscale x 4
; CHECK-SVE2-NEXT: and z1.s, z1.s, #0xffff
; CHECK-SVE2-NEXT: pmullt z2.d, z0.s, z1.s
; CHECK-SVE2-NEXT: pmullb z0.d, z0.s, z1.s
-; CHECK-SVE2-NEXT: uzp1 z1.s, z2.s, z2.s
-; CHECK-SVE2-NEXT: uzp1 z0.s, z0.s, z2.s
-; CHECK-SVE2-NEXT: zip1 z0.s, z0.s, z1.s
+; CHECK-SVE2-NEXT: trn1 z0.s, z0.s, z2.s
; CHECK-SVE2-NEXT: ret
;
; CHECK-SVE2-AES-LABEL: clmul_nxv4i32_zext:
@@ -1179,9 +1171,7 @@ define <vscale x 4 x i32> @clmul_nxv4i32_zext(<vscale x 4 x i16> %x, <vscale x 4
; CHECK-SVE2-AES-NEXT: and z1.s, z1.s, #0xffff
; CHECK-SVE2-AES-NEXT: pmullt z2.d, z0.s, z1.s
; CHECK-SVE2-AES-NEXT: pmullb z0.d, z0.s, z1.s
-; CHECK-SVE2-AES-NEXT: uzp1 z1.s, z2.s, z2.s
-; CHECK-SVE2-AES-NEXT: uzp1 z0.s, z0.s, z2.s
-; CHECK-SVE2-AES-NEXT: zip1 z0.s, z0.s, z1.s
+; CHECK-SVE2-AES-NEXT: trn1 z0.s, z0.s, z2.s
; CHECK-SVE2-AES-NEXT: ret
%zextx = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
%zexty = zext <vscale x 4 x i16> %y to <vscale x 4 x i32>
@@ -1549,9 +1539,7 @@ define <vscale x 2 x i64> @clmul_nxv2i64_zext(<vscale x 2 x i32> %x, <vscale x 2
; CHECK-SVE2-AES-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-SVE2-AES-NEXT: pmullt z2.q, z0.d, z1.d
; CHECK-SVE2-AES-NEXT: pmullb z0.q, z0.d, z1.d
-; CHECK-SVE2-AES-NEXT: uzp1 z1.d, z2.d, z2.d
-; CHECK-SVE2-AES-NEXT: uzp1 z0.d, z0.d, z2.d
-; CHECK-SVE2-AES-NEXT: zip1 z0.d, z0.d, z1.d
+; CHECK-SVE2-AES-NEXT: trn1 z0.d, z0.d, z2.d
; CHECK-SVE2-AES-NEXT: ret
%zextx = zext <vscale x 2 x i32> %x to <vscale x 2 x i64>
%zexty = zext <vscale x 2 x i32> %y to <vscale x 2 x i64>
>From ecd9e2bf4fb9b43eb47a54c2dcc9d3ef7b5b0fba Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Wed, 11 Feb 2026 11:30:21 +0000
Subject: [PATCH 3/5] Rewrite predicates and move defs
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3 ++-
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 ++++++------
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4d81ea1b7ddcf..8e5b2dd4c1ca3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2015,7 +2015,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget->hasSVE2AES())
+ if (Subtarget->hasSVEAES() &&
+ (!Subtarget->isStreaming() || Subtarget->hasSSVE_AES()))
setOperationAction(ISD::CLMUL, MVT::nxv2i64, Legal);
// Handle non-aliasing elements mask
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 87e419b6d3667..b2e5cd442079b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3993,6 +3993,11 @@ let Predicates = [HasSVE2_or_SME] in {
defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb", int_aarch64_sve_pmullb_pair>;
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt", int_aarch64_sve_pmullt_pair>;
+let Predicates = [HasSVE2_or_SME] in {
+ def : Pat<(nxv4i32 (clmul nxv4i32:$Rn, nxv4i32:$Rm)),
+ (TRN1_ZZZ_S (PMULLB_ZZZ_D $Rn, $Rm), (PMULLT_ZZZ_D $Rn, $Rm))>;
+}
+
// SVE2 bitwise shift and insert
defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", AArch64vsri>;
defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", AArch64vsli>;
@@ -4252,16 +4257,11 @@ let Predicates = [HasSVEAES, HasNonStreamingSVE_or_SSVE_AES] in {
// to NEON PMULL2 instruction.
defm PMULLB_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11010, "pmullb", int_aarch64_sve_pmullb_pair>;
defm PMULLT_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11011, "pmullt", int_aarch64_sve_pmullt_pair>;
-}
-let Predicates = [HasSVEAES, HasSVE2_or_SME] in {
+ // nxv2i64 clmul: TRN1 keeps the low 64 bits of each 128-bit PMULLB/PMULLT result
def : Pat<(nxv2i64 (clmul nxv2i64:$Rn, nxv2i64:$Rm)),
(TRN1_ZZZ_D (PMULLB_ZZZ_Q $Rn, $Rm), (PMULLT_ZZZ_Q $Rn, $Rm))>;
}
-let Predicates = [HasSVE2_or_SME] in {
- def : Pat<(nxv4i32 (clmul nxv4i32:$Rn, nxv4i32:$Rm)),
- (TRN1_ZZZ_S (PMULLB_ZZZ_D $Rn, $Rm), (PMULLT_ZZZ_D $Rn, $Rm))>;
-}
let Predicates = [HasSVESM4] in {
// SVE2 crypto constructive binary operations
>From 030aad78136e3c2b88f0ef3324d3ec37c97c59b9 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Wed, 11 Feb 2026 14:17:05 +0000
Subject: [PATCH 4/5] Re-re-write predicates and increase test coverage
---
.../Target/AArch64/AArch64ISelLowering.cpp | 2 +-
llvm/test/CodeGen/AArch64/clmul-scalable.ll | 1039 +++++++++++++++++
2 files changed, 1040 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8e5b2dd4c1ca3..98000bceffa50 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2016,7 +2016,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
if (Subtarget->hasSVEAES() &&
- (!Subtarget->isStreaming() || Subtarget->hasSSVE_AES()))
+ (Subtarget->isSVEAvailable() || Subtarget->hasSSVE_AES()))
setOperationAction(ISD::CLMUL, MVT::nxv2i64, Legal);
// Handle non-aliasing elements mask
diff --git a/llvm/test/CodeGen/AArch64/clmul-scalable.ll b/llvm/test/CodeGen/AArch64/clmul-scalable.ll
index b974340ce6cd0..df7b7542d6cac 100644
--- a/llvm/test/CodeGen/AArch64/clmul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/clmul-scalable.ll
@@ -1,5 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+sve | FileCheck %s --check-prefix=CHECK-SVE
+; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+sve,sve-aes | FileCheck %s --check-prefix=CHECK-SVE-AES
+; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+sme -force-streaming | FileCheck %s --check-prefix=CHECK-SME-STREAMING
+; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+sme,ssve-aes -force-streaming | FileCheck %s --check-prefix=CHECK-SME-STREAMING-SSVE-AES
; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+sve2 | FileCheck %s --check-prefix=CHECK-SVE2
; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+sve2-aes | FileCheck %s --check-prefix=CHECK-SVE2-AES
@@ -39,6 +42,51 @@ define <vscale x 16 x i8> @clmul_nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-SVE-NEXT: eor z0.d, z1.d, z0.d
; CHECK-SVE-NEXT: ret
;
+; CHECK-SVE-AES-LABEL: clmul_nxv16i8:
+; CHECK-SVE-AES: // %bb.0:
+; CHECK-SVE-AES-NEXT: mov z2.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z7.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z24.d, z1.d
+; CHECK-SVE-AES-NEXT: ptrue p0.b
+; CHECK-SVE-AES-NEXT: and z1.b, z1.b, #0x80
+; CHECK-SVE-AES-NEXT: and z2.b, z2.b, #0x2
+; CHECK-SVE-AES-NEXT: and z3.b, z3.b, #0x1
+; CHECK-SVE-AES-NEXT: and z4.b, z4.b, #0x4
+; CHECK-SVE-AES-NEXT: and z5.b, z5.b, #0x8
+; CHECK-SVE-AES-NEXT: and z6.b, z6.b, #0x10
+; CHECK-SVE-AES-NEXT: and z7.b, z7.b, #0x20
+; CHECK-SVE-AES-NEXT: and z24.b, z24.b, #0x40
+; CHECK-SVE-AES-NEXT: mul z2.b, p0/m, z2.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z3.b, p0/m, z3.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z4.b, p0/m, z4.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z5.b, p0/m, z5.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z6.b, p0/m, z6.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z7.b, p0/m, z7.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z24.b, p0/m, z24.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z0.b, p0/m, z0.b, z1.b
+; CHECK-SVE-AES-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z5.d
+; CHECK-SVE-AES-NEXT: eor z4.d, z6.d, z7.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z24.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: ret
+;
+; CHECK-SME-STREAMING-LABEL: clmul_nxv16i8:
+; CHECK-SME-STREAMING: // %bb.0:
+; CHECK-SME-STREAMING-NEXT: pmul z0.b, z0.b, z1.b
+; CHECK-SME-STREAMING-NEXT: ret
+;
+; CHECK-SME-STREAMING-SSVE-AES-LABEL: clmul_nxv16i8:
+; CHECK-SME-STREAMING-SSVE-AES: // %bb.0:
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmul z0.b, z0.b, z1.b
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: ret
+;
; CHECK-SVE2-LABEL: clmul_nxv16i8:
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: pmul z0.b, z0.b, z1.b
@@ -120,6 +168,193 @@ define <vscale x 8 x i16> @clmul_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-SVE-NEXT: eor z0.d, z1.d, z0.d
; CHECK-SVE-NEXT: ret
;
+; CHECK-SVE-AES-LABEL: clmul_nxv8i16:
+; CHECK-SVE-AES: // %bb.0:
+; CHECK-SVE-AES-NEXT: mov z2.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SVE-AES-NEXT: ptrue p0.h
+; CHECK-SVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z7.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z24.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z25.d, z1.d
+; CHECK-SVE-AES-NEXT: and z2.h, z2.h, #0x2
+; CHECK-SVE-AES-NEXT: and z3.h, z3.h, #0x1
+; CHECK-SVE-AES-NEXT: and z4.h, z4.h, #0x4
+; CHECK-SVE-AES-NEXT: and z5.h, z5.h, #0x8
+; CHECK-SVE-AES-NEXT: mov z26.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z27.d, z1.d
+; CHECK-SVE-AES-NEXT: and z6.h, z6.h, #0x10
+; CHECK-SVE-AES-NEXT: and z7.h, z7.h, #0x20
+; CHECK-SVE-AES-NEXT: and z24.h, z24.h, #0x80
+; CHECK-SVE-AES-NEXT: mul z2.h, p0/m, z2.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z3.h, p0/m, z3.h, z0.h
+; CHECK-SVE-AES-NEXT: and z25.h, z25.h, #0x100
+; CHECK-SVE-AES-NEXT: mul z4.h, p0/m, z4.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z5.h, p0/m, z5.h, z0.h
+; CHECK-SVE-AES-NEXT: mov z28.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z6.h, p0/m, z6.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z7.h, p0/m, z7.h, z0.h
+; CHECK-SVE-AES-NEXT: and z26.h, z26.h, #0x800
+; CHECK-SVE-AES-NEXT: mul z24.h, p0/m, z24.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z25.h, p0/m, z25.h, z0.h
+; CHECK-SVE-AES-NEXT: and z27.h, z27.h, #0x1000
+; CHECK-SVE-AES-NEXT: mov z29.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z30.d, z1.d
+; CHECK-SVE-AES-NEXT: and z28.h, z28.h, #0x40
+; CHECK-SVE-AES-NEXT: mul z26.h, p0/m, z26.h, z0.h
+; CHECK-SVE-AES-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z5.d
+; CHECK-SVE-AES-NEXT: mul z27.h, p0/m, z27.h, z0.h
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z28.h, p0/m, z28.h, z0.h
+; CHECK-SVE-AES-NEXT: and z29.h, z29.h, #0x200
+; CHECK-SVE-AES-NEXT: and z30.h, z30.h, #0x2000
+; CHECK-SVE-AES-NEXT: eor z6.d, z6.d, z7.d
+; CHECK-SVE-AES-NEXT: eor z7.d, z24.d, z25.d
+; CHECK-SVE-AES-NEXT: and z1.h, z1.h, #0x8000
+; CHECK-SVE-AES-NEXT: and z4.h, z4.h, #0x400
+; CHECK-SVE-AES-NEXT: and z5.h, z5.h, #0x4000
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: mul z29.h, p0/m, z29.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z30.h, p0/m, z30.h, z0.h
+; CHECK-SVE-AES-NEXT: eor z24.d, z26.d, z27.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z6.d, z28.d
+; CHECK-SVE-AES-NEXT: mul z4.h, p0/m, z4.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z5.h, p0/m, z5.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-SVE-AES-NEXT: eor z6.d, z7.d, z29.d
+; CHECK-SVE-AES-NEXT: eor z7.d, z24.d, z30.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z6.d, z4.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z7.d, z5.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z1.d, z2.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z3.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: ret
+;
+; CHECK-SME-STREAMING-LABEL: clmul_nxv8i16:
+; CHECK-SME-STREAMING: // %bb.0:
+; CHECK-SME-STREAMING-NEXT: mov z2.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z2.h, z2.h, #0x2
+; CHECK-SME-STREAMING-NEXT: and z3.h, z3.h, #0x1
+; CHECK-SME-STREAMING-NEXT: and z4.h, z4.h, #0x8
+; CHECK-SME-STREAMING-NEXT: and z5.h, z5.h, #0x4
+; CHECK-SME-STREAMING-NEXT: and z6.h, z6.h, #0x20
+; CHECK-SME-STREAMING-NEXT: mul z2.h, z0.h, z2.h
+; CHECK-SME-STREAMING-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-NEXT: mul z4.h, z0.h, z4.h
+; CHECK-SME-STREAMING-NEXT: mul z5.h, z0.h, z5.h
+; CHECK-SME-STREAMING-NEXT: mul z6.h, z0.h, z6.h
+; CHECK-SME-STREAMING-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.h, z3.h, #0x10
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-NEXT: and z4.h, z4.h, #0x80
+; CHECK-SME-STREAMING-NEXT: and z5.h, z5.h, #0x40
+; CHECK-SME-STREAMING-NEXT: mul z4.h, z0.h, z4.h
+; CHECK-SME-STREAMING-NEXT: mul z5.h, z0.h, z5.h
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.h, z3.h, #0x200
+; CHECK-SME-STREAMING-NEXT: and z6.h, z6.h, #0x100
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-NEXT: mul z6.h, z0.h, z6.h
+; CHECK-SME-STREAMING-NEXT: and z4.h, z4.h, #0x800
+; CHECK-SME-STREAMING-NEXT: and z5.h, z5.h, #0x400
+; CHECK-SME-STREAMING-NEXT: mul z4.h, z0.h, z4.h
+; CHECK-SME-STREAMING-NEXT: mul z5.h, z0.h, z5.h
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.h, z3.h, #0x2000
+; CHECK-SME-STREAMING-NEXT: and z6.h, z6.h, #0x1000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z1.h, z1.h, #0x4000
+; CHECK-SME-STREAMING-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-NEXT: mul z4.h, z0.h, z6.h
+; CHECK-SME-STREAMING-NEXT: and z5.h, z5.h, #0x8000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z3.h, z0.h, z5.h
+; CHECK-SME-STREAMING-NEXT: mul z0.h, z0.h, z1.h
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z0.d, z2.d
+; CHECK-SME-STREAMING-NEXT: ret
+;
+; CHECK-SME-STREAMING-SSVE-AES-LABEL: clmul_nxv8i16:
+; CHECK-SME-STREAMING-SSVE-AES: // %bb.0:
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z2.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z2.h, z2.h, #0x2
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z3.h, z3.h, #0x1
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z4.h, z4.h, #0x8
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z5.h, z5.h, #0x4
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z6.h, z6.h, #0x20
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z2.h, z0.h, z2.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z4.h, z0.h, z4.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z5.h, z0.h, z5.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z6.h, z0.h, z6.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z3.h, z3.h, #0x10
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z4.h, z4.h, #0x80
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z5.h, z5.h, #0x40
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z4.h, z0.h, z4.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z5.h, z0.h, z5.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z3.h, z3.h, #0x200
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z6.h, z6.h, #0x100
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z6.h, z0.h, z6.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z4.h, z4.h, #0x800
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z5.h, z5.h, #0x400
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z4.h, z0.h, z4.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z5.h, z0.h, z5.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z3.h, z3.h, #0x2000
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z6.h, z6.h, #0x1000
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z1.h, z1.h, #0x4000
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z4.h, z0.h, z6.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z5.h, z5.h, #0x8000
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z3.h, z0.h, z5.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z0.h, z0.h, z1.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z0.d, z3.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z0.d, z2.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: ret
+;
; CHECK-SVE2-LABEL: clmul_nxv8i16:
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: mov z2.d, z1.d
@@ -375,6 +610,151 @@ define <vscale x 4 x i32> @clmul_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-SVE-NEXT: eor z0.d, z1.d, z0.d
; CHECK-SVE-NEXT: ret
;
+; CHECK-SVE-AES-LABEL: clmul_nxv4i32:
+; CHECK-SVE-AES: // %bb.0:
+; CHECK-SVE-AES-NEXT: mov z2.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SVE-AES-NEXT: ptrue p0.s
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z7.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z24.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z25.d, z1.d
+; CHECK-SVE-AES-NEXT: and z2.s, z2.s, #0x2
+; CHECK-SVE-AES-NEXT: and z3.s, z3.s, #0x1
+; CHECK-SVE-AES-NEXT: mov z26.d, z1.d
+; CHECK-SVE-AES-NEXT: and z4.s, z4.s, #0x4
+; CHECK-SVE-AES-NEXT: and z5.s, z5.s, #0x8
+; CHECK-SVE-AES-NEXT: and z6.s, z6.s, #0x10
+; CHECK-SVE-AES-NEXT: and z7.s, z7.s, #0x20
+; CHECK-SVE-AES-NEXT: and z24.s, z24.s, #0x80
+; CHECK-SVE-AES-NEXT: and z25.s, z25.s, #0x100
+; CHECK-SVE-AES-NEXT: mul z2.s, p0/m, z2.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z3.s, p0/m, z3.s, z0.s
+; CHECK-SVE-AES-NEXT: mov z27.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z4.s, p0/m, z4.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z5.s, p0/m, z5.s, z0.s
+; CHECK-SVE-AES-NEXT: and z26.s, z26.s, #0x40
+; CHECK-SVE-AES-NEXT: mul z6.s, p0/m, z6.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z7.s, p0/m, z7.s, z0.s
+; CHECK-SVE-AES-NEXT: mov z28.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z24.s, p0/m, z24.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z25.s, p0/m, z25.s, z0.s
+; CHECK-SVE-AES-NEXT: and z27.s, z27.s, #0x200
+; CHECK-SVE-AES-NEXT: mul z26.s, p0/m, z26.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z4.d, z4.d, z5.d
+; CHECK-SVE-AES-NEXT: mov z29.d, z1.d
+; CHECK-SVE-AES-NEXT: and z28.s, z28.s, #0x8000
+; CHECK-SVE-AES-NEXT: mul z27.s, p0/m, z27.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z5.d, z6.d, z7.d
+; CHECK-SVE-AES-NEXT: mov z7.d, z1.d
+; CHECK-SVE-AES-NEXT: and z3.s, z3.s, #0x400
+; CHECK-SVE-AES-NEXT: eor z6.d, z24.d, z25.d
+; CHECK-SVE-AES-NEXT: mov z24.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z28.s, p0/m, z28.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z4.d
+; CHECK-SVE-AES-NEXT: mov z25.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z4.d, z5.d, z26.d
+; CHECK-SVE-AES-NEXT: and z7.s, z7.s, #0x800
+; CHECK-SVE-AES-NEXT: mov z26.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z3.s, p0/m, z3.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z5.d, z6.d, z27.d
+; CHECK-SVE-AES-NEXT: and z24.s, z24.s, #0x1000
+; CHECK-SVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SVE-AES-NEXT: and z25.s, z25.s, #0x800000
+; CHECK-SVE-AES-NEXT: mov z27.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z7.s, p0/m, z7.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z4.d
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z24.s, p0/m, z24.s, z0.s
+; CHECK-SVE-AES-NEXT: and z26.s, z26.s, #0x40000
+; CHECK-SVE-AES-NEXT: and z29.s, z29.s, #0x100000
+; CHECK-SVE-AES-NEXT: mul z25.s, p0/m, z25.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z3.d, z5.d, z3.d
+; CHECK-SVE-AES-NEXT: and z6.s, z6.s, #0x2000
+; CHECK-SVE-AES-NEXT: and z4.s, z4.s, #0x10000
+; CHECK-SVE-AES-NEXT: and z27.s, z27.s, #0x1000000
+; CHECK-SVE-AES-NEXT: mul z26.s, p0/m, z26.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z29.s, p0/m, z29.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z6.s, p0/m, z6.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z4.s, p0/m, z4.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z27.s, p0/m, z27.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z5.d, z7.d, z24.d
+; CHECK-SVE-AES-NEXT: mov z24.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z7.d, z1.d
+; CHECK-SVE-AES-NEXT: and z3.s, z3.s, #0x20000
+; CHECK-SVE-AES-NEXT: eor z5.d, z5.d, z6.d
+; CHECK-SVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SVE-AES-NEXT: and z24.s, z24.s, #0x400000
+; CHECK-SVE-AES-NEXT: and z7.s, z7.s, #0x4000
+; CHECK-SVE-AES-NEXT: mul z3.s, p0/m, z3.s, z0.s
+; CHECK-SVE-AES-NEXT: and z6.s, z6.s, #0x80000
+; CHECK-SVE-AES-NEXT: mul z24.s, p0/m, z24.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z7.s, p0/m, z7.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z3.d
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z6.s, p0/m, z6.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z24.d, z24.d, z25.d
+; CHECK-SVE-AES-NEXT: mov z25.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z5.d, z5.d, z7.d
+; CHECK-SVE-AES-NEXT: and z4.s, z4.s, #0x2000000
+; CHECK-SVE-AES-NEXT: eor z3.d, z3.d, z26.d
+; CHECK-SVE-AES-NEXT: mov z26.d, z1.d
+; CHECK-SVE-AES-NEXT: and z25.s, z25.s, #0x4000000
+; CHECK-SVE-AES-NEXT: eor z7.d, z24.d, z27.d
+; CHECK-SVE-AES-NEXT: mov z24.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z4.s, p0/m, z4.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z3.d, z3.d, z6.d
+; CHECK-SVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SVE-AES-NEXT: and z26.s, z26.s, #0x200000
+; CHECK-SVE-AES-NEXT: mov z27.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z5.d, z5.d, z28.d
+; CHECK-SVE-AES-NEXT: mul z25.s, p0/m, z25.s, z0.s
+; CHECK-SVE-AES-NEXT: and z24.s, z24.s, #0x20000000
+; CHECK-SVE-AES-NEXT: and z6.s, z6.s, #0x8000000
+; CHECK-SVE-AES-NEXT: eor z3.d, z3.d, z29.d
+; CHECK-SVE-AES-NEXT: eor z4.d, z7.d, z4.d
+; CHECK-SVE-AES-NEXT: mov z7.d, z1.d
+; CHECK-SVE-AES-NEXT: mul z26.s, p0/m, z26.s, z0.s
+; CHECK-SVE-AES-NEXT: and z27.s, z27.s, #0x40000000
+; CHECK-SVE-AES-NEXT: mul z24.s, p0/m, z24.s, z0.s
+; CHECK-SVE-AES-NEXT: and z1.s, z1.s, #0x80000000
+; CHECK-SVE-AES-NEXT: mul z6.s, p0/m, z6.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z5.d
+; CHECK-SVE-AES-NEXT: and z7.s, z7.s, #0x10000000
+; CHECK-SVE-AES-NEXT: eor z4.d, z4.d, z25.d
+; CHECK-SVE-AES-NEXT: mul z27.s, p0/m, z27.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z3.d, z3.d, z26.d
+; CHECK-SVE-AES-NEXT: mul z7.s, p0/m, z7.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z4.d, z4.d, z6.d
+; CHECK-SVE-AES-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-SVE-AES-NEXT: eor z1.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z24.d, z27.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z4.d, z7.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z3.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z1.d, z2.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: ret
+;
+; CHECK-SME-STREAMING-LABEL: clmul_nxv4i32:
+; CHECK-SME-STREAMING: // %bb.0:
+; CHECK-SME-STREAMING-NEXT: pmullt z2.d, z0.s, z1.s
+; CHECK-SME-STREAMING-NEXT: pmullb z0.d, z0.s, z1.s
+; CHECK-SME-STREAMING-NEXT: trn1 z0.s, z0.s, z2.s
+; CHECK-SME-STREAMING-NEXT: ret
+;
+; CHECK-SME-STREAMING-SSVE-AES-LABEL: clmul_nxv4i32:
+; CHECK-SME-STREAMING-SSVE-AES: // %bb.0:
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmullt z2.d, z0.s, z1.s
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmullb z0.d, z0.s, z1.s
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: trn1 z0.s, z0.s, z2.s
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: ret
+;
; CHECK-SVE2-LABEL: clmul_nxv4i32:
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: pmullt z2.d, z0.s, z1.s
@@ -652,6 +1032,248 @@ define <vscale x 2 x i64> @clmul_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-SVE-NEXT: eor z0.d, z1.d, z0.d
; CHECK-SVE-NEXT: ret
;
+; CHECK-SVE-AES-LABEL: clmul_nxv2i64:
+; CHECK-SVE-AES: // %bb.0:
+; CHECK-SVE-AES-NEXT: pmullt z2.q, z0.d, z1.d
+; CHECK-SVE-AES-NEXT: pmullb z0.q, z0.d, z1.d
+; CHECK-SVE-AES-NEXT: trn1 z0.d, z0.d, z2.d
+; CHECK-SVE-AES-NEXT: ret
+;
+; CHECK-SME-STREAMING-LABEL: clmul_nxv2i64:
+; CHECK-SME-STREAMING: // %bb.0:
+; CHECK-SME-STREAMING-NEXT: mov z2.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z2.d, z2.d, #0x2
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x1
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x8
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x4
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x20
+; CHECK-SME-STREAMING-NEXT: mul z2.d, z0.d, z2.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x10
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x80
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x40
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x200
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x100
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x800
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x400
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x2000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x1000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x8000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x4000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x20000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x10000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x80000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x40000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x200000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x100000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x800000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x400000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x2000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x1000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x8000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x4000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x20000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x10000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x80000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x40000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x200000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x100000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x800000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x400000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x2000000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x1000000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x8000000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x4000000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x20000000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x10000000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x80000000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x40000000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x200000000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x100000000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x800000000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x400000000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x2000000000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x1000000000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x8000000000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x4000000000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x20000000000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x10000000000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x80000000000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x40000000000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x200000000000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x100000000000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x800000000000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x400000000000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x2000000000000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x1000000000000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z1.d, z1.d, #0x4000000000000000
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x8000000000000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: mul z0.d, z0.d, z1.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z0.d, z2.d
+; CHECK-SME-STREAMING-NEXT: ret
+;
+; CHECK-SME-STREAMING-SSVE-AES-LABEL: clmul_nxv2i64:
+; CHECK-SME-STREAMING-SSVE-AES: // %bb.0:
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmullt z2.q, z0.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmullb z0.q, z0.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: trn1 z0.d, z0.d, z2.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: ret
+;
; CHECK-SVE2-LABEL: clmul_nxv2i64:
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: mov z2.d, z1.d
@@ -922,6 +1544,45 @@ define <vscale x 16 x i8> @clmul_nxv16i8_zext(<vscale x 16 x i4> %x, <vscale x 1
; CHECK-SVE-NEXT: eor z0.d, z1.d, z0.d
; CHECK-SVE-NEXT: ret
;
+; CHECK-SVE-AES-LABEL: clmul_nxv16i8_zext:
+; CHECK-SVE-AES: // %bb.0:
+; CHECK-SVE-AES-NEXT: mov z2.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: and z0.b, z0.b, #0xf
+; CHECK-SVE-AES-NEXT: and z1.b, z1.b, #0x8
+; CHECK-SVE-AES-NEXT: ptrue p0.b
+; CHECK-SVE-AES-NEXT: and z2.b, z2.b, #0x2
+; CHECK-SVE-AES-NEXT: and z3.b, z3.b, #0x1
+; CHECK-SVE-AES-NEXT: and z4.b, z4.b, #0x4
+; CHECK-SVE-AES-NEXT: mul z1.b, p0/m, z1.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z2.b, p0/m, z2.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z3.b, p0/m, z3.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z4.b, p0/m, z4.b, z0.b
+; CHECK-SVE-AES-NEXT: mul z0.b, z0.b, #0
+; CHECK-SVE-AES-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z4.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z0.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z2.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z3.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z1.d, z2.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: ret
+;
+; CHECK-SME-STREAMING-LABEL: clmul_nxv16i8_zext:
+; CHECK-SME-STREAMING: // %bb.0:
+; CHECK-SME-STREAMING-NEXT: and z0.b, z0.b, #0xf
+; CHECK-SME-STREAMING-NEXT: and z1.b, z1.b, #0xf
+; CHECK-SME-STREAMING-NEXT: pmul z0.b, z0.b, z1.b
+; CHECK-SME-STREAMING-NEXT: ret
+;
+; CHECK-SME-STREAMING-SSVE-AES-LABEL: clmul_nxv16i8_zext:
+; CHECK-SME-STREAMING-SSVE-AES: // %bb.0:
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z0.b, z0.b, #0xf
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z1.b, z1.b, #0xf
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmul z0.b, z0.b, z1.b
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: ret
+;
; CHECK-SVE2-LABEL: clmul_nxv16i8_zext:
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: and z0.b, z0.b, #0xf
@@ -987,6 +1648,127 @@ define <vscale x 8 x i16> @clmul_nxv8i16_zext(<vscale x 8 x i8> %x, <vscale x 8
; CHECK-SVE-NEXT: eor z0.d, z1.d, z0.d
; CHECK-SVE-NEXT: ret
;
+; CHECK-SVE-AES-LABEL: clmul_nxv8i16_zext:
+; CHECK-SVE-AES: // %bb.0:
+; CHECK-SVE-AES-NEXT: mov z2.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z7.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z24.d, z1.d
+; CHECK-SVE-AES-NEXT: and z0.h, z0.h, #0xff
+; CHECK-SVE-AES-NEXT: and z1.h, z1.h, #0x80
+; CHECK-SVE-AES-NEXT: and z2.h, z2.h, #0x2
+; CHECK-SVE-AES-NEXT: and z3.h, z3.h, #0x1
+; CHECK-SVE-AES-NEXT: and z4.h, z4.h, #0x4
+; CHECK-SVE-AES-NEXT: and z5.h, z5.h, #0x8
+; CHECK-SVE-AES-NEXT: and z6.h, z6.h, #0x10
+; CHECK-SVE-AES-NEXT: and z7.h, z7.h, #0x20
+; CHECK-SVE-AES-NEXT: and z24.h, z24.h, #0x40
+; CHECK-SVE-AES-NEXT: ptrue p0.h
+; CHECK-SVE-AES-NEXT: mul z2.h, p0/m, z2.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z3.h, p0/m, z3.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z4.h, p0/m, z4.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z5.h, p0/m, z5.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z6.h, p0/m, z6.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z7.h, p0/m, z7.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z24.h, p0/m, z24.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z1.h, p0/m, z1.h, z0.h
+; CHECK-SVE-AES-NEXT: mul z0.h, z0.h, #0
+; CHECK-SVE-AES-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z5.d
+; CHECK-SVE-AES-NEXT: eor z4.d, z6.d, z7.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z5.d, z0.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z24.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z4.d, z5.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z2.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z3.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: ret
+;
+; CHECK-SME-STREAMING-LABEL: clmul_nxv8i16_zext:
+; CHECK-SME-STREAMING: // %bb.0:
+; CHECK-SME-STREAMING-NEXT: mov z2.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z0.h, z0.h, #0xff
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z2.h, z2.h, #0x2
+; CHECK-SME-STREAMING-NEXT: and z3.h, z3.h, #0x1
+; CHECK-SME-STREAMING-NEXT: and z4.h, z4.h, #0x8
+; CHECK-SME-STREAMING-NEXT: and z5.h, z5.h, #0x4
+; CHECK-SME-STREAMING-NEXT: and z6.h, z6.h, #0x20
+; CHECK-SME-STREAMING-NEXT: mul z2.h, z0.h, z2.h
+; CHECK-SME-STREAMING-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-NEXT: mul z4.h, z0.h, z4.h
+; CHECK-SME-STREAMING-NEXT: mul z5.h, z0.h, z5.h
+; CHECK-SME-STREAMING-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.h, z3.h, #0x10
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z4.h, z0.h, z6.h
+; CHECK-SME-STREAMING-NEXT: and z1.h, z1.h, #0x40
+; CHECK-SME-STREAMING-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-NEXT: and z5.h, z5.h, #0x80
+; CHECK-SME-STREAMING-NEXT: mul z1.h, z0.h, z1.h
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z3.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z3.h, z0.h, z5.h
+; CHECK-SME-STREAMING-NEXT: mul z0.h, z0.h, #0
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z1.d, z3.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: mov z0.d, z2.d
+; CHECK-SME-STREAMING-NEXT: ret
+;
+; CHECK-SME-STREAMING-SSVE-AES-LABEL: clmul_nxv8i16_zext:
+; CHECK-SME-STREAMING-SSVE-AES: // %bb.0:
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z2.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z0.h, z0.h, #0xff
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z2.h, z2.h, #0x2
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z3.h, z3.h, #0x1
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z4.h, z4.h, #0x8
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z5.h, z5.h, #0x4
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z6.h, z6.h, #0x20
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z2.h, z0.h, z2.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z4.h, z0.h, z4.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z5.h, z0.h, z5.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z3.h, z3.h, #0x10
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z4.h, z0.h, z6.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z1.h, z1.h, #0x40
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z3.h, z0.h, z3.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z5.h, z5.h, #0x80
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z1.h, z0.h, z1.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z3.d, z4.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z3.h, z0.h, z5.h
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mul z0.h, z0.h, #0
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z1.d, z3.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: mov z0.d, z2.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: ret
+;
; CHECK-SVE2-LABEL: clmul_nxv8i16_zext:
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: mov z2.d, z1.d
@@ -1156,6 +1938,111 @@ define <vscale x 4 x i32> @clmul_nxv4i32_zext(<vscale x 4 x i16> %x, <vscale x 4
; CHECK-SVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-SVE-NEXT: ret
;
+; CHECK-SVE-AES-LABEL: clmul_nxv4i32_zext:
+; CHECK-SVE-AES: // %bb.0:
+; CHECK-SVE-AES-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SVE-AES-NEXT: addvl sp, sp, #-1
+; CHECK-SVE-AES-NEXT: str z8, [sp] // 16-byte Folded Spill
+; CHECK-SVE-AES-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-SVE-AES-NEXT: .cfi_offset w29, -16
+; CHECK-SVE-AES-NEXT: .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16
+; CHECK-SVE-AES-NEXT: mov z2.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z3.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z4.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z5.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z6.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z7.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z24.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z25.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z26.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z27.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z28.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z29.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z30.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z31.d, z1.d
+; CHECK-SVE-AES-NEXT: mov z8.d, z1.d
+; CHECK-SVE-AES-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-AES-NEXT: and z2.s, z2.s, #0x2
+; CHECK-SVE-AES-NEXT: and z3.s, z3.s, #0x1
+; CHECK-SVE-AES-NEXT: and z4.s, z4.s, #0x4
+; CHECK-SVE-AES-NEXT: and z5.s, z5.s, #0x8
+; CHECK-SVE-AES-NEXT: and z6.s, z6.s, #0x10
+; CHECK-SVE-AES-NEXT: and z7.s, z7.s, #0x20
+; CHECK-SVE-AES-NEXT: and z24.s, z24.s, #0x40
+; CHECK-SVE-AES-NEXT: and z25.s, z25.s, #0x80
+; CHECK-SVE-AES-NEXT: and z26.s, z26.s, #0x100
+; CHECK-SVE-AES-NEXT: and z27.s, z27.s, #0x200
+; CHECK-SVE-AES-NEXT: and z28.s, z28.s, #0x400
+; CHECK-SVE-AES-NEXT: and z29.s, z29.s, #0x800
+; CHECK-SVE-AES-NEXT: and z30.s, z30.s, #0x1000
+; CHECK-SVE-AES-NEXT: and z31.s, z31.s, #0x2000
+; CHECK-SVE-AES-NEXT: and z8.s, z8.s, #0x4000
+; CHECK-SVE-AES-NEXT: and z1.s, z1.s, #0x8000
+; CHECK-SVE-AES-NEXT: ptrue p0.s
+; CHECK-SVE-AES-NEXT: mul z2.s, p0/m, z2.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z3.s, p0/m, z3.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z4.s, p0/m, z4.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z5.s, p0/m, z5.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z6.s, p0/m, z6.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z7.s, p0/m, z7.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z24.s, p0/m, z24.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z25.s, p0/m, z25.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z26.s, p0/m, z26.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z27.s, p0/m, z27.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SVE-AES-NEXT: mul z28.s, p0/m, z28.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z29.s, p0/m, z29.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z5.d
+; CHECK-SVE-AES-NEXT: mul z30.s, p0/m, z30.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z31.s, p0/m, z31.s, z0.s
+; CHECK-SVE-AES-NEXT: eor z4.d, z6.d, z7.d
+; CHECK-SVE-AES-NEXT: mul z8.s, p0/m, z8.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z1.s, p0/m, z1.s, z0.s
+; CHECK-SVE-AES-NEXT: mul z0.s, z0.s, #0
+; CHECK-SVE-AES-NEXT: eor z5.d, z25.d, z26.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z24.d
+; CHECK-SVE-AES-NEXT: eor z6.d, z29.d, z30.d
+; CHECK-SVE-AES-NEXT: eor z4.d, z5.d, z27.d
+; CHECK-SVE-AES-NEXT: eor z7.d, z0.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z5.d, z6.d, z31.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z4.d, z28.d
+; CHECK-SVE-AES-NEXT: eor z6.d, z7.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z4.d, z5.d, z8.d
+; CHECK-SVE-AES-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK-SVE-AES-NEXT: eor z2.d, z2.d, z3.d
+; CHECK-SVE-AES-NEXT: eor z5.d, z6.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z4.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z3.d, z5.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z2.d, z1.d
+; CHECK-SVE-AES-NEXT: eor z2.d, z3.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z1.d, z1.d, z2.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-SVE-AES-NEXT: eor z0.d, z0.d, z6.d
+; CHECK-SVE-AES-NEXT: addvl sp, sp, #1
+; CHECK-SVE-AES-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-SVE-AES-NEXT: ret
+;
+; CHECK-SME-STREAMING-LABEL: clmul_nxv4i32_zext:
+; CHECK-SME-STREAMING: // %bb.0:
+; CHECK-SME-STREAMING-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SME-STREAMING-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SME-STREAMING-NEXT: pmullt z2.d, z0.s, z1.s
+; CHECK-SME-STREAMING-NEXT: pmullb z0.d, z0.s, z1.s
+; CHECK-SME-STREAMING-NEXT: trn1 z0.s, z0.s, z2.s
+; CHECK-SME-STREAMING-NEXT: ret
+;
+; CHECK-SME-STREAMING-SSVE-AES-LABEL: clmul_nxv4i32_zext:
+; CHECK-SME-STREAMING-SSVE-AES: // %bb.0:
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmullt z2.d, z0.s, z1.s
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmullb z0.d, z0.s, z1.s
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: trn1 z0.s, z0.s, z2.s
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: ret
+;
; CHECK-SVE2-LABEL: clmul_nxv4i32_zext:
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: and z0.s, z0.s, #0xffff
@@ -1399,6 +2286,158 @@ define <vscale x 2 x i64> @clmul_nxv2i64_zext(<vscale x 2 x i32> %x, <vscale x 2
; CHECK-SVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-SVE-NEXT: ret
;
+; CHECK-SVE-AES-LABEL: clmul_nxv2i64_zext:
+; CHECK-SVE-AES: // %bb.0:
+; CHECK-SVE-AES-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-AES-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-SVE-AES-NEXT: pmullt z2.q, z0.d, z1.d
+; CHECK-SVE-AES-NEXT: pmullb z0.q, z0.d, z1.d
+; CHECK-SVE-AES-NEXT: trn1 z0.d, z0.d, z2.d
+; CHECK-SVE-AES-NEXT: ret
+;
+; CHECK-SME-STREAMING-LABEL: clmul_nxv2i64_zext:
+; CHECK-SME-STREAMING: // %bb.0:
+; CHECK-SME-STREAMING-NEXT: mov z2.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z2.d, z2.d, #0x2
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x1
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x8
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x4
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x20
+; CHECK-SME-STREAMING-NEXT: mul z2.d, z0.d, z2.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: eor z2.d, z3.d, z2.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x10
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x80
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x40
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z3.d, z6.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x200
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x100
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x800
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x400
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x2000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x1000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x8000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x4000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x20000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x10000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x80000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x40000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x200000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x100000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x800000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x400000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x2000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x1000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z4.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z6.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z4.d, z4.d, #0x8000000
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x4000000
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mul z5.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mov z3.d, z1.d
+; CHECK-SME-STREAMING-NEXT: mov z6.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z3.d, z3.d, #0x20000000
+; CHECK-SME-STREAMING-NEXT: and z6.d, z6.d, #0x10000000
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-SME-STREAMING-NEXT: mov z5.d, z1.d
+; CHECK-SME-STREAMING-NEXT: and z1.d, z1.d, #0x40000000
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z4.d, z0.d, z6.d
+; CHECK-SME-STREAMING-NEXT: and z5.d, z5.d, #0x80000000
+; CHECK-SME-STREAMING-NEXT: mul z1.d, z0.d, z1.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z4.d, z3.d
+; CHECK-SME-STREAMING-NEXT: mul z3.d, z0.d, z5.d
+; CHECK-SME-STREAMING-NEXT: mul z0.d, z0.d, #0
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z1.d, z3.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: eor3 z2.d, z2.d, z0.d, z0.d
+; CHECK-SME-STREAMING-NEXT: mov z0.d, z2.d
+; CHECK-SME-STREAMING-NEXT: ret
+;
+; CHECK-SME-STREAMING-SSVE-AES-LABEL: clmul_nxv2i64_zext:
+; CHECK-SME-STREAMING-SSVE-AES: // %bb.0:
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmullt z2.q, z0.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: pmullb z0.q, z0.d, z1.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: trn1 z0.d, z0.d, z2.d
+; CHECK-SME-STREAMING-SSVE-AES-NEXT: ret
+;
; CHECK-SVE2-LABEL: clmul_nxv2i64_zext:
; CHECK-SVE2: // %bb.0:
; CHECK-SVE2-NEXT: mov z2.d, z1.d
>From 0dcc6572071591243471df4aa0b85cc5e9194d18 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Wed, 11 Feb 2026 14:21:36 +0000
Subject: [PATCH 5/5] Remove redundant predicate
---
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index b2e5cd442079b..2d24e22f682b9 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3993,10 +3993,8 @@ let Predicates = [HasSVE2_or_SME] in {
defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb", int_aarch64_sve_pmullb_pair>;
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt", int_aarch64_sve_pmullt_pair>;
-let Predicates = [HasSVE2_or_SME] in {
def : Pat<(nxv4i32 (clmul nxv4i32:$Rn, nxv4i32:$Rm)),
(TRN1_ZZZ_S (PMULLB_ZZZ_D $Rn, $Rm), (PMULLT_ZZZ_D $Rn, $Rm))>;
-}
// SVE2 bitwise shift and insert
defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", AArch64vsri>;
More information about the llvm-commits
mailing list