[llvm] [AArch64] Add clmul AArch64 lowering tests (PR #179495)

Matthew Devereau via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 3 08:31:32 PST 2026


https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/179495

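No description was provided. Per the patch below, this adds AArch64 codegen tests for `llvm.clmul` on fixed-width NEON vectors (clmul-fixed.ll) and scalable vectors (clmul-scalable.ll), and extends the existing clmul.ll.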

>From ef9e5ca21bfc6a7dba80ec89228728d242b3ef5d Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Tue, 3 Feb 2026 16:11:34 +0000
Subject: [PATCH] [AArch64] Add clmul AArch64 lowering tests

---
 llvm/test/CodeGen/AArch64/clmul-fixed.ll    |  458 ++++++++
 llvm/test/CodeGen/AArch64/clmul-scalable.ll | 1172 +++++++++++++++++++
 llvm/test/CodeGen/AArch64/clmul.ll          |  470 +++++++-
 3 files changed, 2084 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/clmul-fixed.ll
 create mode 100644 llvm/test/CodeGen/AArch64/clmul-scalable.ll

diff --git a/llvm/test/CodeGen/AArch64/clmul-fixed.ll b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
new file mode 100644
index 0000000000000..ff5eeb6cdb43d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
@@ -0,0 +1,458 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define <16 x i8> @clmul_v16i8_neon(<16 x i8> %x, <16 x i8> %y) { ; i8 lanes, 128-bit vector: the CHECK lines expect a single PMUL.
+; CHECK-LABEL: clmul_v16i8_neon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmul v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %a = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %x, <16 x i8> %y)
+  ret <16 x i8> %a
+}
+
+define <8 x i8> @clmul_v8i8_neon(<8 x i8> %x, <8 x i8> %y) { ; i8 lanes, 64-bit vector: the CHECK lines expect a single PMUL.
+; CHECK-LABEL: clmul_v8i8_neon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmul v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+  %a = call <8 x i8> @llvm.clmul.v8i8(<8 x i8> %x, <8 x i8> %y)
+  ret <8 x i8> %a
+}
+
+define <8 x i16> @clmul_v8i16_neon(<8 x i16> %x, <8 x i16> %y) { ; i16 lanes: CHECK lines expect a bit-by-bit expansion (AND v1 with each single-bit mask, MUL by v0, EOR the products), not PMUL.
+; CHECK-LABEL: clmul_v8i16_neon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.8h, #2
+; CHECK-NEXT:    movi v3.8h, #1
+; CHECK-NEXT:    movi v4.8h, #4
+; CHECK-NEXT:    movi v5.8h, #8
+; CHECK-NEXT:    movi v6.8h, #16
+; CHECK-NEXT:    movi v7.8h, #32
+; CHECK-NEXT:    movi v16.8h, #128
+; CHECK-NEXT:    movi v17.8h, #1, lsl #8
+; CHECK-NEXT:    movi v18.8h, #8, lsl #8
+; CHECK-NEXT:    movi v19.8h, #16, lsl #8
+; CHECK-NEXT:    movi v20.8h, #64
+; CHECK-NEXT:    movi v21.8h, #2, lsl #8
+; CHECK-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEXT:    and v3.16b, v1.16b, v3.16b
+; CHECK-NEXT:    and v4.16b, v1.16b, v4.16b
+; CHECK-NEXT:    and v5.16b, v1.16b, v5.16b
+; CHECK-NEXT:    movi v22.8h, #32, lsl #8
+; CHECK-NEXT:    and v6.16b, v1.16b, v6.16b
+; CHECK-NEXT:    and v7.16b, v1.16b, v7.16b
+; CHECK-NEXT:    and v16.16b, v1.16b, v16.16b
+; CHECK-NEXT:    and v17.16b, v1.16b, v17.16b
+; CHECK-NEXT:    and v18.16b, v1.16b, v18.16b
+; CHECK-NEXT:    and v19.16b, v1.16b, v19.16b
+; CHECK-NEXT:    mul v2.8h, v0.8h, v2.8h
+; CHECK-NEXT:    mul v3.8h, v0.8h, v3.8h
+; CHECK-NEXT:    mul v4.8h, v0.8h, v4.8h
+; CHECK-NEXT:    mul v5.8h, v0.8h, v5.8h
+; CHECK-NEXT:    and v20.16b, v1.16b, v20.16b
+; CHECK-NEXT:    movi v23.8h, #4, lsl #8
+; CHECK-NEXT:    movi v24.8h, #64, lsl #8
+; CHECK-NEXT:    mul v6.8h, v0.8h, v6.8h
+; CHECK-NEXT:    mul v7.8h, v0.8h, v7.8h
+; CHECK-NEXT:    mul v16.8h, v0.8h, v16.8h
+; CHECK-NEXT:    mul v17.8h, v0.8h, v17.8h
+; CHECK-NEXT:    and v21.16b, v1.16b, v21.16b
+; CHECK-NEXT:    mul v18.8h, v0.8h, v18.8h
+; CHECK-NEXT:    mul v19.8h, v0.8h, v19.8h
+; CHECK-NEXT:    and v22.16b, v1.16b, v22.16b
+; CHECK-NEXT:    eor v2.16b, v3.16b, v2.16b
+; CHECK-NEXT:    eor v3.16b, v4.16b, v5.16b
+; CHECK-NEXT:    mul v4.8h, v0.8h, v20.8h
+; CHECK-NEXT:    movi v20.8h, #128, lsl #8
+; CHECK-NEXT:    mul v5.8h, v0.8h, v21.8h
+; CHECK-NEXT:    and v21.16b, v1.16b, v23.16b
+; CHECK-NEXT:    and v23.16b, v1.16b, v24.16b
+; CHECK-NEXT:    mul v22.8h, v0.8h, v22.8h
+; CHECK-NEXT:    eor v6.16b, v6.16b, v7.16b
+; CHECK-NEXT:    eor v7.16b, v16.16b, v17.16b
+; CHECK-NEXT:    eor v16.16b, v18.16b, v19.16b
+; CHECK-NEXT:    eor v2.16b, v2.16b, v3.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v20.16b
+; CHECK-NEXT:    mul v3.8h, v0.8h, v21.8h
+; CHECK-NEXT:    mul v17.8h, v0.8h, v23.8h
+; CHECK-NEXT:    eor v4.16b, v6.16b, v4.16b
+; CHECK-NEXT:    eor v5.16b, v7.16b, v5.16b
+; CHECK-NEXT:    eor v6.16b, v16.16b, v22.16b
+; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    eor v1.16b, v2.16b, v4.16b
+; CHECK-NEXT:    eor v2.16b, v5.16b, v3.16b
+; CHECK-NEXT:    eor v3.16b, v6.16b, v17.16b
+; CHECK-NEXT:    eor v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    eor v0.16b, v3.16b, v0.16b
+; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    ret
+  %a = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %x, <8 x i16> %y)
+  ret <8 x i16> %a
+}
+
+define <4 x i16> @clmul_v4i16_neon(<4 x i16> %x, <4 x i16> %y) { ; i16 lanes, 64-bit vector: CHECK lines expect a bit-by-bit AND/MUL/EOR expansion rather than PMUL.
+; CHECK-LABEL: clmul_v4i16_neon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4h, #2
+; CHECK-NEXT:    movi v3.4h, #1
+; CHECK-NEXT:    movi v4.4h, #4
+; CHECK-NEXT:    movi v5.4h, #8
+; CHECK-NEXT:    movi v6.4h, #16
+; CHECK-NEXT:    movi v7.4h, #32
+; CHECK-NEXT:    movi v16.4h, #128
+; CHECK-NEXT:    movi v17.4h, #1, lsl #8
+; CHECK-NEXT:    movi v18.4h, #8, lsl #8
+; CHECK-NEXT:    movi v19.4h, #16, lsl #8
+; CHECK-NEXT:    movi v20.4h, #64
+; CHECK-NEXT:    movi v21.4h, #2, lsl #8
+; CHECK-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEXT:    and v5.8b, v1.8b, v5.8b
+; CHECK-NEXT:    movi v22.4h, #32, lsl #8
+; CHECK-NEXT:    and v6.8b, v1.8b, v6.8b
+; CHECK-NEXT:    and v7.8b, v1.8b, v7.8b
+; CHECK-NEXT:    and v16.8b, v1.8b, v16.8b
+; CHECK-NEXT:    and v17.8b, v1.8b, v17.8b
+; CHECK-NEXT:    and v18.8b, v1.8b, v18.8b
+; CHECK-NEXT:    and v19.8b, v1.8b, v19.8b
+; CHECK-NEXT:    mul v2.4h, v0.4h, v2.4h
+; CHECK-NEXT:    mul v3.4h, v0.4h, v3.4h
+; CHECK-NEXT:    mul v4.4h, v0.4h, v4.4h
+; CHECK-NEXT:    mul v5.4h, v0.4h, v5.4h
+; CHECK-NEXT:    and v20.8b, v1.8b, v20.8b
+; CHECK-NEXT:    movi v23.4h, #4, lsl #8
+; CHECK-NEXT:    movi v24.4h, #64, lsl #8
+; CHECK-NEXT:    mul v6.4h, v0.4h, v6.4h
+; CHECK-NEXT:    mul v7.4h, v0.4h, v7.4h
+; CHECK-NEXT:    mul v16.4h, v0.4h, v16.4h
+; CHECK-NEXT:    mul v17.4h, v0.4h, v17.4h
+; CHECK-NEXT:    and v21.8b, v1.8b, v21.8b
+; CHECK-NEXT:    mul v18.4h, v0.4h, v18.4h
+; CHECK-NEXT:    mul v19.4h, v0.4h, v19.4h
+; CHECK-NEXT:    and v22.8b, v1.8b, v22.8b
+; CHECK-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v3.8b, v4.8b, v5.8b
+; CHECK-NEXT:    mul v4.4h, v0.4h, v20.4h
+; CHECK-NEXT:    movi v20.4h, #128, lsl #8
+; CHECK-NEXT:    mul v5.4h, v0.4h, v21.4h
+; CHECK-NEXT:    and v21.8b, v1.8b, v23.8b
+; CHECK-NEXT:    and v23.8b, v1.8b, v24.8b
+; CHECK-NEXT:    mul v22.4h, v0.4h, v22.4h
+; CHECK-NEXT:    eor v6.8b, v6.8b, v7.8b
+; CHECK-NEXT:    eor v7.8b, v16.8b, v17.8b
+; CHECK-NEXT:    eor v16.8b, v18.8b, v19.8b
+; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEXT:    and v1.8b, v1.8b, v20.8b
+; CHECK-NEXT:    mul v3.4h, v0.4h, v21.4h
+; CHECK-NEXT:    mul v17.4h, v0.4h, v23.4h
+; CHECK-NEXT:    eor v4.8b, v6.8b, v4.8b
+; CHECK-NEXT:    eor v5.8b, v7.8b, v5.8b
+; CHECK-NEXT:    eor v6.8b, v16.8b, v22.8b
+; CHECK-NEXT:    mul v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    eor v1.8b, v2.8b, v4.8b
+; CHECK-NEXT:    eor v2.8b, v5.8b, v3.8b
+; CHECK-NEXT:    eor v3.8b, v6.8b, v17.8b
+; CHECK-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v3.8b, v0.8b
+; CHECK-NEXT:    eor v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    ret
+  %a = call <4 x i16> @llvm.clmul.v4i16(<4 x i16> %x, <4 x i16> %y)
+  ret <4 x i16> %a
+}
+
+define <4 x i32> @clmul_v4i32_neon(<4 x i32> %x, <4 x i32> %y) { ; i32 lanes: CHECK lines expect a bit-by-bit expansion (AND v1 with each single-bit mask, MUL by v0, EOR the products).
+; CHECK-LABEL: clmul_v4i32_neon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #2
+; CHECK-NEXT:    movi v3.4s, #1
+; CHECK-NEXT:    movi v4.4s, #4
+; CHECK-NEXT:    movi v5.4s, #8
+; CHECK-NEXT:    movi v6.4s, #16
+; CHECK-NEXT:    movi v7.4s, #32
+; CHECK-NEXT:    movi v16.4s, #64
+; CHECK-NEXT:    movi v17.4s, #128
+; CHECK-NEXT:    movi v18.4s, #1, lsl #8
+; CHECK-NEXT:    movi v19.4s, #2, lsl #8
+; CHECK-NEXT:    movi v20.4s, #8, lsl #8
+; CHECK-NEXT:    movi v21.4s, #128, lsl #16
+; CHECK-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEXT:    and v3.16b, v1.16b, v3.16b
+; CHECK-NEXT:    and v4.16b, v1.16b, v4.16b
+; CHECK-NEXT:    and v5.16b, v1.16b, v5.16b
+; CHECK-NEXT:    and v6.16b, v1.16b, v6.16b
+; CHECK-NEXT:    and v7.16b, v1.16b, v7.16b
+; CHECK-NEXT:    and v16.16b, v1.16b, v16.16b
+; CHECK-NEXT:    and v17.16b, v1.16b, v17.16b
+; CHECK-NEXT:    and v18.16b, v1.16b, v18.16b
+; CHECK-NEXT:    mul v2.4s, v0.4s, v2.4s
+; CHECK-NEXT:    mul v3.4s, v0.4s, v3.4s
+; CHECK-NEXT:    mul v4.4s, v0.4s, v4.4s
+; CHECK-NEXT:    mul v5.4s, v0.4s, v5.4s
+; CHECK-NEXT:    mul v6.4s, v0.4s, v6.4s
+; CHECK-NEXT:    mul v7.4s, v0.4s, v7.4s
+; CHECK-NEXT:    and v21.16b, v1.16b, v21.16b
+; CHECK-NEXT:    movi v22.4s, #8, lsl #16
+; CHECK-NEXT:    movi v23.4s, #2, lsl #24
+; CHECK-NEXT:    movi v25.4s, #4, lsl #24
+; CHECK-NEXT:    movi v24.4s, #32, lsl #16
+; CHECK-NEXT:    movi v26.4s, #8, lsl #24
+; CHECK-NEXT:    eor v2.16b, v3.16b, v2.16b
+; CHECK-NEXT:    eor v3.16b, v4.16b, v5.16b
+; CHECK-NEXT:    movi v4.4s, #16, lsl #8
+; CHECK-NEXT:    mul v5.4s, v0.4s, v16.4s
+; CHECK-NEXT:    mul v16.4s, v0.4s, v17.4s
+; CHECK-NEXT:    mul v17.4s, v0.4s, v18.4s
+; CHECK-NEXT:    eor v6.16b, v6.16b, v7.16b
+; CHECK-NEXT:    and v7.16b, v1.16b, v19.16b
+; CHECK-NEXT:    movi v19.4s, #32, lsl #8
+; CHECK-NEXT:    and v18.16b, v1.16b, v20.16b
+; CHECK-NEXT:    eor v2.16b, v2.16b, v3.16b
+; CHECK-NEXT:    movi v20.4s, #64, lsl #8
+; CHECK-NEXT:    mul v21.4s, v0.4s, v21.4s
+; CHECK-NEXT:    and v3.16b, v1.16b, v4.16b
+; CHECK-NEXT:    eor v5.16b, v6.16b, v5.16b
+; CHECK-NEXT:    movi v4.4s, #1, lsl #16
+; CHECK-NEXT:    eor v6.16b, v16.16b, v17.16b
+; CHECK-NEXT:    movi v16.4s, #2, lsl #16
+; CHECK-NEXT:    mul v7.4s, v0.4s, v7.4s
+; CHECK-NEXT:    mul v18.4s, v0.4s, v18.4s
+; CHECK-NEXT:    and v19.16b, v1.16b, v19.16b
+; CHECK-NEXT:    movi v17.4s, #4, lsl #8
+; CHECK-NEXT:    mul v3.4s, v0.4s, v3.4s
+; CHECK-NEXT:    eor v2.16b, v2.16b, v5.16b
+; CHECK-NEXT:    and v23.16b, v1.16b, v23.16b
+; CHECK-NEXT:    and v4.16b, v1.16b, v4.16b
+; CHECK-NEXT:    and v5.16b, v1.16b, v16.16b
+; CHECK-NEXT:    movi v16.4s, #64, lsl #16
+; CHECK-NEXT:    eor v6.16b, v6.16b, v7.16b
+; CHECK-NEXT:    mul v7.4s, v0.4s, v19.4s
+; CHECK-NEXT:    movi v19.4s, #4, lsl #16
+; CHECK-NEXT:    and v17.16b, v1.16b, v17.16b
+; CHECK-NEXT:    eor v3.16b, v18.16b, v3.16b
+; CHECK-NEXT:    and v18.16b, v1.16b, v20.16b
+; CHECK-NEXT:    movi v20.4s, #1, lsl #24
+; CHECK-NEXT:    mul v4.4s, v0.4s, v4.4s
+; CHECK-NEXT:    mul v5.4s, v0.4s, v5.4s
+; CHECK-NEXT:    and v16.16b, v1.16b, v16.16b
+; CHECK-NEXT:    mul v17.4s, v0.4s, v17.4s
+; CHECK-NEXT:    eor v3.16b, v3.16b, v7.16b
+; CHECK-NEXT:    and v7.16b, v1.16b, v19.16b
+; CHECK-NEXT:    mul v18.4s, v0.4s, v18.4s
+; CHECK-NEXT:    and v20.16b, v1.16b, v20.16b
+; CHECK-NEXT:    movi v19.4s, #128, lsl #8
+; CHECK-NEXT:    mul v16.4s, v0.4s, v16.4s
+; CHECK-NEXT:    eor v4.16b, v4.16b, v5.16b
+; CHECK-NEXT:    mul v5.4s, v0.4s, v7.4s
+; CHECK-NEXT:    and v7.16b, v1.16b, v22.16b
+; CHECK-NEXT:    movi v22.4s, #16, lsl #16
+; CHECK-NEXT:    mul v20.4s, v0.4s, v20.4s
+; CHECK-NEXT:    eor v6.16b, v6.16b, v17.16b
+; CHECK-NEXT:    eor v3.16b, v3.16b, v18.16b
+; CHECK-NEXT:    and v17.16b, v1.16b, v19.16b
+; CHECK-NEXT:    mul v18.4s, v0.4s, v23.4s
+; CHECK-NEXT:    and v19.16b, v1.16b, v25.16b
+; CHECK-NEXT:    eor v16.16b, v16.16b, v21.16b
+; CHECK-NEXT:    and v21.16b, v1.16b, v24.16b
+; CHECK-NEXT:    movi v23.4s, #32, lsl #24
+; CHECK-NEXT:    eor v4.16b, v4.16b, v5.16b
+; CHECK-NEXT:    mul v5.4s, v0.4s, v7.4s
+; CHECK-NEXT:    and v7.16b, v1.16b, v22.16b
+; CHECK-NEXT:    movi v22.4s, #16, lsl #24
+; CHECK-NEXT:    movi v24.4s, #64, lsl #24
+; CHECK-NEXT:    mul v17.4s, v0.4s, v17.4s
+; CHECK-NEXT:    eor v16.16b, v16.16b, v20.16b
+; CHECK-NEXT:    and v20.16b, v1.16b, v26.16b
+; CHECK-NEXT:    mul v19.4s, v0.4s, v19.4s
+; CHECK-NEXT:    mul v7.4s, v0.4s, v7.4s
+; CHECK-NEXT:    eor v2.16b, v2.16b, v6.16b
+; CHECK-NEXT:    mul v6.4s, v0.4s, v21.4s
+; CHECK-NEXT:    eor v4.16b, v4.16b, v5.16b
+; CHECK-NEXT:    and v21.16b, v1.16b, v23.16b
+; CHECK-NEXT:    eor v5.16b, v16.16b, v18.16b
+; CHECK-NEXT:    movi v16.4s, #128, lsl #24
+; CHECK-NEXT:    mul v18.4s, v0.4s, v20.4s
+; CHECK-NEXT:    and v20.16b, v1.16b, v22.16b
+; CHECK-NEXT:    and v22.16b, v1.16b, v24.16b
+; CHECK-NEXT:    eor v3.16b, v3.16b, v17.16b
+; CHECK-NEXT:    eor v4.16b, v4.16b, v7.16b
+; CHECK-NEXT:    eor v5.16b, v5.16b, v19.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v16.16b
+; CHECK-NEXT:    mul v7.4s, v0.4s, v20.4s
+; CHECK-NEXT:    mul v16.4s, v0.4s, v21.4s
+; CHECK-NEXT:    mul v17.4s, v0.4s, v22.4s
+; CHECK-NEXT:    eor v2.16b, v2.16b, v3.16b
+; CHECK-NEXT:    eor v3.16b, v4.16b, v6.16b
+; CHECK-NEXT:    eor v4.16b, v5.16b, v18.16b
+; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    eor v1.16b, v2.16b, v3.16b
+; CHECK-NEXT:    eor v2.16b, v4.16b, v7.16b
+; CHECK-NEXT:    eor v3.16b, v16.16b, v17.16b
+; CHECK-NEXT:    eor v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    eor v0.16b, v3.16b, v0.16b
+; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    ret
+  %a = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %x, <4 x i32> %y)
+  ret <4 x i32> %a
+}
+
+define <2 x i32> @clmul_v2i32_neon(<2 x i32> %x, <2 x i32> %y) { ; i32 lanes, 64-bit vector: CHECK lines expect a bit-by-bit AND/MUL/EOR expansion over single-bit masks of v1.
+; CHECK-LABEL: clmul_v2i32_neon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.2s, #2
+; CHECK-NEXT:    movi v3.2s, #1
+; CHECK-NEXT:    movi v4.2s, #4
+; CHECK-NEXT:    movi v5.2s, #8
+; CHECK-NEXT:    movi v6.2s, #16
+; CHECK-NEXT:    movi v7.2s, #32
+; CHECK-NEXT:    movi v16.2s, #64
+; CHECK-NEXT:    movi v17.2s, #128
+; CHECK-NEXT:    movi v18.2s, #1, lsl #8
+; CHECK-NEXT:    movi v19.2s, #2, lsl #8
+; CHECK-NEXT:    movi v20.2s, #8, lsl #8
+; CHECK-NEXT:    movi v21.2s, #128, lsl #16
+; CHECK-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEXT:    and v5.8b, v1.8b, v5.8b
+; CHECK-NEXT:    and v6.8b, v1.8b, v6.8b
+; CHECK-NEXT:    and v7.8b, v1.8b, v7.8b
+; CHECK-NEXT:    and v16.8b, v1.8b, v16.8b
+; CHECK-NEXT:    and v17.8b, v1.8b, v17.8b
+; CHECK-NEXT:    and v18.8b, v1.8b, v18.8b
+; CHECK-NEXT:    mul v2.2s, v0.2s, v2.2s
+; CHECK-NEXT:    mul v3.2s, v0.2s, v3.2s
+; CHECK-NEXT:    mul v4.2s, v0.2s, v4.2s
+; CHECK-NEXT:    mul v5.2s, v0.2s, v5.2s
+; CHECK-NEXT:    mul v6.2s, v0.2s, v6.2s
+; CHECK-NEXT:    mul v7.2s, v0.2s, v7.2s
+; CHECK-NEXT:    and v21.8b, v1.8b, v21.8b
+; CHECK-NEXT:    movi v22.2s, #8, lsl #16
+; CHECK-NEXT:    movi v23.2s, #2, lsl #24
+; CHECK-NEXT:    movi v25.2s, #4, lsl #24
+; CHECK-NEXT:    movi v24.2s, #32, lsl #16
+; CHECK-NEXT:    movi v26.2s, #8, lsl #24
+; CHECK-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v3.8b, v4.8b, v5.8b
+; CHECK-NEXT:    movi v4.2s, #16, lsl #8
+; CHECK-NEXT:    mul v5.2s, v0.2s, v16.2s
+; CHECK-NEXT:    mul v16.2s, v0.2s, v17.2s
+; CHECK-NEXT:    mul v17.2s, v0.2s, v18.2s
+; CHECK-NEXT:    eor v6.8b, v6.8b, v7.8b
+; CHECK-NEXT:    and v7.8b, v1.8b, v19.8b
+; CHECK-NEXT:    movi v19.2s, #32, lsl #8
+; CHECK-NEXT:    and v18.8b, v1.8b, v20.8b
+; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEXT:    movi v20.2s, #64, lsl #8
+; CHECK-NEXT:    mul v21.2s, v0.2s, v21.2s
+; CHECK-NEXT:    and v3.8b, v1.8b, v4.8b
+; CHECK-NEXT:    eor v5.8b, v6.8b, v5.8b
+; CHECK-NEXT:    movi v4.2s, #1, lsl #16
+; CHECK-NEXT:    eor v6.8b, v16.8b, v17.8b
+; CHECK-NEXT:    movi v16.2s, #2, lsl #16
+; CHECK-NEXT:    mul v7.2s, v0.2s, v7.2s
+; CHECK-NEXT:    mul v18.2s, v0.2s, v18.2s
+; CHECK-NEXT:    and v19.8b, v1.8b, v19.8b
+; CHECK-NEXT:    movi v17.2s, #4, lsl #8
+; CHECK-NEXT:    mul v3.2s, v0.2s, v3.2s
+; CHECK-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEXT:    and v23.8b, v1.8b, v23.8b
+; CHECK-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEXT:    and v5.8b, v1.8b, v16.8b
+; CHECK-NEXT:    movi v16.2s, #64, lsl #16
+; CHECK-NEXT:    eor v6.8b, v6.8b, v7.8b
+; CHECK-NEXT:    mul v7.2s, v0.2s, v19.2s
+; CHECK-NEXT:    movi v19.2s, #4, lsl #16
+; CHECK-NEXT:    and v17.8b, v1.8b, v17.8b
+; CHECK-NEXT:    eor v3.8b, v18.8b, v3.8b
+; CHECK-NEXT:    and v18.8b, v1.8b, v20.8b
+; CHECK-NEXT:    movi v20.2s, #1, lsl #24
+; CHECK-NEXT:    mul v4.2s, v0.2s, v4.2s
+; CHECK-NEXT:    mul v5.2s, v0.2s, v5.2s
+; CHECK-NEXT:    and v16.8b, v1.8b, v16.8b
+; CHECK-NEXT:    mul v17.2s, v0.2s, v17.2s
+; CHECK-NEXT:    eor v3.8b, v3.8b, v7.8b
+; CHECK-NEXT:    and v7.8b, v1.8b, v19.8b
+; CHECK-NEXT:    mul v18.2s, v0.2s, v18.2s
+; CHECK-NEXT:    and v20.8b, v1.8b, v20.8b
+; CHECK-NEXT:    movi v19.2s, #128, lsl #8
+; CHECK-NEXT:    mul v16.2s, v0.2s, v16.2s
+; CHECK-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEXT:    mul v5.2s, v0.2s, v7.2s
+; CHECK-NEXT:    and v7.8b, v1.8b, v22.8b
+; CHECK-NEXT:    movi v22.2s, #16, lsl #16
+; CHECK-NEXT:    mul v20.2s, v0.2s, v20.2s
+; CHECK-NEXT:    eor v6.8b, v6.8b, v17.8b
+; CHECK-NEXT:    eor v3.8b, v3.8b, v18.8b
+; CHECK-NEXT:    and v17.8b, v1.8b, v19.8b
+; CHECK-NEXT:    mul v18.2s, v0.2s, v23.2s
+; CHECK-NEXT:    and v19.8b, v1.8b, v25.8b
+; CHECK-NEXT:    eor v16.8b, v16.8b, v21.8b
+; CHECK-NEXT:    and v21.8b, v1.8b, v24.8b
+; CHECK-NEXT:    movi v23.2s, #32, lsl #24
+; CHECK-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEXT:    mul v5.2s, v0.2s, v7.2s
+; CHECK-NEXT:    and v7.8b, v1.8b, v22.8b
+; CHECK-NEXT:    movi v22.2s, #16, lsl #24
+; CHECK-NEXT:    movi v24.2s, #64, lsl #24
+; CHECK-NEXT:    mul v17.2s, v0.2s, v17.2s
+; CHECK-NEXT:    eor v16.8b, v16.8b, v20.8b
+; CHECK-NEXT:    and v20.8b, v1.8b, v26.8b
+; CHECK-NEXT:    mul v19.2s, v0.2s, v19.2s
+; CHECK-NEXT:    mul v7.2s, v0.2s, v7.2s
+; CHECK-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEXT:    mul v6.2s, v0.2s, v21.2s
+; CHECK-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEXT:    and v21.8b, v1.8b, v23.8b
+; CHECK-NEXT:    eor v5.8b, v16.8b, v18.8b
+; CHECK-NEXT:    movi v16.2s, #128, lsl #24
+; CHECK-NEXT:    mul v18.2s, v0.2s, v20.2s
+; CHECK-NEXT:    and v20.8b, v1.8b, v22.8b
+; CHECK-NEXT:    and v22.8b, v1.8b, v24.8b
+; CHECK-NEXT:    eor v3.8b, v3.8b, v17.8b
+; CHECK-NEXT:    eor v4.8b, v4.8b, v7.8b
+; CHECK-NEXT:    eor v5.8b, v5.8b, v19.8b
+; CHECK-NEXT:    and v1.8b, v1.8b, v16.8b
+; CHECK-NEXT:    mul v7.2s, v0.2s, v20.2s
+; CHECK-NEXT:    mul v16.2s, v0.2s, v21.2s
+; CHECK-NEXT:    mul v17.2s, v0.2s, v22.2s
+; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEXT:    eor v3.8b, v4.8b, v6.8b
+; CHECK-NEXT:    eor v4.8b, v5.8b, v18.8b
+; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    eor v1.8b, v2.8b, v3.8b
+; CHECK-NEXT:    eor v2.8b, v4.8b, v7.8b
+; CHECK-NEXT:    eor v3.8b, v16.8b, v17.8b
+; CHECK-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v3.8b, v0.8b
+; CHECK-NEXT:    eor v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    ret
+  %a = call <2 x i32> @llvm.clmul.v2i32(<2 x i32> %x, <2 x i32> %y)
+  ret <2 x i32> %a
+}
+
+; TODO: Fix: <2 x i64> clmul without +aes; test disabled until this case is handled.
+; define <2 x i64> @clmul_v2i64_neon(<2 x i64> %x, <2 x i64> %y) {
+;   %a = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y)
+;   ret <2 x i64> %a
+; }
+
+; TODO: Fix: <1 x i64> clmul without +aes; test disabled until this case is handled.
+; define <1 x i64> @clmul_v1i64_neon(<1 x i64> %x, <1 x i64> %y) {
+;   %a = call <1 x i64> @llvm.clmul.v1i64(<1 x i64> %x, <1 x i64> %y)
+;   ret <1 x i64> %a
+; }
+
+; TODO: Fix: lower with PMULL and PMULL2 .d variants which require FEAT_PMULL
+; define <2 x i64> @clmul_v2i64_neon_aes(<2 x i64> %x, <2 x i64> %y) #0 {
+;   %a = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y)
+;   ret <2 x i64> %a
+; }
+
+; TODO: Fix: lower with PMULL and PMULL2 .d variants which require FEAT_PMULL
+; define <1 x i64> @clmul_v1i64_neon_aes(<1 x i64> %x, <1 x i64> %y) #0 {
+;   %a = call <1 x i64> @llvm.clmul.v1i64(<1 x i64> %x, <1 x i64> %y)
+;   ret <1 x i64> %a
+; }
+
+attributes #0 = { "target-features"="+aes" }
diff --git a/llvm/test/CodeGen/AArch64/clmul-scalable.ll b/llvm/test/CodeGen/AArch64/clmul-scalable.ll
new file mode 100644
index 0000000000000..889bfccace0e8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/clmul-scalable.ll
@@ -0,0 +1,1172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define <vscale x 16 x i8> @clmul_nxv16i8_sve(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) #0 { ; Scalable i8: CHECK lines expect AND of z1 copies with each single-bit immediate (0x1..0x80), predicated MUL by z0 under ptrue, then EOR.
+; CHECK-LABEL: clmul_nxv16i8_sve:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    and z1.b, z1.b, #0x80
+; CHECK-NEXT:    and z2.b, z2.b, #0x2
+; CHECK-NEXT:    and z3.b, z3.b, #0x1
+; CHECK-NEXT:    and z4.b, z4.b, #0x4
+; CHECK-NEXT:    and z5.b, z5.b, #0x8
+; CHECK-NEXT:    and z6.b, z6.b, #0x10
+; CHECK-NEXT:    and z7.b, z7.b, #0x20
+; CHECK-NEXT:    and z24.b, z24.b, #0x40
+; CHECK-NEXT:    mul z2.b, p0/m, z2.b, z0.b
+; CHECK-NEXT:    mul z3.b, p0/m, z3.b, z0.b
+; CHECK-NEXT:    mul z4.b, p0/m, z4.b, z0.b
+; CHECK-NEXT:    mul z5.b, p0/m, z5.b, z0.b
+; CHECK-NEXT:    mul z6.b, p0/m, z6.b, z0.b
+; CHECK-NEXT:    mul z7.b, p0/m, z7.b, z0.b
+; CHECK-NEXT:    mul z24.b, p0/m, z24.b, z0.b
+; CHECK-NEXT:    mul z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    eor z3.d, z4.d, z5.d
+; CHECK-NEXT:    eor z4.d, z6.d, z7.d
+; CHECK-NEXT:    eor z2.d, z2.d, z3.d
+; CHECK-NEXT:    eor z3.d, z4.d, z24.d
+; CHECK-NEXT:    eor z1.d, z2.d, z3.d
+; CHECK-NEXT:    eor z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %a = call <vscale x 16 x i8> @llvm.clmul.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
+  ret <vscale x 16 x i8> %a
+}
+
+define <vscale x 8 x i16> @clmul_nxv8i16_sve(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) #0 { ; Scalable i16: CHECK lines expect AND of z1 copies with each single-bit immediate (0x1..0x8000), predicated MUL by z0 under ptrue, then EOR.
+; CHECK-LABEL: clmul_nxv8i16_sve:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    and z2.h, z2.h, #0x2
+; CHECK-NEXT:    and z3.h, z3.h, #0x1
+; CHECK-NEXT:    and z4.h, z4.h, #0x4
+; CHECK-NEXT:    and z5.h, z5.h, #0x8
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    and z6.h, z6.h, #0x10
+; CHECK-NEXT:    and z7.h, z7.h, #0x20
+; CHECK-NEXT:    and z24.h, z24.h, #0x80
+; CHECK-NEXT:    mul z2.h, p0/m, z2.h, z0.h
+; CHECK-NEXT:    mul z3.h, p0/m, z3.h, z0.h
+; CHECK-NEXT:    and z25.h, z25.h, #0x100
+; CHECK-NEXT:    mul z4.h, p0/m, z4.h, z0.h
+; CHECK-NEXT:    mul z5.h, p0/m, z5.h, z0.h
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    mul z6.h, p0/m, z6.h, z0.h
+; CHECK-NEXT:    mul z7.h, p0/m, z7.h, z0.h
+; CHECK-NEXT:    and z26.h, z26.h, #0x800
+; CHECK-NEXT:    mul z24.h, p0/m, z24.h, z0.h
+; CHECK-NEXT:    mul z25.h, p0/m, z25.h, z0.h
+; CHECK-NEXT:    and z27.h, z27.h, #0x1000
+; CHECK-NEXT:    mov z29.d, z1.d
+; CHECK-NEXT:    mov z30.d, z1.d
+; CHECK-NEXT:    and z28.h, z28.h, #0x40
+; CHECK-NEXT:    mul z26.h, p0/m, z26.h, z0.h
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    eor z3.d, z4.d, z5.d
+; CHECK-NEXT:    mul z27.h, p0/m, z27.h, z0.h
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z28.h, p0/m, z28.h, z0.h
+; CHECK-NEXT:    and z29.h, z29.h, #0x200
+; CHECK-NEXT:    and z30.h, z30.h, #0x2000
+; CHECK-NEXT:    eor z6.d, z6.d, z7.d
+; CHECK-NEXT:    eor z7.d, z24.d, z25.d
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    and z4.h, z4.h, #0x400
+; CHECK-NEXT:    and z5.h, z5.h, #0x4000
+; CHECK-NEXT:    eor z2.d, z2.d, z3.d
+; CHECK-NEXT:    mul z29.h, p0/m, z29.h, z0.h
+; CHECK-NEXT:    mul z30.h, p0/m, z30.h, z0.h
+; CHECK-NEXT:    eor z24.d, z26.d, z27.d
+; CHECK-NEXT:    eor z3.d, z6.d, z28.d
+; CHECK-NEXT:    mul z4.h, p0/m, z4.h, z0.h
+; CHECK-NEXT:    mul z5.h, p0/m, z5.h, z0.h
+; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    eor z6.d, z7.d, z29.d
+; CHECK-NEXT:    eor z7.d, z24.d, z30.d
+; CHECK-NEXT:    eor z1.d, z2.d, z3.d
+; CHECK-NEXT:    eor z2.d, z6.d, z4.d
+; CHECK-NEXT:    eor z3.d, z7.d, z5.d
+; CHECK-NEXT:    eor z1.d, z1.d, z2.d
+; CHECK-NEXT:    eor z0.d, z3.d, z0.d
+; CHECK-NEXT:    eor z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %a = call <vscale x 8 x i16> @llvm.clmul.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
+  ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 4 x i32> @clmul_nxv4i32_sve(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) #0 { ; Scalable i32: CHECK lines expect AND of z1 copies with each single-bit immediate (0x1..0x80000000), predicated MUL by z0 under ptrue, then EOR.
+; CHECK-LABEL: clmul_nxv4i32_sve:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    and z2.s, z2.s, #0x2
+; CHECK-NEXT:    and z3.s, z3.s, #0x1
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    and z4.s, z4.s, #0x4
+; CHECK-NEXT:    and z5.s, z5.s, #0x8
+; CHECK-NEXT:    and z6.s, z6.s, #0x10
+; CHECK-NEXT:    and z7.s, z7.s, #0x20
+; CHECK-NEXT:    and z24.s, z24.s, #0x80
+; CHECK-NEXT:    and z25.s, z25.s, #0x100
+; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z0.s
+; CHECK-NEXT:    mul z3.s, p0/m, z3.s, z0.s
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    mul z4.s, p0/m, z4.s, z0.s
+; CHECK-NEXT:    mul z5.s, p0/m, z5.s, z0.s
+; CHECK-NEXT:    and z26.s, z26.s, #0x40
+; CHECK-NEXT:    mul z6.s, p0/m, z6.s, z0.s
+; CHECK-NEXT:    mul z7.s, p0/m, z7.s, z0.s
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    mul z24.s, p0/m, z24.s, z0.s
+; CHECK-NEXT:    mul z25.s, p0/m, z25.s, z0.s
+; CHECK-NEXT:    and z27.s, z27.s, #0x200
+; CHECK-NEXT:    mul z26.s, p0/m, z26.s, z0.s
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    eor z4.d, z4.d, z5.d
+; CHECK-NEXT:    mov z29.d, z1.d
+; CHECK-NEXT:    and z28.s, z28.s, #0x8000
+; CHECK-NEXT:    mul z27.s, p0/m, z27.s, z0.s
+; CHECK-NEXT:    eor z5.d, z6.d, z7.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x400
+; CHECK-NEXT:    eor z6.d, z24.d, z25.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mul z28.s, p0/m, z28.s, z0.s
+; CHECK-NEXT:    eor z2.d, z2.d, z4.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    eor z4.d, z5.d, z26.d
+; CHECK-NEXT:    and z7.s, z7.s, #0x800
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    mul z3.s, p0/m, z3.s, z0.s
+; CHECK-NEXT:    eor z5.d, z6.d, z27.d
+; CHECK-NEXT:    and z24.s, z24.s, #0x1000
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z25.s, z25.s, #0x800000
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    mul z7.s, p0/m, z7.s, z0.s
+; CHECK-NEXT:    eor z2.d, z2.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mul z24.s, p0/m, z24.s, z0.s
+; CHECK-NEXT:    and z26.s, z26.s, #0x40000
+; CHECK-NEXT:    and z29.s, z29.s, #0x100000
+; CHECK-NEXT:    mul z25.s, p0/m, z25.s, z0.s
+; CHECK-NEXT:    eor z3.d, z5.d, z3.d
+; CHECK-NEXT:    and z6.s, z6.s, #0x2000
+; CHECK-NEXT:    and z4.s, z4.s, #0x10000
+; CHECK-NEXT:    and z27.s, z27.s, #0x1000000
+; CHECK-NEXT:    mul z26.s, p0/m, z26.s, z0.s
+; CHECK-NEXT:    mul z29.s, p0/m, z29.s, z0.s
+; CHECK-NEXT:    mul z6.s, p0/m, z6.s, z0.s
+; CHECK-NEXT:    eor z2.d, z2.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mul z4.s, p0/m, z4.s, z0.s
+; CHECK-NEXT:    mul z27.s, p0/m, z27.s, z0.s
+; CHECK-NEXT:    eor z5.d, z7.d, z24.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x20000
+; CHECK-NEXT:    eor z5.d, z5.d, z6.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z24.s, z24.s, #0x400000
+; CHECK-NEXT:    and z7.s, z7.s, #0x4000
+; CHECK-NEXT:    mul z3.s, p0/m, z3.s, z0.s
+; CHECK-NEXT:    and z6.s, z6.s, #0x80000
+; CHECK-NEXT:    mul z24.s, p0/m, z24.s, z0.s
+; CHECK-NEXT:    mul z7.s, p0/m, z7.s, z0.s
+; CHECK-NEXT:    eor z3.d, z4.d, z3.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mul z6.s, p0/m, z6.s, z0.s
+; CHECK-NEXT:    eor z24.d, z24.d, z25.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    eor z5.d, z5.d, z7.d
+; CHECK-NEXT:    and z4.s, z4.s, #0x2000000
+; CHECK-NEXT:    eor z3.d, z3.d, z26.d
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    and z25.s, z25.s, #0x4000000
+; CHECK-NEXT:    eor z7.d, z24.d, z27.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mul z4.s, p0/m, z4.s, z0.s
+; CHECK-NEXT:    eor z3.d, z3.d, z6.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z26.s, z26.s, #0x200000
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    eor z5.d, z5.d, z28.d
+; CHECK-NEXT:    mul z25.s, p0/m, z25.s, z0.s
+; CHECK-NEXT:    and z24.s, z24.s, #0x20000000
+; CHECK-NEXT:    and z6.s, z6.s, #0x8000000
+; CHECK-NEXT:    eor z3.d, z3.d, z29.d
+; CHECK-NEXT:    eor z4.d, z7.d, z4.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    mul z26.s, p0/m, z26.s, z0.s
+; CHECK-NEXT:    and z27.s, z27.s, #0x40000000
+; CHECK-NEXT:    mul z24.s, p0/m, z24.s, z0.s
+; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
+; CHECK-NEXT:    mul z6.s, p0/m, z6.s, z0.s
+; CHECK-NEXT:    eor z2.d, z2.d, z5.d
+; CHECK-NEXT:    and z7.s, z7.s, #0x10000000
+; CHECK-NEXT:    eor z4.d, z4.d, z25.d
+; CHECK-NEXT:    mul z27.s, p0/m, z27.s, z0.s
+; CHECK-NEXT:    eor z3.d, z3.d, z26.d
+; CHECK-NEXT:    mul z7.s, p0/m, z7.s, z0.s
+; CHECK-NEXT:    eor z4.d, z4.d, z6.d
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    eor z1.d, z2.d, z3.d
+; CHECK-NEXT:    eor z3.d, z24.d, z27.d
+; CHECK-NEXT:    eor z2.d, z4.d, z7.d
+; CHECK-NEXT:    eor z0.d, z3.d, z0.d
+; CHECK-NEXT:    eor z1.d, z1.d, z2.d
+; CHECK-NEXT:    eor z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %a = call <vscale x 4 x i32> @llvm.clmul.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
+  ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 2 x i64> @clmul_nxv2i64_sve(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) #0 {
+; CHECK-LABEL: clmul_nxv2i64_sve:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    and z2.d, z2.d, #0x2
+; CHECK-NEXT:    and z3.d, z3.d, #0x1
+; CHECK-NEXT:    and z4.d, z4.d, #0x4
+; CHECK-NEXT:    and z5.d, z5.d, #0x8
+; CHECK-NEXT:    and z6.d, z6.d, #0x10
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    and z7.d, z7.d, #0x20
+; CHECK-NEXT:    and z24.d, z24.d, #0x40
+; CHECK-NEXT:    and z25.d, z25.d, #0x80
+; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT:    mul z3.d, p0/m, z3.d, z0.d
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    mul z4.d, p0/m, z4.d, z0.d
+; CHECK-NEXT:    mul z5.d, p0/m, z5.d, z0.d
+; CHECK-NEXT:    and z26.d, z26.d, #0x100
+; CHECK-NEXT:    mul z6.d, p0/m, z6.d, z0.d
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    mul z24.d, p0/m, z24.d, z0.d
+; CHECK-NEXT:    mul z25.d, p0/m, z25.d, z0.d
+; CHECK-NEXT:    and z27.d, z27.d, #0x20000
+; CHECK-NEXT:    mul z26.d, p0/m, z26.d, z0.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    mov z29.d, z1.d
+; CHECK-NEXT:    eor z3.d, z4.d, z5.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    and z28.d, z28.d, #0x400000
+; CHECK-NEXT:    mul z27.d, p0/m, z27.d, z0.d
+; CHECK-NEXT:    eor z4.d, z6.d, z7.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    and z29.d, z29.d, #0x800000
+; CHECK-NEXT:    mov z30.d, z1.d
+; CHECK-NEXT:    mul z28.d, p0/m, z28.d, z0.d
+; CHECK-NEXT:    and z5.d, z5.d, #0x200
+; CHECK-NEXT:    eor z2.d, z2.d, z3.d
+; CHECK-NEXT:    and z6.d, z6.d, #0x800
+; CHECK-NEXT:    eor z3.d, z4.d, z24.d
+; CHECK-NEXT:    eor z4.d, z25.d, z26.d
+; CHECK-NEXT:    mul z29.d, p0/m, z29.d, z0.d
+; CHECK-NEXT:    and z7.d, z7.d, #0x1000
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    mul z5.d, p0/m, z5.d, z0.d
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mul z6.d, p0/m, z6.d, z0.d
+; CHECK-NEXT:    eor z2.d, z2.d, z3.d
+; CHECK-NEXT:    and z30.d, z30.d, #0x800000000
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    and z25.d, z25.d, #0x2000
+; CHECK-NEXT:    and z26.d, z26.d, #0x10000
+; CHECK-NEXT:    and z24.d, z24.d, #0x400
+; CHECK-NEXT:    mul z30.d, p0/m, z30.d, z0.d
+; CHECK-NEXT:    eor z3.d, z4.d, z5.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z25.d, p0/m, z25.d, z0.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mul z26.d, p0/m, z26.d, z0.d
+; CHECK-NEXT:    mul z24.d, p0/m, z24.d, z0.d
+; CHECK-NEXT:    eor z6.d, z6.d, z7.d
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x4000
+; CHECK-NEXT:    eor z6.d, z6.d, z25.d
+; CHECK-NEXT:    mul z5.d, p0/m, z5.d, z0.d
+; CHECK-NEXT:    eor z25.d, z26.d, z27.d
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    mul z4.d, p0/m, z4.d, z0.d
+; CHECK-NEXT:    and z7.d, z7.d, #0x80000
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    eor z3.d, z3.d, z24.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    and z27.d, z27.d, #0x1000000
+; CHECK-NEXT:    eor z5.d, z25.d, z5.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    and z26.d, z26.d, #0x8000
+; CHECK-NEXT:    eor z4.d, z6.d, z4.d
+; CHECK-NEXT:    and z24.d, z24.d, #0x2000000
+; CHECK-NEXT:    mul z27.d, p0/m, z27.d, z0.d
+; CHECK-NEXT:    eor z6.d, z28.d, z29.d
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    and z25.d, z25.d, #0x100000
+; CHECK-NEXT:    mov z29.d, z1.d
+; CHECK-NEXT:    eor z2.d, z2.d, z3.d
+; CHECK-NEXT:    mul z26.d, p0/m, z26.d, z0.d
+; CHECK-NEXT:    mul z24.d, p0/m, z24.d, z0.d
+; CHECK-NEXT:    eor z5.d, z5.d, z7.d
+; CHECK-NEXT:    and z28.d, z28.d, #0x4000000
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    mul z25.d, p0/m, z25.d, z0.d
+; CHECK-NEXT:    eor z6.d, z6.d, z27.d
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    and z29.d, z29.d, #0x40000000
+; CHECK-NEXT:    mul z28.d, p0/m, z28.d, z0.d
+; CHECK-NEXT:    eor z3.d, z4.d, z26.d
+; CHECK-NEXT:    and z7.d, z7.d, #0x200000
+; CHECK-NEXT:    and z27.d, z27.d, #0x20000000
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    eor z4.d, z5.d, z25.d
+; CHECK-NEXT:    eor z5.d, z6.d, z24.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    mul z29.d, p0/m, z29.d, z0.d
+; CHECK-NEXT:    eor z3.d, z2.d, z3.d
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    mul z27.d, p0/m, z27.d, z0.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    and z26.d, z26.d, #0x4000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x80000000
+; CHECK-NEXT:    eor z2.d, z5.d, z28.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    eor z4.d, z4.d, z7.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    and z25.d, z25.d, #0x2000000000
+; CHECK-NEXT:    mul z6.d, p0/m, z6.d, z0.d
+; CHECK-NEXT:    and z5.d, z5.d, #0x100000000
+; CHECK-NEXT:    eor z24.d, z27.d, z29.d
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    mul z26.d, p0/m, z26.d, z0.d
+; CHECK-NEXT:    and z28.d, z28.d, #0x8000000000
+; CHECK-NEXT:    and z7.d, z7.d, #0x8000000
+; CHECK-NEXT:    mul z25.d, p0/m, z25.d, z0.d
+; CHECK-NEXT:    mov z29.d, z1.d
+; CHECK-NEXT:    mul z5.d, p0/m, z5.d, z0.d
+; CHECK-NEXT:    eor z3.d, z3.d, z4.d
+; CHECK-NEXT:    eor z6.d, z24.d, z6.d
+; CHECK-NEXT:    and z27.d, z27.d, #0x200000000
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    mul z28.d, p0/m, z28.d, z0.d
+; CHECK-NEXT:    and z29.d, z29.d, #0x10000000
+; CHECK-NEXT:    eor z25.d, z25.d, z26.d
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    eor z5.d, z6.d, z5.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    mul z27.d, p0/m, z27.d, z0.d
+; CHECK-NEXT:    and z24.d, z24.d, #0x400000000
+; CHECK-NEXT:    mul z29.d, p0/m, z29.d, z0.d
+; CHECK-NEXT:    and z26.d, z26.d, #0x20000000000
+; CHECK-NEXT:    eor z2.d, z2.d, z7.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000000000
+; CHECK-NEXT:    eor z4.d, z25.d, z28.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    mul z24.d, p0/m, z24.d, z0.d
+; CHECK-NEXT:    eor z5.d, z5.d, z27.d
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    mul z26.d, p0/m, z26.d, z0.d
+; CHECK-NEXT:    and z7.d, z7.d, #0x40000000000
+; CHECK-NEXT:    eor z2.d, z2.d, z29.d
+; CHECK-NEXT:    mul z6.d, p0/m, z6.d, z0.d
+; CHECK-NEXT:    and z25.d, z25.d, #0x400000000000
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    and z27.d, z27.d, #0x800000000000
+; CHECK-NEXT:    mov z29.d, z1.d
+; CHECK-NEXT:    eor z5.d, z5.d, z24.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    mul z25.d, p0/m, z25.d, z0.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    and z28.d, z28.d, #0x400000000000000
+; CHECK-NEXT:    eor z4.d, z4.d, z6.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    mul z27.d, p0/m, z27.d, z0.d
+; CHECK-NEXT:    and z24.d, z24.d, #0x1000000000
+; CHECK-NEXT:    eor z5.d, z5.d, z30.d
+; CHECK-NEXT:    mov z30.d, z1.d
+; CHECK-NEXT:    mul z28.d, p0/m, z28.d, z0.d
+; CHECK-NEXT:    and z29.d, z29.d, #0x200000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x80000000000
+; CHECK-NEXT:    eor z4.d, z4.d, z26.d
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    mul z24.d, p0/m, z24.d, z0.d
+; CHECK-NEXT:    and z30.d, z30.d, #0x800000000000000
+; CHECK-NEXT:    mul z29.d, p0/m, z29.d, z0.d
+; CHECK-NEXT:    mul z6.d, p0/m, z6.d, z0.d
+; CHECK-NEXT:    and z26.d, z26.d, #0x1000000000000
+; CHECK-NEXT:    eor z4.d, z4.d, z7.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    mul z30.d, p0/m, z30.d, z0.d
+; CHECK-NEXT:    eor z3.d, z5.d, z24.d
+; CHECK-NEXT:    eor z5.d, z25.d, z27.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mul z26.d, p0/m, z26.d, z0.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    mov z27.d, z1.d
+; CHECK-NEXT:    and z7.d, z7.d, #0x2000000000000
+; CHECK-NEXT:    eor z4.d, z4.d, z6.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z24.d, z24.d, #0x100000000000000
+; CHECK-NEXT:    eor z2.d, z2.d, z3.d
+; CHECK-NEXT:    and z25.d, z25.d, #0x200000000000000
+; CHECK-NEXT:    and z27.d, z27.d, #0x100000000000
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    and z6.d, z6.d, #0x4000000000000
+; CHECK-NEXT:    eor z5.d, z5.d, z26.d
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    mul z24.d, p0/m, z24.d, z0.d
+; CHECK-NEXT:    mul z25.d, p0/m, z25.d, z0.d
+; CHECK-NEXT:    mul z27.d, p0/m, z27.d, z0.d
+; CHECK-NEXT:    mul z6.d, p0/m, z6.d, z0.d
+; CHECK-NEXT:    and z26.d, z26.d, #0x8000000000000
+; CHECK-NEXT:    eor z5.d, z5.d, z7.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    eor z4.d, z4.d, z27.d
+; CHECK-NEXT:    and z7.d, z7.d, #0x10000000000000
+; CHECK-NEXT:    mul z26.d, p0/m, z26.d, z0.d
+; CHECK-NEXT:    eor z5.d, z5.d, z6.d
+; CHECK-NEXT:    eor z6.d, z24.d, z25.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    mov z25.d, z1.d
+; CHECK-NEXT:    eor z3.d, z4.d, z29.d
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    and z24.d, z24.d, #0x20000000000000
+; CHECK-NEXT:    and z25.d, z25.d, #0x1000000000000000
+; CHECK-NEXT:    eor z5.d, z5.d, z26.d
+; CHECK-NEXT:    mov z26.d, z1.d
+; CHECK-NEXT:    eor z6.d, z6.d, z28.d
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    mul z24.d, p0/m, z24.d, z0.d
+; CHECK-NEXT:    mul z25.d, p0/m, z25.d, z0.d
+; CHECK-NEXT:    eor z5.d, z5.d, z7.d
+; CHECK-NEXT:    and z26.d, z26.d, #0x40000000000000
+; CHECK-NEXT:    and z28.d, z28.d, #0x2000000000000000
+; CHECK-NEXT:    eor z6.d, z6.d, z30.d
+; CHECK-NEXT:    mov z7.d, z1.d
+; CHECK-NEXT:    mov z30.d, z1.d
+; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
+; CHECK-NEXT:    mul z26.d, p0/m, z26.d, z0.d
+; CHECK-NEXT:    mul z28.d, p0/m, z28.d, z0.d
+; CHECK-NEXT:    eor z5.d, z5.d, z24.d
+; CHECK-NEXT:    and z7.d, z7.d, #0x80000000000000
+; CHECK-NEXT:    and z30.d, z30.d, #0x4000000000000000
+; CHECK-NEXT:    eor z6.d, z6.d, z25.d
+; CHECK-NEXT:    mul z7.d, p0/m, z7.d, z0.d
+; CHECK-NEXT:    mul z30.d, p0/m, z30.d, z0.d
+; CHECK-NEXT:    eor z4.d, z5.d, z26.d
+; CHECK-NEXT:    eor z5.d, z6.d, z28.d
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    eor z1.d, z2.d, z3.d
+; CHECK-NEXT:    eor z2.d, z4.d, z7.d
+; CHECK-NEXT:    eor z3.d, z5.d, z30.d
+; CHECK-NEXT:    eor z1.d, z1.d, z2.d
+; CHECK-NEXT:    eor z0.d, z3.d, z0.d
+; CHECK-NEXT:    eor z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
+  ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 16 x i8> @clmul_nxv16i8_sve2(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) #1 {
+; CHECK-LABEL: clmul_nxv16i8_sve2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmul z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a = call <vscale x 16 x i8> @llvm.clmul.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
+  ret <vscale x 16 x i8> %a
+}
+
+define <vscale x 8 x i16> @clmul_nxv8i16_sve2(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) #1 {
+; CHECK-LABEL: clmul_nxv8i16_sve2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z2.h, z2.h, #0x2
+; CHECK-NEXT:    and z3.h, z3.h, #0x1
+; CHECK-NEXT:    and z4.h, z4.h, #0x8
+; CHECK-NEXT:    and z5.h, z5.h, #0x4
+; CHECK-NEXT:    and z6.h, z6.h, #0x20
+; CHECK-NEXT:    mul z2.h, z0.h, z2.h
+; CHECK-NEXT:    mul z3.h, z0.h, z3.h
+; CHECK-NEXT:    mul z4.h, z0.h, z4.h
+; CHECK-NEXT:    mul z5.h, z0.h, z5.h
+; CHECK-NEXT:    mul z6.h, z0.h, z6.h
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    and z3.h, z3.h, #0x10
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.h, z0.h, z3.h
+; CHECK-NEXT:    and z4.h, z4.h, #0x80
+; CHECK-NEXT:    and z5.h, z5.h, #0x40
+; CHECK-NEXT:    mul z4.h, z0.h, z4.h
+; CHECK-NEXT:    mul z5.h, z0.h, z5.h
+; CHECK-NEXT:    eor3 z2.d, z2.d, z3.d, z6.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.h, z3.h, #0x200
+; CHECK-NEXT:    and z6.h, z6.h, #0x100
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.h, z0.h, z3.h
+; CHECK-NEXT:    mul z6.h, z0.h, z6.h
+; CHECK-NEXT:    and z4.h, z4.h, #0x800
+; CHECK-NEXT:    and z5.h, z5.h, #0x400
+; CHECK-NEXT:    mul z4.h, z0.h, z4.h
+; CHECK-NEXT:    mul z5.h, z0.h, z5.h
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.h, z3.h, #0x2000
+; CHECK-NEXT:    and z6.h, z6.h, #0x1000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    and z1.h, z1.h, #0x4000
+; CHECK-NEXT:    mul z3.h, z0.h, z3.h
+; CHECK-NEXT:    mul z4.h, z0.h, z6.h
+; CHECK-NEXT:    and z5.h, z5.h, #0x8000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z4.d, z3.d
+; CHECK-NEXT:    mul z3.h, z0.h, z5.h
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h
+; CHECK-NEXT:    eor3 z2.d, z2.d, z0.d, z3.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %a = call <vscale x 8 x i16> @llvm.clmul.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
+  ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 4 x i32> @clmul_nxv4i32_sve2(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) #1 {
+; CHECK-LABEL: clmul_nxv4i32_sve2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z2.s, z2.s, #0x2
+; CHECK-NEXT:    and z3.s, z3.s, #0x1
+; CHECK-NEXT:    and z4.s, z4.s, #0x8
+; CHECK-NEXT:    and z5.s, z5.s, #0x4
+; CHECK-NEXT:    and z6.s, z6.s, #0x20
+; CHECK-NEXT:    mul z2.s, z0.s, z2.s
+; CHECK-NEXT:    mul z3.s, z0.s, z3.s
+; CHECK-NEXT:    mul z4.s, z0.s, z4.s
+; CHECK-NEXT:    mul z5.s, z0.s, z5.s
+; CHECK-NEXT:    mul z6.s, z0.s, z6.s
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x10
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.s, z0.s, z3.s
+; CHECK-NEXT:    and z4.s, z4.s, #0x80
+; CHECK-NEXT:    and z5.s, z5.s, #0x40
+; CHECK-NEXT:    mul z4.s, z0.s, z4.s
+; CHECK-NEXT:    mul z5.s, z0.s, z5.s
+; CHECK-NEXT:    eor3 z2.d, z2.d, z3.d, z6.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x200
+; CHECK-NEXT:    and z6.s, z6.s, #0x100
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.s, z0.s, z3.s
+; CHECK-NEXT:    mul z6.s, z0.s, z6.s
+; CHECK-NEXT:    and z4.s, z4.s, #0x800
+; CHECK-NEXT:    and z5.s, z5.s, #0x400
+; CHECK-NEXT:    mul z4.s, z0.s, z4.s
+; CHECK-NEXT:    mul z5.s, z0.s, z5.s
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x2000
+; CHECK-NEXT:    and z6.s, z6.s, #0x1000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.s, z0.s, z3.s
+; CHECK-NEXT:    mul z6.s, z0.s, z6.s
+; CHECK-NEXT:    and z4.s, z4.s, #0x8000
+; CHECK-NEXT:    and z5.s, z5.s, #0x4000
+; CHECK-NEXT:    mul z4.s, z0.s, z4.s
+; CHECK-NEXT:    mul z5.s, z0.s, z5.s
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x20000
+; CHECK-NEXT:    and z6.s, z6.s, #0x10000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.s, z0.s, z3.s
+; CHECK-NEXT:    mul z6.s, z0.s, z6.s
+; CHECK-NEXT:    and z4.s, z4.s, #0x80000
+; CHECK-NEXT:    and z5.s, z5.s, #0x40000
+; CHECK-NEXT:    mul z4.s, z0.s, z4.s
+; CHECK-NEXT:    mul z5.s, z0.s, z5.s
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x200000
+; CHECK-NEXT:    and z6.s, z6.s, #0x100000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.s, z0.s, z3.s
+; CHECK-NEXT:    mul z6.s, z0.s, z6.s
+; CHECK-NEXT:    and z4.s, z4.s, #0x800000
+; CHECK-NEXT:    and z5.s, z5.s, #0x400000
+; CHECK-NEXT:    mul z4.s, z0.s, z4.s
+; CHECK-NEXT:    mul z5.s, z0.s, z5.s
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x2000000
+; CHECK-NEXT:    and z6.s, z6.s, #0x1000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.s, z0.s, z3.s
+; CHECK-NEXT:    mul z6.s, z0.s, z6.s
+; CHECK-NEXT:    and z4.s, z4.s, #0x8000000
+; CHECK-NEXT:    and z5.s, z5.s, #0x4000000
+; CHECK-NEXT:    mul z4.s, z0.s, z4.s
+; CHECK-NEXT:    mul z5.s, z0.s, z5.s
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.s, z3.s, #0x20000000
+; CHECK-NEXT:    and z6.s, z6.s, #0x10000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    and z1.s, z1.s, #0x40000000
+; CHECK-NEXT:    mul z3.s, z0.s, z3.s
+; CHECK-NEXT:    mul z4.s, z0.s, z6.s
+; CHECK-NEXT:    and z5.s, z5.s, #0x80000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z4.d, z3.d
+; CHECK-NEXT:    mul z3.s, z0.s, z5.s
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s
+; CHECK-NEXT:    eor3 z2.d, z2.d, z0.d, z3.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %a = call <vscale x 4 x i32> @llvm.clmul.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
+  ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 2 x i64> @clmul_nxv2i64_sve2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) #1 {
+; CHECK-LABEL: clmul_nxv2i64_sve2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z2.d, z2.d, #0x2
+; CHECK-NEXT:    and z3.d, z3.d, #0x1
+; CHECK-NEXT:    and z4.d, z4.d, #0x8
+; CHECK-NEXT:    and z5.d, z5.d, #0x4
+; CHECK-NEXT:    and z6.d, z6.d, #0x20
+; CHECK-NEXT:    mul z2.d, z0.d, z2.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x10
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80
+; CHECK-NEXT:    and z5.d, z5.d, #0x40
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z3.d, z6.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200
+; CHECK-NEXT:    and z6.d, z6.d, #0x100
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800
+; CHECK-NEXT:    and z5.d, z5.d, #0x400
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x8000
+; CHECK-NEXT:    and z5.d, z5.d, #0x4000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x20000
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80000
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200000
+; CHECK-NEXT:    and z6.d, z6.d, #0x100000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800000
+; CHECK-NEXT:    and z5.d, z5.d, #0x400000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x8000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x4000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x20000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x100000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x400000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x8000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x4000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x20000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x100000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x400000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x8000000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x4000000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x20000000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80000000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200000000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x100000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800000000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x400000000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000000000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    and z1.d, z1.d, #0x4000000000000000
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z4.d, z0.d, z6.d
+; CHECK-NEXT:    and z5.d, z5.d, #0x8000000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z4.d, z3.d
+; CHECK-NEXT:    mul z3.d, z0.d, z5.d
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z0.d, z3.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
+  ret <vscale x 2 x i64> %a
+}
+
+; TODO: lower with pmullb/pmullt on .d source elements (.q result), which requires FEAT_SVE_PMULL128
+define <vscale x 2 x i64> @clmul_nxv2i64_sve2_aes(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) #2 {
+; CHECK-LABEL: clmul_nxv2i64_sve2_aes:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z2.d, z2.d, #0x2
+; CHECK-NEXT:    and z3.d, z3.d, #0x1
+; CHECK-NEXT:    and z4.d, z4.d, #0x8
+; CHECK-NEXT:    and z5.d, z5.d, #0x4
+; CHECK-NEXT:    and z6.d, z6.d, #0x20
+; CHECK-NEXT:    mul z2.d, z0.d, z2.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x10
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80
+; CHECK-NEXT:    and z5.d, z5.d, #0x40
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z3.d, z6.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200
+; CHECK-NEXT:    and z6.d, z6.d, #0x100
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800
+; CHECK-NEXT:    and z5.d, z5.d, #0x400
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x8000
+; CHECK-NEXT:    and z5.d, z5.d, #0x4000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x20000
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80000
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200000
+; CHECK-NEXT:    and z6.d, z6.d, #0x100000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800000
+; CHECK-NEXT:    and z5.d, z5.d, #0x400000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x8000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x4000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x20000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x100000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x400000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x8000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x4000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x20000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x100000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x400000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x8000000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x4000000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x20000000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x10000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x80000000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x40000000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x200000000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x100000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z6.d, z0.d, z6.d
+; CHECK-NEXT:    and z4.d, z4.d, #0x800000000000000
+; CHECK-NEXT:    and z5.d, z5.d, #0x400000000000000
+; CHECK-NEXT:    mul z4.d, z0.d, z4.d
+; CHECK-NEXT:    mul z5.d, z0.d, z5.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z6.d, z3.d
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    and z3.d, z3.d, #0x2000000000000000
+; CHECK-NEXT:    and z6.d, z6.d, #0x1000000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z5.d, z4.d
+; CHECK-NEXT:    mov z5.d, z1.d
+; CHECK-NEXT:    and z1.d, z1.d, #0x4000000000000000
+; CHECK-NEXT:    mul z3.d, z0.d, z3.d
+; CHECK-NEXT:    mul z4.d, z0.d, z6.d
+; CHECK-NEXT:    and z5.d, z5.d, #0x8000000000000000
+; CHECK-NEXT:    eor3 z2.d, z2.d, z4.d, z3.d
+; CHECK-NEXT:    mul z3.d, z0.d, z5.d
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    eor3 z2.d, z2.d, z0.d, z3.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
+  ret <vscale x 2 x i64> %a
+}
+
+attributes #0 = { "target-features"="+sve" }
+attributes #1 = { "target-features"="+sve2" }
+attributes #2 = { "target-features"="+sve2,+sve2-aes" }
diff --git a/llvm/test/CodeGen/AArch64/clmul.ll b/llvm/test/CodeGen/AArch64/clmul.ll
index e0fa67607d151..a7260523ae802 100644
--- a/llvm/test/CodeGen/AArch64/clmul.ll
+++ b/llvm/test/CodeGen/AArch64/clmul.ll
@@ -1,29 +1,467 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+sve2 | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - | FileCheck %s
 
-define <vscale x 16 x i8> @clmul_nxv16i8_sve(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
-; CHECK-LABEL: clmul_nxv16i8_sve:
+define i8 @clmul_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: clmul_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pmul z0.b, z0.b, z1.b
+; CHECK-NEXT:    and w8, w1, #0x2
+; CHECK-NEXT:    and w9, w1, #0x1
+; CHECK-NEXT:    and w10, w1, #0x4
+; CHECK-NEXT:    mul w8, w0, w8
+; CHECK-NEXT:    and w11, w1, #0x8
+; CHECK-NEXT:    and w12, w1, #0x10
+; CHECK-NEXT:    mul w9, w0, w9
+; CHECK-NEXT:    and w13, w1, #0x20
+; CHECK-NEXT:    and w14, w1, #0x40
+; CHECK-NEXT:    mul w10, w0, w10
+; CHECK-NEXT:    mul w11, w0, w11
+; CHECK-NEXT:    mul w12, w0, w12
+; CHECK-NEXT:    eor w8, w9, w8
+; CHECK-NEXT:    and w9, w1, #0xffffff80
+; CHECK-NEXT:    mul w13, w0, w13
+; CHECK-NEXT:    mul w14, w0, w14
+; CHECK-NEXT:    eor w10, w10, w11
+; CHECK-NEXT:    mul w9, w0, w9
+; CHECK-NEXT:    eor w8, w8, w10
+; CHECK-NEXT:    eor w11, w12, w13
+; CHECK-NEXT:    eor w10, w11, w14
+; CHECK-NEXT:    eor w8, w8, w10
+; CHECK-NEXT:    eor w0, w8, w9
 ; CHECK-NEXT:    ret
-  %a = call <vscale x 16 x i8> @llvm.clmul.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
-  ret <vscale x 16 x i8> %a
+  %a = call i8 @llvm.clmul.i8(i8 %x, i8 %y)
+  ret i8 %a
 }
 
-define <16 x i8> @clmul_v16i8_neon(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-LABEL: clmul_v16i8_neon:
+define i16 @clmul_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: clmul_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pmul v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    and w8, w1, #0x2
+; CHECK-NEXT:    and w9, w1, #0x1
+; CHECK-NEXT:    and w10, w1, #0x4
+; CHECK-NEXT:    mul w8, w0, w8
+; CHECK-NEXT:    and w11, w1, #0x8
+; CHECK-NEXT:    and w12, w1, #0x10
+; CHECK-NEXT:    mul w9, w0, w9
+; CHECK-NEXT:    and w13, w1, #0x20
+; CHECK-NEXT:    and w15, w1, #0x80
+; CHECK-NEXT:    mul w10, w0, w10
+; CHECK-NEXT:    and w16, w1, #0x100
+; CHECK-NEXT:    and w2, w1, #0x800
+; CHECK-NEXT:    mul w11, w0, w11
+; CHECK-NEXT:    and w14, w1, #0x40
+; CHECK-NEXT:    and w17, w1, #0x200
+; CHECK-NEXT:    mul w12, w0, w12
+; CHECK-NEXT:    eor w8, w9, w8
+; CHECK-NEXT:    and w9, w1, #0x1000
+; CHECK-NEXT:    mul w13, w0, w13
+; CHECK-NEXT:    and w18, w1, #0x400
+; CHECK-NEXT:    mul w15, w0, w15
+; CHECK-NEXT:    eor w10, w10, w11
+; CHECK-NEXT:    and w11, w1, #0x2000
+; CHECK-NEXT:    mul w16, w0, w16
+; CHECK-NEXT:    eor w8, w8, w10
+; CHECK-NEXT:    and w10, w1, #0x4000
+; CHECK-NEXT:    mul w2, w0, w2
+; CHECK-NEXT:    eor w12, w12, w13
+; CHECK-NEXT:    and w13, w1, #0xffff8000
+; CHECK-NEXT:    mul w9, w0, w9
+; CHECK-NEXT:    mul w14, w0, w14
+; CHECK-NEXT:    eor w15, w15, w16
+; CHECK-NEXT:    mul w17, w0, w17
+; CHECK-NEXT:    mul w11, w0, w11
+; CHECK-NEXT:    eor w9, w2, w9
+; CHECK-NEXT:    mul w18, w0, w18
+; CHECK-NEXT:    eor w12, w12, w14
+; CHECK-NEXT:    mul w10, w0, w10
+; CHECK-NEXT:    eor w14, w15, w17
+; CHECK-NEXT:    eor w8, w8, w12
+; CHECK-NEXT:    mul w13, w0, w13
+; CHECK-NEXT:    eor w9, w9, w11
+; CHECK-NEXT:    eor w11, w14, w18
+; CHECK-NEXT:    eor w9, w9, w10
+; CHECK-NEXT:    eor w8, w8, w11
+; CHECK-NEXT:    eor w9, w9, w13
+; CHECK-NEXT:    eor w0, w8, w9
 ; CHECK-NEXT:    ret
-  %a = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %x, <16 x i8> %y)
-  ret <16 x i8> %a
+  %a = call i16 @llvm.clmul.i16(i16 %x, i16 %y)
+  ret i16 %a
 }
 
-define <8 x i8> @clmul_v8i8_neon(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: clmul_v8i8_neon:
+define i32 @clmul_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: clmul_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pmul v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    and w8, w1, #0x2
+; CHECK-NEXT:    and w9, w1, #0x1
+; CHECK-NEXT:    and w10, w1, #0x4
+; CHECK-NEXT:    mul w8, w0, w8
+; CHECK-NEXT:    and w11, w1, #0x8
+; CHECK-NEXT:    and w12, w1, #0x10
+; CHECK-NEXT:    mul w9, w0, w9
+; CHECK-NEXT:    and w13, w1, #0x20
+; CHECK-NEXT:    and w14, w1, #0x40
+; CHECK-NEXT:    mul w10, w0, w10
+; CHECK-NEXT:    and w2, w1, #0x800
+; CHECK-NEXT:    and w15, w1, #0x80
+; CHECK-NEXT:    mul w11, w0, w11
+; CHECK-NEXT:    and w16, w1, #0x100
+; CHECK-NEXT:    and w17, w1, #0x200
+; CHECK-NEXT:    mul w12, w0, w12
+; CHECK-NEXT:    eor w8, w9, w8
+; CHECK-NEXT:    and w9, w1, #0x1000
+; CHECK-NEXT:    mul w13, w0, w13
+; CHECK-NEXT:    and w18, w1, #0x400
+; CHECK-NEXT:    mul w14, w0, w14
+; CHECK-NEXT:    eor w10, w10, w11
+; CHECK-NEXT:    and w11, w1, #0x2000
+; CHECK-NEXT:    mul w2, w0, w2
+; CHECK-NEXT:    eor w8, w8, w10
+; CHECK-NEXT:    and w10, w1, #0x4000
+; CHECK-NEXT:    mul w9, w0, w9
+; CHECK-NEXT:    eor w12, w12, w13
+; CHECK-NEXT:    and w13, w1, #0x8000
+; CHECK-NEXT:    mul w15, w0, w15
+; CHECK-NEXT:    eor w12, w12, w14
+; CHECK-NEXT:    and w14, w1, #0x10000
+; CHECK-NEXT:    mul w16, w0, w16
+; CHECK-NEXT:    eor w8, w8, w12
+; CHECK-NEXT:    and w12, w1, #0x20000
+; CHECK-NEXT:    mul w11, w0, w11
+; CHECK-NEXT:    eor w9, w2, w9
+; CHECK-NEXT:    and w2, w1, #0x400000
+; CHECK-NEXT:    mul w17, w0, w17
+; CHECK-NEXT:    mul w10, w0, w10
+; CHECK-NEXT:    eor w15, w15, w16
+; CHECK-NEXT:    and w16, w1, #0x40000
+; CHECK-NEXT:    mul w13, w0, w13
+; CHECK-NEXT:    eor w9, w9, w11
+; CHECK-NEXT:    and w11, w1, #0x800000
+; CHECK-NEXT:    mul w18, w0, w18
+; CHECK-NEXT:    eor w15, w15, w17
+; CHECK-NEXT:    and w17, w1, #0x80000
+; CHECK-NEXT:    mul w14, w0, w14
+; CHECK-NEXT:    eor w9, w9, w10
+; CHECK-NEXT:    and w10, w1, #0x1000000
+; CHECK-NEXT:    mul w12, w0, w12
+; CHECK-NEXT:    eor w9, w9, w13
+; CHECK-NEXT:    and w13, w1, #0x2000000
+; CHECK-NEXT:    mul w16, w0, w16
+; CHECK-NEXT:    eor w15, w15, w18
+; CHECK-NEXT:    and w18, w1, #0x100000
+; CHECK-NEXT:    mul w2, w0, w2
+; CHECK-NEXT:    eor w8, w8, w15
+; CHECK-NEXT:    and w15, w1, #0x200000
+; CHECK-NEXT:    mul w11, w0, w11
+; CHECK-NEXT:    eor w12, w14, w12
+; CHECK-NEXT:    and w14, w1, #0x4000000
+; CHECK-NEXT:    mul w17, w0, w17
+; CHECK-NEXT:    eor w12, w12, w16
+; CHECK-NEXT:    and w16, w1, #0x8000000
+; CHECK-NEXT:    mul w10, w0, w10
+; CHECK-NEXT:    eor w8, w8, w9
+; CHECK-NEXT:    mul w13, w0, w13
+; CHECK-NEXT:    eor w11, w2, w11
+; CHECK-NEXT:    and w2, w1, #0x20000000
+; CHECK-NEXT:    mul w18, w0, w18
+; CHECK-NEXT:    eor w12, w12, w17
+; CHECK-NEXT:    and w17, w1, #0x10000000
+; CHECK-NEXT:    mul w14, w0, w14
+; CHECK-NEXT:    eor w10, w11, w10
+; CHECK-NEXT:    and w11, w1, #0x40000000
+; CHECK-NEXT:    mul w15, w0, w15
+; CHECK-NEXT:    eor w10, w10, w13
+; CHECK-NEXT:    and w13, w1, #0x80000000
+; CHECK-NEXT:    mul w16, w0, w16
+; CHECK-NEXT:    eor w12, w12, w18
+; CHECK-NEXT:    mul w17, w0, w17
+; CHECK-NEXT:    eor w10, w10, w14
+; CHECK-NEXT:    mul w2, w0, w2
+; CHECK-NEXT:    eor w9, w12, w15
+; CHECK-NEXT:    mul w11, w0, w11
+; CHECK-NEXT:    eor w10, w10, w16
+; CHECK-NEXT:    eor w8, w8, w9
+; CHECK-NEXT:    mul w13, w0, w13
+; CHECK-NEXT:    eor w9, w10, w17
+; CHECK-NEXT:    eor w8, w8, w9
+; CHECK-NEXT:    eor w10, w2, w11
+; CHECK-NEXT:    eor w9, w10, w13
+; CHECK-NEXT:    eor w0, w8, w9
 ; CHECK-NEXT:    ret
-  %a = call <8 x i8> @llvm.clmul.v8i8(<8 x i8> %x, <8 x i8> %y)
-  ret <8 x i8> %a
+  %a = call i32 @llvm.clmul.i32(i32 %x, i32 %y)
+  ret i32 %a
+}
+
+define i64 @clmul_i64(i64 %x, i64 %y) {
+; CHECK-LABEL: clmul_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #304
+; CHECK-NEXT:    stp x29, x30, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 304
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w24, -48
+; CHECK-NEXT:    .cfi_offset w25, -56
+; CHECK-NEXT:    .cfi_offset w26, -64
+; CHECK-NEXT:    .cfi_offset w27, -72
+; CHECK-NEXT:    .cfi_offset w28, -80
+; CHECK-NEXT:    .cfi_offset w30, -88
+; CHECK-NEXT:    .cfi_offset w29, -96
+; CHECK-NEXT:    and x8, x1, #0x2
+; CHECK-NEXT:    mul x9, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x1
+; CHECK-NEXT:    mul x10, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x4
+; CHECK-NEXT:    mul x11, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x8
+; CHECK-NEXT:    mul x13, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x10
+; CHECK-NEXT:    eor x9, x10, x9
+; CHECK-NEXT:    mul x12, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x20
+; CHECK-NEXT:    mul x14, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x40
+; CHECK-NEXT:    eor x10, x11, x13
+; CHECK-NEXT:    and x11, x1, #0x10000000000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #200] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x80
+; CHECK-NEXT:    mul x15, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x100
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #160] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x200
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #152] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x400
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #184] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x800
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #192] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x1000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #144] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x2000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #136] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x4000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #176] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x8000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #168] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x10000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #120] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x20000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #80] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x40000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #72] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x80000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #104] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x100000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #96] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x200000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #128] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x400000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #112] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x800000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #64] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x1000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #40] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x2000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Reload
+; CHECK-NEXT:    str x8, [sp, #32] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x4000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #56] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x8000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #48] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x10000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #88] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x20000000
+; CHECK-NEXT:    mul x26, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x40000000
+; CHECK-NEXT:    mul x22, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x80000000
+; CHECK-NEXT:    mul x23, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x100000000
+; CHECK-NEXT:    mul x24, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x200000000
+; CHECK-NEXT:    eor x22, x26, x22
+; CHECK-NEXT:    ldr x26, [sp, #32] // 8-byte Reload
+; CHECK-NEXT:    mul x25, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x400000000
+; CHECK-NEXT:    eor x22, x22, x23
+; CHECK-NEXT:    and x23, x1, #0x400000000000000
+; CHECK-NEXT:    mul x27, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x800000000
+; CHECK-NEXT:    eor x22, x22, x24
+; CHECK-NEXT:    ldr x24, [sp, #48] // 8-byte Reload
+; CHECK-NEXT:    mul x28, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x1000000000
+; CHECK-NEXT:    eor x22, x22, x25
+; CHECK-NEXT:    ldr x25, [sp, #88] // 8-byte Reload
+; CHECK-NEXT:    mul x29, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x2000000000
+; CHECK-NEXT:    eor x22, x22, x27
+; CHECK-NEXT:    mul x21, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x4000000000
+; CHECK-NEXT:    mul x7, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x8000000000
+; CHECK-NEXT:    mul x19, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x10000000000
+; CHECK-NEXT:    mul x5, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x20000000000
+; CHECK-NEXT:    eor x7, x21, x7
+; CHECK-NEXT:    mul x6, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x40000000000
+; CHECK-NEXT:    mul x20, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x80000000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    mul x23, x0, x23
+; CHECK-NEXT:    str x8, [sp, #24] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x100000000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #16] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x200000000000
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    str x8, [sp, #8] // 8-byte Spill
+; CHECK-NEXT:    and x8, x1, #0x400000000000
+; CHECK-NEXT:    mul x4, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x800000000000
+; CHECK-NEXT:    mul x17, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x1000000000000
+; CHECK-NEXT:    mul x18, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x2000000000000
+; CHECK-NEXT:    mul x3, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x4000000000000
+; CHECK-NEXT:    eor x17, x4, x17
+; CHECK-NEXT:    mul x2, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x8000000000000
+; CHECK-NEXT:    eor x17, x17, x18
+; CHECK-NEXT:    and x18, x1, #0x4000000000000000
+; CHECK-NEXT:    mul x16, x0, x8
+; CHECK-NEXT:    eor x8, x9, x10
+; CHECK-NEXT:    ldr x9, [sp, #160] // 8-byte Reload
+; CHECK-NEXT:    eor x10, x12, x14
+; CHECK-NEXT:    ldr x12, [sp, #80] // 8-byte Reload
+; CHECK-NEXT:    eor x17, x17, x3
+; CHECK-NEXT:    eor x9, x15, x9
+; CHECK-NEXT:    mul x15, x0, x11
+; CHECK-NEXT:    ldr x11, [sp, #200] // 8-byte Reload
+; CHECK-NEXT:    eor x17, x17, x2
+; CHECK-NEXT:    eor x10, x10, x11
+; CHECK-NEXT:    ldr x11, [sp, #152] // 8-byte Reload
+; CHECK-NEXT:    mul x18, x0, x18
+; CHECK-NEXT:    eor x8, x8, x10
+; CHECK-NEXT:    ldr x10, [sp, #184] // 8-byte Reload
+; CHECK-NEXT:    eor x16, x17, x16
+; CHECK-NEXT:    eor x9, x9, x11
+; CHECK-NEXT:    and x11, x1, #0x20000000000000
+; CHECK-NEXT:    ldr x17, [sp, #24] // 8-byte Reload
+; CHECK-NEXT:    eor x9, x9, x10
+; CHECK-NEXT:    mul x14, x0, x11
+; CHECK-NEXT:    and x10, x1, #0x40000000000000
+; CHECK-NEXT:    eor x11, x8, x9
+; CHECK-NEXT:    ldr x8, [sp, #192] // 8-byte Reload
+; CHECK-NEXT:    ldr x9, [sp, #144] // 8-byte Reload
+; CHECK-NEXT:    mul x13, x0, x10
+; CHECK-NEXT:    ldr x10, [sp, #136] // 8-byte Reload
+; CHECK-NEXT:    eor x15, x16, x15
+; CHECK-NEXT:    eor x8, x8, x9
+; CHECK-NEXT:    ldr x9, [sp, #120] // 8-byte Reload
+; CHECK-NEXT:    ldr x16, [sp, #16] // 8-byte Reload
+; CHECK-NEXT:    eor x8, x8, x10
+; CHECK-NEXT:    ldr x10, [sp, #72] // 8-byte Reload
+; CHECK-NEXT:    eor x9, x9, x12
+; CHECK-NEXT:    ldr x12, [sp, #176] // 8-byte Reload
+; CHECK-NEXT:    eor x14, x15, x14
+; CHECK-NEXT:    eor x9, x9, x10
+; CHECK-NEXT:    and x10, x1, #0x80000000000000
+; CHECK-NEXT:    ldr x15, [sp, #8] // 8-byte Reload
+; CHECK-NEXT:    eor x8, x8, x12
+; CHECK-NEXT:    ldr x12, [sp, #104] // 8-byte Reload
+; CHECK-NEXT:    eor x13, x14, x13
+; CHECK-NEXT:    eor x9, x9, x12
+; CHECK-NEXT:    mul x12, x0, x10
+; CHECK-NEXT:    ldr x10, [sp, #168] // 8-byte Reload
+; CHECK-NEXT:    eor x8, x8, x10
+; CHECK-NEXT:    ldr x10, [sp, #96] // 8-byte Reload
+; CHECK-NEXT:    eor x8, x11, x8
+; CHECK-NEXT:    ldr x11, [sp, #128] // 8-byte Reload
+; CHECK-NEXT:    eor x9, x9, x10
+; CHECK-NEXT:    and x10, x1, #0x100000000000000
+; CHECK-NEXT:    eor x9, x9, x11
+; CHECK-NEXT:    ldr x11, [sp, #64] // 8-byte Reload
+; CHECK-NEXT:    mul x10, x0, x10
+; CHECK-NEXT:    eor x8, x8, x9
+; CHECK-NEXT:    ldr x9, [sp, #112] // 8-byte Reload
+; CHECK-NEXT:    eor x9, x9, x11
+; CHECK-NEXT:    and x11, x1, #0x200000000000000
+; CHECK-NEXT:    eor x9, x9, x30
+; CHECK-NEXT:    mul x11, x0, x11
+; CHECK-NEXT:    eor x9, x9, x26
+; CHECK-NEXT:    ldr x26, [sp, #56] // 8-byte Reload
+; CHECK-NEXT:    eor x9, x9, x26
+; CHECK-NEXT:    eor x9, x9, x24
+; CHECK-NEXT:    and x24, x1, #0x800000000000000
+; CHECK-NEXT:    eor x9, x9, x25
+; CHECK-NEXT:    mul x24, x0, x24
+; CHECK-NEXT:    eor x10, x10, x11
+; CHECK-NEXT:    eor x8, x8, x9
+; CHECK-NEXT:    eor x9, x22, x28
+; CHECK-NEXT:    and x22, x1, #0x1000000000000000
+; CHECK-NEXT:    eor x9, x9, x29
+; CHECK-NEXT:    mul x21, x0, x22
+; CHECK-NEXT:    and x11, x1, #0x8000000000000000
+; CHECK-NEXT:    eor x8, x8, x9
+; CHECK-NEXT:    eor x9, x7, x19
+; CHECK-NEXT:    and x7, x1, #0x2000000000000000
+; CHECK-NEXT:    eor x9, x9, x5
+; CHECK-NEXT:    mul x4, x0, x7
+; CHECK-NEXT:    eor x10, x10, x23
+; CHECK-NEXT:    eor x9, x9, x6
+; CHECK-NEXT:    eor x10, x10, x24
+; CHECK-NEXT:    eor x9, x9, x20
+; CHECK-NEXT:    mul x11, x0, x11
+; CHECK-NEXT:    eor x9, x9, x17
+; CHECK-NEXT:    eor x10, x10, x21
+; CHECK-NEXT:    eor x9, x9, x16
+; CHECK-NEXT:    ldp x20, x19, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT:    eor x9, x9, x15
+; CHECK-NEXT:    eor x10, x10, x4
+; CHECK-NEXT:    eor x8, x8, x9
+; CHECK-NEXT:    eor x9, x13, x12
+; CHECK-NEXT:    eor x10, x10, x18
+; CHECK-NEXT:    ldp x22, x21, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT:    eor x8, x8, x9
+; CHECK-NEXT:    ldp x24, x23, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT:    eor x9, x10, x11
+; CHECK-NEXT:    ldp x26, x25, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT:    eor x0, x8, x9
+; CHECK-NEXT:    ldp x28, x27, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #304
+; CHECK-NEXT:    ret
+  %a = call i64 @llvm.clmul.i64(i64 %x, i64 %y)
+  ret i64 %a
 }



More information about the llvm-commits mailing list